From: James Molloy <james.molloy@arm.com>
Date: Mon, 9 Nov 2015 14:32:05 +0000 (+0000)
Subject: [LoopVectorize] Address post-commit feedback on r250032
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=ae263d48b03ca2f9eb94c11a236d998ff3453619

[LoopVectorize] Address post-commit feedback on r250032

Implemented as many of Michael's suggestions as were possible:
  * Run clang-format on the added code while it is still fresh.
  * Tried to change Value* to Instruction* in many places in computeMinimumValueSizes. Unfortunately, Constants need to be handled in several places, so this wasn't possible.
  * Reduce the pass list on loop-vectorization-factors.ll.
  * Fix a bug where we were querying MinBWs for I->getOperand(0) but using MinBWs[I].

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252469 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 4153c843c40..6f25687b635 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -438,21 +438,21 @@ llvm::Value *llvm::getSplatValue(Value *V) {
   return InsertEltInst->getOperand(1);
 }
 
-DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes(
-  ArrayRef<BasicBlock*> Blocks, DemandedBits &DB,
-  const TargetTransformInfo *TTI) {
+DenseMap<Instruction *, uint64_t>
+llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
+                               const TargetTransformInfo *TTI) {
 
   // DemandedBits will give us every value's live-out bits. But we want
   // to ensure no extra casts would need to be inserted, so every DAG
   // of connected values must have the same minimum bitwidth.
-  EquivalenceClasses<Value*> ECs;
-  SmallVector<Value*,16> Worklist;
-  SmallPtrSet<Value*,4> Roots;
-  SmallPtrSet<Value*,16> Visited;
-  DenseMap<Value*,uint64_t> DBits;
-  SmallPtrSet<Instruction*,4> InstructionSet;
-  DenseMap<Instruction*, uint64_t> MinBWs;
-  
+  EquivalenceClasses<Value *> ECs;
+  SmallVector<Value *, 16> Worklist;
+  SmallPtrSet<Value *, 4> Roots;
+  SmallPtrSet<Value *, 16> Visited;
+  DenseMap<Value *, uint64_t> DBits;
+  SmallPtrSet<Instruction *, 4> InstructionSet;
+  DenseMap<Instruction *, uint64_t> MinBWs;
+
   // Determine the roots. We work bottom-up, from truncs or icmps.
   bool SeenExtFromIllegalType = false;
   for (auto *BB : Blocks)
@@ -462,7 +462,7 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes(
       if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
           !TTI->isTypeLegal(I.getOperand(0)->getType()))
         SeenExtFromIllegalType = true;
-    
+
       // Only deal with non-vector integers up to 64-bits wide.
       if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
           !I.getType()->isVectorTy() &&
@@ -471,7 +471,7 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes(
         // don't add it to the worklist.
         if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
           continue;
-      
+
         Worklist.push_back(&I);
         Roots.insert(&I);
       }
@@ -479,12 +479,12 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes(
   // Early exit.
   if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
     return MinBWs;
-  
+
   // Now proceed breadth-first, unioning values together.
   while (!Worklist.empty()) {
     Value *Val = Worklist.pop_back_val();
     Value *Leader = ECs.getOrInsertLeaderValue(Val);
-    
+
     if (Visited.count(Val))
       continue;
     Visited.insert(Val);
@@ -497,11 +497,11 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes(
     // If we encounter a type that is larger than 64 bits, we can't represent
     // it so bail out.
     if (DB.getDemandedBits(I).getBitWidth() > 64)
-      return DenseMap<Instruction*,uint64_t>();
-    
+      return DenseMap<Instruction *, uint64_t>();
+
     uint64_t V = DB.getDemandedBits(I).getZExtValue();
     DBits[Leader] |= V;
-    
+
     // Casts, loads and instructions outside of our range terminate a chain
     // successfully.
     if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
@@ -540,7 +540,7 @@ DenseMap<Instruction*, uint64_t> llvm::computeMinimumValueSizes(
     for (auto *U : I.first->users())
       if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
         DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
-  
+
   for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
     uint64_t LeaderDemandedBits = 0;
     for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1a6021cb3c8..d3101c69e01 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2100,7 +2100,6 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
   // If this scalar is unknown, assume that it is a constant or that it is
   // loop invariant. Broadcast V and save the value for future uses.
   Value *B = getBroadcastInstrs(V);
-
   return WidenMap.splat(V, B);
 }
 
@@ -5409,8 +5408,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   case Instruction::ICmp:
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
-    if (VF > 1 && MinBWs.count(dyn_cast<Instruction>(I->getOperand(0))))
-      ValTy = IntegerType::get(ValTy->getContext(), MinBWs[I]);
+    Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
+    auto It = MinBWs.find(Op0AsInstruction);
+    if (VF > 1 && It != MinBWs.end())
+      ValTy = IntegerType::get(ValTy->getContext(), It->second);
     VectorTy = ToVectorTy(ValTy, VF);
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
   }
diff --git a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index f5b6a643c07..eee31049180 100644
--- a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S < %s -basicaa -loop-vectorize -simplifycfg -instsimplify -instcombine -licm -force-vector-interleave=1 2>&1 | FileCheck %s
+; RUN: opt -S < %s -basicaa -loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64"