[LoopVectorizer] Remove redundant variable LastBypassBlock. NFC.
[oota-llvm.git] / lib / Transforms / Vectorize / LoopVectorize.cpp
index b7faa204927d70803bd3391946dae459498b17de..f1f18613d9e73eec99a451a91aaba5bae487dc5c 100644 (file)
@@ -96,7 +96,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 #include <map>
@@ -850,6 +850,8 @@ public:
         return B.CreateAdd(StartValue, Index);
 
       case IK_PtrInduction:
+        assert(Index->getType() == StepValue->getType() &&
+               "Index type does not match StepValue type");
         if (StepValue->isMinusOne())
           Index = B.CreateNeg(Index);
         else if (!StepValue->isOne())
@@ -2413,9 +2415,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
          LoopVectorBody.push_back(NewIfBlock);
          VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
          Builder.SetInsertPoint(InsertPt);
-         Instruction *OldBr = IfBlock->getTerminator();
-         BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
-         OldBr->eraseFromParent();
+         ReplaceInstWithInst(IfBlock->getTerminator(),
+                             BranchInst::Create(CondBlock, NewIfBlock, Cmp));
          IfBlock = NewIfBlock;
       }
     }
@@ -2646,22 +2647,20 @@ void InnerLoopVectorizer::createEmptyLoop() {
   Value *Cmp =
       BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
 
-  BasicBlock *LastBypassBlock = BypassBlock;
-
   // Generate code to check that the loop's trip count, which we computed by
   // adding one to the backedge-taken count, will not overflow.
   {
     auto PastOverflowCheck =
         std::next(BasicBlock::iterator(OverflowCheckAnchor));
     BasicBlock *CheckBlock =
-      LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
+      BypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
     LoopBypassBlocks.push_back(CheckBlock);
-    Instruction *OldTerm = LastBypassBlock->getTerminator();
-    BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm);
-    OldTerm->eraseFromParent();
-    LastBypassBlock = CheckBlock;
+    ReplaceInstWithInst(
+        BypassBlock->getTerminator(),
+        BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow));
+    BypassBlock = CheckBlock;
   }
 
   // Generate the code to check that the strides we assumed to be one are really
@@ -2670,24 +2669,23 @@ void InnerLoopVectorizer::createEmptyLoop() {
   Instruction *StrideCheck;
   Instruction *FirstCheckInst;
   std::tie(FirstCheckInst, StrideCheck) =
-      addStrideCheck(LastBypassBlock->getTerminator());
+      addStrideCheck(BypassBlock->getTerminator());
   if (StrideCheck) {
     AddedSafetyChecks = true;
     // Create a new block containing the stride check.
     BasicBlock *CheckBlock =
-        LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
+        BypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
     LoopBypassBlocks.push_back(CheckBlock);
 
     // Replace the branch into the stride check block with a conditional branch
     // for the "few elements case".
-    Instruction *OldTerm = LastBypassBlock->getTerminator();
-    BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
-    OldTerm->eraseFromParent();
+    ReplaceInstWithInst(BypassBlock->getTerminator(),
+                        BranchInst::Create(MiddleBlock, CheckBlock, Cmp));
 
     Cmp = StrideCheck;
-    LastBypassBlock = CheckBlock;
+    BypassBlock = CheckBlock;
   }
 
   // Generate the code that checks at runtime if arrays overlap. We put the
@@ -2695,29 +2693,27 @@ void InnerLoopVectorizer::createEmptyLoop() {
   // faster.
   Instruction *MemRuntimeCheck;
   std::tie(FirstCheckInst, MemRuntimeCheck) =
-    Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator());
+    Legal->getLAI()->addRuntimeCheck(BypassBlock->getTerminator());
   if (MemRuntimeCheck) {
     AddedSafetyChecks = true;
     // Create a new block containing the memory check.
     BasicBlock *CheckBlock =
-        LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
+        BypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
     if (ParentLoop)
       ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
     LoopBypassBlocks.push_back(CheckBlock);
 
     // Replace the branch into the memory check block with a conditional branch
     // for the "few elements case".
-    Instruction *OldTerm = LastBypassBlock->getTerminator();
-    BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
-    OldTerm->eraseFromParent();
+    ReplaceInstWithInst(BypassBlock->getTerminator(),
+                        BranchInst::Create(MiddleBlock, CheckBlock, Cmp));
 
     Cmp = MemRuntimeCheck;
-    LastBypassBlock = CheckBlock;
+    BypassBlock = CheckBlock;
   }
 
-  LastBypassBlock->getTerminator()->eraseFromParent();
-  BranchInst::Create(MiddleBlock, VectorPH, Cmp,
-                     LastBypassBlock);
+  ReplaceInstWithInst(BypassBlock->getTerminator(),
+                      BranchInst::Create(MiddleBlock, VectorPH, Cmp));
 
   // We are going to resume the execution of the scalar loop.
   // Go over all of the induction variables that we found and fix the
@@ -2798,7 +2794,10 @@ void InnerLoopVectorizer::createEmptyLoop() {
       break;
     }
     case LoopVectorizationLegality::IK_PtrInduction: {
-      EndValue = II.transform(BypassBuilder, CountRoundDown);
+      Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+                                                   II.StepValue->getType(),
+                                                   "cast.crd");
+      EndValue = II.transform(BypassBuilder, CRD);
       EndValue->setName("ptr.ind.end");
       break;
     }
@@ -2851,10 +2850,8 @@ void InnerLoopVectorizer::createEmptyLoop() {
   Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
                                 ResumeIndex, "cmp.n",
                                 MiddleBlock->getTerminator());
-
-  BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
-  // Remove the old terminator.
-  MiddleBlock->getTerminator()->eraseFromParent();
+  ReplaceInstWithInst(MiddleBlock->getTerminator(),
+                      BranchInst::Create(ExitBlock, ScalarPH, CmpN));
 
   // Create i+1 and fill the PHINode.
   Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
@@ -2906,7 +2903,7 @@ struct CSEDenseMapInfo {
     return LHS->isIdenticalTo(RHS);
   }
 };
-} // namespace
+}
 
 /// \brief Check whether this block is a predicated block.
 /// Due to if predication of stores we might create a sequence of "if(pred) a[i]
@@ -3448,12 +3445,14 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
       // This is the normalized GEP that starts counting at zero.
       Value *NormalizedIdx =
           Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
+      NormalizedIdx =
+          Builder.CreateSExtOrTrunc(NormalizedIdx, II.StepValue->getType());
       // This is the vector of results. Notice that we don't generate
       // vector geps because scalar geps result in better code.
       for (unsigned part = 0; part < UF; ++part) {
         if (VF == 1) {
           int EltIndex = part;
-          Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+          Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
           Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
           Value *SclrGep = II.transform(Builder, GlobalIdx);
           SclrGep->setName("next.gep");
@@ -3464,7 +3463,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
         Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
         for (unsigned int i = 0; i < VF; ++i) {
           int EltIndex = i + part * VF;
-          Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+          Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
           Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
           Value *SclrGep = II.transform(Builder, GlobalIdx);
           SclrGep->setName("next.gep");
@@ -4642,10 +4641,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
 
     if (VF == 0)
       VF = MaxVectorSize;
-
-    // If the trip count that we found modulo the vectorization factor is not
-    // zero then we require a tail.
-    if (VF < 2) {
+    else {
+      // If the trip count that we found modulo the vectorization factor is not
+      // zero then we require a tail.
       emitAnalysis(VectorizationReport() <<
                    "cannot optimize for size and vectorize at the "
                    "same time. Enable vectorization of this loop "
@@ -5507,9 +5505,8 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
         LoopVectorBody.push_back(NewIfBlock);
         VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
         Builder.SetInsertPoint(InsertPt);
-        Instruction *OldBr = IfBlock->getTerminator();
-        BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
-        OldBr->eraseFromParent();
+        ReplaceInstWithInst(IfBlock->getTerminator(),
+                            BranchInst::Create(CondBlock, NewIfBlock, Cmp));
         IfBlock = NewIfBlock;
       }
   }