#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>
#include <map>
return B.CreateAdd(StartValue, Index);
case IK_PtrInduction:
+ assert(Index->getType() == StepValue->getType() &&
+ "Index type does not match StepValue type");
if (StepValue->isMinusOne())
Index = B.CreateNeg(Index);
else if (!StepValue->isOne())
LoopVectorBody.push_back(NewIfBlock);
VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
+ ReplaceInstWithInst(IfBlock->getTerminator(),
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp));
IfBlock = NewIfBlock;
}
}
Value *Cmp =
BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
- BasicBlock *LastBypassBlock = BypassBlock;
-
// Generate code to check that the loops trip count that we computed by adding
// one to the backedge-taken count will not overflow.
{
auto PastOverflowCheck =
std::next(BasicBlock::iterator(OverflowCheckAnchor));
BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
+ BypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
- Instruction *OldTerm = LastBypassBlock->getTerminator();
- BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm);
- OldTerm->eraseFromParent();
- LastBypassBlock = CheckBlock;
+ ReplaceInstWithInst(
+ BypassBlock->getTerminator(),
+ BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow));
+ BypassBlock = CheckBlock;
}
// Generate the code to check that the strides we assumed to be one are really
Instruction *StrideCheck;
Instruction *FirstCheckInst;
std::tie(FirstCheckInst, StrideCheck) =
- addStrideCheck(LastBypassBlock->getTerminator());
+ addStrideCheck(BypassBlock->getTerminator());
if (StrideCheck) {
AddedSafetyChecks = true;
// Create a new block containing the stride check.
BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
+ BypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
- Instruction *OldTerm = LastBypassBlock->getTerminator();
- BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
- OldTerm->eraseFromParent();
+ ReplaceInstWithInst(BypassBlock->getTerminator(),
+ BranchInst::Create(MiddleBlock, CheckBlock, Cmp));
Cmp = StrideCheck;
- LastBypassBlock = CheckBlock;
+ BypassBlock = CheckBlock;
}
// Generate the code that checks in runtime if arrays overlap. We put the
// faster.
Instruction *MemRuntimeCheck;
std::tie(FirstCheckInst, MemRuntimeCheck) =
- Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator());
+ Legal->getLAI()->addRuntimeCheck(BypassBlock->getTerminator());
if (MemRuntimeCheck) {
AddedSafetyChecks = true;
// Create a new block containing the memory check.
BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
+ BypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
- Instruction *OldTerm = LastBypassBlock->getTerminator();
- BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
- OldTerm->eraseFromParent();
+ ReplaceInstWithInst(BypassBlock->getTerminator(),
+ BranchInst::Create(MiddleBlock, CheckBlock, Cmp));
Cmp = MemRuntimeCheck;
- LastBypassBlock = CheckBlock;
+ BypassBlock = CheckBlock;
}
- LastBypassBlock->getTerminator()->eraseFromParent();
- BranchInst::Create(MiddleBlock, VectorPH, Cmp,
- LastBypassBlock);
+ ReplaceInstWithInst(BypassBlock->getTerminator(),
+ BranchInst::Create(MiddleBlock, VectorPH, Cmp));
// We are going to resume the execution of the scalar loop.
// Go over all of the induction variables that we found and fix the
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
- EndValue = II.transform(BypassBuilder, CountRoundDown);
+ Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
+ II.StepValue->getType(),
+ "cast.crd");
+ EndValue = II.transform(BypassBuilder, CRD);
EndValue->setName("ptr.ind.end");
break;
}
Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd,
ResumeIndex, "cmp.n",
MiddleBlock->getTerminator());
-
- BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
- // Remove the old terminator.
- MiddleBlock->getTerminator()->eraseFromParent();
+ ReplaceInstWithInst(MiddleBlock->getTerminator(),
+ BranchInst::Create(ExitBlock, ScalarPH, CmpN));
// Create i+1 and fill the PHINode.
Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
return LHS->isIdenticalTo(RHS);
}
};
-} // namespace
+}
/// \brief Check whether this block is a predicated block.
/// Due to if predication of stores we might create a sequence of "if(pred) a[i]
// This is the normalized GEP that starts counting at zero.
Value *NormalizedIdx =
Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
+ NormalizedIdx =
+ Builder.CreateSExtOrTrunc(NormalizedIdx, II.StepValue->getType());
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) {
int EltIndex = part;
- Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
int EltIndex = i + part * VF;
- Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
+ Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex);
Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
if (VF == 0)
VF = MaxVectorSize;
-
- // If the trip count that we found modulo the vectorization factor is not
- // zero then we require a tail.
- if (VF < 2) {
+ else {
+ // If the trip count that we found modulo the vectorization factor is not
+ // zero then we require a tail.
emitAnalysis(VectorizationReport() <<
"cannot optimize for size and vectorize at the "
"same time. Enable vectorization of this loop "
LoopVectorBody.push_back(NewIfBlock);
VectorLp->addBasicBlockToLoop(NewIfBlock, *LI);
Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
+ ReplaceInstWithInst(IfBlock->getTerminator(),
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp));
IfBlock = NewIfBlock;
}
}