/// and reduction variables that were found to a given vectorization factor.
class InnerLoopVectorizer {
public:
- InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
+ InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, unsigned VecWidth,
- unsigned UnrollFactor, SCEVUnionPredicate &Preds)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
- VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
+ unsigned UnrollFactor)
+ : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
+ VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr),
- AddedSafetyChecks(false), Preds(Preds) {}
+ AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
// MinimumBitWidths maps scalar integer values to the smallest bitwidth they
// can be validly truncated to. The cost model has assumed this truncation
// will happen when vectorizing.
void vectorize(LoopVectorizationLegality *L,
- DenseMap<Instruction*,uint64_t> MinimumBitWidths) {
+ MapVector<Instruction*,uint64_t> MinimumBitWidths) {
MinBWs = MinimumBitWidths;
Legal = L;
// Create a new empty loop. Unlink the old loop and connect the new one.
/// The original loop.
Loop *OrigLoop;
- /// Scev analysis to use.
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies
+ /// dynamic knowledge to simplify SCEV expressions and converts them to a
+ /// more usable form.
+ PredicatedScalarEvolution &PSE;
/// Loop Info.
LoopInfo *LI;
/// Dominator Tree.
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
- DenseMap<Instruction*,uint64_t> MinBWs;
+ MapVector<Instruction*,uint64_t> MinBWs;
LoopVectorizationLegality *Legal;
// Record whether runtime check is added.
bool AddedSafetyChecks;
-
- /// The SCEV predicate containing all the SCEV-related assumptions.
- /// The predicate is used to simplify existing expressions in the
- /// context of existing SCEV assumptions. Since legality checking is
- /// not done here, we don't need to use this predicate to record
- /// further assumptions.
- SCEVUnionPredicate &Preds;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
- InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, unsigned UnrollFactor,
- SCEVUnionPredicate &Preds)
- : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor,
- Preds) {}
+ InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, unsigned UnrollFactor)
+ : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
private:
void scalarizeInstruction(Instruction *Instr,
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
- InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT,
- SCEVUnionPredicate &Preds)
- : SE(SE), TheLoop(L), DT(DT), Preds(Preds) {}
+ InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
+ DominatorTree *DT)
+ : PSE(PSE), TheLoop(L), DT(DT) {}
~InterleavedAccessInfo() {
SmallSet<InterleaveGroup *, 4> DelSet;
}
private:
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
+ /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
+ /// The interleaved access analysis can also add new predicates (for example
+ /// by versioning strides of pointers).
+ PredicatedScalarEvolution &PSE;
Loop *TheLoop;
DominatorTree *DT;
- /// The SCEV predicate containing all the SCEV-related assumptions.
- /// The predicate is used to simplify SCEV expressions in the
- /// context of existing SCEV assumptions. The interleaved access
- /// analysis can also add new predicates (for example by versioning
- /// strides of pointers).
- SCEVUnionPredicate &Preds;
-
/// Holds the relationships between the members and the interleave group.
DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
- TargetLibraryInfo *TLI, AliasAnalysis *AA,
- Function *F, const TargetTransformInfo *TTI,
+ LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE,
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ AliasAnalysis *AA, Function *F,
+ const TargetTransformInfo *TTI,
LoopAccessAnalysis *LAA,
LoopVectorizationRequirements *R,
- const LoopVectorizeHints *H,
- SCEVUnionPredicate &Preds)
- : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
- TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr),
- InterleaveInfo(SE, L, DT, Preds), Induction(nullptr),
- WidestIndTy(nullptr), HasFunNoNaNAttr(false), Requirements(R), Hints(H),
- Preds(Preds) {}
+ const LoopVectorizeHints *H)
+ : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F),
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PSE, L, DT),
+ Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
+ Requirements(R), Hints(H) {}
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
/// The loop that we evaluate.
Loop *TheLoop;
- /// Scev analysis.
- ScalarEvolution *SE;
+ /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
+ /// Applies dynamic knowledge to simplify SCEV expressions in the context
+ /// of existing SCEV assumptions. The analysis will also add a minimal set
+ /// of new predicates if this is required to enable vectorization and
+ /// unrolling.
+ PredicatedScalarEvolution &PSE;
/// Target Library Info.
TargetLibraryInfo *TLI;
/// Parent function
/// While vectorizing these instructions we have to generate a
/// call to the appropriate masked intrinsic
SmallPtrSet<const Instruction *, 8> MaskedOp;
-
- /// The SCEV predicate containing all the SCEV-related assumptions.
- /// The predicate is used to simplify SCEV expressions in the
- /// context of existing SCEV assumptions. The analysis will also
- /// add a minimal set of new predicates if this is required to
- /// enable vectorization/unrolling.
- SCEVUnionPredicate &Preds;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// different operations.
class LoopVectorizationCostModel {
public:
- LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
- LoopVectorizationLegality *Legal,
+ LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
const TargetLibraryInfo *TLI, DemandedBits *DB,
AssumptionCache *AC, const Function *F,
- const LoopVectorizeHints *Hints,
- SmallPtrSetImpl<const Value *> &ValuesToIgnore,
- SCEVUnionPredicate &Preds)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
- TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {}
+ const LoopVectorizeHints *Hints)
+ : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
+ AC(AC), TheFunction(F), Hints(Hints) {}
/// Information about vectorization costs
struct VectorizationFactor {
SmallVector<RegisterUsage, 8>
calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
+ /// Collect values we want to ignore in the cost model.
+ void collectValuesToIgnore();
+
private:
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
- DenseMap<Instruction*,uint64_t> MinBWs;
+ MapVector<Instruction*,uint64_t> MinBWs;
/// The loop that we evaluate.
Loop *TheLoop;
- /// Scev analysis.
- ScalarEvolution *SE;
+ /// Predicated scalar evolution analysis.
+ PredicatedScalarEvolution &PSE;
/// Loop Info analysis.
LoopInfo *LI;
/// Vectorization legality.
const TargetTransformInfo &TTI;
/// Target Library Info.
const TargetLibraryInfo *TLI;
- /// Demanded bits analysis
+ /// Demanded bits analysis.
DemandedBits *DB;
+ /// Assumption cache.
+ AssumptionCache *AC;
const Function *TheFunction;
- // Loop Vectorize Hint.
+ /// Loop Vectorize Hint.
const LoopVectorizeHints *Hints;
- // Values to ignore in the cost model.
- const SmallPtrSetImpl<const Value *> &ValuesToIgnore;
+ /// Values to ignore in the cost model.
+ SmallPtrSet<const Value *, 16> ValuesToIgnore;
+ /// Values to ignore in the cost model when VF > 1.
+ SmallPtrSet<const Value *, 16> VecValuesToIgnore;
};
/// \brief This holds vectorization requirements that must be verified late in
}
}
- SCEVUnionPredicate Preds;
+ PredicatedScalarEvolution PSE(*SE);
// Check if it is legal to vectorize the loop.
LoopVectorizationRequirements Requirements;
- LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA,
- &Requirements, &Hints, Preds);
+ LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA,
+ &Requirements, &Hints);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
return false;
}
- // Collect values we want to ignore in the cost model. This includes
- // type-promoting instructions we identified during reduction detection.
- SmallPtrSet<const Value *, 32> ValuesToIgnore;
- CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore);
- for (auto &Reduction : *LVL.getReductionVars()) {
- RecurrenceDescriptor &RedDes = Reduction.second;
- SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
- ValuesToIgnore.insert(Casts.begin(), Casts.end());
- }
-
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, DB, AC, F, &Hints,
- ValuesToIgnore, Preds);
+ LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
+ &Hints);
+ CM.collectValuesToIgnore();
// Check the function attributes to find out if this function should be
// optimized for size.
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop then
// interleave it.
- InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC, Preds);
+ InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC);
Unroller.vectorize(&LVL, CM.MinBWs);
emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
Twine(IC) + ")");
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC, Preds);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC);
LB.vectorize(&LVL, CM.MinBWs);
++LoopsVectorized;
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
+ auto *SE = PSE.getSE();
// Make sure that the pointer does not point to structs.
if (Ptr->getType()->getPointerElementType()->isAggregateType())
return 0;
// Make sure that all of the index operands are loop invariant.
for (unsigned i = 1; i < NumOperands; ++i)
- if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
InductionDescriptor II = Inductions[Phi];
// operand.
for (unsigned i = 0; i != NumOperands; ++i)
if (i != InductionOperand &&
- !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
return 0;
// We can emit wide load/stores only if the last non-zero index is the
// induction variable.
const SCEV *Last = nullptr;
if (!Strides.count(Gep))
- Last = SE->getSCEV(Gep->getOperand(InductionOperand));
+ Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
else {
// Because of the multiplication by a stride we can have a s/zext cast.
// We are going to replace this stride by 1 so the cast is safe to ignore.
// %idxprom = zext i32 %mul to i64 << Safe cast.
// %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
//
- Last = replaceSymbolicStrideSCEV(SE, Strides, Preds,
+ Last = replaceSymbolicStrideSCEV(PSE, Strides,
Gep->getOperand(InductionOperand), Gep);
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
Last =
Ptr = Builder.Insert(Gep2);
} else if (Gep) {
setDebugLocFromInst(Builder, Gep);
- assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()),
- OrigLoop) && "Base ptr must be invariant");
+ assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
+ OrigLoop) &&
+ "Base ptr must be invariant");
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
if (i == InductionOperand ||
(GepOperandInst && OrigLoop->contains(GepOperandInst))) {
assert((i == InductionOperand ||
- SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) &&
+ PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst),
+ OrigLoop)) &&
"Must be last index or loop invariant");
VectorParts &GEPParts = getVectorValue(GepOperand);
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
// Find the loop boundaries.
+ ScalarEvolution *SE = PSE.getSE();
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop);
assert(BackedgeTakenCount != SE->getCouldNotCompute() &&
"Invalid loop count");
// Generate the code to check that the SCEV assumptions that we made.
// We want the new basic block to start at the first instruction in a
// sequence of instructions that form a check.
- SCEVExpander Exp(*SE, Bypass->getModule()->getDataLayout(), "scev.check");
- Value *SCEVCheck = Exp.expandCodeForPredicate(&Preds, BB->getTerminator());
+ SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(),
+ "scev.check");
+ Value *SCEVCheck =
+ Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator());
if (auto *C = dyn_cast<ConstantInt>(SCEVCheck))
if (C->isZero())
// Widen selects.
// If the selector is loop invariant we can create a select
// instruction with a scalar condition. Otherwise, use vector-select.
- bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)),
- OrigLoop);
+ auto *SE = PSE.getSE();
+ bool InvariantCond =
+ SE->isLoopInvariant(PSE.getSCEV(it->getOperand(0)), OrigLoop);
setDebugLocFromInst(Builder, &*it);
// The condition can be loop invariant but still defined inside the
void InnerLoopVectorizer::updateAnalysis() {
// Forget the original basic block.
- SE->forgetLoop(OrigLoop);
+ PSE.getSE()->forgetLoop(OrigLoop);
// Update the dominator tree information.
assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
}
// ScalarEvolution needs to be able to find the exit count.
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
- if (ExitCount == SE->getCouldNotCompute()) {
- emitAnalysis(VectorizationReport() <<
- "could not determine number of loop iterations");
+ const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
+ emitAnalysis(VectorizationReport()
+ << "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
- if (Preds.getComplexity() > SCEVThreshold) {
+ if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
emitAnalysis(VectorizationReport()
<< "Too many SCEV assumptions need to be made and checked "
<< "at runtime");
}
InductionDescriptor ID;
- if (InductionDescriptor::isInductionPHI(Phi, SE, ID)) {
+ if (InductionDescriptor::isInductionPHI(Phi, PSE.getSE(), ID)) {
Inductions[Phi] = ID;
// Get the widest type.
if (!WidestIndTy)
continue;
}
- if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop,
- Reductions[Phi])) {
- if (Reductions[Phi].hasUnsafeAlgebra())
- Requirements->addUnsafeAlgebraInst(
- Reductions[Phi].getUnsafeAlgebraInst());
- AllowedExit.insert(Reductions[Phi].getLoopExitInstr());
+ RecurrenceDescriptor RedDes;
+ if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) {
+ if (RedDes.hasUnsafeAlgebra())
+ Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
+ AllowedExit.insert(RedDes.getLoopExitInstr());
+ Reductions[Phi] = RedDes;
continue;
}
// second argument is the same (i.e. loop invariant)
if (CI &&
hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
- if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
+ auto *SE = PSE.getSE();
+ if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
emitAnalysis(VectorizationReport(&*it)
<< "intrinsic instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
else
return;
- Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop);
+ Value *Stride = getStrideFromPointer(Ptr, PSE.getSE(), TheLoop);
if (!Stride)
return;
}
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
- Preds.add(&LAI->Preds);
+ PSE.addPredicate(LAI->PSE.getUnionPredicate());
return true;
}
StoreInst *SI = dyn_cast<StoreInst>(I);
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
- int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides, Preds);
+ int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides);
// The factor of the corresponding interleave group.
unsigned Factor = std::abs(Stride);
if (Factor < 2 || Factor > MaxInterleaveGroupFactor)
continue;
- const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr);
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
// Holds all interleaved store groups temporarily.
SmallSetVector<InterleaveGroup *, 4> StoreGroups;
+ // Holds all interleaved load groups temporarily.
+ SmallSetVector<InterleaveGroup *, 4> LoadGroups;
// Search the load-load/write-write pair B-A in bottom-up order and try to
// insert B into the interleave group of A according to 3 rules:
if (A->mayWriteToMemory())
StoreGroups.insert(Group);
+ else
+ LoadGroups.insert(Group);
for (auto II = std::next(I); II != E; ++II) {
Instruction *B = II->first;
continue;
// Calculate the distance and prepare for the rule 3.
- const SCEVConstant *DistToA =
- dyn_cast<SCEVConstant>(SE->getMinusSCEV(DesB.Scev, DesA.Scev));
+ const SCEVConstant *DistToA = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev));
if (!DistToA)
continue;
- int DistanceToA = DistToA->getValue()->getValue().getSExtValue();
+ int DistanceToA = DistToA->getAPInt().getSExtValue();
// Skip if the distance is not multiple of size as they are not in the
// same group.
for (InterleaveGroup *Group : StoreGroups)
if (Group->getNumMembers() != Group->getFactor())
releaseGroup(Group);
+
+ // Remove interleaved load groups that don't have both the first and the last
+ // member. This guarantees that we won't do speculative out-of-bounds loads.
+ for (InterleaveGroup *Group : LoadGroups)
+ if (!Group->getMember(0) || !Group->getMember(Group->getFactor() - 1))
+ releaseGroup(Group);
}
LoopVectorizationCostModel::VectorizationFactor
}
// Find the trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop);
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
return 1;
// Do not interleave loops with a relatively small trip count.
- unsigned TC = SE->getSmallConstantTripCount(TheLoop);
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
// Ignore instructions that are never used within the loop.
if (!Ends.count(I)) continue;
- // Skip ignored values.
- if (ValuesToIgnore.count(I))
- continue;
-
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
for (unsigned int j = 0, e = List.size(); j < e; ++j)
OpenIntervals.erase(List[j]);
+ // Skip ignored values.
+ if (ValuesToIgnore.count(I))
+ continue;
+
// For each VF find the maximum usage of registers.
for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
if (VFs[j] == 1) {
continue;
}
- // Count the number of live interals.
+ // Count the number of live intervals.
unsigned RegUsage = 0;
- for (auto Inst : OpenIntervals)
+ for (auto Inst : OpenIntervals) {
+ // Skip ignored values for VF > 1.
+ if (VecValuesToIgnore.count(Inst))
+ continue;
RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
+ }
MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
}
if (!C)
return true;
- const APInt &APStepVal = C->getValue()->getValue();
+ const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
if (VF > 1 && MinBWs.count(I))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
Type *VectorTy = ToVectorTy(RetTy, VF);
+ auto SE = PSE.getSE();
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
return false;
}
+void LoopVectorizationCostModel::collectValuesToIgnore() {
+ // Ignore ephemeral values.
+ CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
+
+ // Ignore type-promoting instructions we identified during reduction
+ // detection.
+ for (auto &Reduction : *Legal->getReductionVars()) {
+ RecurrenceDescriptor &RedDes = Reduction.second;
+ SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
+ VecValuesToIgnore.insert(Casts.begin(), Casts.end());
+ }
+
+ // Ignore induction phis that are only used by GetElementPtr instructions or
+ // by the ICmp instruction that exits the loop. Induction variables usually
+ // have large types and can have a big impact when estimating register usage.
+ // This applies only when VF > 1.
+ for (auto &Induction : *Legal->getInductionVars()) {
+ auto *PN = Induction.first;
+ auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
+
+ // Check that the PHI is only used by the induction increment (UpdateV) or
+ // by GEPs. Then check that UpdateV is only used by a compare instruction or
+ // the loop header PHI.
+ // FIXME: Need precise def-use analysis to determine if this induction
+ // variable will be vectorized.
+ if (std::all_of(PN->user_begin(), PN->user_end(),
+ [&](const User *U) -> bool {
+ return U == UpdateV || isa<GetElementPtrInst>(U);
+ }) &&
+ std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
+ [&](const User *U) -> bool {
+ return U == PN || isa<ICmpInst>(U);
+ })) {
+ VecValuesToIgnore.insert(PN);
+ VecValuesToIgnore.insert(UpdateV);
+ }
+ }
+
+ // Ignore instructions that will not be vectorized.
+ // This is for when VF > 1.
+ for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be;
+ ++bb) {
+ for (auto &Inst : **bb) {
+ switch (Inst.getOpcode()) {
+ case Instruction::GetElementPtr: {
+ // Ignore GEP if its last operand is an induction variable so that it is
+ // a consecutive load/store and won't be vectorized as a scatter/gather
+ // pattern.
+
+ GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst);
+ unsigned NumOperands = Gep->getNumOperands();
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
+ bool GepToIgnore = true;
+
+ // Check that all of the gep indices are uniform except for the
+ // induction operand.
+ for (unsigned i = 0; i != NumOperands; ++i) {
+ if (i != InductionOperand &&
+ !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
+ TheLoop)) {
+ GepToIgnore = false;
+ break;
+ }
+ }
+
+ if (GepToIgnore)
+ VecValuesToIgnore.insert(&Inst);
+ break;
+ }
+ }
+ }
+ }
+}
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
bool IfPredicateStore) {