STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
-static cl::opt<unsigned>
-VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
- cl::desc("Sets the SIMD width. Zero is autoselect."));
-
-static cl::opt<unsigned>
-VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden,
- cl::desc("Sets the vectorization interleave count. "
- "Zero is autoselect."));
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."),
+ cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor = 0; // Storage named by cl::location above; 0 = autoselect.
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
+ "Zero is autoselect."),
+ cl::location(
+ VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave = 0; // Storage named by cl::location above; 0 = autoselect.
static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
/// When performing memory disambiguation checks at runtime do not make more
/// than this number of comparisons.
-static const unsigned RuntimeMemoryCheckThreshold = 8;
+const unsigned VectorizerParams::RuntimeMemoryCheckThreshold = 8;
/// Maximum simd width.
-static const unsigned MaxVectorWidth = 64;
+const unsigned VectorizerParams::MaxVectorWidth = 64;
static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
class LoopVectorizationCostModel;
class LoopVectorizeHints;
+/// \brief A LoopAccessReport whose message always starts with the
+/// loop-vectorizer-specific prefix "loop not vectorized: ".
+class VectorizationReport : public LoopAccessReport {
+public:
+ VectorizationReport(Instruction *I = nullptr)
+ : LoopAccessReport("loop not vectorized: ", I) {}
+
+ /// \brief This allows promotion of the loop-access analysis report into the
+ /// loop-vectorizer report. The loop-vectorizer-specific prefix is prepended
+ /// to the message of \p R, and the instruction of \p R is carried over.
+ explicit VectorizationReport(const LoopAccessReport &R)
+ : LoopAccessReport(Twine("loop not vectorized: ") + R.str(),
+ R.getInstr()) {}
+};
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
typedef DenseMap<std::pair<BasicBlock*, BasicBlock*>,
VectorParts> EdgeMaskCache;
- /// \brief Add code that checks at runtime if the accessed arrays overlap.
- ///
- /// Returns a pair of instructions where the first element is the first
- /// instruction generated in possibly a sequence of instructions and the
- /// second value is the final comparator value or NULL if no check is needed.
- std::pair<Instruction *, Instruction *> addRuntimeCheck(Instruction *Loc);
-
/// \brief Add checks for strides that where assumed to be 1.
///
/// Returns the last check instruction and the first check instruction in the
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI,
AliasAnalysis *AA, Function *F,
- const TargetTransformInfo *TTI)
- : NumPredStores(0), TheLoop(L), SE(SE), DL(DL), TLI(TLI), TheFunction(F),
- TTI(TTI), Induction(nullptr), WidestIndTy(nullptr),
- LAA(F, L, SE, DL, TLI, AA, DT,
- LoopAccessAnalysis::VectorizerParams(
- MaxVectorWidth, VectorizationFactor, VectorizationInterleave,
- RuntimeMemoryCheckThreshold)),
- HasFunNoNaNAttr(false) {}
+ const TargetTransformInfo *TTI,
+ LoopAccessAnalysis *LAA)
+ : NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
+ TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr),
+ Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
/// Returns the information that we collected about runtime memory check.
- LoopAccessAnalysis::RuntimePointerCheck *getRuntimePointerCheck() {
- return LAA.getRuntimePointerCheck();
+ const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const {
+ return LAI->getRuntimePointerCheck();
+ }
+
+ const LoopAccessInfo *getLAI() const { // Null until canVectorizeMemory() has set LAI.
+ return LAI;
}
/// This function returns the identity element (or neutral element) for
/// the operation K.
static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
- unsigned getMaxSafeDepDistBytes() { return LAA.getMaxSafeDepDistBytes(); }
+ unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
bool hasStride(Value *V) { return StrideSet.count(V); }
bool mustCheckStrides() { return !StrideSet.empty(); }
return (MaskedOp.count(I) != 0);
}
unsigned getNumStores() const {
- return NumStores;
+ return LAI->getNumStores();
}
unsigned getNumLoads() const {
- return NumLoads;
+ return LAI->getNumLoads();
}
unsigned getNumPredStores() const {
return NumPredStores;
void collectStridedAccess(Value *LoadOrStoreInst);
/// Report an analysis message to assist the user in diagnosing loops that are
- /// not vectorized.
- void emitAnalysis(VectorizationReport &Message) {
- VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
+ /// not vectorized. These are handled as LoopAccessReport rather than
+ /// VectorizationReport because the << operator of VectorizationReport returns
+ /// LoopAccessReport.
+ void emitAnalysis(const LoopAccessReport &Message) {
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
}
- unsigned NumLoads;
- unsigned NumStores;
unsigned NumPredStores;
/// The loop that we evaluate.
Function *TheFunction;
/// Target Transform Info
const TargetTransformInfo *TTI;
+ /// Dominator Tree.
+ DominatorTree *DT;
+ // LoopAccess analysis.
+ LoopAccessAnalysis *LAA;
+ // And the loop-accesses info corresponding to this loop. This pointer is
+ // null until canVectorizeMemory sets it up.
+ const LoopAccessInfo *LAI;
// --- vectorization state --- //
/// This set holds the variables which are known to be uniform after
/// vectorization.
SmallPtrSet<Instruction*, 4> Uniforms;
- LoopAccessAnalysis LAA;
+
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
bool isConsecutiveLoadOrStore(Instruction *I);
/// Report an analysis message to assist the user in diagnosing loops that are
- /// not vectorized.
- void emitAnalysis(VectorizationReport &Message) {
- VectorizationReport::emitAnalysis(Message, TheFunction, TheLoop);
+ /// not vectorized. These are handled as LoopAccessReport rather than
+ /// VectorizationReport because the << operator of VectorizationReport returns
+ /// LoopAccessReport.
+ void emitAnalysis(const LoopAccessReport &Message) {
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
}
/// Values used only by @llvm.assume calls.
bool validate(unsigned Val) {
switch (Kind) {
case HK_WIDTH:
- return isPowerOf2_32(Val) && Val <= MaxVectorWidth;
+ return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
case HK_UNROLL:
return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
case HK_FORCE:
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
AssumptionCache *AC;
+ LoopAccessAnalysis *LAA;
bool DisableUnrolling;
bool AlwaysVectorize;
TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ LAA = &getAnalysis<LoopAccessAnalysis>();
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
AU.addRequired<ScalarEvolution>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LoopAccessAnalysis>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<AliasAnalysis>();
}
bool LoopVectorizationLegality::isUniform(Value *V) {
- return LAA.isUniform(V);
+ return LAI->isUniform(V);
}
InnerLoopVectorizer::VectorParts&
return std::make_pair(FirstInst, TheCheck);
}
-std::pair<Instruction *, Instruction *>
-InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
- LoopAccessAnalysis::RuntimePointerCheck *PtrRtCheck =
- Legal->getRuntimePointerCheck();
-
- Instruction *tnullptr = nullptr;
- if (!PtrRtCheck->Need)
- return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
-
- unsigned NumPointers = PtrRtCheck->Pointers.size();
- SmallVector<TrackingVH<Value> , 2> Starts;
- SmallVector<TrackingVH<Value> , 2> Ends;
-
- LLVMContext &Ctx = Loc->getContext();
- SCEVExpander Exp(*SE, "induction");
- Instruction *FirstInst = nullptr;
-
- for (unsigned i = 0; i < NumPointers; ++i) {
- Value *Ptr = PtrRtCheck->Pointers[i];
- const SCEV *Sc = SE->getSCEV(Ptr);
-
- if (SE->isLoopInvariant(Sc, OrigLoop)) {
- DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
- *Ptr <<"\n");
- Starts.push_back(Ptr);
- Ends.push_back(Ptr);
- } else {
- DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr << '\n');
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
-
- Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
- Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
- Starts.push_back(Start);
- Ends.push_back(End);
- }
- }
-
- IRBuilder<> ChkBuilder(Loc);
- // Our instructions might fold to a constant.
- Value *MemoryRuntimeCheck = nullptr;
- for (unsigned i = 0; i < NumPointers; ++i) {
- for (unsigned j = i+1; j < NumPointers; ++j) {
- // No need to check if two readonly pointers intersect.
- if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
- continue;
-
- // Only need to check pointers between two different dependency sets.
- if (PtrRtCheck->DependencySetId[i] == PtrRtCheck->DependencySetId[j])
- continue;
- // Only need to check pointers in the same alias set.
- if (PtrRtCheck->AliasSetId[i] != PtrRtCheck->AliasSetId[j])
- continue;
-
- unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
- unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
-
- assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
- (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
- "Trying to bounds check pointers with different address spaces");
-
- Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
- Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
-
- Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
- Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
- Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
- Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
-
- Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
- FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
- Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
- FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
- Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- if (MemoryRuntimeCheck) {
- IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
- "conflict.rdx");
- FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
- }
- MemoryRuntimeCheck = IsConflict;
- }
- }
-
- // We have to do this trickery because the IRBuilder might fold the check to a
- // constant expression in which case there is no Instruction anchored in a
- // the block.
- Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
- ConstantInt::getTrue(Ctx));
- ChkBuilder.Insert(Check, "memcheck.conflict");
- FirstInst = getFirstInst(FirstInst, Check, Loc);
- return std::make_pair(FirstInst, Check);
-}
-
void InnerLoopVectorizer::createEmptyLoop() {
/*
In this function we generate a new loop. The new loop will contain
// faster.
Instruction *MemRuntimeCheck;
std::tie(FirstCheckInst, MemRuntimeCheck) =
- addRuntimeCheck(LastBypassBlock->getTerminator());
+ Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator());
if (MemRuntimeCheck) {
// Create a new block containing the memory check.
BasicBlock *CheckBlock =
- LastBypassBlock->splitBasicBlock(MemRuntimeCheck, "vector.memcheck");
+ LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, *LI);
LoopBypassBlocks.push_back(CheckBlock);
collectLoopUniforms();
DEBUG(dbgs() << "LV: We can vectorize this loop" <<
- (LAA.getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
+ (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" :
"")
<<"!\n");
// Look for the attribute signaling the absence of NaNs.
Function &F = *Header->getParent();
if (F.hasFnAttribute("no-nans-fp-math"))
- HasFunNoNaNAttr = F.getAttributes().getAttribute(
- AttributeSet::FunctionIndex,
- "no-nans-fp-math").getValueAsString() == "true";
+ HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
}
bool LoopVectorizationLegality::canVectorizeMemory() {
- return LAA.canVectorizeMemory(Strides);
+ LAI = &LAA->getInfo(TheLoop, Strides); // Sets LAI, which the constructor leaves null.
+ auto &OptionalReport = LAI->getReport();
+ if (OptionalReport) // Re-emit the analysis report with the vectorizer prefix.
+ emitAnalysis(VectorizationReport(*OptionalReport));
+ return LAI->canVectorizeMemory();
}
static bool hasMultipleUsesOf(Instruction *I,
}
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
- return LAA.blockNeedsPredication(BB);
+ return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
// Wide load/stores.
unsigned Cost = TTI.getAddressComputationCost(VectorTy);
- Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+ if (Legal->isMaskRequired(I))
+ Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment,
+ AS);
+ else
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
if (Reverse)
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {