X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FVectorize%2FLoopVectorize.cpp;h=6caaa236836562ece45b501f1ae38ca1044d28e6;hb=0ea25c2e64c98d820a5cac6d5ce229a7d8f4e3e4;hp=bd39b8048b18947176b9284bebe7f1927c4007d2;hpb=38a9ebb065801cd4fca4da2e3ca972392af3cb8b;p=oota-llvm.git diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index bd39b8048b1..6caaa236836 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -106,15 +106,6 @@ using namespace llvm::PatternMatch; STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); -static cl::opt -VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, - cl::desc("Sets the SIMD width. Zero is autoselect.")); - -static cl::opt -VectorizationInterleave("force-vector-interleave", cl::init(0), cl::Hidden, - cl::desc("Sets the vectorization interleave count. " - "Zero is autoselect.")); - static cl::opt EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); @@ -145,13 +136,6 @@ static cl::opt EnableMemAccessVersioning( /// We don't unroll loops with a known constant trip count below this number. static const unsigned TinyTripCountUnrollThreshold = 128; -/// When performing memory disambiguation checks at runtime do not make more -/// than this number of comparisons. -static const unsigned RuntimeMemoryCheckThreshold = 8; - -/// Maximum simd width. -static const unsigned MaxVectorWidth = 64; - static cl::opt ForceTargetNumScalarRegs( "force-target-num-scalar-regs", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's number of scalar registers.")); @@ -547,15 +531,11 @@ public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, DominatorTree *DT, TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, - const TargetTransformInfo *TTI) + const TargetTransformInfo *TTI, + LoopAccessAnalysis *LAA) : NumPredStores(0), TheLoop(L), SE(SE), DL(DL), - TLI(TLI), TheFunction(F), TTI(TTI), Induction(nullptr), - WidestIndTy(nullptr), - LAI(F, L, SE, DL, TLI, AA, DT, - LoopAccessInfo::VectorizerParams( - MaxVectorWidth, VectorizationFactor, VectorizationInterleave, - RuntimeMemoryCheckThreshold)), - HasFunNoNaNAttr(false) {} + TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), + Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -741,18 +721,18 @@ public: /// Returns the information that we collected about runtime memory check. LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() { - return LAI.getRuntimePointerCheck(); + return LAI->getRuntimePointerCheck(); } LoopAccessInfo *getLAI() { - return &LAI; + return LAI; } /// This function returns the identity element (or neutral element) for /// the operation K. static Constant *getReductionIdentity(ReductionKind K, Type *Tp); - unsigned getMaxSafeDepDistBytes() { return LAI.getMaxSafeDepDistBytes(); } + unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } bool hasStride(Value *V) { return StrideSet.count(V); } bool mustCheckStrides() { return !StrideSet.empty(); } @@ -777,10 +757,10 @@ public: return (MaskedOp.count(I) != 0); } unsigned getNumStores() const { - return LAI.getNumStores(); + return LAI->getNumStores(); } unsigned getNumLoads() const { - return LAI.getNumLoads(); + return LAI->getNumLoads(); } unsigned getNumPredStores() const { return NumPredStores; @@ -853,6 +833,13 @@ private: Function *TheFunction; /// Target Transform Info const TargetTransformInfo *TTI; + /// Dominator Tree. + DominatorTree *DT; + // LoopAccess analysis. + LoopAccessAnalysis *LAA; + // And the loop-accesses info corresponding to this loop. This pointer is + // null until canVectorizeMemory sets it up. + LoopAccessInfo *LAI; // --- vectorization state --- // @@ -874,7 +861,7 @@ private: /// This set holds the variables which are known to be uniform after /// vectorization. SmallPtrSet Uniforms; - LoopAccessInfo LAI; + /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; @@ -1019,7 +1006,7 @@ class LoopVectorizeHints { bool validate(unsigned Val) { switch (Kind) { case HK_WIDTH: - return isPowerOf2_32(Val) && Val <= MaxVectorWidth; + return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; case HK_UNROLL: return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; case HK_FORCE: @@ -1047,7 +1034,8 @@ public: }; LoopVectorizeHints(const Loop *L, bool DisableInterleaving) - : Width("vectorize.width", VectorizationFactor, HK_WIDTH), + : Width("vectorize.width", VectorizerParams::VectorizationFactor, + HK_WIDTH), Interleave("interleave.count", DisableInterleaving, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), TheLoop(L) { @@ -1055,8 +1043,8 @@ public: getHintsFromMetadata(); // force-vector-interleave overrides DisableInterleaving. - if (VectorizationInterleave.getNumOccurrences() > 0) - Interleave.Value = VectorizationInterleave; + if (VectorizerParams::isInterleaveForced()) + Interleave.Value = VectorizerParams::VectorizationInterleave; DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); @@ -1255,6 +1243,7 @@ struct LoopVectorize : public FunctionPass { TargetLibraryInfo *TLI; AliasAnalysis *AA; AssumptionCache *AC; + LoopAccessAnalysis *LAA; bool DisableUnrolling; bool AlwaysVectorize; @@ -1272,6 +1261,7 @@ struct LoopVectorize : public FunctionPass { TLI = TLIP ? &TLIP->getTLI() : nullptr; AA = &getAnalysis(); AC = &getAnalysis().getAssumptionCache(F); + LAA = &getAnalysis(); // Compute some weights outside of the loop over the loops. Compute this // using a BranchProbability to re-use its scaling math. @@ -1382,7 +1372,7 @@ struct LoopVectorize : public FunctionPass { } // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI); + LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1487,6 +1477,7 @@ struct LoopVectorize : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -1658,7 +1649,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { } bool LoopVectorizationLegality::isUniform(Value *V) { - return LAI.isUniform(V); + return LAI->isUniform(V); } InnerLoopVectorizer::VectorParts& @@ -3398,7 +3389,7 @@ bool LoopVectorizationLegality::canVectorize() { collectLoopUniforms(); DEBUG(dbgs() << "LV: We can vectorize this loop" << - (LAI.getRuntimePointerCheck()->Need ? " (with a runtime bound check)" : + (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" : "") <<"!\n"); @@ -3823,7 +3814,11 @@ void LoopVectorizationLegality::collectLoopUniforms() { } bool LoopVectorizationLegality::canVectorizeMemory() { - return LAI.canVectorizeMemory(Strides); + LAI = &LAA->getInfo(TheLoop, Strides); + auto &OptionalReport = LAI->getReport(); + if (OptionalReport) + emitAnalysis(*OptionalReport); + return LAI->canVectorizeMemory(); } static bool hasMultipleUsesOf(Instruction *I, @@ -4167,7 +4162,7 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) { } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { - return LAI.blockNeedsPredication(BB); + return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, @@ -4998,6 +4993,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm {