X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FVectorize%2FLoopVectorize.cpp;h=9adc80c8bd0fae5e29fecc4661477c320a48b331;hp=62b1cce48bc2f93362be47626afad09ac2c23a29;hb=dbef3b079d970dcdb77c257d5acd81e41cb3bf58;hpb=be7b5146ad436d54417950c467a72fe6cc883b4a diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 62b1cce48bc..9adc80c8bd0 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1409,14 +1409,15 @@ private: /// different operations. class LoopVectorizationCostModel { public: - LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE, - LoopInfo *LI, LoopVectorizationLegality *Legal, + LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, + LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, const Function *F, - const LoopVectorizeHints *Hints) - : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), - AC(AC), TheFunction(F), Hints(Hints) {} + const LoopVectorizeHints *Hints, + SmallPtrSetImpl &ValuesToIgnore) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), + TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {} /// Information about vectorization costs struct VectorizationFactor { @@ -1464,9 +1465,6 @@ public: SmallVector calculateRegisterUsage(const SmallVector &VFs); - /// Collect values we want to ignore in the cost model. - void collectValuesToIgnore(); - private: /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -1498,8 +1496,8 @@ public: /// The loop that we evaluate. Loop *TheLoop; - /// Predicated scalar evolution analysis. - PredicatedScalarEvolution &PSE; + /// Scev analysis. + ScalarEvolution *SE; /// Loop Info analysis. LoopInfo *LI; /// Vectorization legality. @@ -1508,17 +1506,13 @@ public: const TargetTransformInfo &TTI; /// Target Library Info. const TargetLibraryInfo *TLI; - /// Demanded bits analysis. + /// Demanded bits analysis DemandedBits *DB; - /// Assumption cache. - AssumptionCache *AC; const Function *TheFunction; - /// Loop Vectorize Hint. + // Loop Vectorize Hint. const LoopVectorizeHints *Hints; - /// Values to ignore in the cost model. - SmallPtrSet ValuesToIgnore; - /// Values to ignore in the cost model when VF > 1. - SmallPtrSet VecValuesToIgnore; + // Values to ignore in the cost model. + const SmallPtrSetImpl &ValuesToIgnore; }; /// \brief This holds vectorization requirements that must be verified late in @@ -1763,10 +1757,19 @@ struct LoopVectorize : public FunctionPass { return false; } + // Collect values we want to ignore in the cost model. This includes + // type-promoting instructions we identified during reduction detection. + SmallPtrSet ValuesToIgnore; + CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore); + for (auto &Reduction : *LVL.getReductionVars()) { + RecurrenceDescriptor &RedDes = Reduction.second; + SmallPtrSetImpl &Casts = RedDes.getCastInsts(); + ValuesToIgnore.insert(Casts.begin(), Casts.end()); + } + // Use the cost model. - LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F, - &Hints); - CM.collectValuesToIgnore(); + LoopVectorizationCostModel CM(L, PSE.getSE(), LI, &LVL, *TTI, TLI, DB, AC, + F, &Hints, ValuesToIgnore); // Check the function attributes to find out if this function should be // optimized for size. @@ -4734,7 +4737,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { } // Find the trip count. - unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); + unsigned TC = SE->getSmallConstantTripCount(TheLoop); DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); @@ -4936,7 +4939,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; // Do not interleave loops with a relatively small trip count. - unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); + unsigned TC = SE->getSmallConstantTripCount(TheLoop); if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; @@ -5164,15 +5167,15 @@ LoopVectorizationCostModel::calculateRegisterUsage( // Ignore instructions that are never used within the loop. if (!Ends.count(I)) continue; + // Skip ignored values. + if (ValuesToIgnore.count(I)) + continue; + // Remove all of the instructions that end at this location. InstrList &List = TransposeEnds[i]; for (unsigned int j = 0, e = List.size(); j < e; ++j) OpenIntervals.erase(List[j]); - // Skip ignored values. - if (ValuesToIgnore.count(I)) - continue; - // For each VF find the maximum usage of registers. for (unsigned j = 0, e = VFs.size(); j < e; ++j) { if (VFs[j] == 1) { @@ -5182,12 +5185,8 @@ LoopVectorizationCostModel::calculateRegisterUsage( // Count the number of live intervals. unsigned RegUsage = 0; - for (auto Inst : OpenIntervals) { - // Skip ignored values for VF > 1. - if (VecValuesToIgnore.count(Inst)) - continue; + for (auto Inst : OpenIntervals) RegUsage += GetRegUsage(Inst->getType(), VFs[j]); - } MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } @@ -5331,7 +5330,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { if (VF > 1 && MinBWs.count(I)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); Type *VectorTy = ToVectorTy(RetTy, VF); - auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { @@ -5633,79 +5631,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { return false; } -void LoopVectorizationCostModel::collectValuesToIgnore() { - // Ignore ephemeral values. - CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); - - // Ignore type-promoting instructions we identified during reduction - // detection. - for (auto &Reduction : *Legal->getReductionVars()) { - RecurrenceDescriptor &RedDes = Reduction.second; - SmallPtrSetImpl &Casts = RedDes.getCastInsts(); - VecValuesToIgnore.insert(Casts.begin(), Casts.end()); - } - - // Ignore induction phis that are only used in either GetElementPtr or ICmp - // instruction to exit loop. Induction variables usually have large types and - // can have big impact when estimating register usage. - // This is for when VF > 1. - for (auto &Induction : *Legal->getInductionVars()) { - auto *PN = Induction.first; - auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch()); - - // Check that the PHI is only used by the induction increment (UpdateV) or - // by GEPs. Then check that UpdateV is only used by a compare instruction or - // the loop header PHI. - // FIXME: Need precise def-use analysis to determine if this instruction - // variable will be vectorized. - if (std::all_of(PN->user_begin(), PN->user_end(), - [&](const User *U) -> bool { - return U == UpdateV || isa(U); - }) && - std::all_of(UpdateV->user_begin(), UpdateV->user_end(), - [&](const User *U) -> bool { - return U == PN || isa(U); - })) { - VecValuesToIgnore.insert(PN); - VecValuesToIgnore.insert(UpdateV); - } - } - - // Ignore instructions that will not be vectorized. - // This is for when VF > 1. - for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; - ++bb) { - for (auto &Inst : **bb) { - switch (Inst.getOpcode()) { - case Instruction::GetElementPtr: { - // Ignore GEP if its last operand is an induction variable so that it is - // a consecutive load/store and won't be vectorized as scatter/gather - // pattern. - - GetElementPtrInst *Gep = cast(&Inst); - unsigned NumOperands = Gep->getNumOperands(); - unsigned InductionOperand = getGEPInductionOperand(Gep); - bool GepToIgnore = true; - - // Check that all of the gep indices are uniform except for the - // induction operand. - for (unsigned i = 0; i != NumOperands; ++i) { - if (i != InductionOperand && - !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), - TheLoop)) { - GepToIgnore = false; - break; - } - } - - if (GepToIgnore) - VecValuesToIgnore.insert(&Inst); - break; - } - } - } - } -} void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) {