X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopUnrollPass.cpp;h=643bf41457522ac2346e017687be0212daef46e2;hp=25b0877a4f4ca0ffa899bc62dc246f87b4864383;hb=8a88fc9a2b252ce0f7680d1822703284899e7bee;hpb=815580fe7a4fdf1fb62104edf9baf7d315004fd6 diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 25b0877a4f4..643bf414575 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -137,6 +137,7 @@ namespace { /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); @@ -207,6 +208,7 @@ namespace { : UP.DynamicCostSavingsDiscount; if (!UserThreshold && + // FIXME: Use Function::optForSize(). L->getHeader()->getParent()->hasFnAttribute( Attribute::OptimizeForSize)) { Threshold = UP.OptSizeThreshold; @@ -235,6 +237,7 @@ char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -494,11 +497,11 @@ private: namespace { struct EstimatedUnrollCost { /// \brief The estimated cost after unrolling. - unsigned UnrolledCost; + int UnrolledCost; /// \brief The estimated dynamic cost of executing the instructions in the /// rolled form. - unsigned RolledDynamicCost; + int RolledDynamicCost; }; } @@ -516,9 +519,9 @@ struct EstimatedUnrollCost { /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. Optional -analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, - const TargetTransformInfo &TTI, - unsigned MaxUnrolledLoopSize) { +analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, + ScalarEvolution &SE, const TargetTransformInfo &TTI, + int MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. @@ -532,16 +535,23 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, SmallSetVector BBWorklist; DenseMap SimplifiedValues; + SmallVector, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. - unsigned UnrolledCost = 0; + int UnrolledCost = 0; // We also track the estimated dynamic (that is, actually executed) cost in // the rolled form. This helps identify cases when the savings from unrolling // aren't just exposing dead control flows, but actual reduced dynamic // instructions due to the simplifications which we expect to occur after // unrolling. - unsigned RolledDynamicCost = 0; + int RolledDynamicCost = 0; + + // Ensure that we don't violate the loop structure invariants relied on by + // this analysis. + assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); + assert(L->isLCSSAForm(DT) && + "Must have loops in LCSSA form to track live-out values."); DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); @@ -551,7 +561,34 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // we literally have to go through all loop's iterations. for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); + + // Prepare for the iteration by collecting any simplified entry or backedge + // inputs. + for (Instruction &I : *L->getHeader()) { + auto *PHI = dyn_cast(&I); + if (!PHI) + break; + + // The loop header PHI nodes must have exactly two input: one from the + // loop preheader and one from the loop latch. + assert( + PHI->getNumIncomingValues() == 2 && + "Must have an incoming value only for the preheader and the latch."); + + Value *V = PHI->getIncomingValueForBlock( + Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); + Constant *C = dyn_cast(V); + if (Iteration != 0 && !C) + C = SimplifiedValues.lookup(V); + if (C) + SimplifiedInputValues.push_back({PHI, C}); + } + + // Now clear and re-populate the map for the next iteration. SimplifiedValues.clear(); + while (!SimplifiedInputValues.empty()) + SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); + UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE); BBWorklist.clear(); @@ -564,7 +601,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // it. We don't change the actual IR, just count optimization // opportunities. for (Instruction &I : *BB) { - unsigned InstCost = TTI.getUserCost(&I); + int InstCost = TTI.getUserCost(&I); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns false, include this instruction in the @@ -619,8 +656,8 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, if (isa(SimpleCond)) Succ = SI->getSuccessor(0); else - Succ = - SI->getSuccessor(cast(SimpleCond)->getSExtValue()); + Succ = SI->findCaseValue(cast(SimpleCond)) + .getCaseSuccessor(); if (L->contains(Succ)) BBWorklist.insert(Succ); continue; @@ -849,6 +886,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { Function &F = *L->getHeader()->getParent(); + auto &DT = getAnalysis().getDomTree(); LoopInfo *LI = &getAnalysis().getLoopInfo(); ScalarEvolution *SE = &getAnalysis(); const TargetTransformInfo &TTI = @@ -930,8 +968,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. - if (Optional Cost = analyzeLoopUnrollCost( - L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount)) + if (Optional Cost = + analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI, + Threshold + DynamicCostSavingsDiscount)) if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount, Cost->UnrolledCost, Cost->RolledDynamicCost)) {