From: Chandler Carruth Date: Fri, 22 May 2015 17:41:35 +0000 (+0000) Subject: [Unroll] Separate the logic for testing each iteration of the loop, X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=15fe7cf70fc92f9e7623fd2c7ffd38662cc29b1b;p=oota-llvm.git [Unroll] Separate the logic for testing each iteration of the loop, accumulating estimated cost, and other loop-centric logic from the logic used to analyze instructions in a particular iteration. This makes the visitor very narrow in scope -- all it does is visit instructions, update a map of simplified values, and return whether it is able to optimize away a particular instruction. The two cost metrics are now returned as an optional struct. When the optional is left unengaged, there is no information about the unrolled cost of the loop, when it is engaged the cost metrics are available to run against the thresholds. No functionality changed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238033 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 7bd1fcffbd5..3d2ae3a5b61 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -414,41 +414,39 @@ namespace { // v = b[0]* 0 + b[1]* 1 + b[2]* 0 // And finally: // v = b[1] -class UnrollAnalyzer : public InstVisitor { - typedef InstVisitor Base; - friend class InstVisitor; +class UnrolledInstAnalyzer : private InstVisitor { + typedef InstVisitor Base; + friend class InstVisitor; - /// \brief The loop we're going to analyze. - const Loop *L; +public: + UnrolledInstAnalyzer(unsigned Iteration, + DenseMap &SimplifiedValues, + SmallDenseMap &SCEVGEPCache) + : Iteration(Iteration), SimplifiedValues(SimplifiedValues), + SCEVGEPCache(SCEVGEPCache) {} - /// \brief TripCount of the given loop. - unsigned TripCount; + // Allow access to the initial visit method. + using Base::visit; - ScalarEvolution &SE; - - const TargetTransformInfo &TTI; +private: + /// \brief Number of currently simulated iteration. + /// + /// If an expression is ConstAddress+Constant, then the Constant is + /// Start + Iteration*Step, where Start and Step could be obtained from + /// SCEVGEPCache. + unsigned Iteration; // While we walk the loop instructions, we we build up and maintain a mapping // of simplified values specific to this iteration. The idea is to propagate // any special information we have about loads that can be replaced with // constants after complete unrolling, and account for likely simplifications // post-unrolling. - DenseMap SimplifiedValues; + DenseMap &SimplifiedValues; // To avoid requesting SCEV info on every iteration, request it once, and // for each value that would become ConstAddress+Constant after loop // unrolling, save the corresponding data. - SmallDenseMap SCEVGEPCache; - - /// \brief Number of currently simulated iteration. - /// - /// If an expression is ConstAddress+Constant, then the Constant is - /// Start + Iteration*Step, where Start and Step could be obtained from - /// SCEVGEPCache. - unsigned Iteration; - - /// \brief Upper threshold for complete unrolling. - unsigned MaxUnrolledLoopSize; + SmallDenseMap &SCEVGEPCache; /// Base case for the instruction visitor. bool visitInstruction(Instruction &I) { return false; }; @@ -524,95 +522,102 @@ class UnrollAnalyzer : public InstVisitor { return true; } +}; +} // namespace -public: - UnrollAnalyzer(const Loop *L, unsigned TripCount, ScalarEvolution &SE, - const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) - : L(L), TripCount(TripCount), SE(SE), TTI(TTI), - MaxUnrolledLoopSize(MaxUnrolledLoopSize), - NumberOfOptimizedInstructions(0), UnrolledLoopSize(0) {} +namespace { +struct EstimatedUnrollCost { /// \brief Count the number of optimized instructions. unsigned NumberOfOptimizedInstructions; /// \brief Count the total number of instructions. unsigned UnrolledLoopSize; +}; +} - /// \brief Figure out if the loop is worth full unrolling. - /// - /// Complete loop unrolling can make some loads constant, and we need to know - /// if that would expose any further optimization opportunities. This routine - /// estimates this optimization. It assigns computed number of instructions, - /// that potentially might be optimized away, to - /// NumberOfOptimizedInstructions, and total number of instructions to - /// UnrolledLoopSize (not counting blocks that won't be reached, if we were - /// able to compute the condition). - /// \returns false if we can't analyze the loop, or if we discovered that - /// unrolling won't give anything. Otherwise, returns true. - bool analyzeLoop() { - SmallSetVector BBWorklist; - - // We want to be able to scale offsets by the trip count and add more - // offsets to them without checking for overflows, and we already don't want - // to analyze *massive* trip counts, so we force the max to be reasonably - // small. - assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && - "The unroll iterations max is too large!"); - - // Don't simulate loops with a big or unknown tripcount - if (!UnrollMaxIterationsCountToAnalyze || !TripCount || - TripCount > UnrollMaxIterationsCountToAnalyze) - return false; - - // To avoid compute SCEV-expressions on every iteration, compute them once - // and store interesting to us in SCEVGEPCache. - SCEVGEPCache = buildSCEVGEPCache(*L, SE); - - // Simulate execution of each iteration of the loop counting instructions, - // which would be simplified. - // Since the same load will take different values on different iterations, - // we literally have to go through all loop's iterations. - for (Iteration = 0; Iteration < TripCount; ++Iteration) { - SimplifiedValues.clear(); - BBWorklist.clear(); - BBWorklist.insert(L->getHeader()); - // Note that we *must not* cache the size, this loop grows the worklist. - for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { - BasicBlock *BB = BBWorklist[Idx]; - - // Visit all instructions in the given basic block and try to simplify - // it. We don't change the actual IR, just count optimization - // opportunities. - for (Instruction &I : *BB) { - UnrolledLoopSize += TTI.getUserCost(&I); - - // Visit the instruction to analyze its loop cost after unrolling, - // and if the visitor returns true, then we can optimize this - // instruction away. - if (Base::visit(I)) - NumberOfOptimizedInstructions += TTI.getUserCost(&I); - - // If unrolled body turns out to be too big, bail out. - if (UnrolledLoopSize - NumberOfOptimizedInstructions > - MaxUnrolledLoopSize) - return false; - } +/// \brief Figure out if the loop is worth full unrolling. +/// +/// Complete loop unrolling can make some loads constant, and we need to know +/// if that would expose any further optimization opportunities. This routine +/// estimates this optimization. It assigns computed number of instructions, +/// that potentially might be optimized away, to +/// NumberOfOptimizedInstructions, and total number of instructions to +/// UnrolledLoopSize (not counting blocks that won't be reached, if we were +/// able to compute the condition). +/// \returns false if we can't analyze the loop, or if we discovered that +/// unrolling won't give anything. Otherwise, returns true. +Optional +analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + unsigned MaxUnrolledLoopSize) { + // We want to be able to scale offsets by the trip count and add more offsets + // to them without checking for overflows, and we already don't want to + // analyze *massive* trip counts, so we force the max to be reasonably small. + assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && + "The unroll iterations max is too large!"); + + // Don't simulate loops with a big or unknown tripcount + if (!UnrollMaxIterationsCountToAnalyze || !TripCount || + TripCount > UnrollMaxIterationsCountToAnalyze) + return None; + + // To avoid compute SCEV-expressions on every iteration, compute them once + // and store interesting to us in SCEVGEPCache. + SmallDenseMap SCEVGEPCache = + buildSCEVGEPCache(*L, SE); + + SmallSetVector BBWorklist; + DenseMap SimplifiedValues; - // Add BB's successors to the worklist. - for (BasicBlock *Succ : successors(BB)) - if (L->contains(Succ)) - BBWorklist.insert(Succ); + unsigned NumberOfOptimizedInstructions = 0; + unsigned UnrolledLoopSize = 0; + + // Simulate execution of each iteration of the loop counting instructions, + // which would be simplified. + // Since the same load will take different values on different iterations, + // we literally have to go through all loop's iterations. + for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { + SimplifiedValues.clear(); + UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SCEVGEPCache); + + BBWorklist.clear(); + BBWorklist.insert(L->getHeader()); + // Note that we *must not* cache the size, this loop grows the worklist. + for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { + BasicBlock *BB = BBWorklist[Idx]; + + // Visit all instructions in the given basic block and try to simplify + // it. We don't change the actual IR, just count optimization + // opportunities. + for (Instruction &I : *BB) { + UnrolledLoopSize += TTI.getUserCost(&I); + + // Visit the instruction to analyze its loop cost after unrolling, + // and if the visitor returns true, then we can optimize this + // instruction away. + if (Analyzer.visit(I)) + NumberOfOptimizedInstructions += TTI.getUserCost(&I); + + // If unrolled body turns out to be too big, bail out. + if (UnrolledLoopSize - NumberOfOptimizedInstructions > + MaxUnrolledLoopSize) + return None; } - // If we found no optimization opportunities on the first iteration, we - // won't find them on later ones too. - if (!NumberOfOptimizedInstructions) - return false; + // Add BB's successors to the worklist. + for (BasicBlock *Succ : successors(BB)) + if (L->contains(Succ)) + BBWorklist.insert(Succ); } - return true; + + // If we found no optimization opportunities on the first iteration, we + // won't find them on later ones too. + if (!NumberOfOptimizedInstructions) + return None; } -}; -} // namespace + return {{NumberOfOptimizedInstructions, UnrolledLoopSize}}; +} /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, @@ -886,14 +891,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. - UnrollAnalyzer UA(L, TripCount, *SE, TTI, AbsoluteThreshold); - if (UA.analyzeLoop() && - canUnrollCompletely(L, Threshold, AbsoluteThreshold, - UA.UnrolledLoopSize, - UA.NumberOfOptimizedInstructions, - PercentOfOptimizedForCompleteUnroll)) { - Unrolling = Full; - } + if (Optional Cost = + analyzeLoopUnrollCost(L, TripCount, *SE, TTI, AbsoluteThreshold)) + if (canUnrollCompletely(L, Threshold, AbsoluteThreshold, + Cost->UnrolledLoopSize, + Cost->NumberOfOptimizedInstructions, + PercentOfOptimizedForCompleteUnroll)) { + Unrolling = Full; + } } } else if (TripCount && Count < TripCount) { Unrolling = Partial;