[Unroll] Separate the logic for testing each iteration of the loop,

author Chandler Carruth <chandlerc@gmail.com>

Fri, 22 May 2015 17:41:35 +0000 (17:41 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Fri, 22 May 2015 17:41:35 +0000 (17:41 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Fri, 22 May 2015 17:41:35 +0000 (17:41 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Fri, 22 May 2015 17:41:35 +0000 (17:41 +0000)
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp

index 7bd1fcffbd57801d96e94fe5ba9c57720db5871b..3d2ae3a5b61a77fbe5532dcc829d2b16f6fccfea 100644 (file)
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -414,41 +414,39 @@ namespace {
  //   v = b[0]* 0 + b[1]* 1 + b[2]* 0
  // And finally:
  //   v = b[1]
-class UnrollAnalyzer : public InstVisitor<UnrollAnalyzer, bool> {
-  typedef InstVisitor<UnrollAnalyzer, bool> Base;
-  friend class InstVisitor<UnrollAnalyzer, bool>;
+class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
+  typedef InstVisitor<UnrolledInstAnalyzer, bool> Base;
+  friend class InstVisitor<UnrolledInstAnalyzer, bool>;
  
-  /// \brief The loop we're going to analyze.
-  const Loop *L;
+public:
+  UnrolledInstAnalyzer(unsigned Iteration,
+                       DenseMap<Value *, Constant *> &SimplifiedValues,
+                       SmallDenseMap<Value *, SCEVGEPDescriptor> &SCEVGEPCache)
+      : Iteration(Iteration), SimplifiedValues(SimplifiedValues),
+        SCEVGEPCache(SCEVGEPCache) {}
  
-  /// \brief TripCount of the given loop.
-  unsigned TripCount;
+  // Allow access to the initial visit method.
+  using Base::visit;
  
-  ScalarEvolution &SE;
-
-  const TargetTransformInfo &TTI;
+private:
+  /// \brief Number of currently simulated iteration.
+  ///
+  /// If an expression is ConstAddress+Constant, then the Constant is
+  /// Start + Iteration*Step, where Start and Step could be obtained from
+  /// SCEVGEPCache.
+  unsigned Iteration;
  
    // While we walk the loop instructions, we we build up and maintain a mapping
    // of simplified values specific to this iteration.  The idea is to propagate
    // any special information we have about loads that can be replaced with
    // constants after complete unrolling, and account for likely simplifications
    // post-unrolling.
-  DenseMap<Value *, Constant *> SimplifiedValues;
+  DenseMap<Value *, Constant *> &SimplifiedValues;
  
    // To avoid requesting SCEV info on every iteration, request it once, and
    // for each value that would become ConstAddress+Constant after loop
    // unrolling, save the corresponding data.
-  SmallDenseMap<Value *, SCEVGEPDescriptor> SCEVGEPCache;
-
-  /// \brief Number of currently simulated iteration.
-  ///
-  /// If an expression is ConstAddress+Constant, then the Constant is
-  /// Start + Iteration*Step, where Start and Step could be obtained from
-  /// SCEVGEPCache.
-  unsigned Iteration;
-
-  /// \brief Upper threshold for complete unrolling.
-  unsigned MaxUnrolledLoopSize;
+  SmallDenseMap<Value *, SCEVGEPDescriptor> &SCEVGEPCache;
  
    /// Base case for the instruction visitor.
    bool visitInstruction(Instruction &I) { return false; };
@@ -524,95 +522,102 @@ class UnrollAnalyzer : public InstVisitor<UnrollAnalyzer, bool> {
  
      return true;
    }
+};
+} // namespace
  
-public:
-  UnrollAnalyzer(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
-                 const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize)
-      : L(L), TripCount(TripCount), SE(SE), TTI(TTI),
-        MaxUnrolledLoopSize(MaxUnrolledLoopSize),
-        NumberOfOptimizedInstructions(0), UnrolledLoopSize(0) {}
  
+namespace {
+struct EstimatedUnrollCost {
    /// \brief Count the number of optimized instructions.
    unsigned NumberOfOptimizedInstructions;
  
    /// \brief Count the total number of instructions.
    unsigned UnrolledLoopSize;
+};
+}
  
-  /// \brief Figure out if the loop is worth full unrolling.
-  ///
-  /// Complete loop unrolling can make some loads constant, and we need to know
-  /// if that would expose any further optimization opportunities.  This routine
-  /// estimates this optimization.  It assigns computed number of instructions,
-  /// that potentially might be optimized away, to
-  /// NumberOfOptimizedInstructions, and total number of instructions to
-  /// UnrolledLoopSize (not counting blocks that won't be reached, if we were
-  /// able to compute the condition).
-  /// \returns false if we can't analyze the loop, or if we discovered that
-  /// unrolling won't give anything. Otherwise, returns true.
-  bool analyzeLoop() {
-    SmallSetVector<BasicBlock *, 16> BBWorklist;
-
-    // We want to be able to scale offsets by the trip count and add more
-    // offsets to them without checking for overflows, and we already don't want
-    // to analyze *massive* trip counts, so we force the max to be reasonably
-    // small.
-    assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
-           "The unroll iterations max is too large!");
-
-    // Don't simulate loops with a big or unknown tripcount
-    if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
-        TripCount > UnrollMaxIterationsCountToAnalyze)
-      return false;
-
-    // To avoid compute SCEV-expressions on every iteration, compute them once
-    // and store interesting to us in SCEVGEPCache.
-    SCEVGEPCache = buildSCEVGEPCache(*L, SE);
-
-    // Simulate execution of each iteration of the loop counting instructions,
-    // which would be simplified.
-    // Since the same load will take different values on different iterations,
-    // we literally have to go through all loop's iterations.
-    for (Iteration = 0; Iteration < TripCount; ++Iteration) {
-      SimplifiedValues.clear();
-      BBWorklist.clear();
-      BBWorklist.insert(L->getHeader());
-      // Note that we *must not* cache the size, this loop grows the worklist.
-      for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
-        BasicBlock *BB = BBWorklist[Idx];
-
-        // Visit all instructions in the given basic block and try to simplify
-        // it.  We don't change the actual IR, just count optimization
-        // opportunities.
-        for (Instruction &I : *BB) {
-          UnrolledLoopSize += TTI.getUserCost(&I);
-
-          // Visit the instruction to analyze its loop cost after unrolling,
-          // and if the visitor returns true, then we can optimize this
-          // instruction away.
-          if (Base::visit(I))
-            NumberOfOptimizedInstructions += TTI.getUserCost(&I);
-
-          // If unrolled body turns out to be too big, bail out.
-          if (UnrolledLoopSize - NumberOfOptimizedInstructions >
-              MaxUnrolledLoopSize)
-            return false;
-        }
+/// \brief Figure out if the loop is worth full unrolling.
+///
+/// Complete loop unrolling can make some loads constant, and we need to know
+/// if that would expose any further optimization opportunities.  This routine
+/// estimates this optimization.  It assigns computed number of instructions,
+/// that potentially might be optimized away, to
+/// NumberOfOptimizedInstructions, and total number of instructions to
+/// UnrolledLoopSize (not counting blocks that won't be reached, if we were
+/// able to compute the condition).
+/// \returns false if we can't analyze the loop, or if we discovered that
+/// unrolling won't give anything. Otherwise, returns true.
+Optional<EstimatedUnrollCost>
+analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE,
+                      const TargetTransformInfo &TTI,
+                      unsigned MaxUnrolledLoopSize) {
+  // We want to be able to scale offsets by the trip count and add more offsets
+  // to them without checking for overflows, and we already don't want to
+  // analyze *massive* trip counts, so we force the max to be reasonably small.
+  assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) &&
+         "The unroll iterations max is too large!");
+
+  // Don't simulate loops with a big or unknown tripcount
+  if (!UnrollMaxIterationsCountToAnalyze || !TripCount ||
+      TripCount > UnrollMaxIterationsCountToAnalyze)
+    return None;
+
+  // To avoid compute SCEV-expressions on every iteration, compute them once
+  // and store interesting to us in SCEVGEPCache.
+  SmallDenseMap<Value *, SCEVGEPDescriptor> SCEVGEPCache =
+      buildSCEVGEPCache(*L, SE);
+
+  SmallSetVector<BasicBlock *, 16> BBWorklist;
+  DenseMap<Value *, Constant *> SimplifiedValues;
  
-        // Add BB's successors to the worklist.
-        for (BasicBlock *Succ : successors(BB))
-          if (L->contains(Succ))
-            BBWorklist.insert(Succ);
+  unsigned NumberOfOptimizedInstructions = 0;
+  unsigned UnrolledLoopSize = 0;
+
+  // Simulate execution of each iteration of the loop counting instructions,
+  // which would be simplified.
+  // Since the same load will take different values on different iterations,
+  // we literally have to go through all loop's iterations.
+  for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) {
+    SimplifiedValues.clear();
+    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SCEVGEPCache);
+
+    BBWorklist.clear();
+    BBWorklist.insert(L->getHeader());
+    // Note that we *must not* cache the size, this loop grows the worklist.
+    for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+      BasicBlock *BB = BBWorklist[Idx];
+
+      // Visit all instructions in the given basic block and try to simplify
+      // it.  We don't change the actual IR, just count optimization
+      // opportunities.
+      for (Instruction &I : *BB) {
+        UnrolledLoopSize += TTI.getUserCost(&I);
+
+        // Visit the instruction to analyze its loop cost after unrolling,
+        // and if the visitor returns true, then we can optimize this
+        // instruction away.
+        if (Analyzer.visit(I))
+          NumberOfOptimizedInstructions += TTI.getUserCost(&I);
+
+        // If unrolled body turns out to be too big, bail out.
+        if (UnrolledLoopSize - NumberOfOptimizedInstructions >
+            MaxUnrolledLoopSize)
+          return None;
        }
  
-      // If we found no optimization opportunities on the first iteration, we
-      // won't find them on later ones too.
-      if (!NumberOfOptimizedInstructions)
-        return false;
+      // Add BB's successors to the worklist.
+      for (BasicBlock *Succ : successors(BB))
+        if (L->contains(Succ))
+          BBWorklist.insert(Succ);
      }
-    return true;
+
+    // If we found no optimization opportunities on the first iteration, we
+    // won't find them on later ones too.
+    if (!NumberOfOptimizedInstructions)
+      return None;
    }
-};
-} // namespace
+  return {{NumberOfOptimizedInstructions, UnrolledLoopSize}};
+}
  
  /// ApproximateLoopSize - Approximate the size of the loop.
  static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
@@ -886,14 +891,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
        // The loop isn't that small, but we still can fully unroll it if that
        // helps to remove a significant number of instructions.
        // To check that, run additional analysis on the loop.
-      UnrollAnalyzer UA(L, TripCount, *SE, TTI, AbsoluteThreshold);
-      if (UA.analyzeLoop() &&
-          canUnrollCompletely(L, Threshold, AbsoluteThreshold,
-                              UA.UnrolledLoopSize,
-                              UA.NumberOfOptimizedInstructions,
-                              PercentOfOptimizedForCompleteUnroll)) {
-        Unrolling = Full;
-      }
+      if (Optional<EstimatedUnrollCost> Cost =
+              analyzeLoopUnrollCost(L, TripCount, *SE, TTI, AbsoluteThreshold))
+        if (canUnrollCompletely(L, Threshold, AbsoluteThreshold,
+                                Cost->UnrolledLoopSize,
+                                Cost->NumberOfOptimizedInstructions,
+                                PercentOfOptimizedForCompleteUnroll)) {
+          Unrolling = Full;
+        }
      }
    } else if (TripCount && Count < TripCount) {
      Unrolling = Partial;
author	Chandler Carruth <chandlerc@gmail.com>
	Fri, 22 May 2015 17:41:35 +0000 (17:41 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Fri, 22 May 2015 17:41:35 +0000 (17:41 +0000)