[NaryReassociate] speeds up candidate searching

author Jingyue Wu <jingyue@google.com>

Thu, 16 Apr 2015 18:42:31 +0000 (18:42 +0000)

committer Jingyue Wu <jingyue@google.com>

Thu, 16 Apr 2015 18:42:31 +0000 (18:42 +0000)
author Jingyue Wu <jingyue@google.com>
Thu, 16 Apr 2015 18:42:31 +0000 (18:42 +0000)
committer Jingyue Wu <jingyue@google.com>
Thu, 16 Apr 2015 18:42:31 +0000 (18:42 +0000)
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp

index ed7b24fa60462ad1a3a7fbb8f9566ae2f921e498..d6bc925955b75d5f9563e3230cdf5c6c5f5c8014 100644 (file)
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -105,7 +105,9 @@ private:
    ScalarEvolution *SE;
    // A lookup table quickly telling which instructions compute the given SCEV.
    // Note that there can be multiple instructions at different locations
-  // computing to the same SCEV.  For example,
+  // computing to the same SCEV, so we map a SCEV to an instruction list.  For
+  // example,
+  //
    //   if (p1)
    //     foo(a + b);
    //   if (p2)
@@ -190,17 +192,21 @@ Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
      return nullptr;
  
    auto &LHSCandidates = Pos->second;
-  unsigned NumIterations = 0;
-  // Search at most 10 items to avoid running quadratically.
-  static const unsigned MaxNumIterations = 10;
-  for (auto LHS = LHSCandidates.rbegin();
-       LHS != LHSCandidates.rend() && NumIterations < MaxNumIterations;
-       ++LHS, ++NumIterations) {
-    if (DT->dominates(*LHS, I)) {
-      Instruction *NewI = BinaryOperator::CreateAdd(*LHS, RHS, "", I);
+  // Look for the closest dominator LHS of I that computes LHSExpr, and replace
+  // I with LHS + RHS.
+  //
+  // Because we traverse the dominator tree in the pre-order, a
+  // candidate that doesn't dominate the current instruction won't dominate any
+  // future instruction either. Therefore, we pop it out of the stack. This
+  // optimization makes the algorithm O(n).
+  while (!LHSCandidates.empty()) {
+    Instruction *LHS = LHSCandidates.back();
+    if (DT->dominates(LHS, I)) {
+      Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
        NewI->takeName(I);
        return NewI;
      }
+    LHSCandidates.pop_back();
    }
    return nullptr;
  }
diff --git a/test/Transforms/NaryReassociate/nary-add.ll b/test/Transforms/NaryReassociate/nary-add.ll

index c030c709418c64e906cf379a47382a77970f768a..9f3d60ab94314b941eb227ab21f53a410568c167 100644 (file)
--- a/test/Transforms/NaryReassociate/nary-add.ll
+++ b/test/Transforms/NaryReassociate/nary-add.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+; RUN: opt < %s -nary-reassociate -dce -S | FileCheck %s
  
  target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
  
@@ -105,6 +105,57 @@ return:
    ret void
  }
  
+; This test involves more conditional reassociation candidates. It exercises
+; the stack optimization in tryReassociatedAdd that pops the candidates that
+; do not dominate the current instruction.
+;
+;       def1
+;      cond1
+;      /  \
+;     /    \
+;   cond2  use2
+;   /  \
+;  /    \
+; def2  def3
+;      cond3
+;       /  \
+;      /    \
+;    def4   use1
+;
+; NaryReassociate should match use1 with def3, and use2 with def1.
+define void @conditional2(i32 %a, i32 %b, i32 %c, i1 %cond1, i1 %cond2, i1 %cond3) {
+entry:
+  %def1 = add i32 %a, %b
+  br i1 %cond1, label %bb1, label %bb6
+bb1:
+  br i1 %cond2, label %bb2, label %bb3
+bb2:
+  %def2 = add i32 %a, %b
+  call void @foo(i32 %def2)
+  ret void
+bb3:
+  %def3 = add i32 %a, %b
+  br i1 %cond3, label %bb4, label %bb5
+bb4:
+  %def4 = add i32 %a, %b
+  call void @foo(i32 %def4)
+  ret void
+bb5:
+  %0 = add i32 %a, %c
+  %1 = add i32 %0, %b
+; CHECK: [[t1:%[0-9]+]] = add i32 %def3, %c
+  call void @foo(i32 %1) ; foo((a + c) + b);
+; CHECK-NEXT: call void @foo(i32 [[t1]])
+  ret void
+bb6:
+  %2 = add i32 %a, %c
+  %3 = add i32 %2, %b
+; CHECK: [[t2:%[0-9]+]] = add i32 %def1, %c
+  call void @foo(i32 %3) ; foo((a + c) + b);
+; CHECK-NEXT: call void @foo(i32 [[t2]])
+  ret void
+}
+
  ; foo((a + b) + c)
  ; foo(((a + d) + b) + c)
  ;   =>
author	Jingyue Wu <jingyue@google.com>
	Thu, 16 Apr 2015 18:42:31 +0000 (18:42 +0000)
committer	Jingyue Wu <jingyue@google.com>
	Thu, 16 Apr 2015 18:42:31 +0000 (18:42 +0000)
lib/Transforms/Scalar/NaryReassociate.cpp		patch \| blob \| history
test/Transforms/NaryReassociate/nary-add.ll		patch \| blob \| history