Add intermediate subtract instructions to reassociation worklist.

author Owen Anderson <resistor@mac.com>

Mon, 16 Nov 2015 18:07:30 +0000 (18:07 +0000)

committer Owen Anderson <resistor@mac.com>

Mon, 16 Nov 2015 18:07:30 +0000 (18:07 +0000)
author Owen Anderson <resistor@mac.com>
Mon, 16 Nov 2015 18:07:30 +0000 (18:07 +0000)
committer Owen Anderson <resistor@mac.com>
Mon, 16 Nov 2015 18:07:30 +0000 (18:07 +0000)
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp

index f6c18d626cfe956c7448a9f348cf4112c75b2a45..82c79e7d9199d45efdf841646cf45cc3b19280a3 100644 (file)
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -881,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
  /// that computes the negative version of the value specified.  The negative
  /// version of the value is returned, and BI is left pointing at the instruction
  /// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add intermediate instructions to the redo list that are modified while
+/// pushing the negates through adds.  These will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+                          SetVector<AssertingVH<Instruction>> &ToRedo) {
    if (Constant *C = dyn_cast<Constant>(V)) {
      if (C->getType()->isFPOrFPVectorTy()) {
        return ConstantExpr::getFNeg(C);
@@ -902,8 +906,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
    if (BinaryOperator *I =
            isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
      // Push the negates through the add.
-    I->setOperand(0, NegateValue(I->getOperand(0), BI));
-    I->setOperand(1, NegateValue(I->getOperand(1), BI));
+    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
      if (I->getOpcode() == Instruction::Add) {
        I->setHasNoUnsignedWrap(false);
        I->setHasNoSignedWrap(false);
@@ -916,6 +920,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
      //
      I->moveBefore(BI);
      I->setName(I->getName()+".neg");
+
+    // Add the intermediate negates to the redo list as processing them later
+    // could expose more reassociating opportunities.
+    ToRedo.insert(I);
      return I;
    }
  
@@ -955,12 +963,15 @@ static Value *NegateValue(Value *V, Instruction *BI) {
      } else {
        TheNeg->andIRFlags(BI);
      }
+    ToRedo.insert(TheNeg);
      return TheNeg;
    }
  
    // Insert a 'neg' instruction that subtracts the value from zero to get the
    // negation.
-  return CreateNeg(V, V->getName() + ".neg", BI, BI);
+  BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+  ToRedo.insert(NewNeg);
+  return NewNeg;
  }
  
  /// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -994,14 +1005,15 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
  
  /// If we have (X-Y), and if either X is an add, or if this is only used by an
  /// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
    // Convert a subtract into an add and a neg instruction. This allows sub
    // instructions to be commuted with other add instructions.
    //
    // Calculate the negative value of Operand 1 of the sub instruction,
    // and set it as the RHS of the add instruction we just made.
    //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
    BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
    Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
    Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -2068,7 +2080,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
    // see if we can convert it to X+-Y.
    if (I->getOpcode() == Instruction::Sub) {
      if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
@@ -2079,6 +2091,12 @@ void Reassociate::OptimizeInst(Instruction *I) {
            (!I->hasOneUse() ||
             !isReassociableOp(I->user_back(), Instruction::Mul))) {
          Instruction *NI = LowerNegateToMultiply(I);
+        // If the negate was simplified, revisit the users to see if we can
+        // reassociate further.
+        for (User *U : NI->users()) {
+          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+            RedoInsts.insert(Tmp);
+        }
          RedoInsts.insert(I);
          MadeChange = true;
          I = NI;
@@ -2086,7 +2104,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
      }
    } else if (I->getOpcode() == Instruction::FSub) {
      if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
        RedoInsts.insert(I);
        MadeChange = true;
        I = NI;
@@ -2096,7 +2114,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
        if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
            (!I->hasOneUse() ||
             !isReassociableOp(I->user_back(), Instruction::FMul))) {
+        // If the negate was simplified, revisit the users to see if we can
+        // reassociate further.
          Instruction *NI = LowerNegateToMultiply(I);
+        for (User *U : NI->users()) {
+          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+            RedoInsts.insert(Tmp);
+        }
          RedoInsts.insert(I);
          MadeChange = true;
          I = NI;
@@ -2111,8 +2135,14 @@ void Reassociate::OptimizeInst(Instruction *I) {
    // If this is an interior node of a reassociable tree, ignore it until we
    // get to the root of the tree, to avoid N^2 analysis.
    unsigned Opcode = BO->getOpcode();
-  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+    // During the initial run we will get to the root of the tree.
+    // But if we get here while we are redoing instructions, there is no
+    // guarantee that the root will be visited. So Redo later
+    if (BO->user_back() != BO)
+      RedoInsts.insert(BO->user_back());
      return;
+  }
  
    // If this is an add tree that is used by a sub instruction, ignore it
    // until we process the subtract.
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll

index 9fbb5ccfe9a25ad4ea85c949f1144408e59a06f3..fb76b9d990b0627065a2ad86596ff46b37fa70b3 100644 (file)
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -16,9 +16,9 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
  define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
  ; CHECK-LABEL: @test2
-; CHECK-NEXT: fadd fast <2 x float> %c, %b
-; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
-; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast <2 x float> %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast <2 x float> %a, %a
+; CHECK-NEXT: fmul fast <2 x float> [[TMP2]], [[TMP1]]
  ; CHECK-NEXT: ret <2 x float>
  
    %t0 = fmul fast <2 x float> %a, %b
@@ -133,8 +133,8 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
  ; Check x*y+y*x -> x*y*2.
  define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
  ; CHECK-LABEL: @test11
-; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: %factor = fmul fast <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %y
  ; CHECK-NEXT: ret <2 x double> %tmp1
  
    %1 = fmul fast <2 x double> %x, %y
diff --git a/test/Transforms/Reassociate/fast-basictest.ll b/test/Transforms/Reassociate/fast-basictest.ll

index 64b74e3e8c1683fb7665f35144facaea86749f2d..c8a2bd9c1935aa313094d27c80390030f432ec83 100644 (file)
--- a/test/Transforms/Reassociate/fast-basictest.ll
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@@ -108,7 +108,7 @@ define float @test7(float %A, float %B, float %C) {
  ; CHECK-LABEL: @test7
  ; CHECK-NEXT: fadd fast float %C, %B
  ; CHECK-NEXT: fmul fast float %A, %A
-; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %tmp2
  ; CHECK-NEXT: ret float
  
    %aa = fmul fast float %A, %A
diff --git a/test/Transforms/Reassociate/fast-fp-commute.ll b/test/Transforms/Reassociate/fast-fp-commute.ll

index ad89607a21e4db0fc696f659cb749753766e65ff..6565bbb3d20172be41f004b96c77eab37df8d37d 100644 (file)
--- a/test/Transforms/Reassociate/fast-fp-commute.ll
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@@ -33,8 +33,8 @@ define float @test2(float %x, float %y) {
  
  define float @test3(float %x, float %y) {
  ; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
-; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: %factor = fmul fast float %x, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y
  ; CHECK-NEXT: ret float %tmp1
  
    %1 = fmul fast float %x, %y
diff --git a/test/Transforms/Reassociate/fast-multistep.ll b/test/Transforms/Reassociate/fast-multistep.ll

index 45e15c7f35391bf4f91b9ac1ba7465bd55510e3e..aea997cdcbdabae91713d059e4fd12eb6cf724e1 100644 (file)
--- a/test/Transforms/Reassociate/fast-multistep.ll
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@@ -3,9 +3,9 @@
  define float @fmultistep1(float %a, float %b, float %c) {
  ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
  ; CHECK-LABEL: @fmultistep1
-; CHECK-NEXT: fadd fast float %c, %b
-; CHECK-NEXT: fmul fast float %a, %tmp2
-; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast float %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast float %a, %a
+; CHECK-NEXT: fmul fast float [[TMP2]], [[TMP1]]
  ; CHECK-NEXT: ret float
  
    %t0 = fmul fast float %a, %b
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll

index c499646a8b6ac9e3e4ab1be887d0902a1a4e591e..5685bb9495375de7ea702af99dfb6957cf465edc 100644 (file)
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -8,9 +8,9 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
    %t2 = mul i64 %a, %c
    %t3 = mul i64 %a, %t2 ; a*(a*c)
    %t4 = add i64 %t1, %t3
-; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = add i64 %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = mul i64 %a, %a
+; CHECK-NEXT: mul i64 [[TMP2]], [[TMP1]]
  ; CHECK-NEXT: ret
    ret i64 %t4
  }
diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll

new file mode 100644 (file)

index 0000000..c2cdffc
--- /dev/null
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK-LABEL: faddsubAssoc1
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
+; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is A op (B op C)
+define half @faddsubAssoc1(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+  %tmp4 = fsub fast half %tmp2, %tmp1 ; 5 * a - 3 * b
+  %tmp5 = fsub fast half %tmp3, %tmp4 ; 2 * b - ( 5 * a - 3 * b)
+  ret half %tmp5 ; = 5 * (b - a)
+}
+
+; CHECK-LABEL: faddsubAssoc2
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH3C00
+; CHECK: fadd fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is (A op B) op C
+define half @faddsubAssoc2(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+  %tmp4 = fadd fast half %tmp2, %tmp1 ; 5 * a + 3 * b
+  %tmp5 = fsub fast half %tmp4, %tmp3 ; (5 * a + 3 * b) - (2 * b)
+  ret half %tmp5 ; = 5 * a + b
+}
+
diff --git a/test/Transforms/Reassociate/secondary.ll b/test/Transforms/Reassociate/secondary.ll

index a52000ada53780ed2b0eda1cecd4dd5aff7afec1..388cd6bcb6fe90bb7994635a99f12b6f9aadaa1a 100644 (file)
--- a/test/Transforms/Reassociate/secondary.ll
+++ b/test/Transforms/Reassociate/secondary.ll
@@ -6,7 +6,7 @@
  
  ; CHECK:     define
  ; CHECK-NOT: undef
-; CHECK:     %factor = mul i32 %tmp3, -2
+; CHECK:     %factor = mul i32 %tmp3.neg, 2
  ; CHECK-NOT: undef
  ; CHECK:     }
  
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll

index a22689805fb582ae7a9f3b5901213eeca1ff288d..0bed6f3588080d0bdd3dc0f79275255a611fc458 100644 (file)
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
    %xor1 = xor i32 %xor, %and
    ret i32 %xor1
  ; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %y, 123
-; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: %xor = xor i32 %x, 123
+; CHECK: %xor1 = xor i32 %xor, %y
  ; CHECK: ret i32 %xor1
  }
author	Owen Anderson <resistor@mac.com>
	Mon, 16 Nov 2015 18:07:30 +0000 (18:07 +0000)
committer	Owen Anderson <resistor@mac.com>
	Mon, 16 Nov 2015 18:07:30 +0000 (18:07 +0000)
lib/Transforms/Scalar/Reassociate.cpp		patch \| blob \| history
test/Transforms/Reassociate/fast-ReassociateVector.ll		patch \| blob \| history
test/Transforms/Reassociate/fast-basictest.ll		patch \| blob \| history
test/Transforms/Reassociate/fast-fp-commute.ll		patch \| blob \| history
test/Transforms/Reassociate/fast-multistep.ll		patch \| blob \| history
test/Transforms/Reassociate/multistep.ll		patch \| blob \| history
test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/Reassociate/secondary.ll		patch \| blob \| history
test/Transforms/Reassociate/xor_reassoc.ll		patch \| blob \| history