From: Owen Anderson
Date: Mon, 16 Nov 2015 18:07:30 +0000 (+0000)
Subject: Add intermediate subtract instructions to reassociation worklist.
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=2a949077a97c3bcc57be2507ad8c9b5035f14a3d

Add intermediate subtract instructions to reassociation worklist.

We sometimes create intermediate subtract instructions during
reassociation. Adding these to the worklist for revisiting exposes
many additional reassociation opportunities.

Patch by Aditya Nandakumar.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253240 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index f6c18d626cf..82c79e7d919 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -881,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 /// that computes the negative version of the value specified.  The negative
 /// version of the value is returned, and BI is left pointing at the instruction
 /// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add intermediate instructions that are modified while pushing negates
+/// through adds to the redo list; these will be revisited to see if additional
+/// opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+                          SetVector<AssertingVH<Instruction>> &ToRedo) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (C->getType()->isFPOrFPVectorTy()) {
       return ConstantExpr::getFNeg(C);
     }
@@ -902,8 +906,8 @@
   if (BinaryOperator *I =
           isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
     // Push the negates through the add.
-    I->setOperand(0, NegateValue(I->getOperand(0), BI));
-    I->setOperand(1, NegateValue(I->getOperand(1), BI));
+    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
     if (I->getOpcode() == Instruction::Add) {
       I->setHasNoUnsignedWrap(false);
       I->setHasNoSignedWrap(false);
@@ -916,6 +920,10 @@
     //
     I->moveBefore(BI);
     I->setName(I->getName()+".neg");
+
+    // Add the intermediate negates to the redo list as processing them later
+    // could expose more reassociation opportunities.
+    ToRedo.insert(I);
     return I;
   }

@@ -955,12 +963,15 @@
     } else {
       TheNeg->andIRFlags(BI);
     }
+    ToRedo.insert(TheNeg);
     return TheNeg;
   }

   // Insert a 'neg' instruction that subtracts the value from zero to get the
   // negation.
-  return CreateNeg(V, V->getName() + ".neg", BI, BI);
+  BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+  ToRedo.insert(NewNeg);
+  return NewNeg;
 }

 /// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -994,14 +1005,15 @@

 /// If we have (X-Y), and if either X is an add, or if this is only used by an
 /// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
   // Convert a subtract into an add and a neg instruction. This allows sub
   // instructions to be commuted with other add instructions.
   //
   // Calculate the negative value of Operand 1 of the sub instruction,
   // and set it as the RHS of the add instruction we just made.
   //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
   BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
   Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
   Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -2068,7 +2080,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
   // see if we can convert it to X+-Y.
   if (I->getOpcode() == Instruction::Sub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2079,6 +2091,12 @@
         (!I->hasOneUse() ||
          !isReassociableOp(I->user_back(), Instruction::Mul))) {
       Instruction *NI = LowerNegateToMultiply(I);
+      // If the negate was simplified, revisit the users to see if we can
+      // reassociate further.
+      for (User *U : NI->users()) {
+        if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+          RedoInsts.insert(Tmp);
+      }
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2086,7 +2104,7 @@
     }
   } else if (I->getOpcode() == Instruction::FSub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2096,7 +2114,13 @@
     if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
         (!I->hasOneUse() ||
          !isReassociableOp(I->user_back(), Instruction::FMul))) {
+      // If the negate was simplified, revisit the users to see if we can
+      // reassociate further.
       Instruction *NI = LowerNegateToMultiply(I);
+      for (User *U : NI->users()) {
+        if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+          RedoInsts.insert(Tmp);
+      }
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2111,8 +2135,14 @@
   // If this is an interior node of a reassociable tree, ignore it until we
   // get to the root of the tree, to avoid N^2 analysis.
   unsigned Opcode = BO->getOpcode();
-  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+    // During the initial run we will get to the root of the tree.
+    // But if we get here while we are redoing instructions, there is no
+    // guarantee that the root will be visited, so redo it later.
+    if (BO->user_back() != BO)
+      RedoInsts.insert(BO->user_back());
     return;
+  }

   // If this is an add tree that is used by a sub instruction, ignore it
   // until we process the subtract.
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll
index 9fbb5ccfe9a..fb76b9d990b 100644
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -16,9 +16,9 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
 define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; CHECK-LABEL: @test2
-; CHECK-NEXT: fadd fast <2 x float> %c, %b
-; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
-; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast <2 x float> %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast <2 x float> %a, %a
+; CHECK-NEXT: fmul fast <2 x float> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret <2 x float>

   %t0 = fmul fast <2 x float> %a, %b
@@ -133,8 +133,8 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; Check x*y+y*x -> x*y*2.
 define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
 ; CHECK-LABEL: @test11
-; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: %factor = fmul fast <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %y
 ; CHECK-NEXT: ret <2 x double> %tmp1

   %1 = fmul fast <2 x double> %x, %y
diff --git a/test/Transforms/Reassociate/fast-basictest.ll b/test/Transforms/Reassociate/fast-basictest.ll
index 64b74e3e8c1..c8a2bd9c193 100644
--- a/test/Transforms/Reassociate/fast-basictest.ll
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@@ -108,7 +108,7 @@ define float @test7(float %A, float %B, float %C) {
 ; CHECK-LABEL: @test7
 ; CHECK-NEXT: fadd fast float %C, %B
 ; CHECK-NEXT: fmul fast float %A, %A
-; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %tmp2
 ; CHECK-NEXT: ret float

   %aa = fmul fast float %A, %A
diff --git a/test/Transforms/Reassociate/fast-fp-commute.ll b/test/Transforms/Reassociate/fast-fp-commute.ll
index ad89607a21e..6565bbb3d20 100644
--- a/test/Transforms/Reassociate/fast-fp-commute.ll
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@@ -33,8 +33,8 @@ define float @test2(float %x, float %y) {

 define float @test3(float %x, float %y) {
 ; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
-; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: %factor = fmul fast float %x, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y
 ; CHECK-NEXT: ret float %tmp1

   %1 = fmul fast float %x, %y
diff --git a/test/Transforms/Reassociate/fast-multistep.ll b/test/Transforms/Reassociate/fast-multistep.ll
index 45e15c7f353..aea997cdcbd 100644
--- a/test/Transforms/Reassociate/fast-multistep.ll
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@@ -3,9 +3,9 @@
 define float @fmultistep1(float %a, float %b, float %c) {
 ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
 ; CHECK-LABEL: @fmultistep1
-; CHECK-NEXT: fadd fast float %c, %b
-; CHECK-NEXT: fmul fast float %a, %tmp2
-; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast float %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast float %a, %a
+; CHECK-NEXT: fmul fast float [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret float

   %t0 = fmul fast float %a, %b
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index c499646a8b6..5685bb94953 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -8,9 +8,9 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
   %t2 = mul i64 %a, %c
   %t3 = mul i64 %a, %t2 ; a*(a*c)
   %t4 = add i64 %t1, %t3
-; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = add i64 %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = mul i64 %a, %a
+; CHECK-NEXT: mul i64 [[TMP2]], [[TMP1]]
 ; CHECK-NEXT: ret
   ret i64 %t4
 }
diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
new file mode 100644
index 00000000000..c2cdffce61e
--- /dev/null
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK-LABEL: faddsubAssoc1
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
+; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is A op (B op C)
+define half @faddsubAssoc1(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+  %tmp4 = fsub fast half %tmp2, %tmp1 ; 5 * a - 3 * b
+  %tmp5 = fsub fast half %tmp3, %tmp4 ; 2 * b - ( 5 * a - 3 * b)
+  ret half %tmp5 ; = 5 * (b - a)
+}
+
+; CHECK-LABEL: faddsubAssoc2
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH3C00
+; CHECK: fadd fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is (A op B) op C
+define half @faddsubAssoc2(half %a, half %b) {
+  %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+  %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+  %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+  %tmp4 = fadd fast half %tmp2, %tmp1 ; 5 * a + 3 * b
+  %tmp5 = fsub fast half %tmp4, %tmp3 ; (5 * a + 3 * b) - (2 * b)
+  ret half %tmp5 ; = 5 * a + b
+}
+
diff --git a/test/Transforms/Reassociate/secondary.ll b/test/Transforms/Reassociate/secondary.ll
index a52000ada53..388cd6bcb6f 100644
--- a/test/Transforms/Reassociate/secondary.ll
+++ b/test/Transforms/Reassociate/secondary.ll
@@ -6,7 +6,7 @@

 ; CHECK: define
 ; CHECK-NOT: undef
-; CHECK: %factor = mul i32 %tmp3, -2
+; CHECK: %factor = mul i32 %tmp3.neg, 2
 ; CHECK-NOT: undef
 ; CHECK: }
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index a22689805fb..0bed6f35880 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
   %xor1 = xor i32 %xor, %and
   ret i32 %xor1
; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %y, 123
-; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: %xor = xor i32 %x, 123
+; CHECK: %xor1 = xor i32 %xor, %y
 ; CHECK: ret i32 %xor1
 }
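
Editorial note (a sketch, not part of the commit): to see what the redo list
buys, take @faddsubAssoc1 from the new test above, which computes
2*b - (5*a - 3*b). BreakUpSubtract rewrites each fsub as an add of a negate,
and NegateValue pushes the negates down; those freshly created negates and
rewritten subtracts are exactly the intermediates that now land on RedoInsts.
Revisiting them lets the pass fold the whole expression to 5*b - 5*a, the
form the CHECK lines verify. Reconstructed from those CHECK lines, the
expected output looks roughly like this (value names are illustrative;
0xH4500 is the half constant 5.0):

    define half @faddsubAssoc1(half %a, half %b) {
      %tmp1 = fmul fast half %a, 0xH4500   ; 5 * a
      %tmp2 = fmul fast half %b, 0xH4500   ; 5 * b
      %tmp3 = fsub fast half %tmp2, %tmp1  ; 5*b - 5*a == 2*b - (5*a - 3*b)
      ret half %tmp3
    }

Before this change the intermediate negates were never revisited, so
factored forms like this one could be missed.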