From: Hal Finkel Date: Fri, 19 Sep 2014 11:42:56 +0000 (+0000) Subject: Optionally enable more-aggressive FMA formation in DAGCombine X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=c404e8208cf90eb12ec9f7c11ab94f8e63a9bea3;p=oota-llvm.git Optionally enable more-aggressive FMA formation in DAGCombine The heuristic used by DAGCombine to form FMAs checks that the FMUL has only one use, but this is overly-conservative on some systems. Specifically, if the FMA and the FADD have the same latency (and the FMA does not compete for resources with the FMUL any more than the FADD does), there is no need for the restriction, and furthermore, forming the FMA leaving the FMUL can still allow for higher overall throughput and decreased critical-path length. Here we add a new TLI callback, enableAggressiveFMAFusion, false by default, to elide the hasOneUse check. This is enabled for PowerPC by default, as most PowerPC systems will benefit. Patch by Olivier Sallenave, thanks! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218120 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 54d8b99ad45..e6c4634079c 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -268,6 +268,13 @@ public: return HasFloatingPointExceptions; } + /// Return true if target always beneficiates from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. + virtual bool enableAggressiveFMAFusion(EVT VT) const { + return false; + } + /// Return the ValueType of the result of SETCC operations. Also used to /// obtain the target's preferred type for the condition operand of SELECT and /// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 013fbb0b004..aa2f2d1f2b1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6684,13 +6684,15 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); } @@ -6762,14 +6764,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), @@ -6778,7 +6782,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && - N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || + TLI.enableAggressiveFMAFusion(VT))) { SDValue N00 = N0.getOperand(0).getOperand(0); SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(ISD::FMA, dl, VT, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6eded03313d..fd188fe37e2 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -826,6 +826,11 @@ EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { return VT.changeVectorElementTypeToInteger(); } +bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { + assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); + return true; +} + //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index de8f8366f96..c53dc83fa8a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -360,6 +360,11 @@ namespace llvm { /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + /// Return true if target always beneficiates from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. + bool enableAggressiveFMAFusion(EVT VT) const override; + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll index db19761b431..6bef3acf64b 100644 --- a/test/CodeGen/PowerPC/fma.ll +++ b/test/CodeGen/PowerPC/fma.ll @@ -1,8 +1,12 @@ ; RUN: llc < %s -march=ppc32 -fp-contract=fast | FileCheck %s +declare double @dummy1(double) #0 +declare double @dummy2(double, double) #0 +declare double @dummy3(double, double, double) #0 + define double @test_FMADD1(double %A, double %B, double %C) { %D = fmul double %A, %B ; [#uses=1] - %E = fadd double %D, %C ; [#uses=1] + %E = fadd double %C, %D ; [#uses=1] ret double %E ; CHECK-LABEL: test_FMADD1: ; CHECK: fmadd @@ -18,15 +22,26 @@ define double @test_FMADD2(double %A, double %B, double %C) { ; CHECK-NEXT: blr } -define double @test_FMSUB(double %A, double %B, double %C) { +define double @test_FMSUB1(double %A, double %B, double %C) { %D = fmul double %A, %B ; [#uses=1] %E = fsub double %D, %C ; [#uses=1] ret double %E -; CHECK-LABEL: test_FMSUB: +; CHECK-LABEL: test_FMSUB1: ; CHECK: fmsub ; CHECK-NEXT: blr } +define double @test_FMSUB2(double %A, double %B, double %C, double %D) { + %E = fmul double %A, %B ; [#uses=2] + %F = fadd double %E, %C ; [#uses=1] + %G = fsub double %E, %D ; [#uses=1] + %H = call double @dummy2(double %F, double %G) ; [#uses=1] + ret double %H +; CHECK-LABEL: test_FMSUB2: +; CHECK: fmadd +; CHECK-NEXT: fmsub +} + define double @test_FNMADD1(double %A, double %B, double %C) { %D = fmul double %A, %B ; [#uses=1] %E = fadd double %D, %C ; [#uses=1]