Add DAG-combines for aggressive FMA formation.

author Lang Hames <lhames@gmail.com>

Tue, 19 Jun 2012 22:51:23 +0000 (22:51 +0000)

committer Lang Hames <lhames@gmail.com>

Tue, 19 Jun 2012 22:51:23 +0000 (22:51 +0000)
author Lang Hames <lhames@gmail.com>
Tue, 19 Jun 2012 22:51:23 +0000 (22:51 +0000)
committer Lang Hames <lhames@gmail.com>
Tue, 19 Jun 2012 22:51:23 +0000 (22:51 +0000)
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h

index bc60673589f60a40b40a2bcdcfa80f378bf4c66b..84287fb5d76ed737a2671ec17881db23e405411a 100644 (file)
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -35,7 +35,7 @@ namespace llvm {
      TargetOptions()
          : PrintMachineCode(false), NoFramePointerElim(false),
            NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false),
-          NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
+          AllowExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
            NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),
            UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
            JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
@@ -74,13 +74,13 @@ namespace llvm {
      unsigned LessPreciseFPMADOption : 1;
      bool LessPreciseFPMAD() const;
  
-    /// NoExcessFPPrecision - This flag is enabled when the
-    /// -disable-excess-fp-precision flag is specified on the command line.
-    /// When this flag is off (the default), the code generator is allowed to
-    /// produce results that are "more precise" than IEEE allows.  This includes
-    /// use of FMA-like operations and use of the X86 FP registers without
-    /// rounding all over the place.
-    unsigned NoExcessFPPrecision : 1;
+    /// AllowExcessFPPrecision - This flag is enabled when the
+    /// -enable-excess-fp-precision flag is specified on the command line. This
+    /// flag is OFF by default. When it is turned on, the code generator is
+    /// allowed to produce results that are "more precise" than IEEE allows.
+    /// This includes use of FMA-like operations and use of the X86 FP registers
+    /// without rounding all over the place.
+    unsigned AllowExcessFPPrecision : 1;
  
      /// UnsafeFPMath - This flag is enabled when the
      /// -enable-unsafe-fp-math flag is specified on the command line.  When
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 369d927112f6286813731194b9cbb50e6a4112a6..3517b7cfbe3ae07db961d1b226e687899e5192c1 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5633,6 +5633,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                         DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                     N0.getOperand(1), N1));
  
+  // FADD -> FMA combines:
+  if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+       DAG.getTarget().Options.UnsafeFPMath) &&
+      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+      TLI.isOperationLegal(ISD::FMA, VT)) {
+
+    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+                         N0.getOperand(0), N0.getOperand(1), N1);
+    }
+  
+    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+    // Note: Commutes FADD operands.
+    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+                         N1.getOperand(0), N1.getOperand(1), N0);
+    }
+  }
+
    return SDValue();
  }
  
@@ -5690,6 +5710,29 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
      }
    }
  
+  // FSUB -> FMA combines:
+  if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+       DAG.getTarget().Options.UnsafeFPMath) &&
+      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
+      TLI.isOperationLegal(ISD::FMA, VT)) {
+
+    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
+      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+                         N0.getOperand(0), N0.getOperand(1),
+                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
+    }
+
+    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+    // Note: Commutes FSUB operands.
+    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
+      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
+                         N1.getOperand(0)),
+                         N1.getOperand(1), N0);
+    }
+  }
+
    return SDValue();
  }
  
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td

index 5131152d1e83062cc66f3f002c148270a888c00b..81e3527a6f044a8b0c18d98fd4cbdf2147385b80 100644 (file)
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -236,7 +236,7 @@ def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx()">;
  // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
  // But only select them if more precision in FP computation is allowed.
  // Do not use them for Darwin platforms.
-def UseFusedMAC      : Predicate<"!TM.Options.NoExcessFPPrecision && "
+def UseFusedMAC      : Predicate<"TM.Options.AllowExcessFPPrecision && "
                                   "!Subtarget->isTargetDarwin()">;
  def DontUseFusedMAC  : Predicate<"!Subtarget->hasVFP4() || "
                                   "Subtarget->isTargetDarwin()">;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td

index c9cdd5c192034830f221a69801fa9cee15402cbc..25b6dc733eade7a5519233b00a59deb7f9554299 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -353,7 +353,7 @@ def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
  
  //===----------------------------------------------------------------------===//
  // PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
+def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">;
  def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;
  def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
  def IsBookE  : Predicate<"PPCSubTarget.isBookE()">;
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll

index 3bf1ef4ad261091d218291a0d551532162a8ab8c..61e7d7b1a210879b6bd14bab42f19df15938e5a2 100644 (file)
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -enable-excess-fp-precision | FileCheck %s
  ; Check generated fused MAC and MLS.
  
  define double @fusedMACTest1(double %d1, double %d2, double %d3) {
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll

index 932ad7a63ce415916b4f854b0ef019e1f4ff51af..a47e662cc8748de270b743c1b043f3861aa7a073 100644 (file)
--- a/test/CodeGen/PowerPC/a2-fp-basic.ll
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -enable-excess-fp-precision | FileCheck %s
  
  %0 = type { double, double }
  
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll

index 815c72c1f8a744e33fced6482bf3324b83877481..02847147edb33520cd757247d14be153b746ea4f 100644 (file)
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | \
+; RUN: llc < %s -march=ppc32 -enable-excess-fp-precision | \
  ; RUN:   egrep {fn?madd|fn?msub} | count 8
  
  define double @test_FMADD1(double %A, double %B, double %C) {
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll

index 1fad2fa3aaf506acf63f2b076399863ab973ad90..25ec5f892c501ddda829546cac294b06622f53df 100644 (file)
--- a/test/CodeGen/PowerPC/ppc440-fp-basic.ll
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 -enable-excess-fp-precision | FileCheck %s
  
  %0 = type { double, double }
  
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp

index d0880913baf7c5f19b5c2f842b99130e62906d71..b303cec3b51d6cf05fe7377066d7d209acc4beb7 100644 (file)
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -156,8 +156,8 @@ DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
    cl::init(false));
  
  static cl::opt<bool>
-DisableExcessPrecision("disable-excess-fp-precision",
-  cl::desc("Disable optimizations that may increase FP precision"),
+EnableExcessPrecision("enable-excess-fp-precision",
+  cl::desc("Enable optimizations that may increase FP precision"),
    cl::init(false));
  
  static cl::opt<bool>
@@ -404,7 +404,7 @@ int main(int argc, char **argv) {
    Options.LessPreciseFPMADOption = EnableFPMAD;
    Options.NoFramePointerElim = DisableFPElim;
    Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
-  Options.NoExcessFPPrecision = DisableExcessPrecision;
+  Options.AllowExcessFPPrecision = EnableExcessPrecision;
    Options.UnsafeFPMath = EnableUnsafeFPMath;
    Options.NoInfsFPMath = EnableNoInfsFPMath;
    Options.NoNaNsFPMath = EnableNoNaNsFPMath;
author	Lang Hames <lhames@gmail.com>
	Tue, 19 Jun 2012 22:51:23 +0000 (22:51 +0000)
committer	Lang Hames <lhames@gmail.com>
	Tue, 19 Jun 2012 22:51:23 +0000 (22:51 +0000)
include/llvm/Target/TargetOptions.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrInfo.td		patch \| blob \| history
lib/Target/PowerPC/PPCInstrInfo.td		patch \| blob \| history
test/CodeGen/ARM/fusedMAC.ll		patch \| blob \| history
test/CodeGen/PowerPC/a2-fp-basic.ll		patch \| blob \| history
test/CodeGen/PowerPC/fma.ll		patch \| blob \| history
test/CodeGen/PowerPC/ppc440-fp-basic.ll		patch \| blob \| history
tools/llc/llc.cpp		patch \| blob \| history