Rename -allow-excess-fp-precision flag to -fuse-fp-ops, and switch from a boolean flag to an enum: { Fast, Standard, Strict } (default = Standard).

author Lang Hames <lhames@gmail.com>

Fri, 22 Jun 2012 01:09:09 +0000 (01:09 +0000)

committer Lang Hames <lhames@gmail.com>

Fri, 22 Jun 2012 01:09:09 +0000 (01:09 +0000)
author Lang Hames <lhames@gmail.com>
Fri, 22 Jun 2012 01:09:09 +0000 (01:09 +0000)
committer Lang Hames <lhames@gmail.com>
Fri, 22 Jun 2012 01:09:09 +0000 (01:09 +0000)
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h

index 84287fb5d76ed737a2671ec17881db23e405411a..3a1809a80a0d8f24bfc770343dbfa11d8f4a5e06 100644 (file)
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -30,12 +30,20 @@ namespace llvm {
      };
    }
  
+  namespace FPOpFusion { // Modes stored in TargetOptions::AllowFPOpFusion.
+    enum FPOpFusionMode {
+      Fast,     // Enable fusion of FP ops wherever it's profitable.
+      Standard, // Only allow fusion of 'blessed' ops (currently just fmuladd).
+      Strict    // Never fuse FP-ops (fmuladd is not turned into an FMA node).
+    };
+  } // End FPOpFusion namespace
+
    class TargetOptions {
    public:
      TargetOptions()
          : PrintMachineCode(false), NoFramePointerElim(false),
            NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false),
-          AllowExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
+          UnsafeFPMath(false), NoInfsFPMath(false),
            NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),
            UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
            JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
@@ -43,7 +51,8 @@ namespace llvm {
            StackAlignmentOverride(0), RealignStack(true),
            DisableJumpTables(false), EnableFastISel(false),
            PositionIndependentExecutable(false), EnableSegmentedStacks(false),
-          UseInitArray(false), TrapFuncName(""), FloatABIType(FloatABI::Default)
+          UseInitArray(false), TrapFuncName(""), FloatABIType(FloatABI::Default),
+          AllowFPOpFusion(FPOpFusion::Standard)
      {}
  
      /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
@@ -74,14 +83,6 @@ namespace llvm {
      unsigned LessPreciseFPMADOption : 1;
      bool LessPreciseFPMAD() const;
  
-    /// AllowExcessFPPrecision - This flag is enabled when the
-    /// -enable-excess-fp-precision flag is specified on the command line. This
-    /// flag is OFF by default. When it is turned on, the code generator is
-    /// allowed to produce results that are "more precise" than IEEE allows.
-    /// This includes use of FMA-like operations and use of the X86 FP registers
-    /// without rounding all over the place.
-    unsigned AllowExcessFPPrecision : 1;
-
      /// UnsafeFPMath - This flag is enabled when the
      /// -enable-unsafe-fp-math flag is specified on the command line.  When
      /// this flag is off (the default), the code generator is not allowed to
@@ -189,6 +190,25 @@ namespace llvm {
      /// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
      /// Hard presumes that the normal FP ABI is used.
      FloatABI::ABIType FloatABIType;
+
+    /// AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
+    /// This controls the creation of fused FP ops that store intermediate
+    /// results in higher precision than IEEE allows (E.g. FMAs).
+    ///
+    /// Fast mode - allows formation of fused FP ops whenever they're
+    /// profitable.
+    /// Standard mode - allow fusion only for 'blessed' FP ops. At present the
+    /// only blessed op is the fmuladd intrinsic. In the future more blessed ops
+    /// may be added.
+    /// Strict mode - allow fusion only if/when it can be proven that the excess
+    /// precision won't affect the result.
+    ///
+    /// Note: This option only controls formation of fused ops by the optimizers.
+    /// Fused operations that are explicitly specified (e.g. FMA via the
+    /// llvm.fma.* intrinsic) will always be honored, regardless of the value of
+    /// this option.
+    FPOpFusion::FPOpFusionMode AllowFPOpFusion;
+
    };
  } // End llvm namespace
  
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 7babf4ab9534f606a6ed4d39b00de004d1030f95..0bdd8b85497990005892f317e8427363995c1a13 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5644,7 +5644,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                                     N0.getOperand(1), N1));
  
    // FADD -> FMA combines:
-  if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
         DAG.getTarget().Options.UnsafeFPMath) &&
        DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
        TLI.isOperationLegal(ISD::FMA, VT)) {
@@ -5721,7 +5721,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
    }
  
    // FSUB -> FMA combines:
-  if ((DAG.getTarget().Options.AllowExcessFPPrecision ||
+  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
         DAG.getTarget().Options.UnsafeFPMath) &&
        DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
        TLI.isOperationLegal(ISD::FMA, VT)) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 4152aa1ae16da0183394457e0bce015d06333454..50fd45e88bc50bc50a76984b93af2084e4eff97c 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4934,7 +4934,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      return 0;
    case Intrinsic::fmuladd: {
      EVT VT = TLI.getValueType(I.getType());
-    if (TLI.isOperationLegal(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){
+    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+        TLI.isOperationLegal(ISD::FMA, VT) &&
+        TLI.isFMAFasterThanMulAndAdd(VT)){
        setValue(&I, DAG.getNode(ISD::FMA, dl,
                                 getValue(I.getArgOperand(0)).getValueType(),
                                 getValue(I.getArgOperand(0)),
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td

index 81e3527a6f044a8b0c18d98fd4cbdf2147385b80..67f050131bfba93d3ed979559f35ff15c7f68388 100644 (file)
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -236,7 +236,8 @@ def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx()">;
  // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
  // But only select them if more precision in FP computation is allowed.
  // Do not use them for Darwin platforms.
-def UseFusedMAC      : Predicate<"TM.Options.AllowExcessFPPrecision && "
+def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
+                                 " FPOpFusion::Fast) && "
                                   "!Subtarget->isTargetDarwin()">;
  def DontUseFusedMAC  : Predicate<"!Subtarget->hasVFP4() || "
                                   "Subtarget->isTargetDarwin()">;
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll

index 0cc1cddf2185d286ab6ae2b8fa35d159819be817..725dd274e3729cbfcdff379bec7af5f1f9d079d8 100644 (file)
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -enable-excess-fp-precision | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fuse-fp-ops=fast | FileCheck %s
  ; Check generated fused MAC and MLS.
  
  define double @fusedMACTest1(double %d1, double %d2, double %d3) {
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll

index a47e662cc8748de270b743c1b043f3861aa7a073..a4370fa452a17a213ccc6d622c9d649ac3516f5b 100644 (file)
--- a/test/CodeGen/PowerPC/a2-fp-basic.ll
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -mcpu=a2 -enable-excess-fp-precision | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 -fuse-fp-ops=fast | FileCheck %s
  
  %0 = type { double, double }
  
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll

index 02847147edb33520cd757247d14be153b746ea4f..4e05c279b79ab393060bc482e3e23c1dd0cd75af 100644 (file)
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -enable-excess-fp-precision | \
+; RUN: llc < %s -march=ppc32 -fuse-fp-ops=fast | \
  ; RUN:   egrep {fn?madd|fn?msub} | count 8
  
  define double @test_FMADD1(double %A, double %B, double %C) {
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll

index 25ec5f892c501ddda829546cac294b06622f53df..6884570a8aa69f31d421c8bbdafcb13687730c8c 100644 (file)
--- a/test/CodeGen/PowerPC/ppc440-fp-basic.ll
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=440 -enable-excess-fp-precision | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 -fuse-fp-ops=fast | FileCheck %s
  
  %0 = type { double, double }
  
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp

index b303cec3b51d6cf05fe7377066d7d209acc4beb7..e08bad93b07d125e4ea96da39f243e7f8bd0f063 100644 (file)
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -155,11 +155,6 @@ DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
    cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
    cl::init(false));
  
-static cl::opt<bool>
-EnableExcessPrecision("enable-excess-fp-precision",
-  cl::desc("Enable optimizations that may increase FP precision"),
-  cl::init(false));
-
  static cl::opt<bool>
  EnableUnsafeFPMath("enable-unsafe-fp-math",
    cl::desc("Enable optimizations that may decrease FP precision"),
@@ -199,6 +194,19 @@ FloatABIForCalls("float-abi",
                 "Hard float ABI (uses FP registers)"),
      clEnumValEnd));
  
+static cl::opt<llvm::FPOpFusion::FPOpFusionMode>
+FuseFPOps("fuse-fp-ops",
+  cl::desc("Enable aggressive formation of fused FP ops"),
+  cl::init(FPOpFusion::Standard), // Same default as TargetOptions.
+  cl::values(
+    clEnumValN(FPOpFusion::Fast, "fast",
+               "Fuse FP ops whenever profitable"),
+    clEnumValN(FPOpFusion::Standard, "standard",
+               "Only fuse 'blessed' FP ops."),
+    clEnumValN(FPOpFusion::Strict, "strict",
+               "Only fuse FP ops when the result won't be affected."),
+    clEnumValEnd));
+
  static cl::opt<bool>
  DontPlaceZerosInBSS("nozero-initialized-in-bss",
    cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -404,7 +412,7 @@ int main(int argc, char **argv) {
    Options.LessPreciseFPMADOption = EnableFPMAD;
    Options.NoFramePointerElim = DisableFPElim;
    Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
-  Options.AllowExcessFPPrecision = EnableExcessPrecision;
+  Options.AllowFPOpFusion = FuseFPOps;
    Options.UnsafeFPMath = EnableUnsafeFPMath;
    Options.NoInfsFPMath = EnableNoInfsFPMath;
    Options.NoNaNsFPMath = EnableNoNaNsFPMath;
author	Lang Hames <lhames@gmail.com>
	Fri, 22 Jun 2012 01:09:09 +0000 (01:09 +0000)
committer	Lang Hames <lhames@gmail.com>
	Fri, 22 Jun 2012 01:09:09 +0000 (01:09 +0000)
include/llvm/Target/TargetOptions.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrInfo.td		patch \| blob \| history
test/CodeGen/ARM/fusedMAC.ll		patch \| blob \| history
test/CodeGen/PowerPC/a2-fp-basic.ll		patch \| blob \| history
test/CodeGen/PowerPC/fma.ll		patch \| blob \| history
test/CodeGen/PowerPC/ppc440-fp-basic.ll		patch \| blob \| history
tools/llc/llc.cpp		patch \| blob \| history