Split the estimate() interface into separate functions for each type. NFC.

author Sanjay Patel <spatel@rotateright.com>
Tue, 30 Sep 2014 20:28:48 +0000 (20:28 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 30 Sep 2014 20:28:48 +0000 (20:28 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index 2079d7690d001a5c43037d381e8b575cb02e7e10..306bad00a2172d042cd416af599d537831374a8d 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2624,21 +2624,37 @@ public:
      return SDValue();
    }
  
-  /// Hooks for building estimates in place of, for example, slower divisions
-  /// and square roots. These are not builder functions themselves, just the
-  /// target-specific variables needed for building the estimate algorithm.
-
-  /// Return an estimate value for the input opcode and input operand.
-  /// The RefinementSteps output is the number of refinement iterations
-  /// required to generate a sufficient (though not necessarily IEEE-754
-  /// compliant) estimate for the value type.
+  /// Hooks for building estimates in place of slower divisions and square
+  /// roots.
+  
+  /// Return a reciprocal square root estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
    /// An empty SDValue return means no estimate sequence can be created.
-  virtual SDValue getEstimate(unsigned Opcode, SDValue Operand,
+  virtual SDValue getRsqrtEstimate(SDValue Operand,
                                DAGCombinerInfo &DCI,
                                unsigned &RefinementSteps) const {
      return SDValue();
    }
-  
+
+  /// Return a reciprocal estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
+  /// An empty SDValue return means no estimate sequence can be created.
+  virtual SDValue getRecipEstimate(SDValue Operand,
+                                   DAGCombinerInfo &DCI,
+                                   unsigned &RefinementSteps) const {
+    return SDValue();
+  }
+
    //===--------------------------------------------------------------------===//
    // Legalization utility functions
    //
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index aee6455713afbd14cbae306293a6b0292179b721..407a8747746920b070eb4eb8f3234e62c90f090e 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11779,7 +11779,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
    TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
  
    unsigned Iterations;
-  if (SDValue Est = TLI.getEstimate(ISD::FDIV, Op, DCI, Iterations)) {
+  if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
      // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
      // For the reciprocal, we need to find the zero of the function:
      //   F(X) = A X - 1 [which has a zero at X = 1/A]
@@ -11820,7 +11820,7 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
    // Expose the DAG combiner to the target combiner implementations.
    TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
    unsigned Iterations;
-  if (SDValue Est = TLI.getEstimate(ISD::FSQRT, Op, DCI, Iterations)) {
+  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) {
      // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
      // For the reciprocal sqrt, we need to find the zero of the function:
      //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 5750e2fbb658d025f59b02f65b53e36738b270cb..e0396fdfe4656514bf118c88ef7a440a053d43d3 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7458,25 +7458,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  // Target Optimization Hooks
  //===----------------------------------------------------------------------===//
  
-SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
-                                       DAGCombinerInfo &DCI,
-                                       unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
    EVT VT = Operand.getValueType();
-  SDValue RV;
-  if (Opcode == ISD::FSQRT) {
-    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
-  } else if (Opcode == ISD::FDIV) {
-    if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
-  }
-  if (RV.getNode()) {
+  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
      // Convergence is quadratic, so we essentially double the number of digits
      // correct after every iteration. For both FRE and FRSQRTE, the minimum
      // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7484,8 +7473,29 @@ SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
      RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
      if (VT.getScalarType() == MVT::f64)
        ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
    }
-  return RV;
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
+  EVT VT = Operand.getValueType();
+  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+    // Convergence is quadratic, so we essentially double the number of digits
+    // correct after every iteration. For both FRE and FRSQRTE, the minimum
+    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+    // 2^-14. IEEE float has 23 digits and double has 52 digits.
+    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
+    if (VT.getScalarType() == MVT::f64)
+      ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+  }
+  return SDValue();
  }
  
  static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 197d97779b19220f9669d51a2011beedb446d954..3cf965d286180803281fe418a316d6dd775e423c 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -701,9 +701,10 @@ namespace llvm {
      SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
      SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
  
-    SDValue getEstimate(unsigned Opcode, SDValue Operand,
-                        DAGCombinerInfo &DCI,
-                        unsigned &RefinementSteps) const override;
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
+    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
  
      CCAssignFn *useFastISelCCs(unsigned Flag) const;
    };
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 30 Sep 2014 20:28:48 +0000 (20:28 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 30 Sep 2014 20:28:48 +0000 (20:28 +0000)
include/llvm/Target/TargetLowering.h		patch | blob | history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
lib/Target/PowerPC/PPCISelLowering.cpp		patch | blob | history
lib/Target/PowerPC/PPCISelLowering.h		patch | blob | history