From cafc85bf1ed5a70351c8040ad7c6be32cab712f2 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 30 Sep 2014 20:28:48 +0000
Subject: [PATCH] Split the estimate() interface into separate functions for
 each type. NFC.

It was hacky to use an opcode as a switch because it won't always match
(rsqrte != sqrte), and it looks like we'll need to add more special casing
per arch than I had hoped for. Eg, x86 will prefer a different NR estimate
implementation. ARM will want to use it's 'step' instructions. There also
don't appear to be any new estimate instructions in any arch in a long,
long time. Altivec vloge and vexpte may have been the first and last in
that field...


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218698 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetLowering.h     | 36 +++++++++++++-----
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  4 +-
 lib/Target/PowerPC/PPCISelLowering.cpp   | 48 ++++++++++++++----------
 lib/Target/PowerPC/PPCISelLowering.h     |  7 ++--
 4 files changed, 61 insertions(+), 34 deletions(-)

diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 2079d7690d0..306bad00a21 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2624,21 +2624,37 @@ public:
     return SDValue();
   }
 
-  /// Hooks for building estimates in place of, for example, slower divisions
-  /// and square roots. These are not builder functions themselves, just the
-  /// target-specific variables needed for building the estimate algorithm.
-
-  /// Return an estimate value for the input opcode and input operand.
-  /// The RefinementSteps output is the number of refinement iterations
-  /// required to generate a sufficient (though not necessarily IEEE-754
-  /// compliant) estimate for the value type.
+  /// Hooks for building estimates in place of slower divisions and square
+  /// roots.
+  
+  /// Return a reciprocal square root estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
   /// An empty SDValue return means no estimate sequence can be created.
-  virtual SDValue getEstimate(unsigned Opcode, SDValue Operand,
+  virtual SDValue getRsqrtEstimate(SDValue Operand,
                               DAGCombinerInfo &DCI,
                               unsigned &RefinementSteps) const {
     return SDValue();
   }
-  
+
+  /// Return a reciprocal estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
+  /// An empty SDValue return means no estimate sequence can be created.
+  virtual SDValue getRecipEstimate(SDValue Operand,
+                                   DAGCombinerInfo &DCI,
+                                   unsigned &RefinementSteps) const {
+    return SDValue();
+  }
+
   //===--------------------------------------------------------------------===//
   // Legalization utility functions
   //
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aee6455713a..407a8747746 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11779,7 +11779,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
 
   unsigned Iterations;
-  if (SDValue Est = TLI.getEstimate(ISD::FDIV, Op, DCI, Iterations)) {
+  if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
     // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
     // For the reciprocal, we need to find the zero of the function:
     //   F(X) = A X - 1 [which has a zero at X = 1/A]
@@ -11820,7 +11820,7 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
   // Expose the DAG combiner to the target combiner implementations.
   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
   unsigned Iterations;
-  if (SDValue Est = TLI.getEstimate(ISD::FSQRT, Op, DCI, Iterations)) {
+  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) {
     // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
     // For the reciprocal sqrt, we need to find the zero of the function:
     //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 5750e2fbb65..e0396fdfe46 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7458,25 +7458,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 // Target Optimization Hooks
 //===----------------------------------------------------------------------===//
 
-SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
-                                       DAGCombinerInfo &DCI,
-                                       unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
   EVT VT = Operand.getValueType();
-  SDValue RV;
-  if (Opcode == ISD::FSQRT) {
-    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
-  } else if (Opcode == ISD::FDIV) {
-    if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
-  }
-  if (RV.getNode()) {
+  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
     // Convergence is quadratic, so we essentially double the number of digits
     // correct after every iteration. For both FRE and FRSQRTE, the minimum
     // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7484,8 +7473,29 @@ SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
     RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
     if (VT.getScalarType() == MVT::f64)
       ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
   }
-  return RV;
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
+  EVT VT = Operand.getValueType();
+  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+    // Convergence is quadratic, so we essentially double the number of digits
+    // correct after every iteration. For both FRE and FRSQRTE, the minimum
+    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+    // 2^-14. IEEE float has 23 digits and double has 52 digits.
+    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
+    if (VT.getScalarType() == MVT::f64)
+      ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+  }
+  return SDValue();
 }
 
 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 197d97779b1..3cf965d2861 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -701,9 +701,10 @@ namespace llvm {
     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
 
-    SDValue getEstimate(unsigned Opcode, SDValue Operand,
-                        DAGCombinerInfo &DCI,
-                        unsigned &RefinementSteps) const override;
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
+    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
   };
-- 
2.34.1