From cafc85bf1ed5a70351c8040ad7c6be32cab712f2 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 30 Sep 2014 20:28:48 +0000 Subject: [PATCH] Split the estimate() interface into separate functions for each type. NFC. It was hacky to use an opcode as a switch because it won't always match (rsqrte != sqrte), and it looks like we'll need to add more special casing per arch than I had hoped for. E.g., x86 will prefer a different NR estimate implementation. ARM will want to use its 'step' instructions. There also don't appear to have been any new estimate instructions in any arch in a long, long time. Altivec vloge and vexpte may have been the first and last in that field... git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218698 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 36 +++++++++++++----- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +- lib/Target/PowerPC/PPCISelLowering.cpp | 48 ++++++++++++++---------- lib/Target/PowerPC/PPCISelLowering.h | 7 ++-- 4 files changed, 61 insertions(+), 34 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 2079d7690d0..306bad00a21 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -2624,21 +2624,37 @@ public: return SDValue(); } - /// Hooks for building estimates in place of, for example, slower divisions - /// and square roots. These are not builder functions themselves, just the - /// target-specific variables needed for building the estimate algorithm. - - /// Return an estimate value for the input opcode and input operand. - /// The RefinementSteps output is the number of refinement iterations - /// required to generate a sufficient (though not necessarily IEEE-754 - /// compliant) estimate for the value type. + /// Hooks for building estimates in place of slower divisions and square + /// roots. + + /// Return a reciprocal square root estimate value for the input operand. 
+ /// The RefinementSteps output is the number of Newton-Raphson refinement + /// iterations required to generate a sufficient (though not necessarily + /// IEEE-754 compliant) estimate for the value type. + /// A target may choose to implement its own refinement within this function. + /// If that's true, then return '0' as the number of RefinementSteps to avoid + /// any further refinement of the estimate. /// An empty SDValue return means no estimate sequence can be created. - virtual SDValue getEstimate(unsigned Opcode, SDValue Operand, + virtual SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const { return SDValue(); } - + + /// Return a reciprocal estimate value for the input operand. + /// The RefinementSteps output is the number of Newton-Raphson refinement + /// iterations required to generate a sufficient (though not necessarily + /// IEEE-754 compliant) estimate for the value type. + /// A target may choose to implement its own refinement within this function. + /// If that's true, then return '0' as the number of RefinementSteps to avoid + /// any further refinement of the estimate. + /// An empty SDValue return means no estimate sequence can be created. 
+ virtual SDValue getRecipEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { + return SDValue(); + } + //===--------------------------------------------------------------------===// // Legalization utility functions // diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index aee6455713a..407a8747746 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11779,7 +11779,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); unsigned Iterations; - if (SDValue Est = TLI.getEstimate(ISD::FDIV, Op, DCI, Iterations)) { + if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal, we need to find the zero of the function: // F(X) = A X - 1 [which has a zero at X = 1/A] @@ -11820,7 +11820,7 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { // Expose the DAG combiner to the target combiner implementations. 
TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); unsigned Iterations; - if (SDValue Est = TLI.getEstimate(ISD::FSQRT, Op, DCI, Iterations)) { + if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal sqrt, we need to find the zero of the function: // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 5750e2fbb65..e0396fdfe46 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7458,25 +7458,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// -SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const { +SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { EVT VT = Operand.getValueType(); - SDValue RV; - if (Opcode == ISD::FSQRT) { - if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || - (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || - (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) - RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); - } else if (Opcode == ISD::FDIV) { - if ((VT == MVT::f32 && Subtarget.hasFRES()) || - (VT == MVT::f64 && Subtarget.hasFRE()) || - (VT == MVT::v4f32 && Subtarget.hasAltivec()) || - (VT == MVT::v2f64 && Subtarget.hasVSX())) - RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); - } - if (RV.getNode()) { + if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || + (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || + (VT == MVT::v4f32 && Subtarget.hasAltivec()) || + (VT == MVT::v2f64 && Subtarget.hasVSX())) { // Convergence is quadratic, so we essentially 
double the number of digits // correct after every iteration. For both FRE and FRSQRTE, the minimum // architected relative accuracy is 2^-5. When hasRecipPrec(), this is @@ -7484,8 +7473,29 @@ SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand, RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) ++RefinementSteps; + return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } - return RV; + return SDValue(); +} + +SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { + EVT VT = Operand.getValueType(); + if ((VT == MVT::f32 && Subtarget.hasFRES()) || + (VT == MVT::f64 && Subtarget.hasFRE()) || + (VT == MVT::v4f32 && Subtarget.hasAltivec()) || + (VT == MVT::v2f64 && Subtarget.hasVSX())) { + // Convergence is quadratic, so we essentially double the number of digits + // correct after every iteration. For both FRE and FRSQRTE, the minimum + // architected relative accuracy is 2^-5. When hasRecipPrec(), this is + // 2^-14. IEEE float has 23 digits and double has 52 digits. + RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3; + if (VT.getScalarType() == MVT::f64) + ++RefinementSteps; + return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); + } + return SDValue(); } static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 197d97779b1..3cf965d2861 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -701,9 +701,10 @@ namespace llvm { SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue getEstimate(unsigned Opcode, SDValue Operand, - DAGCombinerInfo &DCI, - unsigned &RefinementSteps) const override; + SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const override; + SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; }; -- 2.34.1