Refactor reciprocal and reciprocal square root estimate into target-independent functions

author Sanjay Patel <spatel@rotateright.com>

Fri, 26 Sep 2014 23:01:47 +0000 (23:01 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Fri, 26 Sep 2014 23:01:47 +0000 (23:01 +0000)
author Sanjay Patel <spatel@rotateright.com>
Fri, 26 Sep 2014 23:01:47 +0000 (23:01 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Fri, 26 Sep 2014 23:01:47 +0000 (23:01 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index ad5fc5d848bf3959ce0b06ae3831f2143e6de129..2079d7690d001a5c43037d381e8b575cb02e7e10 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2624,10 +2624,21 @@ public:
      return SDValue();
    }
  
-  virtual SDValue BuildRSQRTE(SDValue Op, DAGCombinerInfo &DCI) const {
+  /// Hooks for building estimates in place of, for example, slower divisions
+  /// and square roots. These are not builder functions themselves, just the
+  /// target-specific variables needed for building the estimate algorithm.
+
+  /// Return an estimate value for the input opcode and input operand.
+  /// The RefinementSteps output is the number of refinement iterations
+  /// required to generate a sufficient (though not necessarily IEEE-754
+  /// compliant) estimate for the value type.
+  /// An empty SDValue return means no estimate sequence can be created.
+  virtual SDValue getEstimate(unsigned Opcode, SDValue Operand,
+                              DAGCombinerInfo &DCI,
+                              unsigned &RefinementSteps) const {
      return SDValue();
    }
-
+  
    //===--------------------------------------------------------------------===//
    // Legalization utility functions
    //
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 33e70593d154d06996b59a32665bc458e3aaa625..34a0e04bc055c8bfe9c613160634f837ea44b52b 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -276,6 +276,7 @@ namespace {
      SDValue visitFMA(SDNode *N);
      SDValue visitFDIV(SDNode *N);
      SDValue visitFREM(SDNode *N);
+    SDValue visitFSQRT(SDNode *N);
      SDValue visitFCOPYSIGN(SDNode *N);
      SDValue visitSINT_TO_FP(SDNode *N);
      SDValue visitUINT_TO_FP(SDNode *N);
@@ -326,7 +327,8 @@ namespace {
      SDValue BuildSDIV(SDNode *N);
      SDValue BuildSDIVPow2(SDNode *N);
      SDValue BuildUDIV(SDNode *N);
-    SDValue BuildRSQRTE(SDNode *N);
+    SDValue BuildReciprocalEstimate(SDValue Op);
+    SDValue BuildRsqrtEstimate(SDValue Op);
      SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                 bool DemandHighBits = true);
      SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -1307,6 +1309,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
    case ISD::FMA:                return visitFMA(N);
    case ISD::FDIV:               return visitFDIV(N);
    case ISD::FREM:               return visitFREM(N);
+  case ISD::FSQRT:              return visitFSQRT(N);
    case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
    case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
    case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
@@ -6976,6 +6979,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    EVT VT = N->getValueType(0);
+  SDLoc DL(N);
    const TargetOptions &Options = DAG.getTarget().Options;
  
    // fold vector ops
@@ -7007,10 +7011,37 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
                             DAG.getConstantFP(Recip, VT));
      }
+    
      // If this FDIV is part of a reciprocal square root, it may be folded
      // into a target-specific square root estimate instruction.
-    if (SDValue SqrtOp = BuildRSQRTE(N))
-      return SqrtOp;
+    if (N1.getOpcode() == ISD::FSQRT) {
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
+        AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+      }
+    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
+               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+        AddToWorklist(RV.getNode());
+        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
+        AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+      }
+    } else if (N1.getOpcode() == ISD::FP_ROUND &&
+               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+        AddToWorklist(RV.getNode());
+        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
+        AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+      }
+    }
+    
+    // Fold into a reciprocal estimate and multiply instead of a real divide.
+    if (SDValue RV = BuildReciprocalEstimate(N1)) {
+      AddToWorklist(RV.getNode());
+      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+    }
    }
  
    // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
@@ -7042,6 +7073,33 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
    return SDValue();
  }
  
+SDValue DAGCombiner::visitFSQRT(SDNode *N) {
+  if (DAG.getTarget().Options.UnsafeFPMath) {
+    // Compute this as 1/(1/sqrt(X)): the reciprocal of the reciprocal sqrt.
+    if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
+      AddToWorklist(RV.getNode());
+      RV = BuildReciprocalEstimate(RV);
+      if (RV.getNode()) {
+        // Unfortunately, RV is now NaN if the input was exactly 0.
+        // Select out this case and force the answer to 0.
+        EVT VT = RV.getValueType();
+      
+        SDValue Zero = DAG.getConstantFP(0.0, VT);
+        SDValue ZeroCmp =
+          DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+                       N->getOperand(0), Zero, ISD::SETEQ);
+        AddToWorklist(ZeroCmp.getNode());
+        AddToWorklist(RV.getNode());
+
+        RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
+                         SDLoc(N), VT, ZeroCmp, Zero, RV);
+        return RV;
+      }
+    }
+  }
+  return SDValue();
+}
+
  SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -11702,36 +11760,92 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
    return S;
  }
  
-/// Given an ISD::FDIV node with either a direct or indirect ISD::FSQRT operand,
-/// generate a DAG expression using a reciprocal square root estimate op.
-SDValue DAGCombiner::BuildRSQRTE(SDNode *N) {
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+  if (Level >= AfterLegalizeDAG)
+    return SDValue();
+
    // Expose the DAG combiner to the target combiner implementations.
    TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
-  SDValue N1 = N->getOperand(1);
  
-  if (N1.getOpcode() == ISD::FSQRT) {
-    if (SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0), DCI)) {
-      AddToWorklist(RV.getNode());
-      return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
-    }
-  } else if (N1.getOpcode() == ISD::FP_EXTEND &&
-             N1.getOperand(0).getOpcode() == ISD::FSQRT) {
-    if (SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0).getOperand(0), DCI)) {
-      DCI.AddToWorklist(RV.getNode());
-      RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
-      AddToWorklist(RV.getNode());
-      return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+  unsigned Iterations;
+  if (SDValue Est = TLI.getEstimate(ISD::FDIV, Op, DCI, Iterations)) {
+    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+    // For the reciprocal, we need to find the zero of the function:
+    //   F(X) = A X - 1 [which has a zero at X = 1/A]
+    //     =>
+    //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+    //     does not require additional intermediate precision]
+    EVT VT = Op.getValueType();
+    SDLoc DL(Op);
+    SDValue FPOne = DAG.getConstantFP(1.0, VT);
+
+    AddToWorklist(Est.getNode());
+
+    // Newton iterations: Est = Est + Est (1 - Arg * Est)
+    for (unsigned i = 0; i < Iterations; ++i) {
+      SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+      AddToWorklist(NewEst.getNode());
+
+      NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
+      AddToWorklist(NewEst.getNode());
+
+      NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+      AddToWorklist(NewEst.getNode());
+
+      Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
+      AddToWorklist(Est.getNode());
      }
-  } else if (N1.getOpcode() == ISD::FP_ROUND &&
-             N1.getOperand(0).getOpcode() == ISD::FSQRT) {
-    if (SDValue RV = TLI.BuildRSQRTE(N1.getOperand(0).getOperand(0), DCI)) {
-      DCI.AddToWorklist(RV.getNode());
-      RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
-      AddToWorklist(RV.getNode());
-      return DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+
+    return Est;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+  if (Level >= AfterLegalizeDAG)
+    return SDValue();
+
+  // Expose the DAG combiner to the target combiner implementations.
+  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+  unsigned Iterations;
+  if (SDValue Est = TLI.getEstimate(ISD::FSQRT, Op, DCI, Iterations)) {
+    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+    // For the reciprocal sqrt, we need to find the zero of the function:
+    //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+    //     =>
+    //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+    // As a result, we precompute A/2 prior to the iteration loop.
+    EVT VT = Op.getValueType();
+    SDLoc DL(Op);
+    SDValue FPThreeHalves = DAG.getConstantFP(1.5, VT);
+
+    AddToWorklist(Est.getNode());
+
+    // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
+    // this entire sequence requires only one FP constant.
+    SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, FPThreeHalves, Op);
+    AddToWorklist(HalfArg.getNode());
+
+    HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Op);
+    AddToWorklist(HalfArg.getNode());
+
+    // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+    for (unsigned i = 0; i < Iterations; ++i) {
+      SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+      AddToWorklist(NewEst.getNode());
+
+      NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+      AddToWorklist(NewEst.getNode());
+
+      NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPThreeHalves, NewEst);
+      AddToWorklist(NewEst.getNode());
+
+      Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+      AddToWorklist(Est.getNode());
      }
+
+    return Est;
    }
  
    return SDValue();
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index e216a72c495c115496a294f3de9c589b03fd2f15..5750e2fbb658d025f59b02f65b53e36738b270cb 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7458,138 +7458,34 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  // Target Optimization Hooks
  //===----------------------------------------------------------------------===//
  
-SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
-                                               DAGCombinerInfo &DCI) const {
-  if (DCI.isAfterLegalizeVectorOps())
-    return SDValue();
-
-  EVT VT = Op.getValueType();
-
-  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
-      (VT == MVT::f64 && Subtarget.hasFRE())  ||
-      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
-    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
-    // For the reciprocal, we need to find the zero of the function:
-    //   F(X) = A X - 1 [which has a zero at X = 1/A]
-    //     =>
-    //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
-    //     does not require additional intermediate precision]
-
+SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
+                                       DAGCombinerInfo &DCI,
+                                       unsigned &RefinementSteps) const {
+  EVT VT = Operand.getValueType();
+  SDValue RV;
+  if (Opcode == ISD::FSQRT) {
+    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+        (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
+        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+        (VT == MVT::v2f64 && Subtarget.hasVSX()))
+      RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
+  } else if (Opcode == ISD::FDIV) {
+    if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+        (VT == MVT::f64 && Subtarget.hasFRE())  ||
+        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+        (VT == MVT::v2f64 && Subtarget.hasVSX()))
+      RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+  }
+  if (RV.getNode()) {
      // Convergence is quadratic, so we essentially double the number of digits
-    // correct after every iteration. The minimum architected relative
-    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
-    // 23 digits and double has 52 digits.
-    int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+    // correct after every iteration. For both FRE and FRSQRTE, the minimum
+    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+    // 2^-14. IEEE float has 23 digits and double has 52 digits.
+    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
      if (VT.getScalarType() == MVT::f64)
-      ++Iterations;
-
-    SelectionDAG &DAG = DCI.DAG;
-    SDLoc dl(Op);
-
-    SDValue FPOne =
-      DAG.getConstantFP(1.0, VT.getScalarType());
-    if (VT.isVector()) {
-      assert(VT.getVectorNumElements() == 4 &&
-             "Unknown vector type");
-      FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
-                          FPOne, FPOne, FPOne, FPOne);
-    }
-
-    SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
-    DCI.AddToWorklist(Est.getNode());
-
-    // Newton iterations: Est = Est + Est (1 - Arg * Est)
-    for (int i = 0; i < Iterations; ++i) {
-      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
-      DCI.AddToWorklist(Est.getNode());
-    }
-
-    return Est;
+      ++RefinementSteps;
    }
-
-  return SDValue();
-}
-
-SDValue PPCTargetLowering::BuildRSQRTE(SDValue Op, DAGCombinerInfo &DCI) const {
-  if (DCI.isAfterLegalizeVectorOps())
-    return SDValue();
-
-  EVT VT = Op.getValueType();
-
-  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
-      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
-      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
-
-    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
-    // For the reciprocal sqrt, we need to find the zero of the function:
-    //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
-    //     =>
-    //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
-    // As a result, we precompute A/2 prior to the iteration loop.
-
-    // Convergence is quadratic, so we essentially double the number of digits
-    // correct after every iteration. The minimum architected relative
-    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
-    // 23 digits and double has 52 digits.
-    int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
-    if (VT.getScalarType() == MVT::f64)
-      ++Iterations;
-
-    SelectionDAG &DAG = DCI.DAG;
-    SDLoc dl(Op);
-
-    SDValue FPThreeHalves =
-      DAG.getConstantFP(1.5, VT.getScalarType());
-    if (VT.isVector()) {
-      assert(VT.getVectorNumElements() == 4 &&
-             "Unknown vector type");
-      FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
-                                  FPThreeHalves, FPThreeHalves,
-                                  FPThreeHalves, FPThreeHalves);
-    }
-
-    SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
-    DCI.AddToWorklist(Est.getNode());
-
-    // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
-    // this entire sequence requires only one FP constant.
-    SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
-    DCI.AddToWorklist(HalfArg.getNode());
-
-    HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
-    DCI.AddToWorklist(HalfArg.getNode());
-
-    // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
-    for (int i = 0; i < Iterations; ++i) {
-      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
-      DCI.AddToWorklist(NewEst.getNode());
-
-      Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
-      DCI.AddToWorklist(Est.getNode());
-    }
-
-    return Est;
-  }
-
-  return SDValue();
+  return RV;
  }
  
  static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
@@ -8316,55 +8212,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
    case ISD::SETCC:
    case ISD::SELECT_CC:
      return DAGCombineTruncBoolExt(N, DCI);
-  case ISD::FDIV: {
-    assert(TM.Options.UnsafeFPMath &&
-           "Reciprocal estimates require UnsafeFPMath");
-
-    SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
-    if (RV.getNode()) {
-      DCI.AddToWorklist(RV.getNode());
-      return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
-                         N->getOperand(0), RV);
-    }
-
-    }
-    break;
-  case ISD::FSQRT: {
-    assert(TM.Options.UnsafeFPMath &&
-           "Reciprocal estimates require UnsafeFPMath");
-
-    // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
-    // reciprocal sqrt.
-    SDValue RV = BuildRSQRTE(N->getOperand(0), DCI);
-    if (RV.getNode()) {
-      DCI.AddToWorklist(RV.getNode());
-      RV = DAGCombineFastRecip(RV, DCI);
-      if (RV.getNode()) {
-        // Unfortunately, RV is now NaN if the input was exactly 0. Select out
-        // this case and force the answer to 0.
-
-        EVT VT = RV.getValueType();
-
-        SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
-        if (VT.isVector()) {
-          assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
-          Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
-        }
-
-        SDValue ZeroCmp =
-          DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
-                       N->getOperand(0), Zero, ISD::SETEQ);
-        DCI.AddToWorklist(ZeroCmp.getNode());
-        DCI.AddToWorklist(RV.getNode());
-
-        RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
-                         ZeroCmp, Zero, RV);
-        return RV;
-      }
-    }
-
-    }
-    break;
    case ISD::SINT_TO_FP:
      if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
        if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 7444d41bbc47603b399f0a85100652f52c9342ad..197d97779b19220f9669d51a2011beedb446d954 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -700,8 +700,10 @@ namespace llvm {
  
      SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
      SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
-    SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
-    SDValue BuildRSQRTE(SDValue Op, DAGCombinerInfo &DCI) const;
+
+    SDValue getEstimate(unsigned Opcode, SDValue Operand,
+                        DAGCombinerInfo &DCI,
+                        unsigned &RefinementSteps) const override;
  
      CCAssignFn *useFastISelCCs(unsigned Flag) const;
    };
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll

index 8111b544366441c394969656b5fbed503d0c7202..d9c5d4061c84e5eff2428eb30f320bb454fa5e51 100644 (file)
--- a/test/CodeGen/PowerPC/recipest.ll
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -16,12 +16,12 @@ define double @foo(double %a, double %b) nounwind {
  ; CHECK-DAG: frsqrte
  ; CHECK-DAG: fnmsub
  ; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fmul
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
  ; CHECK: blr
  
  ; CHECK-SAFE: @foo
@@ -85,10 +85,10 @@ define float @goo(float %a, float %b) nounwind {
  ; CHECK-DAG: frsqrtes
  ; CHECK-DAG: fnmsubs
  ; CHECK: fmuls
-; CHECK: fmadds
-; CHECK: fmuls
-; CHECK: fmuls
-; CHECK: blr
+; CHECK-NEXT: fmadds
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
  
  ; CHECK-SAFE: @goo
  ; CHECK-SAFE: fsqrts
@@ -117,10 +117,10 @@ define double @foo2(double %a, double %b) nounwind {
  ; CHECK-DAG: fre
  ; CHECK-DAG: fnmsub
  ; CHECK: fmadd
-; CHECK: fnmsub
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: blr
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: blr
  
  ; CHECK-SAFE: @foo2
  ; CHECK-SAFE: fdiv
@@ -135,8 +135,8 @@ define float @goo2(float %a, float %b) nounwind {
  ; CHECK-DAG: fres
  ; CHECK-DAG: fnmsubs
  ; CHECK: fmadds
-; CHECK: fmuls
-; CHECK: blr
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
  
  ; CHECK-SAFE: @goo2
  ; CHECK-SAFE: fdivs
@@ -164,16 +164,16 @@ define double @foo3(double %a) nounwind {
  ; CHECK-DAG: frsqrte
  ; CHECK-DAG: fnmsub
  ; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fre
-; CHECK: fnmsub
-; CHECK: fmadd
-; CHECK: fnmsub
-; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fre
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: fmadd
  ; CHECK: blr
  
  ; CHECK-SAFE: @foo3
@@ -190,11 +190,11 @@ define float @goo3(float %a) nounwind {
  ; CHECK-DAG: frsqrtes
  ; CHECK-DAG: fnmsubs
  ; CHECK: fmuls
-; CHECK: fmadds
-; CHECK: fmuls
-; CHECK: fres
-; CHECK: fnmsubs
-; CHECK: fmadds
+; CHECK-NEXT: fmadds
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fres
+; CHECK-NEXT: fnmsubs
+; CHECK-NEXT: fmadds
  ; CHECK: blr
  
  ; CHECK-SAFE: @goo3
author	Sanjay Patel <spatel@rotateright.com>
	Fri, 26 Sep 2014 23:01:47 +0000 (23:01 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Fri, 26 Sep 2014 23:01:47 +0000 (23:01 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
test/CodeGen/PowerPC/recipest.ll		patch \| blob \| history