Fast-math fold: x / (y * sqrt(z)) -> x * (rsqrt(z) / y)

author Sanjay Patel <spatel@rotateright.com>

Mon, 6 Oct 2014 19:31:18 +0000 (19:31 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Mon, 6 Oct 2014 19:31:18 +0000 (19:31 +0000)
author Sanjay Patel <spatel@rotateright.com>
Mon, 6 Oct 2014 19:31:18 +0000 (19:31 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Mon, 6 Oct 2014 19:31:18 +0000 (19:31 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index d8d3380e92834a528fb34c525292677f7d36c49e..f98ef96780fa0b624fa68519287f34254bfa9eec 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7036,6 +7036,28 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
        }
+    } else if (N1.getOpcode() == ISD::FMUL) {
+      // Look through an FMUL. Even though this won't remove the FDIV directly,
+      // it's still worthwhile to get rid of the FSQRT if possible.
+      SDValue SqrtOp;
+      SDValue OtherOp;
+      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+        SqrtOp = N1.getOperand(0);
+        OtherOp = N1.getOperand(1);
+      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
+        SqrtOp = N1.getOperand(1);
+        OtherOp = N1.getOperand(0);
+      }
+      if (SqrtOp.getNode()) {
+        // We found an FSQRT, so try to make this fold:
+        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
+        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
+          AddToWorklist(RV.getNode());
+          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+          AddToWorklist(RV.getNode());
+          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+        }
+      }
      }
      
      // Fold into a reciprocal estimate and multiply instead of a real divide.
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll

index d9c5d4061c84e5eff2428eb30f320bb454fa5e51..de74c043ecec58ce2855ee37150d84cb9f71a099 100644 (file)
--- a/test/CodeGen/PowerPC/recipest.ll
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -96,6 +96,34 @@ define float @goo(float %a, float %b) nounwind {
  ; CHECK-SAFE: blr
  }
  
+; Recognize that this is rsqrt(a) * rcp(b) * c,
+; not 1 / (1 / sqrt(a)) * rcp(b) * c.
+define float @rsqrt_fmul(float %a, float %b, float %c) {
+  %x = call float @llvm.sqrt.f32(float %a)
+  %y = fmul float %x, %b 
+  %z = fdiv float %c, %y
+  ret float %z
+
+; CHECK: @rsqrt_fmul
+; CHECK-DAG: frsqrtes
+; CHECK-DAG: fres
+; CHECK-DAG: fnmsubs
+; CHECK-DAG: fmuls
+; CHECK-DAG: fnmsubs
+; CHECK-DAG: fmadds
+; CHECK-DAG: fmadds
+; CHECK: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
+
+; CHECK-SAFE: @rsqrt_fmul
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fmuls
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
  define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
    %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
    %r = fdiv <4 x float> %a, %x
author	Sanjay Patel <spatel@rotateright.com>
	Mon, 6 Oct 2014 19:31:18 +0000 (19:31 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Mon, 6 Oct 2014 19:31:18 +0000 (19:31 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
test/CodeGen/PowerPC/recipest.ll		patch | blob | history