Expose the number of Newton-Raphson iterations applied to the hardware's reciprocal...
authorSanjay Patel <spatel@rotateright.com>
Wed, 12 Nov 2014 21:39:01 +0000 (21:39 +0000)
committerSanjay Patel <spatel@rotateright.com>
Wed, 12 Nov 2014 21:39:01 +0000 (21:39 +0000)
This is a follow-on to r221706 and r221731 and discussed in more detail in PR21385.

This patch also loosens the testcase checking for btver2. We know that the "1.0" will be loaded, but
we can't tell exactly when, so replace the CHECK-NEXT specifiers with plain CHECKs. The CHECK-NEXT
sequence relied on a quirk of post-RA-scheduling that may change independently of anything in these tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221819 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86ISelLowering.cpp
test/CodeGen/X86/recip-fastmath.ll

index 65e5342fe4b2061bce303c8ff3f61ce12513b342..72a9df77076788d74a28ca03242be85c0ed26893 100644 (file)
@@ -71,6 +71,12 @@ static cl::opt<bool> ExperimentalVectorShuffleLowering(
     cl::desc("Enable an experimental vector shuffle lowering code path."),
     cl::Hidden);
 
+static cl::opt<int> ReciprocalEstimateRefinementSteps(
+    "x86-recip-refinement-steps", cl::init(1),
+    cl::desc("Specify the number of Newton-Raphson iterations applied to the "
+             "result of the hardware reciprocal estimate instruction."),
+    cl::NotHidden);
+
 // Forward declarations.
 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
                        SDValue V2);
@@ -14543,9 +14549,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
   // along with FMA, this could be a throughput win.
   if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
       (Subtarget->hasAVX() && VT == MVT::v8f32)) {
-    // TODO: Expose this as a user-configurable parameter to allow for
-    // speed vs. accuracy flexibility.
-    RefinementSteps = 1;
+    RefinementSteps = ReciprocalEstimateRefinementSteps;
     return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
   }
   return SDValue();
index dd5563c965fb86eef98e33c5b3c89e87ab50fb81..83b86accdb38c58873f5810c16212c8ebaec37a6 100644 (file)
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
 
 ; If the target's divss/divps instructions are substantially
 ; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -21,11 +22,23 @@ define float @reciprocal_estimate(float %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate:
 ; BTVER2: vrcpss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vsubss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vaddss
+; BTVER2: vmulss
+; BTVER2: vsubss
+; BTVER2: vmulss
+; BTVER2: vaddss
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate:
+; REFINE: vrcpss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE-NEXT: retq
 }
 
 define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
@@ -40,11 +53,23 @@ define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate_v4f32:
 ; BTVER2: vrcpps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vsubps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v4f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
 }
 
 define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
@@ -62,11 +87,23 @@ define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
 
 ; BTVER2-LABEL: reciprocal_estimate_v8f32:
 ; BTVER2: vrcpps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vsubps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v8f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
 }
 
 attributes #0 = { "unsafe-fp-math"="true" }