[x86] Implement combineRepeatedFPDivisors

author Sanjay Patel <spatel@rotateright.com>

Wed, 15 Apr 2015 15:22:55 +0000 (15:22 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Wed, 15 Apr 2015 15:22:55 +0000 (15:22 +0000)
author Sanjay Patel <spatel@rotateright.com>
Wed, 15 Apr 2015 15:22:55 +0000 (15:22 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 15 Apr 2015 15:22:55 +0000 (15:22 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 1c60237f75b056c0d4a0411cb1fae04efbf70a06..c32412a741c872b06daca2d97e6c7fabff8ac44a 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12818,6 +12818,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
    return SDValue();
  }
  
+/// If we have at least two divisions that use the same divisor, convert to
+/// multiplication by a reciprocal. This may need to be adjusted for a given
+/// CPU if a division's cost is not at least twice the cost of a multiplication.
+/// This is because we still need one division to calculate the reciprocal and
+/// then we need two multiplies by that reciprocal as replacements for the
+/// original divisions.
+bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+  return NumUsers > 1;
+}
+
  static bool isAllOnes(SDValue V) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
    return C && C->isAllOnesValue();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index dd20ec23976c15552939fa84960919f78c25c24a..5130c37b04284c6fb72e750dfcdcf398bbee3b71 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1072,6 +1072,9 @@ namespace llvm {
      /// Use rcp* to speed up fdiv calculations.
      SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                               unsigned &RefinementSteps) const override;
+
+    /// Reassociate floating point divisions into multiply by reciprocal.
+    bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
    };
  
    namespace X86 {
diff --git a/test/CodeGen/X86/fdiv-combine.ll b/test/CodeGen/X86/fdiv-combine.ll

new file mode 100644 (file)

index 0000000..279bb06
--- /dev/null
+++ b/test/CodeGen/X86/fdiv-combine.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; Anything more than one division using a single divisor operand
+; should be converted into a reciprocal and multiplication.
+
+define float @div1_arcp(float %x, float %y, float %z) #0 {
+; CHECK-LABEL: div1_arcp:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    divss %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %div1 = fdiv arcp float %x, %y
+  ret float %div1
+}
+
+define float @div2_arcp(float %x, float %y, float %z) #0 {
+; CHECK-LABEL: div2_arcp:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT:    divss %xmm2, %xmm3
+; CHECK-NEXT:    mulss %xmm3, %xmm0
+; CHECK-NEXT:    mulss %xmm1, %xmm0
+; CHECK-NEXT:    mulss %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %div1 = fdiv arcp float %x, %z
+  %mul = fmul arcp float %div1, %y
+  %div2 = fdiv arcp float %mul, %z
+  ret float %div2
+}
+
+; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
+attributes #0 = { "unsafe-fp-math"="true" }
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 15 Apr 2015 15:22:55 +0000 (15:22 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 15 Apr 2015 15:22:55 +0000 (15:22 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/CodeGen/X86/fdiv-combine.ll	[new file with mode: 0644]	patch \| blob