From: Hal Finkel
Date: Mon, 24 Nov 2014 23:45:21 +0000 (+0000)
Subject: [PowerPC] Implement combineRepeatedFPDivisors
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=5d6f18565358decf831d9abe415e7380419d39da

[PowerPC] Implement combineRepeatedFPDivisors

This does not matter on newer cores (where we can use reciprocal estimates in
fast-math mode anyway), but for older cores this allows us to generate better
fast-math code where we have multiple FDIVs with a common divisor.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222710 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 351356028d6..7351d19120a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7526,6 +7526,28 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
   return SDValue();
 }
 
+bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+  // Note: This functionality is used only when unsafe-fp-math is enabled, and
+  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
+  // enabled for division), this functionality is redundant with the default
+  // combiner logic (once the division -> reciprocal/multiply transformation
+  // has taken place). As a result, this matters more for older cores than for
+  // newer ones.
+
+  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+  // reciprocal if there are two or more FDIVs (for embedded cores with only
+  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
+  switch (Subtarget.getDarwinDirective()) {
+  default:
+    return NumUsers > 2;
+  case PPC::DIR_440:
+  case PPC::DIR_A2:
+  case PPC::DIR_E500mc:
+  case PPC::DIR_E5500:
+    return NumUsers > 1;
+  }
+}
+
 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                                unsigned Bytes, int Dist,
                                SelectionDAG &DAG) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index bb4d1f1f019..4b4d25e3a3c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -704,6 +704,7 @@ namespace llvm {
                              bool &UseOneConstNR) const override;
     SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                              unsigned &RefinementSteps) const override;
+    bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
   };
diff --git a/test/CodeGen/PowerPC/fdiv-combine.ll b/test/CodeGen/PowerPC/fdiv-combine.ll
new file mode 100644
index 00000000000..d3dc3fe913f
--- /dev/null
+++ b/test/CodeGen/PowerPC/fdiv-combine.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Following test case checks:
+; a / D; b / D; c / D;
+; =>
+; recip = 1.0 / D; a * recip; b * recip; c * recip;
+
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: fdiv
+; CHECK-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  %div2 = fdiv double %c, %D
+  tail call void @foo_3d(double %div, double %div1, double %div2)
+  ret void
+}
+
+define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+; CHECK-LABEL: two_fdiv_double:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NOT: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  tail call void @foo_2d(double %div, double %div1)
+  ret void
+}
+
+declare void @foo_3d(double, double, double)
+declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
+declare void @foo_2d(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }