From 0e292376d09223d791ed2ccc8935e98cbd36f1f8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 24 Aug 2012 04:03:22 +0000 Subject: [PATCH] Custom lower FMA intrinsics to target specific nodes and remove the patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162534 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 72 ++++++++++++++++ lib/Target/X86/X86InstrFMA.td | 131 ----------------------------- lib/Target/X86/X86Subtarget.h | 2 +- 3 files changed, 73 insertions(+), 132 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1bbd4a76406..224cf01a8bd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10077,6 +10077,78 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); } + case Intrinsic::x86_fma_vfmadd_ps: + case Intrinsic::x86_fma_vfmadd_pd: + case Intrinsic::x86_fma_vfmsub_ps: + case Intrinsic::x86_fma_vfmsub_pd: + case Intrinsic::x86_fma_vfnmadd_ps: + case Intrinsic::x86_fma_vfnmadd_pd: + case Intrinsic::x86_fma_vfnmsub_ps: + case Intrinsic::x86_fma_vfnmsub_pd: + case Intrinsic::x86_fma_vfmaddsub_ps: + case Intrinsic::x86_fma_vfmaddsub_pd: + case Intrinsic::x86_fma_vfmsubadd_ps: + case Intrinsic::x86_fma_vfmsubadd_pd: + case Intrinsic::x86_fma_vfmadd_ps_256: + case Intrinsic::x86_fma_vfmadd_pd_256: + case Intrinsic::x86_fma_vfmsub_ps_256: + case Intrinsic::x86_fma_vfmsub_pd_256: + case Intrinsic::x86_fma_vfnmadd_ps_256: + case Intrinsic::x86_fma_vfnmadd_pd_256: + case Intrinsic::x86_fma_vfnmsub_ps_256: + case Intrinsic::x86_fma_vfnmsub_pd_256: + case Intrinsic::x86_fma_vfmaddsub_ps_256: + case Intrinsic::x86_fma_vfmaddsub_pd_256: + case Intrinsic::x86_fma_vfmsubadd_ps_256: + case Intrinsic::x86_fma_vfmsubadd_pd_256: { + // Only lower intrinsics if FMA is enabled. FMA4 still uses patterns. + if (!Subtarget->hasFMA()) + return SDValue(); + + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_fma_vfmadd_ps: + case Intrinsic::x86_fma_vfmadd_pd: + case Intrinsic::x86_fma_vfmadd_ps_256: + case Intrinsic::x86_fma_vfmadd_pd_256: + Opc = X86ISD::FMADD; + break; + case Intrinsic::x86_fma_vfmsub_ps: + case Intrinsic::x86_fma_vfmsub_pd: + case Intrinsic::x86_fma_vfmsub_ps_256: + case Intrinsic::x86_fma_vfmsub_pd_256: + Opc = X86ISD::FMSUB; + break; + case Intrinsic::x86_fma_vfnmadd_ps: + case Intrinsic::x86_fma_vfnmadd_pd: + case Intrinsic::x86_fma_vfnmadd_ps_256: + case Intrinsic::x86_fma_vfnmadd_pd_256: + Opc = X86ISD::FNMADD; + break; + case Intrinsic::x86_fma_vfnmsub_ps: + case Intrinsic::x86_fma_vfnmsub_pd: + case Intrinsic::x86_fma_vfnmsub_ps_256: + case Intrinsic::x86_fma_vfnmsub_pd_256: + Opc = X86ISD::FNMSUB; + break; + case Intrinsic::x86_fma_vfmaddsub_ps: + case Intrinsic::x86_fma_vfmaddsub_pd: + case Intrinsic::x86_fma_vfmaddsub_ps_256: + case Intrinsic::x86_fma_vfmaddsub_pd_256: + Opc = X86ISD::FMADDSUB; + break; + case Intrinsic::x86_fma_vfmsubadd_ps: + case Intrinsic::x86_fma_vfmsubadd_pd: + case Intrinsic::x86_fma_vfmsubadd_ps_256: + case Intrinsic::x86_fma_vfmsubadd_pd_256: + Opc = X86ISD::FMSUBADD; + break; + } + + return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + } } } diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 95ee7e50ba1..a4e689e6bb3 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -112,137 +112,6 @@ let ExeDomain = SSEPackedDouble in { v4f64>, VEX_W; } -let Predicates = [HasFMA] in { - def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, - (memopv4f32 addr:$src3)), - (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, - (memopv8f32 addr:$src3)), - (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), - (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, - (memopv2f64 addr:$src3)), - (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; - - def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), - (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; - def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, - (memopv4f64 addr:$src3)), - (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; - -} // Predicates = [HasFMA] - let Constraints = "$src1 = $dst" in { multiclass fma3s_rm opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, ValueType OpVT, PatFrag mem_frag, diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 3280ce52fd6..8d3797ec3b4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -205,7 +205,7 @@ public: bool hasAES() const { return HasAES; } bool hasPCLMUL() const { return HasPCLMUL; } bool hasFMA() const { return HasFMA; } - // FIXME: Favor FMA when both are enabled. Is this right? + // FIXME: Favor FMA when both are enabled. Is this the right thing to do? bool hasFMA4() const { return HasFMA4 && !HasFMA; } bool hasXOP() const { return HasXOP; } bool hasMOVBE() const { return HasMOVBE; } -- 2.34.1