R600/SI: Use MULADD_IEEE/V_MAD_F32 instruction for mad pattern

author Vincent Lejeune <vljn@ovi.com>

Mon, 18 Feb 2013 14:11:28 +0000 (14:11 +0000)

committer Vincent Lejeune <vljn@ovi.com>

Mon, 18 Feb 2013 14:11:28 +0000 (14:11 +0000)
author Vincent Lejeune <vljn@ovi.com>
Mon, 18 Feb 2013 14:11:28 +0000 (14:11 +0000)
committer Vincent Lejeune <vljn@ovi.com>
Mon, 18 Feb 2013 14:11:28 +0000 (14:11 +0000)
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index d0d23d692a390c50f3497e5db1f18f20a728bdf4..0a33264686d20b2342c0155ee839f58834e3e45e 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -127,9 +127,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
        return LowerIntrinsicLRP(Op, DAG);
      case AMDGPUIntrinsic::AMDIL_fraction:
        return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
-    case AMDGPUIntrinsic::AMDIL_mad:
-      return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-                              Op.getOperand(2), Op.getOperand(3));
      case AMDGPUIntrinsic::AMDIL_max:
        return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
                                                    Op.getOperand(2));
@@ -176,9 +173,9 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
                                  Op.getOperand(1));
    SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
                                                      Op.getOperand(3));
-  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-                                               Op.getOperand(2),
-                                               OneSubAC);
+  return DAG.getNode(ISD::FADD, DL, VT,
+      DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
+      OneSubAC);
  }
  
  /// \brief Generate Min/Max node
@@ -393,7 +390,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
    switch (Opcode) {
    default: return 0;
    // AMDIL DAG nodes
-  NODE_NAME_CASE(MAD);
    NODE_NAME_CASE(CALL);
    NODE_NAME_CASE(UMUL);
    NODE_NAME_CASE(DIV_INF);
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h

index 99a11fff92c485a0b901ea9284db872e3050bca3..404f6201a55d0b5e032011c502671b54f1b0bbaf 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -108,7 +108,6 @@ namespace AMDGPUISD {
  enum {
    // AMDIL ISD Opcodes
    FIRST_NUMBER = ISD::BUILTIN_OP_END,
-  MAD,         // 32bit Fused Multiply Add instruction
    CALL,        // Function call based on a single integer
    UMUL,        // 32bit unsigned multiplication
    DIV_INF,      // Divide with infinity returned on zero divisor
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp

index 2e60adcc99faf018c4435ad62539f57916b57e70..3480ac80b103c10d5bf81bf19df45407ae852cfd 100644 (file)
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -451,7 +451,8 @@ AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
    SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
  
    // float fr = mad(fqneg, fb, fa);
-  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);
+  SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+      DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
  
    // int iq = (int)fq;
    SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td

index e969bbf8ca349ea193fe5209ac3276ae38c0f0c0..110f1476513b1887aeffd8ad5e64afaf22ac5994 100644 (file)
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -116,7 +116,6 @@ def IL_retflag       : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
  //===--------------------------------------------------------------------===//
  // Floating point math functions
  def IL_div_inf      : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
-def IL_mad          : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;
  
  //===----------------------------------------------------------------------===//
  // Integer functions
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td

index 3f9e20f0c8545d3263dfdd3846a1f8b4d8f71578..6ec3559af24c9962748e4f373b6b58d6f10e7d57 100644 (file)
--- a/lib/Target/R600/AMDILIntrinsics.td
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -92,12 +92,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
        TernaryIntInt;
    def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
        BinaryIntInt;
-  def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
-          TernaryIntInt;
-  def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
-          TernaryIntInt;
-  def int_AMDIL_mad     : GCCBuiltin<"__amdil_mad">,
-          TernaryIntFloat;
    def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
            BinaryIntInt;
    def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
@@ -110,10 +104,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
            BinaryIntInt;
    def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
            BinaryIntInt;
-  def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
-          TernaryIntInt;
-  def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
-          TernaryIntInt;
    def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
            BinaryIntInt;
    def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td

index f88d3fca768d2d6a08126492c824459ca6f6369b..d24a3639fd0bf18f5db960b76ed6da6ce290bfff 100644 (file)
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -949,8 +949,13 @@ class MUL_LIT_Common <bits<5> inst> : R600_3OP <
  
  class MULADD_Common <bits<5> inst> : R600_3OP <
    inst, "MULADD",
+  []
+>;
+
+class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
+  inst, "MULADD_IEEE",
    [(set (f32 R600_Reg32:$dst),
-   (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))]
+   (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
  >;
  
  class CNDE_Common <bits<5> inst> : R600_3OP <
@@ -1107,6 +1112,7 @@ let Predicates = [isR600] in {
  
    def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
    def MULADD_r600 : MULADD_Common<0x10>;
+  def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
    def CNDE_r600 : CNDE_Common<0x18>;
    def CNDGT_r600 : CNDGT_Common<0x19>;
    def CNDGE_r600 : CNDGE_Common<0x1A>;
@@ -1246,6 +1252,7 @@ let Predicates = [isEGorCayman] in {
    >;
  
    def MULADD_eg : MULADD_Common<0x14>;
+  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
    def ASHR_eg : ASHR_Common<0x15>;
    def LSHR_eg : LSHR_Common<0x16>;
    def LSHL_eg : LSHL_Common<0x17>;
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td

index 9372993dd4406bba74f8711b6f2964a37b21bd40..b4a263d02c78b61576fbc86f759b84f8678c8fff 100644 (file)
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1416,8 +1416,8 @@ def : Pat <
  /**********   VOP3 Patterns    **********/
  /********** ================== **********/
  
-def : Pat <(f32 (IL_mad VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2)),
-           (V_MAD_LEGACY_F32 VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2,
+def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VReg_32:$src1), VReg_32:$src2)),
+           (V_MAD_F32 VSrc_32:$src0, VReg_32:$src1, VReg_32:$src2,
              0, 0, 0, 0)>;
  
  /********** ================== **********/
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll

new file mode 100644 (file)

index 0000000..a3d4d0f
--- /dev/null
+++ b/test/CodeGen/R600/fmad.ll
@@ -0,0 +1,19 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test() {
+   %r0 = call float @llvm.R600.load.input(i32 0)
+   %r1 = call float @llvm.R600.load.input(i32 1)
+   %r2 = call float @llvm.R600.load.input(i32 2)
+   %r3 = fmul float %r0, %r1
+       %r4 = fadd float %r3, %r2
+   call void @llvm.AMDGPU.store.output(float %r4, i32 0)
+   ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
+
+declare float @fabs(float ) readnone
author	Vincent Lejeune <vljn@ovi.com>
	Mon, 18 Feb 2013 14:11:28 +0000 (14:11 +0000)
committer	Vincent Lejeune <vljn@ovi.com>
	Mon, 18 Feb 2013 14:11:28 +0000 (14:11 +0000)
lib/Target/R600/AMDGPUISelLowering.cpp		patch \| blob \| history
lib/Target/R600/AMDGPUISelLowering.h		patch \| blob \| history
lib/Target/R600/AMDILISelLowering.cpp		patch \| blob \| history
lib/Target/R600/AMDILInstrInfo.td		patch \| blob \| history
lib/Target/R600/AMDILIntrinsics.td		patch \| blob \| history
lib/Target/R600/R600Instructions.td		patch \| blob \| history
lib/Target/R600/SIInstructions.td		patch \| blob \| history
test/CodeGen/R600/fmad.ll	[new file with mode: 0644]	patch \| blob