R600: Expand mad24 for GPUs without it

author Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 22 May 2014 18:00:20 +0000 (18:00 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Thu, 22 May 2014 18:00:20 +0000 (18:00 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 22 May 2014 18:00:20 +0000 (18:00 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Thu, 22 May 2014 18:00:20 +0000 (18:00 +0000)
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td

index 127b74a0edbb77eeb1c05b61f88df4384d74846b..8a9ab54657ff1f2d51154076605c8b1df3dded9e 100644 (file)
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -433,6 +433,16 @@ class UMad24Pat<Instruction Inst> : Pat <
    (Inst $src0, $src1, $src2)
  >;
  
+class IMad24ExpandPat<Instruction MulInst, Instruction AddInst> : Pat <
+  (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2),
+  (AddInst (MulInst $src0, $src1), $src2)
+>;
+
+class UMad24ExpandPat<Instruction MulInst, Instruction AddInst> : Pat <
+  (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2),
+  (AddInst (MulInst $src0, $src1), $src2)
+>;
+
  
  include "R600Instructions.td"
  include "R700Instructions.td"
diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td

index 6dded24c6fefa2a626bd5b192996ee6f2f5925c6..58424a61722df8c09bb8cf214cad6d2ed0b8c922 100644 (file)
--- a/lib/Target/R600/CaymanInstructions.td
+++ b/lib/Target/R600/CaymanInstructions.td
@@ -49,6 +49,7 @@ def COS_cm : COS_Common<0x8E>;
  def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
  
  defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
+def : UMad24ExpandPat<MULLO_UINT_cm, ADD_INT>;
  
  // RECIP_UINT emulation for Cayman
  // The multiplication scales from [0,1] to the unsigned integer range
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td

index d9931c81d625e2320883d5e31cebefc7960a14d9..77416671a9036069cb6fef57d51caf344683541b 100644 (file)
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -75,6 +75,9 @@ def COS_eg : COS_Common<0x8E>;
  def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
  def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
  
+def : IMad24ExpandPat<MULLO_INT_eg, ADD_INT>;
+def : UMad24ExpandPat<MULLO_UINT_eg, ADD_INT>;
+
  //===----------------------------------------------------------------------===//
  // Memory read/write instructions
  //===----------------------------------------------------------------------===//
@@ -301,8 +304,11 @@ def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
  >;
  
  def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
-  [(set i32:$dst, (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2))], VecALU
+  [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))], VecALU
  >;
+
+def : UMad24Pat<MULADD_UINT24_eg>;
+
  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
  def : ROTRPattern <BIT_ALIGN_INT_eg>;
  def MULADD_eg : MULADD_Common<0x14>;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td

index d2075c0577b07b4cc83c48067233508940ccf260..0c804ffe5d4bedd74c45db5b99bac64127877cf9 100644 (file)
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1625,6 +1625,12 @@ def : DwordAddrPat  <i32, R600_Reg32>;
  
  } // End isR600toCayman Predicate
  
+let Predicates = [isR600] in {
+// Intrinsic patterns
+def : IMad24ExpandPat<MULLO_INT_r600, ADD_INT>;
+def : UMad24ExpandPat<MULLO_UINT_r600, ADD_INT>;
+} // End isR600
+
  def getLDSNoRetOp : InstrMapping {
    let FilterClass = "R600_LDS_1A1D";
    let RowFields = ["BaseOp"];
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll

index c7a3660b0b9aaea52e755d63713d1853ee782696..95795ea63b933c9e8cd92fa16cb0960aa786b716 100644 (file)
--- a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
@@ -1,11 +1,18 @@
  ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
  ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FIXME: Store of i32 seems to be broken pre-EG somehow?
  
  declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone
  
  ; FUNC-LABEL: @test_imad24
  ; SI: V_MAD_I32_I24
  ; CM: MULADD_INT24
+; R600: MULLO_INT
+; R600: ADD_INT
  define void @test_imad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
    %mad = call i32 @llvm.AMDGPU.imad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
    store i32 %mad, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll

index 08daac54424ae4650cfb7125382f0d7a73c464fa..afdfb18a563b171a5aef6f706949f74cbc66f6b4 100644 (file)
--- a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -1,9 +1,16 @@
  ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
  
  declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
  
  ; FUNC-LABEL: @test_umad24
  ; SI: V_MAD_U32_U24
+; EG: MULADD_UINT24
+; R600: MULLO_UINT
+; R600: ADD_INT
  define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
    %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
    store i32 %mad, i32 addrspace(1)* %out, align 4
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 22 May 2014 18:00:20 +0000 (18:00 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Thu, 22 May 2014 18:00:20 +0000 (18:00 +0000)
lib/Target/R600/AMDGPUInstructions.td		patch \| blob \| history
lib/Target/R600/CaymanInstructions.td		patch \| blob \| history
lib/Target/R600/EvergreenInstructions.td		patch \| blob \| history
lib/Target/R600/R600Instructions.td		patch \| blob \| history
test/CodeGen/R600/llvm.AMDGPU.imad24.ll		patch \| blob \| history
test/CodeGen/R600/llvm.AMDGPU.umad24.ll		patch \| blob \| history