R600: Add support for LDS atomic subtract
author Aaron Watry <awatry@gmail.com>
Fri, 6 Sep 2013 20:17:42 +0000 (20:17 +0000)
committer Aaron Watry <awatry@gmail.com>
Fri, 6 Sep 2013 20:17:42 +0000 (20:17 +0000)
Signed-off-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190200 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/R600/AMDGPUInstructions.td
lib/Target/R600/R600Instructions.td
lib/Target/R600/SIInstructions.td
test/CodeGen/R600/atomic_load_sub.ll [new file with mode: 0644]

index 6745fed3baecdc2769ce56a3520ce937f8d9e601..e30abc08bdb17f0957f4d05d65d9b2446c0d8a0f 100644 (file)
@@ -196,6 +196,15 @@ def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
   return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
 
+// Matches atomic_load_sub only when the node's address space is
+// AMDGPUAS::LOCAL_ADDRESS.
+def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
+                                    (atomic_load_sub node:$ptr, node:$value), [{
+  // The result is dereferenced unconditionally, so use the asserting cast<>
+  // instead of a dyn_cast<> that could yield null.
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
 def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                             (AMDGPUstore_mskor node:$val, node:$ptr), [{
   return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
index efa475198ca04e19052497a291efcc9a87f4e81b..24bc6b0241cfe0b167823c048b7f6a6b166564d1 100644 (file)
@@ -1673,6 +1673,7 @@ class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
 }
 
 def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
+def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
 def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
   [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
 >;
@@ -1685,6 +1686,9 @@ def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
 def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
   [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
 >;
+def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
+  [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
+>;
 def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
   [(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
 >;
index 31a5ad237e79a2d0d0f4c0a55441d9a3ea84d1ea..14a189a07afb8788b5c662b45a1c518a29050cbb 100644 (file)
@@ -392,6 +392,7 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
 } // End isCompare = 1
 
 def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
+def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
 def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
 def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
 def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
@@ -1779,6 +1780,9 @@ def : DSWritePat <DS_WRITE_B32, i32, local_store>;
 def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
            (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
 
+def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
+           (DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>;
+
 /********** ================== **********/
 /**********   SMRD Patterns    **********/
 /********** ================== **********/
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
new file mode 100644 (file)
index 0000000..e256f07
--- /dev/null
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+
+; atomicrmw sub on an addrspace(3) pointer whose result is unused: the redwood
+; run expects LDS_SUB, the SI run expects DS_SUB_U32_RTN.
+; R600-CHECK-LABEL: @atomic_sub_local
+; R600-CHECK: LDS_SUB *
+; SI-CHECK-LABEL: @atomic_sub_local
+; SI-CHECK: DS_SUB_U32_RTN 0
+define void @atomic_sub_local(i32 addrspace(3)* %local) {
+entry:
+  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  ret void
+}
+
+; Same operation, but the value returned by atomicrmw is stored through %out:
+; the redwood run expects LDS_SUB_RET, the SI run expects DS_SUB_U32_RTN.
+; R600-CHECK-LABEL: @atomic_sub_ret_local
+; R600-CHECK: LDS_SUB_RET *
+; SI-CHECK-LABEL: @atomic_sub_ret_local
+; SI-CHECK: DS_SUB_U32_RTN 0
+define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+entry:
+  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}