R600/SI: Make more store operations legal

author Tom Stellard <thomas.stellard@amd.com>

Wed, 4 Feb 2015 20:49:51 +0000 (20:49 +0000)

committer Tom Stellard <thomas.stellard@amd.com>

Wed, 4 Feb 2015 20:49:51 +0000 (20:49 +0000)
author Tom Stellard <thomas.stellard@amd.com>
Wed, 4 Feb 2015 20:49:51 +0000 (20:49 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 4 Feb 2015 20:49:51 +0000 (20:49 +0000)
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp

index 507e399b8785b4c1c2dc01f00b3a8d3b7cf56bde..d577d689f66aaa6e4d2ad6d15e6902fac8958900 100644 (file)
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -159,9 +159,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
    // Custom lowering of vector stores is required for local address space
    // stores.
    setOperationAction(ISD::STORE, MVT::v4i32, Custom);
-  // XXX: Native v2i32 local address space stores are possible, but not
-  // currently implemented.
-  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  
    setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
    setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp

index 6ff17ec1f067bb009c82a36e8e9397c7500b0fb4..4545a5c648ccf317e229a957e632084b7b273bff 100644 (file)
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -90,8 +90,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
    setOperationAction(ISD::STORE, MVT::v16i32, Custom);
  
    setOperationAction(ISD::STORE, MVT::i1, Custom);
-  setOperationAction(ISD::STORE, MVT::i32, Custom);
-  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
    setOperationAction(ISD::STORE, MVT::v4i32, Custom);
  
    setOperationAction(ISD::SELECT, MVT::i64, Custom);
@@ -159,8 +157,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
    for (MVT VT : MVT::fp_valuetypes())
      setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
  
-  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
-  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::i64, MVT::i32, Expand);
    setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
@@ -1150,11 +1146,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Store->getMemoryVT();
  
    // These stores are legal.
-  if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
-      VT.isVector() && VT.getVectorNumElements() == 2 &&
-      VT.getVectorElementType() == MVT::i32)
-    return SDValue();
-
    if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
      if (VT.isVector() && VT.getVectorNumElements() > 4)
        return ScalarizeVectorStore(Op, DAG);
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll

index 54ac608e1905e642b176879f0f648b769c8b0bb3..8ed0838c4ac68073d78fff9e06658a026c1dbeb9 100644 (file)
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -6,7 +6,10 @@
  ; SI: ds_read_u8
  ; SI: ds_read_u8
  ; SI: ds_read_u8
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
  ; SI: s_endpgm
  define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
    %v = load i32 addrspace(3)* %p, align 1
@@ -19,7 +22,10 @@ define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(
  ; SI: buffer_load_ubyte
  ; SI: buffer_load_ubyte
  ; SI: buffer_load_ubyte
-; SI: buffer_store_dword
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
  define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
    %v = load i32 addrspace(1)* %p, align 1
    store i32 %v, i32 addrspace(1)* %r, align 1
@@ -35,8 +41,14 @@ define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace
  ; SI: ds_read_u8
  ; SI: ds_read_u8
  ; SI: ds_read_u8
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
  ; SI: s_endpgm
  define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
    %v = load i64 addrspace(3)* %p, align 1
@@ -53,8 +65,14 @@ define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(
  ; SI: buffer_load_ubyte
  ; SI: buffer_load_ubyte
  ; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: buffer_store_dword
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
  define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
    %v = load i64 addrspace(1)* %p, align 1
    store i64 %v, i64 addrspace(1)* %r, align 1
@@ -82,10 +100,25 @@ define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace
  ; SI: ds_read_u8
  ; SI: ds_read_u8
  
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
  ; SI: s_endpgm
  define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
    %v = load <4 x i32> addrspace(3)* %p, align 1
@@ -149,7 +182,7 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
    ret void
  }
  
-; SI: {{^}}load_lds_i64_align_1
+; SI-LABEL: {{^}}load_lds_i64_align_1:
  ; SI: ds_read_u8
  ; SI: ds_read_u8
  ; SI: ds_read_u8
@@ -158,7 +191,9 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
  ; SI: ds_read_u8
  ; SI: ds_read_u8
  ; SI: ds_read_u8
+; SI: buffer_store_dwordx2
  ; SI: s_endpgm
+
  define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
    %val = load i64 addrspace(3)* %in, align 1
    store i64 %val, i64 addrspace(1)* %out, align 8
author	Tom Stellard <thomas.stellard@amd.com>
	Wed, 4 Feb 2015 20:49:51 +0000 (20:49 +0000)
committer	Tom Stellard <thomas.stellard@amd.com>
	Wed, 4 Feb 2015 20:49:51 +0000 (20:49 +0000)
lib/Target/R600/AMDGPUISelLowering.cpp		patch | blob | history
lib/Target/R600/SIISelLowering.cpp		patch | blob | history
test/CodeGen/R600/unaligned-load-store.ll		patch | blob | history