From fd4c349de20e2733284e4b61928c90245df8b1b5 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Wed, 4 Feb 2015 20:49:51 +0000
Subject: [PATCH] R600/SI: Make more store operations legal

v2i32, i32, trunc i32 to i16, and trunc i32 to i8 stores are legal for
all address spaces.  We had marked them as custom in order to lower
them for the private address space, but this is no longer necessary.

This enables lowering of misaligned stores of these types in the
DAGLegalizer.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228189 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelLowering.cpp    |  3 --
 lib/Target/R600/SIISelLowering.cpp        |  9 ----
 test/CodeGen/R600/unaligned-load-store.ll | 57 ++++++++++++++++++-----
 3 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 507e399b878..d577d689f66 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -159,9 +159,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
   // Custom lowering of vector stores is required for local address space
   // stores.
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
-  // XXX: Native v2i32 local address space stores are possible, but not
-  // currently implemented.
-  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
 
   setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
   setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 6ff17ec1f06..4545a5c648c 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -90,8 +90,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
   setOperationAction(ISD::STORE, MVT::v16i32, Custom);
 
   setOperationAction(ISD::STORE, MVT::i1, Custom);
-  setOperationAction(ISD::STORE, MVT::i32, Custom);
-  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
 
   setOperationAction(ISD::SELECT, MVT::i64, Custom);
@@ -159,8 +157,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
   for (MVT VT : MVT::fp_valuetypes())
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
 
-  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
-  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
   setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
@@ -1150,11 +1146,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Store->getMemoryVT();
 
   // These stores are legal.
-  if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
-      VT.isVector() && VT.getVectorNumElements() == 2 &&
-      VT.getVectorElementType() == MVT::i32)
-    return SDValue();
-
   if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
     if (VT.isVector() && VT.getVectorNumElements() > 4)
       return ScalarizeVectorStore(Op, DAG);
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 54ac608e190..8ed0838c4ac 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -6,7 +6,10 @@
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
   %v = load i32 addrspace(3)* %p, align 1
@@ -19,7 +22,10 @@
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
-; SI: buffer_store_dword
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
 define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
   %v = load i32 addrspace(1)* %p, align 1
   store i32 %v, i32 addrspace(1)* %r, align 1
@@ -35,8 +41,14 @@ define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
   %v = load i64 addrspace(3)* %p, align 1
@@ -53,8 +65,14 @@ define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
 ; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: buffer_store_dword
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
 define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
   %v = load i64 addrspace(1)* %p, align 1
   store i64 %v, i64 addrspace(1)* %r, align 1
@@ -82,10 +100,25 @@ define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
   %v = load <4 x i32> addrspace(3)* %p, align 1
@@ -149,7 +182,7 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
   ret void
 }
 
-; SI: {{^}}load_lds_i64_align_1
+; SI-LABEL: {{^}}load_lds_i64_align_1:
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
@@ -158,7 +191,9 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
 ; SI: ds_read_u8
 ; SI: ds_read_u8
 ; SI: ds_read_u8
+; SI: buffer_store_dwordx2
 ; SI: s_endpgm
+
 define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %val = load i64 addrspace(3)* %in, align 1
   store i64 %val, i64 addrspace(1)* %out, align 8
-- 
2.34.1
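
Not part of the patch: a minimal IR sketch, in the style of the tests above,
of the case the commit message describes. The function name is hypothetical,
and it assumes the private (scratch) address space is the default address
space 0, as it was in the R600 backend at this revision. With i32 stores
marked Legal instead of Custom, the DAGLegalizer is free to expand a
misaligned store like this one itself (e.g. into byte stores) rather than
handing it to the target's custom STORE lowering:

; Hypothetical test, not from the commit: an i32 store to the private
; address space with only byte alignment. Generic legalization, not
; custom target code, now handles the misalignment.
define void @unaligned_store_i32_private(i32* %p, i32 %v) nounwind {
  store i32 %v, i32* %p, align 1
  ret void
}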