From 782254a3d8eca0635d1155625de3bad344a36796 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Wed, 25 Nov 2015 20:01:03 +0000
Subject: [PATCH] AMDGPU: Add some tests for promotion of v2i64 scalar_to_vector

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254087 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/AMDGPU/merge-stores.ll | 71 +++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll
index 12d11ccfe41..8b01c96fe2c 100644
--- a/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/test/CodeGen/AMDGPU/merge-stores.ll
@@ -634,6 +634,77 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
   ret void
 }
 
+; This requires handling of scalar_to_vector for v2i64 to avoid
+; scratch usage.
+; FIXME: Should do single load and store
+
+; GCN-LABEL: {{^}}copy_v3i32_align4:
+; GCN-NOT: SCRATCH_RSRC_DWORD
+; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-NOT: offen
+; GCN: s_waitcnt vmcnt
+; GCN-NOT: offen
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+
+; GCN: ScratchSize: 0{{$}}
+define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
+  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4
+  store <3 x i32> %vec, <3 x i32> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}copy_v3i64_align4:
+; GCN-NOT: SCRATCH_RSRC_DWORD
+; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-NOT: offen
+; GCN: s_waitcnt vmcnt
+; GCN-NOT: offen
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN: ScratchSize: 0{{$}}
+define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
+  %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
+  store <3 x i64> %vec, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}copy_v3f32_align4:
+; GCN-NOT: SCRATCH_RSRC_DWORD
+; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-NOT: offen
+; GCN: s_waitcnt vmcnt
+; GCN-NOT: offen
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN: ScratchSize: 0{{$}}
+define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
+  %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
+  %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0>
+  store <3 x float> %fadd, <3 x float> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}copy_v3f64_align4:
+; GCN-NOT: SCRATCH_RSRC_DWORD
+; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-NOT: offen
+; GCN: s_waitcnt vmcnt
+; GCN-NOT: offen
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN: ScratchSize: 0{{$}}
+define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
+  %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
+  %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0>
+  store <3 x double> %fadd, <3 x double> addrspace(1)* %out
+  ret void
+}
+
 declare void @llvm.AMDGPU.barrier.local() #1
 
 attributes #0 = { nounwind }
-- 
2.34.1
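For reference (not part of the patch itself): the GCN check lines above are driven by the RUN lines that already sit at the top of merge-stores.ll, which this hunk does not show. Below is a minimal standalone sketch of the v3i64 case; the llc invocation and the bare GCN prefix are assumptions for illustration, not copied from the file's actual RUN lines.

; Hypothetical reduced test; the RUN line is an assumed invocation, not
; quoted from merge-stores.ll.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; The copy is loaded at align 4, so the merged loads/stores rebuild the
; <3 x i64> value from pieces; per the patch's own comment, handling
; scalar_to_vector for v2i64 is what keeps this off the scratch buffer.
; GCN-LABEL: {{^}}copy_v3i64_align4:
; GCN-NOT: SCRATCH_RSRC_DWORD
; GCN: ScratchSize: 0{{$}}
define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
  %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
  store <3 x i64> %vec, <3 x i64> addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }

The full test file can be exercised through lit as usual (for example, bin/llvm-lit test/CodeGen/AMDGPU/merge-stores.ll from a build tree), which runs every RUN line and check prefix at once.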