From 445a12ee1c5be3b2315c59561aa59db0e30c636b Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 28 Sep 2015 20:54:38 +0000
Subject: [PATCH] AMDGPU: Add testcases

Make sure we are testing moving users of the moved and split SMRD loads.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248738 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/AMDGPU/salu-to-valu.ll | 119 ++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll
index 01fb52f794a..01907ba2d41 100644
--- a/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -87,6 +87,21 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}smrd_valu2_salu_user:
+; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
+; GCN: buffer_store_dword [[ADD]]
+define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { ; single-dword load from addrspace(2) with one add as its user
+entry:
+  %tmp = call i32 @llvm.r600.read.tidig.x() #0 ; intrinsic result used as the row index below (presumably the work-item id in x - confirm)
+  %tmp1 = add i32 %tmp, 4 ; NOTE(review) %tmp1 has no user in this function
+  %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 ; element 4 of row %tmp, i.e. 4 x 4 bytes = the offset 16 the load check expects
+  %tmp3 = load i32, i32 addrspace(2)* %tmp2 ; the checks expect this to be emitted as a buffer_load_dword
+  %tmp4 = add i32 %tmp3, %a ; user of the loaded value; the other operand is the argument %a
+  store i32 %tmp4, i32 addrspace(1)* %out ; the checks expect the add result to be what is stored
+  ret void
+}
+
 ; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
 ; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
 define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
@@ -128,6 +143,45 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: buffer_store_dword
+define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { ; <8 x i32> load from addrspace(2) whose lanes are summed by a chain of adds
+entry:
+  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() ; NOTE(review) no attribute group on this call, while the other two calls to this intrinsic added by this patch use #0 and #1
+  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 ; address is %in advanced by %tmp0 i32 elements
+  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* ; view the same address as a pointer to <8 x i32>
+  %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 ; 32-byte vector load with align 4; the checks expect two buffer_load_dwordx4
+
+  %elt0 = extractelement <8 x i32> %tmp3, i32 0 ; pull out each of the eight lanes so every one gets a user
+  %elt1 = extractelement <8 x i32> %tmp3, i32 1
+  %elt2 = extractelement <8 x i32> %tmp3, i32 2
+  %elt3 = extractelement <8 x i32> %tmp3, i32 3
+  %elt4 = extractelement <8 x i32> %tmp3, i32 4
+  %elt5 = extractelement <8 x i32> %tmp3, i32 5
+  %elt6 = extractelement <8 x i32> %tmp3, i32 6
+  %elt7 = extractelement <8 x i32> %tmp3, i32 7
+
+  %add0 = add i32 %elt0, %elt1 ; serial sum of the lanes, seven adds to match the seven v_add_i32_e32 checks
+  %add1 = add i32 %add0, %elt2
+  %add2 = add i32 %add1, %elt3
+  %add3 = add i32 %add2, %elt4
+  %add4 = add i32 %add3, %elt5
+  %add5 = add i32 %add4, %elt6
+  %add6 = add i32 %add5, %elt7
+
+  store i32 %add6, i32 addrspace(1)* %out ; the total of all eight lanes is the only value stored
+  ret void
+}
+
 ; GCN-LABEL: {{^}}s_load_imm_v16i32:
 ; GCN: buffer_load_dwordx4
 ; GCN: buffer_load_dwordx4
@@ -143,5 +197,70 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: buffer_store_dword
+define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { ; <16 x i32> load from addrspace(2) whose lanes are summed by a chain of adds
+entry:
+  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 ; NOTE(review) call site uses #1 (nounwind only), while the first call to this intrinsic added by this patch uses #0
+  %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 ; address is %in advanced by %tmp0 i32 elements
+  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)* ; view the same address as a pointer to <16 x i32>
+  %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4 ; 64-byte vector load with align 4; the checks expect four buffer_load_dwordx4
+
+  %elt0 = extractelement <16 x i32> %tmp3, i32 0 ; pull out each of the sixteen lanes so every one gets a user
+  %elt1 = extractelement <16 x i32> %tmp3, i32 1
+  %elt2 = extractelement <16 x i32> %tmp3, i32 2
+  %elt3 = extractelement <16 x i32> %tmp3, i32 3
+  %elt4 = extractelement <16 x i32> %tmp3, i32 4
+  %elt5 = extractelement <16 x i32> %tmp3, i32 5
+  %elt6 = extractelement <16 x i32> %tmp3, i32 6
+  %elt7 = extractelement <16 x i32> %tmp3, i32 7
+  %elt8 = extractelement <16 x i32> %tmp3, i32 8
+  %elt9 = extractelement <16 x i32> %tmp3, i32 9
+  %elt10 = extractelement <16 x i32> %tmp3, i32 10
+  %elt11 = extractelement <16 x i32> %tmp3, i32 11
+  %elt12 = extractelement <16 x i32> %tmp3, i32 12
+  %elt13 = extractelement <16 x i32> %tmp3, i32 13
+  %elt14 = extractelement <16 x i32> %tmp3, i32 14
+  %elt15 = extractelement <16 x i32> %tmp3, i32 15
+
+  %add0 = add i32 %elt0, %elt1 ; serial sum of the lanes, fifteen adds to match the fifteen v_add_i32_e32 checks
+  %add1 = add i32 %add0, %elt2
+  %add2 = add i32 %add1, %elt3
+  %add3 = add i32 %add2, %elt4
+  %add4 = add i32 %add3, %elt5
+  %add5 = add i32 %add4, %elt6
+  %add6 = add i32 %add5, %elt7
+  %add7 = add i32 %add6, %elt8
+  %add8 = add i32 %add7, %elt9
+  %add9 = add i32 %add8, %elt10
+  %add10 = add i32 %add9, %elt11
+  %add11 = add i32 %add10, %elt12
+  %add12 = add i32 %add11, %elt13
+  %add13 = add i32 %add12, %elt14
+  %add14 = add i32 %add13, %elt15
+
+  store i32 %add14, i32 addrspace(1)* %out ; the total of all sixteen lanes is the only value stored
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-- 
2.34.1