From 3443ffa83371872bf32c7cf6259ba2bc5a8068b9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 28 Sep 2015 20:54:52 +0000
Subject: [PATCH] AMDGPU: Fix splitting x16 SMRD loads

When used recursively, this would set the kill flag
on the intermediate step from first splitting
x16 to x8.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248741 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/SIInstrInfo.cpp   |  4 +--
 test/CodeGen/AMDGPU/salu-to-valu.ll | 43 +++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 432f763d9cb..5d0cfb32e4c 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2037,8 +2037,8 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
                   .addOperand(*SOff);
     unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
     BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
-            .addOperand(*SOff)
-            .addImm(HalfSize);
+            .addReg(SOff->getReg(), 0, SOff->getSubReg())
+            .addImm(HalfSize);
     Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
            .addReg(SBase->getReg(), getKillRegState(IsKill),
                    SBase->getSubReg())
diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll
index de798182271..65b4a7dadba 100644
--- a/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -181,6 +181,49 @@ entry:
   ret void
 }
 
+; FIXME: should use immediate offset instead of using s_add_i32 for adding to constant.
+; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
+
+; GCN: s_mov_b32 s[[OFFSET0:[0-9]+]], 0x13480{{$}}
+; SI: s_add_i32 s[[OFFSET1:[0-9]+]], s[[OFFSET0]], 16
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET0]]:{{[0-9]+}}], 0 addr64{{$}}
+
+; CI: s_mov_b32 s[[OFFSET1:[0-9]+]], 0x13490{{$}}
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET1]]:{{[0-9]+}}], 0 addr64{{$}}
+
+; SI: s_add_i32 s[[OFFSET2:[0-9]+]], s[[OFFSET0]], 32
+; CI: s_mov_b32 s[[OFFSET2:[0-9]+]], 0x134a0
+
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET2]]:{{[0-9]+}}], 0 addr64{{$}}
+; GCN: s_add_i32 s[[OFFSET3:[0-9]+]], s[[OFFSET2]], 16
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET3]]:{{[0-9]+}}], 0 addr64{{$}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
+entry:
+  %tmp = call i32 @llvm.r600.read.tidig.x() #0
+  %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
+  %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234
+  %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3
+  %tmp5 = or <16 x i32> %tmp4, %c
+  store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}smrd_valu2_salu_user:
 ; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
-- 
2.34.1
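
For readers skimming the SIInstrInfo.cpp hunk, a minimal sketch of the
failure mode. This is an illustration, not the actual splitSMRD body:
MBB, MI, DL, SOff, OffsetSGPR, and HalfSize come from the surrounding
function, and the comments are editorial inferences from the commit
message rather than author text.

  // Before the fix: addOperand() clones the given MachineOperand
  // verbatim, kill flag included. When splitSMRD is used recursively
  // (an x16 load splits into x8 halves, which are split again), the
  // S_ADD_I32 emitted by the first split can claim to kill SOff even
  // though the later splits still read it.
  BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
      .addOperand(*SOff)                            // copies kill flag
      .addImm(HalfSize);

  // After the fix: build a fresh register use with no flags (the 0
  // argument), keeping only the register and subregister index, so no
  // stale kill flag lands on the intermediate add.
  BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
      .addReg(SOff->getReg(), 0, SOff->getSubReg()) // no stale flags
      .addImm(HalfSize);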