From 33d8695b8891998a77b62f07dc8fb493a7d72b85 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 28 Sep 2015 20:54:46 +0000 Subject: [PATCH] AMDGPU: Fix moving SMRD loads with literal offsets on CI git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248740 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/SIInstrInfo.cpp | 12 +++- test/CodeGen/AMDGPU/salu-to-valu.ll | 99 ++++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 4 deletions(-) diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 9419afebd26..432f763d9cb 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1493,11 +1493,17 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; case AMDGPU::S_LOAD_DWORD_IMM: - case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; + case AMDGPU::S_LOAD_DWORD_SGPR: + case AMDGPU::S_LOAD_DWORD_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; case AMDGPU::S_LOAD_DWORDX2_IMM: - case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; + case AMDGPU::S_LOAD_DWORDX2_SGPR: + case AMDGPU::S_LOAD_DWORDX2_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; case AMDGPU::S_LOAD_DWORDX4_IMM: - case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; + case AMDGPU::S_LOAD_DWORDX4_SGPR: + case AMDGPU::S_LOAD_DWORDX4_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll index 01907ba2d41..de798182271 100644 --- a/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s declare i32 @llvm.r600.read.tidig.x() #0 declare i32 @llvm.r600.read.tidig.y() #0 @@ -87,6 +88,99 @@ entry: ret void } +; Use a big offset that will use the SMRD literal offset on CI +; GCN-LABEL: {{^}}smrd_valu_ci_offset: +; GCN: s_movk_i32 s[[OFFSET:[0-9]+]], 0x4e20{{$}} +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET]]:{{[0-9]+}}], 0 addr64{{$}} +; GCN: v_add_i32_e32 +; GCN: buffer_store_dword +define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000 + %tmp4 = load i32, i32 addrspace(2)* %tmp3 + %tmp5 = add i32 %tmp4, %c + store i32 %tmp5, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2: +; GCN: s_mov_b32 s[[OFFSET:[0-9]+]], 0x9c40{{$}} +; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET]]:{{[0-9]+}}], 0 addr64{{$}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx2 +define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000 + %tmp4 = load i64, i64 addrspace(2)* %tmp3 + %tmp5 = or i64 %tmp4, %c + store i64 %tmp5, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4: +; GCN: s_movk_i32 s[[OFFSET:[0-9]+]], 0x4d20{{$}} +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET]]:{{[0-9]+}}], 0 addr64{{$}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx4 +define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234 + %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3 + %tmp5 = or <4 x i32> %tmp4, %c + store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out + ret void +} + +; Original scalar load uses SGPR offset on SI and 32-bit literal on +; CI. + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8: +; GCN: s_mov_b32 s[[OFFSET0:[0-9]+]], 0x9a40{{$}} +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET0]]:{{[0-9]+}}], 0 addr64{{$}} + +; SI: s_add_i32 s[[OFFSET1:[0-9]+]], s[[OFFSET0]], 16 +; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET1]]:{{[0-9]+}}], 0 addr64{{$}} + +; CI: s_mov_b32 s[[OFFSET1:[0-9]+]], 0x9a50{{$}} +; CI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET1]]:{{[0-9]+}}], 0 addr64{{$}} + +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234 + %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3 + %tmp5 = or <8 x i32> %tmp4, %c + store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}smrd_valu2_salu_user: ; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]] @@ -119,7 +213,10 @@ entry: ; FIXME: We should be using the offset but we don't ; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset: -; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: s_movk_i32 s[[OFFSET:[0-9]+]], 0x400{{$}} +; SI: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET]]:{{[0-9]+\]}}, 0 addr64{{$}} + +; CI: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: %tmp = call i32 @llvm.r600.read.tidig.x() #0 -- 2.34.1