From 87bd2fa24b725cf52fbf5fbe0696ef219839c4ee Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 19 Dec 2014 22:15:30 +0000 Subject: [PATCH] R600/SI: Make sure non-inline constants aren't folded into mubuf soffset operand mubuf instructions now define the soffset field using the SCSrc_32 register class which indicates that only SGPRs and inline constants are allowed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224622 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../R600/MCTargetDesc/SIMCCodeEmitter.cpp | 5 ++- lib/Target/R600/SIInstrInfo.td | 30 +++++++------- lib/Target/R600/SIRegisterInfo.cpp | 1 + lib/Target/R600/SIRegisterInfo.td | 6 +++ test/CodeGen/R600/mubuf.ll | 39 +++++++++++++++++++ 5 files changed, 64 insertions(+), 17 deletions(-) diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 999fd0dbc9a..4b693c4e8dc 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -90,8 +90,9 @@ bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc, (AMDGPU::SSrc_64RegClassID == RegClass) || (AMDGPU::VSrc_32RegClassID == RegClass) || (AMDGPU::VSrc_64RegClassID == RegClass) || - (AMDGPU::VCSrc_32RegClassID == RegClass) || - (AMDGPU::VCSrc_64RegClassID == RegClass); + (AMDGPU::VCSrc_32RegClassID == RegClass) || + (AMDGPU::VCSrc_64RegClassID == RegClass) || + (AMDGPU::SCSrc_32RegClassID == RegClass); } uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO) const { diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 8122ccb65cc..0471c6dbf4c 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -1498,7 +1498,7 @@ multiclass MTBUF_Store_Helper op, string opName, op, opName, (outs), (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, - SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, 
SSrc_32:$soffset), + SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset), opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt," #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", [] >; @@ -1512,7 +1512,7 @@ multiclass MTBUF_Load_Helper op, string opName, op, opName, (outs regClass:$dst), (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc, - i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), + i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset), opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt," #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", [] >; @@ -1579,7 +1579,7 @@ multiclass MUBUF_Atomic op, string name, RegisterClass rc, def _OFFSET : MUBUFAtomicOffset < op, (outs), (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset, - SSrc_32:$soffset, slc:$slc), + SCSrc_32:$soffset, slc:$slc), name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [] >, MUBUFAddr64Table<0>, AtomicNoRet; } // glc = 0 @@ -1601,7 +1601,7 @@ multiclass MUBUF_Atomic op, string name, RegisterClass rc, def _RTN_OFFSET : MUBUFAtomicOffset < op, (outs rc:$vdata), (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset, - SSrc_32:$soffset, slc:$slc), + SCSrc_32:$soffset, slc:$slc), name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc", [(set vt:$vdata, (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, @@ -1624,7 +1624,7 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass, let offen = 0, idxen = 0, vaddr = 0 in { def _OFFSET : MUBUF_si op, string asm, RegisterClass regClass, let offen = 1, idxen = 0 in { def _OFFEN : MUBUF_si ; } @@ -1644,7 +1644,7 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass, let offen = 0, idxen = 1 in { def _IDXEN : MUBUF_si ; } @@ -1652,7 +1652,7 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass, let offen = 1, idxen = 1 in { def _BOTHEN : MUBUF_si ; } } @@ -1675,7 +1675,7 @@ multiclass 
MUBUF_Load_Helper_vi op, string asm, RegisterClass regClass, let offen = 0, idxen = 0, vaddr = 0 in { def _OFFSET : MUBUF_vi op, string asm, RegisterClass regClass, let offen = 1, idxen = 0 in { def _OFFEN : MUBUF_vi ; } @@ -1695,7 +1695,7 @@ multiclass MUBUF_Load_Helper_vi op, string asm, RegisterClass regClass, let offen = 0, idxen = 1 in { def _IDXEN : MUBUF_vi ; } @@ -1703,7 +1703,7 @@ multiclass MUBUF_Load_Helper_vi op, string asm, RegisterClass regClass, let offen = 1, idxen = 1 in { def _BOTHEN : MUBUF_vi ; } } @@ -1716,7 +1716,7 @@ multiclass MUBUF_Store_Helper op, string name, RegisterClass vdataClass def "" : MUBUF_si < op, (outs), - (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SSrc_32:$soffset, + (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SCSrc_32:$soffset, mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"# @@ -1728,7 +1728,7 @@ multiclass MUBUF_Store_Helper op, string name, RegisterClass vdataClass def _OFFSET : MUBUF_si < op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset, - SSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), + SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, @@ -1739,7 +1739,7 @@ multiclass MUBUF_Store_Helper op, string name, RegisterClass vdataClass let offen = 1, idxen = 0 in { def _OFFEN : MUBUF_si < op, (outs), - (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SSrc_32:$soffset, + (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"# "$glc"#"$slc"#"$tfe", diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 5dc0f755f14..7bd573cb6a6 100644 --- 
a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -414,6 +414,7 @@ bool SIRegisterInfo::regClassCanUseInlineConstant(int RCID) const { default: return false; case AMDGPU::VCSrc_32RegClassID: case AMDGPU::VCSrc_64RegClassID: + case AMDGPU::SCSrc_32RegClassID: return true; } } diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index a79da004def..20bcd96c7c4 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -221,6 +221,12 @@ def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>; +//===----------------------------------------------------------------------===// +// SCSrc_* Operands with an SGPR or an inline constant +//===----------------------------------------------------------------------===// + +def SCSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; + //===----------------------------------------------------------------------===// // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll index 9033b554f37..61db21a0270 100644 --- a/test/CodeGen/R600/mubuf.ll +++ b/test/CodeGen/R600/mubuf.ll @@ -52,6 +52,45 @@ entry: ret void } +; CHECK-LABEL: {{^}}soffset_max_imm: +; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc +define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 { +main_body: + %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0 + %tmp1 = load <16 x i8> addrspace(2)* %tmp0 + %tmp2 = shl i32 %6, 2 + %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> 
%tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) + %tmp4 = add i32 %6, 16 + %tmp5 = bitcast float 0.0 to i32 + call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) + ret void +} + +; Make sure immediates that aren't inline constants don't get folded into +; the soffset operand. +; FIXME: for this test we should be smart enough to shift the immediate into +; the offset field. +; CHECK-LABEL: {{^}}soffset_no_fold: +; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41 +; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc +define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 { +main_body: + %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0 + %tmp1 = load <16 x i8> addrspace(2)* %tmp0 + %tmp2 = shl i32 %6, 2 + %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0) + %tmp4 = add i32 %6, 16 + %tmp5 = bitcast float 0.0 to i32 + call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) + ret void +} + +declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3 +declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) + +attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" } +attributes #3 = { nounwind readonly } + ;;;==========================================================================;;; ;;; MUBUF STORE TESTS ;;;==========================================================================;;; -- 2.34.1