From 17ea10cb792832c99677afa13b9b866098bc4679 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 5 Apr 2013 23:31:51 +0000 Subject: [PATCH] R600/SI: Add support for buffer stores v2 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit v2: - Use the ADDR64 bit Reviewed-by: Christian König git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178931 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUCallingConv.td | 8 ++++++- lib/Target/R600/AMDGPUISelLowering.h | 1 + lib/Target/R600/AMDILISelDAGToDAG.cpp | 23 +++++++++++++++++++ lib/Target/R600/SIISelLowering.cpp | 32 +++++++++++++++++++++++++++ lib/Target/R600/SIISelLowering.h | 1 + lib/Target/R600/SIInstrInfo.td | 26 ++++++++++++++++++++++ lib/Target/R600/SIInstructions.td | 10 +++++++-- lib/Target/R600/SIRegisterInfo.td | 2 +- test/CodeGen/R600/imm.ll | 3 --- test/CodeGen/R600/store.ll | 11 +++++++++ 10 files changed, 110 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/R600/store.ll diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 45ae37ef0c7..9c30515a342 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -32,8 +32,14 @@ def CC_SI : CallingConv<[ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31 - ]>>> + ]>>>, + // This is the default for i64 values. + // XXX: We should change this once clang understands the CC_AMDGPU. + CCIfType<[i64], CCAssignToRegWithShadow< + [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], + [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ] + >> ]>; def CC_AMDGPU : CallingConv<[ diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index f31b6466bd4..c2a79ea9995 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -116,6 +116,7 @@ enum { BRANCH_COND, // End AMDIL ISD Opcodes BITALIGN, + BUFFER_STORE, DWORDADDR, FRACT, FMAX, diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index fa8f62de9c0..e0e0482ad49 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -191,6 +191,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs, 2 * N->getNumOperands() + 1); } + case ISD::BUILD_PAIR: { + SDValue RC, SubReg0, SubReg1; + const AMDGPUSubtarget &ST = TM.getSubtarget(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + break; + } + if (N->getValueType(0) == MVT::i128) { + RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32); + SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); + SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); + } else if (N->getValueType(0) == MVT::i64) { + RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32); + SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); + SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); + } else { + llvm_unreachable("Unhandled value type for BUILD_PAIR"); + } + const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, + N->getOperand(1), SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, + N->getDebugLoc(), N->getValueType(0), Ops, 5); + } + case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget(); diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 7fa28d9532c..ef0df9e3484 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -49,6 +49,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); @@ -70,6 +71,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); @@ -234,6 +239,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); } return SDValue(); } @@ -332,6 +338,32 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, return Chain; } +#define RSRC_DATA_FORMAT 0xf00000000000 + +SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode *StoreNode = cast(Op); + SDValue Chain = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue VirtualAddress = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + + if (StoreNode->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) { + return SDValue(); + } + + SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, + DAG.getConstant(0, MVT::i64), + DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64)); + + SDValue Ops[2]; + Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain, + Value, SrcSrc, VirtualAddress); + Ops[1] = Chain; + + return DAG.getMergeValues(Ops, 2, DL); + +} + SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 5ad2f40f0f3..ace32f81353 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -24,6 +24,7 @@ class SITargetLowering : public AMDGPUTargetLowering { const SIInstrInfo * TII; const TargetRegisterInfo * TRI; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index e77a88616ba..6ed83db6b10 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -26,6 +26,10 @@ def HI32 : SDNodeXFormgetTargetConstant(N->getZExtValue() >> 32, MVT::i32); }]>; +def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE", + SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>, + [SDNPHasChain, SDNPMayStore]>; + def IMM8bitDWORD : ImmLeaf < i32, [{ return (Imm & ~0x3FC) == 0; @@ -296,6 +300,28 @@ class MUBUF_Load_Helper op, string asm, RegisterClass regClass> : MUBUF let mayStore = 0; } +class MUBUF_Store_Helper op, string name, RegisterClass vdataClass, + ValueType VT> : + MUBUF { + + let mayLoad = 0; + let mayStore = 1; + + // Encoding + let offset = 0; + let offen = 0; + let idxen = 0; + let glc = 0; + let addr64 = 1; + let lds = 0; + let slc = 0; + let tfe = 0; + let soffset = 128; // ZERO +} + class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF < op, (outs regClass:$dst), diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 4f734f91245..6bae3be7743 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -408,8 +408,14 @@ def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; -//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; -//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>; + +def BUFFER_STORE_DWORD : MUBUF_Store_Helper < + 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32 +>; + +def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < + 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64 +>; //def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>; //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 4f14931a9c4..2c101073c3d 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -151,7 +151,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SGPR_64, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll index 02b73096ce3..979efb00e7b 100644 --- a/test/CodeGen/R600/imm.ll +++ b/test/CodeGen/R600/imm.ll @@ -1,8 +1,5 @@ ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s -; XXX: Enable once SI supports buffer stores -; XFAIL: * - ; Use a 64-bit value with lo bits that can be represented as an inline constant ; CHECK: @i64_imm_inline_lo ; CHECK: S_MOV_B32 [[LO:SGPR[0-9]+]], 5 diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll new file mode 100644 index 00000000000..4382bfff490 --- /dev/null +++ b/test/CodeGen/R600/store.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s + +; CHECK: @store_float +; EG-CHECK: RAT_WRITE_CACHELESS_32_eg T{{[0-9]+\.X, T[0-9]+\.X}}, 1 +; SI-CHECK: BUFFER_STORE_DWORD + +define void @store_float(float addrspace(1)* %out, float %in) { + store float %in, float addrspace(1)* %out + ret void +} -- 2.34.1