From a1d28f6dd713d31269b612ac4aba0d408a7e9990 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 21 Mar 2014 15:51:57 +0000 Subject: [PATCH] R600/SI: Handle MUBUF instructions in SIInstrInfo::moveToVALU() git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204476 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUTargetMachine.cpp | 3 + lib/Target/R600/SIISelLowering.cpp | 4 +- lib/Target/R600/SIInstrFormats.td | 1 + lib/Target/R600/SIInstrInfo.cpp | 136 +++++++++++++++++++++++- lib/Target/R600/SIInstrInfo.h | 10 ++ test/CodeGen/R600/salu-to-valu.ll | 8 +- 6 files changed, 157 insertions(+), 5 deletions(-) diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 7f5042876e7..b11fce34025 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -165,6 +165,9 @@ bool AMDGPUPassConfig::addPreRegAlloc() { addPass(createR600VectorRegMerger(*TM)); } else { addPass(createSIFixSGPRCopiesPass(*TM)); + // SIFixSGPRCopies can generate a lot of duplicate instructions, + // so we need to run MachineCSE afterwards. 
+ addPass(&MachineCSEID); } return false; } diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 52e5a16759d..fd1e3a69bd1 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -25,8 +25,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/Function.h" -const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; - using namespace llvm; SITargetLowering::SITargetLowering(TargetMachine &TM) : @@ -407,7 +405,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo) .addImm(0); BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi) - .addImm(RSRC_DATA_FORMAT >> 32); + .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi) .addReg(SubRegHiLo) .addImm(AMDGPU::sub0) diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 53ebaaf15a7..aa2c22c51eb 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -369,6 +369,7 @@ class MUBUF op, dag outs, dag ins, string asm, list pattern> : let EXP_CNT = 1; let neverHasSideEffects = 1; + let UseNamedOperandTable = 1; } class MTBUF op, dag outs, dag ins, string asm, list pattern> : diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index c8a52971efa..6c2185eafe8 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -558,6 +558,32 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const { MO.ChangeToRegister(Reg, false); } +unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, + MachineRegisterInfo &MRI, + MachineOperand &SuperReg, + const TargetRegisterClass *SuperRC, + unsigned SubIdx, + const TargetRegisterClass *SubRC) + const { + assert(SuperReg.isReg()); + + unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); + 
unsigned SubReg = MRI.createVirtualRegister(SubRC); + + // Just in case the super register is itself a sub-register, copy it to a new + // value so we don't need to worry about merging its subreg index with the + // SubIdx passed to this function. The register coalescer should be able to + // eliminate this extra copy. + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), + NewSuperReg) + .addOperand(SuperReg); + + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), + SubReg) + .addReg(NewSuperReg, 0, SubIdx); + return SubReg; +} + void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), @@ -675,6 +701,110 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { MI->getOperand(i).setReg(DstReg); } } + + // Legalize MUBUF* instructions + // FIXME: If we start using the non-addr64 instructions for compute, we + // may need to legalize them here. + + int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), + AMDGPU::OpName::srsrc); + int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), + AMDGPU::OpName::vaddr); + if (SRsrcIdx != -1 && VAddrIdx != -1) { + const TargetRegisterClass *VAddrRC = + RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass); + + if(VAddrRC->getSize() == 8 && + MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) { + // We have a MUBUF instruction that uses a 64-bit vaddr register and + // srsrc has the incorrect register class. In order to fix this, we + // need to extract the pointer from the resource descriptor (srsrc), + // add it to the value of vaddr, then store the result in the vaddr + // operand. Then, we need to set the pointer field of the resource + // descriptor to zero. 
+ + MachineBasicBlock &MBB = *MI->getParent(); + MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx); + MachineOperand &VAddrOp = MI->getOperand(VAddrIdx); + unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi; + unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); + unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); + unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); + unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + + // SRsrcPtrLo = srsrc:sub0 + SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp, + &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass); + + // SRsrcPtrHi = srsrc:sub1 + SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp, + &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass); + + // VAddrLo = vaddr:sub0 + VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp, + &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass); + + // VAddrHi = vaddr:sub1 + VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp, + &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass); + + // NewVaddrLo = SRsrcPtrLo + VAddrLo + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32), + NewVAddrLo) + .addReg(SRsrcPtrLo) + .addReg(VAddrLo) + .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit); + + // NewVaddrHi = SRsrcPtrHi + VAddrHi + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32), + NewVAddrHi) + .addReg(SRsrcPtrHi) + .addReg(VAddrHi) + .addReg(AMDGPU::VCC, RegState::ImplicitDefine) + .addReg(AMDGPU::VCC, RegState::Implicit); + + // NewVaddr = {NewVaddrHi, NewVaddrLo} + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), + NewVAddr) + .addReg(NewVAddrLo) + .addImm(AMDGPU::sub0) + 
.addReg(NewVAddrHi) + .addImm(AMDGPU::sub1); + + // Zero64 = 0 + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64), + Zero64) + .addImm(0); + + // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), + SRsrcFormatLo) + .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); + + // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), + SRsrcFormatHi) + .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); + + // NewSRsrc = {Zero64, SRsrcFormat} + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), + NewSRsrc) + .addReg(Zero64) + .addImm(AMDGPU::sub0_sub1) + .addReg(SRsrcFormatLo) + .addImm(AMDGPU::sub2) + .addReg(SRsrcFormatHi) + .addImm(AMDGPU::sub3); + + // Update the instruction to use NewVaddr + MI->getOperand(VAddrIdx).setReg(NewVAddr); + // Update the instruction to use NewSRsrc + MI->getOperand(SRsrcIdx).setReg(NewSRsrc); + } + } } void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { @@ -731,8 +861,12 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } unsigned NewOpcode = getVALUOp(*Inst); - if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) + if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { + // We cannot move this instruction to the VALU, so we should try to + // legalize its operands instead. + legalizeOperands(Inst); continue; + } // Use the new VALU Opcode. 
const MCInstrDesc &NewDesc = get(NewOpcode); diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index bb8bc72ff86..11dbfb8e489 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -25,6 +25,13 @@ class SIInstrInfo : public AMDGPUInstrInfo { private: const SIRegisterInfo RI; + unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, + MachineRegisterInfo &MRI, + MachineOperand &SuperReg, + const TargetRegisterClass *SuperRC, + unsigned SubIdx, + const TargetRegisterClass *SubRC) const; + public: explicit SIInstrInfo(AMDGPUTargetMachine &tm); @@ -142,6 +149,9 @@ namespace AMDGPU { int getCommuteRev(uint16_t Opcode); int getCommuteOrig(uint16_t Opcode); + const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; + + } // End namespace AMDGPU } // End namespace llvm diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll index c989c9d6722..e461bf9acec 100644 --- a/test/CodeGen/R600/salu-to-valu.ll +++ b/test/CodeGen/R600/salu-to-valu.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s ; In this test both the pointer and the offset operands to the ; BUFFER_LOAD instructions end up being stored in vgprs. This @@ -8,8 +8,14 @@ ; (low 64-bits of srsrc). ; CHECK-LABEL: @mubuf + ; Make sure we aren't using VGPRs for the source operand of S_MOV_B64 ; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v + +; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_* +; instructions +; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] +; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #1 -- 2.34.1