From 33040cf56e9cdeb86d584c51701a7a9eb8ce5ec5 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Wed, 14 Jan 2015 15:42:31 +0000
Subject: [PATCH] R600/SI: Spill VGPRs to scratch space for compute shaders

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225988 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPU.h                  |   1 +
 lib/Target/R600/AMDGPUTargetMachine.cpp   |   1 +
 lib/Target/R600/CMakeLists.txt            |   1 +
 lib/Target/R600/SIInstrInfo.cpp           |  26 ++-
 lib/Target/R600/SIInstrInfo.td            |   2 +
 lib/Target/R600/SIInstructions.td         |  51 +++---
 lib/Target/R600/SIMachineFunctionInfo.cpp |   3 +-
 lib/Target/R600/SIMachineFunctionInfo.h   |   3 +
 lib/Target/R600/SIPrepareScratchRegs.cpp  | 196 ++++++++++++++++++++++
 lib/Target/R600/SIRegisterInfo.cpp        | 156 ++++++++++-------
 lib/Target/R600/SIRegisterInfo.h          |   9 +-
 11 files changed, 353 insertions(+), 96 deletions(-)
 create mode 100644 lib/Target/R600/SIPrepareScratchRegs.cpp

diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 574506339c0..fcf9eca80e9 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -47,6 +47,7 @@ FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
 FunctionPass *createSIFixSGPRLiveRangesPass();
 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
 FunctionPass *createSIInsertWaits(TargetMachine &tm);
+FunctionPass *createSIPrepareScratchRegs();
 
 void initializeSIFoldOperandsPass(PassRegistry &);
 extern char &SIFoldOperandsID;
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 2a6fbf23931..a1da7172d53 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -189,6 +189,7 @@ void AMDGPUPassConfig::addPostRegAlloc() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
 
   if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+    addPass(createSIPrepareScratchRegs(), false);
     addPass(createSIShrinkInstructionsPass(), false);
   }
 }
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 3b703e72943..5a4bae2f93c 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -51,6 +51,7 @@ add_llvm_target(R600CodeGen
   SILowerControlFlow.cpp
   SILowerI1Copies.cpp
   SIMachineFunctionInfo.cpp
+  SIPrepareScratchRegs.cpp
   SIRegisterInfo.cpp
   SIShrinkInstructions.cpp
   SITypeRewriter.cpp
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index b911d418680..71872ee492a 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -433,13 +433,9 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
 
 static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const TargetMachine &TM = MF->getTarget();
 
-  // FIXME: Even though it can cause problems, we need to enable
-  // spilling at -O0, since the fast register allocator always
-  // spills registers that are live at the end of blocks.
-  return MFI->getShaderType() == ShaderType::COMPUTE &&
-         TM.getOptLevel() == CodeGenOpt::None;
+  // FIXME: Implement spilling for other shader types.
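+  // Spilling is intentionally allowed at all optimization levels; the fast
+  // register allocator at -O0 always spills registers that are live at the
+  // end of blocks.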
+  return MFI->getShaderType() == ShaderType::COMPUTE;
 }
 
@@ -450,6 +446,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
   MachineFunction *MF = MBB.getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo *FrameInfo = MF->getFrameInfo();
   DebugLoc DL = MBB.findDebugLoc(MI);
   int Opcode = -1;
@@ -466,6 +463,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
     }
   } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+    MFI->setHasSpilledVGPRs();
+
     switch(RC->getSize() * 8) {
       case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
       case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
@@ -480,7 +479,11 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode))
             .addReg(SrcReg)
-            .addFrameIndex(FrameIndex);
+            .addFrameIndex(FrameIndex)
+            // Place-holder registers; these will be filled in by
+            // SIPrepareScratchRegs.
+            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0, RegState::Undef);
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
     Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
@@ -522,7 +525,12 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   if (Opcode != -1) {
     FrameInfo->setObjectAlignment(FrameIndex, 4);
     BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-            .addFrameIndex(FrameIndex);
+            .addFrameIndex(FrameIndex)
+            // Place-holder registers; these will be filled in by
+            // SIPrepareScratchRegs.
+            .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+            .addReg(AMDGPU::SGPR0, RegState::Undef);
+
   } else {
     LLVMContext &Ctx = MF->getFunction()->getContext();
     Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
@@ -553,7 +561,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
     MachineBasicBlock::iterator Insert = Entry.front();
     DebugLoc DL = Insert->getDebugLoc();
 
-    TIDReg = RI.findUnusedVGPR(MF->getRegInfo());
+    TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
     if (TIDReg == AMDGPU::NoRegister)
       return TIDReg;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 6235de33fa7..7cc9588c8e4 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -1763,6 +1763,7 @@ multiclass MUBUF_Load_Helper_vi <bits<7> op, string asm, RegisterClass regClass,
 
 multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
                                ValueType store_vt, SDPatternOperator st> {
+  let mayLoad = 0, mayStore = 1 in {
   let addr64 = 0 in {
 
     def "" : MUBUF_si <
@@ -1820,6 +1821,7 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
     let tfe = 0;
     let soffset = 128; // ZERO
   }
+  } // End mayLoad = 0, mayStore = 1
 }
 
 class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index dd0ee407d3e..e05b6bb7d0f 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1940,18 +1940,20 @@ def V_SUB_F64 : InstSI <
 
 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
 
-  def _SAVE : InstSI <
-    (outs),
-    (ins sgpr_class:$src, i32imm:$frame_idx),
-    "", []
-  >;
-
-  def _RESTORE : InstSI <
-    (outs sgpr_class:$dst),
-    (ins i32imm:$frame_idx),
-    "", []
-  >;
-
+  let UseNamedOperandTable = 1 in {
+    def _SAVE : InstSI <
+      (outs),
+      (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+           SReg_32:$scratch_offset),
+      "", []
+    >;
+
+    def _RESTORE : InstSI <
+      (outs sgpr_class:$dst),
+      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+      "", []
+    >;
+  } // End UseNamedOperandTable = 1
 }
 
 defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
@@ -1961,17 +1963,20 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
 
 multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
-  def _SAVE : InstSI <
-    (outs),
-    (ins vgpr_class:$src, i32imm:$frame_idx),
-    "", []
-  >;
-
-  def _RESTORE : InstSI <
-    (outs vgpr_class:$dst),
-    (ins i32imm:$frame_idx),
-    "", []
-  >;
+  let UseNamedOperandTable = 1 in {
+    def _SAVE : InstSI <
+      (outs),
+      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+           SReg_32:$scratch_offset),
+      "", []
+    >;
+
+    def _RESTORE : InstSI <
+      (outs vgpr_class:$dst),
+      (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+      "", []
+    >;
+  } // End UseNamedOperandTable = 1
 }
 
 defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index d58f31db508..198dd568374 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -29,6 +29,7 @@ void SIMachineFunctionInfo::anchor() {}
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
     TIDReg(AMDGPU::NoRegister),
+    HasSpilledVGPRs(false),
     PSInputAddr(0),
     NumUserSGPRs(0),
     LDSWaveSpillSize(0) { }
@@ -50,7 +51,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
   struct SpilledReg Spill;
 
   if (!LaneVGPRs.count(LaneVGPRIdx)) {
-    unsigned LaneVGPR = TRI->findUnusedVGPR(MRI);
+    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
     LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
     MRI.setPhysRegUsed(LaneVGPR);
 
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 6bb8f9d6ced..71852717d7e 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -29,6 +29,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
   void anchor() override;
 
   unsigned TIDReg;
+  bool HasSpilledVGPRs;
 
 public:
 
@@ -52,6 +53,8 @@ public:
   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
   unsigned getTIDReg() const { return TIDReg; };
   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+  bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
+  void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
 
   unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
 };
diff --git a/lib/Target/R600/SIPrepareScratchRegs.cpp b/lib/Target/R600/SIPrepareScratchRegs.cpp
new file mode 100644
index 00000000000..f0e7edec6b4
--- /dev/null
+++ b/lib/Target/R600/SIPrepareScratchRegs.cpp
@@ -0,0 +1,196 @@
+//===-- SIPrepareScratchRegs.cpp - Prepare scratch registers for spills ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This pass copies the scratch pointer and scratch offset into a free SGPR
+/// or a frame index that can be referenced anywhere in the program. These
+/// values are later used for spilling VGPRs.
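+///
+/// Concretely: in the entry block the preloaded scratch pointer and scratch
+/// wave offset are either copied into unused SGPRs or, if none are free,
+/// spilled to dedicated stack slots; every SI_SPILL_V* pseudo then has its
+/// place-holder scratch operands rewritten to the chosen (or scavenged)
+/// registers.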
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+namespace {
+
+class SIPrepareScratchRegs : public MachineFunctionPass {
+
+private:
+  static char ID;
+
+public:
+  SIPrepareScratchRegs() : MachineFunctionPass(ID) { }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "SI prepare scratch registers";
+  }
+
+};
+
+} // End anonymous namespace
+
+char SIPrepareScratchRegs::ID = 0;
+
+FunctionPass *llvm::createSIPrepareScratchRegs() {
+  return new SIPrepareScratchRegs();
+}
+
+bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+  MachineBasicBlock *Entry = MF.begin();
+  MachineBasicBlock::iterator I = Entry->begin();
+  DebugLoc DL = I->getDebugLoc();
+
+  // FIXME: If we don't have enough VGPRs for SGPR spilling, we will need to
+  // run this pass even when there are no VGPR spills.
+  if (!MFI->hasSpilledVGPRs())
+    return false;
+
+  unsigned ScratchPtrPreloadReg =
+      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+  unsigned ScratchOffsetPreloadReg =
+      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+
+  if (!Entry->isLiveIn(ScratchPtrPreloadReg))
+    Entry->addLiveIn(ScratchPtrPreloadReg);
+
+  if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
+    Entry->addLiveIn(ScratchOffsetPreloadReg);
+
+  // Load the scratch pointer.
+  unsigned ScratchPtrReg =
+      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
+  int ScratchPtrFI = -1;
+
+  if (ScratchPtrReg != AMDGPU::NoRegister) {
+    // Found an SGPR to use.
+    MRI.setPhysRegUsed(ScratchPtrReg);
+    BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
+            .addReg(ScratchPtrPreloadReg);
+  } else {
+    // No SGPR is available; we must spill.
+    ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
+    BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
+            .addReg(ScratchPtrPreloadReg)
+            .addFrameIndex(ScratchPtrFI);
+  }
+
+  // Load the scratch offset.
+  unsigned ScratchOffsetReg =
+      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
+  int ScratchOffsetFI = -1;
+
+  if (ScratchOffsetReg != AMDGPU::NoRegister) {
+    // Found an SGPR to use.
+    MRI.setPhysRegUsed(ScratchOffsetReg);
+    BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
+            .addReg(ScratchOffsetPreloadReg);
+  } else {
+    // No SGPR is available; we must spill.
+    ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4, 4);
+    BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
+            .addReg(ScratchOffsetPreloadReg)
+            .addFrameIndex(ScratchOffsetFI);
+  }
+
+  // Now that we have the scratch pointer and offset values, we need to
+  // add them to all the SI_SPILL_V* instructions.
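+  //
+  // If either value had to be spilled above, a register scavenger walks each
+  // block so the value can be reloaded into a scavenged SGPR before the first
+  // spill instruction that needs it.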
+
+  RegScavenger RS;
+  bool UseRegScavenger =
+      (ScratchPtrReg == AMDGPU::NoRegister ||
+       ScratchOffsetReg == AMDGPU::NoRegister);
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+       BI != BE; ++BI) {
+
+    MachineBasicBlock &MBB = *BI;
+    if (UseRegScavenger)
+      RS.enterBasicBlock(&MBB);
+
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+      DebugLoc DL = MI.getDebugLoc();
+      switch(MI.getOpcode()) {
+        default: break;
+        case AMDGPU::SI_SPILL_V512_SAVE:
+        case AMDGPU::SI_SPILL_V256_SAVE:
+        case AMDGPU::SI_SPILL_V128_SAVE:
+        case AMDGPU::SI_SPILL_V96_SAVE:
+        case AMDGPU::SI_SPILL_V64_SAVE:
+        case AMDGPU::SI_SPILL_V32_SAVE:
+        case AMDGPU::SI_SPILL_V32_RESTORE:
+        case AMDGPU::SI_SPILL_V64_RESTORE:
+        case AMDGPU::SI_SPILL_V128_RESTORE:
+        case AMDGPU::SI_SPILL_V256_RESTORE:
+        case AMDGPU::SI_SPILL_V512_RESTORE:
+
+          // Scratch Pointer
+          if (ScratchPtrReg == AMDGPU::NoRegister) {
+            ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
+            BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
+                    ScratchPtrReg)
+                    .addFrameIndex(ScratchPtrFI)
+                    .addReg(AMDGPU::NoRegister)
+                    .addReg(AMDGPU::NoRegister);
+          } else if (!MBB.isLiveIn(ScratchPtrReg)) {
+            MBB.addLiveIn(ScratchPtrReg);
+          }
+
+          if (ScratchOffsetReg == AMDGPU::NoRegister) {
+            ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+            BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
+                    ScratchOffsetReg)
+                    .addFrameIndex(ScratchOffsetFI)
+                    .addReg(AMDGPU::NoRegister)
+                    .addReg(AMDGPU::NoRegister);
+          } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
+            MBB.addLiveIn(ScratchOffsetReg);
+          }
+
+          if (ScratchPtrReg == AMDGPU::NoRegister ||
+              ScratchOffsetReg == AMDGPU::NoRegister) {
+            LLVMContext &Ctx = MF.getFunction()->getContext();
+            Ctx.emitError("ran out of SGPRs for spilling VGPRs");
+            ScratchPtrReg = AMDGPU::SGPR0;
+            ScratchOffsetReg = AMDGPU::SGPR0;
+          }
+          MI.getOperand(2).setReg(ScratchPtrReg);
+          MI.getOperand(3).setReg(ScratchOffsetReg);
+
+          break;
+      }
+      if (UseRegScavenger)
+        RS.forward();
+    }
+  }
+  return true;
+}
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index ec59f2589e0..f9feea470f1 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 
+#include "llvm/Support/Debug.h"
 using namespace llvm;
 
 SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
@@ -94,6 +95,84 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   }
 }
 
+void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
+                                           unsigned LoadStoreOp,
+                                           unsigned Value,
+                                           unsigned ScratchPtr,
+                                           unsigned ScratchOffset,
+                                           int64_t Offset,
+                                           RegScavenger *RS) const {
+
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
+  MachineBasicBlock *MBB = MI->getParent();
+  const MachineFunction *MF = MI->getParent()->getParent();
+  LLVMContext &Ctx = MF->getFunction()->getContext();
+  DebugLoc DL = MI->getDebugLoc();
+  bool IsLoad = TII->get(LoadStoreOp).mayLoad();
+
+  bool RanOutOfSGPRs = false;
+  unsigned SOffset = ScratchOffset;
+
+  unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
+  if (RsrcReg == AMDGPU::NoRegister) {
+    RanOutOfSGPRs = true;
+    RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+  }
+
+  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+  unsigned Size = NumSubRegs * 4;
+
+  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+                  0xffffffff; // Size
+
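+  // The four 32-bit words of the resource descriptor: sub0-sub1 hold the
+  // 64-bit scratch base pointer, sub2 the (effectively unbounded) size, and
+  // sub3 the data format bits with per-thread (TID) addressing enabled.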
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
+          getSubReg(RsrcReg, AMDGPU::sub0_sub1))
+          .addReg(ScratchPtr)
+          .addReg(RsrcReg, RegState::ImplicitDefine);
+
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
+          getSubReg(RsrcReg, AMDGPU::sub2))
+          .addImm(Rsrc & 0xffffffff)
+          .addReg(RsrcReg, RegState::ImplicitDefine);
+
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
+          getSubReg(RsrcReg, AMDGPU::sub3))
+          .addImm(Rsrc >> 32)
+          .addReg(RsrcReg, RegState::ImplicitDefine);
+
+  if (!isUInt<12>(Offset + Size)) {
+    SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+    if (SOffset == AMDGPU::NoRegister) {
+      RanOutOfSGPRs = true;
+      SOffset = AMDGPU::SGPR0;
+    }
+    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
+            .addReg(ScratchOffset)
+            .addImm(Offset);
+    Offset = 0;
+  }
+
+  if (RanOutOfSGPRs)
+    Ctx.emitError("ran out of SGPRs for spilling VGPRs");
+
+  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
+    unsigned SubReg = NumSubRegs > 1 ?
+        getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
+        Value;
+    bool IsKill = (i == e - 1);
+
+    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+            .addReg(SubReg, getDefRegState(IsLoad))
+            .addReg(RsrcReg, getKillRegState(IsKill))
+            .addImm(Offset)
+            .addReg(SOffset, getKillRegState(IsKill))
+            .addImm(0) // glc
+            .addImm(0) // slc
+            .addImm(0) // tfe
+            .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+  }
+}
+
 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
@@ -162,7 +241,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 
           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
                   .addReg(Spill.VGPR)
-                  .addImm(Spill.Lane);
+                  .addImm(Spill.Lane)
+                  .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
           if (isM0) {
             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
                     .addReg(SubReg);
@@ -179,71 +259,24 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V128_SAVE:
     case AMDGPU::SI_SPILL_V96_SAVE:
     case AMDGPU::SI_SPILL_V64_SAVE:
-    case AMDGPU::SI_SPILL_V32_SAVE: {
-      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-      unsigned SrcReg = MI->getOperand(0).getReg();
-      int64_t Offset = FrameInfo->getObjectOffset(Index);
-      unsigned Size = NumSubRegs * 4;
-      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
-        unsigned SubReg = NumSubRegs > 1 ?
-            getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
-            SrcReg;
-        Offset += (i * 4);
-        MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);
-
-        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
-                                                         Offset, Size);
-
-        if (AddrReg == AMDGPU::NoRegister) {
-           LLVMContext &Ctx = MF->getFunction()->getContext();
-           Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
-           AddrReg = AMDGPU::VGPR0;
-        }
-
-        // Store the value in LDS
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
-                .addImm(0) // gds
-                .addReg(AddrReg, RegState::Kill) // addr
-                .addReg(SubReg) // data0
-                .addImm(0); // offset
-      }
-
+    case AMDGPU::SI_SPILL_V32_SAVE:
+      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+            TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+            FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
       break;
-    }
     case AMDGPU::SI_SPILL_V32_RESTORE:
     case AMDGPU::SI_SPILL_V64_RESTORE:
     case AMDGPU::SI_SPILL_V128_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
     case AMDGPU::SI_SPILL_V512_RESTORE: {
-      unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
-      unsigned DstReg = MI->getOperand(0).getReg();
-      int64_t Offset = FrameInfo->getObjectOffset(Index);
-      unsigned Size = NumSubRegs * 4;
-      unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-      // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
-      for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
-        unsigned SubReg = NumSubRegs > 1 ?
-            getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
-            DstReg;
-
-        Offset += (i * 4);
-        unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
-                                                         Offset, Size);
-        if (AddrReg == AMDGPU::NoRegister) {
-           LLVMContext &Ctx = MF->getFunction()->getContext();
-           Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
-           AddrReg = AMDGPU::VGPR0;
-        }
-
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
-                .addImm(0) // gds
-                .addReg(AddrReg, RegState::Kill) // addr
-                .addImm(0); //offset
-      }
+      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+            TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+            FrameInfo->getObjectOffset(Index), RS);
       MI->eraseFromParent();
       break;
     }
@@ -431,9 +464,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
 
 /// \brief Returns a register that is not used at any point in the function.
 ///        If all registers are used, then this function will return
 //         AMDGPU::NoRegister.
-unsigned SIRegisterInfo::findUnusedVGPR(const MachineRegisterInfo &MRI) const {
-
-  const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;
+unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
+                                   const TargetRegisterClass *RC) const {
 
   for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
        I != E; ++I) {
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 28de31b8c02..d14212c2b10 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -105,7 +105,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
   unsigned getPreloadedValue(const MachineFunction &MF,
                              enum PreloadedValue Value) const;
 
-  unsigned findUnusedVGPR(const MachineRegisterInfo &MRI) const;
+  unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
+                              const TargetRegisterClass *RC) const;
+
+private:
+  void buildScratchLoadStore(MachineBasicBlock::iterator MI,
+                             unsigned LoadStoreOp, unsigned Value,
+                             unsigned ScratchPtr, unsigned ScratchOffset,
+                             int64_t Offset, RegScavenger *RS) const;
 };
 
 } // End namespace llvm
-- 
2.34.1
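
A note on the isUInt<12>(Offset + Size) check in buildScratchLoadStore: MUBUF
instructions encode their immediate offset in a 12-bit field, so a spill slot
whose last dword would land at byte 4096 or beyond cannot use the immediate
and must instead fold the frame offset into the scratch wave offset SGPR with
S_ADD_U32. The standalone C++ sketch below illustrates that decision;
fitsInUnsigned12 and classifyScratchAccess are invented names for
illustration and are not part of the patch or of LLVM.

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Stand-in for llvm::isUInt<12>(): true iff V fits in 12 unsigned bits.
  static bool fitsInUnsigned12(int64_t V) {
    return V >= 0 && V < (int64_t(1) << 12);
  }

  // Hypothetical mirror of the addressing decision in buildScratchLoadStore:
  // either the frame offset fits in the MUBUF immediate field, or it is
  // pre-added into the soffset SGPR and the immediate becomes 0.
  struct ScratchAddress {
    bool FoldIntoSOffset; // true: emit S_ADD_U32 soffset, scratch_offset, Offset
    int64_t Immediate;    // value for the 12-bit MUBUF offset field
  };

  static ScratchAddress classifyScratchAccess(int64_t Offset, unsigned Size) {
    assert(Offset >= 0 && "scratch frame offsets are non-negative here");
    if (!fitsInUnsigned12(Offset + Size))
      return {true, 0};     // too large for the immediate: add into soffset
    return {false, Offset}; // fits: encode directly in the instruction
  }

  int main() {
    // A 4-dword (128-bit) spill at byte offset 4080: 4080 + 16 = 4096 does
    // not fit in 12 bits, so the offset is folded into soffset.
    ScratchAddress A = classifyScratchAccess(4080, 16);
    std::printf("fold=%d imm=%lld\n", A.FoldIntoSOffset, (long long)A.Immediate);

    // A single-dword spill at offset 64 fits in the immediate field.
    ScratchAddress B = classifyScratchAccess(64, 4);
    std::printf("fold=%d imm=%lld\n", B.FoldIntoSOffset, (long long)B.Immediate);
    return 0;
  }

Because the per-dword loop in buildScratchLoadStore emits immediates Offset,
Offset + 4, ..., Offset + Size - 4, the range test covers Offset + Size
rather than Offset alone.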