From: Tom Stellard Date: Tue, 20 Jan 2015 19:33:04 +0000 (+0000) Subject: R600/SI: Add subtarget feature to enable VGPR spilling for all shader types X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=46846844ee01e12904fa2f4b0179f2eadf524d57;p=oota-llvm.git R600/SI: Add subtarget feature to enable VGPR spilling for all shader types This is disabled by default, but can be enabled with the subtarget feature: 'vgpr-spilling' git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226597 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 8a5ca613dc8..1df4448abf0 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -92,6 +92,11 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "true", "Support flat address space">; +def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling", + "EnableVGPRSpilling", + "true", + "Enable spilling of VGPRs to scratch memory">; + class SubtargetFeatureFetchLimit : SubtargetFeature <"fetch"#Value, "TexVTXClauseSize", diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 8e25f8c2140..cae116f4168 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -423,6 +423,7 @@ static unsigned getRsrcReg(unsigned ShaderType) { void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo) { + const AMDGPUSubtarget &STM = TM.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned RsrcReg = getRsrcReg(MFI->getShaderType()); @@ -443,6 +444,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer.EmitIntValue(RsrcReg, 4); OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) | S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4); + if (STM.isVGPRSpillingEnabled(MFI)) { + OutStreamer.EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); + OutStreamer.EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4); + } } if (MFI->getShaderType() == ShaderType::PIXEL) { diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 597e558e663..9f22adcc7f6 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -18,6 +18,7 @@ #include "R600MachineScheduler.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" using namespace llvm; @@ -78,6 +79,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), + EnableVGPRSpilling(false), DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) @@ -113,3 +115,8 @@ unsigned AMDGPUSubtarget::getAmdKernelCodeChipID() const { case SEA_ISLANDS: return 12; } } + +bool AMDGPUSubtarget::isVGPRSpillingEnabled( + const SIMachineFunctionInfo *MFI) const { + return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling; +} diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 90179d79d25..cc048a4db87 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -30,6 +30,8 @@ namespace llvm { +class SIMachineFunctionInfo; + class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { public: @@ -63,6 +65,7 @@ private: unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; + bool EnableVGPRSpilling; const DataLayout DL; AMDGPUFrameLowering FrameLowering; @@ -224,6 +227,7 @@ public: bool isAmdHsaOS() const { return TargetTriple.getOS() == Triple::AMDHSA; } + bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const; }; } // End namespace llvm diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 1c74dda5362..7601794beab 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -163,5 +163,8 @@ namespace SIOutMods { #define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860 #define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12) +#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8 +#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12) + #endif diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0a3fa2f930d..6b2ea0682a4 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -588,6 +588,12 @@ SDValue SITargetLowering::LowerFormalArguments( InVals.push_back(Val); } + + if (Info->getShaderType() != ShaderType::COMPUTE) { + unsigned ScratchIdx = CCInfo.getFirstUnallocated( + AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()); + Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx); + } return Chain; } diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index ccf90ddfae0..80b560eb65a 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -430,15 +430,6 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { return AMDGPU::COPY; } -static bool shouldTryToSpillVGPRs(MachineFunction *MF) { - - SIMachineFunctionInfo *MFI = MF->getInfo(); - - // FIXME: Implement spilling for other shader types. - return MFI->getShaderType() == ShaderType::COMPUTE; - -} - void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, @@ -462,7 +453,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; } - } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) { + } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { MFI->setHasSpilledVGPRs(); switch(RC->getSize() * 8) { @@ -499,6 +490,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); + const SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); int Opcode = -1; @@ -511,7 +503,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; } - } else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) { + } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { switch(RC->getSize() * 8) { case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break; case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break; diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 71852717d7e..667da4c8af6 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -50,6 +50,7 @@ public: unsigned NumUserSGPRs; std::map LaneVGPRs; unsigned LDSWaveSpillSize; + unsigned ScratchOffsetReg; bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; unsigned getTIDReg() const { return TIDReg; }; void setTIDReg(unsigned Reg) { TIDReg = Reg; } diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 166df66a27a..380c98b48d7 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -424,6 +424,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, case SIRegisterInfo::TGID_Z: return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2); case SIRegisterInfo::SCRATCH_WAVE_OFFSET: + if (MFI->getShaderType() != ShaderType::COMPUTE) + return MFI->ScratchOffsetReg; return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4); case SIRegisterInfo::SCRATCH_PTR: return AMDGPU::SGPR2_SGPR3;