From: Marek Olsak Date: Wed, 13 Jan 2016 11:45:36 +0000 (+0000) Subject: AMDGPU/SI: Add new target attribute InitialPSInputAddr X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=d2b0d84d0fd44db1eaaa30fcd6d7e5af4cb062a0 AMDGPU/SI: Add new target attribute InitialPSInputAddr Summary: This allows Mesa to pass initial SPI_PS_INPUT_ADDR to LLVM. The register assigns VGPR locations to PS inputs, while the ENA register determines whether or not they are loaded. Mesa needs to set some inputs as not-movable, so that a pixel shader prolog binary appended at the beginning can assume where some inputs are. v2: Make PSInputAddr private, because there are never enough silly getters and setters for people to read. Reviewers: tstellarAMD, arsenm Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16030 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257591 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index c1c34c37932..1239dfb235e 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -587,7 +587,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4); OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); - OutStreamer->EmitIntValue(MFI->PSInputAddr, 4); + OutStreamer->EmitIntValue(MFI->PSInputEna, 4); + OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); + OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4); } } diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index 7f79dd34f3b..aa1e352ed74 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -137,7 +137,7 @@ namespace SIOutMods { #define C_00B84C_EXCP_EN #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC - +#define 
R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 #define S_00B848_VGPRS(x) (((x) & 0x3F) << 0) diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index c251752bb0d..5959afbcce2 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -601,14 +601,18 @@ SDValue SITargetLowering::LowerFormalArguments( assert((PSInputNum <= 15) && "Too many PS inputs!"); - if (!Arg.Used) { + if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) { // We can safely skip PS inputs Skipped.set(i); ++PSInputNum; continue; } - Info->PSInputAddr |= 1 << PSInputNum++; + Info->markPSInputAllocated(PSInputNum); + if (Arg.Used) + Info->PSInputEna |= 1 << PSInputNum; + + ++PSInputNum; } // Second split vertices into their elements @@ -638,11 +642,18 @@ SDValue SITargetLowering::LowerFormalArguments( *DAG.getContext()); // At least one interpolation mode must be enabled or else the GPU will hang. + // + // Check PSInputAddr instead of PSInputEna. The idea is that if the user set + // PSInputAddr, the user wants to enable some bits after the compilation + // based on run-time states. Since we can't know what the final PSInputEna + // will look like, we shouldn't do anything here and the user should take + // responsibility for the correct programming. 
if (Info->getShaderType() == ShaderType::PIXEL && - (Info->PSInputAddr & 0x7F) == 0) { - Info->PSInputAddr |= 1; + (Info->getPSInputAddr() & 0x7F) == 0) { CCInfo.AllocateReg(AMDGPU::VGPR0); CCInfo.AllocateReg(AMDGPU::VGPR1); + Info->markPSInputAllocated(0); + Info->PSInputEna |= 1; } if (Info->getShaderType() == ShaderType::COMPUTE) { diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index bf15516bea7..878fd768bba 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -46,8 +46,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), - LDSWaveSpillSize(0), PSInputAddr(0), + LDSWaveSpillSize(0), + PSInputEna(0), NumUserSGPRs(0), NumSystemSGPRs(0), HasSpilledSGPRs(false), @@ -72,6 +73,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) const AMDGPUSubtarget &ST = MF.getSubtarget(); const Function *F = MF.getFunction(); + PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); + const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); if (getShaderType() == ShaderType::COMPUTE) diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 9c528d63bd0..61d575385ff 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -57,10 +57,13 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction { unsigned WorkGroupInfoSystemSGPR; unsigned PrivateSegmentWaveByteOffsetSystemSGPR; + // Graphics info. 
+ unsigned PSInputAddr; + public: // FIXME: Make private unsigned LDSWaveSpillSize; - unsigned PSInputAddr; + unsigned PSInputEna; std::map LaneVGPRs; unsigned ScratchOffsetReg; unsigned NumUserSGPRs; @@ -273,6 +276,18 @@ public: HasSpilledVGPRs = Spill; } + unsigned getPSInputAddr() const { + return PSInputAddr; + } + + bool isPSInputAllocated(unsigned Index) const { + return PSInputAddr & (1 << Index); + } + + void markPSInputAllocated(unsigned Index) { + PSInputAddr |= 1 << Index; + } + unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const; }; diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index add415e215c..3b4c235c0dc 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -106,20 +106,27 @@ bool isReadOnlySegment(const GlobalValue *GV) { return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; } -static const char ShaderTypeAttribute[] = "ShaderType"; - -unsigned getShaderType(const Function &F) { - Attribute A = F.getFnAttribute(ShaderTypeAttribute); - unsigned ShaderType = ShaderType::COMPUTE; +static unsigned getIntegerAttribute(const Function &F, const char *Name, + unsigned Default) { + Attribute A = F.getFnAttribute(Name); + unsigned Result = Default; if (A.isStringAttribute()) { StringRef Str = A.getValueAsString(); - if (Str.getAsInteger(0, ShaderType)) { + if (Str.getAsInteger(0, Result)) { LLVMContext &Ctx = F.getContext(); Ctx.emitError("can't parse shader type"); } } - return ShaderType; + return Result; +} + +unsigned getShaderType(const Function &F) { + return getIntegerAttribute(F, "ShaderType", ShaderType::COMPUTE); +} + +unsigned getInitialPSInputAddr(const Function &F) { + return getIntegerAttribute(F, "InitialPSInputAddr", 0); } bool isSI(const MCSubtargetInfo &STI) { diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 19419a29f5e..57cbe1b58f9 100644 --- 
a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -45,6 +45,8 @@ bool isGlobalSegment(const GlobalValue *GV); bool isReadOnlySegment(const GlobalValue *GV); unsigned getShaderType(const Function &F); +unsigned getInitialPSInputAddr(const Function &F); + bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI);