1 //===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
12 #include "SIMachineFunctionInfo.h"
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "llvm/CodeGen/MachineInstrBuilder.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/LLVMContext.h"
26 // Pin the vtable to this file.
// anchor() is intentionally empty; it exists only so that the class has an
// out-of-line member definition in this translation unit.
27 void SIMachineFunctionInfo::anchor() {}
// Constructs the SI machine-function info for MF.
//
// Every register field in the initializer list below starts out as
// AMDGPU::NoRegister and every flag visible below starts out as false. The
// constructor body then switches individual flags on based on the shader
// type, function attributes, frame info, and subtarget queries.
//
// NOTE(review): the embedded line numbers in this listing skip values, so
// some initializers and the statements guarded by several of the `if`s below
// are not visible here; consult the full file before editing.
29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
30 : AMDGPUMachineFunction(MF),
31 TIDReg(AMDGPU::NoRegister),
32 ScratchRSrcReg(AMDGPU::NoRegister),
33 ScratchWaveOffsetReg(AMDGPU::NoRegister),
34 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
35 DispatchPtrUserSGPR(AMDGPU::NoRegister),
36 QueuePtrUserSGPR(AMDGPU::NoRegister),
37 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
38 DispatchIDUserSGPR(AMDGPU::NoRegister),
39 FlatScratchInitUserSGPR(AMDGPU::NoRegister),
40 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
41 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
42 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
43 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
44 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
45 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
46 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
47 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
48 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
54 HasSpilledSGPRs(false),
55 HasSpilledVGPRs(false),
56 PrivateSegmentBuffer(false),
60 KernargSegmentPtr(false),
61 FlatScratchInit(false),
62 GridWorkgroupCountX(false),
63 GridWorkgroupCountY(false),
64 GridWorkgroupCountZ(false),
69 PrivateSegmentWaveByteOffset(false),
73 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
74 const Function *F = MF.getFunction();
// Seed PSInputAddr from the IR function via AMDGPU::getInitialPSInputAddr.
76 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
78 const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
// Compute shaders always get the KernargSegmentPtr flag set.
80 if (getShaderType() == ShaderType::COMPUTE)
81 KernargSegmentPtr = true;
// Checks for the "amdgpu-work-group-id-{y,z}" and "amdgpu-work-item-id-{y,z}"
// function attributes. NOTE(review): the statement each check guards is not
// visible in this listing.
83 if (F->hasFnAttribute("amdgpu-work-group-id-y"))
86 if (F->hasFnAttribute("amdgpu-work-group-id-z"))
89 if (F->hasFnAttribute("amdgpu-work-item-id-y"))
92 if (F->hasFnAttribute("amdgpu-work-item-id-z"))
// PrivateSegmentWaveByteOffset is set when the function has stack objects or
// the subtarget reports that VGPR spilling is enabled for this function.
95 bool MaySpill = ST.isVGPRSpillingEnabled(this);
96 bool HasStackObjects = FrameInfo->hasStackObjects();
98 if (HasStackObjects || MaySpill)
99 PrivateSegmentWaveByteOffset = true;
// On AMD HSA OS targets the same condition additionally sets
// PrivateSegmentBuffer.
101 if (ST.isAmdHsaOS()) {
102 if (HasStackObjects || MaySpill)
103 PrivateSegmentBuffer = true;
105 if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
109 // X, XY, and XYZ are the only supported combinations, so make sure Y is
// NOTE(review): the remainder of the comment above and the code it describes
// are not visible in this listing.
// Records the private segment buffer user SGPR and returns it.
//
// The register is obtained from TRI.getMatchingSuperReg, asking for the
// SReg_128 register whose sub0 sub-register is getNextUserSGPR(); the result
// is stored in PrivateSegmentBufferUserSGPR.
// NOTE(review): original line 119 (between the assignment and the return) is
// not visible in this listing.
115 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
116 const SIRegisterInfo &TRI) {
117 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
118 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
120 return PrivateSegmentBufferUserSGPR;
// Records the dispatch pointer user SGPR and returns it.
//
// The register is obtained from TRI.getMatchingSuperReg, asking for the
// SReg_64 register whose sub0 sub-register is getNextUserSGPR(); the result
// is stored in DispatchPtrUserSGPR.
// NOTE(review): original line 126 (between the assignment and the return) is
// not visible in this listing.
123 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
124 DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
125 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
127 return DispatchPtrUserSGPR;
// Records the queue pointer user SGPR and returns it.
//
// The register is obtained from TRI.getMatchingSuperReg, asking for the
// SReg_64 register whose sub0 sub-register is getNextUserSGPR(); the result
// is stored in QueuePtrUserSGPR.
// NOTE(review): original line 133 (between the assignment and the return) is
// not visible in this listing.
130 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
131 QueuePtrUserSGPR = TRI.getMatchingSuperReg(
132 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
134 return QueuePtrUserSGPR;
// Records the kernarg segment pointer user SGPR and returns it.
//
// The register is obtained from TRI.getMatchingSuperReg, asking for the
// SReg_64 register whose sub0 sub-register is getNextUserSGPR(); the result
// is stored in KernargSegmentPtrUserSGPR.
// NOTE(review): original line 140 (between the assignment and the return) is
// not visible in this listing.
137 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
138 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
139 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
141 return KernargSegmentPtrUserSGPR;
// Determines the lane VGPR used to hold a spilled value, derived from the
// frame object FrameIndex and the sub-register index SubIdx.
//
// A lane VGPR is allocated on first use of a given LaneVGPRIdx and cached in
// LaneVGPRs; later requests for the same index reuse the cached register.
// NOTE(review): the parameter list (original lines 145-147), the assignment
// of the lane field, and the return statement are not visible in this
// listing; presumably the filled-in Spill is returned -- confirm against the
// full file.
144 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
148 const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
149 const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
150 MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
151 MachineRegisterInfo &MRI = MF->getRegInfo();
// Start from the frame object's offset and advance by 4 per sub-register
// index.
152 int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
153 Offset += SubIdx * 4;
// Each lane VGPR covers 64 * 4 units of offset; Lane is the position
// (0..63) within that VGPR.
155 unsigned LaneVGPRIdx = Offset / (64 * 4);
156 unsigned Lane = (Offset / 4) % 64;
158 struct SpilledReg Spill;
// No VGPR has been assigned to this index yet: find an unused VGPR_32.
160 if (!LaneVGPRs.count(LaneVGPRIdx)) {
161 unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
163 if (LaneVGPR == AMDGPU::NoRegister) {
164 LLVMContext &Ctx = MF->getFunction()->getContext();
165 Ctx.emitError("Ran out of VGPRs for spilling SGPR");
167 // When compiling from inside Mesa, the compilation continues.
168 // Select an arbitrary register to avoid triggering assertions
169 // during subsequent passes.
170 LaneVGPR = AMDGPU::VGPR0;
// Cache the chosen register so later lookups for this index reuse it.
173 LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
175 // Add this register as live-in to all blocks to avoid the machine verifier
176 // complaining about use of an undefined physical register.
177 for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
179 BI->addLiveIn(LaneVGPR);
183 Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
// Returns the maximum work group size assumed for MF: 256 when the shader
// type is COMPUTE, otherwise the subtarget's wavefront size.
// NOTE(review): the continuation of the FIXME comment below (original line
// 192) is not visible in this listing.
188 unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
189 const MachineFunction &MF) const {
190 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
191 // FIXME: We should get this information from kernel attributes if it
193 return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();