1 //===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
12 #include "SIMachineFunctionInfo.h"
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "llvm/CodeGen/MachineInstrBuilder.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/LLVMContext.h"
26 // Pin the vtable to this file.
// The body is deliberately empty: the function exists only so that it has an
// out-of-line definition in this translation unit.
27 void SIMachineFunctionInfo::anchor() {}
// Constructs the SI-specific machine function info for MF.
//
// The member initializers set the TID/scratch registers and every user and
// system SGPR slot to AMDGPU::NoRegister and clear the spill and input-enable
// flags listed below. The body then consults the subtarget, the IR function's
// attributes and the frame info to decide which inputs to enable.
//
// NOTE(review): the embedded line numbers in this listing have gaps (for
// example 48 -> 55, 57 -> 61 and 84 -> 87), so some initializers and the
// statements guarded by several `if`s are not visible here -- confirm against
// the complete file before relying on this description.
29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
30 : AMDGPUMachineFunction(MF),
31 TIDReg(AMDGPU::NoRegister),
32 ScratchRSrcReg(AMDGPU::NoRegister),
33 ScratchWaveOffsetReg(AMDGPU::NoRegister),
// User SGPR slots start out unassigned; some are filled in later by the add*()
// methods in this file (e.g. addPrivateSegmentBuffer, addDispatchPtr).
34 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
35 DispatchPtrUserSGPR(AMDGPU::NoRegister),
36 QueuePtrUserSGPR(AMDGPU::NoRegister),
37 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
38 DispatchIDUserSGPR(AMDGPU::NoRegister),
39 FlatScratchInitUserSGPR(AMDGPU::NoRegister),
40 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
41 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
42 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
43 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
// System SGPR slots, likewise unassigned at construction.
44 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
45 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
46 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
47 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
48 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
// Spill bookkeeping and input-enable flags, all initially false.
55 HasSpilledSGPRs(false),
56 HasSpilledVGPRs(false),
57 PrivateSegmentBuffer(false),
61 KernargSegmentPtr(false),
62 FlatScratchInit(false),
63 GridWorkgroupCountX(false),
64 GridWorkgroupCountY(false),
65 GridWorkgroupCountZ(false),
70 PrivateSegmentWaveByteOffset(false),
74 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
75 const Function *F = MF.getFunction();
// Seed PSInputAddr from the IR function.
77 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
79 const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
// Compute shaders get the kernarg segment pointer enabled.
81 if (getShaderType() == ShaderType::COMPUTE)
82 KernargSegmentPtr = true;
// Attribute-driven enables.
// NOTE(review): the statement guarded by each of the four checks below is not
// visible in this listing.
84 if (F->hasFnAttribute("amdgpu-work-group-id-y"))
87 if (F->hasFnAttribute("amdgpu-work-group-id-z"))
90 if (F->hasFnAttribute("amdgpu-work-item-id-y"))
93 if (F->hasFnAttribute("amdgpu-work-item-id-z"))
// Request the private segment wave byte offset when the frame already has
// stack objects or the subtarget reports VGPR spilling as enabled.
96 bool MaySpill = ST.isVGPRSpillingEnabled(this);
97 bool HasStackObjects = FrameInfo->hasStackObjects();
99 if (HasStackObjects || MaySpill)
100 PrivateSegmentWaveByteOffset = true;
// On AMD HSA the same condition also enables the private segment buffer.
102 if (ST.isAmdHsaOS()) {
103 if (HasStackObjects || MaySpill)
104 PrivateSegmentBuffer = true;
// NOTE(review): the statement guarded by the dispatch-ptr check is not visible
// in this listing.
106 if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
110 // X, XY, and XYZ are the only supported combinations, so make sure Y is
// Assigns the user SGPR tuple for the private segment buffer.
//
// Asks TRI for the SReg_128 super-register matching the register returned by
// getNextUserSGPR() at sub-register index sub0, stores the result in
// PrivateSegmentBufferUserSGPR and returns it.
// NOTE(review): the embedded numbering skips line 120 between the assignment
// and the return; whatever that line does is not visible in this listing.
116 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
117 const SIRegisterInfo &TRI) {
118 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
119 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
121 return PrivateSegmentBufferUserSGPR;
// Assigns the user SGPR pair for the dispatch pointer.
//
// Asks TRI for the SReg_64 super-register matching the register returned by
// getNextUserSGPR() at sub-register index sub0, stores the result in
// DispatchPtrUserSGPR and returns it.
// NOTE(review): the embedded numbering skips line 127 between the assignment
// and the return; whatever that line does is not visible in this listing.
124 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
125 DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
126 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
128 return DispatchPtrUserSGPR;
// Assigns the user SGPR pair for the queue pointer.
//
// Asks TRI for the SReg_64 super-register matching the register returned by
// getNextUserSGPR() at sub-register index sub0, stores the result in
// QueuePtrUserSGPR and returns it.
// NOTE(review): the embedded numbering skips line 134 between the assignment
// and the return; whatever that line does is not visible in this listing.
131 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
132 QueuePtrUserSGPR = TRI.getMatchingSuperReg(
133 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
135 return QueuePtrUserSGPR;
// Assigns the user SGPR pair for the kernarg segment pointer.
//
// Asks TRI for the SReg_64 super-register matching the register returned by
// getNextUserSGPR() at sub-register index sub0, stores the result in
// KernargSegmentPtrUserSGPR and returns it.
// NOTE(review): the embedded numbering skips line 141 between the assignment
// and the return; whatever that line does is not visible in this listing.
138 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
139 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
140 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
142 return KernargSegmentPtrUserSGPR;
// Works out which "lane VGPR" backs the SGPR spill slot identified by
// FrameIndex and SubIdx, reserving a new VGPR for that slot group on first use.
//
// NOTE(review): the parameter list (original lines 146-148), the tail of the
// `for` header, and everything after the Spill.VGPR assignment (including the
// return) are not visible in this listing -- confirm against the complete file.
145 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
149 const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
150 const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
151 MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
152 MachineRegisterInfo &MRI = MF->getRegInfo();
// Start from the frame object's offset and advance by 4 per sub-register index
// (presumably 4 bytes per 32-bit sub-register -- confirm).
153 int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
154 Offset += SubIdx * 4;
// Each lane VGPR covers 64 slots of 4 offset units each: LaneVGPRIdx selects
// the VGPR and Lane selects the slot within it.
156 unsigned LaneVGPRIdx = Offset / (64 * 4);
157 unsigned Lane = (Offset / 4) % 64;
159 struct SpilledReg Spill;
// No VGPR has been recorded for this index yet: look for an unused one.
161 if (!LaneVGPRs.count(LaneVGPRIdx)) {
162 unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
164 if (LaneVGPR == AMDGPU::NoRegister) {
165 LLVMContext &Ctx = MF->getFunction()->getContext();
166 Ctx.emitError("Ran out of VGPRs for spilling SGPR");
168 // When compiling from inside Mesa, the compilation continues.
169 // Select an arbitrary register to avoid triggering assertions
170 // during subsequent passes.
171 LaneVGPR = AMDGPU::VGPR0;
// Remember the choice so later requests for the same index reuse it.
174 LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
176 // Add this register as live-in to all blocks to avoid machine verifier
177 // complaining about use of an undefined physical register.
178 for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
180 BI->addLiveIn(LaneVGPR);
// Report the VGPR recorded for this index (either just added or found earlier).
184 Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
// Returns the maximum work-group size assumed for this function: a fixed 256
// when the shader type is COMPUTE, otherwise the subtarget's wavefront size.
// MF is used only to reach the AMDGPUSubtarget.
189 unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
190 const MachineFunction &MF) const {
191 const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
192 // FIXME: We should get this information from kernel attributes if it
194 return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();