const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
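+ // With XNACK enabled, 2 more SGPRs are reserved for xnack_mask, so step the
+ // scratch rsrc base down to the next 4-SGPR tuple.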
+ if (ST.isXNACKEnabled())
+ BaseIdx -= 4;
+
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
- // next sgpr128 down.
+ // Without XNACK, 98/99 are reserved for flat_scr; with XNACK, 96/97 hold
+ // flat_scr and 98/99 hold xnack_mask. 100/101 are reserved for vcc in both
+ // cases, so this is the next sgpr128 down either way.
return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
}
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
- unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
+ unsigned Idx;
+
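+ // Without XNACK: skip the 4 reserved SGPRs (flat_scr, vcc) and the 4-SGPR
+ // scratch rsrc tuple below them. With XNACK: skip the 6 reserved SGPRs
+ // (flat_scr, xnack_mask, vcc).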
+ if (!ST.isXNACKEnabled())
+ Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
+ else
+ Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
+
return AMDGPU::SGPR_32RegClass.getRegister(Idx);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // Next register before reservations for flat_scr and vcc.
- return AMDGPU::SGPR97;
+ if (!ST.isXNACKEnabled()) {
+ // Next register before reservations for flat_scr and vcc.
+ return AMDGPU::SGPR97;
+ } else {
+ // Next register before reservations for flat_scr, xnack_mask, vcc,
+ // and scratch resource.
+ return AMDGPU::SGPR91;
+ }
}
return AMDGPU::SGPR95;
// for VCC/FLAT_SCR.
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
+
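+ // With XNACK, flat_scr moves down to 96/97 and xnack_mask occupies 98/99,
+ // so one more pair must be reserved.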
+ if (ST.isXNACKEnabled())
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
}
// Tonga and Iceland can only allocate a fixed number of SGPRs due
if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
- // Assume XNACK_MASK is unused.
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
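+ // Reserve 2 more for XNACK_MASK when XNACK is enabled.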
+ if (ST.isXNACKEnabled())
+ Limit -= 2;
+
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
- if (Spill.VGPR == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling SGPR");
- }
-
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
- if (Spill.VGPR == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling SGPR");
- }
-
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
// TODO: only do this when it is needed
switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
- // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
- TII->insertNOPs(MI, 3);
+ // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
+ // ("S_NOP 3") on SI
+ TII->insertWaitStates(MI, 4);
break;
case AMDGPUSubtarget::SEA_ISLANDS:
break;
default: // VOLCANIC_ISLANDS and later
- // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
- // and later. This also applies to VALUs which write VCC, but we're
- // unlikely to see VMEM use VCC.
- TII->insertNOPs(MI, 4);
+ // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
+ // ("S_NOP 4") on VI and later. This also applies to VALUs which write
+ // VCC, but we're unlikely to see VMEM use VCC.
+ TII->insertWaitStates(MI, 5);
}
MI->eraseFromParent();
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const {
- const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ (void)ST;
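+ // The (void) cast keeps ST from triggering an unused-variable warning in
+ // builds where it is not otherwise referenced.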
switch (Value) {
case SIRegisterInfo::WORKGROUP_ID_X:
assert(MFI->hasWorkGroupIDX());