//
//===----------------------------------------------------------------------===//
-
#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {} // Empty body: only default-constructs the AMDGPURegisterInfo base.
-BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- Reserved.set(AMDGPU::EXEC);
+void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
+ MCRegAliasIterator R(Reg, this, true); // NOTE(review): third argument is presumably IncludeSelf, so Reg itself is visited too -- confirm against MCRegisterInfo.h.
- // EXEC_LO and EXEC_HI could be allocated and used as regular register,
- // but this seems likely to result in bugs, so I'm marking them as reserved.
- Reserved.set(AMDGPU::EXEC_LO);
- Reserved.set(AMDGPU::EXEC_HI);
+ for (; R.isValid(); ++R) // Visit every register the alias iterator yields for Reg.
+ Reserved.set(*R); // Set that register's bit in the caller-owned vector.
+}
+BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
- Reserved.set(AMDGPU::FLAT_SCR);
- Reserved.set(AMDGPU::FLAT_SCR_LO);
- Reserved.set(AMDGPU::FLAT_SCR_HI);
+
+ // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
+ // this seems likely to result in bugs, so I'm marking them as reserved.
+ reserveRegisterTuples(Reserved, AMDGPU::EXEC);
+ reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
// Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
- Reserved.set(AMDGPU::VGPR255);
- Reserved.set(AMDGPU::VGPR254);
+ reserveRegisterTuples(Reserved, AMDGPU::VGPR254);
+ reserveRegisterTuples(Reserved, AMDGPU::VGPR255);
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
- MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
-
- for (; R.isValid(); ++R)
- Reserved.set(*R);
+ reserveRegisterTuples(Reserved, Reg);
}
}
const int *Sets = getRegClassPressureSets(*I);
assert(Sets);
for (unsigned i = 0; Sets[i] != -1; ++i) {
- if (Sets[i] == (int)Idx)
+ if (Sets[i] == (int)Idx)
return Limit;
}
}
bool IsKill = (i == e - 1);
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(SubReg, getDefRegState(IsLoad))
- .addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addReg(SOffset)
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+ .addReg(SubReg, getDefRegState(IsLoad))
+ .addReg(ScratchRsrcReg, getKillRegState(IsKill))
+ .addReg(SOffset)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addReg(Value, RegState::Implicit | getDefRegState(IsLoad))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
}
}
}
}
-const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
- MVT VT) const {
- switch(VT.SimpleTy) {
- default:
- case MVT::i32: return &AMDGPU::VGPR_32RegClass;
- }
-}
-
unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
return getEncodingValue(Reg) & 0xff; // Keep only the low 8 bits of Reg's encoding value.
}
&AMDGPU::SReg_128RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
- &AMDGPU::VReg_512RegClass
+ &AMDGPU::VReg_512RegClass,
+ &AMDGPU::SReg_512RegClass
};
for (const TargetRegisterClass *BaseClass : BaseClasses) {
const TargetRegisterClass *SRC) const {
if (hasVGPRs(SRC)) {
return SRC;
- } else if (SRC == &AMDGPU::SCCRegRegClass) {
- return &AMDGPU::VCCRegRegClass;
} else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
return &AMDGPU::VGPR_32RegClass;
} else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
}
}
+bool SIRegisterInfo::shouldRewriteCopySrc(
+ const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // We want to prefer the smallest register class possible, so we don't want to
+ // stop and rewrite on anything that looks like a subregister
+ // extract. Operations mostly don't care about the super register class, so we
+ // only want to stop on the most basic of copies between the same register
+ // class.
+ //
+ // e.g. if we have something like
+ // vreg0 = ...
+ // vreg1 = ...
+ // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
+ // vreg3 = COPY vreg2, sub0
+ //
+ // We want to look through the COPY to find:
+ // => vreg3 = COPY vreg0
+ // Only the two register classes are consulted; DefSubReg and SrcSubReg are unused.
+ // Plain copy: true exactly when getCommonSubClass(DefRC, SrcRC) is non-null.
+ return getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
+
unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
const TargetRegisterClass *SubRC,
unsigned Channel) const {
// AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const {
-
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I) {
- if (MRI.reg_nodbg_empty(*I))
- return *I;
- }
+ for (unsigned Reg : *RC) // Walk RC's registers in the class's own iteration order.
+ if (!MRI.isPhysRegUsed(Reg)) // "Unused" here means whatever MRI.isPhysRegUsed reports as not used.
+ return Reg; // First such register wins; otherwise fall through to NoRegister below.
return AMDGPU::NoRegister;
}