This was the slowest target custom pass and was spending 80%
of its time in getMinimalPhysRegClass, which was called
for every register operand.
Try to use the statically known register class from the
instruction's MCOperandInfo when possible. A few pseudo instructions
are not well behaved and have unknown register classes, so they still
require the expensive physical register class search.
There are a few other possibilities for making this even faster,
such as not inspecting implicit operands. For now those are checked
because it is technically possible to have a scalar load into
exec or vcc which can be implicitly used.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249079 91177308-0d34-0410-b5e6-96231b3b80d8
bool isOpRelevant(MachineOperand &Op);
/// \brief Get register interval an operand affects.
bool isOpRelevant(MachineOperand &Op);
/// \brief Get register interval an operand affects.
- RegInterval getRegInterval(MachineOperand &Op);
+ RegInterval getRegInterval(const TargetRegisterClass *RC,
+ const MachineOperand &Reg) const;
/// \brief Handle instructions async components
void pushInstruction(MachineBasicBlock &MBB,
/// \brief Handle instructions async components
void pushInstruction(MachineBasicBlock &MBB,
}
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
}
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
-
- uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
+ uint64_t TSFlags = MI.getDesc().TSFlags;
Counters Result = { { 0, 0, 0 } };
Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
Counters Result = { { 0, 0, 0 } };
Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
MachineOperand &Op = MI.getOperand(0);
assert(Op.isReg() && "First LGKM operand must be a register!");
MachineOperand &Op = MI.getOperand(0);
assert(Op.isReg() && "First LGKM operand must be a register!");
- unsigned Reg = Op.getReg();
-
// XXX - What if this is a write into a super register?
// XXX - What if this is a write into a super register?
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+ const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
+ unsigned Size = RC->getSize();
Result.Named.LGKM = Size > 4 ? 2 : 1;
} else {
// s_dcache_inv etc. do not have a a destination register. Assume we
Result.Named.LGKM = Size > 4 ? 2 : 1;
} else {
// s_dcache_inv etc. do not have a a destination register. Assume we
}
bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
}
bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
// Constants are always irrelevant
// Constants are always irrelevant
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
return false;
// Defines are always relevant
return false;
// Defines are always relevant
-RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
-
- if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
- return std::make_pair(0, 0);
-
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
-
+RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
+ const MachineOperand &Reg) const {
+ unsigned Size = RC->getSize();
assert(Size >= 4);
RegInterval Result;
assert(Size >= 4);
RegInterval Result;
- Result.first = TRI->getEncodingValue(Reg);
+ Result.first = TRI->getEncodingValue(Reg.getReg());
Result.second = Result.first + Size / 4;
return Result;
Result.second = Result.first + Size / 4;
return Result;
}
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
}
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
MachineOperand &Op = I->getOperand(i);
if (!isOpRelevant(Op))
continue;
MachineOperand &Op = I->getOperand(i);
if (!isOpRelevant(Op))
continue;
- RegInterval Interval = getRegInterval(Op);
+ const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
+ RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
// Remember which registers we define
for (unsigned j = Interval.first; j < Interval.second; ++j) {
// Remember which registers we define
if (MI.getOpcode() == AMDGPU::S_SENDMSG)
return LastIssued;
if (MI.getOpcode() == AMDGPU::S_SENDMSG)
return LastIssued;
- // For each register affected by this
- // instruction increase the result sequence
+ // For each register affected by this instruction increase the result
+ // sequence.
+ //
+ // TODO: We could probably just look at explicit operands if we removed VCC /
+ // EXEC from SMRD dest reg classes.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI.getOperand(i);
MachineOperand &Op = MI.getOperand(i);
- RegInterval Interval = getRegInterval(Op);
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
+ continue;
+
+ const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
+ RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
if (Op.isDef()) {
for (unsigned j = Interval.first; j < Interval.second; ++j) {
if (Op.isDef()) {
return getEncodingValue(Reg) & 0xff;
}
return getEncodingValue(Reg) & 0xff;
}
+// FIXME: This is very slow. It might be worth creating a map from physreg to
+// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));