//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "machine-trace-metrics"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "machine-trace-metrics"
+
char MachineTraceMetrics::ID = 0;
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
"machine-trace-metrics", "Machine Trace Metrics", false, true)
MachineTraceMetrics::MachineTraceMetrics()
- : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) {
- std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+ : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
+ MRI(nullptr), Loops(nullptr) {
+ std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
- TII = MF->getTarget().getInstrInfo();
- TRI = MF->getTarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
MRI = &MF->getRegInfo();
Loops = &getAnalysis<MachineLoopInfo>();
- const TargetSubtargetInfo &ST =
- MF->getTarget().getSubtarget<TargetSubtargetInfo>();
- SchedModel.init(*ST.getSchedModel(), &ST, TII);
+ SchedModel.init(ST.getSchedModel(), &ST, TII);
BlockInfo.resize(MF->getNumBlockIDs());
+ ProcResourceCycles.resize(MF->getNumBlockIDs() *
+ SchedModel.getNumProcResourceKinds());
return false;
}
void MachineTraceMetrics::releaseMemory() {
- MF = 0;
+ MF = nullptr;
BlockInfo.clear();
for (unsigned i = 0; i != TS_NumStrategies; ++i) {
delete Ensembles[i];
- Ensembles[i] = 0;
+ Ensembles[i] = nullptr;
}
}
return FBI;
// Compute resource usage in the block.
- // FIXME: Compute per-functional unit counts.
FBI->HasCalls = false;
unsigned InstrCount = 0;
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MachineInstr *MI = I;
- if (MI->isTransient())
+
+ // Add up per-processor resource cycles as well.
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ SmallVector<unsigned, 32> PRCycles(PRKinds);
+
+ for (const auto &MI : *MBB) {
+ if (MI.isTransient())
continue;
++InstrCount;
- if (MI->isCall())
+ if (MI.isCall())
FBI->HasCalls = true;
+
+ // Count processor resources used.
+ if (!SchedModel.hasInstrSchedModel())
+ continue;
+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
+ if (!SC->isValid())
+ continue;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind");
+ PRCycles[PI->ProcResourceIdx] += PI->Cycles;
+ }
}
FBI->InstrCount = InstrCount;
+
+ // Scale the resource cycles so they are comparable.
+ unsigned PROffset = MBB->getNumber() * PRKinds;
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceCycles[PROffset + K] =
+ PRCycles[K] * SchedModel.getResourceFactor(K);
+
return FBI;
}
+ArrayRef<unsigned>
+MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
+ assert(BlockInfo[MBBNum].hasResources() &&
+ "getResources() must be called before getProcResourceCycles()");
+ unsigned PRKinds = SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size());
+ return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
+}
+
+
//===----------------------------------------------------------------------===//
// Ensemble utility functions
//===----------------------------------------------------------------------===//
MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
: MTM(*ct) {
BlockInfo.resize(MTM.BlockInfo.size());
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds);
+ ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds);
}
// Virtual destructor serves as an anchor.
void MachineTraceMetrics::Ensemble::
computeDepthResources(const MachineBasicBlock *MBB) {
TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
// Compute resources from trace above. The top block is simple.
if (!TBI->Pred) {
TBI->InstrDepth = 0;
TBI->Head = MBB->getNumber();
+ std::fill(ProcResourceDepths.begin() + PROffset,
+ ProcResourceDepths.begin() + PROffset + PRKinds, 0);
return;
}
// Compute from the block above. A post-order traversal ensures the
// predecessor is always computed first.
- TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+ unsigned PredNum = TBI->Pred->getNumber();
+ TraceBlockInfo *PredTBI = &BlockInfo[PredNum];
assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
TBI->Head = PredTBI->Head;
+
+ // Compute per-resource depths.
+ ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum);
+ ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K];
}
// Update resource-related information in the TraceBlockInfo for MBB.
void MachineTraceMetrics::Ensemble::
computeHeightResources(const MachineBasicBlock *MBB) {
TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ unsigned PROffset = MBB->getNumber() * PRKinds;
// Compute resources for the current block.
TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+ ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber());
// The trace tail is done.
if (!TBI->Succ) {
TBI->Tail = MBB->getNumber();
+ std::copy(PRCycles.begin(), PRCycles.end(),
+ ProcResourceHeights.begin() + PROffset);
return;
}
// Compute from the block below. A post-order traversal ensures the
// predecessor is always computed first.
- TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+ unsigned SuccNum = TBI->Succ->getNumber();
+ TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum];
assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
TBI->InstrHeight += SuccTBI->InstrHeight;
TBI->Tail = SuccTBI->Tail;
+
+ // Compute per-resource heights.
+ ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum);
+ for (unsigned K = 0; K != PRKinds; ++K)
+ ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K];
}
// Check if depth resources for MBB are valid and return the TBI.
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
- return TBI->hasValidDepth() ? TBI : 0;
+ return TBI->hasValidDepth() ? TBI : nullptr;
}
// Check if height resources for MBB are valid and return the TBI.
MachineTraceMetrics::Ensemble::
getHeightResources(const MachineBasicBlock *MBB) const {
const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
- return TBI->hasValidHeight() ? TBI : 0;
+ return TBI->hasValidHeight() ? TBI : nullptr;
+}
+
+/// Get an array of processor resource depths for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by all blocks preceding MBB in its trace. It does not include instructions
+/// in MBB.
+///
+/// Compare TraceBlockInfo::InstrDepth.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceDepths(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size());
+ return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds);
+}
+
+/// Get an array of processor resource heights for MBB. Indexed by processor
+/// resource kind, this array contains the scaled processor resources consumed
+/// by this block and all blocks following it in its trace.
+///
+/// Compare TraceBlockInfo::InstrHeight.
+ArrayRef<unsigned>
+MachineTraceMetrics::Ensemble::
+getProcResourceHeights(unsigned MBBNum) const {
+ unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds();
+ assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size());
+ return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds);
}
//===----------------------------------------------------------------------===//
// instructions.
namespace {
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
- const char *getName() const { return "MinInstr"; }
- const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
- const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+ const char *getName() const override { return "MinInstr"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
public:
MinInstrCountEnsemble(MachineTraceMetrics *mtm)
const MachineBasicBlock*
MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
if (MBB->pred_empty())
- return 0;
+ return nullptr;
const MachineLoop *CurLoop = getLoopFor(MBB);
// Don't leave loops, and never follow back-edges.
if (CurLoop && MBB == CurLoop->getHeader())
- return 0;
+ return nullptr;
unsigned CurCount = MTM.getResources(MBB)->InstrCount;
- const MachineBasicBlock *Best = 0;
+ const MachineBasicBlock *Best = nullptr;
unsigned BestDepth = 0;
- for (MachineBasicBlock::const_pred_iterator
- I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
- const MachineBasicBlock *Pred = *I;
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
const MachineTraceMetrics::TraceBlockInfo *PredTBI =
getDepthResources(Pred);
// Ignore cycles that aren't natural loops.
const MachineBasicBlock*
MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
if (MBB->pred_empty())
- return 0;
+ return nullptr;
const MachineLoop *CurLoop = getLoopFor(MBB);
- const MachineBasicBlock *Best = 0;
+ const MachineBasicBlock *Best = nullptr;
unsigned BestHeight = 0;
- for (MachineBasicBlock::const_succ_iterator
- I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
- const MachineBasicBlock *Succ = *I;
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
// Don't consider back-edges.
if (CurLoop && Succ == CurLoop->getHeader())
continue;
}
// To is a new block. Mark the block as visited in case the CFG has cycles
// that MachineLoopInfo didn't recognize as a natural loop.
- return LB.Visited.insert(To);
+ return LB.Visited.insert(To).second;
}
};
}
// Run an upwards post-order search for the trace start.
Bounds.Downward = false;
Bounds.Visited.clear();
- typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO;
- for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds);
- I != E; ++I) {
+ for (auto I : inverse_post_order_ext(MBB, Bounds)) {
DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the predecessors have been visited, pick the preferred one.
- TBI.Pred = pickTracePred(*I);
+ TBI.Pred = pickTracePred(I);
DEBUG({
if (TBI.Pred)
dbgs() << "BB#" << TBI.Pred->getNumber() << '\n';
dbgs() << "null\n";
});
// The trace leading to I is now known, compute the depth resources.
- computeDepthResources(*I);
+ computeDepthResources(I);
}
// Run a downwards post-order search for the trace end.
Bounds.Downward = true;
Bounds.Visited.clear();
- typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO;
- for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds);
- I != E; ++I) {
+ for (auto I : post_order_ext(MBB, Bounds)) {
DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": ");
TraceBlockInfo &TBI = BlockInfo[I->getNumber()];
// All the successors have been visited, pick the preferred one.
- TBI.Succ = pickTraceSucc(*I);
+ TBI.Succ = pickTraceSucc(I);
DEBUG({
if (TBI.Succ)
dbgs() << "BB#" << TBI.Succ->getNumber() << '\n';
dbgs() << "null\n";
});
// The trace leaving I is now known, compute the height resources.
- computeHeightResources(*I);
+ computeHeightResources(I);
}
}
<< " height.\n");
// Find any MBB predecessors that have MBB as their preferred successor.
// They are the only ones that need to be invalidated.
- for (MachineBasicBlock::const_pred_iterator
- I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
- TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()];
if (!TBI.hasValidHeight())
continue;
if (TBI.Succ == MBB) {
TBI.invalidateHeight();
- WorkList.push_back(*I);
+ WorkList.push_back(Pred);
continue;
}
// Verify that TBI.Succ is actually a *I successor.
- assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed");
+ assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed");
}
} while (!WorkList.empty());
}
<< " depth.\n");
// Find any MBB successors that have MBB as their preferred predecessor.
// They are the only ones that need to be invalidated.
- for (MachineBasicBlock::const_succ_iterator
- I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
- TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()];
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()];
if (!TBI.hasValidDepth())
continue;
if (TBI.Pred == MBB) {
TBI.invalidateDepth();
- WorkList.push_back(*I);
+ WorkList.push_back(Succ);
continue;
}
// Verify that TBI.Pred is actually a *I predecessor.
- assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed");
+ assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed");
}
} while (!WorkList.empty());
}
// invalidated, but their instructions will stay the same, so there is no
// need to erase the Cycle entries. They will be overwritten when we
// recompute.
- for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end();
- I != E; ++I)
- Cycles.erase(I);
+ for (const auto &I : *BadMBB)
+ Cycles.erase(&I);
}
void MachineTraceMetrics::Ensemble::verify() const {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
assert(!DefI.atEnd() && "Register has no defs");
- DefMI = &*DefI;
+ DefMI = DefI->getParent();
DefOp = DefI.getOperandNo();
assert((++DefI).atEnd() && "Register has multiple defs");
}
static bool getDataDeps(const MachineInstr *UseMI,
SmallVectorImpl<DataDep> &Deps,
const MachineRegisterInfo *MRI) {
+ // Debug values should not be included in any calculations.
+ if (UseMI->isDebugValue())
+ return false;
+
bool HasPhysRegs = false;
- for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
- if (!MO->isReg())
+ for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(),
+ E = UseMI->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg())
continue;
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!Reg)
continue;
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
continue;
}
// Collect virtual register reads.
- if (MO->readsReg())
- Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
+ if (MO.readsReg())
+ Deps.push_back(DataDep(MRI, Reg, UseMI->getOperandNo(I)));
}
return HasPhysRegs;
}
unsigned getSparseSetIndex() const { return RegUnit; }
- LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {}
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
};
}
SmallVector<unsigned, 8> Kills;
SmallVector<unsigned, 8> LiveDefOps;
- for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) {
- if (!MO->isReg())
+ for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
+ ME = UseMI->operands_end(); MI != ME; ++MI) {
+ const MachineOperand &MO = *MI;
+ if (!MO.isReg())
continue;
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
// Track live defs and kills for updating RegUnits.
- if (MO->isDef()) {
- if (MO->isDead())
+ if (MO.isDef()) {
+ if (MO.isDead())
Kills.push_back(Reg);
else
- LiveDefOps.push_back(MO.getOperandNo());
- } else if (MO->isKill())
+ LiveDefOps.push_back(UseMI->getOperandNo(MI));
+ } else if (MO.isKill())
Kills.push_back(Reg);
// Identify dependencies.
- if (!MO->readsReg())
+ if (!MO.readsReg())
continue;
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
if (I == RegUnits.end())
continue;
- Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
+ Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
break;
}
}
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()];
- if (!DefTBI.isEarlierInSameTrace(TBI))
+ if (!DefTBI.isUsefulDominator(TBI))
continue;
unsigned Len = LIR.Height + Cycles[DefMI].Depth;
MaxLen = std::max(MaxLen, Len);
SmallVector<DataDep, 8> Deps;
while (!Stack.empty()) {
MBB = Stack.pop_back_val();
- DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n");
+ DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n");
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
TBI.HasValidInstrDepths = true;
TBI.CriticalPath = 0;
+ // Print out resource depths here as well.
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrDepth);
+ ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ if (PRDepths[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRDepths[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
// Also compute the critical path length through MBB when possible.
if (TBI.HasValidInstrHeights)
TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MachineInstr *UseMI = I;
-
+ for (const auto &UseMI : *MBB) {
// Collect all data dependencies.
Deps.clear();
- if (UseMI->isPHI())
- getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
- else if (getDataDeps(UseMI, Deps, MTM.MRI))
- updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI);
+ if (UseMI.isPHI())
+ getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(&UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
// Filter and process dependencies, computing the earliest issue cycle.
unsigned Cycle = 0;
- for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
- const DataDep &Dep = Deps[i];
+ for (const DataDep &Dep : Deps) {
const TraceBlockInfo&DepTBI =
BlockInfo[Dep.DefMI->getParent()->getNumber()];
// Ignore dependencies from outside the current trace.
- if (!DepTBI.isEarlierInSameTrace(TBI))
+ if (!DepTBI.isUsefulDominator(TBI))
continue;
assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency");
unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth;
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
DepCycle += MTM.SchedModel
- .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp,
- /* FindMin = */ false);
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
Cycle = std::max(Cycle, DepCycle);
}
// Remember the instruction depth.
- InstrCycles &MICycles = Cycles[UseMI];
+ InstrCycles &MICycles = Cycles[&UseMI];
MICycles.Depth = Cycle;
if (!TBI.HasValidInstrHeights) {
- DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ DEBUG(dbgs() << Cycle << '\t' << UseMI);
continue;
}
// Update critical path length.
TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
- DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
}
}
}
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
SmallVector<unsigned, 8> ReadOps;
- for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
- if (!MO->isReg())
+
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ const MachineOperand &MO = *MOI;
+ if (!MO.isReg())
continue;
- unsigned Reg = MO->getReg();
+ unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
- if (MO->readsReg())
- ReadOps.push_back(MO.getOperandNo());
- if (!MO->isDef())
+ if (MO.readsReg())
+ ReadOps.push_back(MI->getOperandNo(MOI));
+ if (!MO.isDef())
continue;
// This is a def of Reg. Remove corresponding entries from RegUnits, and
// update MI Height to consider the physreg dependencies.
// We may not know the UseMI of this dependency, if it came from the
// live-in list. SchedModel can handle a NULL UseMI.
DepHeight += SchedModel
- .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op,
- /* FindMin = */ false);
+ .computeOperandLatency(MI, MI->getOperandNo(MOI), I->MI, I->Op);
}
Height = std::max(Height, DepHeight);
// This regunit is dead above MI.
// Adjust height by Dep.DefMI latency.
if (!Dep.DefMI->isTransient())
UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
- UseMI, Dep.UseOp, false);
+ UseMI, Dep.UseOp);
// Update Heights[DefMI] to be the maximum height seen.
MIHeightMap::iterator I;
bool New;
- tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+ std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
if (New)
return true;
// MBB is the highest precomputed block in the trace.
if (MBB) {
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
- for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
- LiveInReg LI = TBI.LiveIns[i];
+ for (LiveInReg &LI : TBI.LiveIns) {
if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
// For virtual registers, the def latency is included.
unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
TBI.HasValidInstrHeights = true;
TBI.CriticalPath = 0;
+ DEBUG({
+ dbgs() << format("%7u Instructions\n", TBI.InstrHeight);
+ ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber());
+ for (unsigned K = 0; K != PRHeights.size(); ++K)
+ if (PRHeights[K]) {
+ unsigned Factor = MTM.SchedModel.getResourceFactor(K);
+ dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K]))
+ << MTM.SchedModel.getProcResource(K)->Name << " ("
+ << PRHeights[K]/Factor << " ops x" << Factor << ")\n";
+ }
+ });
+
// Get dependencies from PHIs in the trace successor.
const MachineBasicBlock *Succ = TBI.Succ;
// If MBB is the last block in the trace, and it has a back-edge to the
Succ = Loop->getHeader();
if (Succ) {
- for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
- I != E && I->isPHI(); ++I) {
- const MachineInstr *PHI = I;
+ for (const auto &PHI : *Succ) {
+ if (!PHI.isPHI())
+ break;
Deps.clear();
- getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ getPHIDeps(&PHI, Deps, MBB, MTM.MRI);
if (!Deps.empty()) {
// Loop header PHI heights are all 0.
- unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
- DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
- if (pushDepHeight(Deps.front(), PHI, Height,
+ unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
+ if (pushDepHeight(Deps.front(), &PHI, Height,
Heights, MTM.SchedModel, MTM.TII))
addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
}
MTM.SchedModel, MTM.TII, MTM.TRI);
// Update the required height of any virtual registers read by MI.
- for (unsigned i = 0, e = Deps.size(); i != e; ++i)
- if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
- addLiveIns(Deps[i].DefMI, Deps[i].DefOp, Stack);
+ for (const DataDep &Dep : Deps)
+ if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
+ addLiveIns(Dep.DefMI, Dep.DefOp, Stack);
InstrCycles &MICycles = Cycles[MI];
MICycles.Height = Cycle;
// Update virtual live-in heights. They were added by addLiveIns() with a 0
// height because the final height isn't known until now.
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
- for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
- LiveInReg &LIR = TBI.LiveIns[i];
+ for (LiveInReg &LIR : TBI.LiveIns) {
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
LIR.Height = Heights.lookup(DefMI);
DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
MachineTraceMetrics::Trace
MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
- // FIXME: Check cache tags, recompute as needed.
- computeTrace(MBB);
- computeInstrDepths(MBB);
- computeInstrHeights(MBB);
- return Trace(*this, BlockInfo[MBB->getNumber()]);
+ TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+
+ if (!TBI.hasValidDepth() || !TBI.hasValidHeight())
+ computeTrace(MBB);
+ if (!TBI.HasValidInstrDepths)
+ computeInstrDepths(MBB);
+ if (!TBI.HasValidInstrHeights)
+ computeInstrHeights(MBB);
+
+ return Trace(*this, TBI);
}
unsigned
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
DepCycle += TE.MTM.SchedModel
- .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false);
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp);
return DepCycle;
}
+/// When bottom is set include instructions in current block in estimate.
unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
- // For now, we compute the resource depth from instruction count / issue
- // width. Eventually, we should compute resource depth per functional unit
- // and return the max.
+ // Find the limiting processor resource.
+ // Numbers have been pre-scaled to be comparable.
+ unsigned PRMax = 0;
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ if (Bottom) {
+ ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum());
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]);
+ } else {
+ for (unsigned K = 0; K != PRDepths.size(); ++K)
+ PRMax = std::max(PRMax, PRDepths[K]);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+ /// All instructions before current block
unsigned Instrs = TBI.InstrDepth;
+ // plus instructions in current block
if (Bottom)
Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
- return Instrs;
+ return std::max(Instrs, PRMax);
}
-unsigned MachineTraceMetrics::Trace::
-getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+unsigned MachineTraceMetrics::Trace::getResourceLength(
+ ArrayRef<const MachineBasicBlock *> Extrablocks,
+ ArrayRef<const MCSchedClassDesc *> ExtraInstrs,
+ ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const {
+ // Add up resources above and below the center block.
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
+ ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
+ unsigned PRMax = 0;
+
+ // Capture computing cycles from extra instructions
+ auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs,
+ unsigned ResourceIdx)
+ ->unsigned {
+ unsigned Cycles = 0;
+ for (const MCSchedClassDesc *SC : Instrs) {
+ if (!SC->isValid())
+ continue;
+ for (TargetSchedModel::ProcResIter
+ PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+ PE = TE.MTM.SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if (PI->ProcResourceIdx != ResourceIdx)
+ continue;
+ Cycles +=
+ (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx));
+ }
+ }
+ return Cycles;
+ };
+
+ for (unsigned K = 0; K != PRDepths.size(); ++K) {
+ unsigned PRCycles = PRDepths[K] + PRHeights[K];
+ for (const MachineBasicBlock *MBB : Extrablocks)
+ PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K];
+ PRCycles += extraCycles(ExtraInstrs, K);
+ PRCycles -= extraCycles(RemoveInstrs, K);
+ PRMax = std::max(PRMax, PRCycles);
+ }
+ // Convert to cycle count.
+ PRMax = TE.MTM.getCycles(PRMax);
+
+ // Instrs: #instructions in current trace outside current block.
unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
- for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
- Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+ // Add instruction count from the extra blocks.
+ for (const MachineBasicBlock *MBB : Extrablocks)
+ Instrs += TE.MTM.getResources(MBB)->InstrCount;
+ Instrs += ExtraInstrs.size();
+ Instrs -= RemoveInstrs.size();
if (unsigned IW = TE.MTM.SchedModel.getIssueWidth())
Instrs /= IW;
// Assume issue width 1 without a schedule model.
- return Instrs;
+ return std::max(Instrs, PRMax);
+}
+
+bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
+ const MachineInstr *UseMI) const {
+ if (DefMI->getParent() == UseMI->getParent())
+ return true;
+
+ const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
+ const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
+
+ return DepTBI.isUsefulDominator(TBI);
}
void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const {