static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
cl::desc("Pop up a window to show MISched dags after they are processed"));
+/// In some situations, a few uninteresting nodes depend on nearly all other
+/// nodes in the graph; provide a cutoff to hide them.
+static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
+ cl::desc("Hide nodes with more predecessors/successors than cutoff"));
+
static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
"Machine Instruction Scheduler", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
AU.setPreservesCFG();
AU.addRequiredID(MachineDominatorsID);
AU.addRequired<MachineLoopInfo>();
- AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
PassConfig = &getAnalysis<TargetPassConfig>();
- AA = &getAnalysis<AliasAnalysis>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LIS = &getAnalysis<LiveIntervals>();
if (skipOptnoneFunction(*mf.getFunction()))
return false;
- if (!mf.getSubtarget().enablePostMachineScheduler()) {
+ if (!mf.getSubtarget().enablePostRAScheduler()) {
DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
MBB != MBBEnd; ++MBB) {
- Scheduler.startBlock(MBB);
+ Scheduler.startBlock(&*MBB);
#ifndef NDEBUG
if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
// Avoid decrementing RegionEnd for blocks with no terminator.
if (RegionEnd != MBB->end() ||
- isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
+ isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII, IsPostRA)) {
--RegionEnd;
// Count the boundary instruction.
--RemainingInstrs;
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
for (; I != MBB->begin(); --I, --RemainingInstrs) {
- if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
+ if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII, IsPostRA))
break;
if (!I->isDebugValue())
++NumRegionInstrs;
}
// Notify the scheduler of the region, even if we may skip scheduling
// it. Perhaps it still needs to be bundled.
- Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
+ Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
// Skip empty scheduling regions (0 or 1 schedulable instructions).
if (I == RegionEnd || I == std::prev(RegionEnd)) {
if (Scheduler.isPostRA()) {
// FIXME: Ideally, no further passes should rely on kill flags. However,
// thumb2 size reduction is currently an exception.
- Scheduler.fixupKills(MBB);
+ Scheduler.fixupKills(&*MBB);
}
}
Scheduler.finalizeSchedule();
LLVM_DUMP_METHOD
void ReadyQueue::dump() {
- dbgs() << Name << ": ";
+ dbgs() << "Queue " << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
+ DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
+ DEBUG(SchedImpl->dumpPolicy());
+
// Build the DAG.
buildSchedGraph(AA);
initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ while (true) {
+ DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
assert(!SU->isScheduled && "Node already scheduled");
if (!checkSchedLimit())
break;
unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
if (NewMaxPressure[ID] >= Limit - 2) {
DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
- << NewMaxPressure[ID] << " > " << Limit << "(+ "
- << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
+ << NewMaxPressure[ID]
+ << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
+ << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
}
}
}
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
-/// so that it can be easilly extended by experimental schedulers. Generally,
+/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
+ DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
+ DEBUG(SchedImpl->dumpPolicy());
buildDAGWithRegPressure();
Topo.InitDAGTopologicalSorting();
}
bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ while (true) {
+ DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
assert(!SU->isScheduled && "Node already scheduled");
if (!checkSchedLimit())
break;
SUnit *SU = Loads[Idx];
unsigned BaseReg;
unsigned Offset;
- if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+ if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
}
if (LoadRecords.size() < 2)
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
- const TargetInstrInfo *TII;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
public:
- MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+ MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
+ : TII(TII), TRI(TRI) {}
void apply(ScheduleDAGMI *DAG) override;
};
} // anonymous
+/// Returns true if \p MI reads a register written by \p Other.
+static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
+ const MachineInstr &Other) {
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (Other.modifiesRegister(Reg, &TRI))
+ return true;
+ }
+ return false;
+}
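+// Illustrative example (hypothetical operands): if Other is a def of %vreg2
+// and MI carries a use operand that reads %vreg2, HasDataDep returns true;
+// if MI reads only registers that Other never modifies, it returns false.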
+
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
// For now, assume targets can only fuse with the branch.
- MachineInstr *Branch = DAG->ExitSU.getInstr();
+ SUnit &ExitSU = DAG->ExitSU;
+ MachineInstr *Branch = ExitSU.getInstr();
if (!Branch)
return;
- for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
- SUnit *SU = &DAG->SUnits[--Idx];
- if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+ for (SUnit &SU : DAG->SUnits) {
+    // SUnits with successors can't be scheduled in front of the ExitSU.
+ if (!SU.Succs.empty())
+ continue;
+ // We only care if the node writes to a register that the branch reads.
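+    // (Illustrative, beyond this patch: on X86, a CMP or TEST that defines
+    // EFLAGS read by the conditional branch is the classic pair targeted by
+    // hardware macro-fusion.)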
+ MachineInstr *Pred = SU.getInstr();
+ if (!HasDataDep(TRI, *Branch, *Pred))
+ continue;
+
+ if (!TII.shouldScheduleAdjacent(Pred, Branch))
continue;
    // Create a single weak edge from SU to ExitSU. The only effect is to cause
    // bottom-up scheduling to heavily prioritize the clustered SU; top-down
    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
// of SU, we could create an artificial edge from the deepest root, but it
// hasn't been needed yet.
- bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+ bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
(void)Success;
assert(Success && "No DAG nodes should be reachable from ExitSU");
- DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+ DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
break;
}
}
bool IsPostRA,
SchedBoundary &CurrZone,
SchedBoundary *OtherZone) {
- // Apply preemptive heuristics based on the the total latency and resources
+ // Apply preemptive heuristics based on the total latency and resources
// inside and outside this zone. Potential stalls should be considered before
// following this policy.
Latency = Cand.SU->getDepth();
break;
}
- dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
if (P.isValid())
dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
<< ":" << P.getUnitInc() << " ";
}
}
+void GenericScheduler::dumpPolicy() {
+ dbgs() << "GenericScheduler RegionPolicy: "
+ << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
+ << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
+ << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
+ << "\n";
+}
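+// Sample -debug output (flag values illustrative):
+//   GenericScheduler RegionPolicy: ShouldTrackPressure=1 OnlyTopDown=0 OnlyBottomUp=1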
+
/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
/// critical path by more cycles than it takes to drain the instruction buffer.
/// We estimate an upper bound on in-flight instructions as:
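///
///   CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
///   InFlightIterations = AcyclicPath / CyclesPerIteration
///   InFlightResources = InFlightIterations * LoopResources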
}
}
DEBUG(if (TryCand.RPDelta.Excess.isValid())
- dbgs() << " SU(" << TryCand.SU->NodeNum << ") "
+ dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") "
<< TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
<< ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
TryCand, Cand, PhysRegCopy))
return;
- // Avoid exceeding the target's limit. If signed PSetID is negative, it is
- // invalid; convert it to INT_MAX to give it lowest priority.
+ // Avoid exceeding the target's limit.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
Cand.RPDelta.Excess,
TryCand, Cand, RegExcess))
// Avoid serializing long latency dependence chains.
// For acyclic path limited loops, latency was already checked above.
- if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
- && tryLatency(TryCand, Cand, Zone)) {
+ if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
+ !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
return;
}
if (EnableLoadCluster && DAG->TII->enableClusterLoads())
DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
if (EnableMacroFusion)
- DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
+ DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
return DAG;
}
}
static bool isNodeHidden(const SUnit *Node) {
- return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
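+  // An unsigned cl::opt with no cl::init defaults to 0, so hiding stays off
+  // unless -view-misched-cutoff is passed explicitly.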
+ if (ViewMISchedCutoff == 0)
+ return false;
+  return (Node->Preds.size() > ViewMISchedCutoff ||
+          Node->Succs.size() > ViewMISchedCutoff);
}
static bool hasNodeAddressLabel(const SUnit *Node,