cl::desc("Enable register pressure scheduling."), cl::init(true));
static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
- cl::desc("Enable cyclic critical path analysis."), cl::init(false));
+ cl::desc("Enable cyclic critical path analysis."), cl::init(true));
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
cl::desc("Enable load clustering."), cl::init(true));
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
+// Out-of-line anchor() definitions: pin each class's vtable to this file.
+void MachineSchedStrategy::anchor() {}
+void ScheduleDAGMutation::anchor() {}
+
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
virtual void print(raw_ostream &O, const Module* = 0) const;
static char ID; // Class identification, replacement for typeinfo
+
+protected:
+ ScheduleDAGInstrs *createMachineScheduler();
};
} // namespace
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C);
/// Decrement this iterator until reaching the top or a non-debug instr.
&*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
}
+/// Instantiate a ScheduleDAGInstrs that will be owned by the caller. Tries, in order: MachineSchedOpt, the PassConfig hook, then createGenericSched.
+ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
+ // Select the scheduler, or set the default. Any MachineSchedOpt value other than useDefaultMachineSched is called directly.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor != useDefaultMachineSched)
+ return Ctor(this);
+
+ // Get the default scheduler set by the target for this function. A null result falls through to the generic default below.
+ ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
+ if (Scheduler)
+ return Scheduler;
+
+ // Default to GenericScheduler.
+ return createGenericSched(this);
+}
+
/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
}
RegClassInfo->runOnMachineFunction(*MF);
- // Select the scheduler, or set the default.
- MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
- if (Ctor == useDefaultMachineSched) {
- // Get the default scheduler set by the target.
- Ctor = MachineSchedRegistry::getDefault();
- if (!Ctor) {
- Ctor = createConvergingSched;
- MachineSchedRegistry::setDefault(Ctor);
- }
- }
- // Instantiate the selected scheduler.
- OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+ // Instantiate the selected scheduler for this target, function, and
+ // optimization level.
+ OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
// Visit all machine basic blocks.
//
if (I == BB->end())
VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
else {
- LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(I));
+ LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
VNI = LRQ.valueIn();
}
// RegisterPressureTracker guarantees that readsReg is true for LiveUses.
// If this use comes before the reaching def, it cannot be a last use, so
// decrease its pressure change.
if (!SU->isScheduled && SU != &ExitSU) {
- LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(SU->getInstr()));
+ LiveQueryResult LRQ
+ = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
if (LRQ.valueIn() == VNI)
getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
}
continue;
// Only consider uses of the phi.
- LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(UI->SU->getInstr()));
+ LiveQueryResult LRQ =
+ LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
if (!LRQ.valueIn()->isPHIDef())
continue;
}
//===----------------------------------------------------------------------===//
-// ConvergingScheduler - Implementation of the generic MachineSchedStrategy.
+// GenericScheduler - Implementation of the generic MachineSchedStrategy.
//===----------------------------------------------------------------------===//
namespace {
-/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
-class ConvergingScheduler : public MachineSchedStrategy {
+class GenericScheduler : public MachineSchedStrategy {
public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
#ifndef NDEBUG
- static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
+ static const char *getReasonStr(GenericScheduler::CandReason Reason);
#endif
/// Policy for scheduling the next instruction in the candidate's zone.
}
};
- /// Store the state used by ConvergingScheduler heuristics, required for the
+ /// Store the state used by GenericScheduler heuristics, required for the
/// lifetime of one invocation of pickNode().
struct SchedCandidate {
CandPolicy Policy;
/// PendingFlag set.
SchedBoundary(unsigned ID, const Twine &Name):
DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
- Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+ Pending(ID << GenericScheduler::LogMaxQID, Name+".P"),
HazardRec(0) {
reset();
}
SchedRemainder *rem);
bool isTop() const {
- return Available.getID() == ConvergingScheduler::TopQID;
+ return Available.getID() == GenericScheduler::TopQID;
}
#ifndef NDEBUG
LogMaxQID = 2
};
- ConvergingScheduler(const MachineSchedContext *C):
+ GenericScheduler(const MachineSchedContext *C):
Context(C), DAG(0), SchedModel(0), TRI(0),
Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
};
} // namespace
-void ConvergingScheduler::SchedRemainder::
+void GenericScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
reset();
if (!SchedModel->hasInstrSchedModel())
}
}
-void ConvergingScheduler::SchedBoundary::
+void GenericScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
reset();
DAG = dag;
}
/// Initialize the per-region scheduling policy.
-void ConvergingScheduler::initPolicy(MachineBasicBlock::iterator Begin,
+void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) {
const TargetMachine &TM = Context->MF->getTarget();
}
}
-void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
+void GenericScheduler::initialize(ScheduleDAGMI *dag) {
DAG = dag;
SchedModel = DAG->getSchedModel();
TRI = DAG->TRI;
}
}
-void ConvergingScheduler::releaseTopNode(SUnit *SU) {
+void GenericScheduler::releaseTopNode(SUnit *SU) {
if (SU->isScheduled)
return;
Top.releaseNode(SU, SU->TopReadyCycle);
}
-void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
+void GenericScheduler::releaseBottomNode(SUnit *SU) {
if (SU->isScheduled)
return;
/// InFlightResources = InFlightIterations * LoopResources
///
/// TODO: Check execution resources in addition to IssueCount.
-void ConvergingScheduler::checkAcyclicLatency() {
+void GenericScheduler::checkAcyclicLatency() {
if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
return;
dbgs() << " ACYCLIC LATENCY LIMIT\n");
}
-void ConvergingScheduler::registerRoots() {
+void GenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
// Some roots may not feed into ExitSU. Check all of them in case.
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
-bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
if (HazardRec->isEnabled())
return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
}
// Find the unscheduled node in ReadySUs with the highest latency.
-unsigned ConvergingScheduler::SchedBoundary::
+unsigned GenericScheduler::SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
SUnit *LateSU = 0;
unsigned RemLatency = 0;
// Count resources in this zone and the remaining unscheduled
// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
// resource index, or zero if the zone is issue limited.
-unsigned ConvergingScheduler::SchedBoundary::
+unsigned GenericScheduler::SchedBoundary::
getOtherResourceCount(unsigned &OtherCritIdx) {
OtherCritIdx = 0;
if (!SchedModel->hasInstrSchedModel())
/// Set the CandPolicy for this zone given the current resources and latencies
/// inside and outside the zone.
-void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
+void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
SchedBoundary &OtherZone) {
// Now that potential stalls have been considered, apply preemptive heuristics
// based on the total latency and resources inside and outside this
Policy.DemandResIdx = OtherCritIdx;
}
-void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
+void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
unsigned ReadyCycle) {
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
}
/// Move the boundary of scheduled code by one cycle.
-void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
+void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
if (SchedModel->getMicroOpBufferSize() == 0) {
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
if (MinReadyCycle > NextCycle)
DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
}
-void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
+void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
unsigned Count) {
ExecutedResCounts[PIdx] += Count;
if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
///
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
-unsigned ConvergingScheduler::SchedBoundary::
+unsigned GenericScheduler::SchedBoundary::
countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
unsigned Count = Factor * Cycles;
}
/// Move the boundary of scheduled code by one SUnit.
-void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
// Update the reservation table.
if (HazardRec->isEnabled()) {
if (!isTop() && SU->isCall) {
/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
-void ConvergingScheduler::SchedBoundary::releasePending() {
+void GenericScheduler::SchedBoundary::releasePending() {
// If the available queue is empty, it is safe to reset MinReadyCycle.
if (Available.empty())
MinReadyCycle = UINT_MAX;
}
/// Remove SU from the ready set for this boundary.
-void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) {
if (Available.isInQueue(SU))
Available.remove(Available.find(SU));
else {
/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
-SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() {
if (CheckPending)
releasePending();
#ifndef NDEBUG
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
-void ConvergingScheduler::SchedBoundary::dumpScheduledState() {
+void GenericScheduler::SchedBoundary::dumpScheduledState() {
unsigned ResFactor;
unsigned ResCount;
if (ZoneCritResIdx) {
}
#endif
-void ConvergingScheduler::SchedCandidate::
+void GenericScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
const TargetSchedModel *SchedModel) {
if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
/// Return true if this heuristic determines order.
static bool tryLess(int TryVal, int CandVal,
- ConvergingScheduler::SchedCandidate &TryCand,
- ConvergingScheduler::SchedCandidate &Cand,
- ConvergingScheduler::CandReason Reason) {
+ GenericScheduler::SchedCandidate &TryCand,
+ GenericScheduler::SchedCandidate &Cand,
+ GenericScheduler::CandReason Reason) {
if (TryVal < CandVal) {
TryCand.Reason = Reason;
return true;
}
static bool tryGreater(int TryVal, int CandVal,
- ConvergingScheduler::SchedCandidate &TryCand,
- ConvergingScheduler::SchedCandidate &Cand,
- ConvergingScheduler::CandReason Reason) {
+ GenericScheduler::SchedCandidate &TryCand,
+ GenericScheduler::SchedCandidate &Cand,
+ GenericScheduler::CandReason Reason) {
if (TryVal > CandVal) {
TryCand.Reason = Reason;
return true;
static bool tryPressure(const PressureChange &TryP,
const PressureChange &CandP,
- ConvergingScheduler::SchedCandidate &TryCand,
- ConvergingScheduler::SchedCandidate &Cand,
- ConvergingScheduler::CandReason Reason) {
+ GenericScheduler::SchedCandidate &TryCand,
+ GenericScheduler::SchedCandidate &Cand,
+ GenericScheduler::CandReason Reason) {
int TryRank = TryP.getPSetOrMax();
int CandRank = CandP.getPSetOrMax();
// If both candidates affect the same set, go with the smallest increase.
return 0;
}
-static bool tryLatency(ConvergingScheduler::SchedCandidate &TryCand,
- ConvergingScheduler::SchedCandidate &Cand,
- ConvergingScheduler::SchedBoundary &Zone) {
+static bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
+ GenericScheduler::SchedCandidate &Cand,
+ GenericScheduler::SchedBoundary &Zone) {
if (Zone.isTop()) {
if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
- TryCand, Cand, ConvergingScheduler::TopDepthReduce))
+ TryCand, Cand, GenericScheduler::TopDepthReduce))
return true;
}
if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
- TryCand, Cand, ConvergingScheduler::TopPathReduce))
+ TryCand, Cand, GenericScheduler::TopPathReduce))
return true;
}
else {
if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
- TryCand, Cand, ConvergingScheduler::BotHeightReduce))
+ TryCand, Cand, GenericScheduler::BotHeightReduce))
return true;
}
if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
- TryCand, Cand, ConvergingScheduler::BotPathReduce))
+ TryCand, Cand, GenericScheduler::BotPathReduce))
return true;
}
return false;
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
-void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand,
SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
return;
// For loops that are acyclic path limited, aggressively schedule for latency.
- if (Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone))
+ // This can result in very long dependence chains scheduled in sequence, so
+ // once every cycle (when CurrMOps == 0), switch to normal heuristics.
+ if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps
+ && tryLatency(TryCand, Cand, Zone))
return;
// Keep clustered nodes together to encourage downstream peephole
}
#ifndef NDEBUG
-const char *ConvergingScheduler::getReasonStr(
- ConvergingScheduler::CandReason Reason) {
+const char *GenericScheduler::getReasonStr(
+ GenericScheduler::CandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND ";
case PhysRegCopy: return "PREG-COPY";
llvm_unreachable("Unknown reason!");
}
-void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
+void GenericScheduler::traceCandidate(const SchedCandidate &Cand) {
PressureChange P;
unsigned ResIdx = 0;
unsigned Latency = 0;
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
-void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand) {
ReadyQueue &Q = Zone.Available;
}
}
-static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
+static void tracePick(const GenericScheduler::SchedCandidate &Cand,
bool IsTop) {
DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
- << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
+ << GenericScheduler::getReasonStr(Cand.Reason) << '\n');
}
/// Pick the best candidate node from either the top or bottom queue.
-SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
+SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
}
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
-SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
+SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
if (DAG->top() == DAG->bottom()) {
assert(Top.Available.empty() && Top.Pending.empty() &&
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
return SU;
}
-void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
/// them here. See comments in biasPhysRegCopy.
-void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
Top.bumpNode(SU);
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
- ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler(C));
+static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) {
+ ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
return DAG;
}
static MachineSchedRegistry
-ConvergingSchedRegistry("converge", "Standard converging scheduler.",
- createConvergingSched);
+GenericSchedRegistry("converge", "Standard converging scheduler.",
+ createGenericSched);
//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.