X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineScheduler.cpp;h=a13dab3fe78fc5572e220b1ec75346b87de5e7d8;hb=104db6b94a127e4e68db7528f50d3fcf24d543bd;hp=f10a471d66f5ae4251ecfbca2cc0f4a8724d9f29;hpb=629b96cb4f278cc78bfd5679e91315cb6e1ed164;p=oota-llvm.git diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index f10a471d66f..a13dab3fe78 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -12,10 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "misched" - #include "llvm/CodeGen/MachineScheduler.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -36,17 +33,27 @@ using namespace llvm; +#define DEBUG_TYPE "misched" + namespace llvm { cl::opt ForceTopDown("misched-topdown", cl::Hidden, cl::desc("Force top-down list scheduling")); cl::opt ForceBottomUp("misched-bottomup", cl::Hidden, cl::desc("Force bottom-up list scheduling")); +cl::opt +DumpCriticalPathLength("misched-dcpl", cl::Hidden, + cl::desc("Print critical path length to stdout")); } #ifndef NDEBUG static cl::opt ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed")); +/// In some situations a few uninteresting nodes depend on nearly all other +/// nodes in the graph, provide a cutoff to hide them. +static cl::opt ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, + cl::desc("Hide nodes with more predecessor/successor than cutoff")); + static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); @@ -86,7 +93,7 @@ void ScheduleDAGMutation::anchor() {} //===----------------------------------------------------------------------===// MachineSchedContext::MachineSchedContext(): - MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { + MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) { RegClassInfo = new RegisterClassInfo(); } @@ -101,10 +108,10 @@ class MachineSchedulerBase : public MachineSchedContext, public: MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {} - virtual void print(raw_ostream &O, const Module* = 0) const; + void print(raw_ostream &O, const Module* = nullptr) const override; protected: - void scheduleRegions(ScheduleDAGInstrs &Scheduler); + void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags); }; /// MachineScheduler runs after coalescing and before register allocation. @@ -112,9 +119,9 @@ class MachineScheduler : public MachineSchedulerBase { public: MachineScheduler(); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; static char ID; // Class identification, replacement for typeinfo @@ -127,9 +134,9 @@ class PostMachineScheduler : public MachineSchedulerBase { public: PostMachineScheduler(); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + void getAnalysisUsage(AnalysisUsage &AU) const override; - virtual bool runOnMachineFunction(MachineFunction&); + bool runOnMachineFunction(MachineFunction&) override; static char ID; // Class identification, replacement for typeinfo @@ -142,12 +149,12 @@ char MachineScheduler::ID = 0; char &llvm::MachineSchedulerID = MachineScheduler::ID; -INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", +INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachineScheduler, "misched", +INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() @@ -159,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequiredID(MachineDominatorsID); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -193,7 +200,7 @@ MachinePassRegistry MachineSchedRegistry::Registry; /// A dummy default scheduler factory indicates whether the scheduler /// is overridden on the command line. static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { - return 0; + return nullptr; } /// MachineSchedOpt allows command line selection of the scheduler. @@ -207,6 +214,11 @@ static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched); +static cl::opt EnableMachineSched( + "enable-misched", + cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), + cl::Hidden); + /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); @@ -302,14 +314,20 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { - DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); + if (EnableMachineSched.getNumOccurrences()) { + if (!EnableMachineSched) + return false; + } else if (!mf.getSubtarget().enableMachineScheduler()) + return false; + + DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); // Initialize the context of the pass. MF = &mf; MLI = &getAnalysis(); MDT = &getAnalysis(); PassConfig = &getAnalysis(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); LIS = &getAnalysis(); @@ -321,8 +339,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. - OwningPtr Scheduler(createMachineScheduler()); - scheduleRegions(*Scheduler); + std::unique_ptr Scheduler(createMachineScheduler()); + scheduleRegions(*Scheduler, false); DEBUG(LIS->dump()); if (VerifyScheduling) @@ -331,6 +349,13 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (skipOptnoneFunction(*mf.getFunction())) + return false; + + if (!mf.getSubtarget().enablePostRAScheduler()) { + DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); + return false; + } DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); // Initialize the context of the pass. @@ -342,8 +367,8 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. - OwningPtr Scheduler(createPostMachineScheduler()); - scheduleRegions(*Scheduler); + std::unique_ptr Scheduler(createPostMachineScheduler()); + scheduleRegions(*Scheduler, true); if (VerifyScheduling) MF->verify(this, "After post machine scheduling."); @@ -363,15 +388,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, - const TargetInstrInfo *TII, - bool IsPostRA) { + const TargetInstrInfo *TII) { return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); } /// Main driver for both MachineScheduler and PostMachineScheduler. -void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - bool IsPostRA = Scheduler.isPostRA(); +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, + bool FixKillFlags) { + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); // Visit all machine basic blocks. // @@ -380,7 +404,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { - Scheduler.startBlock(MBB); + Scheduler.startBlock(&*MBB); #ifndef NDEBUG if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) @@ -408,8 +432,8 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) { // Avoid decrementing RegionEnd for blocks with no terminator. - if (RegionEnd != MBB->end() - || isSchedBoundary(llvm::prior(RegionEnd), MBB, MF, TII, IsPostRA)) { + if (RegionEnd != MBB->end() || + isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { --RegionEnd; // Count the boundary instruction. --RemainingInstrs; @@ -419,23 +443,24 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { // instruction stream until we find the nearest boundary. unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { - if (isSchedBoundary(llvm::prior(I), MBB, MF, TII, IsPostRA)) + for(;I != MBB->begin(); --I, --RemainingInstrs) { + if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII)) break; + if (!I->isDebugValue()) + ++NumRegionInstrs; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs); + Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). - if (I == RegionEnd || I == llvm::prior(RegionEnd)) { + if (I == RegionEnd || I == std::prev(RegionEnd)) { // Close the current region. Bundle the terminator if needed. // This invalidates 'RegionEnd' and 'I'. Scheduler.exitRegion(); continue; } - DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "") - << "MI Scheduling **********\n"); + DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *I << " To: "; @@ -443,6 +468,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << " Remaining: " << RemainingInstrs << "\n"); + if (DumpCriticalPathLength) { + errs() << MF->getName(); + errs() << ":BB# " << MBB->getNumber(); + errs() << " " << MBB->getName() << " \n"; + } // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -457,11 +487,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { } assert(RemainingInstrs == 0 && "Instruction count mismatch!"); Scheduler.finishBlock(); - if (Scheduler.isPostRA()) { - // FIXME: Ideally, no further passes should rely on kill flags. However, - // thumb2 size reduction is currently an exception. - Scheduler.fixupKills(MBB); - } + // FIXME: Ideally, no further passes should rely on kill flags. However, + // thumb2 size reduction is currently an exception, so the PostMIScheduler + // needs to do this. + if (FixKillFlags) + Scheduler.fixupKills(&*MBB); } Scheduler.finalizeSchedule(); } @@ -470,14 +500,13 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { // unimplemented } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ReadyQueue::dump() { - dbgs() << Name << ": "; + dbgs() << "Queue " << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; } -#endif //===----------------------------------------------------------------------===// // ScheduleDAGMI - Basic machine instruction scheduling. This is @@ -485,9 +514,8 @@ void ReadyQueue::dump() { // virtual registers. // ===----------------------------------------------------------------------===/ +// Provide a vtable anchor. ScheduleDAGMI::~ScheduleDAGMI() { - DeleteContainerPointers(Mutations); - delete SchedImpl; } bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { @@ -525,9 +553,14 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { dbgs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif + // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency()) + SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency(); + --SuccSU->NumPredsLeft; if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) SchedImpl->releaseTopNode(SuccSU); @@ -559,9 +592,14 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif + // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency()) + PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency(); + --PredSU->NumSuccsLeft; if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) SchedImpl->releaseBottomNode(PredSU); @@ -625,6 +663,9 @@ bool ScheduleDAGMI::checkSchedLimit() { /// does not consider liveness or register pressure. It is useful for PostRA /// scheduling and potentially other custom schedulers. void ScheduleDAGMI::schedule() { + DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); + // Build the DAG. buildSchedGraph(AA); @@ -647,7 +688,11 @@ void ScheduleDAGMI::schedule() { initQueues(TopRoots, BotRoots); bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; @@ -673,10 +718,13 @@ void ScheduleDAGMI::schedule() { CurrentBottom = MI; } } - updateQueues(SU, IsTopNode); - - // Notify the scheduling strategy after updating the DAG. + // Notify the scheduling strategy before updating the DAG. + // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues + // runs, it can then use the accurate ReadyCycle time to determine whether + // newly released nodes can move to the readyQ. SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -721,8 +769,8 @@ findRootsAndBiasEdges(SmallVectorImpl &TopRoots, /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues(ArrayRef TopRoots, ArrayRef BotRoots) { - NextClusterSucc = NULL; - NextClusterPred = NULL; + NextClusterSucc = nullptr; + NextClusterPred = nullptr; // Release all DAG roots for scheduling, not including EntrySU/ExitSU. // @@ -770,17 +818,17 @@ void ScheduleDAGMI::placeDebugValues() { for (std::vector >::iterator DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair P = *prior(DI); + std::pair P = *std::prev(DI); MachineInstr *DbgValue = P.first; MachineBasicBlock::iterator OrigPrevMI = P.second; if (&*RegionBegin == DbgValue) ++RegionBegin; BB->splice(++OrigPrevMI, BB, DbgValue); - if (OrigPrevMI == llvm::prior(RegionEnd)) + if (OrigPrevMI == std::prev(RegionEnd)) RegionEnd = DbgValue; } DbgValues.clear(); - FirstDbgValue = NULL; + FirstDbgValue = nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -816,8 +864,7 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs); // For convenience remember the end of the liveness region. - LiveRegionEnd = - (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); + LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd); SUPressureDiffs.clear(); @@ -863,6 +910,13 @@ void ScheduleDAGMILive::initRegPressure() { updatePressureDiffs(LiveUses); } + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); // Cache the list of excess pressure sets in this region. This will also track @@ -906,8 +960,9 @@ updateScheduledPressure(const SUnit *SU, unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); if (NewMaxPressure[ID] >= Limit - 2) { DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " - << NewMaxPressure[ID] << " > " << Limit << "(+ " - << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + << NewMaxPressure[ID] + << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit + << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); } } } @@ -938,18 +993,24 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { } // RegisterPressureTracker guarantees that readsReg is true for LiveUses. assert(VNI && "No live value at use."); - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - SUnit *SU = UI->SU; - DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " - << *SU->getInstr()); + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; // If this use comes before the reaching def, it cannot be a last use, so // descrease its pressure change. if (!SU->isScheduled && SU != &ExitSU) { LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(SU->getInstr())); - if (LRQ.valueIn() == VNI) - getPressureDiff(SU).addPressureChange(Reg, true, &MRI); + if (LRQ.valueIn() == VNI) { + PressureDiff &PDiff = getPressureDiff(SU); + PDiff.addPressureChange(Reg, true, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } } } } @@ -960,12 +1021,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { /// only includes instructions that have DAG nodes, not scheduling boundaries. /// /// This is a skeletal driver, with all the functionality pushed into helpers, -/// so that it can be easilly extended by experimental schedulers. Generally, +/// so that it can be easily extended by experimental schedulers. Generally, /// implementing MachineSchedStrategy should be sufficient to implement a new /// scheduling algorithm. However, if a scheduler further subclasses /// ScheduleDAGMILive then it will want to override this virtual method in order /// to update any specialized state. void ScheduleDAGMILive::schedule() { + DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -979,8 +1042,16 @@ void ScheduleDAGMILive::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + if (ShouldTrackPressure) { + dbgs() << " Pressure Diff : "; + getPressureDiff(&SU).dump(*TRI); + } + dbgs() << '\n'; + } + ); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. @@ -992,15 +1063,17 @@ void ScheduleDAGMILive::schedule() { } bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; scheduleMI(SU, IsTopNode); - updateQueues(SU, IsTopNode); - if (DFSResult) { unsigned SubtreeID = DFSResult->getSubtreeID(SU); if (!ScheduledTrees.test(SubtreeID)) { @@ -1012,6 +1085,8 @@ void ScheduleDAGMILive::schedule() { // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -1111,14 +1186,15 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { unsigned LiveOutHeight = DefSU->getHeight(); unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; // Visit all local users of the vreg def. - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - if (UI->SU == &ExitSU) + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + if (SU == &ExitSU) continue; // Only consider uses of the phi. LiveQueryResult LRQ = - LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + LI.Query(LIS->getInstructionIndex(SU->getInstr())); if (!LRQ.valueIn()->isPHIDef()) continue; @@ -1126,10 +1202,10 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // overestimate in strange cases. This allows cyclic latency to be // estimated as the minimum slack of the vreg's depth or height. unsigned CyclicLatency = 0; - if (LiveOutDepth > UI->SU->getDepth()) - CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + if (LiveOutDepth > SU->getDepth()) + CyclicLatency = LiveOutDepth - SU->getDepth(); - unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; if (LiveInHeight > LiveOutHeight) { if (LiveInHeight - LiveOutHeight < CyclicLatency) CyclicLatency = LiveInHeight - LiveOutHeight; @@ -1138,7 +1214,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { CyclicLatency = 0; DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" - << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + << SU->NodeNum << ") = " << CyclicLatency << "c\n"); if (CyclicLatency > MaxCyclicLatency) MaxCyclicLatency = CyclicLatency; } @@ -1165,6 +1241,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { // Update top scheduled pressure. TopRPTracker.advance(); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); } } @@ -1187,6 +1268,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { SmallVector LiveUses; BotRPTracker.recede(&LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + DEBUG( + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); updatePressureDiffs(LiveUses); } @@ -1207,9 +1293,11 @@ class LoadClusterMutation : public ScheduleDAGMutation { unsigned Offset; LoadInfo(SUnit *su, unsigned reg, unsigned ofs) : SU(su), BaseReg(reg), Offset(ofs) {} + + bool operator<(const LoadInfo &RHS) const { + return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset); + } }; - static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, - const LoadClusterMutation::LoadInfo &RHS); const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -1218,20 +1306,12 @@ public: const TargetRegisterInfo *tri) : TII(tii), TRI(tri) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; protected: void clusterNeighboringLoads(ArrayRef Loads, ScheduleDAGMI *DAG); }; } // anonymous -bool LoadClusterMutation::LoadInfoLess( - const LoadClusterMutation::LoadInfo &LHS, - const LoadClusterMutation::LoadInfo &RHS) { - if (LHS.BaseReg != RHS.BaseReg) - return LHS.BaseReg < RHS.BaseReg; - return LHS.Offset < RHS.Offset; -} - void LoadClusterMutation::clusterNeighboringLoads(ArrayRef Loads, ScheduleDAGMI *DAG) { SmallVector LoadRecords; @@ -1239,12 +1319,12 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef Loads, SUnit *SU = Loads[Idx]; unsigned BaseReg; unsigned Offset; - if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) + if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); } if (LoadRecords.size() < 2) return; - std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); + std::sort(LoadRecords.begin(), LoadRecords.end()); unsigned ClusterLength = 1; for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { @@ -1317,25 +1397,49 @@ namespace { /// \brief Post-process the DAG to create cluster edges between instructions /// that may be fused by the processor into a single operation. class MacroFusion : public ScheduleDAGMutation { - const TargetInstrInfo *TII; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; public: - MacroFusion(const TargetInstrInfo *tii): TII(tii) {} + MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) + : TII(TII), TRI(TRI) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; }; } // anonymous +/// Returns true if \p MI reads a register written by \p Other. +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, + const MachineInstr &Other) { + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Other.modifiesRegister(Reg, &TRI)) + return true; + } + return false; +} + /// \brief Callback from DAG postProcessing to create cluster edges to encourage /// fused operations. void MacroFusion::apply(ScheduleDAGMI *DAG) { // For now, assume targets can only fuse with the branch. - MachineInstr *Branch = DAG->ExitSU.getInstr(); + SUnit &ExitSU = DAG->ExitSU; + MachineInstr *Branch = ExitSU.getInstr(); if (!Branch) return; - for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { - SUnit *SU = &DAG->SUnits[--Idx]; - if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) + for (SUnit &SU : DAG->SUnits) { + // SUnits with successors can't be schedule in front of the ExitSU. + if (!SU.Succs.empty()) + continue; + // We only care if the node writes to a register that the branch reads. + MachineInstr *Pred = SU.getInstr(); + if (!HasDataDep(TRI, *Branch, *Pred)) + continue; + + if (!TII.shouldScheduleAdjacent(Pred, Branch)) continue; // Create a single weak edge from SU to ExitSU. The only effect is to cause @@ -1344,11 +1448,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) { // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling // of SU, we could create an artificial edge from the deepest root, but it // hasn't been needed yet. - bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); + bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); (void)Success; assert(Success && "No DAG nodes should be reachable from ExitSU"); - DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n"); break; } } @@ -1370,7 +1474,7 @@ class CopyConstrain : public ScheduleDAGMutation { public: CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} - virtual void apply(ScheduleDAGMI *DAG); + void apply(ScheduleDAGMI *DAG) override; protected: void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); @@ -1412,12 +1516,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check if either the dest or source is local. If it's live across a back // edge, it's not local. Note that if both vregs are live across the back // edge, we cannot successfully contrain the copy without cyclic scheduling. - unsigned LocalReg = DstReg; - unsigned GlobalReg = SrcReg; + // If both the copy's source and dest are local live intervals, then we + // should treat the dest as the global for the purpose of adding + // constraints. This adds edges from source's other uses to the copy. + unsigned LocalReg = SrcReg; + unsigned GlobalReg = DstReg; LiveInterval *LocalLI = &LIS->getInterval(LocalReg); if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { - LocalReg = SrcReg; - GlobalReg = DstReg; + LocalReg = DstReg; + GlobalReg = SrcReg; LocalLI = &LIS->getInterval(LocalReg); if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) return; @@ -1446,19 +1553,19 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check if GlobalLI contains a hole in the vicinity of LocalLI. if (GlobalSegment != GlobalLI->begin()) { // Two address defs have no hole. - if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end, + if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, GlobalSegment->start)) { return; } // If the prior global segment may be defined by the same two-address // instruction that also defines LocalLI, then can't make a hole here. - if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start, + if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, LocalLI->beginIndex())) { return; } // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise // it would be a disconnected component in the live range. - assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && + assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && "Disconnected LRG within the scheduling region."); } MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); @@ -1554,7 +1661,7 @@ void SchedBoundary::reset() { // invalid, placeholder HazardRecs. if (HazardRec && HazardRec->isEnabled()) { delete HazardRec; - HazardRec = 0; + HazardRec = nullptr; } Available.clear(); Pending.clear(); @@ -1574,7 +1681,7 @@ void SchedBoundary::reset() { // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is // reserved (SchedBoundary::ReservedCycles). - MaxObservedLatency = 0; + MaxObservedStall = 0; #endif // Reserve a zero-count for invalid CritResIdx. ExecutedResCounts.resize(1); @@ -1674,8 +1781,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) { for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle) + unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles); + if (NRCycle > CurrCycle) { +#ifndef NDEBUG + MaxObservedStall = std::max(PI->Cycles, MaxObservedStall); +#endif + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(PI->ProcResourceIdx) + << "=" << NRCycle << "c\n"); return true; + } } } return false; @@ -1684,7 +1799,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) { // Find the unscheduled node in ReadySUs with the highest latency. unsigned SchedBoundary:: findMaxLatency(ArrayRef ReadySUs) { - SUnit *LateSU = 0; + SUnit *LateSU = nullptr; unsigned RemLatency = 0; for (ArrayRef::iterator I = ReadySUs.begin(), E = ReadySUs.end(); I != E; ++I) { @@ -1731,6 +1846,16 @@ getOtherResourceCount(unsigned &OtherCritIdx) { } void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + +#ifndef NDEBUG + // ReadyCycle was been bumped up to the CurrCycle when this node was + // scheduled, but CurrCycle may have been eagerly advanced immediately after + // scheduling, so may now be greater than ReadyCycle. + if (ReadyCycle > CurrCycle) + MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); +#endif + if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -1750,18 +1875,6 @@ void SchedBoundary::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - MaxObservedLatency = std::max(Latency, MaxObservedLatency); -#endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; - } releaseNode(SU, SU->TopReadyCycle); } @@ -1769,20 +1882,6 @@ void SchedBoundary::releaseBottomNode(SUnit *SU) { if (SU->isScheduled) return; - assert(SU->getInstr() && "Scheduled SUnit must have instr"); - - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - MaxObservedLatency = std::max(Latency, MaxObservedLatency); -#endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; - } releaseNode(SU, SU->BotReadyCycle); } @@ -1949,10 +2048,12 @@ void SchedBoundary::bumpNode(SUnit *SU) { PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { - ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle; -#ifndef NDEBUG - MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency); -#endif + if (isTop()) { + ReservedCycles[PIdx] = + std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); + } + else + ReservedCycles[PIdx] = NextCycle; } } } @@ -2055,14 +2156,16 @@ SUnit *SchedBoundary::pickOnlyChoice() { } } for (unsigned i = 0; Available.empty(); ++i) { - assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && - "permanent hazard"); (void)i; +// FIXME: Re-enable assert once PR20057 is resolved. +// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && +// "permanent hazard"); + (void)i; bumpCycle(CurrCycle + 1); releasePending(); } if (Available.size() == 1) return *Available.begin(); - return NULL; + return nullptr; } #ifndef NDEBUG @@ -2096,111 +2199,6 @@ void SchedBoundary::dumpScheduledState() { // GenericScheduler - Generic implementation of MachineSchedStrategy. //===----------------------------------------------------------------------===// -namespace { -/// Base class for GenericScheduler. This class maintains information about -/// scheduling candidates based on TargetSchedModel making it easy to implement -/// heuristics for either preRA or postRA scheduling. -class GenericSchedulerBase : public MachineSchedStrategy { -public: - /// Represent the type of SchedCandidate found within a single queue. - /// pickNodeBidirectional depends on these listed by decreasing priority. - enum CandReason { - NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, - ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; - -#ifndef NDEBUG - static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); -#endif - - /// Policy for scheduling the next instruction in the candidate's zone. - struct CandPolicy { - bool ReduceLatency; - unsigned ReduceResIdx; - unsigned DemandResIdx; - - CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} - }; - - /// Status of an instruction's critical resource consumption. - struct SchedResourceDelta { - // Count critical resources in the scheduled region required by SU. - unsigned CritResources; - - // Count critical resources from another region consumed by SU. - unsigned DemandedResources; - - SchedResourceDelta(): CritResources(0), DemandedResources(0) {} - - bool operator==(const SchedResourceDelta &RHS) const { - return CritResources == RHS.CritResources - && DemandedResources == RHS.DemandedResources; - } - bool operator!=(const SchedResourceDelta &RHS) const { - return !operator==(RHS); - } - }; - - /// Store the state used by GenericScheduler heuristics, required for the - /// lifetime of one invocation of pickNode(). - struct SchedCandidate { - CandPolicy Policy; - - // The best SUnit candidate. - SUnit *SU; - - // The reason for this candidate. - CandReason Reason; - - // Set of reasons that apply to multiple candidates. - uint32_t RepeatReasonSet; - - // Register pressure values for the best candidate. - RegPressureDelta RPDelta; - - // Critical resource consumption of the best candidate. - SchedResourceDelta ResDelta; - - SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} - - bool isValid() const { return SU; } - - // Copy the status of another candidate without changing policy. - void setBest(SchedCandidate &Best) { - assert(Best.Reason != NoCand && "uninitialized Sched candidate"); - SU = Best.SU; - Reason = Best.Reason; - RPDelta = Best.RPDelta; - ResDelta = Best.ResDelta; - } - - bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } - void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } - - void initResourceDelta(const ScheduleDAGMI *DAG, - const TargetSchedModel *SchedModel); - }; - -protected: - const MachineSchedContext *Context; - const TargetSchedModel *SchedModel; - const TargetRegisterInfo *TRI; - - SchedRemainder Rem; -protected: - GenericSchedulerBase(const MachineSchedContext *C): - Context(C), SchedModel(0), TRI(0) {} - - void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, - SchedBoundary *OtherZone); - -#ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand); -#endif -}; -} // namespace - void GenericSchedulerBase::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { @@ -2224,7 +2222,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone) { - // Apply preemptive heuristics based on the the total latency and resources + // Apply preemptive heuristics based on the total latency and resources // inside and outside this zone. Potential stalls should be considered before // following this policy. @@ -2351,7 +2349,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { Latency = Cand.SU->getDepth(); break; } - dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) << ":" << P.getUnitInc() << " "; @@ -2436,65 +2434,6 @@ static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); } -namespace { -/// GenericScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class GenericScheduler : public GenericSchedulerBase { - ScheduleDAGMILive *DAG; - - // State of the top and bottom scheduled instruction boundaries. - SchedBoundary Top; - SchedBoundary Bot; - - MachineSchedPolicy RegionPolicy; -public: - GenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"), - Bot(SchedBoundary::BotQID, "BotQ") {} - - virtual void initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) override; - - virtual bool shouldTrackPressure() const override { - return RegionPolicy.ShouldTrackPressure; - } - - virtual void initialize(ScheduleDAGMI *dag) override; - - virtual SUnit *pickNode(bool &IsTopNode) override; - - virtual void schedNode(SUnit *SU, bool IsTopNode) override; - - virtual void releaseTopNode(SUnit *SU) override { - Top.releaseTopNode(SU); - } - - virtual void releaseBottomNode(SUnit *SU) override { - Bot.releaseBottomNode(SU); - } - - virtual void registerRoots() override; - -protected: - void checkAcyclicLatency(); - - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker); - - SUnit *pickNodeBidirectional(bool &IsTopNode); - - void pickNodeFromQueue(SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); - - void reschedulePhysRegCopies(SUnit *SU, bool isTop); -}; -} // namespace - void GenericScheduler::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "(PreRA)GenericScheduler needs vreg liveness"); @@ -2511,14 +2450,15 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); if (!Top.HazardRec) { Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } if (!Bot.HazardRec) { Bot.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } } @@ -2526,8 +2466,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) { - const TargetMachine &TM = Context->MF->getTarget(); - const TargetLowering *TLI = TM.getTargetLowering(); + const MachineFunction &MF = *Begin->getParent()->getParent(); + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); // Avoid setting up the register pressure tracker for small regions to save // compile time. As a rough heuristic, only track pressure when the number of @@ -2547,8 +2487,8 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, RegionPolicy.OnlyBottomUp = true; // Allow the subtarget to override default policy. - const TargetSubtargetInfo &ST = TM.getSubtarget(); - ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); + MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End, + NumRegionInstrs); // After subtarget overrides, apply command line options. if (!EnableRegPressure) @@ -2570,6 +2510,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } +void GenericScheduler::dumpPolicy() { + dbgs() << "GenericScheduler RegionPolicy: " + << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure + << " OnlyTopDown=" << RegionPolicy.OnlyTopDown + << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp + << "\n"; +} + /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic /// critical path by more cycles than it takes to drain the instruction buffer. /// We estimate an upper bounds on in-flight instructions as: @@ -2616,7 +2564,10 @@ void GenericScheduler::registerRoots() { if ((*I)->getDepth() > Rem.CriticalPath) Rem.CriticalPath = (*I)->getDepth(); } - DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); + if (DumpCriticalPathLength) { + errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; + } if (EnableCyclicPath) { Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); @@ -2638,8 +2589,8 @@ static bool tryPressure(const PressureChange &TryP, } // If one candidate decreases and the other increases, go with it. // Invalid candidates have UnitInc==0. - if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, - Reason)) { + if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, + Reason)) { return true; } // If the candidates are decreasing pressure, reverse priority. @@ -2726,7 +2677,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, } } DEBUG(if (TryCand.RPDelta.Excess.isValid()) - dbgs() << " SU(" << TryCand.SU->NodeNum << ") " + dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") " << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); @@ -2741,8 +2692,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, TryCand, Cand, PhysRegCopy)) return; - // Avoid exceeding the target's limit. If signed PSetID is negative, it is - // invalid; convert it to INT_MAX to give it lowest priority. + // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess)) @@ -2802,8 +2752,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. - if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited - && tryLatency(TryCand, Cand, Zone)) { + if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency && + !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) { return; } @@ -2857,12 +2807,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Bot ONLY1\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Top ONLY1\n"); return SU; } CandPolicy NoPolicy; @@ -2915,7 +2865,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); - return NULL; + return nullptr; } SUnit *SU; do { @@ -3007,17 +2957,17 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { - ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C)); + ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. - DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); + DAG->addMutation(make_unique(DAG->TII, DAG->TRI)); if (EnableLoadCluster && DAG->TII->enableClusterLoads()) - DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(make_unique(DAG->TII, DAG->TRI)); if (EnableMacroFusion) - DAG->addMutation(new MacroFusion(DAG->TII)); + DAG->addMutation(make_unique(*DAG->TII, *DAG->TRI)); return DAG; } @@ -3029,75 +2979,25 @@ GenericSchedRegistry("converge", "Standard converging scheduler.", // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. //===----------------------------------------------------------------------===// -namespace { -/// PostGenericScheduler - Interface to the scheduling algorithm used by -/// ScheduleDAGMI. -/// -/// Callbacks from ScheduleDAGMI: -/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... -class PostGenericScheduler : public GenericSchedulerBase { - ScheduleDAGMI *DAG; - SchedBoundary Top; - SmallVector BotRoots; -public: - PostGenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} - - virtual ~PostGenericScheduler() {} - - virtual void initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) override { - /* no configurable policy */ - }; - - /// PostRA scheduling does not track pressure. - virtual bool shouldTrackPressure() const override { return false; } - - virtual void initialize(ScheduleDAGMI *Dag) override { - DAG = Dag; - SchedModel = DAG->getSchedModel(); - TRI = DAG->TRI; - - Rem.init(DAG, SchedModel); - Top.init(DAG, SchedModel, &Rem); - BotRoots.clear(); - - // Initialize the HazardRecognizers. If itineraries don't exist, are empty, - // or are disabled, then these HazardRecs will be disabled. - const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); - if (!Top.HazardRec) { - Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } - } - - virtual void registerRoots() override; - - virtual SUnit *pickNode(bool &IsTopNode) override; - - virtual void scheduleTree(unsigned SubtreeID) override { - llvm_unreachable("PostRA scheduler does not support subtree analysis."); - } - - virtual void schedNode(SUnit *SU, bool IsTopNode) override; +void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { + DAG = Dag; + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; - virtual void releaseTopNode(SUnit *SU) override { - Top.releaseTopNode(SU); - } + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + BotRoots.clear(); - // Only called for roots. - virtual void releaseBottomNode(SUnit *SU) override { - BotRoots.push_back(SU); + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, + // or are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + if (!Top.HazardRec) { + Top.HazardRec = + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } +} -protected: - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); - - void pickNodeFromQueue(SchedCandidate &Cand); -}; -} // namespace void PostGenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); @@ -3108,7 +3008,10 @@ void PostGenericScheduler::registerRoots() { if ((*I)->getDepth() > Rem.CriticalPath) Rem.CriticalPath = (*I)->getDepth(); } - DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); + if (DumpCriticalPathLength) { + errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n"; + } } /// Apply a set of heursitics to a new candidate for PostRA scheduling. @@ -3169,7 +3072,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); - return NULL; + return nullptr; } SUnit *SU; do { @@ -3179,7 +3082,7 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { SchedCandidate TopCand(NoPolicy); // Set the top-down policy based on the state of the current top zone and // the instructions outside the zone, including the bottom zone. - setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL); + setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); pickNodeFromQueue(TopCand); assert(TopCand.Reason != NoCand && "failed to find a candidate"); tracePick(TopCand, true); @@ -3203,7 +3106,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create a generic scheduler with no vreg liveness or DAG mutation passes. static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true); + return new ScheduleDAGMI(C, make_unique(C), /*IsPostRA=*/true); } //===----------------------------------------------------------------------===// @@ -3217,7 +3120,8 @@ struct ILPOrder { const BitVector *ScheduledTrees; bool MaximizeILP; - ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {} + ILPOrder(bool MaxILP) + : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {} /// \brief Apply a less-than relation on node priority. /// @@ -3251,9 +3155,9 @@ class ILPScheduler : public MachineSchedStrategy { std::vector ReadyQ; public: - ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} + ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {} - virtual void initialize(ScheduleDAGMI *dag) { + void initialize(ScheduleDAGMI *dag) override { assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); DAG = static_cast(dag); DAG->computeDFSResult(); @@ -3262,7 +3166,7 @@ public: ReadyQ.clear(); } - virtual void registerRoots() { + void registerRoots() override { // Restore the heap in ReadyQ with the updated DFS results. std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } @@ -3271,8 +3175,8 @@ public: /// ----------------------------------------- /// Callback to select the highest priority node from the ready Q. - virtual SUnit *pickNode(bool &IsTopNode) { - if (ReadyQ.empty()) return NULL; + SUnit *pickNode(bool &IsTopNode) override { + if (ReadyQ.empty()) return nullptr; std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); @@ -3287,19 +3191,19 @@ public: } /// \brief Scheduler callback to notify that a new subtree is scheduled. - virtual void scheduleTree(unsigned SubtreeID) { + void scheduleTree(unsigned SubtreeID) override { std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } /// Callback after a node is scheduled. Mark a newly scheduled tree, notify /// DFSResults, and resort the priority Q. - virtual void schedNode(SUnit *SU, bool IsTopNode) { + void schedNode(SUnit *SU, bool IsTopNode) override { assert(!IsTopNode && "SchedDFSResult needs bottom-up"); } - virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + void releaseTopNode(SUnit *) override { /*only called for top roots*/ } - virtual void releaseBottomNode(SUnit *SU) { + void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } @@ -3307,10 +3211,10 @@ public: } // namespace static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, new ILPScheduler(true)); + return new ScheduleDAGMILive(C, make_unique(true)); } static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, new ILPScheduler(false)); + return new ScheduleDAGMILive(C, make_unique(false)); } static MachineSchedRegistry ILPMaxRegistry( "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); @@ -3352,7 +3256,7 @@ public: InstructionShuffler(bool alternate, bool topdown) : IsAlternating(alternate), IsTopDown(topdown) {} - virtual void initialize(ScheduleDAGMI*) { + void initialize(ScheduleDAGMI*) override { TopQ.clear(); BottomQ.clear(); } @@ -3360,11 +3264,11 @@ public: /// Implement MachineSchedStrategy interface. /// ----------------------------------------- - virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *pickNode(bool &IsTopNode) override { SUnit *SU; if (IsTopDown) { do { - if (TopQ.empty()) return NULL; + if (TopQ.empty()) return nullptr; SU = TopQ.top(); TopQ.pop(); } while (SU->isScheduled); @@ -3372,7 +3276,7 @@ public: } else { do { - if (BottomQ.empty()) return NULL; + if (BottomQ.empty()) return nullptr; SU = BottomQ.top(); BottomQ.pop(); } while (SU->isScheduled); @@ -3383,12 +3287,12 @@ public: return SU; } - virtual void schedNode(SUnit *SU, bool IsTopNode) {} + void schedNode(SUnit *SU, bool IsTopNode) override {} - virtual void releaseTopNode(SUnit *SU) { + void releaseTopNode(SUnit *SU) override { TopQ.push(SU); } - virtual void releaseBottomNode(SUnit *SU) { + void releaseBottomNode(SUnit *SU) override { BottomQ.push(SU); } }; @@ -3399,7 +3303,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { bool TopDown = !ForceBottomUp; assert((TopDown || !ForceTopDown) && "-misched-topdown incompatible with -misched-bottomup"); - return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown)); + return new ScheduleDAGMILive(C, make_unique(Alternate, TopDown)); } static MachineSchedRegistry ShufflerRegistry( "shuffle", "Shuffle machine instructions alternating directions", @@ -3430,12 +3334,10 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } static bool isNodeHidden(const SUnit *Node) { - return (Node->Preds.size() > 10 || Node->Succs.size() > 10); - } - - static bool hasNodeAddressLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - return false; + if (ViewMISchedCutoff == 0) + return false; + return (Node->Preds.size() > ViewMISchedCutoff + || Node->Succs.size() > ViewMISchedCutoff); } /// If you want to override the dot attributes printed for a particular @@ -3455,7 +3357,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { raw_string_ostream SS(Str); const ScheduleDAGMI *DAG = static_cast(G); const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? - static_cast(G)->getDFSResult() : 0; + static_cast(G)->getDFSResult() : nullptr; SS << "SU:" << SU->NodeNum; if (DFS) SS << " I:" << DFS->getNumInstrs(SU); @@ -3469,7 +3371,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { std::string Str("shape=Mrecord"); const ScheduleDAGMI *DAG = static_cast(G); const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? - static_cast(G)->getDFSResult() : 0; + static_cast(G)->getDFSResult() : nullptr; if (DFS) { Str += ",style=filled,fillcolor=\"#"; Str += DOT::getColorString(DFS->getSubtreeID(N));