X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineScheduler.cpp;h=a13dab3fe78fc5572e220b1ec75346b87de5e7d8;hb=104db6b94a127e4e68db7528f50d3fcf24d543bd;hp=261942f534be0166af1300a9776504533401484c;hpb=bf72e1456556b12fa356bc3c44e16faafc3d4fbe;p=oota-llvm.git diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 261942f534b..a13dab3fe78 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -49,6 +49,11 @@ DumpCriticalPathLength("misched-dcpl", cl::Hidden, static cl::opt ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed")); +/// In some situations a few uninteresting nodes depend on nearly all other +/// nodes in the graph, provide a cutoff to hide them. +static cl::opt ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, + cl::desc("Hide nodes with more predecessor/successor than cutoff")); + static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); @@ -106,7 +111,7 @@ public: void print(raw_ostream &O, const Module* = nullptr) const override; protected: - void scheduleRegions(ScheduleDAGInstrs &Scheduler); + void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags); }; /// MachineScheduler runs after coalescing and before register allocation. @@ -144,12 +149,12 @@ char MachineScheduler::ID = 0; char &llvm::MachineSchedulerID = MachineScheduler::ID; -INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", +INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachineScheduler, "misched", +INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() @@ -161,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequiredID(MachineDominatorsID); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -209,6 +214,11 @@ static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched); +static cl::opt EnableMachineSched( + "enable-misched", + cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), + cl::Hidden); + /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); @@ -304,14 +314,20 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { - DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); + if (EnableMachineSched.getNumOccurrences()) { + if (!EnableMachineSched) + return false; + } else if (!mf.getSubtarget().enableMachineScheduler()) + return false; + + DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); // Initialize the context of the pass. MF = &mf; MLI = &getAnalysis(); MDT = &getAnalysis(); PassConfig = &getAnalysis(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); LIS = &getAnalysis(); @@ -324,7 +340,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr Scheduler(createMachineScheduler()); - scheduleRegions(*Scheduler); + scheduleRegions(*Scheduler, false); DEBUG(LIS->dump()); if (VerifyScheduling) @@ -336,9 +352,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (skipOptnoneFunction(*mf.getFunction())) return false; - const TargetSubtargetInfo &ST = - mf.getTarget().getSubtarget(); - if (!ST.enablePostMachineScheduler()) { + if (!mf.getSubtarget().enablePostRAScheduler()) { DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); return false; } @@ -354,7 +368,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr Scheduler(createPostMachineScheduler()); - scheduleRegions(*Scheduler); + scheduleRegions(*Scheduler, true); if (VerifyScheduling) MF->verify(this, "After post machine scheduling."); @@ -374,15 +388,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, - const TargetInstrInfo *TII, - bool IsPostRA) { + const TargetInstrInfo *TII) { return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); } /// Main driver for both MachineScheduler and PostMachineScheduler. -void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, + bool FixKillFlags) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. // @@ -391,7 +404,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { - Scheduler.startBlock(MBB); + Scheduler.startBlock(&*MBB); #ifndef NDEBUG if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) @@ -420,7 +433,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { // Avoid decrementing RegionEnd for blocks with no terminator. if (RegionEnd != MBB->end() || - isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) { + isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { --RegionEnd; // Count the boundary instruction. --RemainingInstrs; @@ -430,13 +443,15 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { // instruction stream until we find the nearest boundary. unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { - if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA)) + for(;I != MBB->begin(); --I, --RemainingInstrs) { + if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII)) break; + if (!I->isDebugValue()) + ++NumRegionInstrs; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs); + Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == std::prev(RegionEnd)) { @@ -445,8 +460,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { Scheduler.exitRegion(); continue; } - DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "") - << "MI Scheduling **********\n"); + DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *I << " To: "; @@ -473,11 +487,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { } assert(RemainingInstrs == 0 && "Instruction count mismatch!"); Scheduler.finishBlock(); - if (Scheduler.isPostRA()) { - // FIXME: Ideally, no further passes should rely on kill flags. However, - // thumb2 size reduction is currently an exception. - Scheduler.fixupKills(MBB); - } + // FIXME: Ideally, no further passes should rely on kill flags. However, + // thumb2 size reduction is currently an exception, so the PostMIScheduler + // needs to do this. + if (FixKillFlags) + Scheduler.fixupKills(&*MBB); } Scheduler.finalizeSchedule(); } @@ -488,7 +502,7 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { LLVM_DUMP_METHOD void ReadyQueue::dump() { - dbgs() << Name << ": "; + dbgs() << "Queue " << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -649,6 +663,9 @@ bool ScheduleDAGMI::checkSchedLimit() { /// does not consider liveness or register pressure. It is useful for PostRA /// scheduling and potentially other custom schedulers. void ScheduleDAGMI::schedule() { + DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); + // Build the DAG. buildSchedGraph(AA); @@ -671,7 +688,11 @@ void ScheduleDAGMI::schedule() { initQueues(TopRoots, BotRoots); bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; @@ -889,6 +910,13 @@ void ScheduleDAGMILive::initRegPressure() { updatePressureDiffs(LiveUses); } + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); // Cache the list of excess pressure sets in this region. This will also track @@ -932,8 +960,9 @@ updateScheduledPressure(const SUnit *SU, unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); if (NewMaxPressure[ID] >= Limit - 2) { DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " - << NewMaxPressure[ID] << " > " << Limit << "(+ " - << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + << NewMaxPressure[ID] + << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit + << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); } } } @@ -964,18 +993,24 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { } // RegisterPressureTracker guarantees that readsReg is true for LiveUses. assert(VNI && "No live value at use."); - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - SUnit *SU = UI->SU; - DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " - << *SU->getInstr()); + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; // If this use comes before the reaching def, it cannot be a last use, so // descrease its pressure change. if (!SU->isScheduled && SU != &ExitSU) { LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(SU->getInstr())); - if (LRQ.valueIn() == VNI) - getPressureDiff(SU).addPressureChange(Reg, true, &MRI); + if (LRQ.valueIn() == VNI) { + PressureDiff &PDiff = getPressureDiff(SU); + PDiff.addPressureChange(Reg, true, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } } } } @@ -986,12 +1021,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { /// only includes instructions that have DAG nodes, not scheduling boundaries. /// /// This is a skeletal driver, with all the functionality pushed into helpers, -/// so that it can be easilly extended by experimental schedulers. Generally, +/// so that it can be easily extended by experimental schedulers. Generally, /// implementing MachineSchedStrategy should be sufficient to implement a new /// scheduling algorithm. However, if a scheduler further subclasses /// ScheduleDAGMILive then it will want to override this virtual method in order /// to update any specialized state. void ScheduleDAGMILive::schedule() { + DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -1005,8 +1042,16 @@ void ScheduleDAGMILive::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + if (ShouldTrackPressure) { + dbgs() << " Pressure Diff : "; + getPressureDiff(&SU).dump(*TRI); + } + dbgs() << '\n'; + } + ); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. @@ -1018,15 +1063,17 @@ void ScheduleDAGMILive::schedule() { } bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; scheduleMI(SU, IsTopNode); - updateQueues(SU, IsTopNode); - if (DFSResult) { unsigned SubtreeID = DFSResult->getSubtreeID(SU); if (!ScheduledTrees.test(SubtreeID)) { @@ -1038,6 +1085,8 @@ void ScheduleDAGMILive::schedule() { // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -1137,14 +1186,15 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { unsigned LiveOutHeight = DefSU->getHeight(); unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; // Visit all local users of the vreg def. - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - if (UI->SU == &ExitSU) + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + if (SU == &ExitSU) continue; // Only consider uses of the phi. LiveQueryResult LRQ = - LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + LI.Query(LIS->getInstructionIndex(SU->getInstr())); if (!LRQ.valueIn()->isPHIDef()) continue; @@ -1152,10 +1202,10 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // overestimate in strange cases. This allows cyclic latency to be // estimated as the minimum slack of the vreg's depth or height. unsigned CyclicLatency = 0; - if (LiveOutDepth > UI->SU->getDepth()) - CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + if (LiveOutDepth > SU->getDepth()) + CyclicLatency = LiveOutDepth - SU->getDepth(); - unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; if (LiveInHeight > LiveOutHeight) { if (LiveInHeight - LiveOutHeight < CyclicLatency) CyclicLatency = LiveInHeight - LiveOutHeight; @@ -1164,7 +1214,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { CyclicLatency = 0; DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" - << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + << SU->NodeNum << ") = " << CyclicLatency << "c\n"); if (CyclicLatency > MaxCyclicLatency) MaxCyclicLatency = CyclicLatency; } @@ -1191,6 +1241,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { // Update top scheduled pressure. TopRPTracker.advance(); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); } } @@ -1213,6 +1268,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { SmallVector LiveUses; BotRPTracker.recede(&LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + DEBUG( + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); updatePressureDiffs(LiveUses); } @@ -1259,7 +1319,7 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef Loads, SUnit *SU = Loads[Idx]; unsigned BaseReg; unsigned Offset; - if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) + if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); } if (LoadRecords.size() < 2) @@ -1337,25 +1397,49 @@ namespace { /// \brief Post-process the DAG to create cluster edges between instructions /// that may be fused by the processor into a single operation. class MacroFusion : public ScheduleDAGMutation { - const TargetInstrInfo *TII; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; public: - MacroFusion(const TargetInstrInfo *tii): TII(tii) {} + MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) + : TII(TII), TRI(TRI) {} void apply(ScheduleDAGMI *DAG) override; }; } // anonymous +/// Returns true if \p MI reads a register written by \p Other. +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, + const MachineInstr &Other) { + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Other.modifiesRegister(Reg, &TRI)) + return true; + } + return false; +} + /// \brief Callback from DAG postProcessing to create cluster edges to encourage /// fused operations. void MacroFusion::apply(ScheduleDAGMI *DAG) { // For now, assume targets can only fuse with the branch. - MachineInstr *Branch = DAG->ExitSU.getInstr(); + SUnit &ExitSU = DAG->ExitSU; + MachineInstr *Branch = ExitSU.getInstr(); if (!Branch) return; - for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { - SUnit *SU = &DAG->SUnits[--Idx]; - if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) + for (SUnit &SU : DAG->SUnits) { + // SUnits with successors can't be schedule in front of the ExitSU. + if (!SU.Succs.empty()) + continue; + // We only care if the node writes to a register that the branch reads. + MachineInstr *Pred = SU.getInstr(); + if (!HasDataDep(TRI, *Branch, *Pred)) + continue; + + if (!TII.shouldScheduleAdjacent(Pred, Branch)) continue; // Create a single weak edge from SU to ExitSU. The only effect is to cause @@ -1364,11 +1448,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) { // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling // of SU, we could create an artificial edge from the deepest root, but it // hasn't been needed yet. - bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); + bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); (void)Success; assert(Success && "No DAG nodes should be reachable from ExitSU"); - DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n"); break; } } @@ -1432,12 +1516,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { // Check if either the dest or source is local. If it's live across a back // edge, it's not local. Note that if both vregs are live across the back // edge, we cannot successfully contrain the copy without cyclic scheduling. - unsigned LocalReg = DstReg; - unsigned GlobalReg = SrcReg; + // If both the copy's source and dest are local live intervals, then we + // should treat the dest as the global for the purpose of adding + // constraints. This adds edges from source's other uses to the copy. + unsigned LocalReg = SrcReg; + unsigned GlobalReg = DstReg; LiveInterval *LocalLI = &LIS->getInterval(LocalReg); if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { - LocalReg = SrcReg; - GlobalReg = DstReg; + LocalReg = DstReg; + GlobalReg = SrcReg; LocalLI = &LIS->getInterval(LocalReg); if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) return; @@ -2135,7 +2222,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone) { - // Apply preemptive heuristics based on the the total latency and resources + // Apply preemptive heuristics based on the total latency and resources // inside and outside this zone. Potential stalls should be considered before // following this policy. @@ -2262,7 +2349,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { Latency = Cand.SU->getDepth(); break; } - dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) << ":" << P.getUnitInc() << " "; @@ -2423,6 +2510,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } +void GenericScheduler::dumpPolicy() { + dbgs() << "GenericScheduler RegionPolicy: " + << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure + << " OnlyTopDown=" << RegionPolicy.OnlyTopDown + << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp + << "\n"; +} + /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic /// critical path by more cycles than it takes to drain the instruction buffer. /// We estimate an upper bounds on in-flight instructions as: @@ -2582,7 +2677,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, } } DEBUG(if (TryCand.RPDelta.Excess.isValid()) - dbgs() << " SU(" << TryCand.SU->NodeNum << ") " + dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") " << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); @@ -2597,8 +2692,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, TryCand, Cand, PhysRegCopy)) return; - // Avoid exceeding the target's limit. If signed PSetID is negative, it is - // invalid; convert it to INT_MAX to give it lowest priority. + // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, RegExcess)) @@ -2658,8 +2752,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. - if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited - && tryLatency(TryCand, Cand, Zone)) { + if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency && + !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) { return; } @@ -2713,12 +2807,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Bot ONLY1\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Top ONLY1\n"); return SU; } CandPolicy NoPolicy; @@ -2873,7 +2967,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { if (EnableLoadCluster && DAG->TII->enableClusterLoads()) DAG->addMutation(make_unique(DAG->TII, DAG->TRI)); if (EnableMacroFusion) - DAG->addMutation(make_unique(DAG->TII)); + DAG->addMutation(make_unique(*DAG->TII, *DAG->TRI)); return DAG; } @@ -3240,12 +3334,10 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } static bool isNodeHidden(const SUnit *Node) { - return (Node->Preds.size() > 10 || Node->Succs.size() > 10); - } - - static bool hasNodeAddressLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - return false; + if (ViewMISchedCutoff == 0) + return false; + return (Node->Preds.size() > ViewMISchedCutoff + || Node->Succs.size() > ViewMISchedCutoff); } /// If you want to override the dot attributes printed for a particular