//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "misched"
17 #include "HexagonMachineScheduler.h"
23 /// Check if scheduling of this SU is possible
24 /// in the current packet.
25 /// It is _not_ precise (statefull), it is more like
26 /// another heuristic. Many corner cases are figured
28 bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
29 if (!SU || !SU->getInstr())
32 // First see if the pipeline could receive this instruction
33 // in the current cycle.
34 switch (SU->getInstr()->getOpcode()) {
36 if (!ResourcesModel->canReserveResources(SU->getInstr()))
38 case TargetOpcode::EXTRACT_SUBREG:
39 case TargetOpcode::INSERT_SUBREG:
40 case TargetOpcode::SUBREG_TO_REG:
41 case TargetOpcode::REG_SEQUENCE:
42 case TargetOpcode::IMPLICIT_DEF:
43 case TargetOpcode::COPY:
44 case TargetOpcode::INLINEASM:
48 // Now see if there are no other dependencies to instructions already
50 for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
51 if (Packet[i]->Succs.size() == 0)
53 for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
54 E = Packet[i]->Succs.end(); I != E; ++I) {
55 // Since we do not add pseudos to packets, might as well
56 // ignore order dependencies.
60 if (I->getSUnit() == SU)
67 /// Keep track of available resources.
68 bool VLIWResourceModel::reserveResources(SUnit *SU) {
69 bool startNewCycle = false;
70 // If this SU does not fit in the packet
72 if (!isResourceAvailable(SU)) {
73 ResourcesModel->clearResources();
79 switch (SU->getInstr()->getOpcode()) {
81 ResourcesModel->reserveResources(SU->getInstr());
83 case TargetOpcode::EXTRACT_SUBREG:
84 case TargetOpcode::INSERT_SUBREG:
85 case TargetOpcode::SUBREG_TO_REG:
86 case TargetOpcode::REG_SEQUENCE:
87 case TargetOpcode::IMPLICIT_DEF:
88 case TargetOpcode::KILL:
89 case TargetOpcode::PROLOG_LABEL:
90 case TargetOpcode::EH_LABEL:
91 case TargetOpcode::COPY:
92 case TargetOpcode::INLINEASM:
98 DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
99 for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
100 DEBUG(dbgs() << "\t[" << i << "] SU(");
101 DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
102 DEBUG(Packet[i]->getInstr()->dump());
106 // If packet is now full, reset the state so in the next cycle
108 if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
109 ResourcesModel->clearResources();
112 startNewCycle = true;
115 return startNewCycle;
118 /// schedule - Called back from MachineScheduler::runOnMachineFunction
119 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
120 /// only includes instructions that have DAG nodes, not scheduling boundaries.
121 void VLIWMachineScheduler::schedule() {
123 << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
124 << " " << BB->getName()
125 << " in_func " << BB->getParent()->getFunction()->getName()
126 << " at loop depth " << MLI->getLoopDepth(BB)
129 buildDAGWithRegPressure();
131 // To view Height/Depth correctly, they should be accessed at least once.
132 DEBUG(unsigned maxH = 0;
133 for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
134 if (SUnits[su].getHeight() > maxH)
135 maxH = SUnits[su].getHeight();
136 dbgs() << "Max Height " << maxH << "\n";);
137 DEBUG(unsigned maxD = 0;
138 for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
139 if (SUnits[su].getDepth() > maxD)
140 maxD = SUnits[su].getDepth();
141 dbgs() << "Max Depth " << maxD << "\n";);
142 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
143 SUnits[su].dumpAll(this));
147 bool IsTopNode = false;
148 while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
149 if (!checkSchedLimit())
152 scheduleMI(SU, IsTopNode);
154 updateQueues(SU, IsTopNode);
156 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
161 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
162 DAG = static_cast<VLIWMachineScheduler*>(dag);
167 // Initialize the HazardRecognizers.
168 const TargetMachine &TM = DAG->MF.getTarget();
169 const InstrItineraryData *Itin = TM.getInstrItineraryData();
170 Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
171 Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
173 Top.ResourceModel = new VLIWResourceModel(TM);
174 Bot.ResourceModel = new VLIWResourceModel(TM);
176 assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) &&
177 "-misched-topdown incompatible with -misched-bottomup");
180 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
184 for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
186 unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
187 unsigned MinLatency = I->getMinLatency();
189 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
191 if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
192 SU->TopReadyCycle = PredReadyCycle + MinLatency;
194 Top.releaseNode(SU, SU->TopReadyCycle);
197 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
201 assert(SU->getInstr() && "Scheduled SUnit must have instr");
203 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
205 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
206 unsigned MinLatency = I->getMinLatency();
208 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
210 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
211 SU->BotReadyCycle = SuccReadyCycle + MinLatency;
213 Bot.releaseNode(SU, SU->BotReadyCycle);
216 /// Does this SU have a hazard within the current instruction group.
218 /// The scheduler supports two modes of hazard recognition. The first is the
219 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
220 /// supports highly complicated in-order reservation tables
221 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
223 /// The second is a streamlined mechanism that checks for hazards based on
224 /// simple counters that the scheduler itself maintains. It explicitly checks
225 /// for instruction dispatch limitations, including the number of micro-ops that
226 /// can dispatch per cycle.
228 /// TODO: Also check whether the SU must start a new group.
229 bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
230 if (HazardRec->isEnabled())
231 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
233 if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
239 void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
240 unsigned ReadyCycle) {
241 if (ReadyCycle < MinReadyCycle)
242 MinReadyCycle = ReadyCycle;
244 // Check for interlocks first. For the purpose of other heuristics, an
245 // instruction that cannot issue appears as if it's not in the ReadyQueue.
246 if (ReadyCycle > CurrCycle || checkHazard(SU))
253 /// Move the boundary of scheduled code by one cycle.
254 void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
255 unsigned Width = DAG->getIssueWidth();
256 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
258 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
259 unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
261 if (!HazardRec->isEnabled()) {
262 // Bypass HazardRec virtual calls.
263 CurrCycle = NextCycle;
265 // Bypass getHazardType calls in case of long latency.
266 for (; CurrCycle != NextCycle; ++CurrCycle) {
268 HazardRec->AdvanceCycle();
270 HazardRec->RecedeCycle();
275 DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
276 << CurrCycle << '\n');
279 /// Move the boundary of scheduled code by one SUnit.
280 void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
281 bool startNewCycle = false;
283 // Update the reservation table.
284 if (HazardRec->isEnabled()) {
285 if (!isTop() && SU->isCall) {
286 // Calls are scheduled with their preceding instructions. For bottom-up
287 // scheduling, clear the pipeline state before emitting.
290 HazardRec->EmitInstruction(SU);
294 startNewCycle = ResourceModel->reserveResources(SU);
296 // Check the instruction group dispatch limit.
297 // TODO: Check if this SU must end a dispatch group.
298 IssueCount += DAG->getNumMicroOps(SU->getInstr());
300 DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
304 DEBUG(dbgs() << "*** IssueCount " << IssueCount
305 << " at cycle " << CurrCycle << '\n');
308 /// Release pending ready nodes in to the available queue. This makes them
309 /// visible to heuristics.
310 void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
311 // If the available queue is empty, it is safe to reset MinReadyCycle.
312 if (Available.empty())
313 MinReadyCycle = UINT_MAX;
315 // Check to see if any of the pending instructions are ready to issue. If
316 // so, add them to the available queue.
317 for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
318 SUnit *SU = *(Pending.begin()+i);
319 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
321 if (ReadyCycle < MinReadyCycle)
322 MinReadyCycle = ReadyCycle;
324 if (ReadyCycle > CurrCycle)
331 Pending.remove(Pending.begin()+i);
334 CheckPending = false;
337 /// Remove SU from the ready set for this boundary.
338 void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
339 if (Available.isInQueue(SU))
340 Available.remove(Available.find(SU));
342 assert(Pending.isInQueue(SU) && "bad ready count");
343 Pending.remove(Pending.find(SU));
347 /// If this queue only has one ready candidate, return it. As a side effect,
348 /// advance the cycle until at least one node is ready. If multiple instructions
349 /// are ready, return NULL.
350 SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
354 for (unsigned i = 0; Available.empty(); ++i) {
355 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
356 "permanent hazard"); (void)i;
360 if (Available.size() == 1)
361 return *Available.begin();
366 void ConvergingVLIWScheduler::traceCandidate(const char *Label,
368 SUnit *SU, PressureElement P) {
369 dbgs() << Label << " " << Q.getName() << " ";
371 dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
379 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
380 /// of SU, return it, otherwise return null.
381 static SUnit *getSingleUnscheduledPred(SUnit *SU) {
382 SUnit *OnlyAvailablePred = 0;
383 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
385 SUnit &Pred = *I->getSUnit();
386 if (!Pred.isScheduled) {
387 // We found an available, but not scheduled, predecessor. If it's the
388 // only one we have found, keep track of it... otherwise give up.
389 if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
391 OnlyAvailablePred = &Pred;
394 return OnlyAvailablePred;
397 /// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
398 /// of SU, return it, otherwise return null.
399 static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
400 SUnit *OnlyAvailableSucc = 0;
401 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
403 SUnit &Succ = *I->getSUnit();
404 if (!Succ.isScheduled) {
405 // We found an available, but not scheduled, successor. If it's the
406 // only one we have found, keep track of it... otherwise give up.
407 if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
409 OnlyAvailableSucc = &Succ;
412 return OnlyAvailableSucc;
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;   // forced-high-priority bonus
static const unsigned PriorityTwo = 100;
static const unsigned PriorityThree = 50;  // per-unit register-pressure penalty
static const unsigned PriorityFour = 20;
static const unsigned ScaleTwo = 10;       // scales height/depth and blocking counts
static const unsigned FactorOne = 2;       // shift applied when resources are free
424 /// Single point to compute overall scheduling cost.
425 /// TODO: More heuristics will be used soon.
426 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
427 SchedCandidate &Candidate,
428 RegPressureDelta &Delta,
430 // Initial trivial priority.
433 // Do not waste time on a node that is already scheduled.
434 if (!SU || SU->isScheduled)
437 // Forced priority is high.
438 if (SU->isScheduleHigh)
439 ResCount += PriorityOne;
441 // Critical path first.
442 if (Q.getID() == TopQID) {
443 ResCount += (SU->getHeight() * ScaleTwo);
445 // If resources are available for it, multiply the
446 // chance of scheduling.
447 if (Top.ResourceModel->isResourceAvailable(SU))
448 ResCount <<= FactorOne;
450 ResCount += (SU->getDepth() * ScaleTwo);
452 // If resources are available for it, multiply the
453 // chance of scheduling.
454 if (Bot.ResourceModel->isResourceAvailable(SU))
455 ResCount <<= FactorOne;
458 unsigned NumNodesBlocking = 0;
459 if (Q.getID() == TopQID) {
460 // How many SUs does it block from scheduling?
461 // Look at all of the successors of this node.
462 // Count the number of nodes that
463 // this node is the sole unscheduled node for.
464 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
466 if (getSingleUnscheduledPred(I->getSUnit()) == SU)
469 // How many unscheduled predecessors block this node?
470 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
472 if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
475 ResCount += (NumNodesBlocking * ScaleTwo);
477 // Factor in reg pressure as a heuristic.
478 ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
479 ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
481 DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
486 /// Pick the best candidate from the top queue.
488 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
489 /// DAG building. To adjust for the current scheduling location we need to
490 /// maintain the number of vreg uses remaining to be top-scheduled.
491 ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
492 pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
493 SchedCandidate &Candidate) {
496 // getMaxPressureDelta temporarily modifies the tracker.
497 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
499 // BestSU remains NULL if no top candidates beat the best existing candidate.
500 CandResult FoundCandidate = NoCand;
501 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
502 RegPressureDelta RPDelta;
503 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
504 DAG->getRegionCriticalPSets(),
505 DAG->getRegPressure().MaxSetPressure);
507 int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
509 // Initialize the candidate if needed.
512 Candidate.RPDelta = RPDelta;
513 Candidate.SCost = CurrentCost;
514 FoundCandidate = NodeOrder;
519 if (CurrentCost > Candidate.SCost) {
520 DEBUG(traceCandidate("CCAND", Q, *I));
522 Candidate.RPDelta = RPDelta;
523 Candidate.SCost = CurrentCost;
524 FoundCandidate = BestCost;
528 // Fall through to original instruction order.
529 // Only consider node order if Candidate was chosen from this Q.
530 if (FoundCandidate == NoCand)
533 return FoundCandidate;
536 /// Pick the best candidate node from either the top or bottom queue.
537 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
538 // Schedule as far as possible in the direction of no choice. This is most
539 // efficient, but also provides the best heuristics for CriticalPSets.
540 if (SUnit *SU = Bot.pickOnlyChoice()) {
544 if (SUnit *SU = Top.pickOnlyChoice()) {
548 SchedCandidate BotCand;
549 // Prefer bottom scheduling when heuristics are silent.
550 CandResult BotResult = pickNodeFromQueue(Bot.Available,
551 DAG->getBotRPTracker(), BotCand);
552 assert(BotResult != NoCand && "failed to find the first candidate");
554 // If either Q has a single candidate that provides the least increase in
555 // Excess pressure, we can immediately schedule from that Q.
557 // RegionCriticalPSets summarizes the pressure within the scheduled region and
558 // affects picking from either Q. If scheduling in one direction must
559 // increase pressure for one of the excess PSets, then schedule in that
560 // direction first to provide more freedom in the other direction.
561 if (BotResult == SingleExcess || BotResult == SingleCritical) {
565 // Check if the top Q has a better candidate.
566 SchedCandidate TopCand;
567 CandResult TopResult = pickNodeFromQueue(Top.Available,
568 DAG->getTopRPTracker(), TopCand);
569 assert(TopResult != NoCand && "failed to find the first candidate");
571 if (TopResult == SingleExcess || TopResult == SingleCritical) {
575 // If either Q has a single candidate that minimizes pressure above the
576 // original region's pressure pick it.
577 if (BotResult == SingleMax) {
581 if (TopResult == SingleMax) {
585 if (TopCand.SCost > BotCand.SCost) {
589 // Otherwise prefer the bottom candidate in node order.
594 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
595 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
596 if (DAG->top() == DAG->bottom()) {
597 assert(Top.Available.empty() && Top.Pending.empty() &&
598 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
602 if (llvm::ForceTopDown) {
603 SU = Top.pickOnlyChoice();
605 SchedCandidate TopCand;
606 CandResult TopResult =
607 pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
608 assert(TopResult != NoCand && "failed to find the first candidate");
613 } else if (llvm::ForceBottomUp) {
614 SU = Bot.pickOnlyChoice();
616 SchedCandidate BotCand;
617 CandResult BotResult =
618 pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
619 assert(BotResult != NoCand && "failed to find the first candidate");
625 SU = pickNodeBidrectional(IsTopNode);
627 if (SU->isTopReady())
629 if (SU->isBottomReady())
632 DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
633 << " Scheduling Instruction in cycle "
634 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
639 /// Update the scheduler's state after scheduling a node. This is the same node
640 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs
641 /// to update it's state based on the current cycle before MachineSchedStrategy
643 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
645 SU->TopReadyCycle = Top.CurrCycle;
648 SU->BotReadyCycle = Bot.CurrCycle;