//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
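//
// A rough sketch of the bottom-up variant, for orientation only (hypothetical
// pseudocode, not part of the implementation below):
//
//   AvailableQueue.push(root);
//   while (!AvailableQueue.empty()) {
//     SU = AvailableQueue.pop();     // best candidate by priority function
//     if (SU is delayed by hazards or live physical registers)
//       pick another node, or backtrack/clone to resolve;
//     Sequence.push_back(SU);        // emit SU
//     release SU's predecessors;     // zero-count preds become available
//   }
//   std::reverse(Sequence);          // bottom-up schedules are built in reverse
//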
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);
static cl::opt<bool> EnableSchedCycles(
  "enable-sched-cycles",
  cl::desc("Enable cycle-level precision during preRA scheduling"),
  cl::init(false), cl::Hidden);
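// With the standard llc driver these schedulers are selected via the
// -pre-RA-sched option; for example (hypothetical invocation, shown only to
// illustrate the names registered above):
//
//   llc -pre-RA-sched=list-hybrid -enable-sched-cycles foo.bc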
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
namespace {
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that modify
  /// the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;
public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (EnableSchedCycles && OptLevel != CodeGenOpt::None)
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
    else
      HazardRec = new ScheduleHazardRecognizer();
  }
  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();
  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  void ScheduleNodeTopDown(SUnit*);
  void ListScheduleTopDown();
  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }
  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
} // end anonymous namespace
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0;
  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegGens.resize(TRI->getNumRegs(), NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}
//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue,
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}
/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }
}
/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  assert(EnableSchedCycles && "requires --enable-sched-cycles");

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle =
      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}
/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  AvailableQueue->setCurCycle(NextCycle);
  if (HazardRec->getMaxLookAhead() == 0) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      if (isBottomUp)
        HazardRec->RecedeCycle();
      else
        HazardRec->AdvanceCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  if (EnableSchedCycles)
    ReleasePending();
}
/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (!EnableSchedCycles)
    return;

  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this node.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (isBottomUp && SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}
/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!EnableSchedCycles || HazardRec->getMaxLookAhead() == 0)
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (isBottomUp && SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);

  if (!isBottomUp && SU->isCall) {
    HazardRec->Reset();
  }
}
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If SchedCycles is disabled, count each inst as one cycle.
  if (!EnableSchedCycles ||
      AvailableQueue->empty() || HazardRec->atIssueLimit())
    AdvanceToCycle(CurCycle + 1);
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}
/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its state
/// and that of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else
    AvailableQueue->push(SU);

  AvailableQueue->UnscheduledNode(SU);
}
/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified by
/// BtSU in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  if (EnableSchedCycles)
    ReleasePending();

  ++NumBacktracks;
}
static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
/// CheckForLiveRegDef - Update the live register vector, and add interfering
/// registers to LRegs, if the specified register def of the specified SUnit
/// clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Reg, or a register aliasing it, is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI))
      LRegs.push_back(*AliasI);
  }
}
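// For example (illustration only, using x86 register names): if EAX is held
// live in LiveRegDefs by some as-yet unscheduled node, a candidate that
// defines AX also interferes, because the alias walk above visits every
// register overlapping AX, including EAX.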
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}
/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try
    // to duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If cross copy register class is null, then it must be possible to copy
    // the value directly. Do not try to duplicate the def.
    SUnit *NewDef = 0;
    if (DestRC)
      NewDef = CopyAndMoveSuccessors(LRDef);
    else
      DestRC = RC;

    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\n*** Examining Available\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // account.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}
//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}
//===----------------------------------------------------------------------===//
//  RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
template<class SF>
class RegReductionPriorityQueue;
struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

/// bu_ls_rr_sort - Priority function for bottom up register pressure
/// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
  bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// td_ls_rr_sort - Priority function for top down register pressure reduction
// scheduler.
struct td_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = false,
    HasReadyFilter = false
  };

  RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
  td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
  src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
  hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = true
  };

  RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
  ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(const SUnit* left, const SUnit* right) const;
};
} // end anonymous namespace
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
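// For example (an illustration of the recurrence above, not extra code):
// a node whose two data-predecessor subtrees both have Sethi-Ullman number 1
// gets 1 + 1 = 2, because whichever subtree is evaluated second must be
// computed while a register holds the other subtree's result. A node with
// predecessor numbers {2, 1} keeps the maximum, 2: the cheaper subtree can
// reuse the registers freed while evaluating the more expensive one.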
namespace {
template<class SF>
class RegReductionPriorityQueue : public SchedulingPriorityQueue {
  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
    std::vector<SUnit *>::iterator Best = Q.begin();
    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
           E = Q.end(); I != E; ++I)
      if (Picker(*Best, *I))
        Best = I;
    SUnit *V = *Best;
    if (Best != prior(Q.end()))
      std::swap(*Best, Q.back());
    Q.pop_back();
    return V;
  }

  std::vector<SUnit*> Queue;
  SF Picker;
  unsigned CurQueueId;
  bool TracksRegPressure;

protected:
  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : SchedulingPriorityQueue(SF::HasReadyFilter), Picker(this),
      CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
    }
  }
  bool isBottomUp() const { return SF::IsBottomUp; }

  void initNodes(std::vector<SUnit> &sunits) {
    SUnits = &sunits;
    // Add pseudo dependency edges for two-address nodes.
    AddPseudoTwoAddrDeps();
    // Reroute edges to nodes with multiple uses.
    PrescheduleNodesWithMultipleUses();
    // Calculate node priorities.
    CalculateSethiUllmanNumbers();
  }

  void addNode(const SUnit *SU) {
    unsigned SUSize = SethiUllmanNumbers.size();
    if (SUnits->size() > SUSize)
      SethiUllmanNumbers.resize(SUSize*2, 0);
    CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
  }

  void updateNode(const SUnit *SU) {
    SethiUllmanNumbers[SU->NodeNum] = 0;
    CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
  }

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }
  unsigned getNodePriority(const SUnit *SU) const {
    assert(SU->NodeNum < SethiUllmanNumbers.size());
    unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
    if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
      // CopyToReg should be close to its uses to facilitate coalescing and
      // avoid spilling.
      return 0;
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
        Opc == TargetOpcode::SUBREG_TO_REG ||
        Opc == TargetOpcode::INSERT_SUBREG)
      // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
      // close to their uses to facilitate coalescing.
      return 0;
    if (SU->NumSuccs == 0 && SU->NumPreds != 0)
      // If SU does not have a register use, i.e. it doesn't produce a value
      // that would be consumed (e.g. store), then it terminates a chain of
      // computation. Give it a large SethiUllman number so it will be
      // scheduled right before its predecessors, so that it doesn't lengthen
      // their live ranges.
      return 0xffff;
    if (SU->NumPreds == 0 && SU->NumSuccs != 0)
      // If SU does not have a register def, schedule it close to its uses
      // because it does not lengthen any live ranges.
      return 0;
    return SethiUllmanNumbers[SU->NodeNum];
  }
  unsigned getNodeOrdering(const SUnit *SU) const {
    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  bool isReady(SUnit *U) const {
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  SUnit *pop() {
    if (Queue.empty()) return NULL;

    SUnit *V = popFromQueue(Queue, Picker);
    V->NodeQueueId = 0;
    return V;
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }
  bool HighRegPressure(const SUnit *SU) const {
    if (!TLI)
      return false;

    for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        continue;
      SUnit *PredSU = I->getSUnit();
      const SDNode *PN = PredSU->getNode();
      if (!PN->isMachineOpcode()) {
        if (PN->getOpcode() == ISD::CopyFromReg) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
            return true;
        }
        continue;
      }
      unsigned POpc = PN->getMachineOpcode();
      if (POpc == TargetOpcode::IMPLICIT_DEF)
        continue;
      if (POpc == TargetOpcode::EXTRACT_SUBREG) {
        EVT VT = PN->getOperand(0).getValueType();
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        unsigned Cost = TLI->getRepRegClassCostFor(VT);
        // Check if this increases register pressure of the specific register
        // class to the point where it would cause spills.
        if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
          return true;
        continue;
      } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                 POpc == TargetOpcode::SUBREG_TO_REG) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        unsigned Cost = TLI->getRepRegClassCostFor(VT);
        // Check if this increases register pressure of the specific register
        // class to the point where it would cause spills.
        if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
          return true;
        continue;
      }
      unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = PN->getValueType(i);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        if (RegPressure[RCId] >= RegLimit[RCId])
          return true; // Reg pressure already high.
        unsigned Cost = TLI->getRepRegClassCostFor(VT);
        if (!PN->hasAnyUseOfValue(i))
          continue;
        // Check if this increases register pressure of the specific register
        // class to the point where it would cause spills.
        if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
          return true;
      }
    }
    return false;
  }
  void ScheduledNode(SUnit *SU) {
    if (!TracksRegPressure)
      return;

    const SDNode *N = SU->getNode();
    if (!N->isMachineOpcode()) {
      if (N->getOpcode() != ISD::CopyToReg)
        return;
    } else {
      unsigned Opc = N->getMachineOpcode();
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::INSERT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::REG_SEQUENCE ||
          Opc == TargetOpcode::IMPLICIT_DEF)
        return;
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        continue;
      SUnit *PredSU = I->getSUnit();
      if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
        continue;
      const SDNode *PN = PredSU->getNode();
      if (!PN->isMachineOpcode()) {
        if (PN->getOpcode() == ISD::CopyFromReg) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
        continue;
      }
      unsigned POpc = PN->getMachineOpcode();
      if (POpc == TargetOpcode::IMPLICIT_DEF)
        continue;
      if (POpc == TargetOpcode::EXTRACT_SUBREG) {
        EVT VT = PN->getOperand(0).getValueType();
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        continue;
      } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                 POpc == TargetOpcode::SUBREG_TO_REG) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        continue;
      }
      unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = PN->getValueType(i);
        if (!PN->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
    }

    // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
    // may transfer data dependencies to CopyToReg.
    if (SU->NumSuccs && N->isMachineOpcode()) {
      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = N->getValueType(i);
        if (!N->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
          // Register pressure tracking is imprecise. This can happen.
          RegPressure[RCId] = 0;
        else
          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
      }
    }

    dumpRegPressure();
  }
  void UnscheduledNode(SUnit *SU) {
    if (!TracksRegPressure)
      return;

    const SDNode *N = SU->getNode();
    if (!N->isMachineOpcode()) {
      if (N->getOpcode() != ISD::CopyToReg)
        return;
    } else {
      unsigned Opc = N->getMachineOpcode();
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::INSERT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::REG_SEQUENCE ||
          Opc == TargetOpcode::IMPLICIT_DEF)
        return;
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        continue;
      SUnit *PredSU = I->getSUnit();
      if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
        continue;
      const SDNode *PN = PredSU->getNode();
      if (!PN->isMachineOpcode()) {
        if (PN->getOpcode() == ISD::CopyFromReg) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
        continue;
      }
      unsigned POpc = PN->getMachineOpcode();
      if (POpc == TargetOpcode::IMPLICIT_DEF)
        continue;
      if (POpc == TargetOpcode::EXTRACT_SUBREG) {
        EVT VT = PN->getOperand(0).getValueType();
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        continue;
      } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                 POpc == TargetOpcode::SUBREG_TO_REG) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        continue;
      }
      unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = PN->getValueType(i);
        if (!PN->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
          // Register pressure tracking is imprecise. This can happen.
          RegPressure[RCId] = 0;
        else
          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
      }
    }

    // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
    // may transfer data dependencies to CopyToReg.
    if (SU->NumSuccs && N->isMachineOpcode()) {
      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
      for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
        EVT VT = N->getValueType(i);
        if (VT == MVT::Glue || VT == MVT::Other)
          continue;
        if (!N->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
    }

    dumpRegPressure();
  }
  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  void dumpRegPressure() const {
    for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
           E = TRI->regclass_end(); I != E; ++I) {
      const TargetRegisterClass *RC = *I;
      unsigned Id = RC->getID();
      unsigned RP = RegPressure[Id];
      if (!RP) continue;
      DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
            << '\n');
    }
  }

  void dump(ScheduleDAG *DAG) const {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit*> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
      if (isBottomUp())
        dbgs() << "Height " << SU->getHeight() << ": ";
      else
        dbgs() << "Depth " << SU->getDepth() << ": ";
      SU->dump(DAG);
    }
  }

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
  BURegReductionPriorityQueue;

typedef RegReductionPriorityQueue<td_ls_rr_sort>
  TDRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<src_ls_rr_sort>
  SrcRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
  HybridBURRPriorityQueue;

typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
  ILPBURRPriorityQueue;
} // end anonymous namespace
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}
/// hasOnlyLiveOutUses - Return true if SU has a single value successor that is
/// a CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}
/// UnitsSharePred - Return true if the two scheduling units share a common
/// data predecessor.
static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
  SmallSet<const SUnit*, 4> Preds;
  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Preds.insert(I->getSUnit());
  }
  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (Preds.count(I->getSUnit()))
      return true;
  }
  return false;
}
template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
                     const RegReductionPriorityQueue<RRSort> *SPQ) {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer together when Sethi-Ullman numbers are
  // the same, e.g.
  //   t1 = op t2, c1
  //   t3 = op t4, c2
  //
  // and the following instructions are both ready.
  //   t2 = op c3
  //   t4 = op c4
  //
  // Then schedule t2 = op first, i.e.
  //   t4 = op c4
  //   t1 = op t2, c1
  //   t3 = op t4, c2
  //   t2 = op c3
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  // Note: with a bottom-up ready filter, the height check may be redundant.
  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
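// Illustrative summary (not part of the original source): BURRSort is a
// strict weak ordering that falls through a cascade of tie-breakers --
// Sethi-Ullman priority, distance to the closest successor, scratch-register
// count, height, depth, and finally NodeQueueId as a deterministic last
// resort.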
// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}
// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the priority; an order number of zero means no source ordering is
  // recorded for the node.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}
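// Illustrative note (not part of the original source): order numbers come
// from the SelectionDAG's source-order annotations. Only when both nodes
// carry the same ordering, or neither carries one, does the comparison fall
// back to the register-reduction heuristics in BURRSort.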
bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh)
    return true;
  else if (!LHigh && RHigh)
    return false;
  else if (!LHigh && !RHigh) {
    // If the two nodes share an operand and one of them has a single
    // use that is a live out copy, favor the one that is live out. Otherwise
    // it will be difficult to eliminate the copy if the instruction is a
    // loop induction variable update, e.g.
    //   BB:
    //     sub r1, r3, #1
    //     str r0, [r2, r1]
    //     mov r3, r1
    //     cmp
    //     bne BB
    bool SharePred = UnitsSharePred(left, right);
    // FIXME: Only adjust if BB is a loop back edge.
    // FIXME: What's the cost of a copy?
    int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
    int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
    int LHeight = (int)left->getHeight() - LBonus;
    int RHeight = (int)right->getHeight() - RBonus;

    // Low register pressure situation, schedule for latency if possible.
    bool LStall = left->SchedulingPref == Sched::Latency &&
      (int)SPQ->getCurCycle() < LHeight;
    bool RStall = right->SchedulingPref == Sched::Latency &&
      (int)SPQ->getCurCycle() < RHeight;

    // If scheduling one of the nodes will cause a pipeline stall, delay it.
    // If scheduling both of them will cause stalls, sort them according to
    // their height.
    if (LStall) {
      if (!RStall)
        return true;
      if (LHeight != RHeight)
        return LHeight > RHeight;
    } else if (RStall)
      return false;

    // If either node is scheduling for latency, sort them by height
    // and latency.
    if (left->SchedulingPref == Sched::Latency ||
        right->SchedulingPref == Sched::Latency) {
      if (LHeight != RHeight)
        return LHeight > RHeight;
      if (left->Latency != right->Latency)
        return left->Latency > right->Latency;
    }
  }

  return BURRSort(left, right, SPQ);
}
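// Illustrative example (not part of the original source): with
// getCurCycle() == 2 and a latency-preferring node of height 5, LStall is
// true (2 < 5), so the node is delayed in the hope that the intervening
// cycles are filled with other ready work instead of a pipeline stall.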
// Schedule as many instructions in each cycle as possible. So don't make an
// instruction available unless it is ready in the current cycle.
bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  return SU->getHeight() <= CurCycle;
}
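// Illustrative example (not part of the original source): at CurCycle == 3 a
// node of height 5 is held back (5 > 3) even if its operands are available,
// keeping the ready queue restricted to nodes that can actually issue in the
// current cycle.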
bool ilp_ls_rr_sort::operator()(const SUnit *left,
                                const SUnit *right) const {
  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh)
    return true;
  else if (!LHigh && RHigh)
    return false;
  else if (!LHigh && !RHigh) {
    // Low register pressure situation, schedule to maximize instruction level
    // parallelism.
    if (left->NumPreds > right->NumPreds)
      return false;
    else if (left->NumPreds < right->NumPreds)
      return true;
  }

  return BURRSort(left, right, SPQ);
}
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't
    // behave like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
      }
    }
  outer_loop_continue:;
  }
}
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}
/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
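// Illustrative refresher (not part of the original source): a Sethi-Ullman
// number approximates the register need of a node's expression tree -- a
// leaf needs 1 register, and a node whose subtrees need l and r registers
// needs max(l, r) when l != r, or l + 1 when l == r. Evaluating the hungrier
// subtree first minimizes the overall register requirement, which is what
// the register-reduction priority exploits.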
/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}
// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//
llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ =
    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
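// Usage note (illustrative, not part of the original source): these factory
// functions are reached through the RegisterScheduler entries at the top of
// this file, so a particular scheduler can be selected from the command
// line, e.g.:
//   llc -pre-RA-sched=list-burr foo.bc
//   llc -pre-RA-sched=list-ilp foo.bc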