//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
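//
// At a glance, the bottom-up variant behaves roughly like this (an informal
// sketch of ListScheduleBottomUp below, not extra code in this file):
//
//   ReleasePredecessors(&ExitSU);               // seed the available set
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = PickNodeToScheduleBottomUp(); // best legal candidate
//     ScheduleNodeBottomUp(SU);                 // emit; release predecessors
//   }
//   std::reverse(Sequence.begin(), Sequence.end());
//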
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);
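
// Any of these can be requested by name on the llc command line, e.g.
// "llc -pre-RA-sched=list-burr" (an illustrative invocation; by default each
// target selects its preferred scheduler).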
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
namespace {
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up,
  /// false if it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// LiveRegDefs - The set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup, bool needlatency,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
      AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  void ScheduleNodeTopDown(SUnit*);
  void ListScheduleTopDown();
  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegGens.resize(TRI->getNumRegs(), NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}
//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}
/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }
}
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif

  // FIXME: Handle noop hazard.
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  SU->isScheduled = true;
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}
/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and that of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
  AvailableQueue->UnscheduledNode(SU);
}
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}
static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getGluedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value,
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);
    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
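
// For example (illustrative, X86): an ADD32rr node has one explicit i32 def
// plus an implicit EFLAGS def, so its value types are (i32, i32); asking for
// Reg == EFLAGS walks past the explicit def and returns getValueType(1).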
/// CheckForLiveRegDef - Check whether the specified register def of the
/// specified SUnit clobbers any "live" registers; if so, add the interfering
/// registers to LRegs.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI))
      LRegs.push_back(*AliasI);
  }
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies. If the specific node is the last one that's available to
/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
/// it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i;  // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}
/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available (TBD)
/// (3) No Interferences: may unschedule to break register interferences.
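///
/// In rough terms (a simplified sketch of the code below): pop candidates
/// until one has no live-register interferences; if every candidate
/// interferes, either backtrack to the cycle where the conflicting register
/// is generated, or break the dependency by duplicating its defining node or
/// by inserting cross-register-class copies.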
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    unsigned LiveCycle = CurCycle;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      unsigned LCycle = LiveRegGens[Reg]->getHeight();
      LiveCycle = std::min(LiveCycle, LCycle);
    }
    SUnit *OldSU = Sequence[LiveCycle];
    if (!WillCreateCycle(TrySU, OldSU)) {
      BacktrackBottomUp(TrySU, LiveCycle);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (OldSU->isAvailable) {
        OldSU->isAvailable = false;
        if (!OldSU->isPending)
          AvailableQueue->remove(OldSU);
      }
      AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors have been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      } else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }
  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is null, then it must be possible to
    // copy the value directly. Do not try to duplicate the def.
    SUnit *NewDef = 0;
    if (DestRC)
      NewDef = CopyAndMoveSuccessors(LRDef);

    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                   << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                 << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    // Pick the best node to schedule taking all constraints into
    // account.
    SUnit *SU = PickNodeToScheduleBottomUp();

    if (SU)
      ScheduleNodeBottomUp(SU);

    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}
//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}
//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
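//
// A hand-worked illustration of the numbering (not code from this file): if
// a node's two operand subtrees have Sethi-Ullman numbers 2 and 2, one
// result must stay live while the other side is computed, so the node gets
// 2 + 1 = 3; with subtrees 3 and 1, the larger side is evaluated first and
// the node keeps the maximum, 3. See CalcNodeSethiUllmanNumber below for the
// exact rule used here.
//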
namespace {
template<class SF>
class RegReductionPriorityQueue;

// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
  bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// td_ls_rr_sort - Priority function for top down register pressure reduction
// scheduler.
struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
  td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
  src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
  hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
  ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};
}  // end anonymous namespace
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
namespace {
template<class SF>
class RegReductionPriorityQueue : public SchedulingPriorityQueue {
  std::vector<SUnit*> Queue;
  SF Picker;
  unsigned CurQueueId;
  bool TracksRegPressure;

protected:
  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
    }
  }
  void initNodes(std::vector<SUnit> &sunits) {
    SUnits = &sunits;
    // Add pseudo dependency edges for two-address nodes.
    AddPseudoTwoAddrDeps();
    // Reroute edges to nodes with multiple uses.
    PrescheduleNodesWithMultipleUses();
    // Calculate node priorities.
    CalculateSethiUllmanNumbers();
  }

  void addNode(const SUnit *SU) {
    unsigned SUSize = SethiUllmanNumbers.size();
    if (SUnits->size() > SUSize)
      SethiUllmanNumbers.resize(SUSize*2, 0);
    CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
  }

  void updateNode(const SUnit *SU) {
    SethiUllmanNumbers[SU->NodeNum] = 0;
    CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
  }

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }
  unsigned getNodePriority(const SUnit *SU) const {
    assert(SU->NodeNum < SethiUllmanNumbers.size());
    unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
    if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
      // CopyToReg should be close to its uses to facilitate coalescing and
      // avoid spilling.
      return 0;
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
        Opc == TargetOpcode::SUBREG_TO_REG ||
        Opc == TargetOpcode::INSERT_SUBREG)
      // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
      // close to their uses to facilitate coalescing.
      return 0;
    if (SU->NumSuccs == 0 && SU->NumPreds != 0)
      // If SU does not have a register use, i.e. it doesn't produce a value
      // that would be consumed (e.g. store), then it terminates a chain of
      // computation. Give it a large SethiUllman number so it will be
      // scheduled right before its predecessors, so that it doesn't lengthen
      // their live ranges.
      return 0xffff;
    if (SU->NumPreds == 0 && SU->NumSuccs != 0)
      // If SU does not have a register def, schedule it close to its uses
      // because it does not lengthen any live ranges.
      return 0;
    return SethiUllmanNumbers[SU->NodeNum];
  }

  unsigned getNodeOrdering(const SUnit *SU) const {
    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }
  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  SUnit *pop() {
    if (empty()) return NULL;
    std::vector<SUnit *>::iterator Best = Queue.begin();
    for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
           E = Queue.end(); I != E; ++I)
      if (Picker(*Best, *I))
        Best = I;
    SUnit *V = *Best;
    if (Best != prior(Queue.end()))
      std::swap(*Best, Queue.back());
    Queue.pop_back();
    return V;
  }
  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }
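
  /// HighRegPressure - In essence (a summary of the checks below): would
  /// making SU's data predecessors live push some register class over its
  /// limit? Each predecessor def of type VT is costed at
  /// getRepRegClassCostFor(VT), and pressure counts as "high" for a class RC
  /// once RegPressure[RC] + Cost >= RegLimit[RC].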
  bool HighRegPressure(const SUnit *SU) const {
    if (!TLI)
      return false;

    for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        continue;
      SUnit *PredSU = I->getSUnit();
      const SDNode *PN = PredSU->getNode();
      if (!PN->isMachineOpcode()) {
        if (PN->getOpcode() == ISD::CopyFromReg) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
            return true;
        }
        continue;
      }
      unsigned POpc = PN->getMachineOpcode();
      if (POpc == TargetOpcode::IMPLICIT_DEF)
        continue;
      if (POpc == TargetOpcode::EXTRACT_SUBREG) {
        EVT VT = PN->getOperand(0).getValueType();
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        unsigned Cost = TLI->getRepRegClassCostFor(VT);
        // Check if this increases register pressure of the specific register
        // class to the point where it would cause spills.
        if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
          return true;
        continue;
      } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                 POpc == TargetOpcode::SUBREG_TO_REG) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        unsigned Cost = TLI->getRepRegClassCostFor(VT);
        // Check if this increases register pressure of the specific register
        // class to the point where it would cause spills.
        if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
          return true;
        continue;
      }
      unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = PN->getValueType(i);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        if (RegPressure[RCId] >= RegLimit[RCId])
          return true;  // Reg pressure already high.
        unsigned Cost = TLI->getRepRegClassCostFor(VT);
        if (!PN->hasAnyUseOfValue(i))
          continue;
        // Check if this increases register pressure of the specific register
        // class to the point where it would cause spills.
        if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
          return true;
      }
    }

    return false;
  }
  void ScheduledNode(SUnit *SU) {
    if (!TracksRegPressure)
      return;

    const SDNode *N = SU->getNode();
    if (!N->isMachineOpcode()) {
      if (N->getOpcode() != ISD::CopyToReg)
        return;
    } else {
      unsigned Opc = N->getMachineOpcode();
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::INSERT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::REG_SEQUENCE ||
          Opc == TargetOpcode::IMPLICIT_DEF)
        return;
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        continue;
      SUnit *PredSU = I->getSUnit();
      if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
        continue;
      const SDNode *PN = PredSU->getNode();
      if (!PN->isMachineOpcode()) {
        if (PN->getOpcode() == ISD::CopyFromReg) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
        continue;
      }
      unsigned POpc = PN->getMachineOpcode();
      if (POpc == TargetOpcode::IMPLICIT_DEF)
        continue;
      if (POpc == TargetOpcode::EXTRACT_SUBREG) {
        EVT VT = PN->getOperand(0).getValueType();
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        continue;
      } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                 POpc == TargetOpcode::SUBREG_TO_REG) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        continue;
      }
      unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = PN->getValueType(i);
        if (!PN->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
    }

    // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
    // may transfer data dependencies to CopyToReg.
    if (SU->NumSuccs && N->isMachineOpcode()) {
      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = N->getValueType(i);
        if (!N->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
          // Register pressure tracking is imprecise. This can happen.
          RegPressure[RCId] = 0;
        else
          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
      }
    }

    dumpRegPressure();
  }
  void UnscheduledNode(SUnit *SU) {
    if (!TracksRegPressure)
      return;

    const SDNode *N = SU->getNode();
    if (!N->isMachineOpcode()) {
      if (N->getOpcode() != ISD::CopyToReg)
        return;
    } else {
      unsigned Opc = N->getMachineOpcode();
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::INSERT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::REG_SEQUENCE ||
          Opc == TargetOpcode::IMPLICIT_DEF)
        return;
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        continue;
      SUnit *PredSU = I->getSUnit();
      if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
        continue;
      const SDNode *PN = PredSU->getNode();
      if (!PN->isMachineOpcode()) {
        if (PN->getOpcode() == ISD::CopyFromReg) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
        continue;
      }
      unsigned POpc = PN->getMachineOpcode();
      if (POpc == TargetOpcode::IMPLICIT_DEF)
        continue;
      if (POpc == TargetOpcode::EXTRACT_SUBREG) {
        EVT VT = PN->getOperand(0).getValueType();
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        continue;
      } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                 POpc == TargetOpcode::SUBREG_TO_REG) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        continue;
      }
      unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = PN->getValueType(i);
        if (!PN->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
          // Register pressure tracking is imprecise. This can happen.
          RegPressure[RCId] = 0;
        else
          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
      }
    }

    // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
    // may transfer data dependencies to CopyToReg.
    if (SU->NumSuccs && N->isMachineOpcode()) {
      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
      for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
        EVT VT = N->getValueType(i);
        if (VT == MVT::Glue || VT == MVT::Other)
          continue;
        if (!N->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
    }

    dumpRegPressure();
  }
  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  void dumpRegPressure() const {
    for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
           E = TRI->regclass_end(); I != E; ++I) {
      const TargetRegisterClass *RC = *I;
      unsigned Id = RC->getID();
      unsigned RP = RegPressure[Id];
      if (!RP) continue;
      DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
            << '\n');
    }
  }

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
  BURegReductionPriorityQueue;

typedef RegReductionPriorityQueue<td_ls_rr_sort>
  TDRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<src_ls_rr_sort>
  SrcRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
  HybridBURRPriorityQueue;

typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
  ILPBURRPriorityQueue;
}  // end anonymous namespace
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be
    // considered to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}
/// hasOnlyLiveOutUses - Return true if SU has a single value successor that
/// is a CopyToReg to a virtual register. This SU def is probably a liveout
/// and it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}
/// UnitsSharePred - Return true if the two scheduling units share a common
/// data predecessor.
static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
  SmallSet<const SUnit*, 4> Preds;
  for (SUnit::const_pred_iterator I = left->Preds.begin(),
       E = left->Preds.end(); I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Preds.insert(I->getSUnit());
  }
  for (SUnit::const_pred_iterator I = right->Preds.begin(),
       E = right->Preds.end(); I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (Preds.count(I->getSUnit()))
      return true;
  }
  return false;
}
template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
                     const RegReductionPriorityQueue<RRSort> *SPQ) {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
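
// To summarize the cascade above: Sethi-Ullman priority first, then distance
// to the closest use, then scratch-register count, then height, then depth,
// and finally queue order as the last-resort tie breaker.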
// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the priority. A node with order 0 (no source order) always loses to one
  // that has an order.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}
bool hybrid_ls_rr_sort::operator()(const SUnit *left,
                                   const SUnit *right) const {
  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh)
    return true;
  else if (!LHigh && RHigh)
    return false;
  else if (!LHigh && !RHigh) {
    // If the two nodes share an operand and one of them has a single
    // use that is a live out copy, favor the one that is live out. Otherwise
    // it will be difficult to eliminate the copy if the instruction is a
    // loop induction variable update. e.g.
    // BB:
    // sub r1, r3, #1
    // str r0, [r2, r1]
    // mov r3, r1
    // cmp
    // bne BB
    bool SharePred = UnitsSharePred(left, right);
    // FIXME: Only adjust if BB is a loop back edge.
    // FIXME: What's the cost of a copy?
    int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
    int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
    int LHeight = (int)left->getHeight() - LBonus;
    int RHeight = (int)right->getHeight() - RBonus;

    // Low register pressure situation, schedule for latency if possible.
    bool LStall = left->SchedulingPref == Sched::Latency &&
                  (int)SPQ->getCurCycle() < LHeight;
    bool RStall = right->SchedulingPref == Sched::Latency &&
                  (int)SPQ->getCurCycle() < RHeight;
    // If scheduling one of the nodes will cause a pipeline stall, delay it.
    // If scheduling either one of the nodes will cause a pipeline stall, sort
    // them according to their height.
    if (LStall) {
      if (!RStall)
        return true;
      if (LHeight != RHeight)
        return LHeight > RHeight;
    } else if (RStall)
      return false;

    // If either node is scheduling for latency, sort them by height
    // and latency.
    if (left->SchedulingPref == Sched::Latency ||
        right->SchedulingPref == Sched::Latency) {
      if (LHeight != RHeight)
        return LHeight > RHeight;
      if (left->Latency != right->Latency)
        return left->Latency > right->Latency;
    }
  }

  return BURRSort(left, right, SPQ);
}
bool ilp_ls_rr_sort::operator()(const SUnit *left,
                                const SUnit *right) const {
  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh)
    return true;
  else if (!LHigh && RHigh)
    return false;
  else if (!LHigh && !RHigh) {
    // Low register pressure situation, schedule to maximize instruction level
    // parallelism.
    if (left->NumPreds > right->NumPreds)
      return false;
    else if (left->NumPreds < right->NumPreds)
      return true;
  }

  return BURRSort(left, right, SPQ);
}
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
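
// Illustrative sketch of the transformation (hypothetical values t1..t3):
//
//   DU:     t1 = ...
//   SU:     t2 = two-addr-op t1, ...   // t1 is tied to the def of t2
//   SuccSU: t3 = other-op t1, ...      // another plain use of t1
//
// The artificial edge added below makes SuccSU a predecessor of SU, so the
// plain use (SuccSU) lands above SU in the final schedule. SU's tied
// operand then carries the last use of t1, letting the register allocator
// coalesce t1 and t2.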
template<typename SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<typename SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
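
// Background sketch (the classic Sethi-Ullman scheme, not the exact formula
// used by CalcNodeSethiUllmanNumber): for an expression tree, a leaf needs
// one register, and an interior node needs max(l, r) when its children's
// needs l and r differ, or l + 1 when they are equal. E.g. for
// (a + b) * (c + d), each add needs 2 registers and the multiply needs 3;
// evaluating the subtree with the larger number first minimizes pressure.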

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}
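
// The comparator below passes Limit == 1, using this as a cheap test: a sum
// of exactly one means, in effect, that SU is the only remaining unscheduled
// predecessor of its lone successor, so scheduling SU immediately makes that
// successor available; the comparator rewards this with a small bonus.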

// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//
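
// Usage sketch (assuming the usual llc driver): each factory below is the
// target of a RegisterScheduler entry, so e.g.
//
//   llc -pre-RA-sched=list-burr foo.bc
//
// reaches createBURRListDAGScheduler, while -pre-RA-sched=source reaches
// createSourceListDAGScheduler.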

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ =
    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}