1 //===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements bottom-up and top-down register pressure reduction list
11 // schedulers, using standard algorithms. The basic approach uses a priority
12 // queue of available nodes to schedule. One at a time, nodes are taken from
13 // the priority queue (thus in priority order), checked for legality to
14 // schedule, and emitted if legal.
16 //===----------------------------------------------------------------------===//
18 #define DEBUG_TYPE "pre-RA-sched"
19 #include "ScheduleDAGSDNodes.h"
20 #include "llvm/InlineAsm.h"
21 #include "llvm/CodeGen/SchedulerRegistry.h"
22 #include "llvm/CodeGen/SelectionDAGISel.h"
23 #include "llvm/Target/TargetRegisterInfo.h"
24 #include "llvm/Target/TargetData.h"
25 #include "llvm/Target/TargetMachine.h"
26 #include "llvm/Target/TargetInstrInfo.h"
27 #include "llvm/ADT/PriorityQueue.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/raw_ostream.h"
// Scheduler statistics, collected when LLVM is built with -DNDEBUG off and
// reported via -stats. Each counter tracks one recovery mechanism used when
// physical-register dependencies block normal list scheduling.
37 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
38 STATISTIC(NumUnfolds, "Number of nodes unfolded");
39 STATISTIC(NumDups, "Number of duplicated nodes");
40 STATISTIC(NumPRCopies, "Number of physical register copies");
// Register the four list-scheduler variants with the scheduler registry so
// they can be selected on the command line (e.g. -pre-RA-sched=list-burr).
// Each entry pairs a name/description with the factory defined later in
// this file.
42 static RegisterScheduler
43 burrListDAGScheduler("list-burr",
44 "Bottom-up register reduction list scheduling",
45 createBURRListDAGScheduler);
46 static RegisterScheduler
47 tdrListrDAGScheduler("list-tdrr",
48 "Top-down register reduction list scheduling",
49 createTDRRListDAGScheduler);
50 static RegisterScheduler
51 sourceListDAGScheduler("source",
52 "Similar to list-burr but schedules in source "
53 "order when possible",
54 createSourceListDAGScheduler);
56 static RegisterScheduler
57 hybridListDAGScheduler("hybrid",
58 "Bottom-up rr list scheduling which avoid stalls for "
59 "long latency instructions",
60 createHybridListDAGScheduler);
63 //===----------------------------------------------------------------------===//
64 /// ScheduleDAGRRList - The actual register reduction list scheduler
65 /// implementation. This supports both top-down and bottom-up scheduling.
67 class ScheduleDAGRRList : public ScheduleDAGSDNodes {
69 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
73 /// NeedLatency - True if the scheduler will make use of latency information.
77 /// AvailableQueue - The priority queue to use for the available SUnits.
// NOTE(review): AvailableQueue is owned by this class; it is deleted in the
// destructor below, so callers must pass a heap-allocated queue.
78 SchedulingPriorityQueue *AvailableQueue;
80 /// LiveRegDefs - A set of physical registers and their definition
81 /// that are "live". These nodes must be scheduled before any other nodes that
82 /// modifies the registers can be scheduled.
// Indexed by physical register number; LiveRegCycles records the cycle at
// which the corresponding register became live.
84 std::vector<SUnit*> LiveRegDefs;
85 std::vector<unsigned> LiveRegCycles;
87 /// Topo - A topological ordering for SUnits which permits fast IsReachable
88 /// and similar queries.
89 ScheduleDAGTopologicalSort Topo;
92 ScheduleDAGRRList(MachineFunction &mf,
93 bool isbottomup, bool needlatency,
94 SchedulingPriorityQueue *availqueue)
95 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
96 AvailableQueue(availqueue), Topo(SUnits) {
// Destructor releases the owned priority queue.
99 ~ScheduleDAGRRList() {
100 delete AvailableQueue;
105 /// IsReachable - Checks if SU is reachable from TargetSU.
106 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
107 return Topo.IsReachable(SU, TargetSU);
110 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
112 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
113 return Topo.WillCreateCycle(SU, TargetSU);
116 /// AddPred - adds a predecessor edge to SUnit SU.
117 /// This returns true if this is a new predecessor.
118 /// Updates the topological ordering if required.
119 void AddPred(SUnit *SU, const SDep &D) {
// Keep the topological ordering in sync with the new edge.
120 Topo.AddPred(SU, D.getSUnit());
124 /// RemovePred - removes a predecessor edge from SUnit SU.
125 /// This returns true if an edge was removed.
126 /// Updates the topological ordering if required.
127 void RemovePred(SUnit *SU, const SDep &D) {
128 Topo.RemovePred(SU, D.getSUnit());
// Worklist-style helpers used by the bottom-up and top-down main loops;
// definitions follow below in this file.
133 void ReleasePred(SUnit *SU, const SDep *PredEdge);
134 void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
135 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
136 void ReleaseSuccessors(SUnit *SU);
137 void CapturePred(SDep *PredEdge);
138 void ScheduleNodeBottomUp(SUnit*, unsigned);
139 void ScheduleNodeTopDown(SUnit*, unsigned);
140 void UnscheduleNodeBottomUp(SUnit*);
141 void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
142 SUnit *CopyAndMoveSuccessors(SUnit*);
143 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
144 const TargetRegisterClass*,
145 const TargetRegisterClass*,
146 SmallVector<SUnit*, 2>&);
147 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
148 void ListScheduleTopDown();
149 void ListScheduleBottomUp();
152 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
153 /// Updates the topological ordering if required.
154 SUnit *CreateNewSUnit(SDNode *N) {
155 unsigned NumSUnits = SUnits.size();
156 SUnit *NewNode = NewSUnit(N);
157 // Update the topological ordering.
// A NodeNum at or beyond the old size means the SUnits vector grew, so the
// cached topological order must be rebuilt from scratch.
158 if (NewNode->NodeNum >= NumSUnits)
159 Topo.InitDAGTopologicalSorting();
163 /// CreateClone - Creates a new SUnit from an existing one.
164 /// Updates the topological ordering if required.
165 SUnit *CreateClone(SUnit *N) {
166 unsigned NumSUnits = SUnits.size();
167 SUnit *NewNode = Clone(N);
168 // Update the topological ordering.
169 if (NewNode->NodeNum >= NumSUnits)
170 Topo.InitDAGTopologicalSorting();
174 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
175 /// need actual latency information but the hybrid scheduler does.
176 bool ForceUnitLatencies() const {
180 } // end anonymous namespace
183 /// Schedule - Schedule the DAG using list scheduling.
// Top-level driver: sizes the live-register tracking tables, builds the
// scheduling graph, primes the priority queue, and dispatches to the
// bottom-up or top-down main loop.
184 void ScheduleDAGRRList::Schedule() {
185 DEBUG(dbgs() << "********** List Scheduling **********\n");
// One slot per physical register; entries are filled lazily as phys-reg
// dependencies become live during scheduling.
188 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
189 LiveRegCycles.resize(TRI->getNumRegs(), 0);
191 // Build the scheduling graph.
192 BuildSchedGraph(NULL);
194 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
195 SUnits[su].dumpAll(this));
// Topological order must be computed after the graph is built and before
// any IsReachable/WillCreateCycle queries.
196 Topo.InitDAGTopologicalSorting();
198 AvailableQueue->initNodes(SUnits);
200 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
202 ListScheduleBottomUp();
204 ListScheduleTopDown();
206 AvailableQueue->releaseState();
209 //===----------------------------------------------------------------------===//
210 // Bottom-Up Scheduling
211 //===----------------------------------------------------------------------===//
213 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
214 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
215 void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
216 SUnit *PredSU = PredEdge->getSUnit();
// A zero count here means the edge is being released more often than the
// successor count allows -- an internal consistency failure.
219 if (PredSU->NumSuccsLeft == 0) {
220 dbgs() << "*** Scheduling failed! ***\n";
222 dbgs() << " has been released too many times!\n";
226 --PredSU->NumSuccsLeft;
228 if (!ForceUnitLatencies()) {
229 // Updating predecessor's height. This is now the cycle when the
230 // predecessor can be scheduled without causing a pipeline stall.
231 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
234 // If all the node's successors are scheduled, this node is ready
235 // to be scheduled. Ignore the special EntrySU node.
236 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
237 PredSU->isAvailable = true;
238 AvailableQueue->push(PredSU);
// ReleasePredecessors - Release every predecessor edge of SU, and for
// assigned physical-register dependencies, mark the defining register live
// so later nodes that would clobber it are delayed.
242 void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
243 // Bottom up: release predecessors
244 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
246 ReleasePred(SU, &*I);
247 if (I->isAssignedRegDep()) {
248 // This is a physical register dependency and it's impossible or
249 // expensive to copy the register. Make sure nothing that can
250 // clobber the register is scheduled between the predecessor and
// Record the defining SUnit and the cycle the register became live.
252 if (!LiveRegDefs[I->getReg()]) {
254 LiveRegDefs[I->getReg()] = I->getSUnit();
255 LiveRegCycles[I->getReg()] = CurCycle;
261 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
262 /// count of its predecessors. If a predecessor pending count is zero, add it to
263 /// the Available queue.
264 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
265 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
266 DEBUG(SU->dump(this));
// Scheduling below the node's computed height implies a pipeline stall
// (latency constraint not yet satisfied); only reported in debug builds.
269 if (CurCycle < SU->getHeight())
270 DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
273 // FIXME: Handle noop hazard.
274 SU->setHeightToAtLeast(CurCycle);
275 Sequence.push_back(SU);
277 ReleasePredecessors(SU, CurCycle);
279 // Release all the implicit physical register defs that are live.
280 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
282 if (I->isAssignedRegDep()) {
// Only clear the live-register entry if this def is the one recorded at
// the matching cycle; otherwise another def owns the entry.
283 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
284 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
285 assert(LiveRegDefs[I->getReg()] == SU &&
286 "Physical register dependency violated?");
288 LiveRegDefs[I->getReg()] = NULL;
289 LiveRegCycles[I->getReg()] = 0;
294 SU->isScheduled = true;
295 AvailableQueue->ScheduledNode(SU);
298 /// CapturePred - This does the opposite of ReleasePred. Since SU is being
299 /// unscheduled, incrcease the succ left count of its predecessors. Remove
300 /// them from AvailableQueue if necessary.
301 void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
302 SUnit *PredSU = PredEdge->getSUnit();
303 if (PredSU->isAvailable) {
304 PredSU->isAvailable = false;
// Pending nodes are tracked in NotReady rather than the queue, so only
// remove from AvailableQueue when the node is not pending.
305 if (!PredSU->isPending)
306 AvailableQueue->remove(PredSU);
309 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
310 ++PredSU->NumSuccsLeft;
313 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
314 /// its predecessor states to reflect the change.
315 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
316 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
317 DEBUG(SU->dump(this));
319 AvailableQueue->UnscheduledNode(SU);
// Reverse the effects of ReleasePredecessors: re-capture each predecessor
// edge, and retire any live-register entry this node's scheduling created.
321 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
324 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
325 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
326 assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
327 "Physical register dependency violated?");
329 LiveRegDefs[I->getReg()] = NULL;
330 LiveRegCycles[I->getReg()] = 0;
// This node's phys-reg defs become live again for its already-scheduled
// successors; restore the earliest use cycle seen.
334 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
336 if (I->isAssignedRegDep()) {
337 if (!LiveRegDefs[I->getReg()]) {
338 LiveRegDefs[I->getReg()] = SU;
341 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
342 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
// Height is stale once the node leaves the schedule; mark dirty and make
// the node available for re-selection.
346 SU->setHeightDirty();
347 SU->isScheduled = false;
348 SU->isAvailable = true;
349 AvailableQueue->push(SU);
352 /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
353 /// BTCycle in order to schedule a specific node.
// Pops nodes off the end of Sequence, unscheduling each, until CurCycle
// rewinds to BtCycle. SU is the node we are backtracking for.
354 void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
355 unsigned &CurCycle) {
357 while (CurCycle > BtCycle) {
358 OldSU = Sequence.back();
// If SU is a successor of the node being unscheduled, keep it out of the
// AvailableQueue while its state is in flux.
360 if (SU->isSucc(OldSU))
361 // Don't try to remove SU from AvailableQueue.
362 SU->isAvailable = false;
363 UnscheduleNodeBottomUp(OldSU);
365 AvailableQueue->setCurCycle(CurCycle);
368 assert(!SU->isSucc(OldSU) && "Something is wrong!");
// isOperandOf - Returns true if any node in SU's flagged-node chain appears
// as an operand of N.
373 static bool isOperandOf(const SUnit *SU, SDNode *N) {
374 for (const SDNode *SUNode = SU->getNode(); SUNode;
375 SUNode = SUNode->getFlaggedNode()) {
376 if (SUNode->isOperandOf(N))
382 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
383 /// successors to the newly created node.
// Two strategies: (1) unfold a load-folding node into a separate load plus
// operation, or (2) clone the SUnit outright. Returns the replacement SUnit.
384 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
// Nodes with flag (glue) results cannot be handled here.
385 if (SU->getNode()->getFlaggedNode())
388 SDNode *N = SU->getNode();
// Unfolding is only attempted when the node produces/consumes a chain,
// i.e. it may have a folded memory operand.
393 bool TryUnfold = false;
394 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
395 EVT VT = N->getValueType(i);
398 else if (VT == MVT::Other)
401 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
402 const SDValue &Op = N->getOperand(i);
403 EVT VT = Op.getNode()->getValueType(Op.getResNo());
409 SmallVector<SDNode*, 2> NewNodes;
410 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
413 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
414 assert(NewNodes.size() == 2 && "Expected a load folding node!");
// NewNodes[0] is the extracted load; the other node is the remaining op.
417 SDNode *LoadNode = NewNodes[0];
418 unsigned NumVals = N->getNumValues();
419 unsigned OldNumVals = SU->getNode()->getNumValues();
// Rewire all users of the old node's values to the unfolded pair; the old
// chain result (last value) now comes from the load.
420 for (unsigned i = 0; i != NumVals; ++i)
421 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
422 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
423 SDValue(LoadNode, 1));
425 // LoadNode may already exist. This can happen when there is another
426 // load from the same location and producing the same type of value
427 // but it has different alignment or volatileness.
428 bool isNewLoad = true;
// A non-negative NodeId means an SUnit already exists for this load.
430 if (LoadNode->getNodeId() != -1) {
431 LoadSU = &SUnits[LoadNode->getNodeId()];
434 LoadSU = CreateNewSUnit(LoadNode);
435 LoadNode->setNodeId(LoadSU->NodeNum);
436 ComputeLatency(LoadSU);
439 SUnit *NewSU = CreateNewSUnit(N);
440 assert(N->getNodeId() == -1 && "Node already inserted!");
441 N->setNodeId(NewSU->NodeNum);
// Derive two-address / commutable flags from the instruction description.
443 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
444 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
445 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
446 NewSU->isTwoAddress = true;
450 if (TID.isCommutable())
451 NewSU->isCommutable = true;
452 ComputeLatency(NewSU);
454 // Record all the edges to and from the old SU, by category.
455 SmallVector<SDep, 4> ChainPreds;
456 SmallVector<SDep, 4> ChainSuccs;
457 SmallVector<SDep, 4> LoadPreds;
458 SmallVector<SDep, 4> NodePreds;
459 SmallVector<SDep, 4> NodeSuccs;
// Chain preds and preds feeding the load go to LoadSU; everything else
// stays with the remaining operation (NewSU).
460 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
463 ChainPreds.push_back(*I);
464 else if (isOperandOf(I->getSUnit(), LoadNode))
465 LoadPreds.push_back(*I);
467 NodePreds.push_back(*I);
469 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
472 ChainSuccs.push_back(*I);
474 NodeSuccs.push_back(*I);
477 // Now assign edges to the newly-created nodes.
478 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
479 const SDep &Pred = ChainPreds[i];
480 RemovePred(SU, Pred);
482 AddPred(LoadSU, Pred);
484 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
485 const SDep &Pred = LoadPreds[i];
486 RemovePred(SU, Pred);
488 AddPred(LoadSU, Pred);
490 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
491 const SDep &Pred = NodePreds[i];
492 RemovePred(SU, Pred);
493 AddPred(NewSU, Pred);
495 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
496 SDep D = NodeSuccs[i];
497 SUnit *SuccDep = D.getSUnit();
499 RemovePred(SuccDep, D);
503 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
504 SDep D = ChainSuccs[i];
505 SUnit *SuccDep = D.getSUnit();
507 RemovePred(SuccDep, D);
514 // Add a data dependency to reflect that NewSU reads the value defined
516 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
519 AvailableQueue->addNode(LoadSU);
520 AvailableQueue->addNode(NewSU);
524 if (NewSU->NumSuccsLeft == 0) {
525 NewSU->isAvailable = true;
// Fallback path: unfolding not possible, so clone the node instead.
531 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
532 NewSU = CreateClone(SU);
534 // New SUnit has the exact same predecessors.
535 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
537 if (!I->isArtificial())
540 // Only copy scheduled successors. Cut them from old node's successor
541 // list and move them over.
// Deletions are deferred to DelDeps to avoid invalidating the iterator.
542 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
543 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
545 if (I->isArtificial())
547 SUnit *SuccSU = I->getSUnit();
548 if (SuccSU->isScheduled) {
553 DelDeps.push_back(std::make_pair(SuccSU, D));
556 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
557 RemovePred(DelDeps[i].first, DelDeps[i].second);
559 AvailableQueue->updateNode(SU);
560 AvailableQueue->addNode(NewSU);
566 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
567 /// scheduled successors of the given SUnit to the last copy.
// Builds a copy-from (SrcRC -> DestRC) and a copy-to (DestRC -> SrcRC) pair,
// reroutes SU's scheduled successors through the copy-to node, and returns
// both copies via the Copies out-parameter.
568 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
569 const TargetRegisterClass *DestRC,
570 const TargetRegisterClass *SrcRC,
571 SmallVector<SUnit*, 2> &Copies) {
572 SUnit *CopyFromSU = CreateNewSUnit(NULL);
573 CopyFromSU->CopySrcRC = SrcRC;
574 CopyFromSU->CopyDstRC = DestRC;
576 SUnit *CopyToSU = CreateNewSUnit(NULL);
577 CopyToSU->CopySrcRC = DestRC;
578 CopyToSU->CopyDstRC = SrcRC;
580 // Only copy scheduled successors. Cut them from old node's successor
581 // list and move them over.
// Deletions are deferred to DelDeps to avoid invalidating the iterator.
582 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
583 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
585 if (I->isArtificial())
587 SUnit *SuccSU = I->getSUnit();
588 if (SuccSU->isScheduled) {
590 D.setSUnit(CopyToSU);
592 DelDeps.push_back(std::make_pair(SuccSU, *I));
595 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
596 RemovePred(DelDeps[i].first, DelDeps[i].second);
// Chain the copies: SU -> CopyFromSU (carrying Reg) -> CopyToSU.
598 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
599 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
601 AvailableQueue->updateNode(SU);
602 AvailableQueue->addNode(CopyFromSU);
603 AvailableQueue->addNode(CopyToSU);
604 Copies.push_back(CopyFromSU);
605 Copies.push_back(CopyToSU);
610 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
611 /// definition of the specified node.
612 /// FIXME: Move to SelectionDAG?
613 static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
614 const TargetInstrInfo *TII) {
615 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
616 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
// Walk the implicit-def list to locate Reg's position; the matching result
// index into N's value list is NumRes (adjusted on elided lines).
617 unsigned NumRes = TID.getNumDefs();
618 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
623 return N->getValueType(NumRes);
626 /// CheckForLiveRegDef - Return true and update live register vector if the
627 /// specified register def of the specified SUnit clobbers any "live" registers.
628 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
629 std::vector<SUnit*> &LiveRegDefs,
630 SmallSet<unsigned, 4> &RegAdded,
631 SmallVector<unsigned, 4> &LRegs,
632 const TargetRegisterInfo *TRI) {
// A register is an interference only if it is live and defined by a
// different SUnit; RegAdded deduplicates entries pushed into LRegs.
634 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
635 if (RegAdded.insert(Reg)) {
636 LRegs.push_back(Reg);
// Also check every alias of Reg -- clobbering an alias clobbers Reg.
640 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
641 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
642 if (RegAdded.insert(*Alias)) {
643 LRegs.push_back(*Alias);
650 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
651 /// scheduling of the given node to satisfy live physical register dependencies.
652 /// If the specific node is the last one that's available to schedule, do
653 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
654 bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
655 SmallVector<unsigned, 4> &LRegs){
// Fast path: nothing is live, so nothing can be clobbered.
656 if (NumLiveRegs == 0)
659 SmallSet<unsigned, 4> RegAdded;
660 // If this node would clobber any "live" register, then it's not ready.
661 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
663 if (I->isAssignedRegDep())
664 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
665 RegAdded, LRegs, TRI);
// Walk the node's flagged chain looking for explicit clobbers.
668 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
669 if (Node->getOpcode() == ISD::INLINEASM) {
670 // Inline asm can clobber physical defs.
671 unsigned NumOps = Node->getNumOperands();
672 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
673 --NumOps; // Ignore the flag operand.
// Decode the inline-asm operand groups: each group starts with a flags
// word encoding the kind and register count.
675 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
677 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
678 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
680 ++i; // Skip the ID value.
681 if (InlineAsm::isRegDefKind(Flags) ||
682 InlineAsm::isRegDefEarlyClobberKind(Flags)) {
683 // Check for def of register or earlyclobber register.
684 for (; NumVals; --NumVals, ++i) {
685 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
686 if (TargetRegisterInfo::isPhysicalRegister(Reg))
687 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
// For machine instructions, implicit defs from the instruction description
// are potential clobbers of live registers.
695 if (!Node->isMachineOpcode())
697 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
698 if (!TID.ImplicitDefs)
700 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
701 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
// Non-empty LRegs means scheduling SU now would clobber a live register.
703 return !LRegs.empty();
707 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
// Pops the highest-priority available node each cycle; nodes blocked by live
// physical registers are set aside, and if everything is blocked the
// scheduler backtracks, clones, or inserts cross-class copies to make
// progress.
709 void ScheduleDAGRRList::ListScheduleBottomUp() {
710 unsigned CurCycle = 0;
712 // Release any predecessors of the special Exit node.
713 ReleasePredecessors(&ExitSU, CurCycle);
715 // Add root to Available queue.
716 if (!SUnits.empty()) {
717 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
718 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
719 RootSU->isAvailable = true;
720 AvailableQueue->push(RootSU);
723 // While Available queue is not empty, grab the node with the highest
724 // priority. If it is not ready put it back. Schedule the node.
725 SmallVector<SUnit*, 4> NotReady;
726 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
727 Sequence.reserve(SUnits.size());
728 while (!AvailableQueue->empty()) {
729 bool Delayed = false;
731 SUnit *CurSU = AvailableQueue->pop();
// Keep popping until a node is found that clobbers no live registers;
// blocked nodes are parked in NotReady with their interfering regs.
733 SmallVector<unsigned, 4> LRegs;
734 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
737 LRegsMap.insert(std::make_pair(CurSU, LRegs));
739 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
740 NotReady.push_back(CurSU);
741 CurSU = AvailableQueue->pop();
744 // All candidates are delayed due to live physical reg dependencies.
745 // Try backtracking, code duplication, or inserting cross class copies
747 if (Delayed && !CurSU) {
748 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
749 SUnit *TrySU = NotReady[i];
750 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
752 // Try unscheduling up to the point where it's safe to schedule
// LiveCycle is the earliest cycle at which any interfering register
// became live -- backtracking to it frees all of them.
754 unsigned LiveCycle = CurCycle;
755 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
756 unsigned Reg = LRegs[j];
757 unsigned LCycle = LiveRegCycles[Reg];
758 LiveCycle = std::min(LiveCycle, LCycle);
760 SUnit *OldSU = Sequence[LiveCycle];
761 if (!WillCreateCycle(TrySU, OldSU)) {
762 BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
763 // Force the current node to be scheduled before the node that
764 // requires the physical reg dep.
765 if (OldSU->isAvailable) {
766 OldSU->isAvailable = false;
767 AvailableQueue->remove(OldSU);
// The artificial order edge enforces the required scheduling order
// without implying a real data dependence.
769 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
770 /*Reg=*/0, /*isNormalMemory=*/false,
771 /*isMustAlias=*/false, /*isArtificial=*/true));
772 // If one or more successors has been unscheduled, then the current
773 // node is no longer avaialable. Schedule a successor that's now
774 // available instead.
775 if (!TrySU->isAvailable)
776 CurSU = AvailableQueue->pop();
779 TrySU->isPending = false;
780 NotReady.erase(NotReady.begin()+i);
787 // Can't backtrack. If it's too expensive to copy the value, then try
788 // duplicate the nodes that produces these "too expensive to copy"
789 // values to break the dependency. In case even that doesn't work,
790 // insert cross class copies.
791 // If it's not too expensive, i.e. cost != -1, issue copies.
792 SUnit *TrySU = NotReady[0];
793 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
794 assert(LRegs.size() == 1 && "Can't handle this yet!");
795 unsigned Reg = LRegs[0];
796 SUnit *LRDef = LiveRegDefs[Reg];
797 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
798 const TargetRegisterClass *RC =
799 TRI->getPhysicalRegisterRegClass(Reg, VT);
800 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
802 // If cross copy register class is null, then it must be possible copy
803 // the value directly. Do not try duplicate the def.
806 NewDef = CopyAndMoveSuccessors(LRDef);
810 // Issue copies, these can be expensive cross register class copies.
811 SmallVector<SUnit*, 2> Copies;
812 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
813 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
814 << " to SU #" << Copies.front()->NodeNum << "\n");
815 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
816 /*Reg=*/0, /*isNormalMemory=*/false,
817 /*isMustAlias=*/false,
818 /*isArtificial=*/true));
819 NewDef = Copies.back();
822 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
823 << " to SU #" << TrySU->NodeNum << "\n");
// The new def takes over the live-register slot, and must be scheduled
// after TrySU (bottom-up: edge from TrySU to NewDef).
824 LiveRegDefs[Reg] = NewDef;
825 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
826 /*Reg=*/0, /*isNormalMemory=*/false,
827 /*isMustAlias=*/false,
828 /*isArtificial=*/true));
829 TrySU->isAvailable = false;
833 assert(CurSU && "Unable to resolve live physical register dependencies!");
836 // Add the nodes that aren't ready back onto the available list.
837 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
838 NotReady[i]->isPending = false;
839 // May no longer be available due to backtracking.
840 if (NotReady[i]->isAvailable)
841 AvailableQueue->push(NotReady[i]);
846 ScheduleNodeBottomUp(CurSU, CurCycle);
848 AvailableQueue->setCurCycle(CurCycle);
851 // Reverse the order if it is bottom up.
852 std::reverse(Sequence.begin(), Sequence.end());
855 VerifySchedule(isBottomUp);
859 //===----------------------------------------------------------------------===//
860 // Top-Down Scheduling
861 //===----------------------------------------------------------------------===//
863 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
864 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
865 void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
866 SUnit *SuccSU = SuccEdge->getSUnit();
// A zero count here means the edge is being released more often than the
// predecessor count allows -- an internal consistency failure.
869 if (SuccSU->NumPredsLeft == 0) {
870 dbgs() << "*** Scheduling failed! ***\n";
872 dbgs() << " has been released too many times!\n";
876 --SuccSU->NumPredsLeft;
878 // If all the node's predecessors are scheduled, this node is ready
879 // to be scheduled. Ignore the special ExitSU node.
880 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
881 SuccSU->isAvailable = true;
882 AvailableQueue->push(SuccSU);
// ReleaseSuccessors - Release every successor edge of SU (top-down
// counterpart of ReleasePredecessors).
886 void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
887 // Top down: release successors
888 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
// Physreg dependencies are only tracked in the bottom-up direction.
890 assert(!I->isAssignedRegDep() &&
891 "The list-tdrr scheduler doesn't yet support physreg dependencies!");
893 ReleaseSucc(SU, &*I);
897 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
898 /// count of its successors. If a successor pending count is zero, add it to
899 /// the Available queue.
900 void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
901 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
902 DEBUG(SU->dump(this));
// Top-down uses depth (distance from entry) as the cycle bound.
904 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
905 SU->setDepthToAtLeast(CurCycle);
906 Sequence.push_back(SU);
908 ReleaseSuccessors(SU);
909 SU->isScheduled = true;
910 AvailableQueue->ScheduledNode(SU);
913 /// ListScheduleTopDown - The main loop of list scheduling for top-down
// Simpler than the bottom-up loop: no physreg-interference handling is
// needed (asserted in ReleaseSuccessors).
915 void ScheduleDAGRRList::ListScheduleTopDown() {
916 unsigned CurCycle = 0;
917 AvailableQueue->setCurCycle(CurCycle);
919 // Release any successors of the special Entry node.
920 ReleaseSuccessors(&EntrySU);
922 // All leaves to Available queue.
923 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
924 // It is available if it has no predecessors.
925 if (SUnits[i].Preds.empty()) {
926 AvailableQueue->push(&SUnits[i]);
927 SUnits[i].isAvailable = true;
931 // While Available queue is not empty, grab the node with the highest
932 // priority. If it is not ready put it back. Schedule the node.
933 Sequence.reserve(SUnits.size());
934 while (!AvailableQueue->empty()) {
935 SUnit *CurSU = AvailableQueue->pop();
938 ScheduleNodeTopDown(CurSU, CurCycle);
940 AvailableQueue->setCurCycle(CurCycle);
944 VerifySchedule(isBottomUp);
949 //===----------------------------------------------------------------------===//
950 // RegReductionPriorityQueue Implementation
951 //===----------------------------------------------------------------------===//
953 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
954 // to reduce register pressure.
// Forward declaration; used below as a class template parameterized on the
// sort functor (the template header is on an elided line).
958 class RegReductionPriorityQueue;
960 /// Sorting functions for the Available queue.
// Each functor holds a back-pointer to its owning priority queue so the
// comparison can consult per-node state (Sethi-Ullman numbers, ordering).
961 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
962 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
963 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
964 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
966 bool operator()(const SUnit* left, const SUnit* right) const;
// Top-down variant of the comparison.
969 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
970 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
971 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
972 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
974 bool operator()(const SUnit* left, const SUnit* right) const;
// Source-order-preferring variant (used by the "source" scheduler).
977 struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
978 RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
979 src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
981 src_ls_rr_sort(const src_ls_rr_sort &RHS)
984 bool operator()(const SUnit* left, const SUnit* right) const;
// Latency-aware variant (used by the "hybrid" scheduler).
987 struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
988 RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
989 hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
991 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
994 bool operator()(const SUnit* left, const SUnit* right) const;
996 } // end anonymous namespace
998 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
999 /// Smaller number is the higher priority.
// Recursive memoized computation: SUNumbers caches results per NodeNum,
// with 0 meaning "not yet computed".
1001 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1002 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1003 if (SethiUllmanNumber != 0)
1004 return SethiUllmanNumber;
// Classic Sethi-Ullman recurrence: take the max over data predecessors,
// bumping by one (via Extra) when the max is attained more than once.
1007 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1009 if (I->isCtrl()) continue; // ignore chain preds
1010 SUnit *PredSU = I->getSUnit();
1011 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1012 if (PredSethiUllman > SethiUllmanNumber) {
1013 SethiUllmanNumber = PredSethiUllman;
1015 } else if (PredSethiUllman == SethiUllmanNumber)
1019 SethiUllmanNumber += Extra;
// A leaf (no data preds) gets the minimum nonzero number.
1021 if (SethiUllmanNumber == 0)
1022 SethiUllmanNumber = 1;
1024 return SethiUllmanNumber;
// Priority queue that orders available SUnits by the SF comparison functor,
// using Sethi-Ullman numbers to reduce register pressure.
1029 class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1030 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
// Monotonically increasing id used to break ties by insertion order
// (presumably -- assigned on elided lines; TODO confirm).
1031 unsigned CurQueueId;
1034 // SUnits - The SUnits for the current graph.
1035 std::vector<SUnit> *SUnits;
1037 const TargetInstrInfo *TII;
1038 const TargetRegisterInfo *TRI;
1039 ScheduleDAGRRList *scheduleDAG;
1041 // SethiUllmanNumbers - The SethiUllman number for each node.
// Indexed by SUnit::NodeNum; 0 means "not yet computed".
1042 std::vector<unsigned> SethiUllmanNumbers;
1045 RegReductionPriorityQueue(const TargetInstrInfo *tii,
1046 const TargetRegisterInfo *tri)
1047 : Queue(SF(this)), CurQueueId(0),
1048 TII(tii), TRI(tri), scheduleDAG(NULL) {}
// initNodes - Called once per graph before scheduling begins; massages the
// DAG and precomputes all priorities.
1050 void initNodes(std::vector<SUnit> &sunits) {
1052 // Add pseudo dependency edges for two-address nodes.
1053 AddPseudoTwoAddrDeps();
1054 // Reroute edges to nodes with multiple uses.
1055 PrescheduleNodesWithMultipleUses();
1056 // Calculate node priorities.
1057 CalculateSethiUllmanNumbers();
// addNode - Register a SUnit created mid-scheduling (clone/unfold/copy)
// and compute its priority; doubles the table when it runs out of room.
1060 void addNode(const SUnit *SU) {
1061 unsigned SUSize = SethiUllmanNumbers.size();
1062 if (SUnits->size() > SUSize)
1063 SethiUllmanNumbers.resize(SUSize*2, 0);
1064 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
// updateNode - Invalidate and recompute a node's priority after its edges
// were rewritten.
1067 void updateNode(const SUnit *SU) {
1068 SethiUllmanNumbers[SU->NodeNum] = 0;
1069 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1072 void releaseState() {
1074 SethiUllmanNumbers.clear();
// getNodePriority - Priority used by the sort functors. Special-cases a few
// opcodes before falling back to the Sethi-Ullman number.
1077 unsigned getNodePriority(const SUnit *SU) const {
1078 assert(SU->NodeNum < SethiUllmanNumbers.size());
1079 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1080 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1081 // CopyToReg should be close to its uses to facilitate coalescing and
1084 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1085 Opc == TargetOpcode::SUBREG_TO_REG ||
1086 Opc == TargetOpcode::INSERT_SUBREG)
1087 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1088 // close to their uses to facilitate coalescing.
1090 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1091 // If SU does not have a register use, i.e. it doesn't produce a value
1092 // that would be consumed (e.g. store), then it terminates a chain of
1093 // computation. Give it a large SethiUllman number so it will be
1094 // scheduled right before its predecessors that it doesn't lengthen
1095 // their live ranges.
1097 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1098 // If SU does not have a register def, schedule it close to its uses
1099 // because it does not lengthen any live ranges.
1101 return SethiUllmanNumbers[SU->NodeNum];
1104 unsigned getNodeOrdering(const SUnit *SU) const {
1105 return scheduleDAG->DAG->GetOrdering(SU->getNode());
1108 unsigned size() const { return Queue.size(); }
1110 bool empty() const { return Queue.empty(); }
1112 void push(SUnit *U) {
1113 assert(!U->NodeQueueId && "Node in the queue already");
1114 U->NodeQueueId = ++CurQueueId;
1118 void push_all(const std::vector<SUnit *> &Nodes) {
1119 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
1124 if (empty()) return NULL;
1125 SUnit *V = Queue.top();
1131 void remove(SUnit *SU) {
1132 assert(!Queue.empty() && "Queue is empty!");
1133 assert(SU->NodeQueueId != 0 && "Not in queue!");
1134 Queue.erase_one(SU);
1135 SU->NodeQueueId = 0;
1138 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1139 scheduleDAG = scheduleDag;
1143 bool canClobber(const SUnit *SU, const SUnit *Op);
1144 void AddPseudoTwoAddrDeps();
1145 void PrescheduleNodesWithMultipleUses();
1146 void CalculateSethiUllmanNumbers();
// Convenience typedefs: one RegReductionPriorityQueue instantiation per sort
// strategy (bottom-up, top-down, source-order, hybrid).
1149 typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1150 BURegReductionPriorityQueue;
1152 typedef RegReductionPriorityQueue<td_ls_rr_sort>
1153 TDRegReductionPriorityQueue;
1155 typedef RegReductionPriorityQueue<src_ls_rr_sort>
1156 SrcRegReductionPriorityQueue;
1158 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1159 HybridBURRPriorityQueue;
1162 /// closestSucc - Returns the scheduled cycle of the successor which is
1163 /// closest to the current cycle.
// Implemented as the max height over data successors; the final return of
// MaxHeight is elided in this view.
1164 static unsigned closestSucc(const SUnit *SU) {
1165 unsigned MaxHeight = 0;
1166 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1168 if (I->isCtrl()) continue; // ignore chain succs
1169 unsigned Height = I->getSUnit()->getHeight();
1170 // If there are bunch of CopyToRegs stacked up, they should be considered
1171 // to be at the same position.
// Recurse through CopyToReg successors so a chain of copies counts as one
// position (+1 per copy level).
1172 if (I->getSUnit()->getNode() &&
1173 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1174 Height = closestSucc(I->getSUnit())+1;
1175 if (Height > MaxHeight)
1181 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
1182 /// for scratch registers, i.e. number of data dependencies.
// Counts non-chain (data) predecessor edges; the increment and final return
// are elided in this view.
1183 static unsigned calcMaxScratches(const SUnit *SU) {
1184 unsigned Scratches = 0;
1185 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1187 if (I->isCtrl()) continue; // ignore chain preds
1193 template <typename RRSort>
// BURRSort - Common bottom-up comparison cascade shared by the bu/src/hybrid
// sort functors. Compares, in order: Sethi-Ullman priority, distance to the
// closest successor, scratch-register estimate, height, depth, and finally
// queue id (FIFO tie-break).
1194 static bool BURRSort(const SUnit *left, const SUnit *right,
1195 const RegReductionPriorityQueue<RRSort> *SPQ) {
// 1) Sethi-Ullman priority (with the overrides from getNodePriority).
1196 unsigned LPriority = SPQ->getNodePriority(left);
1197 unsigned RPriority = SPQ->getNodePriority(right);
1198 if (LPriority != RPriority)
1199 return LPriority > RPriority;
1201 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
1206 // and the following instructions are both ready.
1210 // Then schedule t2 = op first.
1217 // This creates more short live intervals.
// 2) Distance to the closest data successor (the guard comparing LDist and
// RDist is elided in this view).
1218 unsigned LDist = closestSucc(left);
1219 unsigned RDist = closestSucc(right);
1221 return LDist < RDist;
1223 // How many registers becomes live when the node is scheduled.
// 3) Worst-case scratch register estimate.
1224 unsigned LScratch = calcMaxScratches(left);
1225 unsigned RScratch = calcMaxScratches(right);
1226 if (LScratch != RScratch)
1227 return LScratch > RScratch;
// 4) Height, then depth, then queue id - NodeQueueId is assigned on push()
// and is never zero for an enqueued node, hence the assert.
1229 if (left->getHeight() != right->getHeight())
1230 return left->getHeight() > right->getHeight();
1232 if (left->getDepth() != right->getDepth())
1233 return left->getDepth() < right->getDepth();
1235 assert(left->NodeQueueId && right->NodeQueueId &&
1236 "NodeQueueId cannot be zero");
1237 return (left->NodeQueueId > right->NodeQueueId);
// Plain bottom-up register reduction: delegate directly to the shared
// comparison cascade.
1241 bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1242 return BURRSort(left, right, SPQ);
1245 // Source order, otherwise bottom up.
1246 bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
// An ordering number of 0 means no source order was recorded for the node.
1247 unsigned LOrder = SPQ->getNodeOrdering(left);
1248 unsigned ROrder = SPQ->getNodeOrdering(right);
1250 // Prefer an ordering where the lower the non-zero order number, the higher
// Only discriminate when at least one node has a recorded order and the
// orders differ; a node with a non-zero order is distinguished from one
// without.
1252 if ((LOrder || ROrder) && LOrder != ROrder)
1253 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
// Otherwise fall back to the common bottom-up cascade.
1255 return BURRSort(left, right, SPQ);
1258 bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
// A node whose height exceeds the current cycle cannot issue yet without a
// pipeline stall.
1259 bool LStall = SPQ->getCurCycle() < left->getHeight();
1260 bool RStall = SPQ->getCurCycle() < right->getHeight();
1261 // If scheduling one of the node will cause a pipeline stall, delay it.
1262 // If scheduling either one of the node will cause a pipeline stall, sort them
1263 // according to their height.
1264 // If neither will cause a pipeline stall, try to reduce register pressure.
1268 if (left->getHeight() != right->getHeight())
1269 return left->getHeight() > right->getHeight();
// No stall distinction: reduce register pressure via the common cascade.
1272 return BURRSort(left, right, SPQ);
// canClobber - Check whether two-address instruction SU has a tied
// (def&use) operand that is produced by Op's original node; used by
// AddPseudoTwoAddrDeps to decide whether an artificial edge is needed.
1277 RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1278 if (SU->isTwoAddress) {
1279 unsigned Opc = SU->getNode()->getMachineOpcode();
1280 const TargetInstrDesc &TID = TII->get(Opc);
1281 unsigned NumRes = TID.getNumDefs();
// Use operands follow the NumRes def operands in the instruction
// description, hence the i+NumRes indexing below.
1282 unsigned NumOps = TID.getNumOperands() - NumRes;
1283 for (unsigned i = 0; i != NumOps; ++i) {
// TIED_TO != -1 means use operand i must share a register with a def.
1284 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1285 SDNode *DU = SU->getNode()->getOperand(i).getNode();
// NodeId of -1 means the operand node has no SUnit assigned.
1286 if (DU->getNodeId() != -1 &&
1287 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1295 /// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
1297 static bool hasCopyToRegUse(const SUnit *SU) {
1298 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
// Only data successors count; chain/control edges are skipped.
1300 if (I->isCtrl()) continue;
1301 const SUnit *SuccSU = I->getSUnit();
1302 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1308 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1309 /// physical register defs.
1310 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1311 const TargetInstrInfo *TII,
1312 const TargetRegisterInfo *TRI) {
1313 SDNode *N = SuccSU->getNode();
1314 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1315 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1316 assert(ImpDefs && "Caller should check hasPhysRegDefs");
// Walk SU's flag-glued node chain; each machine node contributes its own
// implicit-def list.
1317 for (const SDNode *SUNode = SU->getNode(); SUNode;
1318 SUNode = SUNode->getFlaggedNode()) {
1319 if (!SUNode->isMachineOpcode())
1321 const unsigned *SUImpDefs =
1322 TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
// Result values past the explicit defs correspond to implicit defs; skip
// flag/chain results and unused values, then check for register overlap.
1325 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1326 EVT VT = N->getValueType(i);
1327 if (VT == MVT::Flag || VT == MVT::Other)
1329 if (!N->hasAnyUseOfValue(i))
1331 unsigned Reg = ImpDefs[i - NumDefs];
// SUImpDefs is a zero-terminated register list.
1332 for (;*SUImpDefs; ++SUImpDefs) {
1333 unsigned SUReg = *SUImpDefs;
1334 if (TRI->regsOverlap(Reg, SUReg))
1342 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1343 /// are not handled well by the general register pressure reduction
1344 /// heuristics. When presented with code like this:
1353 /// the heuristics tend to push the store up, but since the
1354 /// operand of the store has another use (U), this would increase
1355 /// the length of that other use (the U->N edge).
1357 /// This function transforms code like the above to route U's
1358 /// dependence through the store when possible, like this:
1369 /// This results in the store being scheduled immediately
1370 /// after N, which shortens the U->N live range, reducing
1371 /// register pressure.
1374 void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1375 // Visit all the nodes in topological order, working top-down.
1376 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1377 SUnit *SU = &(*SUnits)[i];
1378 // For now, only look at nodes with no data successors, such as stores.
1379 // These are especially important, due to the heuristics in
1380 // getNodePriority for nodes with no data successors.
1381 if (SU->NumSuccs != 0)
1383 // For now, only look at nodes with exactly one data predecessor.
1384 if (SU->NumPreds != 1)
1386 // Avoid prescheduling copies to virtual registers, which don't behave
1387 // like other nodes from the perspective of scheduling heuristics.
1388 if (SDNode *N = SU->getNode())
1389 if (N->getOpcode() == ISD::CopyToReg &&
1390 TargetRegisterInfo::isVirtualRegister
1391 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1394 // Locate the single data predecessor.
1396 for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1397 EE = SU->Preds.end(); II != EE; ++II)
1398 if (!II->isCtrl()) {
1399 PredSU = II->getSUnit();
1404 // Don't rewrite edges that carry physregs, because that requires additional
1405 // support infrastructure.
1406 if (PredSU->hasPhysRegDefs)
1408 // Short-circuit the case where SU is PredSU's only data successor.
1409 if (PredSU->NumSuccs == 1)
1411 // Avoid prescheduling to copies from virtual registers, which don't behave
// like other nodes from the perspective of scheduling heuristics.
1413 if (SDNode *N = SU->getNode())
1414 if (N->getOpcode() == ISD::CopyFromReg &&
1415 TargetRegisterInfo::isVirtualRegister
1416 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1419 // Perform checks on the successors of PredSU.
1420 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1421 EE = PredSU->Succs.end(); II != EE; ++II) {
1422 SUnit *PredSuccSU = II->getSUnit();
1423 if (PredSuccSU == SU) continue;
1424 // If PredSU has another successor with no data successors, for
1425 // now don't attempt to choose either over the other.
1426 if (PredSuccSU->NumSuccs == 0)
1427 goto outer_loop_continue;
1428 // Don't break physical register dependencies.
1429 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1430 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1431 goto outer_loop_continue;
1432 // Don't introduce graph cycles.
1433 if (scheduleDAG->IsReachable(SU, PredSuccSU))
1434 goto outer_loop_continue;
1437 // Ok, the transformation is safe and the heuristics suggest it is
1438 // profitable. Update the graph.
1439 DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
1440 << " next to PredSU #" << PredSU->NodeNum
1441 << " to guide scheduling in the presence of multiple uses\n");
// Reroute every other successor edge of PredSU through SU: remove the
// PredSU->SuccSU edge and re-add it as SU->SuccSU.
1442 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1443 SDep Edge = PredSU->Succs[i];
1444 assert(!Edge.isAssignedRegDep());
1445 SUnit *SuccSU = Edge.getSUnit();
1447 Edge.setSUnit(PredSU);
1448 scheduleDAG->RemovePred(SuccSU, Edge);
1449 scheduleDAG->AddPred(SU, Edge);
1451 scheduleDAG->AddPred(SuccSU, Edge);
// Label target for the goto-based "continue" of the outer node loop.
1455 outer_loop_continue:;
1459 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1460 /// it as a def&use operand. Add a pseudo control edge from it to the other
1461 /// node (if it won't create a cycle) so the two-address one will be scheduled
1462 /// first (lower in the schedule). If both nodes are two-address, favor the
1463 /// one that has a CopyToReg use (more likely to be a loop induction update).
1464 /// If both are two-address, but one is commutable while the other is not
1465 /// commutable, favor the one that's not commutable.
1467 void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1468 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1469 SUnit *SU = &(*SUnits)[i];
1470 if (!SU->isTwoAddress)
// Only plain machine nodes are handled; flag-glued clusters are skipped.
1473 SDNode *Node = SU->getNode();
1474 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1477 unsigned Opc = Node->getMachineOpcode();
1478 const TargetInstrDesc &TID = TII->get(Opc);
1479 unsigned NumRes = TID.getNumDefs();
1480 unsigned NumOps = TID.getNumOperands() - NumRes;
// For each tied (def&use) operand, look at the node DU that produces it and
// at DU's other users (DUSU's successors).
1481 for (unsigned j = 0; j != NumOps; ++j) {
1482 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1484 SDNode *DU = SU->getNode()->getOperand(j).getNode();
1485 if (DU->getNodeId() == -1)
1487 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1488 if (!DUSU) continue;
1489 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1490 E = DUSU->Succs.end(); I != E; ++I) {
1491 if (I->isCtrl()) continue;
1492 SUnit *SuccSU = I->getSUnit();
1495 // Be conservative. Ignore if nodes aren't at roughly the same
1496 // depth and height.
1497 if (SuccSU->getHeight() < SU->getHeight() &&
1498 (SU->getHeight() - SuccSU->getHeight()) > 1)
1500 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1501 // constrains whatever is using the copy, instead of the copy
1502 // itself. In the case that the copy is coalesced, this
1503 // preserves the intent of the pseudo two-address heurietics.
1504 while (SuccSU->Succs.size() == 1 &&
1505 SuccSU->getNode()->isMachineOpcode() &&
1506 SuccSU->getNode()->getMachineOpcode() ==
1507 TargetOpcode::COPY_TO_REGCLASS)
1508 SuccSU = SuccSU->Succs.front().getSUnit();
1509 // Don't constrain non-instruction nodes.
1510 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1512 // Don't constrain nodes with physical register defs if the
1513 // predecessor can clobber them.
1514 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1515 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1518 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1519 // these may be coalesced away. We want them close to their uses.
1520 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1521 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1522 SuccOpc == TargetOpcode::INSERT_SUBREG ||
1523 SuccOpc == TargetOpcode::SUBREG_TO_REG)
// Add the artificial edge SuccSU->SU unless it would create a cycle; the
// extra conditions encode the CopyToReg/commutable preferences from the
// function comment above.
1525 if ((!canClobber(SuccSU, DUSU) ||
1526 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1527 (!SU->isCommutable && SuccSU->isCommutable)) &&
1528 !scheduleDAG->IsReachable(SuccSU, SU)) {
1529 DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
1530 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1531 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1532 /*Reg=*/0, /*isNormalMemory=*/false,
1533 /*isMustAlias=*/false,
1534 /*isArtificial=*/true));
1541 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1542 /// scheduling units.
// Reset every entry to 0 ("uncomputed"), then compute each node's number;
// CalcNodeSethiUllmanNumber memoizes, so already-visited nodes are cheap.
1544 void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1545 SethiUllmanNumbers.assign(SUnits->size(), 0);
1547 for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1548 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1551 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
1552 /// predecessors of the successors of the SUnit SU. Stop when the provided
1553 /// limit is exceeded.
// The early-exit check against the limit parameter and the final return are
// elided in this view.
1554 static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1557 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1559 const SUnit *SuccSU = I->getSUnit();
1560 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1561 EE = SuccSU->Preds.end(); II != EE; ++II) {
1562 SUnit *PredSU = II->getSUnit();
// Only predecessors not yet emitted contribute to the sum.
1563 if (!PredSU->isScheduled)
// Top-down register-reduction comparison: priority plus a "floater" bonus
// (a target instruction with no predecessors can be placed freely), with
// depth, successors-left, and queue id as tie-breaks. Some bonus/penalty
// adjustments between the comparisons are elided in this view.
1573 bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1574 unsigned LPriority = SPQ->getNodePriority(left);
1575 unsigned RPriority = SPQ->getNodePriority(right);
1576 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1577 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
// A "floater" is a machine instruction with no data predecessors.
1578 bool LIsFloater = LIsTarget && left->NumPreds == 0;
1579 bool RIsFloater = RIsTarget && right->NumPreds == 0;
// Bonus of 2 when the node is the only unscheduled predecessor of some
// successor (scheduling it unblocks that successor).
1580 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1581 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1583 if (left->NumSuccs == 0 && right->NumSuccs != 0)
1585 else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1592 if (left->NumSuccs == 1)
1594 if (right->NumSuccs == 1)
// Compare bonus-adjusted priorities, then depth, then remaining successor
// count, then the queue id assigned on push() (never zero once enqueued).
1597 if (LPriority+LBonus != RPriority+RBonus)
1598 return LPriority+LBonus < RPriority+RBonus;
1600 if (left->getDepth() != right->getDepth())
1601 return left->getDepth() < right->getDepth();
1603 if (left->NumSuccsLeft != right->NumSuccsLeft)
1604 return left->NumSuccsLeft > right->NumSuccsLeft;
1606 assert(left->NodeQueueId && right->NodeQueueId &&
1607 "NodeQueueId cannot be zero");
1608 return (left->NodeQueueId > right->NodeQueueId);
1611 //===----------------------------------------------------------------------===//
1612 // Public Constructor Functions
1613 //===----------------------------------------------------------------------===//
// createBURRListDAGScheduler - Factory for the bottom-up register-reduction
// scheduler: builds the queue, then the scheduler, and wires them together.
// NOTE(review): the two bool ScheduleDAGRRList ctor args appear to select
// bottom-up vs top-down and the hybrid/latency mode (cf. the other factories
// below) - confirm against the ctor declaration.
1615 llvm::ScheduleDAGSDNodes *
1616 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1617 const TargetMachine &TM = IS->TM;
1618 const TargetInstrInfo *TII = TM.getInstrInfo();
1619 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1621 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
1623 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
// Late-bind the queue's back-pointer (needed by its graph transformations).
1624 PQ->setScheduleDAG(SD);
// createTDRRListDAGScheduler - Factory for the top-down register-reduction
// scheduler; mirrors createBURRListDAGScheduler but with the top-down queue
// and a different first bool ctor argument.
1628 llvm::ScheduleDAGSDNodes *
1629 llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1630 const TargetMachine &TM = IS->TM;
1631 const TargetInstrInfo *TII = TM.getInstrInfo();
1632 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1634 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
1636 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
// Late-bind the queue's back-pointer (needed by its graph transformations).
1637 PQ->setScheduleDAG(SD);
// createSourceListDAGScheduler - Factory for the source-order scheduler:
// bottom-up register reduction that first tries to follow source order
// (src_ls_rr_sort).
1641 llvm::ScheduleDAGSDNodes *
1642 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1643 const TargetMachine &TM = IS->TM;
1644 const TargetInstrInfo *TII = TM.getInstrInfo();
1645 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1647 SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
1649 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
// Late-bind the queue's back-pointer (needed by its graph transformations).
1650 PQ->setScheduleDAG(SD);
1654 llvm::ScheduleDAGSDNodes *
1655 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1656 const TargetMachine &TM = IS->TM;
1657 const TargetInstrInfo *TII = TM.getInstrInfo();
1658 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1660 HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
1662 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
1663 PQ->setScheduleDAG(SD);