1 //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements a fast scheduler.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "pre-RA-sched"
15 #include "llvm/CodeGen/ScheduleDAGSDNodes.h"
16 #include "llvm/CodeGen/SchedulerRegistry.h"
17 #include "llvm/Target/TargetRegisterInfo.h"
18 #include "llvm/Target/TargetData.h"
19 #include "llvm/Target/TargetMachine.h"
20 #include "llvm/Target/TargetInstrInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/CommandLine.h"
// Counters declared with the STATISTIC macro; the string literal is the
// description attached to each counter.
29 STATISTIC(NumUnfolds, "Number of nodes unfolded");
30 STATISTIC(NumDups, "Number of duplicated nodes");
31 STATISTIC(NumCCCopies, "Number of cross class copies");
// File-local RegisterScheduler object that ties the name "fast" and a short
// description to createFastDAGScheduler (defined at the end of this file).
// NOTE(review): presumably this is what makes the scheduler selectable by
// name -- confirm against SchedulerRegistry.h.
33 static RegisterScheduler
34 fastDAGScheduler("fast", "Fast suboptimal list scheduling",
35 createFastDAGScheduler);
38 /// FastPriorityQueue - A degenerate priority queue that considers
39 /// all nodes to have the same priority.
41 struct VISIBILITY_HIDDEN FastPriorityQueue {
// Backing storage for the queued SUnits.
42 SmallVector<SUnit *, 16> Queue;
// Returns true when no SUnit is currently queued.
44 bool empty() const { return Queue.empty(); }
// An empty queue yields NULL instead of reading Queue.back().
51 if (empty()) return NULL;
// Otherwise the entry at the back of the vector is the one examined.
52 SUnit *V = Queue.back();
58 //===----------------------------------------------------------------------===//
59 /// ScheduleDAGFast - The actual "fast" list scheduler implementation.
// Derives from ScheduleDAGSDNodes and adds the available queue plus two
// tables, indexed by physical register number, that track live registers.
61 class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes {
63 /// AvailableQueue - The priority queue to use for the available SUnits.
64 FastPriorityQueue AvailableQueue;
66 /// LiveRegDefs - A set of physical registers and their definitions
67 /// that are "live". These nodes must be scheduled before any other nodes that
68 /// modify the registers can be scheduled.
// Indexed by physical register number; a NULL entry means the register is
// not currently live.
70 std::vector<SUnit*> LiveRegDefs;
// LiveRegCycles - Indexed like LiveRegDefs; holds the cycle value stored when
// the matching LiveRegDefs entry was set, and 0 once it is cleared.
71 std::vector<unsigned> LiveRegCycles;
// Forwards all three arguments unchanged to the ScheduleDAGSDNodes base.
74 ScheduleDAGFast(SelectionDAG *dag, MachineBasicBlock *bb,
75 const TargetMachine &tm)
76 : ScheduleDAGSDNodes(dag, bb, tm) {}
80 /// AddPred - adds a predecessor edge to SUnit SU.
81 /// This returns true if this is a new predecessor.
// Thin wrapper around SUnit::addPred.
82 bool AddPred(SUnit *SU, const SDep &D) {
83 return SU->addPred(D);
86 /// RemovePred - removes a predecessor edge from SUnit SU.
87 /// This returns true if an edge was removed.
// Thin wrapper around SUnit::removePred.
88 bool RemovePred(SUnit *SU, const SDep &D) {
89 return SU->removePred(D);
// Bottom-up scheduling helpers; each one is described at its out-of-line
// definition later in this file.
93 void ReleasePred(SUnit *SU, SDep *PredEdge);
94 void ScheduleNodeBottomUp(SUnit*, unsigned);
95 SUnit *CopyAndMoveSuccessors(SUnit*);
96 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
97 const TargetRegisterClass*,
98 const TargetRegisterClass*,
99 SmallVector<SUnit*, 2>&);
100 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
101 void ListScheduleBottomUp();
103 } // end anonymous namespace
106 /// Schedule - Schedule the DAG using list scheduling.
// Sizes the live-register tables, dumps the scheduling units in debug
// output, and then runs the bottom-up list scheduling loop.
107 void ScheduleDAGFast::Schedule() {
108 DOUT << "********** List Scheduling **********\n";
// One slot per register reported by TRI->getNumRegs(); slots added by the
// resize start out as NULL (no live definition) and cycle 0.
111 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
112 LiveRegCycles.resize(TRI->getNumRegs(), 0);
114 // Build scheduling units.
117 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
118 SUnits[su].dumpAll(this));
120 // Execute the actual scheduling loop.
121 ListScheduleBottomUp();
124 //===----------------------------------------------------------------------===//
125 // Bottom-Up Scheduling
126 //===----------------------------------------------------------------------===//
128 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
129 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
// The predecessor operated on is the SUnit carried by PredEdge.
130 void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
131 SUnit *PredSU = PredEdge->getSUnit();
132 --PredSU->NumSuccsLeft;
// A negative count means the predecessor has been released more often than
// it has outstanding successors; this is reported as a scheduling failure.
135 if (PredSU->NumSuccsLeft < 0) {
136 cerr << "*** Scheduling failed! ***\n";
138 cerr << " has been released too many times!\n";
// Every successor has been accounted for: the predecessor becomes a
// scheduling candidate.
143 if (PredSU->NumSuccsLeft == 0) {
144 PredSU->isAvailable = true;
145 AvailableQueue.push(PredSU);
149 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
150 /// count of its predecessors. If a predecessor pending count is zero, add it to
151 /// the Available queue.
// Also maintains LiveRegDefs / LiveRegCycles for edges that carry an assigned
// physical register, and marks SU as scheduled at the end.
152 void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
153 DOUT << "*** Scheduling [" << CurCycle << "]: ";
154 DEBUG(SU->dump(this));
// Record the cycle on the node and append it to the emitted sequence.
156 SU->Cycle = CurCycle;
157 Sequence.push_back(SU);
159 // Bottom up: release predecessors
160 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
162 ReleasePred(SU, &*I);
163 if (I->isAssignedRegDep()) {
164 // This is a physical register dependency and it's impossible or
165 // expensive to copy the register. Make sure nothing that can
166 // clobber the register is scheduled between the predecessor and
// Only the first user seen for a register records its definition and the
// current cycle; an existing entry is left untouched.
168 if (!LiveRegDefs[I->getReg()]) {
170 LiveRegDefs[I->getReg()] = I->getSUnit();
171 LiveRegCycles[I->getReg()] = CurCycle;
176 // Release all the implicit physical register defs that are live.
177 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
179 if (I->isAssignedRegDep()) {
// The entry is cleared only when the cycle recorded for the register equals
// the cycle of this successor, i.e. the successor that made it live.
180 if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) {
181 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
182 assert(LiveRegDefs[I->getReg()] == SU &&
183 "Physical register dependency violated?");
185 LiveRegDefs[I->getReg()] = NULL;
186 LiveRegCycles[I->getReg()] = 0;
191 SU->isScheduled = true;
194 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
195 /// successors to the newly created node.
// The visible code has two parts: an "unfolding" part that asks the target
// to split the node via unfoldMemoryOperand and rewires the dependency
// edges onto the resulting units, and a "duplicating" part that moves edges
// of already-scheduled successors away from the original unit.
196 SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
// Nodes that have a flagged node attached are tested for first.
197 if (SU->getNode()->getFlaggedNode())
200 SDNode *N = SU->getNode();
// Inspect the value type of every result and of every operand of N.
205 bool TryUnfold = false;
206 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
207 MVT VT = N->getValueType(i);
210 else if (VT == MVT::Other)
213 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
214 const SDValue &Op = N->getOperand(i);
215 MVT VT = Op.getNode()->getValueType(Op.getResNo());
// Ask the target to unfold the memory operand; the produced nodes are
// collected in NewNodes.
221 SmallVector<SDNode*, 2> NewNodes;
222 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
225 DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
// Exactly two nodes are expected; the first one is treated as the load.
226 assert(NewNodes.size() == 2 && "Expected a load folding node!");
229 SDNode *LoadNode = NewNodes[0];
230 unsigned NumVals = N->getNumValues();
231 unsigned OldNumVals = SU->getNode()->getNumValues();
// Redirect uses of the original node's first NumVals results to the matching
// results of N, and uses of its last result to result 1 of LoadNode.
232 for (unsigned i = 0; i != NumVals; ++i)
233 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
234 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
235 SDValue(LoadNode, 1));
// Create the scheduling unit for N and record its number as N's node id.
237 SUnit *NewSU = NewSUnit(N);
238 assert(N->getNodeId() == -1 && "Node already inserted!");
239 N->setNodeId(NewSU->NodeNum);
// Derive the two-address and commutable flags from the instruction
// description of N's machine opcode.
241 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
242 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
243 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
244 NewSU->isTwoAddress = true;
248 if (TID.isCommutable())
249 NewSU->isCommutable = true;
250 // FIXME: Calculate height / depth and propagate the changes?
251 NewSU->Depth = SU->Depth;
252 NewSU->Height = SU->Height;
254 // LoadNode may already exist. This can happen when there is another
255 // load from the same location and producing the same type of value
256 // but it has different alignment or volatileness.
257 bool isNewLoad = true;
// A node id other than -1 means LoadNode already has a scheduling unit, which
// is looked up by that id; otherwise a fresh unit is created for it.
259 if (LoadNode->getNodeId() != -1) {
260 LoadSU = &SUnits[LoadNode->getNodeId()];
263 LoadSU = NewSUnit(LoadNode);
264 LoadNode->setNodeId(LoadSU->NodeNum);
266 LoadSU->Depth = SU->Depth;
267 LoadSU->Height = SU->Height;
// Sort the original unit's edges into buckets: predecessors whose node is an
// operand of LoadNode, the remaining predecessors, chain successors and the
// remaining successors.
271 SmallVector<SDep, 4> ChainSuccs;
272 SmallVector<SDep, 4> LoadPreds;
273 SmallVector<SDep, 4> NodePreds;
274 SmallVector<SDep, 4> NodeSuccs;
275 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
279 else if (I->getSUnit()->getNode() &&
280 I->getSUnit()->getNode()->isOperandOf(LoadNode))
281 LoadPreds.push_back(*I);
283 NodePreds.push_back(*I);
285 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
288 ChainSuccs.push_back(*I);
290 NodeSuccs.push_back(*I);
// Move the chain predecessor, if there is one, from SU to the load unit.
293 if (ChainPred.getSUnit()) {
294 RemovePred(SU, ChainPred);
296 AddPred(LoadSU, ChainPred);
// Predecessors that feed the load move from SU to the load unit.
298 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
299 const SDep &Pred = LoadPreds[i];
300 RemovePred(SU, Pred);
302 AddPred(LoadSU, Pred);
// All other predecessors move from SU to the new unit.
305 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
306 const SDep &Pred = NodePreds[i];
307 RemovePred(SU, Pred);
308 AddPred(NewSU, Pred);
// Detach the bucketed successor edges from the units they currently point at.
310 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
311 SDep D = NodeSuccs[i];
312 SUnit *SuccDep = D.getSUnit();
314 RemovePred(SuccDep, D);
318 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
319 SDep D = ChainSuccs[i];
320 SUnit *SuccDep = D.getSUnit();
322 RemovePred(SuccDep, D);
// The new unit is ordered after the load through an Order dependency that
// carries the load unit's latency.
329 AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
// With no successors left to wait for, the new unit is marked available.
334 if (NewSU->NumSuccsLeft == 0) {
335 NewSU->isAvailable = true;
341 DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
344 // New SUnit has the exact same predecessors.
345 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
// Artificial edges are skipped; the depth is kept at least one greater than
// that of each remaining predecessor.
347 if (!I->isArtificial()) {
349 NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1);
352 // Only copy scheduled successors. Cut them from old node's successor
353 // list and move them over.
// Edges to remove are collected first and removed in a separate loop below.
// NOTE(review): presumably so SU->Succs is not modified while it is being
// iterated -- confirm.
354 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
355 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
357 if (I->isArtificial())
359 SUnit *SuccSU = I->getSUnit();
360 if (SuccSU->isScheduled) {
361 NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1);
366 DelDeps.push_back(std::make_pair(SuccSU, D));
369 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
370 RemovePred(DelDeps[i].first, DelDeps[i].second);
377 /// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
378 /// and move all scheduled successors of the given SUnit to the last copy.
// Two scheduling units without an SDNode are created: one copying from SrcRC
// to DestRC and one copying back from DestRC to SrcRC. Both are appended to
// Copies, in that order.
379 void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
380 const TargetRegisterClass *DestRC,
381 const TargetRegisterClass *SrcRC,
382 SmallVector<SUnit*, 2> &Copies) {
// First copy: SrcRC -> DestRC.
383 SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
384 CopyFromSU->CopySrcRC = SrcRC;
385 CopyFromSU->CopyDstRC = DestRC;
// Second copy: DestRC -> SrcRC.
387 SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
388 CopyToSU->CopySrcRC = DestRC;
389 CopyToSU->CopyDstRC = SrcRC;
391 // Only copy scheduled successors. Cut them from old node's successor
392 // list and move them over.
// Edges to remove are collected here and removed in the loop that follows.
393 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
394 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
396 if (I->isArtificial())
398 SUnit *SuccSU = I->getSUnit();
399 if (SuccSU->isScheduled) {
// The dependency is retargeted at the second copy.
401 D.setSUnit(CopyToSU);
403 DelDeps.push_back(std::make_pair(SuccSU, *I));
406 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
407 RemovePred(DelDeps[i].first, DelDeps[i].second);
// Chain the units with data dependencies: SU feeds the first copy through
// Reg, and the first copy feeds the second with register 0.
410 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
411 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
// Callers can rely on Copies.front() being the first copy and Copies.back()
// the second when Copies was empty on entry.
413 Copies.push_back(CopyFromSU);
414 Copies.push_back(CopyToSU);
419 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
420 /// definition of the specified node.
421 /// FIXME: Move to SelectionDAG?
// N must be a machine node whose instruction description has an implicit-def
// list; this is asserted below.
422 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
423 const TargetInstrInfo *TII) {
424 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
425 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
// The result index starts at the number of defs reported by the description.
426 unsigned NumRes = TID.getNumDefs();
// Walk the zero-terminated implicit-def list.
// NOTE(review): the loop body is not visible here; presumably it advances
// NumRes until Reg is found -- confirm.
427 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
432 return N->getValueType(NumRes);
435 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
436 /// scheduling of the given node to satisfy live physical register dependencies.
437 /// If the specific node is the last one that's available to schedule, do
438 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
// Registers found to interfere are appended to LRegs, each at most once; the
// final result is true when LRegs is non-empty.
439 bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
440 SmallVector<unsigned, 4> &LRegs){
// Checked up front: whether any physical register is live at all.
441 if (NumLiveRegs == 0)
// Tracks which registers were already appended to LRegs.
444 SmallSet<unsigned, 4> RegAdded;
445 // If this node would clobber any "live" register, then it's not ready.
446 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
448 if (I->isAssignedRegDep()) {
449 unsigned Reg = I->getReg();
// A register interferes when it is live and its recorded definition is a
// unit other than this predecessor.
450 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
451 if (RegAdded.insert(Reg))
452 LRegs.push_back(Reg);
// The same test is applied to every alias of the register.
454 for (const unsigned *Alias = TRI->getAliasSet(Reg);
456 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
457 if (RegAdded.insert(*Alias))
458 LRegs.push_back(*Alias);
// Walk SU's node and the nodes reachable through getFlaggedNode(), looking at
// the implicit defs of each machine instruction.
463 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
464 if (!Node->isMachineOpcode())
466 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
467 if (!TID.ImplicitDefs)
469 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
// An implicit def interferes when the register is live and its recorded
// definition is a unit other than SU.
470 if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
471 if (RegAdded.insert(*Reg))
472 LRegs.push_back(*Reg);
474 for (const unsigned *Alias = TRI->getAliasSet(*Reg);
476 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
477 if (RegAdded.insert(*Alias))
478 LRegs.push_back(*Alias);
482 return !LRegs.empty();
486 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
// Seeds the available queue with the DAG root, schedules units one at a
// time, resolves live physical register conflicts by cloning or by
// inserting cross class copies, reverses the sequence at the end, and then
// verifies the result.
488 void ScheduleDAGFast::ListScheduleBottomUp() {
489 unsigned CurCycle = 0;
490 // Add root to Available queue.
491 if (!SUnits.empty()) {
// The root's scheduling unit is found through the node id of the DAG root.
492 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
493 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
494 RootSU->isAvailable = true;
495 AvailableQueue.push(RootSU);
498 // While Available queue is not empty, grab the node with the highest
499 // priority. If it is not ready put it back. Schedule the node.
// NotReady holds candidates that were set aside; LRegsMap remembers, per
// delayed unit, the registers that caused the delay.
500 SmallVector<SUnit*, 4> NotReady;
501 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
502 Sequence.reserve(SUnits.size());
503 while (!AvailableQueue.empty()) {
504 bool Delayed = false;
506 SUnit *CurSU = AvailableQueue.pop();
// A candidate that must be delayed is recorded together with its interfering
// registers, marked pending, and the next candidate is popped.
508 SmallVector<unsigned, 4> LRegs;
509 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
512 LRegsMap.insert(std::make_pair(CurSU, LRegs));
514 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
515 NotReady.push_back(CurSU);
516 CurSU = AvailableQueue.pop();
519 // All candidates are delayed due to live physical reg dependencies.
520 // Try code duplication or inserting cross class copies
522 if (Delayed && !CurSU) {
524 // Try duplicating the nodes that produce these
525 // "expensive to copy" values to break the dependency. In case even
526 // that doesn't work, insert cross class copies.
// Only the first delayed candidate is tried, and it must have exactly one
// interfering register.
527 SUnit *TrySU = NotReady[0];
528 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
529 assert(LRegs.size() == 1 && "Can't handle this yet!");
530 unsigned Reg = LRegs[0];
531 SUnit *LRDef = LiveRegDefs[Reg];
532 SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
534 // Issue expensive cross register class copies.
// The register class of Reg and its cross-copy class are obtained from TRI.
535 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
536 const TargetRegisterClass *RC =
537 TRI->getPhysicalRegisterRegClass(Reg, VT);
538 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
540 assert(false && "Don't know how to copy this physical register!");
543 SmallVector<SUnit*, 2> Copies;
544 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
545 DOUT << "Adding an edge from SU # " << TrySU->NodeNum
546 << " to SU #" << Copies.front()->NodeNum << "\n";
// Artificial order edge: the first copy becomes a predecessor of TrySU.
547 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
548 /*Reg=*/0, /*isNormalMemory=*/false,
549 /*isMustAlias=*/false, /*isArtificial=*/true));
// The last copy takes over as the new definition.
550 NewDef = Copies.back();
553 DOUT << "Adding an edge from SU # " << NewDef->NodeNum
554 << " to SU #" << TrySU->NodeNum << "\n";
// The new definition is recorded as the live def of Reg, and an artificial
// order edge makes TrySU a predecessor of it; TrySU is no longer available.
555 LiveRegDefs[Reg] = NewDef;
556 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
557 /*Reg=*/0, /*isNormalMemory=*/false,
558 /*isMustAlias=*/false, /*isArtificial=*/true));
559 TrySU->isAvailable = false;
564 assert(false && "Unable to resolve live physical register dependencies!");
569 // Add the nodes that aren't ready back onto the available list.
570 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
571 NotReady[i]->isPending = false;
572 // May no longer be available due to backtracking.
573 if (NotReady[i]->isAvailable)
574 AvailableQueue.push(NotReady[i]);
579 ScheduleNodeBottomUp(CurSU, CurCycle);
583 // Reverse the order if it is bottom up.
584 std::reverse(Sequence.begin(), Sequence.end());
588 // Verify that all SUnits were scheduled.
589 bool AnyNotSched = false;
590 unsigned DeadNodes = 0;
592 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
593 if (!SUnits[i].isScheduled) {
// Unscheduled units without predecessors and successors are tested for
// separately from the failure report below.
594 if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
599 cerr << "*** List scheduling failed! ***\n";
600 SUnits[i].dump(this);
601 cerr << "has not been scheduled!\n";
// A unit that still has unreleased successors is reported as a failure too.
604 if (SUnits[i].NumSuccsLeft != 0) {
606 cerr << "*** List scheduling failed! ***\n";
607 SUnits[i].dump(this);
608 cerr << "has successors left!\n";
612 for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
// Final consistency checks on the scheduled sequence.
615 assert(!AnyNotSched);
616 assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
617 "The number of nodes scheduled doesn't match the expected number!");
621 //===----------------------------------------------------------------------===//
622 // Public Constructor Functions
623 //===----------------------------------------------------------------------===//
// Factory for the "fast" scheduler: returns a newly allocated
// ScheduleDAGFast built from DAG, BB and *TM. IS and the trailing unnamed
// bool are not used by the visible body.
// NOTE(review): the object is created with plain `new`; presumably the
// caller takes ownership -- confirm.
625 llvm::ScheduleDAG* llvm::createFastDAGScheduler(SelectionDAGISel *IS,
627 const TargetMachine *TM,
628 MachineBasicBlock *BB, bool) {
629 return new ScheduleDAGFast(DAG, BB, *TM);