1 //===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This implements a fast scheduler.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "pre-RA-sched"
15 #include "llvm/CodeGen/ScheduleDAGSDNodes.h"
16 #include "llvm/CodeGen/SchedulerRegistry.h"
17 #include "llvm/Target/TargetRegisterInfo.h"
18 #include "llvm/Target/TargetData.h"
19 #include "llvm/Target/TargetMachine.h"
20 #include "llvm/Target/TargetInstrInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/Compiler.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/Support/CommandLine.h"
// Statistics counters declared for this scheduler.
29 STATISTIC(NumUnfolds, "Number of nodes unfolded");
30 STATISTIC(NumDups, "Number of duplicated nodes");
31 STATISTIC(NumPRCopies, "Number of physical copies");
// Registry entry for this scheduler under the name "fast";
// createFastDAGScheduler is handed to the entry as its third argument.
33 static RegisterScheduler
34 fastDAGScheduler("fast", "Fast suboptimal list scheduling",
35 createFastDAGScheduler);
38 /// FastPriorityQueue - A degenerate priority queue that considers
39 /// all nodes to have the same priority.
41 struct VISIBILITY_HIDDEN FastPriorityQueue {
// Storage for the queued SUnits.
42 SmallVector<SUnit *, 16> Queue;
// True when Queue holds no SUnits.
44 bool empty() const { return Queue.empty(); }
// An empty queue yields NULL; otherwise the element at the back of Queue is
// read.
51 if (empty()) return NULL;
52 SUnit *V = Queue.back();
58 //===----------------------------------------------------------------------===//
59 /// ScheduleDAGFast - The actual "fast" list scheduler implementation.
61 class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes {
63 /// AvailableQueue - The priority queue to use for the available SUnits.
64 FastPriorityQueue AvailableQueue;
66 /// LiveRegDefs - For each physical register that is currently "live", the
67 /// SUnit that defines it. These nodes must be scheduled before any other
68 /// nodes that modify the registers can be scheduled.
70 std::vector<SUnit*> LiveRegDefs;
/// LiveRegCycles - Indexed by register like LiveRegDefs; holds the cycle
/// recorded when the register's LiveRegDefs entry was set.
71 std::vector<unsigned> LiveRegCycles;
74 ScheduleDAGFast(SelectionDAG *dag, MachineBasicBlock *bb,
75 const TargetMachine &tm)
76 : ScheduleDAGSDNodes(dag, bb, tm) {}
80 /// AddPred - adds a predecessor edge to SUnit SU.
81 /// It is declared void and reports nothing back to the caller.
82 void AddPred(SUnit *SU, const SDep &D) {
86 /// RemovePred - removes a predecessor edge from SUnit SU.
87 /// It is declared void and reports nothing back to the caller.
88 void RemovePred(SUnit *SU, const SDep &D) {
93 void ReleasePred(SUnit *SU, SDep *PredEdge);
94 void ScheduleNodeBottomUp(SUnit*, unsigned);
95 SUnit *CopyAndMoveSuccessors(SUnit*);
96 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
97 const TargetRegisterClass*,
98 const TargetRegisterClass*,
99 SmallVector<SUnit*, 2>&);
100 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
101 void ListScheduleBottomUp();
103 /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
104 bool ForceUnitLatencies() const { return true; }
106 } // end anonymous namespace
109 /// Schedule - Schedule the DAG using list scheduling.
110 void ScheduleDAGFast::Schedule() {
111 DOUT << "********** List Scheduling **********\n";
// Size both live-register tables to TRI->getNumRegs() entries, filling new
// slots with NULL and 0 respectively.
114 LiveRegDefs.resize(TRI->getNumRegs(), NULL);
115 LiveRegCycles.resize(TRI->getNumRegs(), 0);
117 // Build the scheduling graph.
// Under the DEBUG macro, dump every SUnit.
120 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
121 SUnits[su].dumpAll(this));
123 // Execute the actual scheduling loop.
124 ListScheduleBottomUp();
127 //===----------------------------------------------------------------------===//
128 // Bottom-Up Scheduling
129 //===----------------------------------------------------------------------===//
131 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
132 /// the AvailableQueue if the count reaches zero. Also update its cycle bound.
// NOTE(review): no cycle-bound update is visible in this body; confirm whether
// that last sentence is stale.
133 void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
// The predecessor is the SUnit at the other end of the edge.
134 SUnit *PredSU = PredEdge->getSUnit();
135 --PredSU->NumSuccsLeft;
// A negative count means the predecessor was released more often than
// expected; report it.
138 if (PredSU->NumSuccsLeft < 0) {
139 cerr << "*** Scheduling failed! ***\n";
141 cerr << " has been released too many times!\n";
// With no successors left unscheduled, the predecessor becomes available.
146 if (PredSU->NumSuccsLeft == 0) {
147 PredSU->isAvailable = true;
148 AvailableQueue.push(PredSU);
152 /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
153 /// count of its predecessors. If a predecessor pending count is zero, add it to
154 /// the Available queue.
155 void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
156 DOUT << "*** Scheduling [" << CurCycle << "]: ";
157 DEBUG(SU->dump(this));
// Raise the node's height to at least the current cycle, then append it to
// the schedule.
159 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
160 SU->setHeightToAtLeast(CurCycle);
161 Sequence.push_back(SU);
163 // Bottom up: release predecessors
164 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
166 ReleasePred(SU, &*I);
167 if (I->isAssignedRegDep()) {
168 // This is a physical register dependency and it's impossible or
169 // expensive to copy the register. Make sure nothing that can
170 // clobber the register is scheduled between the predecessor and
// Only the first dependency seen for a register records its defining SUnit
// and the current cycle.
172 if (!LiveRegDefs[I->getReg()]) {
174 LiveRegDefs[I->getReg()] = I->getSUnit();
175 LiveRegCycles[I->getReg()] = CurCycle;
180 // Release all the implicit physical register defs that are live.
181 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
183 if (I->isAssignedRegDep()) {
// Clear the entry only when the recorded cycle equals the successor's height.
184 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
185 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
186 assert(LiveRegDefs[I->getReg()] == SU &&
187 "Physical register dependency violated?");
189 LiveRegDefs[I->getReg()] = NULL;
190 LiveRegCycles[I->getReg()] = 0;
195 SU->isScheduled = true;
198 /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
199 /// successors to the newly created node.
200 SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
// Nodes that have a flagged node attached get special treatment here.
201 if (SU->getNode()->getFlaggedNode())
204 SDNode *N = SU->getNode();
// Walk the value types the node produces, then the value types of its
// operands.
209 bool TryUnfold = false;
210 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
211 MVT VT = N->getValueType(i);
214 else if (VT == MVT::Other)
217 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
218 const SDValue &Op = N->getOperand(i);
219 MVT VT = Op.getNode()->getValueType(Op.getResNo());
// Ask the target to unfold the memory operand; the resulting nodes are
// collected in NewNodes.
225 SmallVector<SDNode*, 2> NewNodes;
226 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
229 DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
230 assert(NewNodes.size() == 2 && "Expected a load folding node!");
// NewNodes[0] is treated as the load.
233 SDNode *LoadNode = NewNodes[0];
234 unsigned NumVals = N->getNumValues();
235 unsigned OldNumVals = SU->getNode()->getNumValues();
// Redirect uses of the first NumVals values of SU's node to the matching
// values of N, and uses of its last value to value 1 of LoadNode.
236 for (unsigned i = 0; i != NumVals; ++i)
237 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
238 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
239 SDValue(LoadNode, 1));
// Create an SUnit for N and tie the node to it through its node id.
241 SUnit *NewSU = NewSUnit(N);
242 assert(N->getNodeId() == -1 && "Node already inserted!");
243 N->setNodeId(NewSU->NodeNum);
// Carry the two-address and commutable properties from the instruction
// description over to the new SUnit.
245 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
246 for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
247 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
248 NewSU->isTwoAddress = true;
252 if (TID.isCommutable())
253 NewSU->isCommutable = true;
255 // LoadNode may already exist. This can happen when there is another
256 // load from the same location and producing the same type of value
257 // but it has different alignment or volatileness.
258 bool isNewLoad = true;
// A node id other than -1 means LoadNode already has an SUnit; reuse it.
260 if (LoadNode->getNodeId() != -1) {
261 LoadSU = &SUnits[LoadNode->getNodeId()];
264 LoadSU = NewSUnit(LoadNode);
265 LoadNode->setNodeId(LoadSU->NodeNum);
// Sort SU's predecessor and successor edges into buckets before rewiring.
269 SmallVector<SDep, 4> ChainSuccs;
270 SmallVector<SDep, 4> LoadPreds;
271 SmallVector<SDep, 4> NodePreds;
272 SmallVector<SDep, 4> NodeSuccs;
273 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
// Predecessors whose node is an operand of LoadNode go to LoadPreds.
277 else if (I->getSUnit()->getNode() &&
278 I->getSUnit()->getNode()->isOperandOf(LoadNode))
279 LoadPreds.push_back(*I);
281 NodePreds.push_back(*I);
283 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
286 ChainSuccs.push_back(*I);
288 NodeSuccs.push_back(*I);
// Detach the collected edges from SU and attach them to the load SUnit or
// the new SUnit according to their bucket.
291 if (ChainPred.getSUnit()) {
292 RemovePred(SU, ChainPred);
294 AddPred(LoadSU, ChainPred);
296 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
297 const SDep &Pred = LoadPreds[i];
298 RemovePred(SU, Pred);
300 AddPred(LoadSU, Pred);
303 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
304 const SDep &Pred = NodePreds[i];
305 RemovePred(SU, Pred);
306 AddPred(NewSU, Pred);
308 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
309 SDep D = NodeSuccs[i];
310 SUnit *SuccDep = D.getSUnit();
312 RemovePred(SuccDep, D);
316 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
317 SDep D = ChainSuccs[i];
318 SUnit *SuccDep = D.getSUnit();
320 RemovePred(SuccDep, D);
// Make the load SUnit an order predecessor of the new SUnit.
327 AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
332 if (NewSU->NumSuccsLeft == 0) {
333 NewSU->isAvailable = true;
339 DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
342 // New SUnit has the exact same predecessors.
343 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
345 if (!I->isArtificial())
348 // Only copy scheduled successors. Cut them from old node's successor
349 // list and move them over.
// Removals are collected in DelDeps and applied after the successor walk.
350 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
351 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
353 if (I->isArtificial())
355 SUnit *SuccSU = I->getSUnit();
356 if (SuccSU->isScheduled) {
361 DelDeps.push_back(std::make_pair(SuccSU, D));
364 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
365 RemovePred(DelDeps[i].first, DelDeps[i].second);
371 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
372 /// scheduled successors of the given SUnit to the last copy.
373 void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
374 const TargetRegisterClass *DestRC,
375 const TargetRegisterClass *SrcRC,
376 SmallVector<SUnit*, 2> &Copies) {
// Two SUnits without an SDNode model the copies: the first goes from SrcRC
// to DestRC, the second from DestRC back to SrcRC.
377 SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
378 CopyFromSU->CopySrcRC = SrcRC;
379 CopyFromSU->CopyDstRC = DestRC;
381 SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
382 CopyToSU->CopySrcRC = DestRC;
383 CopyToSU->CopyDstRC = SrcRC;
385 // Only copy scheduled successors. Cut them from old node's successor
386 // list and move them over.
// Removals are collected in DelDeps and applied after the successor walk.
387 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
388 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
390 if (I->isArtificial())
392 SUnit *SuccSU = I->getSUnit();
393 if (SuccSU->isScheduled) {
// The moved edge is re-pointed at CopyToSU.
395 D.setSUnit(CopyToSU);
397 DelDeps.push_back(std::make_pair(SuccSU, *I));
400 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
401 RemovePred(DelDeps[i].first, DelDeps[i].second);
// Data dependencies chain the nodes: SU feeds CopyFromSU (on Reg), which in
// turn feeds CopyToSU.
404 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
405 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
// Copies receives CopyFromSU first and CopyToSU second.
407 Copies.push_back(CopyFromSU);
408 Copies.push_back(CopyToSU);
413 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
414 /// definition of the specified node.
415 /// FIXME: Move to SelectionDAG?
416 static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
417 const TargetInstrInfo *TII) {
418 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
419 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
// The value index starts at the number of explicit defs.
420 unsigned NumRes = TID.getNumDefs();
// Walk the implicit-def list until its zero entry.
421 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
426 return N->getValueType(NumRes);
429 /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
430 /// scheduling of the given node to satisfy live physical register dependencies.
431 /// If the specific node is the last one that's available to schedule, do
432 /// whatever is necessary (i.e. backtracking or cloning) to make it possible.
/// Conflicting registers are appended to LRegs.
433 bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
434 SmallVector<unsigned, 4> &LRegs){
// Fast path taken when no register is live.
435 if (NumLiveRegs == 0)
// RegAdded keeps a register from being pushed onto LRegs more than once.
438 SmallSet<unsigned, 4> RegAdded;
439 // If this node would clobber any "live" register, then it's not ready.
440 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
442 if (I->isAssignedRegDep()) {
443 unsigned Reg = I->getReg();
// A conflict exists when the register is live with a definition other than
// this predecessor.
444 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
445 if (RegAdded.insert(Reg))
446 LRegs.push_back(Reg);
// Apply the same check to the registers in Reg's alias set.
448 for (const unsigned *Alias = TRI->getAliasSet(Reg);
450 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
451 if (RegAdded.insert(*Alias))
452 LRegs.push_back(*Alias);
// Walk SU's node and the chain reached through getFlaggedNode(), checking the
// implicit defs of each machine opcode.
457 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
458 if (!Node->isMachineOpcode())
460 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
461 if (!TID.ImplicitDefs)
// The implicit-def list is walked until its zero entry.
463 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
464 if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
465 if (RegAdded.insert(*Reg))
466 LRegs.push_back(*Reg);
468 for (const unsigned *Alias = TRI->getAliasSet(*Reg);
470 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
471 if (RegAdded.insert(*Alias))
472 LRegs.push_back(*Alias);
// A delay is needed exactly when at least one conflicting register was
// collected.
476 return !LRegs.empty();
480 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
482 void ScheduleDAGFast::ListScheduleBottomUp() {
483 unsigned CurCycle = 0;
484 // Add root to Available queue.
485 if (!SUnits.empty()) {
// The root SUnit is looked up through the node id of the DAG's root node.
486 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
487 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
488 RootSU->isAvailable = true;
489 AvailableQueue.push(RootSU);
492 // While Available queue is not empty, grab the node with the highest
493 // priority. If it is not ready put it back. Schedule the node.
494 SmallVector<SUnit*, 4> NotReady;
// LRegsMap associates an SUnit with the register list filled in by
// DelayForLiveRegsBottomUp.
495 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
496 Sequence.reserve(SUnits.size());
497 while (!AvailableQueue.empty()) {
498 bool Delayed = false;
500 SUnit *CurSU = AvailableQueue.pop();
502 SmallVector<unsigned, 4> LRegs;
503 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
506 LRegsMap.insert(std::make_pair(CurSU, LRegs));
508 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
509 NotReady.push_back(CurSU);
510 CurSU = AvailableQueue.pop();
513 // All candidates are delayed due to live physical reg dependencies.
514 // Try code duplication or inserting cross class copies
516 if (Delayed && !CurSU) {
518 // Try duplicating the nodes that produce these
519 // "expensive to copy" values to break the dependency. In case even
520 // that doesn't work, insert cross class copies.
// Work on the first delayed SUnit; only a single blocking register is
// handled (see the assert).
521 SUnit *TrySU = NotReady[0];
522 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
523 assert(LRegs.size() == 1 && "Can't handle this yet!");
524 unsigned Reg = LRegs[0];
525 SUnit *LRDef = LiveRegDefs[Reg];
526 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
527 const TargetRegisterClass *RC =
528 TRI->getPhysicalRegisterRegClass(Reg, VT);
529 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
531 // If cross copy register class is null, then it must be possible to copy
532 // the value directly. Do not try to duplicate the def.
535 NewDef = CopyAndMoveSuccessors(LRDef);
539 // Issue copies, these can be expensive cross register class copies.
540 SmallVector<SUnit*, 2> Copies;
541 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
542 DOUT << "Adding an edge from SU # " << TrySU->NodeNum
543 << " to SU #" << Copies.front()->NodeNum << "\n";
// An artificial order edge makes the first copy a predecessor of TrySU.
544 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
545 /*Reg=*/0, /*isNormalMemory=*/false,
546 /*isMustAlias=*/false, /*isArtificial=*/true));
547 NewDef = Copies.back();
550 DOUT << "Adding an edge from SU # " << NewDef->NodeNum
551 << " to SU #" << TrySU->NodeNum << "\n";
// NewDef becomes the recorded definition of Reg, and an artificial order
// edge makes TrySU a predecessor of NewDef.
552 LiveRegDefs[Reg] = NewDef;
553 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
554 /*Reg=*/0, /*isNormalMemory=*/false,
555 /*isMustAlias=*/false, /*isArtificial=*/true));
556 TrySU->isAvailable = false;
561 assert(false && "Unable to resolve live physical register dependencies!");
566 // Add the nodes that aren't ready back onto the available list.
567 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
568 NotReady[i]->isPending = false;
569 // May no longer be available due to backtracking.
570 if (NotReady[i]->isAvailable)
571 AvailableQueue.push(NotReady[i]);
576 ScheduleNodeBottomUp(CurSU, CurCycle);
580 // Reverse the order if it is bottom up.
581 std::reverse(Sequence.begin(), Sequence.end());
585 // Verify that all SUnits were scheduled.
586 bool AnyNotSched = false;
587 unsigned DeadNodes = 0;
589 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
590 if (!SUnits[i].isScheduled) {
// Unscheduled SUnits with neither predecessors nor successors are checked
// for first.
591 if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
596 cerr << "*** List scheduling failed! ***\n";
597 SUnits[i].dump(this);
598 cerr << "has not been scheduled!\n";
// An SUnit that still has unreleased successors is reported as well.
601 if (SUnits[i].NumSuccsLeft != 0) {
603 cerr << "*** List scheduling failed! ***\n";
604 SUnits[i].dump(this);
605 cerr << "has successors left!\n";
609 for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
612 assert(!AnyNotSched);
// Scheduled entries plus dead nodes, minus noops, must equal the SUnit count.
613 assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
614 "The number of nodes scheduled doesn't match the expected number!");
618 //===----------------------------------------------------------------------===//
619 // Public Constructor Functions
620 //===----------------------------------------------------------------------===//
/// createFastDAGScheduler - Returns a newly allocated ScheduleDAGFast built
/// from DAG, BB and *TM. The IS parameter and the unnamed bool do not appear
/// in the return statement.
622 llvm::ScheduleDAG* llvm::createFastDAGScheduler(SelectionDAGISel *IS,
624 const TargetMachine *TM,
625 MachineBasicBlock *BB, bool) {
626 return new ScheduleDAGFast(DAG, BB, *TM);