lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp

   1 //===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Evan Cheng and is distributed under the
   6 // University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This implements a top-down list scheduler, using standard algorithms.
  11 // The basic approach uses a priority queue of available nodes to schedule.
  12 // One at a time, nodes are taken from the priority queue (thus in priority
  13 // order), checked for legality to schedule, and emitted if legal.
  14 //
  15 // Nodes may not be legal to schedule either due to structural hazards (e.g.
  16 // pipeline or resource constraints) or because an input to the instruction has
  17 // not completed execution.
  18 //
  19 //===----------------------------------------------------------------------===//
  20
  21 #define DEBUG_TYPE "sched"
  22 #include "llvm/CodeGen/ScheduleDAG.h"
  23 #include "llvm/CodeGen/SSARegMap.h"
  24 #include "llvm/Target/MRegisterInfo.h"
  25 #include "llvm/Target/TargetData.h"
  26 #include "llvm/Target/TargetMachine.h"
  27 #include "llvm/Target/TargetInstrInfo.h"
  28 #include "llvm/Support/Debug.h"
  29 #include "llvm/ADT/Statistic.h"
  30 #include <climits>
  31 #include <iostream>
  32 #include <queue>
  33 using namespace llvm;
  34
  35 namespace {
  36   Statistic<> NumNoops ("scheduler", "Number of noops inserted");
  37   Statistic<> NumStalls("scheduler", "Number of pipeline stalls");
  38 }
  39
  40 namespace {
  41 //===----------------------------------------------------------------------===//
  42 /// ScheduleDAGList - The actual list scheduler implementation.  This supports
  43 /// top-down scheduling.
  44 ///
  45 class ScheduleDAGList : public ScheduleDAG {
  46 private:
  47   /// AvailableQueue - The priority queue to use for the available SUnits.
  48   ///
  49   SchedulingPriorityQueue *AvailableQueue;
  50
  51   /// PendingQueue - This contains all of the instructions whose operands have
  52   /// been issued, but their results are not ready yet (due to the latency of
  53   /// the operation).  Once the operands becomes available, the instruction is
  54   /// added to the AvailableQueue.  This keeps track of each SUnit and the
  55   /// number of cycles left to execute before the operation is available.
  56   std::vector<std::pair<unsigned, SUnit*> > PendingQueue;
  57
  58   /// HazardRec - The hazard recognizer to use.
  59   HazardRecognizer *HazardRec;
  60
  61 public:
  62   ScheduleDAGList(SelectionDAG &dag, MachineBasicBlock *bb,
  63                   const TargetMachine &tm,
  64                   SchedulingPriorityQueue *availqueue,
  65                   HazardRecognizer *HR)
  66     : ScheduleDAG(dag, bb, tm),
  67       AvailableQueue(availqueue), HazardRec(HR) {
  68     }
  69
  70   ~ScheduleDAGList() {
  71     delete HazardRec;
  72     delete AvailableQueue;
  73   }
  74
  75   void Schedule();
  76
  77 private:
  78   void ReleaseSucc(SUnit *SuccSU, bool isChain);
  79   void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
  80   void ListScheduleTopDown();
  81 };
  82 }  // end anonymous namespace
  83
  84 HazardRecognizer::~HazardRecognizer() {}
  85
  86
  87 /// Schedule - Schedule the DAG using list scheduling.
  88 void ScheduleDAGList::Schedule() {
  89   DEBUG(std::cerr << "********** List Scheduling **********\n");
  90
  91   // Build scheduling units.
  92   BuildSchedUnits();
  93
  94   AvailableQueue->initNodes(SUnits);
  95
  96   ListScheduleTopDown();
  97
  98   AvailableQueue->releaseState();
  99
 100   DEBUG(std::cerr << "*** Final schedule ***\n");
 101   DEBUG(dumpSchedule());
 102   DEBUG(std::cerr << "\n");
 103
 104   // Emit in scheduled order
 105   EmitSchedule();
 106 }
 107
 108 //===----------------------------------------------------------------------===//
 109 //  Top-Down Scheduling
 110 //===----------------------------------------------------------------------===//
 111
 112 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
 113 /// the PendingQueue if the count reaches zero.
 114 void ScheduleDAGList::ReleaseSucc(SUnit *SuccSU, bool isChain) {
 115   if (!isChain)
 116     SuccSU->NumPredsLeft--;
 117   else
 118     SuccSU->NumChainPredsLeft--;
 119
 120   assert(SuccSU->NumPredsLeft >= 0 && SuccSU->NumChainPredsLeft >= 0 &&
 121          "List scheduling internal error");
 122
 123   if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) {
 124     // Compute how many cycles it will be before this actually becomes
 125     // available.  This is the max of the start time of all predecessors plus
 126     // their latencies.
 127     unsigned AvailableCycle = 0;
 128     for (std::set<std::pair<SUnit*, bool> >::iterator I = SuccSU->Preds.begin(),
 129          E = SuccSU->Preds.end(); I != E; ++I) {
 130       // If this is a token edge, we don't need to wait for the latency of the
 131       // preceeding instruction (e.g. a long-latency load) unless there is also
 132       // some other data dependence.
 133       unsigned PredDoneCycle = I->first->Cycle;
 134       if (!I->second)
 135         PredDoneCycle += I->first->Latency;
 136       else if (I->first->Latency)
 137         PredDoneCycle += 1;
 138
 139       AvailableCycle = std::max(AvailableCycle, PredDoneCycle);
 140     }
 141
 142     PendingQueue.push_back(std::make_pair(AvailableCycle, SuccSU));
 143   }
 144 }
 145
 146 /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
 147 /// count of its successors. If a successor pending count is zero, add it to
 148 /// the Available queue.
 149 void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 150   DEBUG(std::cerr << "*** Scheduling [" << CurCycle << "]: ");
 151   DEBUG(SU->dump(&DAG));
 152
 153   Sequence.push_back(SU);
 154   SU->Cycle = CurCycle;
 155
 156   // Bottom up: release successors.
 157   for (std::set<std::pair<SUnit*, bool> >::iterator I = SU->Succs.begin(),
 158        E = SU->Succs.end(); I != E; ++I)
 159     ReleaseSucc(I->first, I->second);
 160 }
 161
 162 /// ListScheduleTopDown - The main loop of list scheduling for top-down
 163 /// schedulers.
 164 void ScheduleDAGList::ListScheduleTopDown() {
 165   unsigned CurCycle = 0;
 166   SUnit *Entry = SUnitMap[DAG.getEntryNode().Val];
 167
 168   // All leaves to Available queue.
 169   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
 170     // It is available if it has no predecessors.
 171     if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
 172       AvailableQueue->push(&SUnits[i]);
 173       SUnits[i].isAvailable = SUnits[i].isPending = true;
 174     }
 175   }
 176
 177   // Emit the entry node first.
 178   ScheduleNodeTopDown(Entry, CurCycle);
 179   HazardRec->EmitInstruction(Entry->Node);
 180
 181   // While Available queue is not empty, grab the node with the highest
 182   // priority. If it is not ready put it back.  Schedule the node.
 183   std::vector<SUnit*> NotReady;
 184   while (!AvailableQueue->empty() || !PendingQueue.empty()) {
 185     // Check to see if any of the pending instructions are ready to issue.  If
 186     // so, add them to the available queue.
 187     for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
 188       if (PendingQueue[i].first == CurCycle) {
 189         AvailableQueue->push(PendingQueue[i].second);
 190         PendingQueue[i].second->isAvailable = true;
 191         PendingQueue[i] = PendingQueue.back();
 192         PendingQueue.pop_back();
 193         --i; --e;
 194       } else {
 195         assert(PendingQueue[i].first > CurCycle && "Negative latency?");
 196       }
 197     }
 198
 199     // If there are no instructions available, don't try to issue anything, and
 200     // don't advance the hazard recognizer.
 201     if (AvailableQueue->empty()) {
 202       ++CurCycle;
 203       continue;
 204     }
 205
 206     SUnit *FoundSUnit = 0;
 207     SDNode *FoundNode = 0;
 208
 209     bool HasNoopHazards = false;
 210     while (!AvailableQueue->empty()) {
 211       SUnit *CurSUnit = AvailableQueue->pop();
 212
 213       // Get the node represented by this SUnit.
 214       FoundNode = CurSUnit->Node;
 215
 216       // If this is a pseudo op, like copyfromreg, look to see if there is a
 217       // real target node flagged to it.  If so, use the target node.
 218       for (unsigned i = 0, e = CurSUnit->FlaggedNodes.size();
 219            FoundNode->getOpcode() < ISD::BUILTIN_OP_END && i != e; ++i)
 220         FoundNode = CurSUnit->FlaggedNodes[i];
 221
 222       HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
 223       if (HT == HazardRecognizer::NoHazard) {
 224         FoundSUnit = CurSUnit;
 225         break;
 226       }
 227
 228       // Remember if this is a noop hazard.
 229       HasNoopHazards |= HT == HazardRecognizer::NoopHazard;
 230
 231       NotReady.push_back(CurSUnit);
 232     }
 233
 234     // Add the nodes that aren't ready back onto the available list.
 235     if (!NotReady.empty()) {
 236       AvailableQueue->push_all(NotReady);
 237       NotReady.clear();
 238     }
 239
 240     // If we found a node to schedule, do it now.
 241     if (FoundSUnit) {
 242       ScheduleNodeTopDown(FoundSUnit, CurCycle);
 243       HazardRec->EmitInstruction(FoundNode);
 244       FoundSUnit->isScheduled = true;
 245       AvailableQueue->ScheduledNode(FoundSUnit);
 246
 247       // If this is a pseudo-op node, we don't want to increment the current
 248       // cycle.
 249       if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
 250         ++CurCycle;
 251     } else if (!HasNoopHazards) {
 252       // Otherwise, we have a pipeline stall, but no other problem, just advance
 253       // the current cycle and try again.
 254       DEBUG(std::cerr << "*** Advancing cycle, no work to do\n");
 255       HazardRec->AdvanceCycle();
 256       ++NumStalls;
 257       ++CurCycle;
 258     } else {
 259       // Otherwise, we have no instructions to issue and we have instructions
 260       // that will fault if we don't do this right.  This is the case for
 261       // processors without pipeline interlocks and other cases.
 262       DEBUG(std::cerr << "*** Emitting noop\n");
 263       HazardRec->EmitNoop();
 264       Sequence.push_back(0);   // NULL SUnit* -> noop
 265       ++NumNoops;
 266       ++CurCycle;
 267     }
 268   }
 269
 270 #ifndef NDEBUG
 271   // Verify that all SUnits were scheduled.
 272   bool AnyNotSched = false;
 273   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
 274     if (SUnits[i].NumPredsLeft != 0 || SUnits[i].NumChainPredsLeft != 0) {
 275       if (!AnyNotSched)
 276         std::cerr << "*** List scheduling failed! ***\n";
 277       SUnits[i].dump(&DAG);
 278       std::cerr << "has not been scheduled!\n";
 279       AnyNotSched = true;
 280     }
 281   }
 282   assert(!AnyNotSched);
 283 #endif
 284 }
 285
 286 //===----------------------------------------------------------------------===//
 287 //                    LatencyPriorityQueue Implementation
 288 //===----------------------------------------------------------------------===//
 289 //
 290 // This is a SchedulingPriorityQueue that schedules using latency information to
 291 // reduce the length of the critical path through the basic block.
 292 //
 293 namespace {
 294   class LatencyPriorityQueue;
 295
 296   /// Sorting functions for the Available queue.
 297   struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
 298     LatencyPriorityQueue *PQ;
 299     latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
 300     latency_sort(const latency_sort &RHS) : PQ(RHS.PQ) {}
 301
 302     bool operator()(const SUnit* left, const SUnit* right) const;
 303   };
 304 }  // end anonymous namespace
 305
 306 namespace {
 307   class LatencyPriorityQueue : public SchedulingPriorityQueue {
 308     // SUnits - The SUnits for the current graph.
 309     const std::vector<SUnit> *SUnits;
 310
 311     // Latencies - The latency (max of latency from this node to the bb exit)
 312     // for each node.
 313     std::vector<int> Latencies;
 314
 315     /// NumNodesSolelyBlocking - This vector contains, for every node in the
 316     /// Queue, the number of nodes that the node is the sole unscheduled
 317     /// predecessor for.  This is used as a tie-breaker heuristic for better
 318     /// mobility.
 319     std::vector<unsigned> NumNodesSolelyBlocking;
 320
 321     std::priority_queue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
 322 public:
 323     LatencyPriorityQueue() : Queue(latency_sort(this)) {
 324     }
 325
 326     void initNodes(const std::vector<SUnit> &sunits) {
 327       SUnits = &sunits;
 328       // Calculate node priorities.
 329       CalculatePriorities();
 330     }
 331     void releaseState() {
 332       SUnits = 0;
 333       Latencies.clear();
 334     }
 335
 336     unsigned getLatency(unsigned NodeNum) const {
 337       assert(NodeNum < Latencies.size());
 338       return Latencies[NodeNum];
 339     }
 340
 341     unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
 342       assert(NodeNum < NumNodesSolelyBlocking.size());
 343       return NumNodesSolelyBlocking[NodeNum];
 344     }
 345
 346     bool empty() const { return Queue.empty(); }
 347
 348     virtual void push(SUnit *U) {
 349       push_impl(U);
 350     }
 351     void push_impl(SUnit *U);
 352
 353     void push_all(const std::vector<SUnit *> &Nodes) {
 354       for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
 355         push_impl(Nodes[i]);
 356     }
 357
 358     SUnit *pop() {
 359       SUnit *V = Queue.top();
 360       Queue.pop();
 361       return V;
 362     }
 363
 364     // ScheduledNode - As nodes are scheduled, we look to see if there are any
 365     // successor nodes that have a single unscheduled predecessor.  If so, that
 366     // single predecessor has a higher priority, since scheduling it will make
 367     // the node available.
 368     void ScheduledNode(SUnit *Node);
 369
 370 private:
 371     void CalculatePriorities();
 372     int CalcLatency(const SUnit &SU);
 373     void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
 374
 375     /// RemoveFromPriorityQueue - This is a really inefficient way to remove a
 376     /// node from a priority queue.  We should roll our own heap to make this
 377     /// better or something.
 378     void RemoveFromPriorityQueue(SUnit *SU) {
 379       std::vector<SUnit*> Temp;
 380
 381       assert(!Queue.empty() && "Not in queue!");
 382       while (Queue.top() != SU) {
 383         Temp.push_back(Queue.top());
 384         Queue.pop();
 385         assert(!Queue.empty() && "Not in queue!");
 386       }
 387
 388       // Remove the node from the PQ.
 389       Queue.pop();
 390
 391       // Add all the other nodes back.
 392       for (unsigned i = 0, e = Temp.size(); i != e; ++i)
 393         Queue.push(Temp[i]);
 394     }
 395   };
 396 }
 397
 398 bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
 399   unsigned LHSNum = LHS->NodeNum;
 400   unsigned RHSNum = RHS->NodeNum;
 401
 402   // The most important heuristic is scheduling the critical path.
 403   unsigned LHSLatency = PQ->getLatency(LHSNum);
 404   unsigned RHSLatency = PQ->getLatency(RHSNum);
 405   if (LHSLatency < RHSLatency) return true;
 406   if (LHSLatency > RHSLatency) return false;
 407
 408   // After that, if two nodes have identical latencies, look to see if one will
 409   // unblock more other nodes than the other.
 410   unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
 411   unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
 412   if (LHSBlocked < RHSBlocked) return true;
 413   if (LHSBlocked > RHSBlocked) return false;
 414
 415   // Finally, just to provide a stable ordering, use the node number as a
 416   // deciding factor.
 417   return LHSNum < RHSNum;
 418 }
 419
 420
 421 /// CalcNodePriority - Calculate the maximal path from the node to the exit.
 422 ///
 423 int LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
 424   int &Latency = Latencies[SU.NodeNum];
 425   if (Latency != -1)
 426     return Latency;
 427
 428   int MaxSuccLatency = 0;
 429   for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU.Succs.begin(),
 430        E = SU.Succs.end(); I != E; ++I)
 431     MaxSuccLatency = std::max(MaxSuccLatency, CalcLatency(*I->first));
 432
 433   return Latency = MaxSuccLatency + SU.Latency;
 434 }
 435
 436 /// CalculatePriorities - Calculate priorities of all scheduling units.
 437 void LatencyPriorityQueue::CalculatePriorities() {
 438   Latencies.assign(SUnits->size(), -1);
 439   NumNodesSolelyBlocking.assign(SUnits->size(), 0);
 440
 441   for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
 442     CalcLatency((*SUnits)[i]);
 443 }
 444
 445 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
 446 /// of SU, return it, otherwise return null.
 447 static SUnit *getSingleUnscheduledPred(SUnit *SU) {
 448   SUnit *OnlyAvailablePred = 0;
 449   for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Preds.begin(),
 450        E = SU->Preds.end(); I != E; ++I)
 451     if (!I->first->isScheduled) {
 452       // We found an available, but not scheduled, predecessor.  If it's the
 453       // only one we have found, keep track of it... otherwise give up.
 454       if (OnlyAvailablePred && OnlyAvailablePred != I->first)
 455         return 0;
 456       OnlyAvailablePred = I->first;
 457     }
 458
 459   return OnlyAvailablePred;
 460 }
 461
 462 void LatencyPriorityQueue::push_impl(SUnit *SU) {
 463   // Look at all of the successors of this node.  Count the number of nodes that
 464   // this node is the sole unscheduled node for.
 465   unsigned NumNodesBlocking = 0;
 466   for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(),
 467        E = SU->Succs.end(); I != E; ++I)
 468     if (getSingleUnscheduledPred(I->first) == SU)
 469       ++NumNodesBlocking;
 470   NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
 471
 472   Queue.push(SU);
 473 }
 474
 475
 476 // ScheduledNode - As nodes are scheduled, we look to see if there are any
 477 // successor nodes that have a single unscheduled predecessor.  If so, that
 478 // single predecessor has a higher priority, since scheduling it will make
 479 // the node available.
 480 void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
 481   for (std::set<std::pair<SUnit*, bool> >::const_iterator I = SU->Succs.begin(),
 482        E = SU->Succs.end(); I != E; ++I)
 483     AdjustPriorityOfUnscheduledPreds(I->first);
 484 }
 485
 486 /// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
 487 /// scheduled.  If SU is not itself available, then there is at least one
 488 /// predecessor node that has not been scheduled yet.  If SU has exactly ONE
 489 /// unscheduled predecessor, we want to increase its priority: it getting
 490 /// scheduled will make this node available, so it is better than some other
 491 /// node of the same priority that will not make a node available.
 492 void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
 493   if (SU->isPending) return;  // All preds scheduled.
 494
 495   SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
 496   if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
 497
 498   // Okay, we found a single predecessor that is available, but not scheduled.
 499   // Since it is available, it must be in the priority queue.  First remove it.
 500   RemoveFromPriorityQueue(OnlyAvailablePred);
 501
 502   // Reinsert the node into the priority queue, which recomputes its
 503   // NumNodesSolelyBlocking value.
 504   push(OnlyAvailablePred);
 505 }
 506
 507
 508 //===----------------------------------------------------------------------===//
 509 //                         Public Constructor Functions
 510 //===----------------------------------------------------------------------===//
 511
 512 /// createTDListDAGScheduler - This creates a top-down list scheduler with the
 513 /// specified hazard recognizer.
 514 ScheduleDAG* llvm::createTDListDAGScheduler(SelectionDAG &DAG,
 515                                             MachineBasicBlock *BB,
 516                                             HazardRecognizer *HR) {
 517   return new ScheduleDAGList(DAG, BB, DAG.getTarget(),
 518                              new LatencyPriorityQueue(),
 519                              HR);
 520 }