lib/CodeGen/MachineScheduler.cpp

   1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "misched"
  16
  17 #include "llvm/CodeGen/MachineScheduler.h"
  18 #include "llvm/ADT/OwningPtr.h"
  19 #include "llvm/ADT/PriorityQueue.h"
  20 #include "llvm/Analysis/AliasAnalysis.h"
  21 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  22 #include "llvm/CodeGen/Passes.h"
  23 #include "llvm/CodeGen/RegisterClassInfo.h"
  24 #include "llvm/CodeGen/ScheduleDFS.h"
  25 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  26 #include "llvm/Support/CommandLine.h"
  27 #include "llvm/Support/Debug.h"
  28 #include "llvm/Support/ErrorHandling.h"
  29 #include "llvm/Support/GraphWriter.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 #include <queue>
  32
  33 using namespace llvm;
  34
  35 namespace llvm {
  36 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  37                            cl::desc("Force top-down list scheduling"));
  38 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  39                             cl::desc("Force bottom-up list scheduling"));
  40 }
  41
  42 #ifndef NDEBUG
  43 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  44   cl::desc("Pop up a window to show MISched dags after they are processed"));
  45
  46 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  47   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  48 #else
  49 static bool ViewMISchedDAGs = false;
  50 #endif // NDEBUG
  51
  52 // Experimental heuristics
  53 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  54   cl::desc("Enable load clustering."), cl::init(true));
  55
  56 // Experimental heuristics
  57 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  58   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
  59
  60 // DAG subtrees must have at least this many nodes.
  61 static const unsigned MinSubtreeSize = 8;
  62
  63 //===----------------------------------------------------------------------===//
  64 // Machine Instruction Scheduling Pass and Registry
  65 //===----------------------------------------------------------------------===//
  66
  67 MachineSchedContext::MachineSchedContext():
  68     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  69   RegClassInfo = new RegisterClassInfo();
  70 }
  71
  72 MachineSchedContext::~MachineSchedContext() {
  73   delete RegClassInfo;
  74 }
  75
  76 namespace {
  77 /// MachineScheduler runs after coalescing and before register allocation.
  78 class MachineScheduler : public MachineSchedContext,
  79                          public MachineFunctionPass {
  80 public:
  81   MachineScheduler();
  82
  83   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
  84
  85   virtual void releaseMemory() {}
  86
  87   virtual bool runOnMachineFunction(MachineFunction&);
  88
  89   virtual void print(raw_ostream &O, const Module* = 0) const;
  90
  91   static char ID; // Class identification, replacement for typeinfo
  92 };
  93 } // namespace
  94
  95 char MachineScheduler::ID = 0;
  96
  97 char &llvm::MachineSchedulerID = MachineScheduler::ID;
  98
  99 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
 100                       "Machine Instruction Scheduler", false, false)
 101 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 102 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 103 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 104 INITIALIZE_PASS_END(MachineScheduler, "misched",
 105                     "Machine Instruction Scheduler", false, false)
 106
 107 MachineScheduler::MachineScheduler()
 108 : MachineFunctionPass(ID) {
 109   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 110 }
 111
 112 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 113   AU.setPreservesCFG();
 114   AU.addRequiredID(MachineDominatorsID);
 115   AU.addRequired<MachineLoopInfo>();
 116   AU.addRequired<AliasAnalysis>();
 117   AU.addRequired<TargetPassConfig>();
 118   AU.addRequired<SlotIndexes>();
 119   AU.addPreserved<SlotIndexes>();
 120   AU.addRequired<LiveIntervals>();
 121   AU.addPreserved<LiveIntervals>();
 122   MachineFunctionPass::getAnalysisUsage(AU);
 123 }
 124
 125 MachinePassRegistry MachineSchedRegistry::Registry;
 126
 127 /// A dummy default scheduler factory indicates whether the scheduler
 128 /// is overridden on the command line.
 129 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
 130   return 0;
 131 }
 132
 133 /// MachineSchedOpt allows command line selection of the scheduler.
 134 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
 135                RegisterPassParser<MachineSchedRegistry> >
 136 MachineSchedOpt("misched",
 137                 cl::init(&useDefaultMachineSched), cl::Hidden,
 138                 cl::desc("Machine instruction scheduler to use"));
 139
 140 static MachineSchedRegistry
 141 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
 142                      useDefaultMachineSched);
 143
 144 /// Forward declare the standard machine scheduler. This will be used as the
 145 /// default scheduler if the target does not set a default.
 146 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
 147
 148
 149 /// Decrement this iterator until reaching the top or a non-debug instr.
 150 static MachineBasicBlock::iterator
 151 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
 152   assert(I != Beg && "reached the top of the region, cannot decrement");
 153   while (--I != Beg) {
 154     if (!I->isDebugValue())
 155       break;
 156   }
 157   return I;
 158 }
 159
 160 /// If this iterator is a debug value, increment until reaching the End or a
 161 /// non-debug instruction.
 162 static MachineBasicBlock::iterator
 163 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
 164   for(; I != End; ++I) {
 165     if (!I->isDebugValue())
 166       break;
 167   }
 168   return I;
 169 }
 170
 171 /// Top-level MachineScheduler pass driver.
 172 ///
 173 /// Visit blocks in function order. Divide each block into scheduling regions
 174 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
 175 /// consistent with the DAG builder, which traverses the interior of the
 176 /// scheduling regions bottom-up.
 177 ///
 178 /// This design avoids exposing scheduling boundaries to the DAG builder,
 179 /// simplifying the DAG builder's support for "special" target instructions.
 180 /// At the same time the design allows target schedulers to operate across
 181 /// scheduling boundaries, for example to bundle the boudary instructions
 182 /// without reordering them. This creates complexity, because the target
 183 /// scheduler must update the RegionBegin and RegionEnd positions cached by
 184 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
 185 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 186 /// general bias against block splitting purely for implementation simplicity.
 187 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 188   DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
 189
 190   // Initialize the context of the pass.
 191   MF = &mf;
 192   MLI = &getAnalysis<MachineLoopInfo>();
 193   MDT = &getAnalysis<MachineDominatorTree>();
 194   PassConfig = &getAnalysis<TargetPassConfig>();
 195   AA = &getAnalysis<AliasAnalysis>();
 196
 197   LIS = &getAnalysis<LiveIntervals>();
 198   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 199
 200   RegClassInfo->runOnMachineFunction(*MF);
 201
 202   // Select the scheduler, or set the default.
 203   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
 204   if (Ctor == useDefaultMachineSched) {
 205     // Get the default scheduler set by the target.
 206     Ctor = MachineSchedRegistry::getDefault();
 207     if (!Ctor) {
 208       Ctor = createConvergingSched;
 209       MachineSchedRegistry::setDefault(Ctor);
 210     }
 211   }
 212   // Instantiate the selected scheduler.
 213   OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
 214
 215   // Visit all machine basic blocks.
 216   //
 217   // TODO: Visit blocks in global postorder or postorder within the bottom-up
 218   // loop tree. Then we can optionally compute global RegPressure.
 219   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
 220        MBB != MBBEnd; ++MBB) {
 221
 222     Scheduler->startBlock(MBB);
 223
 224     // Break the block into scheduling regions [I, RegionEnd), and schedule each
 225     // region as soon as it is discovered. RegionEnd points the scheduling
 226     // boundary at the bottom of the region. The DAG does not include RegionEnd,
 227     // but the region does (i.e. the next RegionEnd is above the previous
 228     // RegionBegin). If the current block has no terminator then RegionEnd ==
 229     // MBB->end() for the bottom region.
 230     //
 231     // The Scheduler may insert instructions during either schedule() or
 232     // exitRegion(), even for empty regions. So the local iterators 'I' and
 233     // 'RegionEnd' are invalid across these calls.
 234     unsigned RemainingInstrs = MBB->size();
 235     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
 236         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
 237
 238       // Avoid decrementing RegionEnd for blocks with no terminator.
 239       if (RegionEnd != MBB->end()
 240           || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
 241         --RegionEnd;
 242         // Count the boundary instruction.
 243         --RemainingInstrs;
 244       }
 245
 246       // The next region starts above the previous region. Look backward in the
 247       // instruction stream until we find the nearest boundary.
 248       MachineBasicBlock::iterator I = RegionEnd;
 249       for(;I != MBB->begin(); --I, --RemainingInstrs) {
 250         if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
 251           break;
 252       }
 253       // Notify the scheduler of the region, even if we may skip scheduling
 254       // it. Perhaps it still needs to be bundled.
 255       Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
 256
 257       // Skip empty scheduling regions (0 or 1 schedulable instructions).
 258       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
 259         // Close the current region. Bundle the terminator if needed.
 260         // This invalidates 'RegionEnd' and 'I'.
 261         Scheduler->exitRegion();
 262         continue;
 263       }
 264       DEBUG(dbgs() << "********** MI Scheduling **********\n");
 265       DEBUG(dbgs() << MF->getName()
 266             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
 267             << "\n  From: " << *I << "    To: ";
 268             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
 269             else dbgs() << "End";
 270             dbgs() << " Remaining: " << RemainingInstrs << "\n");
 271
 272       // Schedule a region: possibly reorder instructions.
 273       // This invalidates 'RegionEnd' and 'I'.
 274       Scheduler->schedule();
 275
 276       // Close the current region.
 277       Scheduler->exitRegion();
 278
 279       // Scheduling has invalidated the current iterator 'I'. Ask the
 280       // scheduler for the top of it's scheduled region.
 281       RegionEnd = Scheduler->begin();
 282     }
 283     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
 284     Scheduler->finishBlock();
 285   }
 286   Scheduler->finalizeSchedule();
 287   DEBUG(LIS->print(dbgs()));
 288   return true;
 289 }
 290
 291 void MachineScheduler::print(raw_ostream &O, const Module* m) const {
 292   // unimplemented
 293 }
 294
 295 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 296 void ReadyQueue::dump() {
 297   dbgs() << Name << ": ";
 298   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
 299     dbgs() << Queue[i]->NodeNum << " ";
 300   dbgs() << "\n";
 301 }
 302 #endif
 303
 304 //===----------------------------------------------------------------------===//
 305 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
 306 // preservation.
 307 //===----------------------------------------------------------------------===//
 308
 309 ScheduleDAGMI::~ScheduleDAGMI() {
 310   delete DFSResult;
 311   DeleteContainerPointers(Mutations);
 312   delete SchedImpl;
 313 }
 314
 315 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
 316   if (SuccSU != &ExitSU) {
 317     // Do not use WillCreateCycle, it assumes SD scheduling.
 318     // If Pred is reachable from Succ, then the edge creates a cycle.
 319     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
 320       return false;
 321     Topo.AddPred(SuccSU, PredDep.getSUnit());
 322   }
 323   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
 324   // Return true regardless of whether a new edge needed to be inserted.
 325   return true;
 326 }
 327
 328 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 329 /// NumPredsLeft reaches zero, release the successor node.
 330 ///
 331 /// FIXME: Adjust SuccSU height based on MinLatency.
 332 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 333   SUnit *SuccSU = SuccEdge->getSUnit();
 334
 335   if (SuccEdge->isWeak()) {
 336     --SuccSU->WeakPredsLeft;
 337     if (SuccEdge->isCluster())
 338       NextClusterSucc = SuccSU;
 339     return;
 340   }
 341 #ifndef NDEBUG
 342   if (SuccSU->NumPredsLeft == 0) {
 343     dbgs() << "*** Scheduling failed! ***\n";
 344     SuccSU->dump(this);
 345     dbgs() << " has been released too many times!\n";
 346     llvm_unreachable(0);
 347   }
 348 #endif
 349   --SuccSU->NumPredsLeft;
 350   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
 351     SchedImpl->releaseTopNode(SuccSU);
 352 }
 353
 354 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
 355 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 356   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 357        I != E; ++I) {
 358     releaseSucc(SU, &*I);
 359   }
 360 }
 361
 362 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
 363 /// NumSuccsLeft reaches zero, release the predecessor node.
 364 ///
 365 /// FIXME: Adjust PredSU height based on MinLatency.
 366 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 367   SUnit *PredSU = PredEdge->getSUnit();
 368
 369   if (PredEdge->isWeak()) {
 370     --PredSU->WeakSuccsLeft;
 371     if (PredEdge->isCluster())
 372       NextClusterPred = PredSU;
 373     return;
 374   }
 375 #ifndef NDEBUG
 376   if (PredSU->NumSuccsLeft == 0) {
 377     dbgs() << "*** Scheduling failed! ***\n";
 378     PredSU->dump(this);
 379     dbgs() << " has been released too many times!\n";
 380     llvm_unreachable(0);
 381   }
 382 #endif
 383   --PredSU->NumSuccsLeft;
 384   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
 385     SchedImpl->releaseBottomNode(PredSU);
 386 }
 387
 388 /// releasePredecessors - Call releasePred on each of SU's predecessors.
 389 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
 390   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
 391        I != E; ++I) {
 392     releasePred(SU, &*I);
 393   }
 394 }
 395
 396 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
 397                                     MachineBasicBlock::iterator InsertPos) {
 398   // Advance RegionBegin if the first instruction moves down.
 399   if (&*RegionBegin == MI)
 400     ++RegionBegin;
 401
 402   // Update the instruction stream.
 403   BB->splice(InsertPos, BB, MI);
 404
 405   // Update LiveIntervals
 406   LIS->handleMove(MI, /*UpdateFlags=*/true);
 407
 408   // Recede RegionBegin if an instruction moves above the first.
 409   if (RegionBegin == InsertPos)
 410     RegionBegin = MI;
 411 }
 412
 413 bool ScheduleDAGMI::checkSchedLimit() {
 414 #ifndef NDEBUG
 415   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
 416     CurrentTop = CurrentBottom;
 417     return false;
 418   }
 419   ++NumInstrsScheduled;
 420 #endif
 421   return true;
 422 }
 423
 424 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 425 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 426 /// the region, including the boundary itself and single-instruction regions
 427 /// that don't get scheduled.
 428 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
 429                                 MachineBasicBlock::iterator begin,
 430                                 MachineBasicBlock::iterator end,
 431                                 unsigned endcount)
 432 {
 433   ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
 434
 435   // For convenience remember the end of the liveness region.
 436   LiveRegionEnd =
 437     (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
 438 }
 439
 440 // Setup the register pressure trackers for the top scheduled top and bottom
 441 // scheduled regions.
 442 void ScheduleDAGMI::initRegPressure() {
 443   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
 444   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
 445
 446   // Close the RPTracker to finalize live ins.
 447   RPTracker.closeRegion();
 448
 449   DEBUG(RPTracker.getPressure().dump(TRI));
 450
 451   // Initialize the live ins and live outs.
 452   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
 453   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
 454
 455   // Close one end of the tracker so we can call
 456   // getMaxUpward/DownwardPressureDelta before advancing across any
 457   // instructions. This converts currently live regs into live ins/outs.
 458   TopRPTracker.closeTop();
 459   BotRPTracker.closeBottom();
 460
 461   // Account for liveness generated by the region boundary.
 462   if (LiveRegionEnd != RegionEnd)
 463     BotRPTracker.recede();
 464
 465   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
 466
 467   // Cache the list of excess pressure sets in this region. This will also track
 468   // the max pressure in the scheduled code for these sets.
 469   RegionCriticalPSets.clear();
 470   const std::vector<unsigned> &RegionPressure =
 471     RPTracker.getPressure().MaxSetPressure;
 472   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
 473     unsigned Limit = TRI->getRegPressureSetLimit(i);
 474     DEBUG(dbgs() << TRI->getRegPressureSetName(i)
 475           << "Limit " << Limit
 476           << " Actual " << RegionPressure[i] << "\n");
 477     if (RegionPressure[i] > Limit)
 478       RegionCriticalPSets.push_back(PressureElement(i, 0));
 479   }
 480   DEBUG(dbgs() << "Excess PSets: ";
 481         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
 482           dbgs() << TRI->getRegPressureSetName(
 483             RegionCriticalPSets[i].PSetID) << " ";
 484         dbgs() << "\n");
 485 }
 486
 487 // FIXME: When the pressure tracker deals in pressure differences then we won't
 488 // iterate over all RegionCriticalPSets[i].
 489 void ScheduleDAGMI::
 490 updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
 491   for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
 492     unsigned ID = RegionCriticalPSets[i].PSetID;
 493     int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
 494     if ((int)NewMaxPressure[ID] > MaxUnits)
 495       MaxUnits = NewMaxPressure[ID];
 496   }
 497 }
 498
 499 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 500 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 501 /// only includes instructions that have DAG nodes, not scheduling boundaries.
 502 ///
 503 /// This is a skeletal driver, with all the functionality pushed into helpers,
 504 /// so that it can be easilly extended by experimental schedulers. Generally,
 505 /// implementing MachineSchedStrategy should be sufficient to implement a new
 506 /// scheduling algorithm. However, if a scheduler further subclasses
 507 /// ScheduleDAGMI then it will want to override this virtual method in order to
 508 /// update any specialized state.
 509 void ScheduleDAGMI::schedule() {
 510   buildDAGWithRegPressure();
 511
 512   Topo.InitDAGTopologicalSorting();
 513
 514   postprocessDAG();
 515
 516   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 517   findRootsAndBiasEdges(TopRoots, BotRoots);
 518
 519   // Initialize the strategy before modifying the DAG.
 520   // This may initialize a DFSResult to be used for queue priority.
 521   SchedImpl->initialize(this);
 522
 523   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 524           SUnits[su].dumpAll(this));
 525   if (ViewMISchedDAGs) viewGraph();
 526
 527   // Initialize ready queues now that the DAG and priority data are finalized.
 528   initQueues(TopRoots, BotRoots);
 529
 530   bool IsTopNode = false;
 531   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
 532     assert(!SU->isScheduled && "Node already scheduled");
 533     if (!checkSchedLimit())
 534       break;
 535
 536     scheduleMI(SU, IsTopNode);
 537
 538     updateQueues(SU, IsTopNode);
 539   }
 540   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 541
 542   placeDebugValues();
 543
 544   DEBUG({
 545       unsigned BBNum = begin()->getParent()->getNumber();
 546       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
 547       dumpSchedule();
 548       dbgs() << '\n';
 549     });
 550 }
 551
 552 /// Build the DAG and setup three register pressure trackers.
 553 void ScheduleDAGMI::buildDAGWithRegPressure() {
 554   // Initialize the register pressure tracker used by buildSchedGraph.
 555   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
 556
 557   // Account for liveness generate by the region boundary.
 558   if (LiveRegionEnd != RegionEnd)
 559     RPTracker.recede();
 560
 561   // Build the DAG, and compute current register pressure.
 562   buildSchedGraph(AA, &RPTracker);
 563
 564   // Initialize top/bottom trackers after computing region pressure.
 565   initRegPressure();
 566 }
 567
 568 /// Apply each ScheduleDAGMutation step in order.
 569 void ScheduleDAGMI::postprocessDAG() {
 570   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
 571     Mutations[i]->apply(this);
 572   }
 573 }
 574
 575 void ScheduleDAGMI::computeDFSResult() {
 576   if (!DFSResult)
 577     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
 578   DFSResult->clear();
 579   ScheduledTrees.clear();
 580   DFSResult->resize(SUnits.size());
 581   DFSResult->compute(SUnits);
 582   ScheduledTrees.resize(DFSResult->getNumSubtrees());
 583 }
 584
 585 void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
 586                                           SmallVectorImpl<SUnit*> &BotRoots) {
 587   for (std::vector<SUnit>::iterator
 588          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
 589     SUnit *SU = &(*I);
 590     assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
 591
 592     // Order predecessors so DFSResult follows the critical path.
 593     SU->biasCriticalPath();
 594
 595     // A SUnit is ready to top schedule if it has no predecessors.
 596     if (!I->NumPredsLeft)
 597       TopRoots.push_back(SU);
 598     // A SUnit is ready to bottom schedule if it has no successors.
 599     if (!I->NumSuccsLeft)
 600       BotRoots.push_back(SU);
 601   }
 602   ExitSU.biasCriticalPath();
 603 }
 604
 605 /// Identify DAG roots and setup scheduler queues.
 606 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
 607                                ArrayRef<SUnit*> BotRoots) {
 608   NextClusterSucc = NULL;
 609   NextClusterPred = NULL;
 610
 611   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
 612   //
 613   // Nodes with unreleased weak edges can still be roots.
 614   // Release top roots in forward order.
 615   for (SmallVectorImpl<SUnit*>::const_iterator
 616          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
 617     SchedImpl->releaseTopNode(*I);
 618   }
 619   // Release bottom roots in reverse order so the higher priority nodes appear
 620   // first. This is more natural and slightly more efficient.
 621   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
 622          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
 623     SchedImpl->releaseBottomNode(*I);
 624   }
 625
 626   releaseSuccessors(&EntrySU);
 627   releasePredecessors(&ExitSU);
 628
 629   SchedImpl->registerRoots();
 630
 631   // Advance past initial DebugValues.
 632   assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
 633   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
 634   TopRPTracker.setPos(CurrentTop);
 635
 636   CurrentBottom = RegionEnd;
 637 }
 638
 639 /// Move an instruction and update register pressure.
 640 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
 641   // Move the instruction to its new location in the instruction stream.
 642   MachineInstr *MI = SU->getInstr();
 643
 644   if (IsTopNode) {
 645     assert(SU->isTopReady() && "node still has unscheduled dependencies");
 646     if (&*CurrentTop == MI)
 647       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
 648     else {
 649       moveInstruction(MI, CurrentTop);
 650       TopRPTracker.setPos(MI);
 651     }
 652
 653     // Update top scheduled pressure.
 654     TopRPTracker.advance();
 655     assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
 656     updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
 657   }
 658   else {
 659     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
 660     MachineBasicBlock::iterator priorII =
 661       priorNonDebug(CurrentBottom, CurrentTop);
 662     if (&*priorII == MI)
 663       CurrentBottom = priorII;
 664     else {
 665       if (&*CurrentTop == MI) {
 666         CurrentTop = nextIfDebug(++CurrentTop, priorII);
 667         TopRPTracker.setPos(CurrentTop);
 668       }
 669       moveInstruction(MI, CurrentBottom);
 670       CurrentBottom = MI;
 671     }
 672     // Update bottom scheduled pressure.
 673     BotRPTracker.recede();
 674     assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
 675     updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
 676   }
 677 }
 678
 679 /// Update scheduler queues after scheduling an instruction.
 680 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
 681   // Release dependent instructions for scheduling.
 682   if (IsTopNode)
 683     releaseSuccessors(SU);
 684   else
 685     releasePredecessors(SU);
 686
 687   SU->isScheduled = true;
 688
 689   if (DFSResult) {
 690     unsigned SubtreeID = DFSResult->getSubtreeID(SU);
 691     if (!ScheduledTrees.test(SubtreeID)) {
 692       ScheduledTrees.set(SubtreeID);
 693       DFSResult->scheduleTree(SubtreeID);
 694       SchedImpl->scheduleTree(SubtreeID);
 695     }
 696   }
 697
 698   // Notify the scheduling strategy after updating the DAG.
 699   SchedImpl->schedNode(SU, IsTopNode);
 700 }
 701
 702 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
 703 void ScheduleDAGMI::placeDebugValues() {
 704   // If first instruction was a DBG_VALUE then put it back.
 705   if (FirstDbgValue) {
 706     BB->splice(RegionBegin, BB, FirstDbgValue);
 707     RegionBegin = FirstDbgValue;
 708   }
 709
 710   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
 711          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
 712     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
 713     MachineInstr *DbgValue = P.first;
 714     MachineBasicBlock::iterator OrigPrevMI = P.second;
 715     if (&*RegionBegin == DbgValue)
 716       ++RegionBegin;
 717     BB->splice(++OrigPrevMI, BB, DbgValue);
 718     if (OrigPrevMI == llvm::prior(RegionEnd))
 719       RegionEnd = DbgValue;
 720   }
 721   DbgValues.clear();
 722   FirstDbgValue = NULL;
 723 }
 724
 725 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 726 void ScheduleDAGMI::dumpSchedule() const {
 727   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
 728     if (SUnit *SU = getSUnit(&(*MI)))
 729       SU->dump(this);
 730     else
 731       dbgs() << "Missing SUnit\n";
 732   }
 733 }
 734 #endif
 735
 736 //===----------------------------------------------------------------------===//
 737 // LoadClusterMutation - DAG post-processing to cluster loads.
 738 //===----------------------------------------------------------------------===//
 739
 740 namespace {
 741 /// \brief Post-process the DAG to create cluster edges between neighboring
 742 /// loads.
 743 class LoadClusterMutation : public ScheduleDAGMutation {
 744   struct LoadInfo {
 745     SUnit *SU;
 746     unsigned BaseReg;
 747     unsigned Offset;
 748     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
 749       : SU(su), BaseReg(reg), Offset(ofs) {}
 750   };
 751   static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
 752                            const LoadClusterMutation::LoadInfo &RHS);
 753
 754   const TargetInstrInfo *TII;
 755   const TargetRegisterInfo *TRI;
 756 public:
 757   LoadClusterMutation(const TargetInstrInfo *tii,
 758                       const TargetRegisterInfo *tri)
 759     : TII(tii), TRI(tri) {}
 760
 761   virtual void apply(ScheduleDAGMI *DAG);
 762 protected:
 763   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
 764 };
 765 } // anonymous
 766
 767 bool LoadClusterMutation::LoadInfoLess(
 768   const LoadClusterMutation::LoadInfo &LHS,
 769   const LoadClusterMutation::LoadInfo &RHS) {
 770   if (LHS.BaseReg != RHS.BaseReg)
 771     return LHS.BaseReg < RHS.BaseReg;
 772   return LHS.Offset < RHS.Offset;
 773 }
 774
 775 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
 776                                                   ScheduleDAGMI *DAG) {
 777   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
 778   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
 779     SUnit *SU = Loads[Idx];
 780     unsigned BaseReg;
 781     unsigned Offset;
 782     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
 783       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
 784   }
 785   if (LoadRecords.size() < 2)
 786     return;
 787   std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
 788   unsigned ClusterLength = 1;
 789   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
 790     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
 791       ClusterLength = 1;
 792       continue;
 793     }
 794
 795     SUnit *SUa = LoadRecords[Idx].SU;
 796     SUnit *SUb = LoadRecords[Idx+1].SU;
 797     if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
 798         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
 799
 800       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
 801             << SUb->NodeNum << ")\n");
 802       // Copy successor edges from SUa to SUb. Interleaving computation
 803       // dependent on SUa can prevent load combining due to register reuse.
 804       // Predecessor edges do not need to be copied from SUb to SUa since nearby
 805       // loads should have effectively the same inputs.
 806       for (SUnit::const_succ_iterator
 807              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
 808         if (SI->getSUnit() == SUb)
 809           continue;
 810         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
 811         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
 812       }
 813       ++ClusterLength;
 814     }
 815     else
 816       ClusterLength = 1;
 817   }
 818 }
 819
 820 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
 821 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
 822   // Map DAG NodeNum to store chain ID.
 823   DenseMap<unsigned, unsigned> StoreChainIDs;
 824   // Map each store chain to a set of dependent loads.
 825   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
 826   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
 827     SUnit *SU = &DAG->SUnits[Idx];
 828     if (!SU->getInstr()->mayLoad())
 829       continue;
 830     unsigned ChainPredID = DAG->SUnits.size();
 831     for (SUnit::const_pred_iterator
 832            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
 833       if (PI->isCtrl()) {
 834         ChainPredID = PI->getSUnit()->NodeNum;
 835         break;
 836       }
 837     }
 838     // Check if this chain-like pred has been seen
 839     // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
 840     unsigned NumChains = StoreChainDependents.size();
 841     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
 842       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
 843     if (Result.second)
 844       StoreChainDependents.resize(NumChains + 1);
 845     StoreChainDependents[Result.first->second].push_back(SU);
 846   }
 847   // Iterate over the store chains.
 848   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
 849     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
 850 }
 851
 852 //===----------------------------------------------------------------------===//
 853 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
 854 //===----------------------------------------------------------------------===//
 855
 856 namespace {
 857 /// \brief Post-process the DAG to create cluster edges between instructions
 858 /// that may be fused by the processor into a single operation.
 859 class MacroFusion : public ScheduleDAGMutation {
 860   const TargetInstrInfo *TII;
 861 public:
 862   MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
 863
 864   virtual void apply(ScheduleDAGMI *DAG);
 865 };
 866 } // anonymous
 867
 868 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
 869 /// fused operations.
 870 void MacroFusion::apply(ScheduleDAGMI *DAG) {
 871   // For now, assume targets can only fuse with the branch.
 872   MachineInstr *Branch = DAG->ExitSU.getInstr();
 873   if (!Branch)
 874     return;
 875
 876   for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
 877     SUnit *SU = &DAG->SUnits[--Idx];
 878     if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
 879       continue;
 880
 881     // Create a single weak edge from SU to ExitSU. The only effect is to cause
 882     // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
 883     // need to copy predecessor edges from ExitSU to SU, since top-down
 884     // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
 885     // of SU, we could create an artificial edge from the deepest root, but it
 886     // hasn't been needed yet.
 887     bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
 888     (void)Success;
 889     assert(Success && "No DAG nodes should be reachable from ExitSU");
 890
 891     DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
 892     break;
 893   }
 894 }
 895
 896 //===----------------------------------------------------------------------===//
 897 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
 898 //===----------------------------------------------------------------------===//
 899
 900 namespace {
 901 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
 902 /// the schedule.
 903 class ConvergingScheduler : public MachineSchedStrategy {
 904 public:
 905   /// Represent the type of SchedCandidate found within a single queue.
 906   /// pickNodeBidirectional depends on these listed by decreasing priority.
 907   enum CandReason {
 908     NoCand, SingleExcess, SingleCritical, Cluster,
 909     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
 910     TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
 911     NodeOrder};
 912
 913 #ifndef NDEBUG
 914   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
 915 #endif
 916
 917   /// Policy for scheduling the next instruction in the candidate's zone.
 918   struct CandPolicy {
 919     bool ReduceLatency;
 920     unsigned ReduceResIdx;
 921     unsigned DemandResIdx;
 922
 923     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
 924   };
 925
 926   /// Status of an instruction's critical resource consumption.
 927   struct SchedResourceDelta {
 928     // Count critical resources in the scheduled region required by SU.
 929     unsigned CritResources;
 930
 931     // Count critical resources from another region consumed by SU.
 932     unsigned DemandedResources;
 933
 934     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
 935
 936     bool operator==(const SchedResourceDelta &RHS) const {
 937       return CritResources == RHS.CritResources
 938         && DemandedResources == RHS.DemandedResources;
 939     }
 940     bool operator!=(const SchedResourceDelta &RHS) const {
 941       return !operator==(RHS);
 942     }
 943   };
 944
 945   /// Store the state used by ConvergingScheduler heuristics, required for the
 946   /// lifetime of one invocation of pickNode().
 947   struct SchedCandidate {
 948     CandPolicy Policy;
 949
 950     // The best SUnit candidate.
 951     SUnit *SU;
 952
 953     // The reason for this candidate.
 954     CandReason Reason;
 955
 956     // Register pressure values for the best candidate.
 957     RegPressureDelta RPDelta;
 958
 959     // Critical resource consumption of the best candidate.
 960     SchedResourceDelta ResDelta;
 961
 962     SchedCandidate(const CandPolicy &policy)
 963     : Policy(policy), SU(NULL), Reason(NoCand) {}
 964
 965     bool isValid() const { return SU; }
 966
 967     // Copy the status of another candidate without changing policy.
 968     void setBest(SchedCandidate &Best) {
 969       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
 970       SU = Best.SU;
 971       Reason = Best.Reason;
 972       RPDelta = Best.RPDelta;
 973       ResDelta = Best.ResDelta;
 974     }
 975
 976     void initResourceDelta(const ScheduleDAGMI *DAG,
 977                            const TargetSchedModel *SchedModel);
 978   };
 979
 980   /// Summarize the unscheduled region.
 981   struct SchedRemainder {
 982     // Critical path through the DAG in expected latency.
 983     unsigned CriticalPath;
 984
 985     // Unscheduled resources
 986     SmallVector<unsigned, 16> RemainingCounts;
 987     // Critical resource for the unscheduled zone.
 988     unsigned CritResIdx;
 989     // Number of micro-ops left to schedule.
 990     unsigned RemainingMicroOps;
 991
 992     void reset() {
 993       CriticalPath = 0;
 994       RemainingCounts.clear();
 995       CritResIdx = 0;
 996       RemainingMicroOps = 0;
 997     }
 998
 999     SchedRemainder() { reset(); }
1000
1001     void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
1002
1003     unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
1004       if (!SchedModel->hasInstrSchedModel())
1005         return 0;
1006
1007       return std::max(
1008         RemainingMicroOps * SchedModel->getMicroOpFactor(),
1009         RemainingCounts[CritResIdx]);
1010     }
1011   };
1012
1013   /// Each Scheduling boundary is associated with ready queues. It tracks the
1014   /// current cycle in the direction of movement, and maintains the state
1015   /// of "hazards" and other interlocks at the current cycle.
1016   struct SchedBoundary {
1017     ScheduleDAGMI *DAG;
1018     const TargetSchedModel *SchedModel;
1019     SchedRemainder *Rem;
1020
1021     ReadyQueue Available;
1022     ReadyQueue Pending;
1023     bool CheckPending;
1024
1025     // For heuristics, keep a list of the nodes that immediately depend on the
1026     // most recently scheduled node.
1027     SmallPtrSet<const SUnit*, 8> NextSUs;
1028
1029     ScheduleHazardRecognizer *HazardRec;
1030
1031     unsigned CurrCycle;
1032     unsigned IssueCount;
1033
1034     /// MinReadyCycle - Cycle of the soonest available instruction.
1035     unsigned MinReadyCycle;
1036
1037     // The expected latency of the critical path in this scheduled zone.
1038     unsigned ExpectedLatency;
1039
1040     // Resources used in the scheduled zone beyond this boundary.
1041     SmallVector<unsigned, 16> ResourceCounts;
1042
1043     // Cache the critical resources ID in this scheduled zone.
1044     unsigned CritResIdx;
1045
1046     // Is the scheduled region resource limited vs. latency limited.
1047     bool IsResourceLimited;
1048
1049     unsigned ExpectedCount;
1050
1051 #ifndef NDEBUG
1052     // Remember the greatest min operand latency.
1053     unsigned MaxMinLatency;
1054 #endif
1055
1056     void reset() {
1057       // A new HazardRec is created for each DAG and owned by SchedBoundary.
1058       delete HazardRec;
1059
1060       Available.clear();
1061       Pending.clear();
1062       CheckPending = false;
1063       NextSUs.clear();
1064       HazardRec = 0;
1065       CurrCycle = 0;
1066       IssueCount = 0;
1067       MinReadyCycle = UINT_MAX;
1068       ExpectedLatency = 0;
1069       ResourceCounts.resize(1);
1070       assert(!ResourceCounts[0] && "nonzero count for bad resource");
1071       CritResIdx = 0;
1072       IsResourceLimited = false;
1073       ExpectedCount = 0;
1074 #ifndef NDEBUG
1075       MaxMinLatency = 0;
1076 #endif
1077       // Reserve a zero-count for invalid CritResIdx.
1078       ResourceCounts.resize(1);
1079     }
1080
1081     /// Pending queues extend the ready queues with the same ID and the
1082     /// PendingFlag set.
1083     SchedBoundary(unsigned ID, const Twine &Name):
1084       DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1085       Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
1086       HazardRec(0) {
1087       reset();
1088     }
1089
1090     ~SchedBoundary() { delete HazardRec; }
1091
1092     void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1093               SchedRemainder *rem);
1094
1095     bool isTop() const {
1096       return Available.getID() == ConvergingScheduler::TopQID;
1097     }
1098
1099     unsigned getUnscheduledLatency(SUnit *SU) const {
1100       if (isTop())
1101         return SU->getHeight();
1102       return SU->getDepth() + SU->Latency;
1103     }
1104
1105     unsigned getCriticalCount() const {
1106       return ResourceCounts[CritResIdx];
1107     }
1108
1109     bool checkHazard(SUnit *SU);
1110
1111     void setLatencyPolicy(CandPolicy &Policy);
1112
1113     void releaseNode(SUnit *SU, unsigned ReadyCycle);
1114
1115     void bumpCycle();
1116
1117     void countResource(unsigned PIdx, unsigned Cycles);
1118
1119     void bumpNode(SUnit *SU);
1120
1121     void releasePending();
1122
1123     void removeReady(SUnit *SU);
1124
1125     SUnit *pickOnlyChoice();
1126   };
1127
1128 private:
1129   ScheduleDAGMI *DAG;
1130   const TargetSchedModel *SchedModel;
1131   const TargetRegisterInfo *TRI;
1132
1133   // State of the top and bottom scheduled instruction boundaries.
1134   SchedRemainder Rem;
1135   SchedBoundary Top;
1136   SchedBoundary Bot;
1137
1138 public:
1139   /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1140   enum {
1141     TopQID = 1,
1142     BotQID = 2,
1143     LogMaxQID = 2
1144   };
1145
1146   ConvergingScheduler():
1147     DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1148
1149   virtual void initialize(ScheduleDAGMI *dag);
1150
1151   virtual SUnit *pickNode(bool &IsTopNode);
1152
1153   virtual void schedNode(SUnit *SU, bool IsTopNode);
1154
1155   virtual void releaseTopNode(SUnit *SU);
1156
1157   virtual void releaseBottomNode(SUnit *SU);
1158
1159   virtual void registerRoots();
1160
1161 protected:
1162   void balanceZones(
1163     ConvergingScheduler::SchedBoundary &CriticalZone,
1164     ConvergingScheduler::SchedCandidate &CriticalCand,
1165     ConvergingScheduler::SchedBoundary &OppositeZone,
1166     ConvergingScheduler::SchedCandidate &OppositeCand);
1167
1168   void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
1169                            ConvergingScheduler::SchedCandidate &BotCand);
1170
1171   void tryCandidate(SchedCandidate &Cand,
1172                     SchedCandidate &TryCand,
1173                     SchedBoundary &Zone,
1174                     const RegPressureTracker &RPTracker,
1175                     RegPressureTracker &TempTracker);
1176
1177   SUnit *pickNodeBidirectional(bool &IsTopNode);
1178
1179   void pickNodeFromQueue(SchedBoundary &Zone,
1180                          const RegPressureTracker &RPTracker,
1181                          SchedCandidate &Candidate);
1182
1183 #ifndef NDEBUG
1184   void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
1185 #endif
1186 };
1187 } // namespace
1188
1189 void ConvergingScheduler::SchedRemainder::
1190 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1191   reset();
1192   if (!SchedModel->hasInstrSchedModel())
1193     return;
1194   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1195   for (std::vector<SUnit>::iterator
1196          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1197     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1198     RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
1199     for (TargetSchedModel::ProcResIter
1200            PI = SchedModel->getWriteProcResBegin(SC),
1201            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1202       unsigned PIdx = PI->ProcResourceIdx;
1203       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1204       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1205     }
1206   }
1207   for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
1208        PIdx != PEnd; ++PIdx) {
1209     if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
1210         >= (int)SchedModel->getLatencyFactor()) {
1211       CritResIdx = PIdx;
1212     }
1213   }
1214 }
1215
1216 void ConvergingScheduler::SchedBoundary::
1217 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1218   reset();
1219   DAG = dag;
1220   SchedModel = smodel;
1221   Rem = rem;
1222   if (SchedModel->hasInstrSchedModel())
1223     ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
1224 }
1225
1226 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
1227   DAG = dag;
1228   SchedModel = DAG->getSchedModel();
1229   TRI = DAG->TRI;
1230
1231   Rem.init(DAG, SchedModel);
1232   Top.init(DAG, SchedModel, &Rem);
1233   Bot.init(DAG, SchedModel, &Rem);
1234
1235   DAG->computeDFSResult();
1236
1237   // Initialize resource counts.
1238
1239   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1240   // are disabled, then these HazardRecs will be disabled.
1241   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1242   const TargetMachine &TM = DAG->MF.getTarget();
1243   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1244   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1245
1246   assert((!ForceTopDown || !ForceBottomUp) &&
1247          "-misched-topdown incompatible with -misched-bottomup");
1248 }
1249
1250 void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1251   if (SU->isScheduled)
1252     return;
1253
1254   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1255        I != E; ++I) {
1256     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1257     unsigned MinLatency = I->getMinLatency();
1258 #ifndef NDEBUG
1259     Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1260 #endif
1261     if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1262       SU->TopReadyCycle = PredReadyCycle + MinLatency;
1263   }
1264   Top.releaseNode(SU, SU->TopReadyCycle);
1265 }
1266
1267 void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1268   if (SU->isScheduled)
1269     return;
1270
1271   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1272
1273   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1274        I != E; ++I) {
1275     if (I->isWeak())
1276       continue;
1277     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1278     unsigned MinLatency = I->getMinLatency();
1279 #ifndef NDEBUG
1280     Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1281 #endif
1282     if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1283       SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1284   }
1285   Bot.releaseNode(SU, SU->BotReadyCycle);
1286 }
1287
1288 void ConvergingScheduler::registerRoots() {
1289   Rem.CriticalPath = DAG->ExitSU.getDepth();
1290   // Some roots may not feed into ExitSU. Check all of them in case.
1291   for (std::vector<SUnit*>::const_iterator
1292          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1293     if ((*I)->getDepth() > Rem.CriticalPath)
1294       Rem.CriticalPath = (*I)->getDepth();
1295   }
1296   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1297 }
1298
1299 /// Does this SU have a hazard within the current instruction group.
1300 ///
1301 /// The scheduler supports two modes of hazard recognition. The first is the
1302 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1303 /// supports highly complicated in-order reservation tables
1304 /// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
1305 ///
1306 /// The second is a streamlined mechanism that checks for hazards based on
1307 /// simple counters that the scheduler itself maintains. It explicitly checks
1308 /// for instruction dispatch limitations, including the number of micro-ops that
1309 /// can dispatch per cycle.
1310 ///
1311 /// TODO: Also check whether the SU must start a new group.
1312 bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1313   if (HazardRec->isEnabled())
1314     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1315
1316   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1317   if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1318     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1319           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1320     return true;
1321   }
1322   return false;
1323 }
1324
1325 /// Compute the remaining latency to determine whether ILP should be increased.
1326 void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
1327   // FIXME: compile time. In all, we visit four queues here one we should only
1328   // need to visit the one that was last popped if we cache the result.
1329   unsigned RemLatency = 0;
1330   for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
1331        I != E; ++I) {
1332     unsigned L = getUnscheduledLatency(*I);
1333     if (L > RemLatency)
1334       RemLatency = L;
1335   }
1336   for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
1337        I != E; ++I) {
1338     unsigned L = getUnscheduledLatency(*I);
1339     if (L > RemLatency)
1340       RemLatency = L;
1341   }
1342   unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
1343   if (RemLatency + ExpectedLatency >= CriticalPathLimit
1344       && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
1345     Policy.ReduceLatency = true;
1346     DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
1347   }
1348 }
1349
1350 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1351                                                      unsigned ReadyCycle) {
1352
1353   if (ReadyCycle < MinReadyCycle)
1354     MinReadyCycle = ReadyCycle;
1355
1356   // Check for interlocks first. For the purpose of other heuristics, an
1357   // instruction that cannot issue appears as if it's not in the ReadyQueue.
1358   if (ReadyCycle > CurrCycle || checkHazard(SU))
1359     Pending.push(SU);
1360   else
1361     Available.push(SU);
1362
1363   // Record this node as an immediate dependent of the scheduled node.
1364   NextSUs.insert(SU);
1365 }
1366
1367 /// Move the boundary of scheduled code by one cycle.
1368 void ConvergingScheduler::SchedBoundary::bumpCycle() {
1369   unsigned Width = SchedModel->getIssueWidth();
1370   IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1371
1372   unsigned NextCycle = CurrCycle + 1;
1373   assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1374   if (MinReadyCycle > NextCycle) {
1375     IssueCount = 0;
1376     NextCycle = MinReadyCycle;
1377   }
1378
1379   if (!HazardRec->isEnabled()) {
1380     // Bypass HazardRec virtual calls.
1381     CurrCycle = NextCycle;
1382   }
1383   else {
1384     // Bypass getHazardType calls in case of long latency.
1385     for (; CurrCycle != NextCycle; ++CurrCycle) {
1386       if (isTop())
1387         HazardRec->AdvanceCycle();
1388       else
1389         HazardRec->RecedeCycle();
1390     }
1391   }
1392   CheckPending = true;
1393   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1394
1395   DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
1396         << CurrCycle << '\n');
1397 }
1398
1399 /// Add the given processor resource to this scheduled zone.
1400 void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1401                                                        unsigned Cycles) {
1402   unsigned Factor = SchedModel->getResourceFactor(PIdx);
1403   DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1404         << " +(" << Cycles << "x" << Factor
1405         << ") / " << SchedModel->getLatencyFactor() << '\n');
1406
1407   unsigned Count = Factor * Cycles;
1408   ResourceCounts[PIdx] += Count;
1409   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1410   Rem->RemainingCounts[PIdx] -= Count;
1411
1412   // Check if this resource exceeds the current critical resource by a full
1413   // cycle. If so, it becomes the critical resource.
1414   if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1415       >= (int)SchedModel->getLatencyFactor()) {
1416     CritResIdx = PIdx;
1417     DEBUG(dbgs() << "  *** Critical resource "
1418           << SchedModel->getProcResource(PIdx)->Name << " x"
1419           << ResourceCounts[PIdx] << '\n');
1420   }
1421 }
1422
1423 /// Move the boundary of scheduled code by one SUnit.
1424 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1425   // Update the reservation table.
1426   if (HazardRec->isEnabled()) {
1427     if (!isTop() && SU->isCall) {
1428       // Calls are scheduled with their preceding instructions. For bottom-up
1429       // scheduling, clear the pipeline state before emitting.
1430       HazardRec->Reset();
1431     }
1432     HazardRec->EmitInstruction(SU);
1433   }
1434   // Update resource counts and critical resource.
1435   if (SchedModel->hasInstrSchedModel()) {
1436     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1437     Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1438     for (TargetSchedModel::ProcResIter
1439            PI = SchedModel->getWriteProcResBegin(SC),
1440            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1441       countResource(PI->ProcResourceIdx, PI->Cycles);
1442     }
1443   }
1444   if (isTop()) {
1445     if (SU->getDepth() > ExpectedLatency)
1446       ExpectedLatency = SU->getDepth();
1447   }
1448   else {
1449     if (SU->getHeight() > ExpectedLatency)
1450       ExpectedLatency = SU->getHeight();
1451   }
1452
1453   IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1454
1455   // Check the instruction group dispatch limit.
1456   // TODO: Check if this SU must end a dispatch group.
1457   IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
1458
1459   // checkHazard prevents scheduling multiple instructions per cycle that exceed
1460   // issue width. However, we commonly reach the maximum. In this case
1461   // opportunistically bump the cycle to avoid uselessly checking everything in
1462   // the readyQ. Furthermore, a single instruction may produce more than one
1463   // cycle's worth of micro-ops.
1464   if (IssueCount >= SchedModel->getIssueWidth()) {
1465     DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1466     bumpCycle();
1467   }
1468 }
1469
1470 /// Release pending ready nodes in to the available queue. This makes them
1471 /// visible to heuristics.
1472 void ConvergingScheduler::SchedBoundary::releasePending() {
1473   // If the available queue is empty, it is safe to reset MinReadyCycle.
1474   if (Available.empty())
1475     MinReadyCycle = UINT_MAX;
1476
1477   // Check to see if any of the pending instructions are ready to issue.  If
1478   // so, add them to the available queue.
1479   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1480     SUnit *SU = *(Pending.begin()+i);
1481     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1482
1483     if (ReadyCycle < MinReadyCycle)
1484       MinReadyCycle = ReadyCycle;
1485
1486     if (ReadyCycle > CurrCycle)
1487       continue;
1488
1489     if (checkHazard(SU))
1490       continue;
1491
1492     Available.push(SU);
1493     Pending.remove(Pending.begin()+i);
1494     --i; --e;
1495   }
1496   DEBUG(if (!Pending.empty()) Pending.dump());
1497   CheckPending = false;
1498 }
1499
1500 /// Remove SU from the ready set for this boundary.
1501 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1502   if (Available.isInQueue(SU))
1503     Available.remove(Available.find(SU));
1504   else {
1505     assert(Pending.isInQueue(SU) && "bad ready count");
1506     Pending.remove(Pending.find(SU));
1507   }
1508 }
1509
1510 /// If this queue only has one ready candidate, return it. As a side effect,
1511 /// defer any nodes that now hit a hazard, and advance the cycle until at least
1512 /// one node is ready. If multiple instructions are ready, return NULL.
1513 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1514   if (CheckPending)
1515     releasePending();
1516
1517   if (IssueCount > 0) {
1518     // Defer any ready instrs that now have a hazard.
1519     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1520       if (checkHazard(*I)) {
1521         Pending.push(*I);
1522         I = Available.remove(I);
1523         continue;
1524       }
1525       ++I;
1526     }
1527   }
1528   for (unsigned i = 0; Available.empty(); ++i) {
1529     assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1530            "permanent hazard"); (void)i;
1531     bumpCycle();
1532     releasePending();
1533   }
1534   if (Available.size() == 1)
1535     return *Available.begin();
1536   return NULL;
1537 }
1538
1539 /// Record the candidate policy for opposite zones with different critical
1540 /// resources.
1541 ///
1542 /// If the CriticalZone is latency limited, don't force a policy for the
1543 /// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
1544 void ConvergingScheduler::balanceZones(
1545   ConvergingScheduler::SchedBoundary &CriticalZone,
1546   ConvergingScheduler::SchedCandidate &CriticalCand,
1547   ConvergingScheduler::SchedBoundary &OppositeZone,
1548   ConvergingScheduler::SchedCandidate &OppositeCand) {
1549
1550   if (!CriticalZone.IsResourceLimited)
1551     return;
1552   assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
1553
1554   SchedRemainder *Rem = CriticalZone.Rem;
1555
1556   // If the critical zone is overconsuming a resource relative to the
1557   // remainder, try to reduce it.
1558   unsigned RemainingCritCount =
1559     Rem->RemainingCounts[CriticalZone.CritResIdx];
1560   if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
1561       > (int)SchedModel->getLatencyFactor()) {
1562     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1563     DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
1564           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1565           << '\n');
1566   }
1567   // If the other zone is underconsuming a resource relative to the full zone,
1568   // try to increase it.
1569   unsigned OppositeCount =
1570     OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1571   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1572       > (int)SchedModel->getLatencyFactor()) {
1573     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1574     DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
1575           << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
1576           << '\n');
1577   }
1578 }
1579
1580 /// Determine if the scheduled zones exceed resource limits or critical path and
1581 /// set each candidate's ReduceHeight policy accordingly.
1582 void ConvergingScheduler::checkResourceLimits(
1583   ConvergingScheduler::SchedCandidate &TopCand,
1584   ConvergingScheduler::SchedCandidate &BotCand) {
1585
1586   // Set ReduceLatency to true if needed.
1587   Bot.setLatencyPolicy(BotCand.Policy);
1588   Top.setLatencyPolicy(TopCand.Policy);
1589
1590   // Handle resource-limited regions.
1591   if (Top.IsResourceLimited && Bot.IsResourceLimited
1592       && Top.CritResIdx == Bot.CritResIdx) {
1593     // If the scheduled critical resource in both zones is no longer the
1594     // critical remaining resource, attempt to reduce resource height both ways.
1595     if (Top.CritResIdx != Rem.CritResIdx) {
1596       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1597       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1598       DEBUG(dbgs() << "Reduce scheduled "
1599             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1600     }
1601     return;
1602   }
1603   // Handle latency-limited regions.
1604   if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1605     // If the total scheduled expected latency exceeds the region's critical
1606     // path then reduce latency both ways.
1607     //
1608     // Just because a zone is not resource limited does not mean it is latency
1609     // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle
1610     // to exceed expected latency.
1611     if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1612         && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1613       TopCand.Policy.ReduceLatency = true;
1614       BotCand.Policy.ReduceLatency = true;
1615       DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
1616             << " + " << Bot.ExpectedLatency << '\n');
1617     }
1618     return;
1619   }
1620   // The critical resource is different in each zone, so request balancing.
1621
1622   // Compute the cost of each zone.
1623   Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1624   Top.ExpectedCount = std::max(
1625     Top.getCriticalCount(),
1626     Top.ExpectedCount * SchedModel->getLatencyFactor());
1627   Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1628   Bot.ExpectedCount = std::max(
1629     Bot.getCriticalCount(),
1630     Bot.ExpectedCount * SchedModel->getLatencyFactor());
1631
1632   balanceZones(Top, TopCand, Bot, BotCand);
1633   balanceZones(Bot, BotCand, Top, TopCand);
1634 }
1635
1636 void ConvergingScheduler::SchedCandidate::
1637 initResourceDelta(const ScheduleDAGMI *DAG,
1638                   const TargetSchedModel *SchedModel) {
1639   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1640     return;
1641
1642   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1643   for (TargetSchedModel::ProcResIter
1644          PI = SchedModel->getWriteProcResBegin(SC),
1645          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1646     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1647       ResDelta.CritResources += PI->Cycles;
1648     if (PI->ProcResourceIdx == Policy.DemandResIdx)
1649       ResDelta.DemandedResources += PI->Cycles;
1650   }
1651 }
1652
1653 /// Return true if this heuristic determines order.
1654 static bool tryLess(unsigned TryVal, unsigned CandVal,
1655                     ConvergingScheduler::SchedCandidate &TryCand,
1656                     ConvergingScheduler::SchedCandidate &Cand,
1657                     ConvergingScheduler::CandReason Reason) {
1658   if (TryVal < CandVal) {
1659     TryCand.Reason = Reason;
1660     return true;
1661   }
1662   if (TryVal > CandVal) {
1663     if (Cand.Reason > Reason)
1664       Cand.Reason = Reason;
1665     return true;
1666   }
1667   return false;
1668 }
1669
1670 static bool tryGreater(unsigned TryVal, unsigned CandVal,
1671                        ConvergingScheduler::SchedCandidate &TryCand,
1672                        ConvergingScheduler::SchedCandidate &Cand,
1673                        ConvergingScheduler::CandReason Reason) {
1674   if (TryVal > CandVal) {
1675     TryCand.Reason = Reason;
1676     return true;
1677   }
1678   if (TryVal < CandVal) {
1679     if (Cand.Reason > Reason)
1680       Cand.Reason = Reason;
1681     return true;
1682   }
1683   return false;
1684 }
1685
1686 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
1687   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
1688 }
1689
1690 /// Apply a set of heursitics to a new candidate. Heuristics are currently
1691 /// hierarchical. This may be more efficient than a graduated cost model because
1692 /// we don't need to evaluate all aspects of the model for each node in the
1693 /// queue. But it's really done to make the heuristics easier to debug and
1694 /// statistically analyze.
1695 ///
1696 /// \param Cand provides the policy and current best candidate.
1697 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
1698 /// \param Zone describes the scheduled zone that we are extending.
1699 /// \param RPTracker describes reg pressure within the scheduled zone.
1700 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
1701 void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1702                                        SchedCandidate &TryCand,
1703                                        SchedBoundary &Zone,
1704                                        const RegPressureTracker &RPTracker,
1705                                        RegPressureTracker &TempTracker) {
1706
1707   // Always initialize TryCand's RPDelta.
1708   TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1709                                   DAG->getRegionCriticalPSets(),
1710                                   DAG->getRegPressure().MaxSetPressure);
1711
1712   // Initialize the candidate if needed.
1713   if (!Cand.isValid()) {
1714     TryCand.Reason = NodeOrder;
1715     return;
1716   }
1717   // Avoid exceeding the target's limit.
1718   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1719               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1720     return;
1721   if (Cand.Reason == SingleExcess)
1722     Cand.Reason = MultiPressure;
1723
1724   // Avoid increasing the max critical pressure in the scheduled region.
1725   if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1726               Cand.RPDelta.CriticalMax.UnitIncrease,
1727               TryCand, Cand, SingleCritical))
1728     return;
1729   if (Cand.Reason == SingleCritical)
1730     Cand.Reason = MultiPressure;
1731
1732   // Keep clustered nodes together to encourage downstream peephole
1733   // optimizations which may reduce resource requirements.
1734   //
1735   // This is a best effort to set things up for a post-RA pass. Optimizations
1736   // like generating loads of multiple registers should ideally be done within
1737   // the scheduler pass by combining the loads during DAG postprocessing.
1738   const SUnit *NextClusterSU =
1739     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
1740   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
1741                  TryCand, Cand, Cluster))
1742     return;
1743   // Currently, weak edges are for clustering, so we hard-code that reason.
1744   // However, deferring the current TryCand will not change Cand's reason.
1745   CandReason OrigReason = Cand.Reason;
1746   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
1747               getWeakLeft(Cand.SU, Zone.isTop()),
1748               TryCand, Cand, Cluster)) {
1749     Cand.Reason = OrigReason;
1750     return;
1751   }
1752   // Avoid critical resource consumption and balance the schedule.
1753   TryCand.initResourceDelta(DAG, SchedModel);
1754   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
1755               TryCand, Cand, ResourceReduce))
1756     return;
1757   if (tryGreater(TryCand.ResDelta.DemandedResources,
1758                  Cand.ResDelta.DemandedResources,
1759                  TryCand, Cand, ResourceDemand))
1760     return;
1761
1762   // Avoid serializing long latency dependence chains.
1763   if (Cand.Policy.ReduceLatency) {
1764     if (Zone.isTop()) {
1765       if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
1766           > Zone.ExpectedCount) {
1767         if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1768                     TryCand, Cand, TopDepthReduce))
1769           return;
1770       }
1771       if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1772                      TryCand, Cand, TopPathReduce))
1773         return;
1774     }
1775     else {
1776       if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
1777           > Zone.ExpectedCount) {
1778         if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1779                     TryCand, Cand, BotHeightReduce))
1780           return;
1781       }
1782       if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1783                      TryCand, Cand, BotPathReduce))
1784         return;
1785     }
1786   }
1787
1788   // Avoid increasing the max pressure of the entire region.
1789   if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
1790               Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
1791     return;
1792   if (Cand.Reason == SingleMax)
1793     Cand.Reason = MultiPressure;
1794
1795   // Prefer immediate defs/users of the last scheduled instruction. This is a
1796   // nice pressure avoidance strategy that also conserves the processor's
1797   // register renaming resources and keeps the machine code readable.
1798   if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
1799                  TryCand, Cand, NextDefUse))
1800     return;
1801
1802   // Fall through to original instruction order.
1803   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
1804       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
1805     TryCand.Reason = NodeOrder;
1806   }
1807 }
1808
1809 /// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
1810 /// more desirable than RHS from scheduling standpoint.
1811 static bool compareRPDelta(const RegPressureDelta &LHS,
1812                            const RegPressureDelta &RHS) {
1813   // Compare each component of pressure in decreasing order of importance
1814   // without checking if any are valid. Invalid PressureElements are assumed to
1815   // have UnitIncrease==0, so are neutral.
1816
1817   // Avoid increasing the max critical pressure in the scheduled region.
1818   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
1819     DEBUG(dbgs() << "RP excess top - bot: "
1820           << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
1821     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
1822   }
1823   // Avoid increasing the max critical pressure in the scheduled region.
1824   if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
1825     DEBUG(dbgs() << "RP critical top - bot: "
1826           << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
1827           << '\n');
1828     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
1829   }
1830   // Avoid increasing the max pressure of the entire region.
1831   if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
1832     DEBUG(dbgs() << "RP current top - bot: "
1833           << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
1834           << '\n');
1835     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
1836   }
1837   return false;
1838 }
1839
1840 #ifndef NDEBUG
1841 const char *ConvergingScheduler::getReasonStr(
1842   ConvergingScheduler::CandReason Reason) {
1843   switch (Reason) {
1844   case NoCand:         return "NOCAND    ";
1845   case SingleExcess:   return "REG-EXCESS";
1846   case SingleCritical: return "REG-CRIT  ";
1847   case Cluster:        return "CLUSTER   ";
1848   case SingleMax:      return "REG-MAX   ";
1849   case MultiPressure:  return "REG-MULTI ";
1850   case ResourceReduce: return "RES-REDUCE";
1851   case ResourceDemand: return "RES-DEMAND";
1852   case TopDepthReduce: return "TOP-DEPTH ";
1853   case TopPathReduce:  return "TOP-PATH  ";
1854   case BotHeightReduce:return "BOT-HEIGHT";
1855   case BotPathReduce:  return "BOT-PATH  ";
1856   case NextDefUse:     return "DEF-USE   ";
1857   case NodeOrder:      return "ORDER     ";
1858   };
1859   llvm_unreachable("Unknown reason!");
1860 }
1861
1862 void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
1863                                          const SchedBoundary &Zone) {
1864   const char *Label = getReasonStr(Cand.Reason);
1865   PressureElement P;
1866   unsigned ResIdx = 0;
1867   unsigned Latency = 0;
1868   switch (Cand.Reason) {
1869   default:
1870     break;
1871   case SingleExcess:
1872     P = Cand.RPDelta.Excess;
1873     break;
1874   case SingleCritical:
1875     P = Cand.RPDelta.CriticalMax;
1876     break;
1877   case SingleMax:
1878     P = Cand.RPDelta.CurrentMax;
1879     break;
1880   case ResourceReduce:
1881     ResIdx = Cand.Policy.ReduceResIdx;
1882     break;
1883   case ResourceDemand:
1884     ResIdx = Cand.Policy.DemandResIdx;
1885     break;
1886   case TopDepthReduce:
1887     Latency = Cand.SU->getDepth();
1888     break;
1889   case TopPathReduce:
1890     Latency = Cand.SU->getHeight();
1891     break;
1892   case BotHeightReduce:
1893     Latency = Cand.SU->getHeight();
1894     break;
1895   case BotPathReduce:
1896     Latency = Cand.SU->getDepth();
1897     break;
1898   }
1899   dbgs() << Label << " " << Zone.Available.getName() << " ";
1900   if (P.isValid())
1901     dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
1902            << " ";
1903   else
1904     dbgs() << "     ";
1905   if (ResIdx)
1906     dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
1907   else
1908     dbgs() << "        ";
1909   if (Latency)
1910     dbgs() << Latency << " cycles ";
1911   else
1912     dbgs() << "         ";
1913   Cand.SU->dump(DAG);
1914 }
1915 #endif
1916
1917 /// Pick the best candidate from the top queue.
1918 ///
1919 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
1920 /// DAG building. To adjust for the current scheduling location we need to
1921 /// maintain the number of vreg uses remaining to be top-scheduled.
1922 void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
1923                                             const RegPressureTracker &RPTracker,
1924                                             SchedCandidate &Cand) {
1925   ReadyQueue &Q = Zone.Available;
1926
1927   DEBUG(Q.dump());
1928
1929   // getMaxPressureDelta temporarily modifies the tracker.
1930   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
1931
1932   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
1933
1934     SchedCandidate TryCand(Cand.Policy);
1935     TryCand.SU = *I;
1936     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
1937     if (TryCand.Reason != NoCand) {
1938       // Initialize resource delta if needed in case future heuristics query it.
1939       if (TryCand.ResDelta == SchedResourceDelta())
1940         TryCand.initResourceDelta(DAG, SchedModel);
1941       Cand.setBest(TryCand);
1942       DEBUG(traceCandidate(Cand, Zone));
1943     }
1944   }
1945 }
1946
1947 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
1948                       bool IsTop) {
1949   DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
1950         << " SU(" << Cand.SU->NodeNum << ") "
1951         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
1952 }
1953
1954 /// Pick the best candidate node from either the top or bottom queue.
1955 SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
1956   // Schedule as far as possible in the direction of no choice. This is most
1957   // efficient, but also provides the best heuristics for CriticalPSets.
1958   if (SUnit *SU = Bot.pickOnlyChoice()) {
1959     IsTopNode = false;
1960     return SU;
1961   }
1962   if (SUnit *SU = Top.pickOnlyChoice()) {
1963     IsTopNode = true;
1964     return SU;
1965   }
1966   CandPolicy NoPolicy;
1967   SchedCandidate BotCand(NoPolicy);
1968   SchedCandidate TopCand(NoPolicy);
1969   checkResourceLimits(TopCand, BotCand);
1970
1971   // Prefer bottom scheduling when heuristics are silent.
1972   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1973   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1974
1975   // If either Q has a single candidate that provides the least increase in
1976   // Excess pressure, we can immediately schedule from that Q.
1977   //
1978   // RegionCriticalPSets summarizes the pressure within the scheduled region and
1979   // affects picking from either Q. If scheduling in one direction must
1980   // increase pressure for one of the excess PSets, then schedule in that
1981   // direction first to provide more freedom in the other direction.
1982   if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
1983     IsTopNode = false;
1984     tracePick(BotCand, IsTopNode);
1985     return BotCand.SU;
1986   }
1987   // Check if the top Q has a better candidate.
1988   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1989   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1990
1991   // If either Q has a single candidate that minimizes pressure above the
1992   // original region's pressure pick it.
1993   if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
1994     if (TopCand.Reason < BotCand.Reason) {
1995       IsTopNode = true;
1996       tracePick(TopCand, IsTopNode);
1997       return TopCand.SU;
1998     }
1999     IsTopNode = false;
2000     tracePick(BotCand, IsTopNode);
2001     return BotCand.SU;
2002   }
2003   // Check for a salient pressure difference and pick the best from either side.
2004   if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
2005     IsTopNode = true;
2006     tracePick(TopCand, IsTopNode);
2007     return TopCand.SU;
2008   }
2009   // Otherwise prefer the bottom candidate, in node order if all else failed.
2010   if (TopCand.Reason < BotCand.Reason) {
2011     IsTopNode = true;
2012     tracePick(TopCand, IsTopNode);
2013     return TopCand.SU;
2014   }
2015   IsTopNode = false;
2016   tracePick(BotCand, IsTopNode);
2017   return BotCand.SU;
2018 }
2019
2020 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2021 SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
2022   if (DAG->top() == DAG->bottom()) {
2023     assert(Top.Available.empty() && Top.Pending.empty() &&
2024            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2025     return NULL;
2026   }
2027   SUnit *SU;
2028   do {
2029     if (ForceTopDown) {
2030       SU = Top.pickOnlyChoice();
2031       if (!SU) {
2032         CandPolicy NoPolicy;
2033         SchedCandidate TopCand(NoPolicy);
2034         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2035         assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2036         SU = TopCand.SU;
2037       }
2038       IsTopNode = true;
2039     }
2040     else if (ForceBottomUp) {
2041       SU = Bot.pickOnlyChoice();
2042       if (!SU) {
2043         CandPolicy NoPolicy;
2044         SchedCandidate BotCand(NoPolicy);
2045         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2046         assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2047         SU = BotCand.SU;
2048       }
2049       IsTopNode = false;
2050     }
2051     else {
2052       SU = pickNodeBidirectional(IsTopNode);
2053     }
2054   } while (SU->isScheduled);
2055
2056   if (SU->isTopReady())
2057     Top.removeReady(SU);
2058   if (SU->isBottomReady())
2059     Bot.removeReady(SU);
2060
2061   DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
2062         << " Scheduling Instruction in cycle "
2063         << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
2064         SU->dump(DAG));
2065   return SU;
2066 }
2067
2068 /// Update the scheduler's state after scheduling a node. This is the same node
2069 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2070 /// it's state based on the current cycle before MachineSchedStrategy does.
2071 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2072   if (IsTopNode) {
2073     SU->TopReadyCycle = Top.CurrCycle;
2074     Top.bumpNode(SU);
2075   }
2076   else {
2077     SU->BotReadyCycle = Bot.CurrCycle;
2078     Bot.bumpNode(SU);
2079   }
2080 }
2081
2082 /// Create the standard converging machine scheduler. This will be used as the
2083 /// default scheduler if the target does not set a default.
2084 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
2085   assert((!ForceTopDown || !ForceBottomUp) &&
2086          "-misched-topdown incompatible with -misched-bottomup");
2087   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
2088   // Register DAG post-processors.
2089   if (EnableLoadCluster)
2090     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
2091   if (EnableMacroFusion)
2092     DAG->addMutation(new MacroFusion(DAG->TII));
2093   return DAG;
2094 }
2095 static MachineSchedRegistry
2096 ConvergingSchedRegistry("converge", "Standard converging scheduler.",
2097                         createConvergingSched);
2098
2099 //===----------------------------------------------------------------------===//
2100 // ILP Scheduler. Currently for experimental analysis of heuristics.
2101 //===----------------------------------------------------------------------===//
2102
2103 namespace {
2104 /// \brief Order nodes by the ILP metric.
2105 struct ILPOrder {
2106   const SchedDFSResult *DFSResult;
2107   const BitVector *ScheduledTrees;
2108   bool MaximizeILP;
2109
2110   ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
2111
2112   /// \brief Apply a less-than relation on node priority.
2113   ///
2114   /// (Return true if A comes after B in the Q.)
2115   bool operator()(const SUnit *A, const SUnit *B) const {
2116     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
2117     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
2118     if (SchedTreeA != SchedTreeB) {
2119       // Unscheduled trees have lower priority.
2120       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
2121         return ScheduledTrees->test(SchedTreeB);
2122
2123       // Trees with shallower connections have have lower priority.
2124       if (DFSResult->getSubtreeLevel(SchedTreeA)
2125           != DFSResult->getSubtreeLevel(SchedTreeB)) {
2126         return DFSResult->getSubtreeLevel(SchedTreeA)
2127           < DFSResult->getSubtreeLevel(SchedTreeB);
2128       }
2129     }
2130     if (MaximizeILP)
2131       return DFSResult->getILP(A) < DFSResult->getILP(B);
2132     else
2133       return DFSResult->getILP(A) > DFSResult->getILP(B);
2134   }
2135 };
2136
2137 /// \brief Schedule based on the ILP metric.
2138 class ILPScheduler : public MachineSchedStrategy {
2139   /// In case all subtrees are eventually connected to a common root through
2140   /// data dependence (e.g. reduction), place an upper limit on their size.
2141   ///
2142   /// FIXME: A subtree limit is generally good, but in the situation commented
2143   /// above, where multiple similar subtrees feed a common root, we should
2144   /// only split at a point where the resulting subtrees will be balanced.
2145   /// (a motivating test case must be found).
2146   static const unsigned SubtreeLimit = 16;
2147
2148   ScheduleDAGMI *DAG;
2149   ILPOrder Cmp;
2150
2151   std::vector<SUnit*> ReadyQ;
2152 public:
2153   ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
2154
2155   virtual void initialize(ScheduleDAGMI *dag) {
2156     DAG = dag;
2157     DAG->computeDFSResult();
2158     Cmp.DFSResult = DAG->getDFSResult();
2159     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
2160     ReadyQ.clear();
2161   }
2162
2163   virtual void registerRoots() {
2164     // Restore the heap in ReadyQ with the updated DFS results.
2165     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2166   }
2167
2168   /// Implement MachineSchedStrategy interface.
2169   /// -----------------------------------------
2170
2171   /// Callback to select the highest priority node from the ready Q.
2172   virtual SUnit *pickNode(bool &IsTopNode) {
2173     if (ReadyQ.empty()) return NULL;
2174     pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2175     SUnit *SU = ReadyQ.back();
2176     ReadyQ.pop_back();
2177     IsTopNode = false;
2178     DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
2179           << *SU->getInstr()
2180           << " ILP: " << DAG->getDFSResult()->getILP(SU)
2181           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
2182           << DAG->getDFSResult()->getSubtreeLevel(
2183             DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
2184     return SU;
2185   }
2186
2187   /// \brief Scheduler callback to notify that a new subtree is scheduled.
2188   virtual void scheduleTree(unsigned SubtreeID) {
2189     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2190   }
2191
2192   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
2193   /// DFSResults, and resort the priority Q.
2194   virtual void schedNode(SUnit *SU, bool IsTopNode) {
2195     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
2196   }
2197
2198   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2199
2200   virtual void releaseBottomNode(SUnit *SU) {
2201     ReadyQ.push_back(SU);
2202     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2203   }
2204 };
2205 } // namespace
2206
2207 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2208   return new ScheduleDAGMI(C, new ILPScheduler(true));
2209 }
2210 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2211   return new ScheduleDAGMI(C, new ILPScheduler(false));
2212 }
2213 static MachineSchedRegistry ILPMaxRegistry(
2214   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2215 static MachineSchedRegistry ILPMinRegistry(
2216   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2217
2218 //===----------------------------------------------------------------------===//
2219 // Machine Instruction Shuffler for Correctness Testing
2220 //===----------------------------------------------------------------------===//
2221
2222 #ifndef NDEBUG
2223 namespace {
2224 /// Apply a less-than relation on the node order, which corresponds to the
2225 /// instruction order prior to scheduling. IsReverse implements greater-than.
2226 template<bool IsReverse>
2227 struct SUnitOrder {
2228   bool operator()(SUnit *A, SUnit *B) const {
2229     if (IsReverse)
2230       return A->NodeNum > B->NodeNum;
2231     else
2232       return A->NodeNum < B->NodeNum;
2233   }
2234 };
2235
2236 /// Reorder instructions as much as possible.
2237 class InstructionShuffler : public MachineSchedStrategy {
2238   bool IsAlternating;
2239   bool IsTopDown;
2240
2241   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2242   // gives nodes with a higher number higher priority causing the latest
2243   // instructions to be scheduled first.
2244   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2245     TopQ;
2246   // When scheduling bottom-up, use greater-than as the queue priority.
2247   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2248     BottomQ;
2249 public:
2250   InstructionShuffler(bool alternate, bool topdown)
2251     : IsAlternating(alternate), IsTopDown(topdown) {}
2252
2253   virtual void initialize(ScheduleDAGMI *) {
2254     TopQ.clear();
2255     BottomQ.clear();
2256   }
2257
2258   /// Implement MachineSchedStrategy interface.
2259   /// -----------------------------------------
2260
2261   virtual SUnit *pickNode(bool &IsTopNode) {
2262     SUnit *SU;
2263     if (IsTopDown) {
2264       do {
2265         if (TopQ.empty()) return NULL;
2266         SU = TopQ.top();
2267         TopQ.pop();
2268       } while (SU->isScheduled);
2269       IsTopNode = true;
2270     }
2271     else {
2272       do {
2273         if (BottomQ.empty()) return NULL;
2274         SU = BottomQ.top();
2275         BottomQ.pop();
2276       } while (SU->isScheduled);
2277       IsTopNode = false;
2278     }
2279     if (IsAlternating)
2280       IsTopDown = !IsTopDown;
2281     return SU;
2282   }
2283
2284   virtual void schedNode(SUnit *SU, bool IsTopNode) {}
2285
2286   virtual void releaseTopNode(SUnit *SU) {
2287     TopQ.push(SU);
2288   }
2289   virtual void releaseBottomNode(SUnit *SU) {
2290     BottomQ.push(SU);
2291   }
2292 };
2293 } // namespace
2294
2295 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
2296   bool Alternate = !ForceTopDown && !ForceBottomUp;
2297   bool TopDown = !ForceBottomUp;
2298   assert((TopDown || !ForceTopDown) &&
2299          "-misched-topdown incompatible with -misched-bottomup");
2300   return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
2301 }
2302 static MachineSchedRegistry ShufflerRegistry(
2303   "shuffle", "Shuffle machine instructions alternating directions",
2304   createInstructionShuffler);
2305 #endif // !NDEBUG
2306
2307 //===----------------------------------------------------------------------===//
2308 // GraphWriter support for ScheduleDAGMI.
2309 //===----------------------------------------------------------------------===//
2310
2311 #ifndef NDEBUG
2312 namespace llvm {
2313
2314 template<> struct GraphTraits<
2315   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
2316
2317 template<>
2318 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
2319
2320   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
2321
2322   static std::string getGraphName(const ScheduleDAG *G) {
2323     return G->MF.getName();
2324   }
2325
2326   static bool renderGraphFromBottomUp() {
2327     return true;
2328   }
2329
2330   static bool isNodeHidden(const SUnit *Node) {
2331     return (Node->NumPreds > 10 || Node->NumSuccs > 10);
2332   }
2333
2334   static bool hasNodeAddressLabel(const SUnit *Node,
2335                                   const ScheduleDAG *Graph) {
2336     return false;
2337   }
2338
2339   /// If you want to override the dot attributes printed for a particular
2340   /// edge, override this method.
2341   static std::string getEdgeAttributes(const SUnit *Node,
2342                                        SUnitIterator EI,
2343                                        const ScheduleDAG *Graph) {
2344     if (EI.isArtificialDep())
2345       return "color=cyan,style=dashed";
2346     if (EI.isCtrlDep())
2347       return "color=blue,style=dashed";
2348     return "";
2349   }
2350
2351   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
2352     std::string Str;
2353     raw_string_ostream SS(Str);
2354     SS << "SU(" << SU->NodeNum << ')';
2355     return SS.str();
2356   }
2357   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
2358     return G->getGraphNodeLabel(SU);
2359   }
2360
2361   static std::string getNodeAttributes(const SUnit *N,
2362                                        const ScheduleDAG *Graph) {
2363     std::string Str("shape=Mrecord");
2364     const SchedDFSResult *DFS =
2365       static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
2366     if (DFS) {
2367       Str += ",style=filled,fillcolor=\"#";
2368       Str += DOT::getColorString(DFS->getSubtreeID(N));
2369       Str += '"';
2370     }
2371     return Str;
2372   }
2373 };
2374 } // namespace llvm
2375 #endif // NDEBUG
2376
2377 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
2378 /// rendered using 'dot'.
2379 ///
2380 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
2381 #ifndef NDEBUG
2382   ViewGraph(this, Name, false, Title);
2383 #else
2384   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
2385          << "systems with Graphviz or gv!\n";
2386 #endif  // NDEBUG
2387 }
2388
2389 /// Out-of-line implementation with no arguments is handy for gdb.
2390 void ScheduleDAGMI::viewGraph() {
2391   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
2392 }