lib/CodeGen/MachineScheduler.cpp

   1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "misched"
  16
  17 #include "llvm/CodeGen/MachineScheduler.h"
  18 #include "llvm/ADT/OwningPtr.h"
  19 #include "llvm/ADT/PriorityQueue.h"
  20 #include "llvm/Analysis/AliasAnalysis.h"
  21 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  22 #include "llvm/CodeGen/MachineDominators.h"
  23 #include "llvm/CodeGen/MachineLoopInfo.h"
  24 #include "llvm/CodeGen/MachineRegisterInfo.h"
  25 #include "llvm/CodeGen/Passes.h"
  26 #include "llvm/CodeGen/RegisterClassInfo.h"
  27 #include "llvm/CodeGen/ScheduleDFS.h"
  28 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  29 #include "llvm/Support/CommandLine.h"
  30 #include "llvm/Support/Debug.h"
  31 #include "llvm/Support/ErrorHandling.h"
  32 #include "llvm/Support/GraphWriter.h"
  33 #include "llvm/Support/raw_ostream.h"
  34 #include "llvm/Target/TargetInstrInfo.h"
  35 #include <queue>
  36
  37 using namespace llvm;
  38
  39 namespace llvm {
     // Declared without 'static' (unlike the other options in this file), so these
     // two direction-forcing flags have external linkage inside namespace llvm.
  40 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  41                            cl::desc("Force top-down list scheduling"));
  42 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  43                             cl::desc("Force bottom-up list scheduling"));
  44 }
  45
  46 #ifndef NDEBUG
     // The options in this section exist only when NDEBUG is not defined.
  47 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  48   cl::desc("Pop up a window to show MISched dags after they are processed"));
  49
     // The default of ~0U is the "no limit" value tested by checkSchedLimit().
  50 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  51   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  52
     // Filters consulted by scheduleRegions() to skip functions and blocks.
  53 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  54   cl::desc("Only schedule this function"));
  55 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  56   cl::desc("Only schedule this MBB#"));
  57 #else
     // With NDEBUG defined only this constant remains, so the unguarded
     // 'if (ViewMISchedDAGs)' in ScheduleDAGMI::schedule() still compiles.
  58 static bool ViewMISchedDAGs = false;
  59 #endif // NDEBUG
  60
  61 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  62   cl::desc("Enable register pressure scheduling."), cl::init(true));
  63
  64 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  65   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
  66
  67 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  68   cl::desc("Enable load clustering."), cl::init(true));
  69
  70 // Experimental heuristics
  71 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  72   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
  73
     // When set, both runOnMachineFunction implementations call MF->verify()
     // before and after scheduling. No cl::init is given for this option.
  74 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  75   cl::desc("Verify machine instrs before and after machine scheduling"));
  76
  77 // DAG subtrees must have at least this many nodes.
  78 static const unsigned MinSubtreeSize = 8;
  79
  80 // Pin the vtables to this file.
     // Both bodies are intentionally empty.
  81 void MachineSchedStrategy::anchor() {}
  82 void ScheduleDAGMutation::anchor() {}
  83
  84 //===----------------------------------------------------------------------===//
  85 // Machine Instruction Scheduling Pass and Registry
  86 //===----------------------------------------------------------------------===//
  87
  88 MachineSchedContext::MachineSchedContext():
  89     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
       // Every analysis pointer starts out null; only RegClassInfo is allocated
       // here, and the destructor deletes it.
  90   RegClassInfo = new RegisterClassInfo();
  91 }
  92
  93 MachineSchedContext::~MachineSchedContext() {
       // Frees the RegisterClassInfo allocated by the constructor.
  94   delete RegClassInfo;
  95 }
  96
  97 namespace {
  98 /// Base class for a machine scheduler class that can run at any point.
     /// It is both the pass and the MachineSchedContext, which is why the derived
     /// passes can hand 'this' to the scheduler factories.
  99 class MachineSchedulerBase : public MachineSchedContext,
 100                              public MachineFunctionPass {
 101 public:
 102   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
 103
 104   virtual void print(raw_ostream &O, const Module* = 0) const;
 105
 106 protected:
       /// Shared driver used by both derived passes: visits every block of MF,
       /// splits it into scheduling regions and runs \p Scheduler on each one.
 107   void scheduleRegions(ScheduleDAGInstrs &Scheduler);
 108 };
 109
 110 /// MachineScheduler runs after coalescing and before register allocation.
 111 class MachineScheduler : public MachineSchedulerBase {
 112 public:
 113   MachineScheduler();
 114
 115   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
 116
 117   virtual bool runOnMachineFunction(MachineFunction&);
 118
 119   static char ID; // Class identification, replacement for typeinfo
 120
 121 protected:
       /// Selects and instantiates the scheduler; the caller owns the result.
 122   ScheduleDAGInstrs *createMachineScheduler();
 123 };
 124
 125 /// PostMachineScheduler runs after register allocation, shortly before code
     /// emission.
 126 class PostMachineScheduler : public MachineSchedulerBase {
 127 public:
 128   PostMachineScheduler();
 129
 130   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
 131
 132   virtual bool runOnMachineFunction(MachineFunction&);
 133
 134   static char ID; // Class identification, replacement for typeinfo
 135
 136 protected:
       /// Instantiates the post-RA scheduler; the caller owns the result.
 137   ScheduleDAGInstrs *createPostMachineScheduler();
 138 };
 139 } // namespace
 140
 141 char MachineScheduler::ID = 0;
 142
     // Reference to the pass ID, defined in namespace llvm.
 143 char &llvm::MachineSchedulerID = MachineScheduler::ID;
 144
     // Pass registration under the name "misched"; AliasAnalysis, SlotIndexes and
     // LiveIntervals are declared as dependencies.
 145 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
 146                       "Machine Instruction Scheduler", false, false)
 147 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 148 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 149 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 150 INITIALIZE_PASS_END(MachineScheduler, "misched",
 151                     "Machine Instruction Scheduler", false, false)
 152
 153 MachineScheduler::MachineScheduler()
 154 : MachineSchedulerBase(ID) {
       // Run the pass initializer against the global PassRegistry on construction.
 155   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 156 }
 157
 158 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
       // Declares that this pass leaves the CFG unchanged.
 159   AU.setPreservesCFG();
       // Analyses fetched by runOnMachineFunction to populate the context.
 160   AU.addRequiredID(MachineDominatorsID);
 161   AU.addRequired<MachineLoopInfo>();
 162   AU.addRequired<AliasAnalysis>();
 163   AU.addRequired<TargetPassConfig>();
       // SlotIndexes and LiveIntervals are both required and reported as preserved.
 164   AU.addRequired<SlotIndexes>();
 165   AU.addPreserved<SlotIndexes>();
 166   AU.addRequired<LiveIntervals>();
 167   AU.addPreserved<LiveIntervals>();
 168   MachineFunctionPass::getAnalysisUsage(AU);
 169 }
 170
 171 char PostMachineScheduler::ID = 0;
 172
     // Reference to the pass ID, defined in namespace llvm.
 173 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
 174
     // Registered with the plain INITIALIZE_PASS form, i.e. without the
     // dependency list that MachineScheduler declares.
 175 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
 176                 "PostRA Machine Instruction Scheduler", false, false)
 177
 178 PostMachineScheduler::PostMachineScheduler()
 179 : MachineSchedulerBase(ID) {
       // Run the pass initializer against the global PassRegistry on construction.
 180   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
 181 }
 182
 183 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
       // Declares that this pass leaves the CFG unchanged.
 184   AU.setPreservesCFG();
       // Unlike MachineScheduler, no AliasAnalysis, SlotIndexes or LiveIntervals
       // are requested here.
 185   AU.addRequiredID(MachineDominatorsID);
 186   AU.addRequired<MachineLoopInfo>();
 187   AU.addRequired<TargetPassConfig>();
 188   MachineFunctionPass::getAnalysisUsage(AU);
 189 }
 190
 191 MachinePassRegistry MachineSchedRegistry::Registry; // Static member definition.
 192
 193 /// A dummy default scheduler factory indicates whether the scheduler
 194 /// is overridden on the command line.
     /// It always returns null; createMachineScheduler() only compares the selected
     /// constructor against this function's address. \p C is unused.
 195 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
 196   return 0;
 197 }
 198
 199 /// MachineSchedOpt allows command line selection of the scheduler.
     /// Its initial value is useDefaultMachineSched, meaning "not overridden".
 200 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
 201                RegisterPassParser<MachineSchedRegistry> >
 202 MachineSchedOpt("misched",
 203                 cl::init(&useDefaultMachineSched), cl::Hidden,
 204                 cl::desc("Machine instruction scheduler to use"));
 205
     // Registers the dummy factory under the name "default".
 206 static MachineSchedRegistry
 207 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
 208                      useDefaultMachineSched);
 209
 210 /// Forward declare the standard machine scheduler. This will be used as the
 211 /// default scheduler if the target does not set a default.
     /// createGenericSchedLive is the fallback in createMachineScheduler();
     /// createGenericSchedPostRA is the fallback in createPostMachineScheduler().
 212 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
 213 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
 214
 215 /// Decrement this iterator until reaching the top or a non-debug instr.
     ///
     /// \p I must not equal \p Beg on entry (asserted). The loop stops as soon as
     /// it reaches \p Beg without testing it, so the result may be \p Beg even when
     /// \p Beg itself is a debug value.
 216 static MachineBasicBlock::const_iterator
 217 priorNonDebug(MachineBasicBlock::const_iterator I,
 218               MachineBasicBlock::const_iterator Beg) {
 219   assert(I != Beg && "reached the top of the region, cannot decrement");
 220   while (--I != Beg) {
 221     if (!I->isDebugValue())
 222       break;
 223   }
 224   return I;
 225 }
 226
 227 /// Non-const version.
     /// Delegates to the const overload and casts away constness on the result.
 228 static MachineBasicBlock::iterator
 229 priorNonDebug(MachineBasicBlock::iterator I,
 230               MachineBasicBlock::const_iterator Beg) {
 231   return const_cast<MachineInstr*>(
 232     &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
 233 }
 234
 235 /// If this iterator is a debug value, increment until reaching the End or a
 236 /// non-debug instruction.
     ///
     /// Returns \p I unchanged when it already refers to a non-debug instruction,
     /// and \p End when every instruction in [I, End) is a debug value.
 237 static MachineBasicBlock::const_iterator
 238 nextIfDebug(MachineBasicBlock::const_iterator I,
 239             MachineBasicBlock::const_iterator End) {
 240   for(; I != End; ++I) {
 241     if (!I->isDebugValue())
 242       break;
 243   }
 244   return I;
 245 }
 246
 247 /// Non-const version.
     /// Delegates to the const overload; the result may be \p End.
 248 static MachineBasicBlock::iterator
 249 nextIfDebug(MachineBasicBlock::iterator I,
 250             MachineBasicBlock::const_iterator End) {
 251   // Cast the return value to nonconst MachineInstr, then cast to an
 252   // instr_iterator, which does not check for null, finally return a
 253   // bundle_iterator.
 254   return MachineBasicBlock::instr_iterator(
 255     const_cast<MachineInstr*>(
 256       &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
 257 }
 258
 259 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
     ///
     /// Precedence: a scheduler chosen with -misched on the command line, then the
     /// one supplied by the target's pass configuration, then the generic one.
 260 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
 261   // Select the scheduler, or set the default.
 262   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
       // Any constructor other than the dummy means the user overrode the choice.
 263   if (Ctor != useDefaultMachineSched)
 264     return Ctor(this);
 265
 266   // Get the default scheduler set by the target for this function.
 267   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
 268   if (Scheduler)
 269     return Scheduler;
 270
 271   // Default to GenericScheduler.
 272   return createGenericSchedLive(this);
 273 }
 274
 275 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
 276 /// the caller. We don't have a command line option to override the postRA
 277 /// scheduler. The Target must configure it.
     /// If the target's pass configuration returns null, the generic post-RA
     /// scheduler is used instead.
 278 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
 279   // Get the postRA scheduler set by the target for this function.
 280   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
 281   if (Scheduler)
 282     return Scheduler;
 283
 284   // Default to GenericScheduler.
 285   return createGenericSchedPostRA(this);
 286 }
 287
 288 /// Top-level MachineScheduler pass driver.
 289 ///
 290 /// Visit blocks in function order. Divide each block into scheduling regions
 291 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
 292 /// consistent with the DAG builder, which traverses the interior of the
 293 /// scheduling regions bottom-up.
 294 ///
 295 /// This design avoids exposing scheduling boundaries to the DAG builder,
 296 /// simplifying the DAG builder's support for "special" target instructions.
 297 /// At the same time the design allows target schedulers to operate across
 298 /// scheduling boundaries, for example to bundle the boundary instructions
 299 /// without reordering them. This creates complexity, because the target
 300 /// scheduler must update the RegionBegin and RegionEnd positions cached by
 301 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
 302 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 303 /// general bias against block splitting purely for implementation simplicity.
     ///
     /// Always returns true.
 304 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 305   DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
 306
 307   // Initialize the context of the pass.
 308   MF = &mf;
 309   MLI = &getAnalysis<MachineLoopInfo>();
 310   MDT = &getAnalysis<MachineDominatorTree>();
 311   PassConfig = &getAnalysis<TargetPassConfig>();
 312   AA = &getAnalysis<AliasAnalysis>();
 313
 314   LIS = &getAnalysis<LiveIntervals>();
 315
 316   if (VerifyScheduling) {
 317     DEBUG(LIS->dump());
 318     MF->verify(this, "Before machine scheduling.");
 319   }
       // Refresh the register class information for this function.
 320   RegClassInfo->runOnMachineFunction(*MF);
 321
 322   // Instantiate the selected scheduler for this target, function, and
 323   // optimization level.
       // OwningPtr deletes the scheduler when this function returns.
 324   OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
 325   scheduleRegions(*Scheduler);
 326
 327   DEBUG(LIS->dump());
 328   if (VerifyScheduling)
 329     MF->verify(this, "After machine scheduling.");
 330   return true;
 331 }
 332
 333 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
       // Post-RA pass driver: set up the context, create the post-RA scheduler and
       // run it over every region of the function. Always returns true.
 334   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
 335
 336   // Initialize the context of the pass.
       // Only MF and PassConfig are assigned here; MLI, MDT, AA and LIS are not.
       // NOTE(review): getAnalysisUsage requires MachineLoopInfo and
       // MachineDominators, yet neither is fetched here - confirm whether post-RA
       // schedulers are expected to read MLI/MDT from the context.
 337   MF = &mf;
 338   PassConfig = &getAnalysis<TargetPassConfig>();
 339
 340   if (VerifyScheduling)
 341     MF->verify(this, "Before post machine scheduling.");
 342
 343   // Instantiate the selected scheduler for this target, function, and
 344   // optimization level.
       // OwningPtr deletes the scheduler when this function returns.
 345   OwningPtr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
 346   scheduleRegions(*Scheduler);
 347
 348   if (VerifyScheduling)
 349     MF->verify(this, "After post machine scheduling.");
 350   return true;
 351 }
 352
 353 /// Return true if the given instruction should not be included in a scheduling
 354 /// region.
 355 ///
 356 /// MachineScheduler does not currently support scheduling across calls. To
 357 /// handle calls, the DAG builder needs to be modified to create register
 358 /// anti/output dependencies on the registers clobbered by the call's regmask
 359 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
 360 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
 361 /// the boundary, but there would be no benefit to postRA scheduling across
 362 /// calls this late anyway.
     ///
     /// \p MBB and \p MF are forwarded to the target hook. \p IsPostRA is not read
     /// by the current body.
 363 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
 364                             MachineBasicBlock *MBB,
 365                             MachineFunction *MF,
 366                             const TargetInstrInfo *TII,
 367                             bool IsPostRA) {
 368   return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
 369 }
 370
 371 /// Main driver for both MachineScheduler and PostMachineScheduler.
     ///
     /// For every block of MF: startBlock(), then for each region found walking
     /// bottom-up: enterRegion(), optionally schedule(), exitRegion(); finally
     /// finishBlock(). finalizeSchedule() is called once after all blocks.
 372 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
 373   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 374   bool IsPostRA = Scheduler.isPostRA();
 375
 376   // Visit all machine basic blocks.
 377   //
 378   // TODO: Visit blocks in global postorder or postorder within the bottom-up
 379   // loop tree. Then we can optionally compute global RegPressure.
 380   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
 381        MBB != MBBEnd; ++MBB) {
 382
 383     Scheduler.startBlock(MBB);
 384
 385 #ifndef NDEBUG
         // NOTE(review): these debug-only filters 'continue' after startBlock() has
         // been called, so finishBlock() is not reached for a skipped block.
 386     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
 387       continue;
 388     if (SchedOnlyBlock.getNumOccurrences()
 389         && (int)SchedOnlyBlock != MBB->getNumber())
 390       continue;
 391 #endif
 392
 393     // Break the block into scheduling regions [I, RegionEnd), and schedule each
 394     // region as soon as it is discovered. RegionEnd points to the scheduling
 395     // boundary at the bottom of the region. The DAG does not include RegionEnd,
 396     // but the region does (i.e. the next RegionEnd is above the previous
 397     // RegionBegin). If the current block has no terminator then RegionEnd ==
 398     // MBB->end() for the bottom region.
 399     //
 400     // The Scheduler may insert instructions during either schedule() or
 401     // exitRegion(), even for empty regions. So the local iterators 'I' and
 402     // 'RegionEnd' are invalid across these calls.
 403     //
 404     // MBB::size() uses instr_iterator to count. Here we need a bundle to count
 405     // as a single instruction.
 406     unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
 407     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
 408         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
 409
 410       // Avoid decrementing RegionEnd for blocks with no terminator.
 411       if (RegionEnd != MBB->end() ||
 412           isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
 413         --RegionEnd;
 414         // Count the boundary instruction.
 415         --RemainingInstrs;
 416       }
 417
 418       // The next region starts above the previous region. Look backward in the
 419       // instruction stream until we find the nearest boundary.
 420       unsigned NumRegionInstrs = 0;
 421       MachineBasicBlock::iterator I = RegionEnd;
 422       for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
 423         if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
 424           break;
 425       }
 426       // Notify the scheduler of the region, even if we may skip scheduling
 427       // it. Perhaps it still needs to be bundled.
 428       Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
 429
 430       // Skip empty scheduling regions (0 or 1 schedulable instructions).
 431       if (I == RegionEnd || I == std::prev(RegionEnd)) {
 432         // Close the current region. Bundle the terminator if needed.
 433         // This invalidates 'RegionEnd' and 'I'.
 434         Scheduler.exitRegion();
             // The loop increment reloads RegionEnd from Scheduler.begin().
 435         continue;
 436       }
 437       DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
 438             << "MI Scheduling **********\n");
 439       DEBUG(dbgs() << MF->getName()
 440             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
 441             << "\n  From: " << *I << "    To: ";
 442             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
 443             else dbgs() << "End";
 444             dbgs() << " RegionInstrs: " << NumRegionInstrs
 445             << " Remaining: " << RemainingInstrs << "\n");
 446
 447       // Schedule a region: possibly reorder instructions.
 448       // This invalidates 'RegionEnd' and 'I'.
 449       Scheduler.schedule();
 450
 451       // Close the current region.
 452       Scheduler.exitRegion();
 453
 454       // Scheduling has invalidated the current iterator 'I'. Ask the
 455       // scheduler for the top of its scheduled region.
 456       RegionEnd = Scheduler.begin();
 457     }
         // Every instruction of the block must have been counted into some region
         // or as a boundary.
 458     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
 459     Scheduler.finishBlock();
 460     if (Scheduler.isPostRA()) {
 461       // FIXME: Ideally, no further passes should rely on kill flags. However,
 462       // thumb2 size reduction is currently an exception.
 463       Scheduler.fixupKills(MBB);
 464     }
 465   }
 466   Scheduler.finalizeSchedule();
 467 }
 468
 469 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
       // Intentionally a no-op: nothing is written to O.
 470   // unimplemented
 471 }
 472
 473 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     // Prints the queue name followed by the NodeNum of each queued SUnit, in
     // queue order, on a single line of dbgs().
 474 void ReadyQueue::dump() {
 475   dbgs() << Name << ": ";
 476   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
 477     dbgs() << Queue[i]->NodeNum << " ";
 478   dbgs() << "\n";
 479 }
 480 #endif
 481
 482 //===----------------------------------------------------------------------===//
 483 // ScheduleDAGMI - Basic machine instruction scheduling. This is
 484 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
 485 // virtual registers.
 486 //===----------------------------------------------------------------------===//
 487
 488 ScheduleDAGMI::~ScheduleDAGMI() {
       // The DAG deletes both its registered mutations and its strategy object.
 489   DeleteContainerPointers(Mutations);
 490   delete SchedImpl;
 491 }
 492
 493 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
       // Same test addEdge() applies before inserting: edges into ExitSU are always
       // accepted, any other edge only if the Topo.IsReachable query is false.
 494   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
 495 }
 496
 497 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
       // Adds PredDep as a predecessor edge of SuccSU. Returns false, adding
       // nothing, if the edge would create a cycle; otherwise returns true.
       // Edges into ExitSU skip both the cycle test and the Topo update.
 498   if (SuccSU != &ExitSU) {
 499     // Do not use WillCreateCycle, it assumes SD scheduling.
 500     // If Pred is reachable from Succ, then the edge creates a cycle.
 501     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
 502       return false;
 503     Topo.AddPred(SuccSU, PredDep.getSUnit());
 504   }
       // Artificial dependencies are passed with Required=false.
 505   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
 506   // Return true regardless of whether a new edge needed to be inserted.
 507   return true;
 508 }
 509
 510 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 511 /// NumPredsLeft reaches zero, release the successor node.
 512 ///
 513 /// FIXME: Adjust SuccSU height based on MinLatency.
     ///
     /// \p SU is not read by the current body; only \p SuccEdge is used.
 514 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 515   SUnit *SuccSU = SuccEdge->getSUnit();
 516
       // Weak edges only decrement WeakPredsLeft and never release the node; a
       // cluster edge additionally records the successor in NextClusterSucc.
 517   if (SuccEdge->isWeak()) {
 518     --SuccSU->WeakPredsLeft;
 519     if (SuccEdge->isCluster())
 520       NextClusterSucc = SuccSU;
 521     return;
 522   }
 523 #ifndef NDEBUG
       // Guard against decrementing a count that is already zero.
 524   if (SuccSU->NumPredsLeft == 0) {
 525     dbgs() << "*** Scheduling failed! ***\n";
 526     SuccSU->dump(this);
 527     dbgs() << " has been released too many times!\n";
 528     llvm_unreachable(0);
 529   }
 530 #endif
 531   --SuccSU->NumPredsLeft;
       // ExitSU is never handed to the strategy.
 532   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
 533     SchedImpl->releaseTopNode(SuccSU);
 534 }
 535
 536 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
     /// Edges are visited in the order they appear in SU->Succs.
 537 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 538   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 539        I != E; ++I) {
 540     releaseSucc(SU, &*I);
 541   }
 542 }
 543
 544 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
 545 /// NumSuccsLeft reaches zero, release the predecessor node.
 546 ///
 547 /// FIXME: Adjust PredSU height based on MinLatency.
     ///
     /// \p SU is not read by the current body; only \p PredEdge is used.
 548 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 549   SUnit *PredSU = PredEdge->getSUnit();
 550
       // Weak edges only decrement WeakSuccsLeft and never release the node; a
       // cluster edge additionally records the predecessor in NextClusterPred.
 551   if (PredEdge->isWeak()) {
 552     --PredSU->WeakSuccsLeft;
 553     if (PredEdge->isCluster())
 554       NextClusterPred = PredSU;
 555     return;
 556   }
 557 #ifndef NDEBUG
       // Guard against decrementing a count that is already zero.
 558   if (PredSU->NumSuccsLeft == 0) {
 559     dbgs() << "*** Scheduling failed! ***\n";
 560     PredSU->dump(this);
 561     dbgs() << " has been released too many times!\n";
 562     llvm_unreachable(0);
 563   }
 564 #endif
 565   --PredSU->NumSuccsLeft;
       // EntrySU is never handed to the strategy.
 566   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
 567     SchedImpl->releaseBottomNode(PredSU);
 568 }
 569
 570 /// releasePredecessors - Call releasePred on each of SU's predecessors.
     /// Edges are visited in the order they appear in SU->Preds.
 571 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
 572   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
 573        I != E; ++I) {
 574     releasePred(SU, &*I);
 575   }
 576 }
 577
 578 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 579 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 580 /// the region, including the boundary itself and single-instruction regions
 581 /// that don't get scheduled.
     ///
     /// Forwards to the base class, then gives the strategy the same region bounds
     /// and instruction count through initPolicy().
 582 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
 583                                      MachineBasicBlock::iterator begin,
 584                                      MachineBasicBlock::iterator end,
 585                                      unsigned regioninstrs)
 586 {
 587   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
 588
 589   SchedImpl->initPolicy(begin, end, regioninstrs);
 590 }
 591
 592 /// This is normally called from the main scheduler loop but may also be invoked
 593 /// by the scheduling strategy to perform additional code motion.
     ///
     /// Moves \p MI to just before \p InsertPos within BB, keeping RegionBegin
     /// valid and updating LiveIntervals when LIS is non-null.
 594 void ScheduleDAGMI::moveInstruction(
 595   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
 596   // Advance RegionBegin if the first instruction moves down.
 597   if (&*RegionBegin == MI)
 598     ++RegionBegin;
 599
 600   // Update the instruction stream.
 601   BB->splice(InsertPos, BB, MI);
 602
 603   // Update LiveIntervals
 604   if (LIS)
 605     LIS->handleMove(MI, /*UpdateFlags=*/true);
 606
 607   // Recede RegionBegin if an instruction moves above the first.
 608   if (RegionBegin == InsertPos)
 609     RegionBegin = MI;
 610 }
 611
 612 bool ScheduleDAGMI::checkSchedLimit() {
       // Returns false once the -misched-cutoff instruction count has been reached
       // (builds without NDEBUG only); otherwise counts the instruction and returns
       // true. With NDEBUG defined this always returns true.
 613 #ifndef NDEBUG
 614   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
         // Collapse the unscheduled zone so schedule()'s final assertion
         // (CurrentTop == CurrentBottom) still holds after the early exit.
 615     CurrentTop = CurrentBottom;
 616     return false;
 617   }
 618   ++NumInstrsScheduled;
 619 #endif
 620   return true;
 621 }
 622
 623 /// Per-region scheduling driver, called back from
 624 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
 625 /// does not consider liveness or register pressure. It is useful for PostRA
 626 /// scheduling and potentially other custom schedulers.
     ///
     /// Steps: build the DAG, apply mutations, collect roots, initialize the
     /// strategy and queues, then repeatedly ask the strategy for a node and move
     /// its instruction to the top or bottom of the unscheduled zone
     /// [CurrentTop, CurrentBottom) until the strategy returns null.
 627 void ScheduleDAGMI::schedule() {
 628   // Build the DAG.
 629   buildSchedGraph(AA);
 630
 631   Topo.InitDAGTopologicalSorting();
 632
 633   postprocessDAG();
 634
 635   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 636   findRootsAndBiasEdges(TopRoots, BotRoots);
 637
 638   // Initialize the strategy before modifying the DAG.
 639   // This may initialize a DFSResult to be used for queue priority.
 640   SchedImpl->initialize(this);
 641
 642   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 643           SUnits[su].dumpAll(this));
 644   if (ViewMISchedDAGs) viewGraph();
 645
 646   // Initialize ready queues now that the DAG and priority data are finalized.
 647   initQueues(TopRoots, BotRoots);
 648
       // pickNode() sets IsTopNode to say which end of the zone the node goes to.
 649   bool IsTopNode = false;
 650   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
 651     assert(!SU->isScheduled && "Node already scheduled");
 652     if (!checkSchedLimit())
 653       break;
 654
 655     MachineInstr *MI = SU->getInstr();
 656     if (IsTopNode) {
 657       assert(SU->isTopReady() && "node still has unscheduled dependencies");
           // Already at the top of the zone: just shrink the zone past it (and past
           // any debug values that follow). Otherwise move it to the top.
 658       if (&*CurrentTop == MI)
 659         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
 660       else
 661         moveInstruction(MI, CurrentTop);
 662     }
 663     else {
 664       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
 665       MachineBasicBlock::iterator priorII =
 666         priorNonDebug(CurrentBottom, CurrentTop);
           // Already the last non-debug instruction of the zone: just shrink the
           // zone. Otherwise move it to the bottom.
 667       if (&*priorII == MI)
 668         CurrentBottom = priorII;
 669       else {
             // If MI is the current top, advance CurrentTop before MI is moved away.
 670         if (&*CurrentTop == MI)
 671           CurrentTop = nextIfDebug(++CurrentTop, priorII);
 672         moveInstruction(MI, CurrentBottom);
 673         CurrentBottom = MI;
 674       }
 675     }
 676     updateQueues(SU, IsTopNode);
 677
 678     // Notify the scheduling strategy after updating the DAG.
 679     SchedImpl->schedNode(SU, IsTopNode);
 680   }
 681   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 682
 683   placeDebugValues();
 684
 685   DEBUG({
 686       unsigned BBNum = begin()->getParent()->getNumber();
 687       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
 688       dumpSchedule();
 689       dbgs() << '\n';
 690     });
 691 }
 692
 693 /// Apply each ScheduleDAGMutation step in order.
     /// "In order" means the order in which they are stored in Mutations.
 694 void ScheduleDAGMI::postprocessDAG() {
 695   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
 696     Mutations[i]->apply(this);
 697   }
 698 }
 699
 700 void ScheduleDAGMI::
 701 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
 702                       SmallVectorImpl<SUnit*> &BotRoots) {
       // Appends to TopRoots every SUnit with no predecessors left and to BotRoots
       // every SUnit with no successors left (a node may land in both), and calls
       // biasCriticalPath() on every SUnit and on ExitSU.
 703   for (std::vector<SUnit>::iterator
 704          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
 705     SUnit *SU = &(*I);
 706     assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
 707
 708     // Order predecessors so DFSResult follows the critical path.
 709     SU->biasCriticalPath();
 710
 711     // A SUnit is ready to top schedule if it has no predecessors.
 712     if (!I->NumPredsLeft)
 713       TopRoots.push_back(SU);
 714     // A SUnit is ready to bottom schedule if it has no successors.
 715     if (!I->NumSuccsLeft)
 716       BotRoots.push_back(SU);
 717   }
 718   ExitSU.biasCriticalPath();
 719 }
 720
 721 /// Identify DAG roots and setup scheduler queues.
     ///
     /// Also resets the cluster hints and sets the unscheduled zone
     /// [CurrentTop, CurrentBottom) to the whole region minus leading debug values.
 722 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
 723                                ArrayRef<SUnit*> BotRoots) {
 724   NextClusterSucc = NULL;
 725   NextClusterPred = NULL;
 726
 727   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
 728   //
 729   // Nodes with unreleased weak edges can still be roots.
 730   // Release top roots in forward order.
 731   for (SmallVectorImpl<SUnit*>::const_iterator
 732          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
 733     SchedImpl->releaseTopNode(*I);
 734   }
 735   // Release bottom roots in reverse order so the higher priority nodes appear
 736   // first. This is more natural and slightly more efficient.
 737   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
 738          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
 739     SchedImpl->releaseBottomNode(*I);
 740   }
 741
       // Process the edges hanging off the two boundary nodes as well.
 742   releaseSuccessors(&EntrySU);
 743   releasePredecessors(&ExitSU);
 744
 745   SchedImpl->registerRoots();
 746
 747   // Advance past initial DebugValues.
 748   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
 749   CurrentBottom = RegionEnd;
 750 }
 751
 752 /// Update scheduler queues after scheduling an instruction.
     /// Top-scheduled nodes release their successors, bottom-scheduled nodes their
     /// predecessors; the node is marked scheduled only after that release.
 753 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
 754   // Release dependent instructions for scheduling.
 755   if (IsTopNode)
 756     releaseSuccessors(SU);
 757   else
 758     releasePredecessors(SU);
 759
 760   SU->isScheduled = true;
 761 }
 762
 763 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
     ///
     /// Clears DbgValues and FirstDbgValue when done, and adjusts RegionBegin and
     /// RegionEnd as debug values are spliced back in.
 764 void ScheduleDAGMI::placeDebugValues() {
 765   // If first instruction was a DBG_VALUE then put it back.
 766   if (FirstDbgValue) {
 767     BB->splice(RegionBegin, BB, FirstDbgValue);
 768     RegionBegin = FirstDbgValue;
 769   }
 770
       // Walk DbgValues from the last entry to the first. Each pair holds the debug
       // value and the instruction it is spliced back in directly after.
 771   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
 772          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
 773     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
 774     MachineInstr *DbgValue = P.first;
 775     MachineBasicBlock::iterator OrigPrevMI = P.second;
         // Keep RegionBegin valid if the debug value currently heads the region.
 776     if (&*RegionBegin == DbgValue)
 777       ++RegionBegin;
 778     BB->splice(++OrigPrevMI, BB, DbgValue);
 779     if (OrigPrevMI == std::prev(RegionEnd))
 780       RegionEnd = DbgValue;
 781   }
 782   DbgValues.clear();
 783   FirstDbgValue = NULL;
 784 }
 785
 786 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     // Dumps the SUnit of every instruction in [begin(), end()) in instruction
     // order, printing "Missing SUnit" for instructions that have none.
 787 void ScheduleDAGMI::dumpSchedule() const {
 788   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
 789     if (SUnit *SU = getSUnit(&(*MI)))
 790       SU->dump(this);
 791     else
 792       dbgs() << "Missing SUnit\n";
 793   }
 794 }
 795 #endif
 796
 797 //===----------------------------------------------------------------------===//
 798 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
 799 // preservation.
 800 //===----------------------------------------------------------------------===//
 801
// Release the DFS result, which computeDFSResult() allocates on demand.
// Deleting a null pointer is a no-op.
ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}
 805
 806 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 807 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 808 /// the region, including the boundary itself and single-instruction regions
 809 /// that don't get scheduled.
 810 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
 811                                 MachineBasicBlock::iterator begin,
 812                                 MachineBasicBlock::iterator end,
 813                                 unsigned regioninstrs)
 814 {
 815   // ScheduleDAGMI initializes SchedImpl's per-region policy.
 816   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
 817
 818   // For convenience remember the end of the liveness region.
 819   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
 820
 821   SUPressureDiffs.clear();
 822
 823   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
 824 }
 825
// Set up the register pressure trackers for the top-scheduled and
// bottom-scheduled zones, then cache the pressure sets whose max pressure in
// the region already exceeds their limit.
void ScheduleDAGMILive::initRegPressure() {
  // The top tracker starts at the first region instruction, the bottom
  // tracker at the end of the liveness region.
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.dump());

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Live-through pressure is computed on the bottom tracker and copied to the
  // top tracker only when it is non-empty.
  BotRPTracker.initLiveThru(RPTracker);
  if (!BotRPTracker.getLiveThru().empty()) {
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    DEBUG(dbgs() << "Live Thru: ";
          dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  };

  // For each live out vreg reduce the pressure change associated with other
  // uses of the same vreg below the live-out reaching def.
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd) {
    SmallVector<unsigned, 8> LiveUses;
    BotRPTracker.recede(&LiveUses);
    updatePressureDiffs(LiveUses);
  }

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  const std::vector<unsigned> &RegionPressure =
    RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    // Only sets strictly above their limit are recorded as critical.
    if (RegionPressure[i] > Limit) {
      DEBUG(dbgs() << TRI->getRegPressureSetName(i)
            << " Limit " << Limit
            << " Actual " << RegionPressure[i] << "\n");
      RegionCriticalPSets.push_back(PressureChange(i));
    }
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].getPSet()) << " ";
        dbgs() << "\n");
}
 887
 888 void ScheduleDAGMILive::
 889 updateScheduledPressure(const SUnit *SU,
 890                         const std::vector<unsigned> &NewMaxPressure) {
 891   const PressureDiff &PDiff = getPressureDiff(SU);
 892   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
 893   for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
 894        I != E; ++I) {
 895     if (!I->isValid())
 896       break;
 897     unsigned ID = I->getPSet();
 898     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
 899       ++CritIdx;
 900     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
 901       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
 902           && NewMaxPressure[ID] <= INT16_MAX)
 903         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
 904     }
 905     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
 906     if (NewMaxPressure[ID] >= Limit - 2) {
 907       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
 908             << NewMaxPressure[ID] << " > " << Limit << "(+ "
 909             << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
 910     }
 911   }
 912 }
 913
 914 /// Update the PressureDiff array for liveness after scheduling this
 915 /// instruction.
 916 void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
 917   for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
 918     /// FIXME: Currently assuming single-use physregs.
 919     unsigned Reg = LiveUses[LUIdx];
 920     DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
 921     if (!TRI->isVirtualRegister(Reg))
 922       continue;
 923
 924     // This may be called before CurrentBottom has been initialized. However,
 925     // BotRPTracker must have a valid position. We want the value live into the
 926     // instruction or live out of the block, so ask for the previous
 927     // instruction's live-out.
 928     const LiveInterval &LI = LIS->getInterval(Reg);
 929     VNInfo *VNI;
 930     MachineBasicBlock::const_iterator I =
 931       nextIfDebug(BotRPTracker.getPos(), BB->end());
 932     if (I == BB->end())
 933       VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
 934     else {
 935       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
 936       VNI = LRQ.valueIn();
 937     }
 938     // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
 939     assert(VNI && "No live value at use.");
 940     for (VReg2UseMap::iterator
 941            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
 942       SUnit *SU = UI->SU;
 943       DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
 944             << *SU->getInstr());
 945       // If this use comes before the reaching def, it cannot be a last use, so
 946       // descrease its pressure change.
 947       if (!SU->isScheduled && SU != &ExitSU) {
 948         LiveQueryResult LRQ
 949           = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
 950         if (LRQ.valueIn() == VNI)
 951           getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
 952       }
 953     }
 954   }
 955 }
 956
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
  // Build the DAG (and the pressure trackers when pressure is tracked).
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  // Move the top tracker to CurrentTop, which initQueues() has just set.
  if (ShouldTrackPressure) {
    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    TopRPTracker.setPos(CurrentTop);
  }

  // Main loop: the strategy picks a node and tells us which end it goes to.
  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);

    // The first node scheduled from a DFS subtree marks the whole subtree as
    // scheduled and notifies both the DFS result and the strategy.
    if (DFSResult) {
      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
      if (!ScheduledTrees.test(SubtreeID)) {
        ScheduledTrees.set(SubtreeID);
        DFSResult->scheduleTree(SubtreeID);
        SchedImpl->scheduleTree(SubtreeID);
      }
    }

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}
1026
1027 /// Build the DAG and setup three register pressure trackers.
1028 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1029   if (!ShouldTrackPressure) {
1030     RPTracker.reset();
1031     RegionCriticalPSets.clear();
1032     buildSchedGraph(AA);
1033     return;
1034   }
1035
1036   // Initialize the register pressure tracker used by buildSchedGraph.
1037   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1038                  /*TrackUntiedDefs=*/true);
1039
1040   // Account for liveness generate by the region boundary.
1041   if (LiveRegionEnd != RegionEnd)
1042     RPTracker.recede();
1043
1044   // Build the DAG, and compute current register pressure.
1045   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
1046
1047   // Initialize top/bottom trackers after computing region pressure.
1048   initRegPressure();
1049 }
1050
1051 void ScheduleDAGMILive::computeDFSResult() {
1052   if (!DFSResult)
1053     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1054   DFSResult->clear();
1055   ScheduledTrees.clear();
1056   DFSResult->resize(SUnits.size());
1057   DFSResult->compute(SUnits);
1058   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1059 }
1060
1061 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1062 /// only provides the critical path for single block loops. To handle loops that
1063 /// span blocks, we could use the vreg path latencies provided by
1064 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1065 /// available for use in the scheduler.
1066 ///
1067 /// The cyclic path estimation identifies a def-use pair that crosses the back
1068 /// edge and considers the depth and height of the nodes. For example, consider
1069 /// the following instruction sequence where each instruction has unit latency
1070 /// and defines an epomymous virtual register:
1071 ///
1072 /// a->b(a,c)->c(b)->d(c)->exit
1073 ///
1074 /// The cyclic critical path is a two cycles: b->c->b
1075 /// The acyclic critical path is four cycles: a->b->c->d->exit
1076 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1077 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1078 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1079 /// LiveInDepth = depth(b) = len(a->b) = 1
1080 ///
1081 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1082 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1083 /// CyclicCriticalPath = min(2, 2) = 2
1084 ///
1085 /// This could be relevant to PostRA scheduling, but is currently implemented
1086 /// assuming LiveIntervals.
1087 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1088   // This only applies to single block loop.
1089   if (!BB->isSuccessor(BB))
1090     return 0;
1091
1092   unsigned MaxCyclicLatency = 0;
1093   // Visit each live out vreg def to find def/use pairs that cross iterations.
1094   ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
1095   for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
1096        RI != RE; ++RI) {
1097     unsigned Reg = *RI;
1098     if (!TRI->isVirtualRegister(Reg))
1099         continue;
1100     const LiveInterval &LI = LIS->getInterval(Reg);
1101     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1102     if (!DefVNI)
1103       continue;
1104
1105     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1106     const SUnit *DefSU = getSUnit(DefMI);
1107     if (!DefSU)
1108       continue;
1109
1110     unsigned LiveOutHeight = DefSU->getHeight();
1111     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1112     // Visit all local users of the vreg def.
1113     for (VReg2UseMap::iterator
1114            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
1115       if (UI->SU == &ExitSU)
1116         continue;
1117
1118       // Only consider uses of the phi.
1119       LiveQueryResult LRQ =
1120         LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
1121       if (!LRQ.valueIn()->isPHIDef())
1122         continue;
1123
1124       // Assume that a path spanning two iterations is a cycle, which could
1125       // overestimate in strange cases. This allows cyclic latency to be
1126       // estimated as the minimum slack of the vreg's depth or height.
1127       unsigned CyclicLatency = 0;
1128       if (LiveOutDepth > UI->SU->getDepth())
1129         CyclicLatency = LiveOutDepth - UI->SU->getDepth();
1130
1131       unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
1132       if (LiveInHeight > LiveOutHeight) {
1133         if (LiveInHeight - LiveOutHeight < CyclicLatency)
1134           CyclicLatency = LiveInHeight - LiveOutHeight;
1135       }
1136       else
1137         CyclicLatency = 0;
1138
1139       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1140             << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
1141       if (CyclicLatency > MaxCyclicLatency)
1142         MaxCyclicLatency = CyclicLatency;
1143     }
1144   }
1145   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1146   return MaxCyclicLatency;
1147 }
1148
/// Move an instruction and update register pressure.
///
/// \param SU        The node whose instruction is being scheduled.
/// \param IsTopNode True to place the instruction at the top of the
///                  unscheduled zone (CurrentTop), false to place it at the
///                  bottom (CurrentBottom).
void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    // If MI already sits at CurrentTop nothing moves; just advance CurrentTop.
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      // Otherwise move MI in front of CurrentTop and point the top tracker
      // at it.
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    if (ShouldTrackPressure) {
      // Update top scheduled pressure.
      TopRPTracker.advance();
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
    }
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    // If MI is already the instruction found by priorNonDebug, only
    // CurrentBottom needs to move up to it.
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      // MI is about to leave the top position, so advance CurrentTop (and the
      // top tracker) past it first.
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    if (ShouldTrackPressure) {
      // Update bottom scheduled pressure.
      SmallVector<unsigned, 8> LiveUses;
      BotRPTracker.recede(&LiveUses);
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
      // Receding collected live uses into LiveUses; refresh their diffs.
      updatePressureDiffs(LiveUses);
    }
  }
}
1194
1195 //===----------------------------------------------------------------------===//
1196 // LoadClusterMutation - DAG post-processing to cluster loads.
1197 //===----------------------------------------------------------------------===//
1198
namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads.
class LoadClusterMutation : public ScheduleDAGMutation {
  /// One load together with the base register and offset reported for it by
  /// TargetInstrInfo::getLdStBaseRegImmOfs.
  struct LoadInfo {
    SUnit *SU;
    unsigned BaseReg;
    unsigned Offset;
    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
      : SU(su), BaseReg(reg), Offset(ofs) {}
  };
  /// Ordering used when sorting LoadInfo records: by base register first,
  /// then by offset.
  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
                           const LoadClusterMutation::LoadInfo &RHS);

  // Target hooks used to decode load addresses and to decide on clustering.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
public:
  LoadClusterMutation(const TargetInstrInfo *tii,
                      const TargetRegisterInfo *tri)
    : TII(tii), TRI(tri) {}

  /// Entry point: groups the DAG's loads and clusters each group.
  virtual void apply(ScheduleDAGMI *DAG);
protected:
  /// Add cluster edges between neighboring loads within \p Loads.
  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
} // anonymous
1225
1226 bool LoadClusterMutation::LoadInfoLess(
1227   const LoadClusterMutation::LoadInfo &LHS,
1228   const LoadClusterMutation::LoadInfo &RHS) {
1229   if (LHS.BaseReg != RHS.BaseReg)
1230     return LHS.BaseReg < RHS.BaseReg;
1231   return LHS.Offset < RHS.Offset;
1232 }
1233
1234 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
1235                                                   ScheduleDAGMI *DAG) {
1236   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
1237   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
1238     SUnit *SU = Loads[Idx];
1239     unsigned BaseReg;
1240     unsigned Offset;
1241     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
1242       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
1243   }
1244   if (LoadRecords.size() < 2)
1245     return;
1246   std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
1247   unsigned ClusterLength = 1;
1248   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
1249     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
1250       ClusterLength = 1;
1251       continue;
1252     }
1253
1254     SUnit *SUa = LoadRecords[Idx].SU;
1255     SUnit *SUb = LoadRecords[Idx+1].SU;
1256     if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
1257         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1258
1259       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
1260             << SUb->NodeNum << ")\n");
1261       // Copy successor edges from SUa to SUb. Interleaving computation
1262       // dependent on SUa can prevent load combining due to register reuse.
1263       // Predecessor edges do not need to be copied from SUb to SUa since nearby
1264       // loads should have effectively the same inputs.
1265       for (SUnit::const_succ_iterator
1266              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1267         if (SI->getSUnit() == SUb)
1268           continue;
1269         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1270         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1271       }
1272       ++ClusterLength;
1273     }
1274     else
1275       ClusterLength = 1;
1276   }
1277 }
1278
1279 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1280 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
1281   // Map DAG NodeNum to store chain ID.
1282   DenseMap<unsigned, unsigned> StoreChainIDs;
1283   // Map each store chain to a set of dependent loads.
1284   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1285   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1286     SUnit *SU = &DAG->SUnits[Idx];
1287     if (!SU->getInstr()->mayLoad())
1288       continue;
1289     unsigned ChainPredID = DAG->SUnits.size();
1290     for (SUnit::const_pred_iterator
1291            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1292       if (PI->isCtrl()) {
1293         ChainPredID = PI->getSUnit()->NodeNum;
1294         break;
1295       }
1296     }
1297     // Check if this chain-like pred has been seen
1298     // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
1299     unsigned NumChains = StoreChainDependents.size();
1300     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1301       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1302     if (Result.second)
1303       StoreChainDependents.resize(NumChains + 1);
1304     StoreChainDependents[Result.first->second].push_back(SU);
1305   }
1306   // Iterate over the store chains.
1307   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1308     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
1309 }
1310
1311 //===----------------------------------------------------------------------===//
1312 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
1313 //===----------------------------------------------------------------------===//
1314
namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
  // Target hook provider; queried via shouldScheduleAdjacent().
  const TargetInstrInfo *TII;
public:
  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}

  /// Entry point: adds at most one cluster edge to the DAG's ExitSU.
  virtual void apply(ScheduleDAGMI *DAG);
};
} // anonymous
1326
1327 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
1328 /// fused operations.
1329 void MacroFusion::apply(ScheduleDAGMI *DAG) {
1330   // For now, assume targets can only fuse with the branch.
1331   MachineInstr *Branch = DAG->ExitSU.getInstr();
1332   if (!Branch)
1333     return;
1334
1335   for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
1336     SUnit *SU = &DAG->SUnits[--Idx];
1337     if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
1338       continue;
1339
1340     // Create a single weak edge from SU to ExitSU. The only effect is to cause
1341     // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
1342     // need to copy predecessor edges from ExitSU to SU, since top-down
1343     // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
1344     // of SU, we could create an artificial edge from the deepest root, but it
1345     // hasn't been needed yet.
1346     bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
1347     (void)Success;
1348     assert(Success && "No DAG nodes should be reachable from ExitSU");
1349
1350     DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
1351     break;
1352   }
1353 }
1354
1355 //===----------------------------------------------------------------------===//
1356 // CopyConstrain - DAG post-processing to encourage copy elimination.
1357 //===----------------------------------------------------------------------===//
1358
namespace {
/// \brief Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
class CopyConstrain : public ScheduleDAGMutation {
  // Transient state, recomputed by apply() for each DAG.
  SlotIndex RegionBeginIdx;
  // RegionEndIdx is the slot index of the last non-debug instruction in the
  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
  SlotIndex RegionEndIdx;
public:
  // Both arguments are accepted but unused.
  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}

  /// Entry point: constrains every COPY found among the DAG's SUnits.
  virtual void apply(ScheduleDAGMI *DAG);

protected:
  /// Add weak edges around a single copy; see the definition for the cases
  /// handled.
  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};
} // anonymous
1378
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
/// I0:     = dst
/// I1: src = ...
/// I2:     = dst
/// I3: dst = src (copy)
/// (create pred->succ edges I0->I1, I2->I1)
///
/// 2) Local copy:
/// I0: dst = src (copy)
/// I1:     = dst
/// I2: src = ...
/// I3:     = dst
/// (create pred->succ edges I1->I2, I3->I2)
///
/// Although the MachineScheduler is currently constrained to single blocks,
/// this algorithm should handle extended blocks. An EBB is a set of
/// contiguously numbered blocks such that the previous block in the EBB is
/// always the single predecessor.
///
/// No edge is added unless every candidate edge passes canAddEdge(); the
/// function returns early, leaving the DAG untouched, as soon as one fails.
void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
  LiveIntervals *LIS = DAG->getLIS();
  MachineInstr *Copy = CopySU->getInstr();

  // Check for pure vreg copies.
  unsigned SrcReg = Copy->getOperand(1).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
    return;

  unsigned DstReg = Copy->getOperand(0).getReg();
  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
    return;

  // Check if either the dest or source is local. If it's live across a back
  // edge, it's not local. Note that if both vregs are live across the back
  // edge, we cannot successfully constrain the copy without cyclic scheduling.
  // The destination is tried as the local register first, then the source.
  unsigned LocalReg = DstReg;
  unsigned GlobalReg = SrcReg;
  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    LocalReg = SrcReg;
    GlobalReg = DstReg;
    LocalLI = &LIS->getInterval(LocalReg);
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
      return;
  }
  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);

  // Find the global segment after the start of the local LI.
  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
  // local live range. We could create edges from other global uses to the local
  // start, but the coalescer should have already eliminated these cases, so
  // don't bother dealing with it.
  if (GlobalSegment == GlobalLI->end())
    return;

  // If GlobalSegment is killed at the LocalLI->start, the call to find()
  // returned the next global segment. But if GlobalSegment overlaps with
  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
  if (GlobalSegment->contains(LocalLI->beginIndex()))
    ++GlobalSegment;

  if (GlobalSegment == GlobalLI->end())
    return;

  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
  if (GlobalSegment != GlobalLI->begin()) {
    // Two address defs have no hole.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
                               GlobalSegment->start)) {
      return;
    }
    // If the prior global segment may be defined by the same two-address
    // instruction that also defines LocalLI, then can't make a hole here.
    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
                               LocalLI->beginIndex())) {
      return;
    }
    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
    // it would be a disconnected component in the live range.
    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
           "Disconnected LRG within the scheduling region.");
  }
  // The instruction starting GlobalSegment must exist and have a DAG node.
  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
  if (!GlobalDef)
    return;

  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
  if (!GlobalSU)
    return;

  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
  // constraining the uses of the last local def to precede GlobalDef.
  SmallVector<SUnit*,8> LocalUses;
  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
  for (SUnit::const_succ_iterator
         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
       I != E; ++I) {
    // Only data successors on LocalReg are uses of the local value.
    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
      continue;
    if (I->getSUnit() == GlobalSU)
      continue;
    // Give up entirely if any required edge cannot be added.
    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
      return;
    LocalUses.push_back(I->getSUnit());
  }
  // Open the top of the GlobalLI hole by constraining any earlier global uses
  // to precede the start of LocalLI.
  SmallVector<SUnit*,8> GlobalUses;
  MachineInstr *FirstLocalDef =
    LIS->getInstructionFromIndex(LocalLI->beginIndex());
  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
  for (SUnit::const_pred_iterator
         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
    // Anti-dependence predecessors on GlobalReg are the earlier global uses.
    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
      continue;
    if (I->getSUnit() == FirstLocalSU)
      continue;
    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
      return;
    GlobalUses.push_back(I->getSUnit());
  }
  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
  // Add the weak edges.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
          << GlobalSU->NodeNum << ")\n");
    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
  }
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
          << FirstLocalSU->NodeNum << ")\n");
    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
  }
}
1519
1520 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1521 /// copy elimination.
1522 void CopyConstrain::apply(ScheduleDAGMI *DAG) {
1523   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1524
1525   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1526   if (FirstPos == DAG->end())
1527     return;
1528   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
1529   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1530     &*priorNonDebug(DAG->end(), DAG->begin()));
1531
1532   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1533     SUnit *SU = &DAG->SUnits[Idx];
1534     if (!SU->getInstr()->isCopy())
1535       continue;
1536
1537     constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
1538   }
1539 }
1540
1541 //===----------------------------------------------------------------------===//
1542 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1543 // and possibly other custom schedulers.
1544 //===----------------------------------------------------------------------===//
1545
// Sentinel stored in SchedBoundary::ReservedCycles for a processor resource
// that has not been reserved yet: SchedBoundary::init() fills the table with
// it and getNextResourceCycle() maps it to cycle zero.
static const unsigned InvalidCycle = ~0U;
1547
1548 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1549
1550 void SchedBoundary::reset() {
1551   // A new HazardRec is created for each DAG and owned by SchedBoundary.
1552   // Destroying and reconstructing it is very expensive though. So keep
1553   // invalid, placeholder HazardRecs.
1554   if (HazardRec && HazardRec->isEnabled()) {
1555     delete HazardRec;
1556     HazardRec = 0;
1557   }
1558   Available.clear();
1559   Pending.clear();
1560   CheckPending = false;
1561   NextSUs.clear();
1562   CurrCycle = 0;
1563   CurrMOps = 0;
1564   MinReadyCycle = UINT_MAX;
1565   ExpectedLatency = 0;
1566   DependentLatency = 0;
1567   RetiredMOps = 0;
1568   MaxExecutedResCount = 0;
1569   ZoneCritResIdx = 0;
1570   IsResourceLimited = false;
1571   ReservedCycles.clear();
1572 #ifndef NDEBUG
1573   // Track the maximum number of stall cycles that could arise either from the
1574   // latency of a DAG edge or the number of cycles that a processor resource is
1575   // reserved (SchedBoundary::ReservedCycles).
1576   MaxObservedLatency = 0;
1577 #endif
1578   // Reserve a zero-count for invalid CritResIdx.
1579   ExecutedResCounts.resize(1);
1580   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1581 }
1582
1583 void SchedRemainder::
1584 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1585   reset();
1586   if (!SchedModel->hasInstrSchedModel())
1587     return;
1588   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1589   for (std::vector<SUnit>::iterator
1590          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1591     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1592     RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1593       * SchedModel->getMicroOpFactor();
1594     for (TargetSchedModel::ProcResIter
1595            PI = SchedModel->getWriteProcResBegin(SC),
1596            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1597       unsigned PIdx = PI->ProcResourceIdx;
1598       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1599       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1600     }
1601   }
1602 }
1603
1604 void SchedBoundary::
1605 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1606   reset();
1607   DAG = dag;
1608   SchedModel = smodel;
1609   Rem = rem;
1610   if (SchedModel->hasInstrSchedModel()) {
1611     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1612     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1613   }
1614 }
1615
1616 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1617 /// these "soft stalls" differently than the hard stall cycles based on CPU
1618 /// resources and computed by checkHazard(). A fully in-order model
1619 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1620 /// available for scheduling until they are ready. However, a weaker in-order
1621 /// model may use this for heuristics. For example, if a processor has in-order
1622 /// behavior when reading certain resources, this may come into play.
1623 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1624   if (!SU->isUnbuffered)
1625     return 0;
1626
1627   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1628   if (ReadyCycle > CurrCycle)
1629     return ReadyCycle - CurrCycle;
1630   return 0;
1631 }
1632
1633 /// Compute the next cycle at which the given processor resource can be
1634 /// scheduled.
1635 unsigned SchedBoundary::
1636 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1637   unsigned NextUnreserved = ReservedCycles[PIdx];
1638   // If this resource has never been used, always return cycle zero.
1639   if (NextUnreserved == InvalidCycle)
1640     return 0;
1641   // For bottom-up scheduling add the cycles needed for the current operation.
1642   if (!isTop())
1643     NextUnreserved += Cycles;
1644   return NextUnreserved;
1645 }
1646
1647 /// Does this SU have a hazard within the current instruction group.
1648 ///
1649 /// The scheduler supports two modes of hazard recognition. The first is the
1650 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1651 /// supports highly complicated in-order reservation tables
1652 /// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
1653 ///
1654 /// The second is a streamlined mechanism that checks for hazards based on
1655 /// simple counters that the scheduler itself maintains. It explicitly checks
1656 /// for instruction dispatch limitations, including the number of micro-ops that
1657 /// can dispatch per cycle.
1658 ///
1659 /// TODO: Also check whether the SU must start a new group.
1660 bool SchedBoundary::checkHazard(SUnit *SU) {
1661   if (HazardRec->isEnabled()
1662       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1663     return true;
1664   }
1665   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1666   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1667     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1668           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1669     return true;
1670   }
1671   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1672     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1673     for (TargetSchedModel::ProcResIter
1674            PI = SchedModel->getWriteProcResBegin(SC),
1675            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1676       if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
1677         return true;
1678     }
1679   }
1680   return false;
1681 }
1682
1683 // Find the unscheduled node in ReadySUs with the highest latency.
1684 unsigned SchedBoundary::
1685 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1686   SUnit *LateSU = 0;
1687   unsigned RemLatency = 0;
1688   for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1689        I != E; ++I) {
1690     unsigned L = getUnscheduledLatency(*I);
1691     if (L > RemLatency) {
1692       RemLatency = L;
1693       LateSU = *I;
1694     }
1695   }
1696   if (LateSU) {
1697     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1698           << LateSU->NodeNum << ") " << RemLatency << "c\n");
1699   }
1700   return RemLatency;
1701 }
1702
1703 // Count resources in this zone and the remaining unscheduled
1704 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
1705 // resource index, or zero if the zone is issue limited.
1706 unsigned SchedBoundary::
1707 getOtherResourceCount(unsigned &OtherCritIdx) {
1708   OtherCritIdx = 0;
1709   if (!SchedModel->hasInstrSchedModel())
1710     return 0;
1711
1712   unsigned OtherCritCount = Rem->RemIssueCount
1713     + (RetiredMOps * SchedModel->getMicroOpFactor());
1714   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
1715         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1716   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
1717        PIdx != PEnd; ++PIdx) {
1718     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
1719     if (OtherCount > OtherCritCount) {
1720       OtherCritCount = OtherCount;
1721       OtherCritIdx = PIdx;
1722     }
1723   }
1724   if (OtherCritIdx) {
1725     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
1726           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1727           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
1728   }
1729   return OtherCritCount;
1730 }
1731
1732 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
1733   if (ReadyCycle < MinReadyCycle)
1734     MinReadyCycle = ReadyCycle;
1735
1736   // Check for interlocks first. For the purpose of other heuristics, an
1737   // instruction that cannot issue appears as if it's not in the ReadyQueue.
1738   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
1739   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
1740     Pending.push(SU);
1741   else
1742     Available.push(SU);
1743
1744   // Record this node as an immediate dependent of the scheduled node.
1745   NextSUs.insert(SU);
1746 }
1747
1748 void SchedBoundary::releaseTopNode(SUnit *SU) {
1749   if (SU->isScheduled)
1750     return;
1751
1752   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1753        I != E; ++I) {
1754     if (I->isWeak())
1755       continue;
1756     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1757     unsigned Latency = I->getLatency();
1758 #ifndef NDEBUG
1759     MaxObservedLatency = std::max(Latency, MaxObservedLatency);
1760 #endif
1761     if (SU->TopReadyCycle < PredReadyCycle + Latency)
1762       SU->TopReadyCycle = PredReadyCycle + Latency;
1763   }
1764   releaseNode(SU, SU->TopReadyCycle);
1765 }
1766
1767 void SchedBoundary::releaseBottomNode(SUnit *SU) {
1768   if (SU->isScheduled)
1769     return;
1770
1771   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1772
1773   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1774        I != E; ++I) {
1775     if (I->isWeak())
1776       continue;
1777     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1778     unsigned Latency = I->getLatency();
1779 #ifndef NDEBUG
1780     MaxObservedLatency = std::max(Latency, MaxObservedLatency);
1781 #endif
1782     if (SU->BotReadyCycle < SuccReadyCycle + Latency)
1783       SU->BotReadyCycle = SuccReadyCycle + Latency;
1784   }
1785   releaseNode(SU, SU->BotReadyCycle);
1786 }
1787
1788 /// Move the boundary of scheduled code by one cycle.
1789 void SchedBoundary::bumpCycle(unsigned NextCycle) {
1790   if (SchedModel->getMicroOpBufferSize() == 0) {
1791     assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1792     if (MinReadyCycle > NextCycle)
1793       NextCycle = MinReadyCycle;
1794   }
1795   // Update the current micro-ops, which will issue in the next cycle.
1796   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
1797   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
1798
1799   // Decrement DependentLatency based on the next cycle.
1800   if ((NextCycle - CurrCycle) > DependentLatency)
1801     DependentLatency = 0;
1802   else
1803     DependentLatency -= (NextCycle - CurrCycle);
1804
1805   if (!HazardRec->isEnabled()) {
1806     // Bypass HazardRec virtual calls.
1807     CurrCycle = NextCycle;
1808   }
1809   else {
1810     // Bypass getHazardType calls in case of long latency.
1811     for (; CurrCycle != NextCycle; ++CurrCycle) {
1812       if (isTop())
1813         HazardRec->AdvanceCycle();
1814       else
1815         HazardRec->RecedeCycle();
1816     }
1817   }
1818   CheckPending = true;
1819   unsigned LFactor = SchedModel->getLatencyFactor();
1820   IsResourceLimited =
1821     (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
1822     > (int)LFactor;
1823
1824   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
1825 }
1826
1827 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
1828   ExecutedResCounts[PIdx] += Count;
1829   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
1830     MaxExecutedResCount = ExecutedResCounts[PIdx];
1831 }
1832
1833 /// Add the given processor resource to this scheduled zone.
1834 ///
1835 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
1836 /// during which this resource is consumed.
1837 ///
1838 /// \return the next cycle at which the instruction may execute without
1839 /// oversubscribing resources.
1840 unsigned SchedBoundary::
1841 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
1842   unsigned Factor = SchedModel->getResourceFactor(PIdx);
1843   unsigned Count = Factor * Cycles;
1844   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
1845         << " +" << Cycles << "x" << Factor << "u\n");
1846
1847   // Update Executed resources counts.
1848   incExecutedResources(PIdx, Count);
1849   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1850   Rem->RemainingCounts[PIdx] -= Count;
1851
1852   // Check if this resource exceeds the current critical resource. If so, it
1853   // becomes the critical resource.
1854   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
1855     ZoneCritResIdx = PIdx;
1856     DEBUG(dbgs() << "  *** Critical resource "
1857           << SchedModel->getResourceName(PIdx) << ": "
1858           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
1859   }
1860   // For reserved resources, record the highest cycle using the resource.
1861   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
1862   if (NextAvailable > CurrCycle) {
1863     DEBUG(dbgs() << "  Resource conflict: "
1864           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
1865           << NextAvailable << "\n");
1866   }
1867   return NextAvailable;
1868 }
1869
/// Move the boundary of scheduled code by one SUnit.
///
/// Accounts for \p SU in this zone: feeds it to the hazard recognizer (when
/// enabled), updates the micro-op and processor-resource counters and the
/// critical resource, records resource reservations, refreshes the expected
/// and dependent latencies, and bumps the current cycle when scheduling SU
/// stalls or fills the issue width.
void SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // checkHazard should prevent scheduling multiple instructions per cycle that
  // exceed the issue width.
  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
  assert(
      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
      "Cannot schedule this instruction's MicroOps in the current cycle.");

  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");

  // NextCycle accumulates the cycle the boundary must reach after SU; it only
  // ever grows from CurrCycle. How SU's ready cycle affects it depends on the
  // micro-op buffer size of the model.
  unsigned NextCycle = CurrCycle;
  switch (SchedModel->getMicroOpBufferSize()) {
  case 0:
    // No buffering: SU is expected to be ready by the current cycle already.
    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    break;
  case 1:
    // Buffer size of one: stall until SU is ready.
    if (ReadyCycle > NextCycle) {
      NextCycle = ReadyCycle;
      DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
    }
    break;
  default:
    // We don't currently model the OOO reorder buffer, so consider all
    // scheduled MOps to be "retired". We do loosely model in-order resource
    // latency. If this instruction uses an in-order resource, account for any
    // likely stall cycles.
    if (SU->isUnbuffered && ReadyCycle > NextCycle)
      NextCycle = ReadyCycle;
    break;
  }
  RetiredMOps += IncMOps;

  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    // Take SU's scaled micro-ops out of the remaining issue count.
    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
    Rem->RemIssueCount -= DecRemIssue;
    if (ZoneCritResIdx) {
      // Scale scheduled micro-ops for comparing with the critical resource.
      unsigned ScaledMOps =
        RetiredMOps * SchedModel->getMicroOpFactor();

      // If scaled micro-ops are now more than the previous critical resource by
      // a full cycle, then micro-ops issue becomes critical.
      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
          >= (int)SchedModel->getLatencyFactor()) {
        ZoneCritResIdx = 0;
        DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
              << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
      }
    }
    // Count every resource SU writes; the latest cycle returned by
    // countResource becomes the new NextCycle.
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned RCycle =
        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
      if (RCycle > NextCycle)
        NextCycle = RCycle;
    }
    if (SU->hasReservedResource) {
      // For reserved resources, record the highest cycle using the resource.
      // For top-down scheduling, this is the cycle in which we schedule this
      // instruction plus the number of cycles the operations reserves the
      // resource. For bottom-up is it simply the instruction's cycle.
      for (TargetSchedModel::ProcResIter
             PI = SchedModel->getWriteProcResBegin(SC),
             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
        unsigned PIdx = PI->ProcResourceIdx;
        // Only resources with a zero BufferSize are recorded as reserved.
        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
          ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
#ifndef NDEBUG
          MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
#endif
        }
      }
    }
  }
  // Update ExpectedLatency and DependentLatency.
  // In the top zone node depth feeds ExpectedLatency and node height feeds
  // DependentLatency; in the bottom zone the two roles are swapped.
  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
  if (SU->getDepth() > TopLatency) {
    TopLatency = SU->getDepth();
    DEBUG(dbgs() << "  " << Available.getName()
          << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
  }
  if (SU->getHeight() > BotLatency) {
    BotLatency = SU->getHeight();
    DEBUG(dbgs() << "  " << Available.getName()
          << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
  }
  // If we stall for any reason, bump the cycle.
  if (NextCycle > CurrCycle) {
    bumpCycle(NextCycle);
  }
  else {
    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    // resource limited. If a stall occurred, bumpCycle does this.
    unsigned LFactor = SchedModel->getLatencyFactor();
    IsResourceLimited =
      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
      > (int)LFactor;
  }
  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
  // one cycle.  Since we commonly reach the max MOps here, opportunistically
  // bump the cycle to avoid uselessly checking everything in the readyQ.
  CurrMOps += IncMOps;
  while (CurrMOps >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
          << " at cycle " << CurrCycle << '\n');
    bumpCycle(++NextCycle);
  }
  DEBUG(dumpScheduledState());
}
1996
1997 /// Release pending ready nodes in to the available queue. This makes them
1998 /// visible to heuristics.
1999 void SchedBoundary::releasePending() {
2000   // If the available queue is empty, it is safe to reset MinReadyCycle.
2001   if (Available.empty())
2002     MinReadyCycle = UINT_MAX;
2003
2004   // Check to see if any of the pending instructions are ready to issue.  If
2005   // so, add them to the available queue.
2006   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2007   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2008     SUnit *SU = *(Pending.begin()+i);
2009     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2010
2011     if (ReadyCycle < MinReadyCycle)
2012       MinReadyCycle = ReadyCycle;
2013
2014     if (!IsBuffered && ReadyCycle > CurrCycle)
2015       continue;
2016
2017     if (checkHazard(SU))
2018       continue;
2019
2020     Available.push(SU);
2021     Pending.remove(Pending.begin()+i);
2022     --i; --e;
2023   }
2024   DEBUG(if (!Pending.empty()) Pending.dump());
2025   CheckPending = false;
2026 }
2027
2028 /// Remove SU from the ready set for this boundary.
2029 void SchedBoundary::removeReady(SUnit *SU) {
2030   if (Available.isInQueue(SU))
2031     Available.remove(Available.find(SU));
2032   else {
2033     assert(Pending.isInQueue(SU) && "bad ready count");
2034     Pending.remove(Pending.find(SU));
2035   }
2036 }
2037
2038 /// If this queue only has one ready candidate, return it. As a side effect,
2039 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2040 /// one node is ready. If multiple instructions are ready, return NULL.
2041 SUnit *SchedBoundary::pickOnlyChoice() {
2042   if (CheckPending)
2043     releasePending();
2044
2045   if (CurrMOps > 0) {
2046     // Defer any ready instrs that now have a hazard.
2047     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2048       if (checkHazard(*I)) {
2049         Pending.push(*I);
2050         I = Available.remove(I);
2051         continue;
2052       }
2053       ++I;
2054     }
2055   }
2056   for (unsigned i = 0; Available.empty(); ++i) {
2057     assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
2058            "permanent hazard"); (void)i;
2059     bumpCycle(CurrCycle + 1);
2060     releasePending();
2061   }
2062   if (Available.size() == 1)
2063     return *Available.begin();
2064   return NULL;
2065 }
2066
2067 #ifndef NDEBUG
2068 // This is useful information to dump after bumpNode.
2069 // Note that the Queue contents are more useful before pickNodeFromQueue.
2070 void SchedBoundary::dumpScheduledState() {
2071   unsigned ResFactor;
2072   unsigned ResCount;
2073   if (ZoneCritResIdx) {
2074     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2075     ResCount = getResourceCount(ZoneCritResIdx);
2076   }
2077   else {
2078     ResFactor = SchedModel->getMicroOpFactor();
2079     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2080   }
2081   unsigned LFactor = SchedModel->getLatencyFactor();
2082   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2083          << "  Retired: " << RetiredMOps;
2084   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2085   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2086          << ResCount / ResFactor << " "
2087          << SchedModel->getResourceName(ZoneCritResIdx)
2088          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2089          << (IsResourceLimited ? "  - Resource" : "  - Latency")
2090          << " limited.\n";
2091 }
2092 #endif
2093
2094 //===----------------------------------------------------------------------===//
2095 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2096 //===----------------------------------------------------------------------===//
2097
2098 namespace {
2099 /// Base class for GenericScheduler. This class maintains information about
2100 /// scheduling candidates based on TargetSchedModel making it easy to implement
2101 /// heuristics for either preRA or postRA scheduling.
2102 class GenericSchedulerBase : public MachineSchedStrategy {
2103 public:
2104   /// Represent the type of SchedCandidate found within a single queue.
2105   /// pickNodeBidirectional depends on these listed by decreasing priority.
2106   enum CandReason {
2107     NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
2108     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
2109     TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
2110
2111 #ifndef NDEBUG
2112   static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
2113 #endif
2114
2115   /// Policy for scheduling the next instruction in the candidate's zone.
2116   struct CandPolicy {
2117     bool ReduceLatency;
2118     unsigned ReduceResIdx;
2119     unsigned DemandResIdx;
2120
2121     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
2122   };
2123
2124   /// Status of an instruction's critical resource consumption.
2125   struct SchedResourceDelta {
2126     // Count critical resources in the scheduled region required by SU.
2127     unsigned CritResources;
2128
2129     // Count critical resources from another region consumed by SU.
2130     unsigned DemandedResources;
2131
2132     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
2133
2134     bool operator==(const SchedResourceDelta &RHS) const {
2135       return CritResources == RHS.CritResources
2136         && DemandedResources == RHS.DemandedResources;
2137     }
2138     bool operator!=(const SchedResourceDelta &RHS) const {
2139       return !operator==(RHS);
2140     }
2141   };
2142
2143   /// Store the state used by GenericScheduler heuristics, required for the
2144   /// lifetime of one invocation of pickNode().
2145   struct SchedCandidate {
2146     CandPolicy Policy;
2147
2148     // The best SUnit candidate.
2149     SUnit *SU;
2150
2151     // The reason for this candidate.
2152     CandReason Reason;
2153
2154     // Set of reasons that apply to multiple candidates.
2155     uint32_t RepeatReasonSet;
2156
2157     // Register pressure values for the best candidate.
2158     RegPressureDelta RPDelta;
2159
2160     // Critical resource consumption of the best candidate.
2161     SchedResourceDelta ResDelta;
2162
2163     SchedCandidate(const CandPolicy &policy)
2164       : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
2165
2166     bool isValid() const { return SU; }
2167
2168     // Copy the status of another candidate without changing policy.
2169     void setBest(SchedCandidate &Best) {
2170       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
2171       SU = Best.SU;
2172       Reason = Best.Reason;
2173       RPDelta = Best.RPDelta;
2174       ResDelta = Best.ResDelta;
2175     }
2176
2177     bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
2178     void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
2179
2180     void initResourceDelta(const ScheduleDAGMI *DAG,
2181                            const TargetSchedModel *SchedModel);
2182   };
2183
2184 protected:
2185   const MachineSchedContext *Context;
2186   const TargetSchedModel *SchedModel;
2187   const TargetRegisterInfo *TRI;
2188
2189   SchedRemainder Rem;
2190 protected:
2191   GenericSchedulerBase(const MachineSchedContext *C):
2192     Context(C), SchedModel(0), TRI(0) {}
2193
2194   void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
2195                  SchedBoundary *OtherZone);
2196
2197 #ifndef NDEBUG
2198   void traceCandidate(const SchedCandidate &Cand);
2199 #endif
2200 };
2201 } // namespace
2202
2203 void GenericSchedulerBase::SchedCandidate::
2204 initResourceDelta(const ScheduleDAGMI *DAG,
2205                   const TargetSchedModel *SchedModel) {
2206   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2207     return;
2208
2209   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2210   for (TargetSchedModel::ProcResIter
2211          PI = SchedModel->getWriteProcResBegin(SC),
2212          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2213     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2214       ResDelta.CritResources += PI->Cycles;
2215     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2216       ResDelta.DemandedResources += PI->Cycles;
2217   }
2218 }
2219
2220 /// Set the CandPolicy given a scheduling zone given the current resources and
2221 /// latencies inside and outside the zone.
2222 void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
2223                                      bool IsPostRA,
2224                                      SchedBoundary &CurrZone,
2225                                      SchedBoundary *OtherZone) {
2226   // Apply preemptive heuristics based on the the total latency and resources
2227   // inside and outside this zone. Potential stalls should be considered before
2228   // following this policy.
2229
2230   // Compute remaining latency. We need this both to determine whether the
2231   // overall schedule has become latency-limited and whether the instructions
2232   // outside this zone are resource or latency limited.
2233   //
2234   // The "dependent" latency is updated incrementally during scheduling as the
2235   // max height/depth of scheduled nodes minus the cycles since it was
2236   // scheduled:
2237   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2238   //
2239   // The "independent" latency is the max ready queue depth:
2240   //   ILat = max N.depth for N in Available|Pending
2241   //
2242   // RemainingLatency is the greater of independent and dependent latency.
2243   unsigned RemLatency = CurrZone.getDependentLatency();
2244   RemLatency = std::max(RemLatency,
2245                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2246   RemLatency = std::max(RemLatency,
2247                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2248
2249   // Compute the critical resource outside the zone.
2250   unsigned OtherCritIdx = 0;
2251   unsigned OtherCount =
2252     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2253
2254   bool OtherResLimited = false;
2255   if (SchedModel->hasInstrSchedModel()) {
2256     unsigned LFactor = SchedModel->getLatencyFactor();
2257     OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
2258   }
2259   // Schedule aggressively for latency in PostRA mode. We don't check for
2260   // acyclic latency during PostRA, and highly out-of-order processors will
2261   // skip PostRA scheduling.
2262   if (!OtherResLimited) {
2263     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2264       Policy.ReduceLatency |= true;
2265       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2266             << " RemainingLatency " << RemLatency << " + "
2267             << CurrZone.getCurrCycle() << "c > CritPath "
2268             << Rem.CriticalPath << "\n");
2269     }
2270   }
2271   // If the same resource is limiting inside and outside the zone, do nothing.
2272   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2273     return;
2274
2275   DEBUG(
2276     if (CurrZone.isResourceLimited()) {
2277       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2278              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2279              << "\n";
2280     }
2281     if (OtherResLimited)
2282       dbgs() << "  RemainingLimit: "
2283              << SchedModel->getResourceName(OtherCritIdx) << "\n";
2284     if (!CurrZone.isResourceLimited() && !OtherResLimited)
2285       dbgs() << "  Latency limited both directions.\n");
2286
2287   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2288     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2289
2290   if (OtherResLimited)
2291     Policy.DemandResIdx = OtherCritIdx;
2292 }
2293
2294 #ifndef NDEBUG
2295 const char *GenericSchedulerBase::getReasonStr(
2296   GenericSchedulerBase::CandReason Reason) {
2297   switch (Reason) {
2298   case NoCand:         return "NOCAND    ";
2299   case PhysRegCopy:    return "PREG-COPY";
2300   case RegExcess:      return "REG-EXCESS";
2301   case RegCritical:    return "REG-CRIT  ";
2302   case Stall:          return "STALL     ";
2303   case Cluster:        return "CLUSTER   ";
2304   case Weak:           return "WEAK      ";
2305   case RegMax:         return "REG-MAX   ";
2306   case ResourceReduce: return "RES-REDUCE";
2307   case ResourceDemand: return "RES-DEMAND";
2308   case TopDepthReduce: return "TOP-DEPTH ";
2309   case TopPathReduce:  return "TOP-PATH  ";
2310   case BotHeightReduce:return "BOT-HEIGHT";
2311   case BotPathReduce:  return "BOT-PATH  ";
2312   case NextDefUse:     return "DEF-USE   ";
2313   case NodeOrder:      return "ORDER     ";
2314   };
2315   llvm_unreachable("Unknown reason!");
2316 }
2317
2318 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2319   PressureChange P;
2320   unsigned ResIdx = 0;
2321   unsigned Latency = 0;
2322   switch (Cand.Reason) {
2323   default:
2324     break;
2325   case RegExcess:
2326     P = Cand.RPDelta.Excess;
2327     break;
2328   case RegCritical:
2329     P = Cand.RPDelta.CriticalMax;
2330     break;
2331   case RegMax:
2332     P = Cand.RPDelta.CurrentMax;
2333     break;
2334   case ResourceReduce:
2335     ResIdx = Cand.Policy.ReduceResIdx;
2336     break;
2337   case ResourceDemand:
2338     ResIdx = Cand.Policy.DemandResIdx;
2339     break;
2340   case TopDepthReduce:
2341     Latency = Cand.SU->getDepth();
2342     break;
2343   case TopPathReduce:
2344     Latency = Cand.SU->getHeight();
2345     break;
2346   case BotHeightReduce:
2347     Latency = Cand.SU->getHeight();
2348     break;
2349   case BotPathReduce:
2350     Latency = Cand.SU->getDepth();
2351     break;
2352   }
2353   dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2354   if (P.isValid())
2355     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2356            << ":" << P.getUnitInc() << " ";
2357   else
2358     dbgs() << "      ";
2359   if (ResIdx)
2360     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2361   else
2362     dbgs() << "         ";
2363   if (Latency)
2364     dbgs() << " " << Latency << " cycles ";
2365   else
2366     dbgs() << "          ";
2367   dbgs() << '\n';
2368 }
2369 #endif
2370
2371 /// Return true if this heuristic determines order.
2372 static bool tryLess(int TryVal, int CandVal,
2373                     GenericSchedulerBase::SchedCandidate &TryCand,
2374                     GenericSchedulerBase::SchedCandidate &Cand,
2375                     GenericSchedulerBase::CandReason Reason) {
2376   if (TryVal < CandVal) {
2377     TryCand.Reason = Reason;
2378     return true;
2379   }
2380   if (TryVal > CandVal) {
2381     if (Cand.Reason > Reason)
2382       Cand.Reason = Reason;
2383     return true;
2384   }
2385   Cand.setRepeat(Reason);
2386   return false;
2387 }
2388
2389 static bool tryGreater(int TryVal, int CandVal,
2390                        GenericSchedulerBase::SchedCandidate &TryCand,
2391                        GenericSchedulerBase::SchedCandidate &Cand,
2392                        GenericSchedulerBase::CandReason Reason) {
2393   if (TryVal > CandVal) {
2394     TryCand.Reason = Reason;
2395     return true;
2396   }
2397   if (TryVal < CandVal) {
2398     if (Cand.Reason > Reason)
2399       Cand.Reason = Reason;
2400     return true;
2401   }
2402   Cand.setRepeat(Reason);
2403   return false;
2404 }
2405
2406 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2407                        GenericSchedulerBase::SchedCandidate &Cand,
2408                        SchedBoundary &Zone) {
2409   if (Zone.isTop()) {
2410     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2411       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2412                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2413         return true;
2414     }
2415     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2416                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2417       return true;
2418   }
2419   else {
2420     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2421       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2422                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2423         return true;
2424     }
2425     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2426                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2427       return true;
2428   }
2429   return false;
2430 }
2431
2432 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
2433                       bool IsTop) {
2434   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2435         << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
2436 }
2437
2438 namespace {
2439 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
2440 /// the schedule.
2441 class GenericScheduler : public GenericSchedulerBase {
2442   ScheduleDAGMILive *DAG;
2443
2444   // State of the top and bottom scheduled instruction boundaries.
2445   SchedBoundary Top;
2446   SchedBoundary Bot;
2447
2448   MachineSchedPolicy RegionPolicy;
2449 public:
2450   GenericScheduler(const MachineSchedContext *C):
2451     GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"),
2452     Bot(SchedBoundary::BotQID, "BotQ") {}
2453
2454   virtual void initPolicy(MachineBasicBlock::iterator Begin,
2455                           MachineBasicBlock::iterator End,
2456                           unsigned NumRegionInstrs) override;
2457
2458   virtual bool shouldTrackPressure() const override {
2459     return RegionPolicy.ShouldTrackPressure;
2460   }
2461
2462   virtual void initialize(ScheduleDAGMI *dag) override;
2463
2464   virtual SUnit *pickNode(bool &IsTopNode) override;
2465
2466   virtual void schedNode(SUnit *SU, bool IsTopNode) override;
2467
2468   virtual void releaseTopNode(SUnit *SU) override {
2469     Top.releaseTopNode(SU);
2470   }
2471
2472   virtual void releaseBottomNode(SUnit *SU) override {
2473     Bot.releaseBottomNode(SU);
2474   }
2475
2476   virtual void registerRoots() override;
2477
2478 protected:
2479   void checkAcyclicLatency();
2480
2481   void tryCandidate(SchedCandidate &Cand,
2482                     SchedCandidate &TryCand,
2483                     SchedBoundary &Zone,
2484                     const RegPressureTracker &RPTracker,
2485                     RegPressureTracker &TempTracker);
2486
2487   SUnit *pickNodeBidirectional(bool &IsTopNode);
2488
2489   void pickNodeFromQueue(SchedBoundary &Zone,
2490                          const RegPressureTracker &RPTracker,
2491                          SchedCandidate &Candidate);
2492
2493   void reschedulePhysRegCopies(SUnit *SU, bool isTop);
2494 };
2495 } // namespace
2496
2497 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2498   assert(dag->hasVRegLiveness() &&
2499          "(PreRA)GenericScheduler needs vreg liveness");
2500   DAG = static_cast<ScheduleDAGMILive*>(dag);
2501   SchedModel = DAG->getSchedModel();
2502   TRI = DAG->TRI;
2503
2504   Rem.init(DAG, SchedModel);
2505   Top.init(DAG, SchedModel, &Rem);
2506   Bot.init(DAG, SchedModel, &Rem);
2507
2508   // Initialize resource counts.
2509
2510   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2511   // are disabled, then these HazardRecs will be disabled.
2512   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2513   const TargetMachine &TM = DAG->MF.getTarget();
2514   if (!Top.HazardRec) {
2515     Top.HazardRec =
2516       TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
2517   }
2518   if (!Bot.HazardRec) {
2519     Bot.HazardRec =
2520       TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
2521   }
2522 }
2523
2524 /// Initialize the per-region scheduling policy.
2525 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2526                                   MachineBasicBlock::iterator End,
2527                                   unsigned NumRegionInstrs) {
2528   const TargetMachine &TM = Context->MF->getTarget();
2529   const TargetLowering *TLI = TM.getTargetLowering();
2530
2531   // Avoid setting up the register pressure tracker for small regions to save
2532   // compile time. As a rough heuristic, only track pressure when the number of
2533   // schedulable instructions exceeds half the integer register file.
2534   RegionPolicy.ShouldTrackPressure = true;
2535   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2536     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2537     if (TLI->isTypeLegal(LegalIntVT)) {
2538       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2539         TLI->getRegClassFor(LegalIntVT));
2540       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2541     }
2542   }
2543
2544   // For generic targets, we default to bottom-up, because it's simpler and more
2545   // compile-time optimizations have been implemented in that direction.
2546   RegionPolicy.OnlyBottomUp = true;
2547
2548   // Allow the subtarget to override default policy.
2549   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
2550   ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
2551
2552   // After subtarget overrides, apply command line options.
2553   if (!EnableRegPressure)
2554     RegionPolicy.ShouldTrackPressure = false;
2555
2556   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2557   // e.g. -misched-bottomup=false allows scheduling in both directions.
2558   assert((!ForceTopDown || !ForceBottomUp) &&
2559          "-misched-topdown incompatible with -misched-bottomup");
2560   if (ForceBottomUp.getNumOccurrences() > 0) {
2561     RegionPolicy.OnlyBottomUp = ForceBottomUp;
2562     if (RegionPolicy.OnlyBottomUp)
2563       RegionPolicy.OnlyTopDown = false;
2564   }
2565   if (ForceTopDown.getNumOccurrences() > 0) {
2566     RegionPolicy.OnlyTopDown = ForceTopDown;
2567     if (RegionPolicy.OnlyTopDown)
2568       RegionPolicy.OnlyBottomUp = false;
2569   }
2570 }
2571
2572 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2573 /// critical path by more cycles than it takes to drain the instruction buffer.
2574 /// We estimate an upper bounds on in-flight instructions as:
2575 ///
2576 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2577 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2578 /// InFlightResources = InFlightIterations * LoopResources
2579 ///
2580 /// TODO: Check execution resources in addition to IssueCount.
2581 void GenericScheduler::checkAcyclicLatency() {
2582   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2583     return;
2584
2585   // Scaled number of cycles per loop iteration.
2586   unsigned IterCount =
2587     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2588              Rem.RemIssueCount);
2589   // Scaled acyclic critical path.
2590   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2591   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2592   unsigned InFlightCount =
2593     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2594   unsigned BufferLimit =
2595     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2596
2597   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2598
2599   DEBUG(dbgs() << "IssueCycles="
2600         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2601         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2602         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2603         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2604         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2605         if (Rem.IsAcyclicLatencyLimited)
2606           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2607 }
2608
2609 void GenericScheduler::registerRoots() {
2610   Rem.CriticalPath = DAG->ExitSU.getDepth();
2611
2612   // Some roots may not feed into ExitSU. Check all of them in case.
2613   for (std::vector<SUnit*>::const_iterator
2614          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
2615     if ((*I)->getDepth() > Rem.CriticalPath)
2616       Rem.CriticalPath = (*I)->getDepth();
2617   }
2618   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
2619
2620   if (EnableCyclicPath) {
2621     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2622     checkAcyclicLatency();
2623   }
2624 }
2625
2626 static bool tryPressure(const PressureChange &TryP,
2627                         const PressureChange &CandP,
2628                         GenericSchedulerBase::SchedCandidate &TryCand,
2629                         GenericSchedulerBase::SchedCandidate &Cand,
2630                         GenericSchedulerBase::CandReason Reason) {
2631   int TryRank = TryP.getPSetOrMax();
2632   int CandRank = CandP.getPSetOrMax();
2633   // If both candidates affect the same set, go with the smallest increase.
2634   if (TryRank == CandRank) {
2635     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2636                    Reason);
2637   }
2638   // If one candidate decreases and the other increases, go with it.
2639   // Invalid candidates have UnitInc==0.
2640   if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2641               Reason)) {
2642     return true;
2643   }
2644   // If the candidates are decreasing pressure, reverse priority.
2645   if (TryP.getUnitInc() < 0)
2646     std::swap(TryRank, CandRank);
2647   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2648 }
2649
2650 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2651   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2652 }
2653
2654 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2655 /// their physreg def/use.
2656 ///
2657 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2658 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2659 /// with the operation that produces or consumes the physreg. We'll do this when
2660 /// regalloc has support for parallel copies.
2661 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2662   const MachineInstr *MI = SU->getInstr();
2663   if (!MI->isCopy())
2664     return 0;
2665
2666   unsigned ScheduledOper = isTop ? 1 : 0;
2667   unsigned UnscheduledOper = isTop ? 0 : 1;
2668   // If we have already scheduled the physreg produce/consumer, immediately
2669   // schedule the copy.
2670   if (TargetRegisterInfo::isPhysicalRegister(
2671         MI->getOperand(ScheduledOper).getReg()))
2672     return 1;
2673   // If the physreg is at the boundary, defer it. Otherwise schedule it
2674   // immediately to free the dependent. We can hoist the copy later.
2675   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2676   if (TargetRegisterInfo::isPhysicalRegister(
2677         MI->getOperand(UnscheduledOper).getReg()))
2678     return AtBoundary ? -1 : 1;
2679   return 0;
2680 }
2681
2682 /// Apply a set of heursitics to a new candidate. Heuristics are currently
2683 /// hierarchical. This may be more efficient than a graduated cost model because
2684 /// we don't need to evaluate all aspects of the model for each node in the
2685 /// queue. But it's really done to make the heuristics easier to debug and
2686 /// statistically analyze.
2687 ///
2688 /// \param Cand provides the policy and current best candidate.
2689 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2690 /// \param Zone describes the scheduled zone that we are extending.
2691 /// \param RPTracker describes reg pressure within the scheduled zone.
2692 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
2693 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2694                                     SchedCandidate &TryCand,
2695                                     SchedBoundary &Zone,
2696                                     const RegPressureTracker &RPTracker,
2697                                     RegPressureTracker &TempTracker) {
2698
2699   if (DAG->isTrackingPressure()) {
2700     // Always initialize TryCand's RPDelta.
2701     if (Zone.isTop()) {
2702       TempTracker.getMaxDownwardPressureDelta(
2703         TryCand.SU->getInstr(),
2704         TryCand.RPDelta,
2705         DAG->getRegionCriticalPSets(),
2706         DAG->getRegPressure().MaxSetPressure);
2707     }
2708     else {
2709       if (VerifyScheduling) {
2710         TempTracker.getMaxUpwardPressureDelta(
2711           TryCand.SU->getInstr(),
2712           &DAG->getPressureDiff(TryCand.SU),
2713           TryCand.RPDelta,
2714           DAG->getRegionCriticalPSets(),
2715           DAG->getRegPressure().MaxSetPressure);
2716       }
2717       else {
2718         RPTracker.getUpwardPressureDelta(
2719           TryCand.SU->getInstr(),
2720           DAG->getPressureDiff(TryCand.SU),
2721           TryCand.RPDelta,
2722           DAG->getRegionCriticalPSets(),
2723           DAG->getRegPressure().MaxSetPressure);
2724       }
2725     }
2726   }
2727   DEBUG(if (TryCand.RPDelta.Excess.isValid())
2728           dbgs() << "  SU(" << TryCand.SU->NodeNum << ") "
2729                  << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
2730                  << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
2731
2732   // Initialize the candidate if needed.
2733   if (!Cand.isValid()) {
2734     TryCand.Reason = NodeOrder;
2735     return;
2736   }
2737
2738   if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
2739                  biasPhysRegCopy(Cand.SU, Zone.isTop()),
2740                  TryCand, Cand, PhysRegCopy))
2741     return;
2742
2743   // Avoid exceeding the target's limit. If signed PSetID is negative, it is
2744   // invalid; convert it to INT_MAX to give it lowest priority.
2745   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2746                                                Cand.RPDelta.Excess,
2747                                                TryCand, Cand, RegExcess))
2748     return;
2749
2750   // Avoid increasing the max critical pressure in the scheduled region.
2751   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2752                                                Cand.RPDelta.CriticalMax,
2753                                                TryCand, Cand, RegCritical))
2754     return;
2755
2756   // For loops that are acyclic path limited, aggressively schedule for latency.
2757   // This can result in very long dependence chains scheduled in sequence, so
2758   // once every cycle (when CurrMOps == 0), switch to normal heuristics.
2759   if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
2760       && tryLatency(TryCand, Cand, Zone))
2761     return;
2762
2763   // Prioritize instructions that read unbuffered resources by stall cycles.
2764   if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
2765               Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2766     return;
2767
2768   // Keep clustered nodes together to encourage downstream peephole
2769   // optimizations which may reduce resource requirements.
2770   //
2771   // This is a best effort to set things up for a post-RA pass. Optimizations
2772   // like generating loads of multiple registers should ideally be done within
2773   // the scheduler pass by combining the loads during DAG postprocessing.
2774   const SUnit *NextClusterSU =
2775     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2776   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
2777                  TryCand, Cand, Cluster))
2778     return;
2779
2780   // Weak edges are for clustering and other constraints.
2781   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
2782               getWeakLeft(Cand.SU, Zone.isTop()),
2783               TryCand, Cand, Weak)) {
2784     return;
2785   }
2786   // Avoid increasing the max pressure of the entire region.
2787   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2788                                                Cand.RPDelta.CurrentMax,
2789                                                TryCand, Cand, RegMax))
2790     return;
2791
2792   // Avoid critical resource consumption and balance the schedule.
2793   TryCand.initResourceDelta(DAG, SchedModel);
2794   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2795               TryCand, Cand, ResourceReduce))
2796     return;
2797   if (tryGreater(TryCand.ResDelta.DemandedResources,
2798                  Cand.ResDelta.DemandedResources,
2799                  TryCand, Cand, ResourceDemand))
2800     return;
2801
2802   // Avoid serializing long latency dependence chains.
2803   // For acyclic path limited loops, latency was already checked above.
2804   if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
2805       && tryLatency(TryCand, Cand, Zone)) {
2806     return;
2807   }
2808
2809   // Prefer immediate defs/users of the last scheduled instruction. This is a
2810   // local pressure avoidance strategy that also makes the machine code
2811   // readable.
2812   if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
2813                  TryCand, Cand, NextDefUse))
2814     return;
2815
2816   // Fall through to original instruction order.
2817   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2818       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2819     TryCand.Reason = NodeOrder;
2820   }
2821 }
2822
2823 /// Pick the best candidate from the queue.
2824 ///
2825 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2826 /// DAG building. To adjust for the current scheduling location we need to
2827 /// maintain the number of vreg uses remaining to be top-scheduled.
2828 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2829                                          const RegPressureTracker &RPTracker,
2830                                          SchedCandidate &Cand) {
2831   ReadyQueue &Q = Zone.Available;
2832
2833   DEBUG(Q.dump());
2834
2835   // getMaxPressureDelta temporarily modifies the tracker.
2836   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2837
2838   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2839
2840     SchedCandidate TryCand(Cand.Policy);
2841     TryCand.SU = *I;
2842     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
2843     if (TryCand.Reason != NoCand) {
2844       // Initialize resource delta if needed in case future heuristics query it.
2845       if (TryCand.ResDelta == SchedResourceDelta())
2846         TryCand.initResourceDelta(DAG, SchedModel);
2847       Cand.setBest(TryCand);
2848       DEBUG(traceCandidate(Cand));
2849     }
2850   }
2851 }
2852
2853 /// Pick the best candidate node from either the top or bottom queue.
2854 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
2855   // Schedule as far as possible in the direction of no choice. This is most
2856   // efficient, but also provides the best heuristics for CriticalPSets.
2857   if (SUnit *SU = Bot.pickOnlyChoice()) {
2858     IsTopNode = false;
2859     DEBUG(dbgs() << "Pick Bot NOCAND\n");
2860     return SU;
2861   }
2862   if (SUnit *SU = Top.pickOnlyChoice()) {
2863     IsTopNode = true;
2864     DEBUG(dbgs() << "Pick Top NOCAND\n");
2865     return SU;
2866   }
2867   CandPolicy NoPolicy;
2868   SchedCandidate BotCand(NoPolicy);
2869   SchedCandidate TopCand(NoPolicy);
2870   // Set the bottom-up policy based on the state of the current bottom zone and
2871   // the instructions outside the zone, including the top zone.
2872   setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
2873   // Set the top-down policy based on the state of the current top zone and
2874   // the instructions outside the zone, including the bottom zone.
2875   setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
2876
2877   // Prefer bottom scheduling when heuristics are silent.
2878   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2879   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2880
2881   // If either Q has a single candidate that provides the least increase in
2882   // Excess pressure, we can immediately schedule from that Q.
2883   //
2884   // RegionCriticalPSets summarizes the pressure within the scheduled region and
2885   // affects picking from either Q. If scheduling in one direction must
2886   // increase pressure for one of the excess PSets, then schedule in that
2887   // direction first to provide more freedom in the other direction.
2888   if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
2889       || (BotCand.Reason == RegCritical
2890           && !BotCand.isRepeat(RegCritical)))
2891   {
2892     IsTopNode = false;
2893     tracePick(BotCand, IsTopNode);
2894     return BotCand.SU;
2895   }
2896   // Check if the top Q has a better candidate.
2897   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2898   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2899
2900   // Choose the queue with the most important (lowest enum) reason.
2901   if (TopCand.Reason < BotCand.Reason) {
2902     IsTopNode = true;
2903     tracePick(TopCand, IsTopNode);
2904     return TopCand.SU;
2905   }
2906   // Otherwise prefer the bottom candidate, in node order if all else failed.
2907   IsTopNode = false;
2908   tracePick(BotCand, IsTopNode);
2909   return BotCand.SU;
2910 }
2911
2912 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2913 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
2914   if (DAG->top() == DAG->bottom()) {
2915     assert(Top.Available.empty() && Top.Pending.empty() &&
2916            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2917     return NULL;
2918   }
2919   SUnit *SU;
2920   do {
2921     if (RegionPolicy.OnlyTopDown) {
2922       SU = Top.pickOnlyChoice();
2923       if (!SU) {
2924         CandPolicy NoPolicy;
2925         SchedCandidate TopCand(NoPolicy);
2926         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2927         assert(TopCand.Reason != NoCand && "failed to find a candidate");
2928         tracePick(TopCand, true);
2929         SU = TopCand.SU;
2930       }
2931       IsTopNode = true;
2932     }
2933     else if (RegionPolicy.OnlyBottomUp) {
2934       SU = Bot.pickOnlyChoice();
2935       if (!SU) {
2936         CandPolicy NoPolicy;
2937         SchedCandidate BotCand(NoPolicy);
2938         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2939         assert(BotCand.Reason != NoCand && "failed to find a candidate");
2940         tracePick(BotCand, false);
2941         SU = BotCand.SU;
2942       }
2943       IsTopNode = false;
2944     }
2945     else {
2946       SU = pickNodeBidirectional(IsTopNode);
2947     }
2948   } while (SU->isScheduled);
2949
2950   if (SU->isTopReady())
2951     Top.removeReady(SU);
2952   if (SU->isBottomReady())
2953     Bot.removeReady(SU);
2954
2955   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
2956   return SU;
2957 }
2958
2959 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
2960
2961   MachineBasicBlock::iterator InsertPos = SU->getInstr();
2962   if (!isTop)
2963     ++InsertPos;
2964   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
2965
2966   // Find already scheduled copies with a single physreg dependence and move
2967   // them just above the scheduled instruction.
2968   for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
2969        I != E; ++I) {
2970     if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
2971       continue;
2972     SUnit *DepSU = I->getSUnit();
2973     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
2974       continue;
2975     MachineInstr *Copy = DepSU->getInstr();
2976     if (!Copy->isCopy())
2977       continue;
2978     DEBUG(dbgs() << "  Rescheduling physreg copy ";
2979           I->getSUnit()->dump(DAG));
2980     DAG->moveInstruction(Copy, InsertPos);
2981   }
2982 }
2983
2984 /// Update the scheduler's state after scheduling a node. This is the same node
2985 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
2986 /// update it's state based on the current cycle before MachineSchedStrategy
2987 /// does.
2988 ///
2989 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2990 /// them here. See comments in biasPhysRegCopy.
2991 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2992   if (IsTopNode) {
2993     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
2994     Top.bumpNode(SU);
2995     if (SU->hasPhysRegUses)
2996       reschedulePhysRegCopies(SU, true);
2997   }
2998   else {
2999     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3000     Bot.bumpNode(SU);
3001     if (SU->hasPhysRegDefs)
3002       reschedulePhysRegCopies(SU, false);
3003   }
3004 }
3005
3006 /// Create the standard converging machine scheduler. This will be used as the
3007 /// default scheduler if the target does not set a default.
3008 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
3009   ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C));
3010   // Register DAG post-processors.
3011   //
3012   // FIXME: extend the mutation API to allow earlier mutations to instantiate
3013   // data and pass it to later mutations. Have a single mutation that gathers
3014   // the interesting nodes in one pass.
3015   DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
3016   if (EnableLoadCluster && DAG->TII->enableClusterLoads())
3017     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
3018   if (EnableMacroFusion)
3019     DAG->addMutation(new MacroFusion(DAG->TII));
3020   return DAG;
3021 }
3022
3023 static MachineSchedRegistry
3024 GenericSchedRegistry("converge", "Standard converging scheduler.",
3025                      createGenericSchedLive);
3026
3027 //===----------------------------------------------------------------------===//
3028 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3029 //===----------------------------------------------------------------------===//
3030
3031 namespace {
3032 /// PostGenericScheduler - Interface to the scheduling algorithm used by
3033 /// ScheduleDAGMI.
3034 ///
3035 /// Callbacks from ScheduleDAGMI:
3036 ///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
3037 class PostGenericScheduler : public GenericSchedulerBase {
3038   ScheduleDAGMI *DAG;
3039   SchedBoundary Top;
3040   SmallVector<SUnit*, 8> BotRoots;
3041 public:
3042   PostGenericScheduler(const MachineSchedContext *C):
3043     GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
3044
3045   virtual ~PostGenericScheduler() {}
3046
3047   virtual void initPolicy(MachineBasicBlock::iterator Begin,
3048                           MachineBasicBlock::iterator End,
3049                           unsigned NumRegionInstrs) override {
3050     /* no configurable policy */
3051   };
3052
3053   /// PostRA scheduling does not track pressure.
3054   virtual bool shouldTrackPressure() const override { return false; }
3055
3056   virtual void initialize(ScheduleDAGMI *Dag) override {
3057     DAG = Dag;
3058     SchedModel = DAG->getSchedModel();
3059     TRI = DAG->TRI;
3060
3061     Rem.init(DAG, SchedModel);
3062     Top.init(DAG, SchedModel, &Rem);
3063     BotRoots.clear();
3064
3065     // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3066     // or are disabled, then these HazardRecs will be disabled.
3067     const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3068     const TargetMachine &TM = DAG->MF.getTarget();
3069     if (!Top.HazardRec) {
3070       Top.HazardRec =
3071         TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
3072     }
3073   }
3074
3075   virtual void registerRoots() override;
3076
3077   virtual SUnit *pickNode(bool &IsTopNode) override;
3078
3079   virtual void scheduleTree(unsigned SubtreeID) override {
3080     llvm_unreachable("PostRA scheduler does not support subtree analysis.");
3081   }
3082
3083   virtual void schedNode(SUnit *SU, bool IsTopNode) override;
3084
3085   virtual void releaseTopNode(SUnit *SU) override {
3086     Top.releaseTopNode(SU);
3087   }
3088
3089   // Only called for roots.
3090   virtual void releaseBottomNode(SUnit *SU) override {
3091     BotRoots.push_back(SU);
3092   }
3093
3094 protected:
3095   void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
3096
3097   void pickNodeFromQueue(SchedCandidate &Cand);
3098 };
3099 } // namespace
3100
3101 void PostGenericScheduler::registerRoots() {
3102   Rem.CriticalPath = DAG->ExitSU.getDepth();
3103
3104   // Some roots may not feed into ExitSU. Check all of them in case.
3105   for (SmallVectorImpl<SUnit*>::const_iterator
3106          I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
3107     if ((*I)->getDepth() > Rem.CriticalPath)
3108       Rem.CriticalPath = (*I)->getDepth();
3109   }
3110   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
3111 }
3112
3113 /// Apply a set of heursitics to a new candidate for PostRA scheduling.
3114 ///
3115 /// \param Cand provides the policy and current best candidate.
3116 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3117 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3118                                         SchedCandidate &TryCand) {
3119
3120   // Initialize the candidate if needed.
3121   if (!Cand.isValid()) {
3122     TryCand.Reason = NodeOrder;
3123     return;
3124   }
3125
3126   // Prioritize instructions that read unbuffered resources by stall cycles.
3127   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3128               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3129     return;
3130
3131   // Avoid critical resource consumption and balance the schedule.
3132   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3133               TryCand, Cand, ResourceReduce))
3134     return;
3135   if (tryGreater(TryCand.ResDelta.DemandedResources,
3136                  Cand.ResDelta.DemandedResources,
3137                  TryCand, Cand, ResourceDemand))
3138     return;
3139
3140   // Avoid serializing long latency dependence chains.
3141   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3142     return;
3143   }
3144
3145   // Fall through to original instruction order.
3146   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3147     TryCand.Reason = NodeOrder;
3148 }
3149
3150 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3151   ReadyQueue &Q = Top.Available;
3152
3153   DEBUG(Q.dump());
3154
3155   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
3156     SchedCandidate TryCand(Cand.Policy);
3157     TryCand.SU = *I;
3158     TryCand.initResourceDelta(DAG, SchedModel);
3159     tryCandidate(Cand, TryCand);
3160     if (TryCand.Reason != NoCand) {
3161       Cand.setBest(TryCand);
3162       DEBUG(traceCandidate(Cand));
3163     }
3164   }
3165 }
3166
3167 /// Pick the next node to schedule.
3168 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3169   if (DAG->top() == DAG->bottom()) {
3170     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3171     return NULL;
3172   }
3173   SUnit *SU;
3174   do {
3175     SU = Top.pickOnlyChoice();
3176     if (!SU) {
3177       CandPolicy NoPolicy;
3178       SchedCandidate TopCand(NoPolicy);
3179       // Set the top-down policy based on the state of the current top zone and
3180       // the instructions outside the zone, including the bottom zone.
3181       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL);
3182       pickNodeFromQueue(TopCand);
3183       assert(TopCand.Reason != NoCand && "failed to find a candidate");
3184       tracePick(TopCand, true);
3185       SU = TopCand.SU;
3186     }
3187   } while (SU->isScheduled);
3188
3189   IsTopNode = true;
3190   Top.removeReady(SU);
3191
3192   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3193   return SU;
3194 }
3195
3196 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3197 /// scheduled/remaining flags in the DAG nodes.
3198 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3199   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3200   Top.bumpNode(SU);
3201 }
3202
3203 /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
3204 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
3205   return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true);
3206 }
3207
3208 //===----------------------------------------------------------------------===//
3209 // ILP Scheduler. Currently for experimental analysis of heuristics.
3210 //===----------------------------------------------------------------------===//
3211
3212 namespace {
3213 /// \brief Order nodes by the ILP metric.
3214 struct ILPOrder {
3215   const SchedDFSResult *DFSResult;
3216   const BitVector *ScheduledTrees;
3217   bool MaximizeILP;
3218
3219   ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
3220
3221   /// \brief Apply a less-than relation on node priority.
3222   ///
3223   /// (Return true if A comes after B in the Q.)
3224   bool operator()(const SUnit *A, const SUnit *B) const {
3225     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3226     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3227     if (SchedTreeA != SchedTreeB) {
3228       // Unscheduled trees have lower priority.
3229       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3230         return ScheduledTrees->test(SchedTreeB);
3231
3232       // Trees with shallower connections have have lower priority.
3233       if (DFSResult->getSubtreeLevel(SchedTreeA)
3234           != DFSResult->getSubtreeLevel(SchedTreeB)) {
3235         return DFSResult->getSubtreeLevel(SchedTreeA)
3236           < DFSResult->getSubtreeLevel(SchedTreeB);
3237       }
3238     }
3239     if (MaximizeILP)
3240       return DFSResult->getILP(A) < DFSResult->getILP(B);
3241     else
3242       return DFSResult->getILP(A) > DFSResult->getILP(B);
3243   }
3244 };
3245
3246 /// \brief Schedule based on the ILP metric.
3247 class ILPScheduler : public MachineSchedStrategy {
3248   ScheduleDAGMILive *DAG;
3249   ILPOrder Cmp;
3250
3251   std::vector<SUnit*> ReadyQ;
3252 public:
3253   ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
3254
3255   virtual void initialize(ScheduleDAGMI *dag) {
3256     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3257     DAG = static_cast<ScheduleDAGMILive*>(dag);
3258     DAG->computeDFSResult();
3259     Cmp.DFSResult = DAG->getDFSResult();
3260     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3261     ReadyQ.clear();
3262   }
3263
3264   virtual void registerRoots() {
3265     // Restore the heap in ReadyQ with the updated DFS results.
3266     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3267   }
3268
3269   /// Implement MachineSchedStrategy interface.
3270   /// -----------------------------------------
3271
3272   /// Callback to select the highest priority node from the ready Q.
3273   virtual SUnit *pickNode(bool &IsTopNode) {
3274     if (ReadyQ.empty()) return NULL;
3275     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3276     SUnit *SU = ReadyQ.back();
3277     ReadyQ.pop_back();
3278     IsTopNode = false;
3279     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3280           << " ILP: " << DAG->getDFSResult()->getILP(SU)
3281           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3282           << DAG->getDFSResult()->getSubtreeLevel(
3283             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3284           << "Scheduling " << *SU->getInstr());
3285     return SU;
3286   }
3287
3288   /// \brief Scheduler callback to notify that a new subtree is scheduled.
3289   virtual void scheduleTree(unsigned SubtreeID) {
3290     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3291   }
3292
3293   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3294   /// DFSResults, and resort the priority Q.
3295   virtual void schedNode(SUnit *SU, bool IsTopNode) {
3296     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3297   }
3298
3299   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
3300
3301   virtual void releaseBottomNode(SUnit *SU) {
3302     ReadyQ.push_back(SU);
3303     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3304   }
3305 };
3306 } // namespace
3307
3308 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3309   return new ScheduleDAGMILive(C, new ILPScheduler(true));
3310 }
3311 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3312   return new ScheduleDAGMILive(C, new ILPScheduler(false));
3313 }
3314 static MachineSchedRegistry ILPMaxRegistry(
3315   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3316 static MachineSchedRegistry ILPMinRegistry(
3317   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3318
3319 //===----------------------------------------------------------------------===//
3320 // Machine Instruction Shuffler for Correctness Testing
3321 //===----------------------------------------------------------------------===//
3322
3323 #ifndef NDEBUG
3324 namespace {
3325 /// Apply a less-than relation on the node order, which corresponds to the
3326 /// instruction order prior to scheduling. IsReverse implements greater-than.
3327 template<bool IsReverse>
3328 struct SUnitOrder {
3329   bool operator()(SUnit *A, SUnit *B) const {
3330     if (IsReverse)
3331       return A->NodeNum > B->NodeNum;
3332     else
3333       return A->NodeNum < B->NodeNum;
3334   }
3335 };
3336
3337 /// Reorder instructions as much as possible.
3338 class InstructionShuffler : public MachineSchedStrategy {
3339   bool IsAlternating;
3340   bool IsTopDown;
3341
3342   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3343   // gives nodes with a higher number higher priority causing the latest
3344   // instructions to be scheduled first.
3345   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
3346     TopQ;
3347   // When scheduling bottom-up, use greater-than as the queue priority.
3348   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
3349     BottomQ;
3350 public:
3351   InstructionShuffler(bool alternate, bool topdown)
3352     : IsAlternating(alternate), IsTopDown(topdown) {}
3353
3354   virtual void initialize(ScheduleDAGMI*) {
3355     TopQ.clear();
3356     BottomQ.clear();
3357   }
3358
3359   /// Implement MachineSchedStrategy interface.
3360   /// -----------------------------------------
3361
3362   virtual SUnit *pickNode(bool &IsTopNode) {
3363     SUnit *SU;
3364     if (IsTopDown) {
3365       do {
3366         if (TopQ.empty()) return NULL;
3367         SU = TopQ.top();
3368         TopQ.pop();
3369       } while (SU->isScheduled);
3370       IsTopNode = true;
3371     }
3372     else {
3373       do {
3374         if (BottomQ.empty()) return NULL;
3375         SU = BottomQ.top();
3376         BottomQ.pop();
3377       } while (SU->isScheduled);
3378       IsTopNode = false;
3379     }
3380     if (IsAlternating)
3381       IsTopDown = !IsTopDown;
3382     return SU;
3383   }
3384
3385   virtual void schedNode(SUnit *SU, bool IsTopNode) {}
3386
3387   virtual void releaseTopNode(SUnit *SU) {
3388     TopQ.push(SU);
3389   }
3390   virtual void releaseBottomNode(SUnit *SU) {
3391     BottomQ.push(SU);
3392   }
3393 };
3394 } // namespace
3395
3396 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3397   bool Alternate = !ForceTopDown && !ForceBottomUp;
3398   bool TopDown = !ForceBottomUp;
3399   assert((TopDown || !ForceTopDown) &&
3400          "-misched-topdown incompatible with -misched-bottomup");
3401   return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown));
3402 }
3403 static MachineSchedRegistry ShufflerRegistry(
3404   "shuffle", "Shuffle machine instructions alternating directions",
3405   createInstructionShuffler);
3406 #endif // !NDEBUG
3407
3408 //===----------------------------------------------------------------------===//
3409 // GraphWriter support for ScheduleDAGMILive.
3410 //===----------------------------------------------------------------------===//
3411
3412 #ifndef NDEBUG
3413 namespace llvm {
3414
3415 template<> struct GraphTraits<
3416   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3417
3418 template<>
3419 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3420
3421   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
3422
3423   static std::string getGraphName(const ScheduleDAG *G) {
3424     return G->MF.getName();
3425   }
3426
3427   static bool renderGraphFromBottomUp() {
3428     return true;
3429   }
3430
3431   static bool isNodeHidden(const SUnit *Node) {
3432     return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
3433   }
3434
3435   static bool hasNodeAddressLabel(const SUnit *Node,
3436                                   const ScheduleDAG *Graph) {
3437     return false;
3438   }
3439
3440   /// If you want to override the dot attributes printed for a particular
3441   /// edge, override this method.
3442   static std::string getEdgeAttributes(const SUnit *Node,
3443                                        SUnitIterator EI,
3444                                        const ScheduleDAG *Graph) {
3445     if (EI.isArtificialDep())
3446       return "color=cyan,style=dashed";
3447     if (EI.isCtrlDep())
3448       return "color=blue,style=dashed";
3449     return "";
3450   }
3451
3452   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3453     std::string Str;
3454     raw_string_ostream SS(Str);
3455     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3456     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3457       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
3458     SS << "SU:" << SU->NodeNum;
3459     if (DFS)
3460       SS << " I:" << DFS->getNumInstrs(SU);
3461     return SS.str();
3462   }
3463   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3464     return G->getGraphNodeLabel(SU);
3465   }
3466
3467   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3468     std::string Str("shape=Mrecord");
3469     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3470     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3471       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
3472     if (DFS) {
3473       Str += ",style=filled,fillcolor=\"#";
3474       Str += DOT::getColorString(DFS->getSubtreeID(N));
3475       Str += '"';
3476     }
3477     return Str;
3478   }
3479 };
3480 } // namespace llvm
3481 #endif // NDEBUG
3482
3483 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3484 /// rendered using 'dot'.
3485 ///
3486 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3487 #ifndef NDEBUG
3488   ViewGraph(this, Name, false, Title);
3489 #else
3490   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3491          << "systems with Graphviz or gv!\n";
3492 #endif  // NDEBUG
3493 }
3494
3495 /// Out-of-line implementation with no arguments is handy for gdb.
3496 void ScheduleDAGMI::viewGraph() {
3497   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3498 }