lib/CodeGen/MachineScheduler.cpp

   1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // MachineScheduler schedules machine instructions after phi elimination. It
  11 // preserves LiveIntervals so it can be invoked before register allocation.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #define DEBUG_TYPE "misched"
  16
  17 #include "llvm/CodeGen/MachineScheduler.h"
  18 #include "llvm/ADT/OwningPtr.h"
  19 #include "llvm/ADT/PriorityQueue.h"
  20 #include "llvm/Analysis/AliasAnalysis.h"
  21 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
  22 #include "llvm/CodeGen/MachineDominators.h"
  23 #include "llvm/CodeGen/MachineLoopInfo.h"
  24 #include "llvm/CodeGen/MachineRegisterInfo.h"
  25 #include "llvm/CodeGen/Passes.h"
  26 #include "llvm/CodeGen/RegisterClassInfo.h"
  27 #include "llvm/CodeGen/ScheduleDFS.h"
  28 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
  29 #include "llvm/Support/CommandLine.h"
  30 #include "llvm/Support/Debug.h"
  31 #include "llvm/Support/ErrorHandling.h"
  32 #include "llvm/Support/GraphWriter.h"
  33 #include "llvm/Support/raw_ostream.h"
  34 #include "llvm/Target/TargetInstrInfo.h"
  35 #include <queue>
  36
  37 using namespace llvm;
  38
  39 namespace llvm {
  40 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
  41                            cl::desc("Force top-down list scheduling"));
  42 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
  43                             cl::desc("Force bottom-up list scheduling"));
  44 }
  45
  46 #ifndef NDEBUG
  47 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  48   cl::desc("Pop up a window to show MISched dags after they are processed"));
  49
  50 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  51   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  52 #else
  53 static bool ViewMISchedDAGs = false;
  54 #endif // NDEBUG
  55
  56 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  57   cl::desc("Enable register pressure scheduling."), cl::init(true));
  58
  59 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  60   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
  61
  62 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  63   cl::desc("Enable load clustering."), cl::init(true));
  64
  65 // Experimental heuristics
  66 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  67   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
  68
  69 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
  70   cl::desc("Verify machine instrs before and after machine scheduling"));
  71
  72 // DAG subtrees must have at least this many nodes.
  73 static const unsigned MinSubtreeSize = 8;
  74
  75 // Pin the vtables to this file.
  76 void MachineSchedStrategy::anchor() {}
  77 void ScheduleDAGMutation::anchor() {}
  78
  79 //===----------------------------------------------------------------------===//
  80 // Machine Instruction Scheduling Pass and Registry
  81 //===----------------------------------------------------------------------===//
  82
  83 MachineSchedContext::MachineSchedContext():
  84     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  85   RegClassInfo = new RegisterClassInfo();
  86 }
  87
  88 MachineSchedContext::~MachineSchedContext() {
  89   delete RegClassInfo;
  90 }
  91
  92 namespace {
  93 /// Base class for a machine scheduler class that can run at any point.
  94 class MachineSchedulerBase : public MachineSchedContext,
  95                              public MachineFunctionPass {
  96 public:
  97   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
  98
  99   virtual void print(raw_ostream &O, const Module* = 0) const;
 100
 101 protected:
 102   void scheduleRegions(ScheduleDAGInstrs &Scheduler);
 103 };
 104
 105 /// MachineScheduler runs after coalescing and before register allocation.
 106 class MachineScheduler : public MachineSchedulerBase {
 107 public:
 108   MachineScheduler();
 109
 110   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
 111
 112   virtual bool runOnMachineFunction(MachineFunction&);
 113
 114   static char ID; // Class identification, replacement for typeinfo
 115
 116 protected:
 117   ScheduleDAGInstrs *createMachineScheduler();
 118 };
 119
 120 /// PostMachineScheduler runs after shortly before code emission.
 121 class PostMachineScheduler : public MachineSchedulerBase {
 122 public:
 123   PostMachineScheduler();
 124
 125   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
 126
 127   virtual bool runOnMachineFunction(MachineFunction&);
 128
 129   static char ID; // Class identification, replacement for typeinfo
 130
 131 protected:
 132   ScheduleDAGInstrs *createPostMachineScheduler();
 133 };
 134 } // namespace
 135
 136 char MachineScheduler::ID = 0;
 137
 138 char &llvm::MachineSchedulerID = MachineScheduler::ID;
 139
 140 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
 141                       "Machine Instruction Scheduler", false, false)
 142 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 143 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 144 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 145 INITIALIZE_PASS_END(MachineScheduler, "misched",
 146                     "Machine Instruction Scheduler", false, false)
 147
 148 MachineScheduler::MachineScheduler()
 149 : MachineSchedulerBase(ID) {
 150   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 151 }
 152
 153 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 154   AU.setPreservesCFG();
 155   AU.addRequiredID(MachineDominatorsID);
 156   AU.addRequired<MachineLoopInfo>();
 157   AU.addRequired<AliasAnalysis>();
 158   AU.addRequired<TargetPassConfig>();
 159   AU.addRequired<SlotIndexes>();
 160   AU.addPreserved<SlotIndexes>();
 161   AU.addRequired<LiveIntervals>();
 162   AU.addPreserved<LiveIntervals>();
 163   MachineFunctionPass::getAnalysisUsage(AU);
 164 }
 165
 166 char PostMachineScheduler::ID = 0;
 167
 168 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
 169
 170 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
 171                 "PostRA Machine Instruction Scheduler", false, false);
 172
 173 PostMachineScheduler::PostMachineScheduler()
 174 : MachineSchedulerBase(ID) {
 175   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
 176 }
 177
 178 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
 179   AU.setPreservesCFG();
 180   AU.addRequiredID(MachineDominatorsID);
 181   AU.addRequired<MachineLoopInfo>();
 182   AU.addRequired<TargetPassConfig>();
 183   MachineFunctionPass::getAnalysisUsage(AU);
 184 }
 185
 186 MachinePassRegistry MachineSchedRegistry::Registry;
 187
 188 /// A dummy default scheduler factory indicates whether the scheduler
 189 /// is overridden on the command line.
 190 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
 191   return 0;
 192 }
 193
 194 /// MachineSchedOpt allows command line selection of the scheduler.
 195 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
 196                RegisterPassParser<MachineSchedRegistry> >
 197 MachineSchedOpt("misched",
 198                 cl::init(&useDefaultMachineSched), cl::Hidden,
 199                 cl::desc("Machine instruction scheduler to use"));
 200
 201 static MachineSchedRegistry
 202 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
 203                      useDefaultMachineSched);
 204
 205 /// Forward declare the standard machine scheduler. This will be used as the
 206 /// default scheduler if the target does not set a default.
 207 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
 208 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
 209
 210 /// Decrement this iterator until reaching the top or a non-debug instr.
 211 static MachineBasicBlock::const_iterator
 212 priorNonDebug(MachineBasicBlock::const_iterator I,
 213               MachineBasicBlock::const_iterator Beg) {
 214   assert(I != Beg && "reached the top of the region, cannot decrement");
 215   while (--I != Beg) {
 216     if (!I->isDebugValue())
 217       break;
 218   }
 219   return I;
 220 }
 221
 222 /// Non-const version.
 223 static MachineBasicBlock::iterator
 224 priorNonDebug(MachineBasicBlock::iterator I,
 225               MachineBasicBlock::const_iterator Beg) {
 226   return const_cast<MachineInstr*>(
 227     &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
 228 }
 229
 230 /// If this iterator is a debug value, increment until reaching the End or a
 231 /// non-debug instruction.
 232 static MachineBasicBlock::const_iterator
 233 nextIfDebug(MachineBasicBlock::const_iterator I,
 234             MachineBasicBlock::const_iterator End) {
 235   for(; I != End; ++I) {
 236     if (!I->isDebugValue())
 237       break;
 238   }
 239   return I;
 240 }
 241
 242 /// Non-const version.
 243 static MachineBasicBlock::iterator
 244 nextIfDebug(MachineBasicBlock::iterator I,
 245             MachineBasicBlock::const_iterator End) {
 246   // Cast the return value to nonconst MachineInstr, then cast to an
 247   // instr_iterator, which does not check for null, finally return a
 248   // bundle_iterator.
 249   return MachineBasicBlock::instr_iterator(
 250     const_cast<MachineInstr*>(
 251       &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
 252 }
 253
 254 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
 255 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
 256   // Select the scheduler, or set the default.
 257   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
 258   if (Ctor != useDefaultMachineSched)
 259     return Ctor(this);
 260
 261   // Get the default scheduler set by the target for this function.
 262   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
 263   if (Scheduler)
 264     return Scheduler;
 265
 266   // Default to GenericScheduler.
 267   return createGenericSchedLive(this);
 268 }
 269
 270 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
 271 /// the caller. We don't have a command line option to override the postRA
 272 /// scheduler. The Target must configure it.
 273 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
 274   // Get the postRA scheduler set by the target for this function.
 275   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
 276   if (Scheduler)
 277     return Scheduler;
 278
 279   // Default to GenericScheduler.
 280   return createGenericSchedPostRA(this);
 281 }
 282
 283 /// Top-level MachineScheduler pass driver.
 284 ///
 285 /// Visit blocks in function order. Divide each block into scheduling regions
 286 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
 287 /// consistent with the DAG builder, which traverses the interior of the
 288 /// scheduling regions bottom-up.
 289 ///
 290 /// This design avoids exposing scheduling boundaries to the DAG builder,
 291 /// simplifying the DAG builder's support for "special" target instructions.
 292 /// At the same time the design allows target schedulers to operate across
 293 /// scheduling boundaries, for example to bundle the boudary instructions
 294 /// without reordering them. This creates complexity, because the target
 295 /// scheduler must update the RegionBegin and RegionEnd positions cached by
 296 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
 297 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 298 /// general bias against block splitting purely for implementation simplicity.
 299 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 300   DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
 301
 302   // Initialize the context of the pass.
 303   MF = &mf;
 304   MLI = &getAnalysis<MachineLoopInfo>();
 305   MDT = &getAnalysis<MachineDominatorTree>();
 306   PassConfig = &getAnalysis<TargetPassConfig>();
 307   AA = &getAnalysis<AliasAnalysis>();
 308
 309   LIS = &getAnalysis<LiveIntervals>();
 310
 311   if (VerifyScheduling) {
 312     DEBUG(LIS->dump());
 313     MF->verify(this, "Before machine scheduling.");
 314   }
 315   RegClassInfo->runOnMachineFunction(*MF);
 316
 317   // Instantiate the selected scheduler for this target, function, and
 318   // optimization level.
 319   OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
 320   scheduleRegions(*Scheduler);
 321
 322   DEBUG(LIS->dump());
 323   if (VerifyScheduling)
 324     MF->verify(this, "After machine scheduling.");
 325   return true;
 326 }
 327
 328 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 329   DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
 330
 331   // Initialize the context of the pass.
 332   MF = &mf;
 333   PassConfig = &getAnalysis<TargetPassConfig>();
 334
 335   if (VerifyScheduling)
 336     MF->verify(this, "Before post machine scheduling.");
 337
 338   // Instantiate the selected scheduler for this target, function, and
 339   // optimization level.
 340   OwningPtr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
 341   scheduleRegions(*Scheduler);
 342
 343   if (VerifyScheduling)
 344     MF->verify(this, "After post machine scheduling.");
 345   return true;
 346 }
 347
 348 /// Return true of the given instruction should not be included in a scheduling
 349 /// region.
 350 ///
 351 /// MachineScheduler does not currently support scheduling across calls. To
 352 /// handle calls, the DAG builder needs to be modified to create register
 353 /// anti/output dependencies on the registers clobbered by the call's regmask
 354 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
 355 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
 356 /// the boundary, but there would be no benefit to postRA scheduling across
 357 /// calls this late anyway.
 358 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
 359                             MachineBasicBlock *MBB,
 360                             MachineFunction *MF,
 361                             const TargetInstrInfo *TII,
 362                             bool IsPostRA) {
 363   return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
 364 }
 365
 366 /// Main driver for both MachineScheduler and PostMachineScheduler.
 367 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
 368   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 369   bool IsPostRA = Scheduler.isPostRA();
 370
 371   // Visit all machine basic blocks.
 372   //
 373   // TODO: Visit blocks in global postorder or postorder within the bottom-up
 374   // loop tree. Then we can optionally compute global RegPressure.
 375   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
 376        MBB != MBBEnd; ++MBB) {
 377
 378     Scheduler.startBlock(MBB);
 379
 380     // Break the block into scheduling regions [I, RegionEnd), and schedule each
 381     // region as soon as it is discovered. RegionEnd points the scheduling
 382     // boundary at the bottom of the region. The DAG does not include RegionEnd,
 383     // but the region does (i.e. the next RegionEnd is above the previous
 384     // RegionBegin). If the current block has no terminator then RegionEnd ==
 385     // MBB->end() for the bottom region.
 386     //
 387     // The Scheduler may insert instructions during either schedule() or
 388     // exitRegion(), even for empty regions. So the local iterators 'I' and
 389     // 'RegionEnd' are invalid across these calls.
 390     //
 391     // MBB::size() uses instr_iterator to count. Here we need a bundle to count
 392     // as a single instruction.
 393     unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
 394     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
 395         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
 396
 397       // Avoid decrementing RegionEnd for blocks with no terminator.
 398       if (RegionEnd != MBB->end()
 399           || isSchedBoundary(llvm::prior(RegionEnd), MBB, MF, TII, IsPostRA)) {
 400         --RegionEnd;
 401         // Count the boundary instruction.
 402         --RemainingInstrs;
 403       }
 404
 405       // The next region starts above the previous region. Look backward in the
 406       // instruction stream until we find the nearest boundary.
 407       unsigned NumRegionInstrs = 0;
 408       MachineBasicBlock::iterator I = RegionEnd;
 409       for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
 410         if (isSchedBoundary(llvm::prior(I), MBB, MF, TII, IsPostRA))
 411           break;
 412       }
 413       // Notify the scheduler of the region, even if we may skip scheduling
 414       // it. Perhaps it still needs to be bundled.
 415       Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
 416
 417       // Skip empty scheduling regions (0 or 1 schedulable instructions).
 418       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
 419         // Close the current region. Bundle the terminator if needed.
 420         // This invalidates 'RegionEnd' and 'I'.
 421         Scheduler.exitRegion();
 422         continue;
 423       }
 424       DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
 425             << "MI Scheduling **********\n");
 426       DEBUG(dbgs() << MF->getName()
 427             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
 428             << "\n  From: " << *I << "    To: ";
 429             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
 430             else dbgs() << "End";
 431             dbgs() << " RegionInstrs: " << NumRegionInstrs
 432             << " Remaining: " << RemainingInstrs << "\n");
 433
 434       // Schedule a region: possibly reorder instructions.
 435       // This invalidates 'RegionEnd' and 'I'.
 436       Scheduler.schedule();
 437
 438       // Close the current region.
 439       Scheduler.exitRegion();
 440
 441       // Scheduling has invalidated the current iterator 'I'. Ask the
 442       // scheduler for the top of it's scheduled region.
 443       RegionEnd = Scheduler.begin();
 444     }
 445     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
 446     Scheduler.finishBlock();
 447     if (Scheduler.isPostRA()) {
 448       // FIXME: Ideally, no further passes should rely on kill flags. However,
 449       // thumb2 size reduction is currently an exception.
 450       Scheduler.fixupKills(MBB);
 451     }
 452   }
 453   Scheduler.finalizeSchedule();
 454 }
 455
 456 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
 457   // unimplemented
 458 }
 459
 460 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 461 void ReadyQueue::dump() {
 462   dbgs() << Name << ": ";
 463   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
 464     dbgs() << Queue[i]->NodeNum << " ";
 465   dbgs() << "\n";
 466 }
 467 #endif
 468
 469 //===----------------------------------------------------------------------===//
 470 // ScheduleDAGMI - Basic machine instruction scheduling. This is
 471 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
 472 // virtual registers.
 473 // ===----------------------------------------------------------------------===/
 474
 475 ScheduleDAGMI::~ScheduleDAGMI() {
 476   DeleteContainerPointers(Mutations);
 477   delete SchedImpl;
 478 }
 479
 480 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
 481   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
 482 }
 483
 484 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
 485   if (SuccSU != &ExitSU) {
 486     // Do not use WillCreateCycle, it assumes SD scheduling.
 487     // If Pred is reachable from Succ, then the edge creates a cycle.
 488     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
 489       return false;
 490     Topo.AddPred(SuccSU, PredDep.getSUnit());
 491   }
 492   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
 493   // Return true regardless of whether a new edge needed to be inserted.
 494   return true;
 495 }
 496
 497 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 498 /// NumPredsLeft reaches zero, release the successor node.
 499 ///
 500 /// FIXME: Adjust SuccSU height based on MinLatency.
 501 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 502   SUnit *SuccSU = SuccEdge->getSUnit();
 503
 504   if (SuccEdge->isWeak()) {
 505     --SuccSU->WeakPredsLeft;
 506     if (SuccEdge->isCluster())
 507       NextClusterSucc = SuccSU;
 508     return;
 509   }
 510 #ifndef NDEBUG
 511   if (SuccSU->NumPredsLeft == 0) {
 512     dbgs() << "*** Scheduling failed! ***\n";
 513     SuccSU->dump(this);
 514     dbgs() << " has been released too many times!\n";
 515     llvm_unreachable(0);
 516   }
 517 #endif
 518   --SuccSU->NumPredsLeft;
 519   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
 520     SchedImpl->releaseTopNode(SuccSU);
 521 }
 522
 523 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
 524 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 525   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
 526        I != E; ++I) {
 527     releaseSucc(SU, &*I);
 528   }
 529 }
 530
 531 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
 532 /// NumSuccsLeft reaches zero, release the predecessor node.
 533 ///
 534 /// FIXME: Adjust PredSU height based on MinLatency.
 535 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
 536   SUnit *PredSU = PredEdge->getSUnit();
 537
 538   if (PredEdge->isWeak()) {
 539     --PredSU->WeakSuccsLeft;
 540     if (PredEdge->isCluster())
 541       NextClusterPred = PredSU;
 542     return;
 543   }
 544 #ifndef NDEBUG
 545   if (PredSU->NumSuccsLeft == 0) {
 546     dbgs() << "*** Scheduling failed! ***\n";
 547     PredSU->dump(this);
 548     dbgs() << " has been released too many times!\n";
 549     llvm_unreachable(0);
 550   }
 551 #endif
 552   --PredSU->NumSuccsLeft;
 553   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
 554     SchedImpl->releaseBottomNode(PredSU);
 555 }
 556
 557 /// releasePredecessors - Call releasePred on each of SU's predecessors.
 558 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
 559   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
 560        I != E; ++I) {
 561     releasePred(SU, &*I);
 562   }
 563 }
 564
 565 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 566 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 567 /// the region, including the boundary itself and single-instruction regions
 568 /// that don't get scheduled.
 569 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
 570                                      MachineBasicBlock::iterator begin,
 571                                      MachineBasicBlock::iterator end,
 572                                      unsigned regioninstrs)
 573 {
 574   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
 575
 576   SchedImpl->initPolicy(begin, end, regioninstrs);
 577 }
 578
 579 /// This is normally called from the main scheduler loop but may also be invoked
 580 /// by the scheduling strategy to perform additional code motion.
 581 void ScheduleDAGMI::moveInstruction(
 582   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
 583   // Advance RegionBegin if the first instruction moves down.
 584   if (&*RegionBegin == MI)
 585     ++RegionBegin;
 586
 587   // Update the instruction stream.
 588   BB->splice(InsertPos, BB, MI);
 589
 590   // Update LiveIntervals
 591   if (LIS)
 592     LIS->handleMove(MI, /*UpdateFlags=*/true);
 593
 594   // Recede RegionBegin if an instruction moves above the first.
 595   if (RegionBegin == InsertPos)
 596     RegionBegin = MI;
 597 }
 598
 599 bool ScheduleDAGMI::checkSchedLimit() {
 600 #ifndef NDEBUG
 601   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
 602     CurrentTop = CurrentBottom;
 603     return false;
 604   }
 605   ++NumInstrsScheduled;
 606 #endif
 607   return true;
 608 }
 609
 610 /// Per-region scheduling driver, called back from
 611 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
 612 /// does not consider liveness or register pressure. It is useful for PostRA
 613 /// scheduling and potentially other custom schedulers.
 614 void ScheduleDAGMI::schedule() {
 615   // Build the DAG.
 616   buildSchedGraph(AA);
 617
 618   Topo.InitDAGTopologicalSorting();
 619
 620   postprocessDAG();
 621
 622   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 623   findRootsAndBiasEdges(TopRoots, BotRoots);
 624
 625   // Initialize the strategy before modifying the DAG.
 626   // This may initialize a DFSResult to be used for queue priority.
 627   SchedImpl->initialize(this);
 628
 629   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 630           SUnits[su].dumpAll(this));
 631   if (ViewMISchedDAGs) viewGraph();
 632
 633   // Initialize ready queues now that the DAG and priority data are finalized.
 634   initQueues(TopRoots, BotRoots);
 635
 636   bool IsTopNode = false;
 637   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
 638     assert(!SU->isScheduled && "Node already scheduled");
 639     if (!checkSchedLimit())
 640       break;
 641
 642     MachineInstr *MI = SU->getInstr();
 643     if (IsTopNode) {
 644       assert(SU->isTopReady() && "node still has unscheduled dependencies");
 645       if (&*CurrentTop == MI)
 646         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
 647       else
 648         moveInstruction(MI, CurrentTop);
 649     }
 650     else {
 651       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
 652       MachineBasicBlock::iterator priorII =
 653         priorNonDebug(CurrentBottom, CurrentTop);
 654       if (&*priorII == MI)
 655         CurrentBottom = priorII;
 656       else {
 657         if (&*CurrentTop == MI)
 658           CurrentTop = nextIfDebug(++CurrentTop, priorII);
 659         moveInstruction(MI, CurrentBottom);
 660         CurrentBottom = MI;
 661       }
 662     }
 663     updateQueues(SU, IsTopNode);
 664
 665     // Notify the scheduling strategy after updating the DAG.
 666     SchedImpl->schedNode(SU, IsTopNode);
 667   }
 668   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 669
 670   placeDebugValues();
 671
 672   DEBUG({
 673       unsigned BBNum = begin()->getParent()->getNumber();
 674       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
 675       dumpSchedule();
 676       dbgs() << '\n';
 677     });
 678 }
 679
 680 /// Apply each ScheduleDAGMutation step in order.
 681 void ScheduleDAGMI::postprocessDAG() {
 682   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
 683     Mutations[i]->apply(this);
 684   }
 685 }
 686
 687 void ScheduleDAGMI::
 688 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
 689                       SmallVectorImpl<SUnit*> &BotRoots) {
 690   for (std::vector<SUnit>::iterator
 691          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
 692     SUnit *SU = &(*I);
 693     assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
 694
 695     // Order predecessors so DFSResult follows the critical path.
 696     SU->biasCriticalPath();
 697
 698     // A SUnit is ready to top schedule if it has no predecessors.
 699     if (!I->NumPredsLeft)
 700       TopRoots.push_back(SU);
 701     // A SUnit is ready to bottom schedule if it has no successors.
 702     if (!I->NumSuccsLeft)
 703       BotRoots.push_back(SU);
 704   }
 705   ExitSU.biasCriticalPath();
 706 }
 707
 708 /// Identify DAG roots and setup scheduler queues.
 709 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
 710                                ArrayRef<SUnit*> BotRoots) {
 711   NextClusterSucc = NULL;
 712   NextClusterPred = NULL;
 713
 714   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
 715   //
 716   // Nodes with unreleased weak edges can still be roots.
 717   // Release top roots in forward order.
 718   for (SmallVectorImpl<SUnit*>::const_iterator
 719          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
 720     SchedImpl->releaseTopNode(*I);
 721   }
 722   // Release bottom roots in reverse order so the higher priority nodes appear
 723   // first. This is more natural and slightly more efficient.
 724   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
 725          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
 726     SchedImpl->releaseBottomNode(*I);
 727   }
 728
 729   releaseSuccessors(&EntrySU);
 730   releasePredecessors(&ExitSU);
 731
 732   SchedImpl->registerRoots();
 733
 734   // Advance past initial DebugValues.
 735   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
 736   CurrentBottom = RegionEnd;
 737 }
 738
 739 /// Update scheduler queues after scheduling an instruction.
 740 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
 741   // Release dependent instructions for scheduling.
 742   if (IsTopNode)
 743     releaseSuccessors(SU);
 744   else
 745     releasePredecessors(SU);
 746
 747   SU->isScheduled = true;
 748 }
 749
 750 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
 751 void ScheduleDAGMI::placeDebugValues() {
 752   // If first instruction was a DBG_VALUE then put it back.
 753   if (FirstDbgValue) {
 754     BB->splice(RegionBegin, BB, FirstDbgValue);
 755     RegionBegin = FirstDbgValue;
 756   }
 757
 758   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
 759          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
 760     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
 761     MachineInstr *DbgValue = P.first;
 762     MachineBasicBlock::iterator OrigPrevMI = P.second;
 763     if (&*RegionBegin == DbgValue)
 764       ++RegionBegin;
 765     BB->splice(++OrigPrevMI, BB, DbgValue);
 766     if (OrigPrevMI == llvm::prior(RegionEnd))
 767       RegionEnd = DbgValue;
 768   }
 769   DbgValues.clear();
 770   FirstDbgValue = NULL;
 771 }
 772
 773 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 774 void ScheduleDAGMI::dumpSchedule() const {
 775   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
 776     if (SUnit *SU = getSUnit(&(*MI)))
 777       SU->dump(this);
 778     else
 779       dbgs() << "Missing SUnit\n";
 780   }
 781 }
 782 #endif
 783
 784 //===----------------------------------------------------------------------===//
 785 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
 786 // preservation.
 787 //===----------------------------------------------------------------------===//
 788
 789 ScheduleDAGMILive::~ScheduleDAGMILive() {
 790   delete DFSResult;
 791 }
 792
 793 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 794 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 795 /// the region, including the boundary itself and single-instruction regions
 796 /// that don't get scheduled.
 797 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
 798                                 MachineBasicBlock::iterator begin,
 799                                 MachineBasicBlock::iterator end,
 800                                 unsigned regioninstrs)
 801 {
 802   // ScheduleDAGMI initializes SchedImpl's per-region policy.
 803   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
 804
 805   // For convenience remember the end of the liveness region.
 806   LiveRegionEnd =
 807     (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
 808
 809   SUPressureDiffs.clear();
 810
 811   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
 812 }
 813
 814 // Setup the register pressure trackers for the top scheduled top and bottom
 815 // scheduled regions.
 816 void ScheduleDAGMILive::initRegPressure() {
 817   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
 818   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
 819
 820   // Close the RPTracker to finalize live ins.
 821   RPTracker.closeRegion();
 822
 823   DEBUG(RPTracker.dump());
 824
 825   // Initialize the live ins and live outs.
 826   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
 827   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
 828
 829   // Close one end of the tracker so we can call
 830   // getMaxUpward/DownwardPressureDelta before advancing across any
 831   // instructions. This converts currently live regs into live ins/outs.
 832   TopRPTracker.closeTop();
 833   BotRPTracker.closeBottom();
 834
 835   BotRPTracker.initLiveThru(RPTracker);
 836   if (!BotRPTracker.getLiveThru().empty()) {
 837     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
 838     DEBUG(dbgs() << "Live Thru: ";
 839           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
 840   };
 841
 842   // For each live out vreg reduce the pressure change associated with other
 843   // uses of the same vreg below the live-out reaching def.
 844   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
 845
 846   // Account for liveness generated by the region boundary.
 847   if (LiveRegionEnd != RegionEnd) {
 848     SmallVector<unsigned, 8> LiveUses;
 849     BotRPTracker.recede(&LiveUses);
 850     updatePressureDiffs(LiveUses);
 851   }
 852
 853   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
 854
 855   // Cache the list of excess pressure sets in this region. This will also track
 856   // the max pressure in the scheduled code for these sets.
 857   RegionCriticalPSets.clear();
 858   const std::vector<unsigned> &RegionPressure =
 859     RPTracker.getPressure().MaxSetPressure;
 860   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
 861     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
 862     if (RegionPressure[i] > Limit) {
 863       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
 864             << " Limit " << Limit
 865             << " Actual " << RegionPressure[i] << "\n");
 866       RegionCriticalPSets.push_back(PressureChange(i));
 867     }
 868   }
 869   DEBUG(dbgs() << "Excess PSets: ";
 870         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
 871           dbgs() << TRI->getRegPressureSetName(
 872             RegionCriticalPSets[i].getPSet()) << " ";
 873         dbgs() << "\n");
 874 }
 875
 876 void ScheduleDAGMILive::
 877 updateScheduledPressure(const SUnit *SU,
 878                         const std::vector<unsigned> &NewMaxPressure) {
 879   const PressureDiff &PDiff = getPressureDiff(SU);
 880   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
 881   for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end();
 882        I != E; ++I) {
 883     if (!I->isValid())
 884       break;
 885     unsigned ID = I->getPSet();
 886     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
 887       ++CritIdx;
 888     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
 889       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
 890           && NewMaxPressure[ID] <= INT16_MAX)
 891         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
 892     }
 893     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
 894     if (NewMaxPressure[ID] >= Limit - 2) {
 895       DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
 896             << NewMaxPressure[ID] << " > " << Limit << "(+ "
 897             << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
 898     }
 899   }
 900 }
 901
 902 /// Update the PressureDiff array for liveness after scheduling this
 903 /// instruction.
 904 void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
 905   for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
 906     /// FIXME: Currently assuming single-use physregs.
 907     unsigned Reg = LiveUses[LUIdx];
 908     DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
 909     if (!TRI->isVirtualRegister(Reg))
 910       continue;
 911
 912     // This may be called before CurrentBottom has been initialized. However,
 913     // BotRPTracker must have a valid position. We want the value live into the
 914     // instruction or live out of the block, so ask for the previous
 915     // instruction's live-out.
 916     const LiveInterval &LI = LIS->getInterval(Reg);
 917     VNInfo *VNI;
 918     MachineBasicBlock::const_iterator I =
 919       nextIfDebug(BotRPTracker.getPos(), BB->end());
 920     if (I == BB->end())
 921       VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
 922     else {
 923       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
 924       VNI = LRQ.valueIn();
 925     }
 926     // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
 927     assert(VNI && "No live value at use.");
 928     for (VReg2UseMap::iterator
 929            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
 930       SUnit *SU = UI->SU;
 931       DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
 932             << *SU->getInstr());
 933       // If this use comes before the reaching def, it cannot be a last use, so
 934       // descrease its pressure change.
 935       if (!SU->isScheduled && SU != &ExitSU) {
 936         LiveQueryResult LRQ
 937           = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
 938         if (LRQ.valueIn() == VNI)
 939           getPressureDiff(SU).addPressureChange(Reg, true, &MRI);
 940       }
 941     }
 942   }
 943 }
 944
 945 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 946 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 947 /// only includes instructions that have DAG nodes, not scheduling boundaries.
 948 ///
 949 /// This is a skeletal driver, with all the functionality pushed into helpers,
 950 /// so that it can be easilly extended by experimental schedulers. Generally,
 951 /// implementing MachineSchedStrategy should be sufficient to implement a new
 952 /// scheduling algorithm. However, if a scheduler further subclasses
 953 /// ScheduleDAGMILive then it will want to override this virtual method in order
 954 /// to update any specialized state.
 955 void ScheduleDAGMILive::schedule() {
 956   buildDAGWithRegPressure();
 957
 958   Topo.InitDAGTopologicalSorting();
 959
 960   postprocessDAG();
 961
 962   SmallVector<SUnit*, 8> TopRoots, BotRoots;
 963   findRootsAndBiasEdges(TopRoots, BotRoots);
 964
 965   // Initialize the strategy before modifying the DAG.
 966   // This may initialize a DFSResult to be used for queue priority.
 967   SchedImpl->initialize(this);
 968
 969   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
 970           SUnits[su].dumpAll(this));
 971   if (ViewMISchedDAGs) viewGraph();
 972
 973   // Initialize ready queues now that the DAG and priority data are finalized.
 974   initQueues(TopRoots, BotRoots);
 975
 976   if (ShouldTrackPressure) {
 977     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
 978     TopRPTracker.setPos(CurrentTop);
 979   }
 980
 981   bool IsTopNode = false;
 982   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
 983     assert(!SU->isScheduled && "Node already scheduled");
 984     if (!checkSchedLimit())
 985       break;
 986
 987     scheduleMI(SU, IsTopNode);
 988
 989     updateQueues(SU, IsTopNode);
 990
 991     if (DFSResult) {
 992       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
 993       if (!ScheduledTrees.test(SubtreeID)) {
 994         ScheduledTrees.set(SubtreeID);
 995         DFSResult->scheduleTree(SubtreeID);
 996         SchedImpl->scheduleTree(SubtreeID);
 997       }
 998     }
 999
1000     // Notify the scheduling strategy after updating the DAG.
1001     SchedImpl->schedNode(SU, IsTopNode);
1002   }
1003   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1004
1005   placeDebugValues();
1006
1007   DEBUG({
1008       unsigned BBNum = begin()->getParent()->getNumber();
1009       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
1010       dumpSchedule();
1011       dbgs() << '\n';
1012     });
1013 }
1014
1015 /// Build the DAG and setup three register pressure trackers.
1016 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1017   if (!ShouldTrackPressure) {
1018     RPTracker.reset();
1019     RegionCriticalPSets.clear();
1020     buildSchedGraph(AA);
1021     return;
1022   }
1023
1024   // Initialize the register pressure tracker used by buildSchedGraph.
1025   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1026                  /*TrackUntiedDefs=*/true);
1027
1028   // Account for liveness generate by the region boundary.
1029   if (LiveRegionEnd != RegionEnd)
1030     RPTracker.recede();
1031
1032   // Build the DAG, and compute current register pressure.
1033   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
1034
1035   // Initialize top/bottom trackers after computing region pressure.
1036   initRegPressure();
1037 }
1038
1039 void ScheduleDAGMILive::computeDFSResult() {
1040   if (!DFSResult)
1041     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1042   DFSResult->clear();
1043   ScheduledTrees.clear();
1044   DFSResult->resize(SUnits.size());
1045   DFSResult->compute(SUnits);
1046   ScheduledTrees.resize(DFSResult->getNumSubtrees());
1047 }
1048
1049 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1050 /// only provides the critical path for single block loops. To handle loops that
1051 /// span blocks, we could use the vreg path latencies provided by
1052 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1053 /// available for use in the scheduler.
1054 ///
1055 /// The cyclic path estimation identifies a def-use pair that crosses the back
1056 /// edge and considers the depth and height of the nodes. For example, consider
1057 /// the following instruction sequence where each instruction has unit latency
1058 /// and defines an epomymous virtual register:
1059 ///
1060 /// a->b(a,c)->c(b)->d(c)->exit
1061 ///
1062 /// The cyclic critical path is a two cycles: b->c->b
1063 /// The acyclic critical path is four cycles: a->b->c->d->exit
1064 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1065 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1066 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1067 /// LiveInDepth = depth(b) = len(a->b) = 1
1068 ///
1069 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1070 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1071 /// CyclicCriticalPath = min(2, 2) = 2
1072 ///
1073 /// This could be relevant to PostRA scheduling, but is currently implemented
1074 /// assuming LiveIntervals.
1075 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1076   // This only applies to single block loop.
1077   if (!BB->isSuccessor(BB))
1078     return 0;
1079
1080   unsigned MaxCyclicLatency = 0;
1081   // Visit each live out vreg def to find def/use pairs that cross iterations.
1082   ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
1083   for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
1084        RI != RE; ++RI) {
1085     unsigned Reg = *RI;
1086     if (!TRI->isVirtualRegister(Reg))
1087         continue;
1088     const LiveInterval &LI = LIS->getInterval(Reg);
1089     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1090     if (!DefVNI)
1091       continue;
1092
1093     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1094     const SUnit *DefSU = getSUnit(DefMI);
1095     if (!DefSU)
1096       continue;
1097
1098     unsigned LiveOutHeight = DefSU->getHeight();
1099     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1100     // Visit all local users of the vreg def.
1101     for (VReg2UseMap::iterator
1102            UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) {
1103       if (UI->SU == &ExitSU)
1104         continue;
1105
1106       // Only consider uses of the phi.
1107       LiveQueryResult LRQ =
1108         LI.Query(LIS->getInstructionIndex(UI->SU->getInstr()));
1109       if (!LRQ.valueIn()->isPHIDef())
1110         continue;
1111
1112       // Assume that a path spanning two iterations is a cycle, which could
1113       // overestimate in strange cases. This allows cyclic latency to be
1114       // estimated as the minimum slack of the vreg's depth or height.
1115       unsigned CyclicLatency = 0;
1116       if (LiveOutDepth > UI->SU->getDepth())
1117         CyclicLatency = LiveOutDepth - UI->SU->getDepth();
1118
1119       unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency;
1120       if (LiveInHeight > LiveOutHeight) {
1121         if (LiveInHeight - LiveOutHeight < CyclicLatency)
1122           CyclicLatency = LiveInHeight - LiveOutHeight;
1123       }
1124       else
1125         CyclicLatency = 0;
1126
1127       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1128             << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n");
1129       if (CyclicLatency > MaxCyclicLatency)
1130         MaxCyclicLatency = CyclicLatency;
1131     }
1132   }
1133   DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1134   return MaxCyclicLatency;
1135 }
1136
1137 /// Move an instruction and update register pressure.
1138 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1139   // Move the instruction to its new location in the instruction stream.
1140   MachineInstr *MI = SU->getInstr();
1141
1142   if (IsTopNode) {
1143     assert(SU->isTopReady() && "node still has unscheduled dependencies");
1144     if (&*CurrentTop == MI)
1145       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1146     else {
1147       moveInstruction(MI, CurrentTop);
1148       TopRPTracker.setPos(MI);
1149     }
1150
1151     if (ShouldTrackPressure) {
1152       // Update top scheduled pressure.
1153       TopRPTracker.advance();
1154       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1155       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1156     }
1157   }
1158   else {
1159     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1160     MachineBasicBlock::iterator priorII =
1161       priorNonDebug(CurrentBottom, CurrentTop);
1162     if (&*priorII == MI)
1163       CurrentBottom = priorII;
1164     else {
1165       if (&*CurrentTop == MI) {
1166         CurrentTop = nextIfDebug(++CurrentTop, priorII);
1167         TopRPTracker.setPos(CurrentTop);
1168       }
1169       moveInstruction(MI, CurrentBottom);
1170       CurrentBottom = MI;
1171     }
1172     if (ShouldTrackPressure) {
1173       // Update bottom scheduled pressure.
1174       SmallVector<unsigned, 8> LiveUses;
1175       BotRPTracker.recede(&LiveUses);
1176       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1177       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1178       updatePressureDiffs(LiveUses);
1179     }
1180   }
1181 }
1182
1183 //===----------------------------------------------------------------------===//
1184 // LoadClusterMutation - DAG post-processing to cluster loads.
1185 //===----------------------------------------------------------------------===//
1186
1187 namespace {
1188 /// \brief Post-process the DAG to create cluster edges between neighboring
1189 /// loads.
1190 class LoadClusterMutation : public ScheduleDAGMutation {
1191   struct LoadInfo {
1192     SUnit *SU;
1193     unsigned BaseReg;
1194     unsigned Offset;
1195     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
1196       : SU(su), BaseReg(reg), Offset(ofs) {}
1197   };
1198   static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
1199                            const LoadClusterMutation::LoadInfo &RHS);
1200
1201   const TargetInstrInfo *TII;
1202   const TargetRegisterInfo *TRI;
1203 public:
1204   LoadClusterMutation(const TargetInstrInfo *tii,
1205                       const TargetRegisterInfo *tri)
1206     : TII(tii), TRI(tri) {}
1207
1208   virtual void apply(ScheduleDAGMI *DAG);
1209 protected:
1210   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
1211 };
1212 } // anonymous
1213
1214 bool LoadClusterMutation::LoadInfoLess(
1215   const LoadClusterMutation::LoadInfo &LHS,
1216   const LoadClusterMutation::LoadInfo &RHS) {
1217   if (LHS.BaseReg != RHS.BaseReg)
1218     return LHS.BaseReg < RHS.BaseReg;
1219   return LHS.Offset < RHS.Offset;
1220 }
1221
1222 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
1223                                                   ScheduleDAGMI *DAG) {
1224   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
1225   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
1226     SUnit *SU = Loads[Idx];
1227     unsigned BaseReg;
1228     unsigned Offset;
1229     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
1230       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
1231   }
1232   if (LoadRecords.size() < 2)
1233     return;
1234   std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
1235   unsigned ClusterLength = 1;
1236   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
1237     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
1238       ClusterLength = 1;
1239       continue;
1240     }
1241
1242     SUnit *SUa = LoadRecords[Idx].SU;
1243     SUnit *SUb = LoadRecords[Idx+1].SU;
1244     if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
1245         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1246
1247       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
1248             << SUb->NodeNum << ")\n");
1249       // Copy successor edges from SUa to SUb. Interleaving computation
1250       // dependent on SUa can prevent load combining due to register reuse.
1251       // Predecessor edges do not need to be copied from SUb to SUa since nearby
1252       // loads should have effectively the same inputs.
1253       for (SUnit::const_succ_iterator
1254              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
1255         if (SI->getSUnit() == SUb)
1256           continue;
1257         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
1258         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
1259       }
1260       ++ClusterLength;
1261     }
1262     else
1263       ClusterLength = 1;
1264   }
1265 }
1266
1267 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
1268 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
1269   // Map DAG NodeNum to store chain ID.
1270   DenseMap<unsigned, unsigned> StoreChainIDs;
1271   // Map each store chain to a set of dependent loads.
1272   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1273   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1274     SUnit *SU = &DAG->SUnits[Idx];
1275     if (!SU->getInstr()->mayLoad())
1276       continue;
1277     unsigned ChainPredID = DAG->SUnits.size();
1278     for (SUnit::const_pred_iterator
1279            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
1280       if (PI->isCtrl()) {
1281         ChainPredID = PI->getSUnit()->NodeNum;
1282         break;
1283       }
1284     }
1285     // Check if this chain-like pred has been seen
1286     // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
1287     unsigned NumChains = StoreChainDependents.size();
1288     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1289       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1290     if (Result.second)
1291       StoreChainDependents.resize(NumChains + 1);
1292     StoreChainDependents[Result.first->second].push_back(SU);
1293   }
1294   // Iterate over the store chains.
1295   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
1296     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
1297 }
1298
1299 //===----------------------------------------------------------------------===//
1300 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
1301 //===----------------------------------------------------------------------===//
1302
1303 namespace {
1304 /// \brief Post-process the DAG to create cluster edges between instructions
1305 /// that may be fused by the processor into a single operation.
1306 class MacroFusion : public ScheduleDAGMutation {
1307   const TargetInstrInfo *TII;
1308 public:
1309   MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
1310
1311   virtual void apply(ScheduleDAGMI *DAG);
1312 };
1313 } // anonymous
1314
1315 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
1316 /// fused operations.
1317 void MacroFusion::apply(ScheduleDAGMI *DAG) {
1318   // For now, assume targets can only fuse with the branch.
1319   MachineInstr *Branch = DAG->ExitSU.getInstr();
1320   if (!Branch)
1321     return;
1322
1323   for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
1324     SUnit *SU = &DAG->SUnits[--Idx];
1325     if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
1326       continue;
1327
1328     // Create a single weak edge from SU to ExitSU. The only effect is to cause
1329     // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
1330     // need to copy predecessor edges from ExitSU to SU, since top-down
1331     // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
1332     // of SU, we could create an artificial edge from the deepest root, but it
1333     // hasn't been needed yet.
1334     bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
1335     (void)Success;
1336     assert(Success && "No DAG nodes should be reachable from ExitSU");
1337
1338     DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
1339     break;
1340   }
1341 }
1342
1343 //===----------------------------------------------------------------------===//
1344 // CopyConstrain - DAG post-processing to encourage copy elimination.
1345 //===----------------------------------------------------------------------===//
1346
1347 namespace {
1348 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
1349 /// the one use that defines the copy's source vreg, most likely an induction
1350 /// variable increment.
1351 class CopyConstrain : public ScheduleDAGMutation {
1352   // Transient state.
1353   SlotIndex RegionBeginIdx;
1354   // RegionEndIdx is the slot index of the last non-debug instruction in the
1355   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1356   SlotIndex RegionEndIdx;
1357 public:
1358   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1359
1360   virtual void apply(ScheduleDAGMI *DAG);
1361
1362 protected:
1363   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1364 };
1365 } // anonymous
1366
1367 /// constrainLocalCopy handles two possibilities:
1368 /// 1) Local src:
1369 /// I0:     = dst
1370 /// I1: src = ...
1371 /// I2:     = dst
1372 /// I3: dst = src (copy)
1373 /// (create pred->succ edges I0->I1, I2->I1)
1374 ///
1375 /// 2) Local copy:
1376 /// I0: dst = src (copy)
1377 /// I1:     = dst
1378 /// I2: src = ...
1379 /// I3:     = dst
1380 /// (create pred->succ edges I1->I2, I3->I2)
1381 ///
1382 /// Although the MachineScheduler is currently constrained to single blocks,
1383 /// this algorithm should handle extended blocks. An EBB is a set of
1384 /// contiguously numbered blocks such that the previous block in the EBB is
1385 /// always the single predecessor.
1386 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1387   LiveIntervals *LIS = DAG->getLIS();
1388   MachineInstr *Copy = CopySU->getInstr();
1389
1390   // Check for pure vreg copies.
1391   unsigned SrcReg = Copy->getOperand(1).getReg();
1392   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
1393     return;
1394
1395   unsigned DstReg = Copy->getOperand(0).getReg();
1396   if (!TargetRegisterInfo::isVirtualRegister(DstReg))
1397     return;
1398
1399   // Check if either the dest or source is local. If it's live across a back
1400   // edge, it's not local. Note that if both vregs are live across the back
1401   // edge, we cannot successfully contrain the copy without cyclic scheduling.
1402   unsigned LocalReg = DstReg;
1403   unsigned GlobalReg = SrcReg;
1404   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1405   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1406     LocalReg = SrcReg;
1407     GlobalReg = DstReg;
1408     LocalLI = &LIS->getInterval(LocalReg);
1409     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1410       return;
1411   }
1412   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1413
1414   // Find the global segment after the start of the local LI.
1415   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1416   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1417   // local live range. We could create edges from other global uses to the local
1418   // start, but the coalescer should have already eliminated these cases, so
1419   // don't bother dealing with it.
1420   if (GlobalSegment == GlobalLI->end())
1421     return;
1422
1423   // If GlobalSegment is killed at the LocalLI->start, the call to find()
1424   // returned the next global segment. But if GlobalSegment overlaps with
1425   // LocalLI->start, then advance to the next segement. If a hole in GlobalLI
1426   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1427   if (GlobalSegment->contains(LocalLI->beginIndex()))
1428     ++GlobalSegment;
1429
1430   if (GlobalSegment == GlobalLI->end())
1431     return;
1432
1433   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1434   if (GlobalSegment != GlobalLI->begin()) {
1435     // Two address defs have no hole.
1436     if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
1437                                GlobalSegment->start)) {
1438       return;
1439     }
1440     // If the prior global segment may be defined by the same two-address
1441     // instruction that also defines LocalLI, then can't make a hole here.
1442     if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start,
1443                                LocalLI->beginIndex())) {
1444       return;
1445     }
1446     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1447     // it would be a disconnected component in the live range.
1448     assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
1449            "Disconnected LRG within the scheduling region.");
1450   }
1451   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1452   if (!GlobalDef)
1453     return;
1454
1455   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1456   if (!GlobalSU)
1457     return;
1458
1459   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1460   // constraining the uses of the last local def to precede GlobalDef.
1461   SmallVector<SUnit*,8> LocalUses;
1462   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1463   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1464   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1465   for (SUnit::const_succ_iterator
1466          I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1467        I != E; ++I) {
1468     if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1469       continue;
1470     if (I->getSUnit() == GlobalSU)
1471       continue;
1472     if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1473       return;
1474     LocalUses.push_back(I->getSUnit());
1475   }
1476   // Open the top of the GlobalLI hole by constraining any earlier global uses
1477   // to precede the start of LocalLI.
1478   SmallVector<SUnit*,8> GlobalUses;
1479   MachineInstr *FirstLocalDef =
1480     LIS->getInstructionFromIndex(LocalLI->beginIndex());
1481   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1482   for (SUnit::const_pred_iterator
1483          I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1484     if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1485       continue;
1486     if (I->getSUnit() == FirstLocalSU)
1487       continue;
1488     if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1489       return;
1490     GlobalUses.push_back(I->getSUnit());
1491   }
1492   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1493   // Add the weak edges.
1494   for (SmallVectorImpl<SUnit*>::const_iterator
1495          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1496     DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1497           << GlobalSU->NodeNum << ")\n");
1498     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1499   }
1500   for (SmallVectorImpl<SUnit*>::const_iterator
1501          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1502     DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1503           << FirstLocalSU->NodeNum << ")\n");
1504     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1505   }
1506 }
1507
1508 /// \brief Callback from DAG postProcessing to create weak edges to encourage
1509 /// copy elimination.
1510 void CopyConstrain::apply(ScheduleDAGMI *DAG) {
1511   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1512
1513   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1514   if (FirstPos == DAG->end())
1515     return;
1516   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
1517   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1518     &*priorNonDebug(DAG->end(), DAG->begin()));
1519
1520   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1521     SUnit *SU = &DAG->SUnits[Idx];
1522     if (!SU->getInstr()->isCopy())
1523       continue;
1524
1525     constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
1526   }
1527 }
1528
1529 //===----------------------------------------------------------------------===//
1530 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1531 // and possibly other custom schedulers.
1532 //===----------------------------------------------------------------------===//
1533
1534 static const unsigned InvalidCycle = ~0U;
1535
1536 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1537
1538 void SchedBoundary::reset() {
1539   // A new HazardRec is created for each DAG and owned by SchedBoundary.
1540   // Destroying and reconstructing it is very expensive though. So keep
1541   // invalid, placeholder HazardRecs.
1542   if (HazardRec && HazardRec->isEnabled()) {
1543     delete HazardRec;
1544     HazardRec = 0;
1545   }
1546   Available.clear();
1547   Pending.clear();
1548   CheckPending = false;
1549   NextSUs.clear();
1550   CurrCycle = 0;
1551   CurrMOps = 0;
1552   MinReadyCycle = UINT_MAX;
1553   ExpectedLatency = 0;
1554   DependentLatency = 0;
1555   RetiredMOps = 0;
1556   MaxExecutedResCount = 0;
1557   ZoneCritResIdx = 0;
1558   IsResourceLimited = false;
1559   ReservedCycles.clear();
1560 #ifndef NDEBUG
1561   // Track the maximum number of stall cycles that could arise either from the
1562   // latency of a DAG edge or the number of cycles that a processor resource is
1563   // reserved (SchedBoundary::ReservedCycles).
1564   MaxObservedLatency = 0;
1565 #endif
1566   // Reserve a zero-count for invalid CritResIdx.
1567   ExecutedResCounts.resize(1);
1568   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1569 }
1570
1571 void SchedRemainder::
1572 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1573   reset();
1574   if (!SchedModel->hasInstrSchedModel())
1575     return;
1576   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1577   for (std::vector<SUnit>::iterator
1578          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1579     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1580     RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
1581       * SchedModel->getMicroOpFactor();
1582     for (TargetSchedModel::ProcResIter
1583            PI = SchedModel->getWriteProcResBegin(SC),
1584            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1585       unsigned PIdx = PI->ProcResourceIdx;
1586       unsigned Factor = SchedModel->getResourceFactor(PIdx);
1587       RemainingCounts[PIdx] += (Factor * PI->Cycles);
1588     }
1589   }
1590 }
1591
1592 void SchedBoundary::
1593 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1594   reset();
1595   DAG = dag;
1596   SchedModel = smodel;
1597   Rem = rem;
1598   if (SchedModel->hasInstrSchedModel()) {
1599     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1600     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1601   }
1602 }
1603
1604 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1605 /// these "soft stalls" differently than the hard stall cycles based on CPU
1606 /// resources and computed by checkHazard(). A fully in-order model
1607 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1608 /// available for scheduling until they are ready. However, a weaker in-order
1609 /// model may use this for heuristics. For example, if a processor has in-order
1610 /// behavior when reading certain resources, this may come into play.
1611 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1612   if (!SU->isUnbuffered)
1613     return 0;
1614
1615   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1616   if (ReadyCycle > CurrCycle)
1617     return ReadyCycle - CurrCycle;
1618   return 0;
1619 }
1620
1621 /// Compute the next cycle at which the given processor resource can be
1622 /// scheduled.
1623 unsigned SchedBoundary::
1624 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1625   unsigned NextUnreserved = ReservedCycles[PIdx];
1626   // If this resource has never been used, always return cycle zero.
1627   if (NextUnreserved == InvalidCycle)
1628     return 0;
1629   // For bottom-up scheduling add the cycles needed for the current operation.
1630   if (!isTop())
1631     NextUnreserved += Cycles;
1632   return NextUnreserved;
1633 }
1634
1635 /// Does this SU have a hazard within the current instruction group.
1636 ///
1637 /// The scheduler supports two modes of hazard recognition. The first is the
1638 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1639 /// supports highly complicated in-order reservation tables
1640 /// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
1641 ///
1642 /// The second is a streamlined mechanism that checks for hazards based on
1643 /// simple counters that the scheduler itself maintains. It explicitly checks
1644 /// for instruction dispatch limitations, including the number of micro-ops that
1645 /// can dispatch per cycle.
1646 ///
1647 /// TODO: Also check whether the SU must start a new group.
1648 bool SchedBoundary::checkHazard(SUnit *SU) {
1649   if (HazardRec->isEnabled()
1650       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1651     return true;
1652   }
1653   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1654   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1655     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1656           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1657     return true;
1658   }
1659   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
1660     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1661     for (TargetSchedModel::ProcResIter
1662            PI = SchedModel->getWriteProcResBegin(SC),
1663            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1664       if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
1665         return true;
1666     }
1667   }
1668   return false;
1669 }
1670
1671 // Find the unscheduled node in ReadySUs with the highest latency.
1672 unsigned SchedBoundary::
1673 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
1674   SUnit *LateSU = 0;
1675   unsigned RemLatency = 0;
1676   for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
1677        I != E; ++I) {
1678     unsigned L = getUnscheduledLatency(*I);
1679     if (L > RemLatency) {
1680       RemLatency = L;
1681       LateSU = *I;
1682     }
1683   }
1684   if (LateSU) {
1685     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
1686           << LateSU->NodeNum << ") " << RemLatency << "c\n");
1687   }
1688   return RemLatency;
1689 }
1690
1691 // Count resources in this zone and the remaining unscheduled
1692 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
1693 // resource index, or zero if the zone is issue limited.
1694 unsigned SchedBoundary::
1695 getOtherResourceCount(unsigned &OtherCritIdx) {
1696   OtherCritIdx = 0;
1697   if (!SchedModel->hasInstrSchedModel())
1698     return 0;
1699
1700   unsigned OtherCritCount = Rem->RemIssueCount
1701     + (RetiredMOps * SchedModel->getMicroOpFactor());
1702   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
1703         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
1704   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
1705        PIdx != PEnd; ++PIdx) {
1706     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
1707     if (OtherCount > OtherCritCount) {
1708       OtherCritCount = OtherCount;
1709       OtherCritIdx = PIdx;
1710     }
1711   }
1712   if (OtherCritIdx) {
1713     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
1714           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
1715           << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
1716   }
1717   return OtherCritCount;
1718 }
1719
1720 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
1721   if (ReadyCycle < MinReadyCycle)
1722     MinReadyCycle = ReadyCycle;
1723
1724   // Check for interlocks first. For the purpose of other heuristics, an
1725   // instruction that cannot issue appears as if it's not in the ReadyQueue.
1726   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
1727   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
1728     Pending.push(SU);
1729   else
1730     Available.push(SU);
1731
1732   // Record this node as an immediate dependent of the scheduled node.
1733   NextSUs.insert(SU);
1734 }
1735
1736 void SchedBoundary::releaseTopNode(SUnit *SU) {
1737   if (SU->isScheduled)
1738     return;
1739
1740   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1741        I != E; ++I) {
1742     if (I->isWeak())
1743       continue;
1744     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1745     unsigned Latency = I->getLatency();
1746 #ifndef NDEBUG
1747     MaxObservedLatency = std::max(Latency, MaxObservedLatency);
1748 #endif
1749     if (SU->TopReadyCycle < PredReadyCycle + Latency)
1750       SU->TopReadyCycle = PredReadyCycle + Latency;
1751   }
1752   releaseNode(SU, SU->TopReadyCycle);
1753 }
1754
1755 void SchedBoundary::releaseBottomNode(SUnit *SU) {
1756   if (SU->isScheduled)
1757     return;
1758
1759   assert(SU->getInstr() && "Scheduled SUnit must have instr");
1760
1761   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1762        I != E; ++I) {
1763     if (I->isWeak())
1764       continue;
1765     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1766     unsigned Latency = I->getLatency();
1767 #ifndef NDEBUG
1768     MaxObservedLatency = std::max(Latency, MaxObservedLatency);
1769 #endif
1770     if (SU->BotReadyCycle < SuccReadyCycle + Latency)
1771       SU->BotReadyCycle = SuccReadyCycle + Latency;
1772   }
1773   releaseNode(SU, SU->BotReadyCycle);
1774 }
1775
1776 /// Move the boundary of scheduled code by one cycle.
1777 void SchedBoundary::bumpCycle(unsigned NextCycle) {
1778   if (SchedModel->getMicroOpBufferSize() == 0) {
1779     assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1780     if (MinReadyCycle > NextCycle)
1781       NextCycle = MinReadyCycle;
1782   }
1783   // Update the current micro-ops, which will issue in the next cycle.
1784   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
1785   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
1786
1787   // Decrement DependentLatency based on the next cycle.
1788   if ((NextCycle - CurrCycle) > DependentLatency)
1789     DependentLatency = 0;
1790   else
1791     DependentLatency -= (NextCycle - CurrCycle);
1792
1793   if (!HazardRec->isEnabled()) {
1794     // Bypass HazardRec virtual calls.
1795     CurrCycle = NextCycle;
1796   }
1797   else {
1798     // Bypass getHazardType calls in case of long latency.
1799     for (; CurrCycle != NextCycle; ++CurrCycle) {
1800       if (isTop())
1801         HazardRec->AdvanceCycle();
1802       else
1803         HazardRec->RecedeCycle();
1804     }
1805   }
1806   CheckPending = true;
1807   unsigned LFactor = SchedModel->getLatencyFactor();
1808   IsResourceLimited =
1809     (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
1810     > (int)LFactor;
1811
1812   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
1813 }
1814
1815 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
1816   ExecutedResCounts[PIdx] += Count;
1817   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
1818     MaxExecutedResCount = ExecutedResCounts[PIdx];
1819 }
1820
1821 /// Add the given processor resource to this scheduled zone.
1822 ///
1823 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
1824 /// during which this resource is consumed.
1825 ///
1826 /// \return the next cycle at which the instruction may execute without
1827 /// oversubscribing resources.
1828 unsigned SchedBoundary::
1829 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
1830   unsigned Factor = SchedModel->getResourceFactor(PIdx);
1831   unsigned Count = Factor * Cycles;
1832   DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
1833         << " +" << Cycles << "x" << Factor << "u\n");
1834
1835   // Update Executed resources counts.
1836   incExecutedResources(PIdx, Count);
1837   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1838   Rem->RemainingCounts[PIdx] -= Count;
1839
1840   // Check if this resource exceeds the current critical resource. If so, it
1841   // becomes the critical resource.
1842   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
1843     ZoneCritResIdx = PIdx;
1844     DEBUG(dbgs() << "  *** Critical resource "
1845           << SchedModel->getResourceName(PIdx) << ": "
1846           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
1847   }
1848   // For reserved resources, record the highest cycle using the resource.
1849   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
1850   if (NextAvailable > CurrCycle) {
1851     DEBUG(dbgs() << "  Resource conflict: "
1852           << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
1853           << NextAvailable << "\n");
1854   }
1855   return NextAvailable;
1856 }
1857
1858 /// Move the boundary of scheduled code by one SUnit.
1859 void SchedBoundary::bumpNode(SUnit *SU) {
1860   // Update the reservation table.
1861   if (HazardRec->isEnabled()) {
1862     if (!isTop() && SU->isCall) {
1863       // Calls are scheduled with their preceding instructions. For bottom-up
1864       // scheduling, clear the pipeline state before emitting.
1865       HazardRec->Reset();
1866     }
1867     HazardRec->EmitInstruction(SU);
1868   }
1869   // checkHazard should prevent scheduling multiple instructions per cycle that
1870   // exceed the issue width.
1871   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1872   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
1873   assert(
1874       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
1875       "Cannot schedule this instruction's MicroOps in the current cycle.");
1876
1877   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1878   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
1879
1880   unsigned NextCycle = CurrCycle;
1881   switch (SchedModel->getMicroOpBufferSize()) {
1882   case 0:
1883     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
1884     break;
1885   case 1:
1886     if (ReadyCycle > NextCycle) {
1887       NextCycle = ReadyCycle;
1888       DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
1889     }
1890     break;
1891   default:
1892     // We don't currently model the OOO reorder buffer, so consider all
1893     // scheduled MOps to be "retired". We do loosely model in-order resource
1894     // latency. If this instruction uses an in-order resource, account for any
1895     // likely stall cycles.
1896     if (SU->isUnbuffered && ReadyCycle > NextCycle)
1897       NextCycle = ReadyCycle;
1898     break;
1899   }
1900   RetiredMOps += IncMOps;
1901
1902   // Update resource counts and critical resource.
1903   if (SchedModel->hasInstrSchedModel()) {
1904     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
1905     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
1906     Rem->RemIssueCount -= DecRemIssue;
1907     if (ZoneCritResIdx) {
1908       // Scale scheduled micro-ops for comparing with the critical resource.
1909       unsigned ScaledMOps =
1910         RetiredMOps * SchedModel->getMicroOpFactor();
1911
1912       // If scaled micro-ops are now more than the previous critical resource by
1913       // a full cycle, then micro-ops issue becomes critical.
1914       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
1915           >= (int)SchedModel->getLatencyFactor()) {
1916         ZoneCritResIdx = 0;
1917         DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
1918               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
1919       }
1920     }
1921     for (TargetSchedModel::ProcResIter
1922            PI = SchedModel->getWriteProcResBegin(SC),
1923            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1924       unsigned RCycle =
1925         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
1926       if (RCycle > NextCycle)
1927         NextCycle = RCycle;
1928     }
1929     if (SU->hasReservedResource) {
1930       // For reserved resources, record the highest cycle using the resource.
1931       // For top-down scheduling, this is the cycle in which we schedule this
1932       // instruction plus the number of cycles the operations reserves the
1933       // resource. For bottom-up is it simply the instruction's cycle.
1934       for (TargetSchedModel::ProcResIter
1935              PI = SchedModel->getWriteProcResBegin(SC),
1936              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1937         unsigned PIdx = PI->ProcResourceIdx;
1938         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
1939           ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
1940 #ifndef NDEBUG
1941           MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
1942 #endif
1943         }
1944       }
1945     }
1946   }
1947   // Update ExpectedLatency and DependentLatency.
1948   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
1949   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
1950   if (SU->getDepth() > TopLatency) {
1951     TopLatency = SU->getDepth();
1952     DEBUG(dbgs() << "  " << Available.getName()
1953           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
1954   }
1955   if (SU->getHeight() > BotLatency) {
1956     BotLatency = SU->getHeight();
1957     DEBUG(dbgs() << "  " << Available.getName()
1958           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
1959   }
1960   // If we stall for any reason, bump the cycle.
1961   if (NextCycle > CurrCycle) {
1962     bumpCycle(NextCycle);
1963   }
1964   else {
1965     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
1966     // resource limited. If a stall occured, bumpCycle does this.
1967     unsigned LFactor = SchedModel->getLatencyFactor();
1968     IsResourceLimited =
1969       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
1970       > (int)LFactor;
1971   }
1972   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
1973   // resets CurrMOps. Loop to handle instructions with more MOps than issue in
1974   // one cycle.  Since we commonly reach the max MOps here, opportunistically
1975   // bump the cycle to avoid uselessly checking everything in the readyQ.
1976   CurrMOps += IncMOps;
1977   while (CurrMOps >= SchedModel->getIssueWidth()) {
1978     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
1979           << " at cycle " << CurrCycle << '\n');
1980     bumpCycle(++NextCycle);
1981   }
1982   DEBUG(dumpScheduledState());
1983 }
1984
1985 /// Release pending ready nodes in to the available queue. This makes them
1986 /// visible to heuristics.
1987 void SchedBoundary::releasePending() {
1988   // If the available queue is empty, it is safe to reset MinReadyCycle.
1989   if (Available.empty())
1990     MinReadyCycle = UINT_MAX;
1991
1992   // Check to see if any of the pending instructions are ready to issue.  If
1993   // so, add them to the available queue.
1994   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
1995   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1996     SUnit *SU = *(Pending.begin()+i);
1997     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1998
1999     if (ReadyCycle < MinReadyCycle)
2000       MinReadyCycle = ReadyCycle;
2001
2002     if (!IsBuffered && ReadyCycle > CurrCycle)
2003       continue;
2004
2005     if (checkHazard(SU))
2006       continue;
2007
2008     Available.push(SU);
2009     Pending.remove(Pending.begin()+i);
2010     --i; --e;
2011   }
2012   DEBUG(if (!Pending.empty()) Pending.dump());
2013   CheckPending = false;
2014 }
2015
2016 /// Remove SU from the ready set for this boundary.
2017 void SchedBoundary::removeReady(SUnit *SU) {
2018   if (Available.isInQueue(SU))
2019     Available.remove(Available.find(SU));
2020   else {
2021     assert(Pending.isInQueue(SU) && "bad ready count");
2022     Pending.remove(Pending.find(SU));
2023   }
2024 }
2025
2026 /// If this queue only has one ready candidate, return it. As a side effect,
2027 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2028 /// one node is ready. If multiple instructions are ready, return NULL.
2029 SUnit *SchedBoundary::pickOnlyChoice() {
2030   if (CheckPending)
2031     releasePending();
2032
2033   if (CurrMOps > 0) {
2034     // Defer any ready instrs that now have a hazard.
2035     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2036       if (checkHazard(*I)) {
2037         Pending.push(*I);
2038         I = Available.remove(I);
2039         continue;
2040       }
2041       ++I;
2042     }
2043   }
2044   for (unsigned i = 0; Available.empty(); ++i) {
2045     assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
2046            "permanent hazard"); (void)i;
2047     bumpCycle(CurrCycle + 1);
2048     releasePending();
2049   }
2050   if (Available.size() == 1)
2051     return *Available.begin();
2052   return NULL;
2053 }
2054
2055 #ifndef NDEBUG
2056 // This is useful information to dump after bumpNode.
2057 // Note that the Queue contents are more useful before pickNodeFromQueue.
2058 void SchedBoundary::dumpScheduledState() {
2059   unsigned ResFactor;
2060   unsigned ResCount;
2061   if (ZoneCritResIdx) {
2062     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2063     ResCount = getResourceCount(ZoneCritResIdx);
2064   }
2065   else {
2066     ResFactor = SchedModel->getMicroOpFactor();
2067     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
2068   }
2069   unsigned LFactor = SchedModel->getLatencyFactor();
2070   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2071          << "  Retired: " << RetiredMOps;
2072   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
2073   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
2074          << ResCount / ResFactor << " "
2075          << SchedModel->getResourceName(ZoneCritResIdx)
2076          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
2077          << (IsResourceLimited ? "  - Resource" : "  - Latency")
2078          << " limited.\n";
2079 }
2080 #endif
2081
2082 //===----------------------------------------------------------------------===//
2083 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2084 //===----------------------------------------------------------------------===//
2085
2086 namespace {
2087 /// Base class for GenericScheduler. This class maintains information about
2088 /// scheduling candidates based on TargetSchedModel making it easy to implement
2089 /// heuristics for either preRA or postRA scheduling.
2090 class GenericSchedulerBase : public MachineSchedStrategy {
2091 public:
2092   /// Represent the type of SchedCandidate found within a single queue.
2093   /// pickNodeBidirectional depends on these listed by decreasing priority.
2094   enum CandReason {
2095     NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
2096     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
2097     TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
2098
2099 #ifndef NDEBUG
2100   static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
2101 #endif
2102
2103   /// Policy for scheduling the next instruction in the candidate's zone.
2104   struct CandPolicy {
2105     bool ReduceLatency;
2106     unsigned ReduceResIdx;
2107     unsigned DemandResIdx;
2108
2109     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
2110   };
2111
2112   /// Status of an instruction's critical resource consumption.
2113   struct SchedResourceDelta {
2114     // Count critical resources in the scheduled region required by SU.
2115     unsigned CritResources;
2116
2117     // Count critical resources from another region consumed by SU.
2118     unsigned DemandedResources;
2119
2120     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
2121
2122     bool operator==(const SchedResourceDelta &RHS) const {
2123       return CritResources == RHS.CritResources
2124         && DemandedResources == RHS.DemandedResources;
2125     }
2126     bool operator!=(const SchedResourceDelta &RHS) const {
2127       return !operator==(RHS);
2128     }
2129   };
2130
2131   /// Store the state used by GenericScheduler heuristics, required for the
2132   /// lifetime of one invocation of pickNode().
2133   struct SchedCandidate {
2134     CandPolicy Policy;
2135
2136     // The best SUnit candidate.
2137     SUnit *SU;
2138
2139     // The reason for this candidate.
2140     CandReason Reason;
2141
2142     // Set of reasons that apply to multiple candidates.
2143     uint32_t RepeatReasonSet;
2144
2145     // Register pressure values for the best candidate.
2146     RegPressureDelta RPDelta;
2147
2148     // Critical resource consumption of the best candidate.
2149     SchedResourceDelta ResDelta;
2150
2151     SchedCandidate(const CandPolicy &policy)
2152       : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
2153
2154     bool isValid() const { return SU; }
2155
2156     // Copy the status of another candidate without changing policy.
2157     void setBest(SchedCandidate &Best) {
2158       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
2159       SU = Best.SU;
2160       Reason = Best.Reason;
2161       RPDelta = Best.RPDelta;
2162       ResDelta = Best.ResDelta;
2163     }
2164
2165     bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
2166     void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
2167
2168     void initResourceDelta(const ScheduleDAGMI *DAG,
2169                            const TargetSchedModel *SchedModel);
2170   };
2171
2172 protected:
2173   const MachineSchedContext *Context;
2174   const TargetSchedModel *SchedModel;
2175   const TargetRegisterInfo *TRI;
2176
2177   SchedRemainder Rem;
2178 protected:
2179   GenericSchedulerBase(const MachineSchedContext *C):
2180     Context(C), SchedModel(0), TRI(0) {}
2181
2182   void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
2183                  SchedBoundary *OtherZone);
2184
2185 #ifndef NDEBUG
2186   void traceCandidate(const SchedCandidate &Cand);
2187 #endif
2188 };
2189 } // namespace
2190
2191 void GenericSchedulerBase::SchedCandidate::
2192 initResourceDelta(const ScheduleDAGMI *DAG,
2193                   const TargetSchedModel *SchedModel) {
2194   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2195     return;
2196
2197   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2198   for (TargetSchedModel::ProcResIter
2199          PI = SchedModel->getWriteProcResBegin(SC),
2200          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2201     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2202       ResDelta.CritResources += PI->Cycles;
2203     if (PI->ProcResourceIdx == Policy.DemandResIdx)
2204       ResDelta.DemandedResources += PI->Cycles;
2205   }
2206 }
2207
2208 /// Set the CandPolicy given a scheduling zone given the current resources and
2209 /// latencies inside and outside the zone.
2210 void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
2211                                      bool IsPostRA,
2212                                      SchedBoundary &CurrZone,
2213                                      SchedBoundary *OtherZone) {
2214   // Apply preemptive heuristics based on the the total latency and resources
2215   // inside and outside this zone. Potential stalls should be considered before
2216   // following this policy.
2217
2218   // Compute remaining latency. We need this both to determine whether the
2219   // overall schedule has become latency-limited and whether the instructions
2220   // outside this zone are resource or latency limited.
2221   //
2222   // The "dependent" latency is updated incrementally during scheduling as the
2223   // max height/depth of scheduled nodes minus the cycles since it was
2224   // scheduled:
2225   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2226   //
2227   // The "independent" latency is the max ready queue depth:
2228   //   ILat = max N.depth for N in Available|Pending
2229   //
2230   // RemainingLatency is the greater of independent and dependent latency.
2231   unsigned RemLatency = CurrZone.getDependentLatency();
2232   RemLatency = std::max(RemLatency,
2233                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2234   RemLatency = std::max(RemLatency,
2235                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2236
2237   // Compute the critical resource outside the zone.
2238   unsigned OtherCritIdx;
2239   unsigned OtherCount =
2240     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2241
2242   bool OtherResLimited = false;
2243   if (SchedModel->hasInstrSchedModel()) {
2244     unsigned LFactor = SchedModel->getLatencyFactor();
2245     OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
2246   }
2247   // Schedule aggressively for latency in PostRA mode. We don't check for
2248   // acyclic latency during PostRA, and highly out-of-order processors will
2249   // skip PostRA scheduling.
2250   if (!OtherResLimited) {
2251     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
2252       Policy.ReduceLatency |= true;
2253       DEBUG(dbgs() << "  " << CurrZone.Available.getName()
2254             << " RemainingLatency " << RemLatency << " + "
2255             << CurrZone.getCurrCycle() << "c > CritPath "
2256             << Rem.CriticalPath << "\n");
2257     }
2258   }
2259   // If the same resource is limiting inside and outside the zone, do nothing.
2260   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2261     return;
2262
2263   DEBUG(
2264     if (CurrZone.isResourceLimited()) {
2265       dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
2266              << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
2267              << "\n";
2268     }
2269     if (OtherResLimited)
2270       dbgs() << "  RemainingLimit: "
2271              << SchedModel->getResourceName(OtherCritIdx) << "\n";
2272     if (!CurrZone.isResourceLimited() && !OtherResLimited)
2273       dbgs() << "  Latency limited both directions.\n");
2274
2275   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2276     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2277
2278   if (OtherResLimited)
2279     Policy.DemandResIdx = OtherCritIdx;
2280 }
2281
2282 #ifndef NDEBUG
2283 const char *GenericSchedulerBase::getReasonStr(
2284   GenericSchedulerBase::CandReason Reason) {
2285   switch (Reason) {
2286   case NoCand:         return "NOCAND    ";
2287   case PhysRegCopy:    return "PREG-COPY";
2288   case RegExcess:      return "REG-EXCESS";
2289   case RegCritical:    return "REG-CRIT  ";
2290   case Stall:          return "STALL     ";
2291   case Cluster:        return "CLUSTER   ";
2292   case Weak:           return "WEAK      ";
2293   case RegMax:         return "REG-MAX   ";
2294   case ResourceReduce: return "RES-REDUCE";
2295   case ResourceDemand: return "RES-DEMAND";
2296   case TopDepthReduce: return "TOP-DEPTH ";
2297   case TopPathReduce:  return "TOP-PATH  ";
2298   case BotHeightReduce:return "BOT-HEIGHT";
2299   case BotPathReduce:  return "BOT-PATH  ";
2300   case NextDefUse:     return "DEF-USE   ";
2301   case NodeOrder:      return "ORDER     ";
2302   };
2303   llvm_unreachable("Unknown reason!");
2304 }
2305
2306 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2307   PressureChange P;
2308   unsigned ResIdx = 0;
2309   unsigned Latency = 0;
2310   switch (Cand.Reason) {
2311   default:
2312     break;
2313   case RegExcess:
2314     P = Cand.RPDelta.Excess;
2315     break;
2316   case RegCritical:
2317     P = Cand.RPDelta.CriticalMax;
2318     break;
2319   case RegMax:
2320     P = Cand.RPDelta.CurrentMax;
2321     break;
2322   case ResourceReduce:
2323     ResIdx = Cand.Policy.ReduceResIdx;
2324     break;
2325   case ResourceDemand:
2326     ResIdx = Cand.Policy.DemandResIdx;
2327     break;
2328   case TopDepthReduce:
2329     Latency = Cand.SU->getDepth();
2330     break;
2331   case TopPathReduce:
2332     Latency = Cand.SU->getHeight();
2333     break;
2334   case BotHeightReduce:
2335     Latency = Cand.SU->getHeight();
2336     break;
2337   case BotPathReduce:
2338     Latency = Cand.SU->getDepth();
2339     break;
2340   }
2341   dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2342   if (P.isValid())
2343     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2344            << ":" << P.getUnitInc() << " ";
2345   else
2346     dbgs() << "      ";
2347   if (ResIdx)
2348     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2349   else
2350     dbgs() << "         ";
2351   if (Latency)
2352     dbgs() << " " << Latency << " cycles ";
2353   else
2354     dbgs() << "          ";
2355   dbgs() << '\n';
2356 }
2357 #endif
2358
2359 /// Return true if this heuristic determines order.
2360 static bool tryLess(int TryVal, int CandVal,
2361                     GenericSchedulerBase::SchedCandidate &TryCand,
2362                     GenericSchedulerBase::SchedCandidate &Cand,
2363                     GenericSchedulerBase::CandReason Reason) {
2364   if (TryVal < CandVal) {
2365     TryCand.Reason = Reason;
2366     return true;
2367   }
2368   if (TryVal > CandVal) {
2369     if (Cand.Reason > Reason)
2370       Cand.Reason = Reason;
2371     return true;
2372   }
2373   Cand.setRepeat(Reason);
2374   return false;
2375 }
2376
2377 static bool tryGreater(int TryVal, int CandVal,
2378                        GenericSchedulerBase::SchedCandidate &TryCand,
2379                        GenericSchedulerBase::SchedCandidate &Cand,
2380                        GenericSchedulerBase::CandReason Reason) {
2381   if (TryVal > CandVal) {
2382     TryCand.Reason = Reason;
2383     return true;
2384   }
2385   if (TryVal < CandVal) {
2386     if (Cand.Reason > Reason)
2387       Cand.Reason = Reason;
2388     return true;
2389   }
2390   Cand.setRepeat(Reason);
2391   return false;
2392 }
2393
2394 static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2395                        GenericSchedulerBase::SchedCandidate &Cand,
2396                        SchedBoundary &Zone) {
2397   if (Zone.isTop()) {
2398     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2399       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2400                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2401         return true;
2402     }
2403     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2404                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2405       return true;
2406   }
2407   else {
2408     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2409       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2410                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2411         return true;
2412     }
2413     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2414                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2415       return true;
2416   }
2417   return false;
2418 }
2419
2420 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
2421                       bool IsTop) {
2422   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2423         << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
2424 }
2425
2426 namespace {
2427 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
2428 /// the schedule.
2429 class GenericScheduler : public GenericSchedulerBase {
2430   ScheduleDAGMILive *DAG;
2431
2432   // State of the top and bottom scheduled instruction boundaries.
2433   SchedBoundary Top;
2434   SchedBoundary Bot;
2435
2436   MachineSchedPolicy RegionPolicy;
2437 public:
2438   GenericScheduler(const MachineSchedContext *C):
2439     GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"),
2440     Bot(SchedBoundary::BotQID, "BotQ") {}
2441
2442   virtual void initPolicy(MachineBasicBlock::iterator Begin,
2443                           MachineBasicBlock::iterator End,
2444                           unsigned NumRegionInstrs) LLVM_OVERRIDE;
2445
2446   virtual bool shouldTrackPressure() const LLVM_OVERRIDE {
2447     return RegionPolicy.ShouldTrackPressure;
2448   }
2449
2450   virtual void initialize(ScheduleDAGMI *dag) LLVM_OVERRIDE;
2451
2452   virtual SUnit *pickNode(bool &IsTopNode) LLVM_OVERRIDE;
2453
2454   virtual void schedNode(SUnit *SU, bool IsTopNode) LLVM_OVERRIDE;
2455
2456   virtual void releaseTopNode(SUnit *SU) LLVM_OVERRIDE {
2457     Top.releaseTopNode(SU);
2458   }
2459
2460   virtual void releaseBottomNode(SUnit *SU) LLVM_OVERRIDE {
2461     Bot.releaseBottomNode(SU);
2462   }
2463
2464   virtual void registerRoots() LLVM_OVERRIDE;
2465
2466 protected:
2467   void checkAcyclicLatency();
2468
2469   void tryCandidate(SchedCandidate &Cand,
2470                     SchedCandidate &TryCand,
2471                     SchedBoundary &Zone,
2472                     const RegPressureTracker &RPTracker,
2473                     RegPressureTracker &TempTracker);
2474
2475   SUnit *pickNodeBidirectional(bool &IsTopNode);
2476
2477   void pickNodeFromQueue(SchedBoundary &Zone,
2478                          const RegPressureTracker &RPTracker,
2479                          SchedCandidate &Candidate);
2480
2481   void reschedulePhysRegCopies(SUnit *SU, bool isTop);
2482 };
2483 } // namespace
2484
2485 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2486   assert(dag->hasVRegLiveness() &&
2487          "(PreRA)GenericScheduler needs vreg liveness");
2488   DAG = static_cast<ScheduleDAGMILive*>(dag);
2489   SchedModel = DAG->getSchedModel();
2490   TRI = DAG->TRI;
2491
2492   Rem.init(DAG, SchedModel);
2493   Top.init(DAG, SchedModel, &Rem);
2494   Bot.init(DAG, SchedModel, &Rem);
2495
2496   // Initialize resource counts.
2497
2498   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2499   // are disabled, then these HazardRecs will be disabled.
2500   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2501   const TargetMachine &TM = DAG->MF.getTarget();
2502   if (!Top.HazardRec) {
2503     Top.HazardRec =
2504       TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
2505   }
2506   if (!Bot.HazardRec) {
2507     Bot.HazardRec =
2508       TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
2509   }
2510 }
2511
2512 /// Initialize the per-region scheduling policy.
2513 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2514                                   MachineBasicBlock::iterator End,
2515                                   unsigned NumRegionInstrs) {
2516   const TargetMachine &TM = Context->MF->getTarget();
2517
2518   // Avoid setting up the register pressure tracker for small regions to save
2519   // compile time. As a rough heuristic, only track pressure when the number of
2520   // schedulable instructions exceeds half the integer register file.
2521   unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2522     TM.getTargetLowering()->getRegClassFor(MVT::i32));
2523
2524   RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2525
2526   // For generic targets, we default to bottom-up, because it's simpler and more
2527   // compile-time optimizations have been implemented in that direction.
2528   RegionPolicy.OnlyBottomUp = true;
2529
2530   // Allow the subtarget to override default policy.
2531   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
2532   ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
2533
2534   // After subtarget overrides, apply command line options.
2535   if (!EnableRegPressure)
2536     RegionPolicy.ShouldTrackPressure = false;
2537
2538   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2539   // e.g. -misched-bottomup=false allows scheduling in both directions.
2540   assert((!ForceTopDown || !ForceBottomUp) &&
2541          "-misched-topdown incompatible with -misched-bottomup");
2542   if (ForceBottomUp.getNumOccurrences() > 0) {
2543     RegionPolicy.OnlyBottomUp = ForceBottomUp;
2544     if (RegionPolicy.OnlyBottomUp)
2545       RegionPolicy.OnlyTopDown = false;
2546   }
2547   if (ForceTopDown.getNumOccurrences() > 0) {
2548     RegionPolicy.OnlyTopDown = ForceTopDown;
2549     if (RegionPolicy.OnlyTopDown)
2550       RegionPolicy.OnlyBottomUp = false;
2551   }
2552 }
2553
2554 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2555 /// critical path by more cycles than it takes to drain the instruction buffer.
2556 /// We estimate an upper bounds on in-flight instructions as:
2557 ///
2558 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2559 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2560 /// InFlightResources = InFlightIterations * LoopResources
2561 ///
2562 /// TODO: Check execution resources in addition to IssueCount.
2563 void GenericScheduler::checkAcyclicLatency() {
2564   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2565     return;
2566
2567   // Scaled number of cycles per loop iteration.
2568   unsigned IterCount =
2569     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2570              Rem.RemIssueCount);
2571   // Scaled acyclic critical path.
2572   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2573   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2574   unsigned InFlightCount =
2575     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2576   unsigned BufferLimit =
2577     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2578
2579   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2580
2581   DEBUG(dbgs() << "IssueCycles="
2582         << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2583         << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2584         << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount
2585         << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2586         << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2587         if (Rem.IsAcyclicLatencyLimited)
2588           dbgs() << "  ACYCLIC LATENCY LIMIT\n");
2589 }
2590
2591 void GenericScheduler::registerRoots() {
2592   Rem.CriticalPath = DAG->ExitSU.getDepth();
2593
2594   // Some roots may not feed into ExitSU. Check all of them in case.
2595   for (std::vector<SUnit*>::const_iterator
2596          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
2597     if ((*I)->getDepth() > Rem.CriticalPath)
2598       Rem.CriticalPath = (*I)->getDepth();
2599   }
2600   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
2601
2602   if (EnableCyclicPath) {
2603     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2604     checkAcyclicLatency();
2605   }
2606 }
2607
2608 static bool tryPressure(const PressureChange &TryP,
2609                         const PressureChange &CandP,
2610                         GenericSchedulerBase::SchedCandidate &TryCand,
2611                         GenericSchedulerBase::SchedCandidate &Cand,
2612                         GenericSchedulerBase::CandReason Reason) {
2613   int TryRank = TryP.getPSetOrMax();
2614   int CandRank = CandP.getPSetOrMax();
2615   // If both candidates affect the same set, go with the smallest increase.
2616   if (TryRank == CandRank) {
2617     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2618                    Reason);
2619   }
2620   // If one candidate decreases and the other increases, go with it.
2621   // Invalid candidates have UnitInc==0.
2622   if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2623               Reason)) {
2624     return true;
2625   }
2626   // If the candidates are decreasing pressure, reverse priority.
2627   if (TryP.getUnitInc() < 0)
2628     std::swap(TryRank, CandRank);
2629   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2630 }
2631
2632 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2633   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2634 }
2635
2636 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2637 /// their physreg def/use.
2638 ///
2639 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2640 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2641 /// with the operation that produces or consumes the physreg. We'll do this when
2642 /// regalloc has support for parallel copies.
2643 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
2644   const MachineInstr *MI = SU->getInstr();
2645   if (!MI->isCopy())
2646     return 0;
2647
2648   unsigned ScheduledOper = isTop ? 1 : 0;
2649   unsigned UnscheduledOper = isTop ? 0 : 1;
2650   // If we have already scheduled the physreg produce/consumer, immediately
2651   // schedule the copy.
2652   if (TargetRegisterInfo::isPhysicalRegister(
2653         MI->getOperand(ScheduledOper).getReg()))
2654     return 1;
2655   // If the physreg is at the boundary, defer it. Otherwise schedule it
2656   // immediately to free the dependent. We can hoist the copy later.
2657   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2658   if (TargetRegisterInfo::isPhysicalRegister(
2659         MI->getOperand(UnscheduledOper).getReg()))
2660     return AtBoundary ? -1 : 1;
2661   return 0;
2662 }
2663
2664 /// Apply a set of heursitics to a new candidate. Heuristics are currently
2665 /// hierarchical. This may be more efficient than a graduated cost model because
2666 /// we don't need to evaluate all aspects of the model for each node in the
2667 /// queue. But it's really done to make the heuristics easier to debug and
2668 /// statistically analyze.
2669 ///
2670 /// \param Cand provides the policy and current best candidate.
2671 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2672 /// \param Zone describes the scheduled zone that we are extending.
2673 /// \param RPTracker describes reg pressure within the scheduled zone.
2674 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
2675 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2676                                     SchedCandidate &TryCand,
2677                                     SchedBoundary &Zone,
2678                                     const RegPressureTracker &RPTracker,
2679                                     RegPressureTracker &TempTracker) {
2680
2681   if (DAG->isTrackingPressure()) {
2682     // Always initialize TryCand's RPDelta.
2683     if (Zone.isTop()) {
2684       TempTracker.getMaxDownwardPressureDelta(
2685         TryCand.SU->getInstr(),
2686         TryCand.RPDelta,
2687         DAG->getRegionCriticalPSets(),
2688         DAG->getRegPressure().MaxSetPressure);
2689     }
2690     else {
2691       if (VerifyScheduling) {
2692         TempTracker.getMaxUpwardPressureDelta(
2693           TryCand.SU->getInstr(),
2694           &DAG->getPressureDiff(TryCand.SU),
2695           TryCand.RPDelta,
2696           DAG->getRegionCriticalPSets(),
2697           DAG->getRegPressure().MaxSetPressure);
2698       }
2699       else {
2700         RPTracker.getUpwardPressureDelta(
2701           TryCand.SU->getInstr(),
2702           DAG->getPressureDiff(TryCand.SU),
2703           TryCand.RPDelta,
2704           DAG->getRegionCriticalPSets(),
2705           DAG->getRegPressure().MaxSetPressure);
2706       }
2707     }
2708   }
2709   DEBUG(if (TryCand.RPDelta.Excess.isValid())
2710           dbgs() << "  SU(" << TryCand.SU->NodeNum << ") "
2711                  << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
2712                  << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
2713
2714   // Initialize the candidate if needed.
2715   if (!Cand.isValid()) {
2716     TryCand.Reason = NodeOrder;
2717     return;
2718   }
2719
2720   if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
2721                  biasPhysRegCopy(Cand.SU, Zone.isTop()),
2722                  TryCand, Cand, PhysRegCopy))
2723     return;
2724
2725   // Avoid exceeding the target's limit. If signed PSetID is negative, it is
2726   // invalid; convert it to INT_MAX to give it lowest priority.
2727   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
2728                                                Cand.RPDelta.Excess,
2729                                                TryCand, Cand, RegExcess))
2730     return;
2731
2732   // Avoid increasing the max critical pressure in the scheduled region.
2733   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
2734                                                Cand.RPDelta.CriticalMax,
2735                                                TryCand, Cand, RegCritical))
2736     return;
2737
2738   // For loops that are acyclic path limited, aggressively schedule for latency.
2739   // This can result in very long dependence chains scheduled in sequence, so
2740   // once every cycle (when CurrMOps == 0), switch to normal heuristics.
2741   if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
2742       && tryLatency(TryCand, Cand, Zone))
2743     return;
2744
2745   // Prioritize instructions that read unbuffered resources by stall cycles.
2746   if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
2747               Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
2748     return;
2749
2750   // Keep clustered nodes together to encourage downstream peephole
2751   // optimizations which may reduce resource requirements.
2752   //
2753   // This is a best effort to set things up for a post-RA pass. Optimizations
2754   // like generating loads of multiple registers should ideally be done within
2755   // the scheduler pass by combining the loads during DAG postprocessing.
2756   const SUnit *NextClusterSU =
2757     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2758   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
2759                  TryCand, Cand, Cluster))
2760     return;
2761
2762   // Weak edges are for clustering and other constraints.
2763   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
2764               getWeakLeft(Cand.SU, Zone.isTop()),
2765               TryCand, Cand, Weak)) {
2766     return;
2767   }
2768   // Avoid increasing the max pressure of the entire region.
2769   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
2770                                                Cand.RPDelta.CurrentMax,
2771                                                TryCand, Cand, RegMax))
2772     return;
2773
2774   // Avoid critical resource consumption and balance the schedule.
2775   TryCand.initResourceDelta(DAG, SchedModel);
2776   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2777               TryCand, Cand, ResourceReduce))
2778     return;
2779   if (tryGreater(TryCand.ResDelta.DemandedResources,
2780                  Cand.ResDelta.DemandedResources,
2781                  TryCand, Cand, ResourceDemand))
2782     return;
2783
2784   // Avoid serializing long latency dependence chains.
2785   // For acyclic path limited loops, latency was already checked above.
2786   if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
2787       && tryLatency(TryCand, Cand, Zone)) {
2788     return;
2789   }
2790
2791   // Prefer immediate defs/users of the last scheduled instruction. This is a
2792   // local pressure avoidance strategy that also makes the machine code
2793   // readable.
2794   if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
2795                  TryCand, Cand, NextDefUse))
2796     return;
2797
2798   // Fall through to original instruction order.
2799   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2800       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2801     TryCand.Reason = NodeOrder;
2802   }
2803 }
2804
2805 /// Pick the best candidate from the queue.
2806 ///
2807 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2808 /// DAG building. To adjust for the current scheduling location we need to
2809 /// maintain the number of vreg uses remaining to be top-scheduled.
2810 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2811                                          const RegPressureTracker &RPTracker,
2812                                          SchedCandidate &Cand) {
2813   ReadyQueue &Q = Zone.Available;
2814
2815   DEBUG(Q.dump());
2816
2817   // getMaxPressureDelta temporarily modifies the tracker.
2818   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2819
2820   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2821
2822     SchedCandidate TryCand(Cand.Policy);
2823     TryCand.SU = *I;
2824     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
2825     if (TryCand.Reason != NoCand) {
2826       // Initialize resource delta if needed in case future heuristics query it.
2827       if (TryCand.ResDelta == SchedResourceDelta())
2828         TryCand.initResourceDelta(DAG, SchedModel);
2829       Cand.setBest(TryCand);
2830       DEBUG(traceCandidate(Cand));
2831     }
2832   }
2833 }
2834
2835 /// Pick the best candidate node from either the top or bottom queue.
2836 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
2837   // Schedule as far as possible in the direction of no choice. This is most
2838   // efficient, but also provides the best heuristics for CriticalPSets.
2839   if (SUnit *SU = Bot.pickOnlyChoice()) {
2840     IsTopNode = false;
2841     DEBUG(dbgs() << "Pick Bot NOCAND\n");
2842     return SU;
2843   }
2844   if (SUnit *SU = Top.pickOnlyChoice()) {
2845     IsTopNode = true;
2846     DEBUG(dbgs() << "Pick Top NOCAND\n");
2847     return SU;
2848   }
2849   CandPolicy NoPolicy;
2850   SchedCandidate BotCand(NoPolicy);
2851   SchedCandidate TopCand(NoPolicy);
2852   // Set the bottom-up policy based on the state of the current bottom zone and
2853   // the instructions outside the zone, including the top zone.
2854   setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
2855   // Set the top-down policy based on the state of the current top zone and
2856   // the instructions outside the zone, including the bottom zone.
2857   setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
2858
2859   // Prefer bottom scheduling when heuristics are silent.
2860   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2861   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2862
2863   // If either Q has a single candidate that provides the least increase in
2864   // Excess pressure, we can immediately schedule from that Q.
2865   //
2866   // RegionCriticalPSets summarizes the pressure within the scheduled region and
2867   // affects picking from either Q. If scheduling in one direction must
2868   // increase pressure for one of the excess PSets, then schedule in that
2869   // direction first to provide more freedom in the other direction.
2870   if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
2871       || (BotCand.Reason == RegCritical
2872           && !BotCand.isRepeat(RegCritical)))
2873   {
2874     IsTopNode = false;
2875     tracePick(BotCand, IsTopNode);
2876     return BotCand.SU;
2877   }
2878   // Check if the top Q has a better candidate.
2879   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2880   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2881
2882   // Choose the queue with the most important (lowest enum) reason.
2883   if (TopCand.Reason < BotCand.Reason) {
2884     IsTopNode = true;
2885     tracePick(TopCand, IsTopNode);
2886     return TopCand.SU;
2887   }
2888   // Otherwise prefer the bottom candidate, in node order if all else failed.
2889   IsTopNode = false;
2890   tracePick(BotCand, IsTopNode);
2891   return BotCand.SU;
2892 }
2893
2894 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2895 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
2896   if (DAG->top() == DAG->bottom()) {
2897     assert(Top.Available.empty() && Top.Pending.empty() &&
2898            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2899     return NULL;
2900   }
2901   SUnit *SU;
2902   do {
2903     if (RegionPolicy.OnlyTopDown) {
2904       SU = Top.pickOnlyChoice();
2905       if (!SU) {
2906         CandPolicy NoPolicy;
2907         SchedCandidate TopCand(NoPolicy);
2908         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2909         assert(TopCand.Reason != NoCand && "failed to find a candidate");
2910         tracePick(TopCand, true);
2911         SU = TopCand.SU;
2912       }
2913       IsTopNode = true;
2914     }
2915     else if (RegionPolicy.OnlyBottomUp) {
2916       SU = Bot.pickOnlyChoice();
2917       if (!SU) {
2918         CandPolicy NoPolicy;
2919         SchedCandidate BotCand(NoPolicy);
2920         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2921         assert(BotCand.Reason != NoCand && "failed to find a candidate");
2922         tracePick(BotCand, false);
2923         SU = BotCand.SU;
2924       }
2925       IsTopNode = false;
2926     }
2927     else {
2928       SU = pickNodeBidirectional(IsTopNode);
2929     }
2930   } while (SU->isScheduled);
2931
2932   if (SU->isTopReady())
2933     Top.removeReady(SU);
2934   if (SU->isBottomReady())
2935     Bot.removeReady(SU);
2936
2937   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
2938   return SU;
2939 }
2940
2941 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
2942
2943   MachineBasicBlock::iterator InsertPos = SU->getInstr();
2944   if (!isTop)
2945     ++InsertPos;
2946   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
2947
2948   // Find already scheduled copies with a single physreg dependence and move
2949   // them just above the scheduled instruction.
2950   for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
2951        I != E; ++I) {
2952     if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
2953       continue;
2954     SUnit *DepSU = I->getSUnit();
2955     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
2956       continue;
2957     MachineInstr *Copy = DepSU->getInstr();
2958     if (!Copy->isCopy())
2959       continue;
2960     DEBUG(dbgs() << "  Rescheduling physreg copy ";
2961           I->getSUnit()->dump(DAG));
2962     DAG->moveInstruction(Copy, InsertPos);
2963   }
2964 }
2965
2966 /// Update the scheduler's state after scheduling a node. This is the same node
2967 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
2968 /// update it's state based on the current cycle before MachineSchedStrategy
2969 /// does.
2970 ///
2971 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2972 /// them here. See comments in biasPhysRegCopy.
2973 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2974   if (IsTopNode) {
2975     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
2976     Top.bumpNode(SU);
2977     if (SU->hasPhysRegUses)
2978       reschedulePhysRegCopies(SU, true);
2979   }
2980   else {
2981     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
2982     Bot.bumpNode(SU);
2983     if (SU->hasPhysRegDefs)
2984       reschedulePhysRegCopies(SU, false);
2985   }
2986 }
2987
2988 /// Create the standard converging machine scheduler. This will be used as the
2989 /// default scheduler if the target does not set a default.
2990 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
2991   ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C));
2992   // Register DAG post-processors.
2993   //
2994   // FIXME: extend the mutation API to allow earlier mutations to instantiate
2995   // data and pass it to later mutations. Have a single mutation that gathers
2996   // the interesting nodes in one pass.
2997   DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
2998   if (EnableLoadCluster && DAG->TII->enableClusterLoads())
2999     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
3000   if (EnableMacroFusion)
3001     DAG->addMutation(new MacroFusion(DAG->TII));
3002   return DAG;
3003 }
3004
3005 static MachineSchedRegistry
3006 GenericSchedRegistry("converge", "Standard converging scheduler.",
3007                      createGenericSchedLive);
3008
3009 //===----------------------------------------------------------------------===//
3010 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3011 //===----------------------------------------------------------------------===//
3012
3013 namespace {
3014 /// PostGenericScheduler - Interface to the scheduling algorithm used by
3015 /// ScheduleDAGMI.
3016 ///
3017 /// Callbacks from ScheduleDAGMI:
3018 ///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
3019 class PostGenericScheduler : public GenericSchedulerBase {
3020   ScheduleDAGMI *DAG;
3021   SchedBoundary Top;
3022   SmallVector<SUnit*, 8> BotRoots;
3023 public:
3024   PostGenericScheduler(const MachineSchedContext *C):
3025     GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
3026
3027   virtual ~PostGenericScheduler() {}
3028
3029   virtual void initPolicy(MachineBasicBlock::iterator Begin,
3030                           MachineBasicBlock::iterator End,
3031                           unsigned NumRegionInstrs) LLVM_OVERRIDE {
3032     /* no configurable policy */
3033   };
3034
3035   /// PostRA scheduling does not track pressure.
3036   virtual bool shouldTrackPressure() const LLVM_OVERRIDE { return false; }
3037
3038   virtual void initialize(ScheduleDAGMI *Dag) LLVM_OVERRIDE {
3039     DAG = Dag;
3040     SchedModel = DAG->getSchedModel();
3041     TRI = DAG->TRI;
3042
3043     Rem.init(DAG, SchedModel);
3044     Top.init(DAG, SchedModel, &Rem);
3045     BotRoots.clear();
3046
3047     // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3048     // or are disabled, then these HazardRecs will be disabled.
3049     const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3050     const TargetMachine &TM = DAG->MF.getTarget();
3051     if (!Top.HazardRec) {
3052       Top.HazardRec =
3053         TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
3054     }
3055   }
3056
3057   virtual void registerRoots() LLVM_OVERRIDE;
3058
3059   virtual SUnit *pickNode(bool &IsTopNode) LLVM_OVERRIDE;
3060
3061   virtual void scheduleTree(unsigned SubtreeID) LLVM_OVERRIDE {
3062     llvm_unreachable("PostRA scheduler does not support subtree analysis.");
3063   }
3064
3065   virtual void schedNode(SUnit *SU, bool IsTopNode) LLVM_OVERRIDE;
3066
3067   virtual void releaseTopNode(SUnit *SU) LLVM_OVERRIDE {
3068     Top.releaseTopNode(SU);
3069   }
3070
3071   // Only called for roots.
3072   virtual void releaseBottomNode(SUnit *SU) LLVM_OVERRIDE {
3073     BotRoots.push_back(SU);
3074   }
3075
3076 protected:
3077   void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
3078
3079   void pickNodeFromQueue(SchedCandidate &Cand);
3080 };
3081 } // namespace
3082
3083 void PostGenericScheduler::registerRoots() {
3084   Rem.CriticalPath = DAG->ExitSU.getDepth();
3085
3086   // Some roots may not feed into ExitSU. Check all of them in case.
3087   for (SmallVectorImpl<SUnit*>::const_iterator
3088          I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
3089     if ((*I)->getDepth() > Rem.CriticalPath)
3090       Rem.CriticalPath = (*I)->getDepth();
3091   }
3092   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
3093 }
3094
3095 /// Apply a set of heursitics to a new candidate for PostRA scheduling.
3096 ///
3097 /// \param Cand provides the policy and current best candidate.
3098 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3099 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3100                                         SchedCandidate &TryCand) {
3101
3102   // Initialize the candidate if needed.
3103   if (!Cand.isValid()) {
3104     TryCand.Reason = NodeOrder;
3105     return;
3106   }
3107
3108   // Prioritize instructions that read unbuffered resources by stall cycles.
3109   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3110               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3111     return;
3112
3113   // Avoid critical resource consumption and balance the schedule.
3114   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3115               TryCand, Cand, ResourceReduce))
3116     return;
3117   if (tryGreater(TryCand.ResDelta.DemandedResources,
3118                  Cand.ResDelta.DemandedResources,
3119                  TryCand, Cand, ResourceDemand))
3120     return;
3121
3122   // Avoid serializing long latency dependence chains.
3123   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3124     return;
3125   }
3126
3127   // Fall through to original instruction order.
3128   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3129     TryCand.Reason = NodeOrder;
3130 }
3131
3132 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3133   ReadyQueue &Q = Top.Available;
3134
3135   DEBUG(Q.dump());
3136
3137   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
3138     SchedCandidate TryCand(Cand.Policy);
3139     TryCand.SU = *I;
3140     TryCand.initResourceDelta(DAG, SchedModel);
3141     tryCandidate(Cand, TryCand);
3142     if (TryCand.Reason != NoCand) {
3143       Cand.setBest(TryCand);
3144       DEBUG(traceCandidate(Cand));
3145     }
3146   }
3147 }
3148
3149 /// Pick the next node to schedule.
3150 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3151   if (DAG->top() == DAG->bottom()) {
3152     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3153     return NULL;
3154   }
3155   SUnit *SU;
3156   do {
3157     SU = Top.pickOnlyChoice();
3158     if (!SU) {
3159       CandPolicy NoPolicy;
3160       SchedCandidate TopCand(NoPolicy);
3161       // Set the top-down policy based on the state of the current top zone and
3162       // the instructions outside the zone, including the bottom zone.
3163       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL);
3164       pickNodeFromQueue(TopCand);
3165       assert(TopCand.Reason != NoCand && "failed to find a candidate");
3166       tracePick(TopCand, true);
3167       SU = TopCand.SU;
3168     }
3169   } while (SU->isScheduled);
3170
3171   IsTopNode = true;
3172   Top.removeReady(SU);
3173
3174   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
3175   return SU;
3176 }
3177
3178 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3179 /// scheduled/remaining flags in the DAG nodes.
3180 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3181   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3182   Top.bumpNode(SU);
3183 }
3184
3185 /// Create a generic scheduler with no vreg liveness or DAG mutation passes.
3186 static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
3187   return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true);
3188 }
3189
3190 //===----------------------------------------------------------------------===//
3191 // ILP Scheduler. Currently for experimental analysis of heuristics.
3192 //===----------------------------------------------------------------------===//
3193
3194 namespace {
3195 /// \brief Order nodes by the ILP metric.
3196 struct ILPOrder {
3197   const SchedDFSResult *DFSResult;
3198   const BitVector *ScheduledTrees;
3199   bool MaximizeILP;
3200
3201   ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
3202
3203   /// \brief Apply a less-than relation on node priority.
3204   ///
3205   /// (Return true if A comes after B in the Q.)
3206   bool operator()(const SUnit *A, const SUnit *B) const {
3207     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3208     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3209     if (SchedTreeA != SchedTreeB) {
3210       // Unscheduled trees have lower priority.
3211       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3212         return ScheduledTrees->test(SchedTreeB);
3213
3214       // Trees with shallower connections have have lower priority.
3215       if (DFSResult->getSubtreeLevel(SchedTreeA)
3216           != DFSResult->getSubtreeLevel(SchedTreeB)) {
3217         return DFSResult->getSubtreeLevel(SchedTreeA)
3218           < DFSResult->getSubtreeLevel(SchedTreeB);
3219       }
3220     }
3221     if (MaximizeILP)
3222       return DFSResult->getILP(A) < DFSResult->getILP(B);
3223     else
3224       return DFSResult->getILP(A) > DFSResult->getILP(B);
3225   }
3226 };
3227
3228 /// \brief Schedule based on the ILP metric.
3229 class ILPScheduler : public MachineSchedStrategy {
3230   ScheduleDAGMILive *DAG;
3231   ILPOrder Cmp;
3232
3233   std::vector<SUnit*> ReadyQ;
3234 public:
3235   ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
3236
3237   virtual void initialize(ScheduleDAGMI *dag) {
3238     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3239     DAG = static_cast<ScheduleDAGMILive*>(dag);
3240     DAG->computeDFSResult();
3241     Cmp.DFSResult = DAG->getDFSResult();
3242     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3243     ReadyQ.clear();
3244   }
3245
3246   virtual void registerRoots() {
3247     // Restore the heap in ReadyQ with the updated DFS results.
3248     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3249   }
3250
3251   /// Implement MachineSchedStrategy interface.
3252   /// -----------------------------------------
3253
3254   /// Callback to select the highest priority node from the ready Q.
3255   virtual SUnit *pickNode(bool &IsTopNode) {
3256     if (ReadyQ.empty()) return NULL;
3257     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3258     SUnit *SU = ReadyQ.back();
3259     ReadyQ.pop_back();
3260     IsTopNode = false;
3261     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
3262           << " ILP: " << DAG->getDFSResult()->getILP(SU)
3263           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
3264           << DAG->getDFSResult()->getSubtreeLevel(
3265             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
3266           << "Scheduling " << *SU->getInstr());
3267     return SU;
3268   }
3269
3270   /// \brief Scheduler callback to notify that a new subtree is scheduled.
3271   virtual void scheduleTree(unsigned SubtreeID) {
3272     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3273   }
3274
3275   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3276   /// DFSResults, and resort the priority Q.
3277   virtual void schedNode(SUnit *SU, bool IsTopNode) {
3278     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3279   }
3280
3281   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
3282
3283   virtual void releaseBottomNode(SUnit *SU) {
3284     ReadyQ.push_back(SU);
3285     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3286   }
3287 };
3288 } // namespace
3289
3290 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3291   return new ScheduleDAGMILive(C, new ILPScheduler(true));
3292 }
3293 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3294   return new ScheduleDAGMILive(C, new ILPScheduler(false));
3295 }
3296 static MachineSchedRegistry ILPMaxRegistry(
3297   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3298 static MachineSchedRegistry ILPMinRegistry(
3299   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3300
3301 //===----------------------------------------------------------------------===//
3302 // Machine Instruction Shuffler for Correctness Testing
3303 //===----------------------------------------------------------------------===//
3304
3305 #ifndef NDEBUG
3306 namespace {
3307 /// Apply a less-than relation on the node order, which corresponds to the
3308 /// instruction order prior to scheduling. IsReverse implements greater-than.
3309 template<bool IsReverse>
3310 struct SUnitOrder {
3311   bool operator()(SUnit *A, SUnit *B) const {
3312     if (IsReverse)
3313       return A->NodeNum > B->NodeNum;
3314     else
3315       return A->NodeNum < B->NodeNum;
3316   }
3317 };
3318
3319 /// Reorder instructions as much as possible.
3320 class InstructionShuffler : public MachineSchedStrategy {
3321   bool IsAlternating;
3322   bool IsTopDown;
3323
3324   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3325   // gives nodes with a higher number higher priority causing the latest
3326   // instructions to be scheduled first.
3327   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
3328     TopQ;
3329   // When scheduling bottom-up, use greater-than as the queue priority.
3330   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
3331     BottomQ;
3332 public:
3333   InstructionShuffler(bool alternate, bool topdown)
3334     : IsAlternating(alternate), IsTopDown(topdown) {}
3335
3336   virtual void initialize(ScheduleDAGMI*) {
3337     TopQ.clear();
3338     BottomQ.clear();
3339   }
3340
3341   /// Implement MachineSchedStrategy interface.
3342   /// -----------------------------------------
3343
3344   virtual SUnit *pickNode(bool &IsTopNode) {
3345     SUnit *SU;
3346     if (IsTopDown) {
3347       do {
3348         if (TopQ.empty()) return NULL;
3349         SU = TopQ.top();
3350         TopQ.pop();
3351       } while (SU->isScheduled);
3352       IsTopNode = true;
3353     }
3354     else {
3355       do {
3356         if (BottomQ.empty()) return NULL;
3357         SU = BottomQ.top();
3358         BottomQ.pop();
3359       } while (SU->isScheduled);
3360       IsTopNode = false;
3361     }
3362     if (IsAlternating)
3363       IsTopDown = !IsTopDown;
3364     return SU;
3365   }
3366
3367   virtual void schedNode(SUnit *SU, bool IsTopNode) {}
3368
3369   virtual void releaseTopNode(SUnit *SU) {
3370     TopQ.push(SU);
3371   }
3372   virtual void releaseBottomNode(SUnit *SU) {
3373     BottomQ.push(SU);
3374   }
3375 };
3376 } // namespace
3377
3378 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3379   bool Alternate = !ForceTopDown && !ForceBottomUp;
3380   bool TopDown = !ForceBottomUp;
3381   assert((TopDown || !ForceTopDown) &&
3382          "-misched-topdown incompatible with -misched-bottomup");
3383   return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown));
3384 }
3385 static MachineSchedRegistry ShufflerRegistry(
3386   "shuffle", "Shuffle machine instructions alternating directions",
3387   createInstructionShuffler);
3388 #endif // !NDEBUG
3389
3390 //===----------------------------------------------------------------------===//
3391 // GraphWriter support for ScheduleDAGMILive.
3392 //===----------------------------------------------------------------------===//
3393
3394 #ifndef NDEBUG
3395 namespace llvm {
3396
3397 template<> struct GraphTraits<
3398   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3399
3400 template<>
3401 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3402
3403   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
3404
3405   static std::string getGraphName(const ScheduleDAG *G) {
3406     return G->MF.getName();
3407   }
3408
3409   static bool renderGraphFromBottomUp() {
3410     return true;
3411   }
3412
3413   static bool isNodeHidden(const SUnit *Node) {
3414     return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
3415   }
3416
3417   static bool hasNodeAddressLabel(const SUnit *Node,
3418                                   const ScheduleDAG *Graph) {
3419     return false;
3420   }
3421
3422   /// If you want to override the dot attributes printed for a particular
3423   /// edge, override this method.
3424   static std::string getEdgeAttributes(const SUnit *Node,
3425                                        SUnitIterator EI,
3426                                        const ScheduleDAG *Graph) {
3427     if (EI.isArtificialDep())
3428       return "color=cyan,style=dashed";
3429     if (EI.isCtrlDep())
3430       return "color=blue,style=dashed";
3431     return "";
3432   }
3433
3434   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3435     std::string Str;
3436     raw_string_ostream SS(Str);
3437     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3438     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3439       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
3440     SS << "SU:" << SU->NodeNum;
3441     if (DFS)
3442       SS << " I:" << DFS->getNumInstrs(SU);
3443     return SS.str();
3444   }
3445   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3446     return G->getGraphNodeLabel(SU);
3447   }
3448
3449   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3450     std::string Str("shape=Mrecord");
3451     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3452     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3453       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
3454     if (DFS) {
3455       Str += ",style=filled,fillcolor=\"#";
3456       Str += DOT::getColorString(DFS->getSubtreeID(N));
3457       Str += '"';
3458     }
3459     return Str;
3460   }
3461 };
3462 } // namespace llvm
3463 #endif // NDEBUG
3464
3465 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3466 /// rendered using 'dot'.
3467 ///
3468 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3469 #ifndef NDEBUG
3470   ViewGraph(this, Name, false, Title);
3471 #else
3472   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3473          << "systems with Graphviz or gv!\n";
3474 #endif  // NDEBUG
3475 }
3476
3477 /// Out-of-line implementation with no arguments is handy for gdb.
3478 void ScheduleDAGMI::viewGraph() {
3479   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3480 }