[safestack] Fast access to the unsafe stack pointer on AArch64/Android.

[oota-llvm.git] / lib / CodeGen / MachineScheduler.cpp
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp

index cbe937a06f02efc98b5f905f2284153b930a1f67..ee2dbc86bf1f760d1a543abe71b5a9b51069dc99 100644 (file)
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -40,12 +40,20 @@ cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                             cl::desc("Force top-down list scheduling"));
  cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                              cl::desc("Force bottom-up list scheduling"));
+/// Dump critical path lengths (and the enclosing function/block) via errs().
+cl::opt<bool> DumpCriticalPathLength("misched-dcpl", cl::Hidden,
+    cl::desc("Print critical path length to stderr"));
  }
  
  #ifndef NDEBUG
  static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
    cl::desc("Pop up a window to show MISched dags after they are processed"));
  
+/// In some situations a few uninteresting nodes depend on nearly all other
+/// nodes in the graph; provide a cutoff to hide them. A cutoff of 0 hides none.
+static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
+  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
+
  static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
    cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
  
@@ -141,12 +149,12 @@ char MachineScheduler::ID = 0;
  
  char &llvm::MachineSchedulerID = MachineScheduler::ID;
  
-INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
+INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
                        "Machine Instruction Scheduler", false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
  INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(MachineScheduler, "misched",
+INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
                      "Machine Instruction Scheduler", false, false)
  
  MachineScheduler::MachineScheduler()
@@ -158,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
    AU.addRequiredID(MachineDominatorsID);
    AU.addRequired<MachineLoopInfo>();
-  AU.addRequired<AliasAnalysis>();
+  AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
@@ -206,6 +214,11 @@ static MachineSchedRegistry
  DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                       useDefaultMachineSched);
  
+static cl::opt<bool> EnableMachineSched(
+    "enable-misched",
+    cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
+    cl::Hidden);
+
  /// Forward declare the standard machine scheduler. This will be used as the
  /// default scheduler if the target does not set a default.
  static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
@@ -301,6 +314,12 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  /// design would be to split blocks at scheduling boundaries, but LLVM has a
  /// general bias against block splitting purely for implementation simplicity.
  bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+  if (EnableMachineSched.getNumOccurrences()) {
+    if (!EnableMachineSched)
+      return false;
+  } else if (!mf.getSubtarget().enableMachineScheduler())
+    return false;
+
    DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
  
    // Initialize the context of the pass.
@@ -308,7 +327,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
    MLI = &getAnalysis<MachineLoopInfo>();
    MDT = &getAnalysis<MachineDominatorTree>();
    PassConfig = &getAnalysis<TargetPassConfig>();
-  AA = &getAnalysis<AliasAnalysis>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  
    LIS = &getAnalysis<LiveIntervals>();
  
@@ -333,9 +352,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
    if (skipOptnoneFunction(*mf.getFunction()))
      return false;
  
-  const TargetSubtargetInfo &ST =
-    mf.getTarget().getSubtarget<TargetSubtargetInfo>();
-  if (!ST.enablePostMachineScheduler()) {
+  if (!mf.getSubtarget().enablePostRAScheduler()) {
      DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
      return false;
    }
@@ -378,7 +395,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
  
  /// Main driver for both MachineScheduler and PostMachineScheduler.
  void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
-  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    bool IsPostRA = Scheduler.isPostRA();
  
    // Visit all machine basic blocks.
@@ -388,7 +405,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
    for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
         MBB != MBBEnd; ++MBB) {
  
-    Scheduler.startBlock(MBB);
+    Scheduler.startBlock(&*MBB);
  
  #ifndef NDEBUG
      if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
@@ -417,7 +434,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
  
        // Avoid decrementing RegionEnd for blocks with no terminator.
        if (RegionEnd != MBB->end() ||
-          isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
+          isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII, IsPostRA)) {
          --RegionEnd;
          // Count the boundary instruction.
          --RemainingInstrs;
@@ -427,13 +444,15 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
        // instruction stream until we find the nearest boundary.
        unsigned NumRegionInstrs = 0;
        MachineBasicBlock::iterator I = RegionEnd;
-      for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
-        if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
+      for(;I != MBB->begin(); --I, --RemainingInstrs) {
+        if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII, IsPostRA))
            break;
+        if (!std::prev(I)->isDebugValue()) // I itself may be MBB->end().
+          ++NumRegionInstrs;
        }
        // Notify the scheduler of the region, even if we may skip scheduling
        // it. Perhaps it still needs to be bundled.
-      Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
+      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
  
        // Skip empty scheduling regions (0 or 1 schedulable instructions).
        if (I == RegionEnd || I == std::prev(RegionEnd)) {
@@ -451,6 +470,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
              else dbgs() << "End";
              dbgs() << " RegionInstrs: " << NumRegionInstrs
              << " Remaining: " << RemainingInstrs << "\n");
+      if (DumpCriticalPathLength) {
+        errs() << MF->getName();
+        errs() << ":BB# " << MBB->getNumber();
+        errs() << " " << MBB->getName() << " \n";
+      }
  
        // Schedule a region: possibly reorder instructions.
        // This invalidates 'RegionEnd' and 'I'.
@@ -468,7 +492,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
      if (Scheduler.isPostRA()) {
        // FIXME: Ideally, no further passes should rely on kill flags. However,
        // thumb2 size reduction is currently an exception.
-      Scheduler.fixupKills(MBB);
+      Scheduler.fixupKills(&*MBB);
      }
    }
    Scheduler.finalizeSchedule();
@@ -478,14 +502,13 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
    // unimplemented
  }
  
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
  void ReadyQueue::dump() {
-  dbgs() << Name << ": ";
+  dbgs() << "Queue " << Name << ": ";
    for (unsigned i = 0, e = Queue.size(); i < e; ++i)
      dbgs() << Queue[i]->NodeNum << " ";
    dbgs() << "\n";
  }
-#endif
  
  //===----------------------------------------------------------------------===//
  // ScheduleDAGMI - Basic machine instruction scheduling. This is
@@ -642,6 +665,9 @@ bool ScheduleDAGMI::checkSchedLimit() {
  /// does not consider liveness or register pressure. It is useful for PostRA
  /// scheduling and potentially other custom schedulers.
  void ScheduleDAGMI::schedule() {
+  DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
+  DEBUG(SchedImpl->dumpPolicy());
+
    // Build the DAG.
    buildSchedGraph(AA);
  
@@ -664,7 +690,11 @@ void ScheduleDAGMI::schedule() {
    initQueues(TopRoots, BotRoots);
  
    bool IsTopNode = false;
-  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+  while (true) {
+    DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
+    SUnit *SU = SchedImpl->pickNode(IsTopNode);
+    if (!SU) break;
+
      assert(!SU->isScheduled && "Node already scheduled");
      if (!checkSchedLimit())
        break;
@@ -691,7 +721,7 @@ void ScheduleDAGMI::schedule() {
        }
      }
      // Notify the scheduling strategy before updating the DAG.
-    // This sets the scheduled nodes ReadyCycle to CurrCycle. When updateQueues
+    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
      // runs, it can then use the accurate ReadyCycle time to determine whether
      // newly released nodes can move to the readyQ.
      SchedImpl->schedNode(SU, IsTopNode);
@@ -925,8 +955,9 @@ updateScheduledPressure(const SUnit *SU,
      unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
      if (NewMaxPressure[ID] >= Limit - 2) {
        DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
-            << NewMaxPressure[ID] << " > " << Limit << "(+ "
-            << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
+            << NewMaxPressure[ID]
+            << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit
+            << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n");
      }
    }
  }
@@ -979,12 +1010,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
  /// only includes instructions that have DAG nodes, not scheduling boundaries.
  ///
  /// This is a skeletal driver, with all the functionality pushed into helpers,
-/// so that it can be easilly extended by experimental schedulers. Generally,
+/// so that it can be easily extended by experimental schedulers. Generally,
  /// implementing MachineSchedStrategy should be sufficient to implement a new
  /// scheduling algorithm. However, if a scheduler further subclasses
  /// ScheduleDAGMILive then it will want to override this virtual method in order
  /// to update any specialized state.
  void ScheduleDAGMILive::schedule() {
+  DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
+  DEBUG(SchedImpl->dumpPolicy());
    buildDAGWithRegPressure();
  
    Topo.InitDAGTopologicalSorting();
@@ -1011,15 +1044,17 @@ void ScheduleDAGMILive::schedule() {
    }
  
    bool IsTopNode = false;
-  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+  while (true) {
+    DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
+    SUnit *SU = SchedImpl->pickNode(IsTopNode);
+    if (!SU) break;
+
      assert(!SU->isScheduled && "Node already scheduled");
      if (!checkSchedLimit())
        break;
  
      scheduleMI(SU, IsTopNode);
  
-    updateQueues(SU, IsTopNode);
-
      if (DFSResult) {
        unsigned SubtreeID = DFSResult->getSubtreeID(SU);
        if (!ScheduledTrees.test(SubtreeID)) {
@@ -1031,6 +1066,8 @@ void ScheduleDAGMILive::schedule() {
  
      // Notify the scheduling strategy after updating the DAG.
      SchedImpl->schedNode(SU, IsTopNode);
+
+    updateQueues(SU, IsTopNode);
    }
    assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
  
@@ -1252,7 +1289,7 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
      SUnit *SU = Loads[Idx];
      unsigned BaseReg;
      unsigned Offset;
-    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
+    if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
        LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
    }
    if (LoadRecords.size() < 2)
@@ -1330,25 +1367,49 @@ namespace {
  /// \brief Post-process the DAG to create cluster edges between instructions
  /// that may be fused by the processor into a single operation.
  class MacroFusion : public ScheduleDAGMutation {
-  const TargetInstrInfo *TII;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
  public:
-  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
+  MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
+    : TII(TII), TRI(TRI) {}
  
    void apply(ScheduleDAGMI *DAG) override;
  };
  } // anonymous
  
+/// Returns true if any register read by \p MI is modified by \p Other.
+static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
+                       const MachineInstr &Other) {
+  for (const MachineOperand &Use : MI.uses()) {
+    // Only register operands that are actually read can carry a dependence.
+    const bool ReadsRegister = Use.isReg() && Use.readsReg();
+    if (!ReadsRegister)
+      continue;
+    if (Other.modifiesRegister(Use.getReg(), &TRI))
+      return true;
+  }
+  return false;
+}
+
  /// \brief Callback from DAG postProcessing to create cluster edges to encourage
  /// fused operations.
  void MacroFusion::apply(ScheduleDAGMI *DAG) {
    // For now, assume targets can only fuse with the branch.
-  MachineInstr *Branch = DAG->ExitSU.getInstr();
+  SUnit &ExitSU = DAG->ExitSU;
+  MachineInstr *Branch = ExitSU.getInstr();
    if (!Branch)
      return;
  
-  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
-    SUnit *SU = &DAG->SUnits[--Idx];
-    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
+  for (SUnit &SU : DAG->SUnits) {
+    // SUnits with successors can't be scheduled in front of the ExitSU.
+    if (!SU.Succs.empty())
+      continue;
+    // We only care if the node writes to a register that the branch reads.
+    MachineInstr *Pred = SU.getInstr();
+    if (!HasDataDep(TRI, *Branch, *Pred))
+      continue;
+
+    if (!TII.shouldScheduleAdjacent(Pred, Branch))
        continue;
  
      // Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1357,11 +1418,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
      // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
      // of SU, we could create an artificial edge from the deepest root, but it
      // hasn't been needed yet.
-    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
+    bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
      (void)Success;
      assert(Success && "No DAG nodes should be reachable from ExitSU");
  
-    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
+    DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
      break;
    }
  }
@@ -1425,12 +1486,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
    // Check if either the dest or source is local. If it's live across a back
    // edge, it's not local. Note that if both vregs are live across the back
    // edge, we cannot successfully contrain the copy without cyclic scheduling.
-  unsigned LocalReg = DstReg;
-  unsigned GlobalReg = SrcReg;
+  // If both the copy's source and dest are local live intervals, then we
+  // should treat the dest as the global for the purpose of adding
+  // constraints. This adds edges from source's other uses to the copy.
+  unsigned LocalReg = SrcReg;
+  unsigned GlobalReg = DstReg;
    LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
-    LocalReg = SrcReg;
-    GlobalReg = DstReg;
+    LocalReg = DstReg;
+    GlobalReg = SrcReg;
      LocalLI = &LIS->getInterval(LocalReg);
      if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
        return;
@@ -1687,8 +1751,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
      for (TargetSchedModel::ProcResIter
             PI = SchedModel->getWriteProcResBegin(SC),
             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
-      if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+      unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
+      if (NRCycle > CurrCycle) {
+#ifndef NDEBUG
+        MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
+#endif
+        DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
+              << SchedModel->getResourceName(PI->ProcResourceIdx)
+              << "=" << NRCycle << "c\n");
          return true;
+      }
      }
    }
    return false;
@@ -1747,7 +1819,11 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
    assert(SU->getInstr() && "Scheduled SUnit must have instr");
  
  #ifndef NDEBUG
-  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
+  // ReadyCycle has been bumped up to the CurrCycle when this node was
+  // scheduled, but CurrCycle may have been eagerly advanced immediately after
+  // scheduling, so may now be greater than ReadyCycle.
+  if (ReadyCycle > CurrCycle)
+    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
  #endif
  
    if (ReadyCycle < MinReadyCycle)
@@ -1942,10 +2018,12 @@ void SchedBoundary::bumpNode(SUnit *SU) {
               PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
          unsigned PIdx = PI->ProcResourceIdx;
          if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
-          ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
-#ifndef NDEBUG
-          MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
-#endif
+          if (isTop()) {
+            ReservedCycles[PIdx] =
+              std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
+          }
+          else
+            ReservedCycles[PIdx] = NextCycle;
          }
        }
      }
@@ -2048,8 +2126,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
      }
    }
    for (unsigned i = 0; Available.empty(); ++i) {
-    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
-           "permanent hazard"); (void)i;
+//  FIXME: Re-enable assert once PR20057 is resolved.
+//    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
+//           "permanent hazard");
+    (void)i;
      bumpCycle(CurrCycle + 1);
      releasePending();
    }
@@ -2112,7 +2192,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
                                       bool IsPostRA,
                                       SchedBoundary &CurrZone,
                                       SchedBoundary *OtherZone) {
-  // Apply preemptive heuristics based on the the total latency and resources
+  // Apply preemptive heuristics based on the total latency and resources
    // inside and outside this zone. Potential stalls should be considered before
    // following this policy.
  
@@ -2239,7 +2319,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
      Latency = Cand.SU->getDepth();
      break;
    }
-  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+  dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
    if (P.isValid())
      dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
             << ":" << P.getUnitInc() << " ";
@@ -2340,14 +2420,15 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
    // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
    // are disabled, then these HazardRecs will be disabled.
    const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
-  const TargetMachine &TM = DAG->MF.getTarget();
    if (!Top.HazardRec) {
      Top.HazardRec =
-      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+            Itin, DAG);
    }
    if (!Bot.HazardRec) {
      Bot.HazardRec =
-      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+            Itin, DAG);
    }
  }
  
@@ -2355,8 +2436,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
  void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
                                    MachineBasicBlock::iterator End,
                                    unsigned NumRegionInstrs) {
-  const TargetMachine &TM = Context->MF->getTarget();
-  const TargetLowering *TLI = TM.getTargetLowering();
+  const MachineFunction &MF = *Begin->getParent()->getParent();
+  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
  
    // Avoid setting up the register pressure tracker for small regions to save
    // compile time. As a rough heuristic, only track pressure when the number of
@@ -2376,8 +2457,8 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
    RegionPolicy.OnlyBottomUp = true;
  
    // Allow the subtarget to override default policy.
-  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
-  ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs);
+  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
+                                        NumRegionInstrs);
  
    // After subtarget overrides, apply command line options.
    if (!EnableRegPressure)
@@ -2399,6 +2480,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
    }
  }
  
+void GenericScheduler::dumpPolicy() {
+  raw_ostream &OS = dbgs(); // All policy flags go on one debug-stream line.
+  OS << "GenericScheduler RegionPolicy: ";
+  OS << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure;
+  OS << " OnlyTopDown=" << RegionPolicy.OnlyTopDown;
+  OS << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp << "\n";
+}
+
  /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
  /// critical path by more cycles than it takes to drain the instruction buffer.
  /// We estimate an upper bounds on in-flight instructions as:
@@ -2445,7 +2534,10 @@ void GenericScheduler::registerRoots() {
      if ((*I)->getDepth() > Rem.CriticalPath)
        Rem.CriticalPath = (*I)->getDepth();
    }
-  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+  DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
+  if (DumpCriticalPathLength) {
+    errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
+  }
  
    if (EnableCyclicPath) {
      Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
@@ -2467,8 +2559,8 @@ static bool tryPressure(const PressureChange &TryP,
    }
    // If one candidate decreases and the other increases, go with it.
    // Invalid candidates have UnitInc==0.
-  if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
-              Reason)) {
+  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
+                 Reason)) {
      return true;
    }
    // If the candidates are decreasing pressure, reverse priority.
@@ -2555,7 +2647,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
      }
    }
    DEBUG(if (TryCand.RPDelta.Excess.isValid())
-          dbgs() << "  SU(" << TryCand.SU->NodeNum << ") "
+          dbgs() << "  Try  SU(" << TryCand.SU->NodeNum << ") "
                   << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
                   << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
  
@@ -2570,8 +2662,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
                   TryCand, Cand, PhysRegCopy))
      return;
  
-  // Avoid exceeding the target's limit. If signed PSetID is negative, it is
-  // invalid; convert it to INT_MAX to give it lowest priority.
+  // Avoid exceeding the target's limit.
    if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
                                                 Cand.RPDelta.Excess,
                                                 TryCand, Cand, RegExcess))
@@ -2631,8 +2722,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
  
    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
-  if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited
-      && tryLatency(TryCand, Cand, Zone)) {
+  if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
+      !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
      return;
    }
  
@@ -2846,7 +2937,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
    if (EnableLoadCluster && DAG->TII->enableClusterLoads())
      DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
    if (EnableMacroFusion)
-    DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
+    DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
    return DAG;
  }
  
@@ -2870,10 +2961,10 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
    // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
    // or are disabled, then these HazardRecs will be disabled.
    const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
-  const TargetMachine &TM = DAG->MF.getTarget();
    if (!Top.HazardRec) {
      Top.HazardRec =
-      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+            Itin, DAG);
    }
  }
  
@@ -2887,7 +2978,10 @@ void PostGenericScheduler::registerRoots() {
      if ((*I)->getDepth() > Rem.CriticalPath)
        Rem.CriticalPath = (*I)->getDepth();
    }
-  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
+  if (DumpCriticalPathLength) {
+    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
+  }
  }
  
  /// Apply a set of heursitics to a new candidate for PostRA scheduling.
@@ -3210,7 +3304,10 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
    }
  
    static bool isNodeHidden(const SUnit *Node) {
-    return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
+    if (ViewMISchedCutoff == 0)
+      return false;
+    return (Node->Preds.size() > ViewMISchedCutoff
+         || Node->Succs.size() > ViewMISchedCutoff);
    }
  
    static bool hasNodeAddressLabel(const SUnit *Node,