Remove "localize global" optimization

[oota-llvm.git] / lib / CodeGen / ScheduleDAGInstrs.cpp
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp

index ef504067d11e8e35a34905413b6f5c4f033159bd..d940dbcf9f285315ac019779c29c15d8ea0399cb 100644 (file)
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -36,6 +36,8 @@
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetRegisterInfo.h"
  #include "llvm/Target/TargetSubtargetInfo.h"
+#include <queue>
+
  using namespace llvm;
  
  static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
@@ -98,7 +100,7 @@ static void getUnderlyingObjects(const Value *V,
      SmallVector<Value *, 4> Objs;
      GetUnderlyingObjects(const_cast<Value *>(V), Objs);
  
-    for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+    for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
           I != IE; ++I) {
        V = *I;
        if (!Visited.insert(V))
@@ -116,12 +118,15 @@ static void getUnderlyingObjects(const Value *V,
    } while (!Working.empty());
  }
  
+typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4>
+UnderlyingObjectsVector;
+
  /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
  /// information and it can be tracked to a normal reference to a known
  /// object, return the Value for that object.
  static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
-              const MachineFrameInfo *MFI,
-              SmallVectorImpl<std::pair<const Value *, bool> > &Objects) {
+                                         const MachineFrameInfo *MFI,
+                                         UnderlyingObjectsVector &Objects) {
    if (!MI->hasOneMemOperand() ||
        !(*MI->memoperands_begin())->getValue() ||
        (*MI->memoperands_begin())->isVolatile())
@@ -134,8 +139,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
    SmallVector<Value *, 4> Objs;
    getUnderlyingObjects(V, Objs);
  
-  for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
-       I != IE; ++I) {
+  for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
+         I != IE; ++I) {
      bool MayAlias = true;
      V = *I;
  
@@ -155,7 +160,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
        return;
      }
  
-    Objects.push_back(std::make_pair(V, MayAlias));
+    Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias));
    }
  }
  
@@ -175,14 +180,11 @@ void ScheduleDAGInstrs::finishBlock() {
  void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                      MachineBasicBlock::iterator begin,
                                      MachineBasicBlock::iterator end,
-                                    unsigned endcount) {
+                                    unsigned regioninstrs) {
    assert(bb == BB && "startBlock should set BB");
    RegionBegin = begin;
    RegionEnd = end;
-  EndIndex = endcount;
-  MISUnitMap.clear();
-
-  ScheduleDAG::clearDAG();
+  NumRegionInstrs = regioninstrs;
  }
  
  /// Close the current scheduling region. Don't clear any state in case the
@@ -262,15 +264,15 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
        if (UseOp < 0)
          Dep = SDep(SU, SDep::Artificial);
        else {
+        // Set the hasPhysRegDefs only for physreg defs that have a use within
+        // the scheduling region.
+        SU->hasPhysRegDefs = true;
          Dep = SDep(SU, SDep::Data, *Alias);
          RegUse = UseSU->getInstr();
-        Dep.setMinLatency(
-          SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
-                                           RegUse, UseOp, /*FindMin=*/true));
        }
        Dep.setLatency(
-        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
-                                         RegUse, UseOp, /*FindMin=*/false));
+        SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
+                                         UseOp));
  
        ST.adjustSchedDependency(SU, UseSU, Dep);
        UseSU->addPred(Dep);
@@ -307,10 +309,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
            DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias));
          else {
            SDep Dep(SU, Kind, /*Reg=*/*Alias);
-          unsigned OutLatency =
-            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
-          Dep.setMinLatency(OutLatency);
-          Dep.setLatency(OutLatency);
+          Dep.setLatency(
+            SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
            DefSU->addPred(Dep);
          }
        }
@@ -318,6 +318,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
    }
  
    if (!MO.isDef()) {
+    SU->hasPhysRegUses = true;
      // Either insert a new Reg2SUnits entry with an empty SUnits list, or
      // retrieve the existing SUnits list for this register's uses.
      // Push this SUnit on the use list.
@@ -385,10 +386,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
      SUnit *DefSU = DefI->SU;
      if (DefSU != SU && DefSU != &ExitSU) {
        SDep Dep(SU, SDep::Output, Reg);
-      unsigned OutLatency =
-        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr());
-      Dep.setMinLatency(OutLatency);
-      Dep.setLatency(OutLatency);
+      Dep.setLatency(
+        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
        DefSU->addPred(Dep);
      }
      DefI->SU = SU;
@@ -405,6 +404,15 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
    MachineInstr *MI = SU->getInstr();
    unsigned Reg = MI->getOperand(OperIdx).getReg();
  
+  // Record this local VReg use.
+  VReg2UseMap::iterator UI = VRegUses.find(Reg);
+  for (; UI != VRegUses.end(); ++UI) {
+    if (UI->SU == SU)
+      break;
+  }
+  if (UI == VRegUses.end())
+    VRegUses.insert(VReg2SUnit(Reg, SU));
+
    // Lookup this operand's reaching definition.
    assert(LIS && "vreg dependencies requires LiveIntervals");
    LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
@@ -423,10 +431,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
        // Adjust the dependence latency using operand def/use information, then
        // allow the target to perform its own adjustments.
        int DefOp = Def->findRegisterDefOperandIdx(Reg);
-      dep.setLatency(
-        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
-      dep.setMinLatency(
-        SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
+      dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
  
        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
@@ -468,8 +473,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
  
    SmallVector<Value *, 4> Objs;
    getUnderlyingObjects(V, Objs);
-  for (SmallVector<Value *, 4>::iterator I = Objs.begin(),
-       IE = Objs.end(); I != IE; ++I) {
+  for (SmallVectorImpl<Value *>::iterator I = Objs.begin(),
+         IE = Objs.end(); I != IE; ++I) {
      V = *I;
  
      if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
@@ -638,8 +643,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
                           bool isNormalMemory = false) {
    // If this is a false dependency,
    // do not add the edge, but remember the rejected node.
-  if (!EnableAASchedMI ||
-      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+  if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
      SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
      Dep.setLatency(TrueMemOrderLatency);
      SUb->addPred(Dep);
@@ -667,7 +671,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
  void ScheduleDAGInstrs::initSUnits() {
    // We'll be allocating one SUnit for each real instruction in the region,
    // which is contained within a basic block.
-  SUnits.reserve(BB->size());
+  SUnits.reserve(NumRegionInstrs);
  
    for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
      MachineInstr *MI = I;
@@ -689,10 +693,22 @@ void ScheduleDAGInstrs::initSUnits() {
  /// DAG builder is an efficient place to do it because it already visits
  /// operands.
  void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
-                                        RegPressureTracker *RPTracker) {
+                                        RegPressureTracker *RPTracker,
+                                        PressureDiffs *PDiffs) {
+  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
+                                                       : ST.useAA();
+  AliasAnalysis *AAForDep = UseAA ? AA : 0;
+
+  MISUnitMap.clear();
+  ScheduleDAG::clearDAG();
+
    // Create an SUnit for each real instruction.
    initSUnits();
  
+  if (PDiffs)
+    PDiffs->init(SUnits.size());
+
    // We build scheduling units by walking a block's instruction list from bottom
    // to top.
  
@@ -718,10 +734,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
    Uses.setUniverse(TRI->getNumRegs());
  
    assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
-  // FIXME: Allow SparseSet to reserve space for the creation of virtual
-  // registers during scheduling. Don't artificially inflate the Universe
-  // because we want to assert that vregs are not created during DAG building.
+  VRegUses.clear();
    VRegDefs.setUniverse(MRI.getNumVirtRegs());
+  VRegUses.setUniverse(MRI.getNumVirtRegs());
  
    // Model data dependencies between instructions being scheduled and the
    // ExitSU.
@@ -741,17 +756,18 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
        DbgMI = MI;
        continue;
      }
+    SUnit *SU = MISUnitMap[MI];
+    assert(SU && "No SUnit mapped to this MI");
+
      if (RPTracker) {
-      RPTracker->recede();
+      PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0;
+      RPTracker->recede(/*LiveUses=*/0, PDiff);
        assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
      }
  
-    assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
+    assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
             "Cannot schedule terminators or labels!");
  
-    SUnit *SU = MISUnitMap[MI];
-    assert(SU && "No SUnit mapped to this MI");
-
      // Add register-based dependencies (data, anti, and output).
      bool HasVRegDef = false;
      for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@@ -829,20 +845,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
          unsigned ChainLatency = 0;
          if (AliasChain->getInstr()->mayLoad())
            ChainLatency = TrueMemOrderLatency;
-        addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+        addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
                             ChainLatency);
        }
        AliasChain = SU;
        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
-        addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+        addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
                             TrueMemOrderLatency);
        for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
             E = AliasMemDefs.end(); I != E; ++I)
-        addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+        addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
        for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
          for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-          addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
+          addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
                               TrueMemOrderLatency);
        }
        adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
@@ -851,7 +867,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
        AliasMemDefs.clear();
        AliasMemUses.clear();
      } else if (MI->mayStore()) {
-      SmallVector<std::pair<const Value *, bool>, 4> Objs;
+      UnderlyingObjectsVector Objs;
        getUnderlyingObjectsForInstr(MI, MFI, Objs);
  
        if (Objs.empty()) {
@@ -860,10 +876,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
        }
  
        bool MayAlias = false;
-      for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
-           K = Objs.begin(), KE = Objs.end(); K != KE; ++K) {
-        const Value *V = K->first;
-        bool ThisMayAlias = K->second;
+      for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
+           K != KE; ++K) {
+        const Value *V = K->getPointer();
+        bool ThisMayAlias = K->getInt();
          if (ThisMayAlias)
            MayAlias = true;
  
@@ -875,7 +891,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
          MapVector<const Value *, SUnit *>::iterator IE =
            ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE) {
-          addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+          addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
+                             0, true);
            I->second = SU;
          } else {
            if (ThisMayAlias)
@@ -890,7 +907,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
          if (J != JE) {
            for (unsigned i = 0, e = J->second.size(); i != e; ++i)
-            addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
+            addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
                                 TrueMemOrderLatency, true);
            J->second.clear();
          }
@@ -899,11 +916,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
-          addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+          addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
                               TrueMemOrderLatency);
          // Add dependence on alias chain, if needed.
          if (AliasChain)
-          addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+          addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
          // But we also should check dependent instructions for the
          // SU in question.
          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
@@ -925,7 +942,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
        if (MI->isInvariantLoad(AA)) {
          // Invariant load, no chain dependencies needed!
        } else {
-        SmallVector<std::pair<const Value *, bool>, 4> Objs;
+        UnderlyingObjectsVector Objs;
          getUnderlyingObjectsForInstr(MI, MFI, Objs);
  
          if (Objs.empty()) {
@@ -933,7 +950,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            // potentially aliasing stores.
            for (MapVector<const Value *, SUnit *>::iterator I =
                   AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
-            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes);
+            addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
  
            PendingLoads.push_back(SU);
            MayAlias = true;
@@ -941,10 +958,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            MayAlias = false;
          }
  
-        for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+        for (UnderlyingObjectsVector::iterator
               J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
-          const Value *V = J->first;
-          bool ThisMayAlias = J->second;
+          const Value *V = J->getPointer();
+          bool ThisMayAlias = J->getInt();
  
            if (ThisMayAlias)
              MayAlias = true;
@@ -955,7 +972,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            MapVector<const Value *, SUnit *>::iterator IE =
              ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
            if (I != IE)
-            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+            addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
+                               0, true);
            if (ThisMayAlias)
              AliasMemUses[V].push_back(SU);
            else
@@ -965,7 +983,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
          // Add dependencies on alias and barrier chains, if needed.
          if (MayAlias && AliasChain)
-          addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+          addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
          if (BarrierChain)
            BarrierChain->addPred(SDep(SU, SDep::Barrier));
        }
@@ -994,7 +1012,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
    else if (SU == &ExitSU)
      oss << "<exit>";
    else
-    SU->getInstr()->print(oss);
+    SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
    return oss.str();
  }
  
@@ -1078,9 +1096,17 @@ public:
          joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
  
        // Either link or merge the TreeData entry from the child to the parent.
-      if (R.DFSNodeData[PredNum].SubtreeID == PredNum)
-        RootSet[PredNum].ParentNodeID = SU->NodeNum;
-      else {
+      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
+        // If the predecessor's parent is invalid, this is a tree edge and the
+        // current node is the parent.
+        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
+          RootSet[PredNum].ParentNodeID = SU->NodeNum;
+      }
+      else if (RootSet.count(PredNum)) {
+        // The predecessor is not a root, but is still in the root set. This
+        // must be the new parent that it was just joined to. Note that
+        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
+        // set to the original parent.
          RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
          RootSet.erase(PredNum);
        }
@@ -1115,8 +1141,10 @@ public:
        if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
          R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
        R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
-      assert(RI->SubInstrCount <= R.DFSNodeData[RI->NodeID].InstrCount &&
-             "Bad SubInstrCount");
+      // Note that SubInstrCount may be greater than InstrCount if we joined
+      // subtrees across a cross edge. InstrCount will be attributed to the
+      // original parent, while SubInstrCount will be attributed to the joined
+      // parent.
      }
      R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
      R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
@@ -1221,7 +1249,7 @@ public:
  static bool hasDataSucc(const SUnit *SU) {
    for (SUnit::const_succ_iterator
           SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
-    if (SI->getKind() == SDep::Data)
+    if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
        return true;
    }
    return false;
@@ -1249,8 +1277,10 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
          const SDep &PredDep = *DFS.getPred();
          DFS.advance();
          // Ignore non-data edges.
-        if (PredDep.getKind() != SDep::Data)
+        if (PredDep.getKind() != SDep::Data
+            || PredDep.getSUnit()->isBoundaryNode()) {
            continue;
+        }
          // An already visited edge is a cross edge, assuming an acyclic DAG.
          if (Impl.isVisited(PredDep.getSUnit())) {
            Impl.visitCrossEdge(PredDep, DFS.getCurr());