Removing dependency on third party library for Intel JIT event support.

[oota-llvm.git] / lib / CodeGen / ScheduleDAGInstrs.cpp
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp

index 9f8b4613fdf73af8e35e40ef6c0274a78daefb69..a1a4efd108c4009a292fbcbb217034d4f80971ac 100644 (file)
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,7 +13,6 @@
  //===----------------------------------------------------------------------===//
  
  #define DEBUG_TYPE "sched-instrs"
-#include "RegisterPressure.h"
  #include "llvm/Operator.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/ValueTracking.h"
@@ -22,6 +21,7 @@
  #include "llvm/CodeGen/MachineMemOperand.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
  #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  #include "llvm/MC/MCInstrItineraries.h"
  #include "llvm/Target/TargetMachine.h"
@@ -47,11 +47,14 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
    : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
      InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
      IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
-    LoopRegs(MLI, MDT), FirstDbgValue(0) {
+    LoopRegs(MDT), FirstDbgValue(0) {
    assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
    DbgValues.clear();
    assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
           "Virtual registers must be removed prior to PostRA scheduling");
+
+  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+  SchedModel.init(*ST.getSchedModel(), &ST, TII);
  }
  
  /// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -209,7 +212,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
        if (Reg == 0) continue;
  
        if (TRI->isPhysicalRegister(Reg))
-        Uses[Reg].push_back(&ExitSU);
+        Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
        else {
          assert(!IsPostRA && "Virtual register encountered after regalloc.");
          addVRegUseDeps(&ExitSU, i);
@@ -225,15 +228,15 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
               E = (*SI)->livein_end(); I != E; ++I) {
          unsigned Reg = *I;
          if (!Uses.contains(Reg))
-          Uses[Reg].push_back(&ExitSU);
+          Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
        }
    }
  }
  
  /// MO is an operand of SU's instruction that defines a physical register. Add
  /// data dependencies from SU to any uses of the physical register.
-void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
-                                           const MachineOperand &MO) {
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
+  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
    assert(MO.isDef() && "expect physreg def");
  
    // Ask the target if address-backscheduling is desirable, and if so how much.
@@ -241,14 +244,17 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
    unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
    unsigned DataLatency = SU->Latency;
  
-  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+       Alias.isValid(); ++Alias) {
      if (!Uses.contains(*Alias))
        continue;
-    std::vector<SUnit*> &UseList = Uses[*Alias];
+    std::vector<PhysRegSUOper> &UseList = Uses[*Alias];
      for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
-      SUnit *UseSU = UseList[i];
+      SUnit *UseSU = UseList[i].SU;
        if (UseSU == SU)
          continue;
+      MachineInstr *UseMI = UseSU->getInstr();
+      int UseOp = UseList[i].OpIdx;
        unsigned LDataLatency = DataLatency;
        // Optionally add in a special extra latency for nodes that
        // feed addresses.
@@ -257,7 +263,6 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
        // adjustSchedDependency for the targets that care about it.
        if (SpecialAddressLatency != 0 && !UnitLatencies &&
            UseSU != &ExitSU) {
-        MachineInstr *UseMI = UseSU->getInstr();
          const MCInstrDesc &UseMCID = UseMI->getDesc();
          int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
          assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
@@ -270,10 +275,17 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
        // Adjust the dependence latency using operand def/use
        // information (if any), and then allow the target to
        // perform its own adjustments.
-      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
        if (!UnitLatencies) {
-        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
-        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+        MachineInstr *RegUse = UseOp < 0 ? 0 : UseMI;
+        dep.setLatency(
+          SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                           RegUse, UseOp, /*FindMin=*/false));
+        dep.setMinLatency(
+          SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+                                           RegUse, UseOp, /*FindMin=*/true));
+
+        ST.adjustSchedDependency(SU, UseSU, dep);
        }
        UseSU->addPred(dep);
      }
@@ -294,12 +306,13 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
    // TODO: Using a latency of 1 here for output dependencies assumes
    //       there's no cost for reusing registers.
    SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
-  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+       Alias.isValid(); ++Alias) {
      if (!Defs.contains(*Alias))
        continue;
-    std::vector<SUnit *> &DefList = Defs[*Alias];
+    std::vector<PhysRegSUOper> &DefList = Defs[*Alias];
      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
-      SUnit *DefSU = DefList[i];
+      SUnit *DefSU = DefList[i].SU;
        if (DefSU == &ExitSU)
          continue;
        if (DefSU != SU &&
@@ -320,14 +333,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
      // Either insert a new Reg2SUnits entry with an empty SUnits list, or
      // retrieve the existing SUnits list for this register's uses.
      // Push this SUnit on the use list.
-    Uses[MO.getReg()].push_back(SU);
+    Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx));
    }
    else {
-    addPhysRegDataDeps(SU, MO);
+    addPhysRegDataDeps(SU, OperIdx);
  
      // Either insert a new Reg2SUnits entry with an empty SUnits list, or
      // retrieve the existing SUnits list for this register's defs.
-    std::vector<SUnit *> &DefList = Defs[MO.getReg()];
+    std::vector<PhysRegSUOper> &DefList = Defs[MO.getReg()];
  
      // If a def is going to wrap back around to the top of the loop,
      // backschedule it.
@@ -389,11 +402,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      if (SU->isCall) {
-      while (!DefList.empty() && DefList.back()->isCall)
+      while (!DefList.empty() && DefList.back().SU->isCall)
          DefList.pop_back();
      }
      // Defs are pushed in the order they are visited and never reordered.
-    DefList.push_back(SU);
+    DefList.push_back(PhysRegSUOper(SU, OperIdx));
    }
  }
  
@@ -407,9 +420,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
    const MachineInstr *MI = SU->getInstr();
    unsigned Reg = MI->getOperand(OperIdx).getReg();
  
-  // SSA defs do not have output/anti dependencies.
+  // Singly defined vregs do not have output/anti dependencies.
    // The current operand is a def, so we have at least one.
-  if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
+  // Check here if there are any others...
+  if (MRI.hasOneDef(Reg))
      return;
  
    // Add output dependence to the next nearest def of this vreg.
@@ -445,19 +459,11 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  
    // Lookup this operand's reaching definition.
    assert(LIS && "vreg dependencies requires LiveIntervals");
-  SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
-  LiveInterval *LI = &LIS->getInterval(Reg);
-  VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
+  LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI));
+  VNInfo *VNI = LRQ.valueIn();
  
-  // Special case: An early-clobber tied operand reads and writes the
-  // register one slot early. e.g. InlineAsm.
-  //
-  // FIXME: Same special case is in shrinkToUses. Hide under an API.
-  if (SlotIndex::isSameInstr(VNI->def, UseIdx)) {
-    UseIdx = VNI->def;
-    VNI = LI->getVNInfoBefore(UseIdx);
-  }
    // VNI will be valid because MachineOperand::readsReg() is checked by caller.
+  assert(VNI && "No value to read by operand");
    MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
    // Phis and other noninstructions (after coalescing) have a NULL Def.
    if (Def) {
@@ -467,11 +473,16 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
        // Create a data dependence.
        //
        // TODO: Handle "special" address latencies cleanly.
-      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+      SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
        if (!UnitLatencies) {
          // Adjust the dependence latency using operand def/use information, then
          // allow the target to perform its own adjustments.
-        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+        int DefOp = Def->findRegisterDefOperandIdx(Reg);
+        dep.setLatency(
+          SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false));
+        dep.setMinLatency(
+          SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true));
+
          const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
          ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
        }
@@ -489,7 +500,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
  /// (like a call or something with unmodeled side effects).
  static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
-      (MI->hasVolatileMemoryRef() &&
+      (MI->hasOrderedMemoryRef() &&
         (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
      return true;
    return false;
@@ -641,7 +652,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
  /// checks whether SU can be aliasing any node dominated
  /// by it.
  static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
-            SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList) {
+                            SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+                            unsigned LatencyToLoad) {
    if (!SU)
      return;
  
@@ -652,9 +664,11 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
         I != IE; ++I) {
      if (SU == *I)
        continue;
-    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr()))
-      (*I)->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+      unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0;
+      (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0,
                           /*isNormalMemory=*/true));
+    }
      // Now go through all the chain successors and iterate from them.
      // Keep track of visited nodes.
      for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
@@ -817,8 +831,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
      // after stack slots are lowered to actual addresses.
      // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
      // produce more precise dependence information.
-#define STORE_LOAD_LATENCY 1
-    unsigned TrueMemOrderLatency = 0;
+    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
      if (isGlobalMemoryObject(AA, MI)) {
        // Be conservative with these and add dependencies on all memory
        // references, even those that are known to not alias.
@@ -837,7 +850,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
        BarrierChain = SU;
        // This is a barrier event that acts as a pivotal node in the DAG,
        // so it is safe to clear list of exposed nodes.
-      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes);
+      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
        RejectMemNodes.clear();
        NonAliasMemDefs.clear();
        NonAliasMemUses.clear();
@@ -845,8 +859,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
        // fall-through
      new_alias_chain:
        // Chain all possibly aliasing memory references though SU.
-      if (AliasChain)
-        addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+      if (AliasChain) {
+        unsigned ChainLatency = 0;
+        if (AliasChain->getInstr()->mayLoad())
+          ChainLatency = TrueMemOrderLatency;
+        addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes,
+                           ChainLatency);
+      }
        AliasChain = SU;
        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
          addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
@@ -860,13 +879,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes,
                               TrueMemOrderLatency);
        }
-      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes);
+      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
        PendingLoads.clear();
        AliasMemDefs.clear();
        AliasMemUses.clear();
      } else if (MI->mayStore()) {
        bool MayAlias = true;
-      TrueMemOrderLatency = STORE_LOAD_LATENCY;
        if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A store to a specific PseudoSourceValue. Add precise dependencies.
          // Record the def in MemDefs, first adding a dep if there is
@@ -907,7 +926,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
              addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
            // But we also should check dependent instructions for the
            // SU in question.
-          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes);
+          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+                          TrueMemOrderLatency);
          }
          // Add dependence on barrier chain, if needed.
          // There is no point to check aliasing on barrier event. Even if
@@ -929,7 +949,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                              /*isArtificial=*/true));
      } else if (MI->mayLoad()) {
        bool MayAlias = true;
-      TrueMemOrderLatency = 0;
        if (MI->isInvariantLoad(AA)) {
          // Invariant load, no chain dependencies needed!
        } else {
@@ -957,7 +976,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
            MayAlias = true;
          }
          if (MayAlias)
-          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes);
+          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
          // Add dependencies on alias and barrier chains, if needed.
          if (MayAlias && AliasChain)
            addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
@@ -976,8 +995,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
  }
  
  void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
-  // Compute the latency for the node.
-  if (!InstrItins || InstrItins->isEmpty()) {
+  // Compute the latency for the node. We only provide a default for missing
+  // itineraries. Empty itineraries still have latency properties.
+  if (!InstrItins) {
      SU->Latency = 1;
  
      // Simplistic target-independent heuristic: assume that loads take
@@ -989,67 +1009,10 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
    }
  }
  
-void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
-                                              SDep& dep) const {
-  if (!InstrItins || InstrItins->isEmpty())
-    return;
-
-  // For a data dependency with a known register...
-  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
-    return;
-
-  const unsigned Reg = dep.getReg();
-
-  // ... find the definition of the register in the defining
-  // instruction
-  MachineInstr *DefMI = Def->getInstr();
-  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
-  if (DefIdx != -1) {
-    const MachineOperand &MO = DefMI->getOperand(DefIdx);
-    if (MO.isReg() && MO.isImplicit() &&
-        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
-      // This is an implicit def, getOperandLatency() won't return the correct
-      // latency. e.g.
-      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
-      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
-      // What we want is to compute latency between def of %D6/%D7 and use of
-      // %Q3 instead.
-      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
-      if (DefMI->getOperand(Op2).isReg())
-        DefIdx = Op2;
-    }
-    MachineInstr *UseMI = Use->getInstr();
-    // For all uses of the register, calculate the maxmimum latency
-    int Latency = -1;
-    if (UseMI) {
-      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
-        const MachineOperand &MO = UseMI->getOperand(i);
-        if (!MO.isReg() || !MO.isUse())
-          continue;
-        unsigned MOReg = MO.getReg();
-        if (MOReg != Reg)
-          continue;
-
-        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
-                                              UseMI, i);
-        Latency = std::max(Latency, UseCycle);
-      }
-    } else {
-      // UseMI is null, then it must be a scheduling barrier.
-      if (!InstrItins || InstrItins->isEmpty())
-        return;
-      unsigned DefClass = DefMI->getDesc().getSchedClass();
-      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
-    }
-
-    // If we found a latency, then replace the existing dependence latency.
-    if (Latency >= 0)
-      dep.setLatency(Latency);
-  }
-}
-
  void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    SU->getInstr()->dump();
+#endif
  }
  
  std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {