AsmPrinter: Fix wrong OS X versions being emitted for darwin triples

[oota-llvm.git] / lib / CodeGen / MachineCombiner.cpp
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp

index 9ec7239b098a21b36f556a345d87195c56b62438..fa43c4dfa05a8d9c57b0e0146a170f60c307602c 100644 (file)
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -10,6 +10,7 @@
  // The machine combiner pass uses machine trace metrics to ensure the combined
  // instructions does not lengthen the critical path or the resource depth.
  //===----------------------------------------------------------------------===//
+
  #define DEBUG_TYPE "machine-combiner"
  
  #include "llvm/ADT/Statistic.h"
@@ -38,14 +39,14 @@ namespace {
  class MachineCombiner : public MachineFunctionPass {
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
-  const MCSchedModel *SchedModel;
+  MCSchedModel SchedModel;
    MachineRegisterInfo *MRI;
    MachineTraceMetrics *Traces;
    MachineTraceMetrics::Ensemble *MinInstr;
  
    TargetSchedModel TSchedModel;
  
-  /// OptSize - True if optimizing for code size.
+  /// True if optimizing for code size.
    bool OptSize;
  
  public:
@@ -67,10 +68,11 @@ private:
    unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
                        MachineTraceMetrics::Trace BlockTrace);
    bool
-  preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
-                           MachineTraceMetrics::Trace BlockTrace,
-                           SmallVectorImpl<MachineInstr *> &InsInstrs,
-                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg);
+  improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
+                          MachineTraceMetrics::Trace BlockTrace,
+                          SmallVectorImpl<MachineInstr *> &InsInstrs,
+                          DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+                          MachineCombinerPattern Pattern);
    bool preservesResourceLen(MachineBasicBlock *MBB,
                              MachineTraceMetrics::Trace BlockTrace,
                              SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -109,7 +111,7 @@ MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
    return DefInstr;
  }
  
-/// getDepth - Computes depth of instructions in vector \InsInstr.
+/// Computes depth of instructions in vector \InsInstr.
  ///
  /// \param InsInstrs is a vector of machine instructions
  /// \param InstrIdxForVirtReg is a dense map of virtual register to index
@@ -121,18 +123,17 @@ unsigned
  MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
                            DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
                            MachineTraceMetrics::Trace BlockTrace) {
-
    SmallVector<unsigned, 16> InstrDepth;
-  assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+         "Missing machine model\n");
  
-  // Foreach instruction in in the new sequence compute the depth based on the
+  // For each instruction in the new sequence compute the depth based on the
    // operands. Use the trace information when possible. For new operands which
    // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
    for (auto *InstrPtr : InsInstrs) { // for each Use
      unsigned IDepth = 0;
      DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
-    for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) {
-      const MachineOperand &MO = InstrPtr->getOperand(i);
+    for (const MachineOperand &MO : InstrPtr->operands()) {
        // Check for virtual register operand.
        if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
          continue;
@@ -169,8 +170,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
    return InstrDepth[NewRootIdx];
  }
  
-/// getLatency - Computes instruction latency as max of latency of defined
-/// operands
+/// Computes instruction latency as max of latency of defined operands.
  ///
  /// \param Root is a machine instruction that could be replaced by NewRoot.
  /// It is used to compute a more accurate latency information for NewRoot in
@@ -181,14 +181,13 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
  /// \returns Latency of \p NewRoot
  unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
                                       MachineTraceMetrics::Trace BlockTrace) {
-
-  assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
+  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+         "Missing machine model\n");
  
    // Check each definition in NewRoot and compute the latency
    unsigned NewRootLatency = 0;
  
-  for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = NewRoot->getOperand(i);
+  for (const MachineOperand &MO : NewRoot->operands()) {
      // Check for virtual register operand.
      if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
        continue;
@@ -204,53 +203,86 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
            NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO,
            UseMO->findRegisterUseOperandIdx(MO.getReg()));
      } else {
-      LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode());
+      LatencyOp = TSchedModel.computeInstrLatency(NewRoot);
      }
      NewRootLatency = std::max(NewRootLatency, LatencyOp);
    }
    return NewRootLatency;
  }
  
-/// preservesCriticalPathlen - True when the new instruction sequence does not
-/// lengthen the critical path. The DAGCombine code sequence ends in MI
-/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A
-/// necessary condition for the new sequence to replace the old sequence is that
-/// is cannot lengthen the critical path. This is decided by the formula
-/// (NewRootDepth + NewRootLatency) <=  (RootDepth + RootLatency + RootSlack)).
-/// The slack is the number of cycles Root can be delayed before the critical
-/// patch becomes longer.
-bool MachineCombiner::preservesCriticalPathLen(
+/// The combiner's goal may differ based on which pattern it is attempting
+/// to optimize.
+enum class CombinerObjective {
+  MustReduceDepth, // The data dependency chain must be improved.
+  Default          // The critical path must not be lengthened.
+};
+
+static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
+  // TODO: If C++ ever gets a real enum class, make this part of the
+  // MachineCombinerPattern class.
+  switch (P) {
+  case MachineCombinerPattern::REASSOC_AX_BY:
+  case MachineCombinerPattern::REASSOC_AX_YB:
+  case MachineCombinerPattern::REASSOC_XA_BY:
+  case MachineCombinerPattern::REASSOC_XA_YB:
+    return CombinerObjective::MustReduceDepth;
+  default:
+    return CombinerObjective::Default;
+  }
+}
+
+/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
+/// The new code sequence ends in MI NewRoot. A necessary condition for the new
+/// sequence to replace the old sequence is that it cannot lengthen the critical
+/// path. The definition of "improve" may be restricted by specifying that the
+/// new path improves the data dependency chain (MustReduceDepth).
+bool MachineCombiner::improvesCriticalPathLen(
      MachineBasicBlock *MBB, MachineInstr *Root,
      MachineTraceMetrics::Trace BlockTrace,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
-    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
-
-  assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n");
-  // NewRoot is the last instruction in the \p InsInstrs vector
-  // Get depth and latency of NewRoot
+    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
+    MachineCombinerPattern Pattern) {
+  assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
+         "Missing machine model\n");
+  // NewRoot is the last instruction in the \p InsInstrs vector.
    unsigned NewRootIdx = InsInstrs.size() - 1;
    MachineInstr *NewRoot = InsInstrs[NewRootIdx];
-  unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
-  unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
  
-  // Get depth, latency and slack of Root
+  // Get depth and latency of NewRoot and Root.
+  unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
    unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
+
+  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
+        dbgs() << " NewRootDepth: " << NewRootDepth << "\n";
+        dbgs() << " RootDepth: " << RootDepth << "\n");
+
+  // For a transform such as reassociation, the cost equation is
+  // conservatively calculated so that we must improve the depth (data
+  // dependency cycles) in the critical path to proceed with the transform.
+  // Being conservative also protects against inaccuracies in the underlying
+  // machine trace metrics and CPU models.
+  if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth)
+    return NewRootDepth < RootDepth;
+
+  // A more flexible cost calculation for the critical path includes the slack
+  // of the original code sequence. This may allow the transform to proceed
+  // even if the instruction depths (data dependency cycles) become worse.
+  unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
    unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
    unsigned RootSlack = BlockTrace.getInstrSlack(Root);
  
-  DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
-        dbgs() << " NewRootDepth: " << NewRootDepth
-               << " NewRootLatency: " << NewRootLatency << "\n";
-        dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
-               << " RootSlack: " << RootSlack << "\n";
-        dbgs() << " NewRootDepth + NewRootLatency "
+  DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
+        dbgs() << " RootLatency: " << RootLatency << "\n";
+        dbgs() << " RootSlack: " << RootSlack << "\n";
+        dbgs() << " NewRootDepth + NewRootLatency = "
                 << NewRootDepth + NewRootLatency << "\n";
-        dbgs() << " RootDepth + RootLatency + RootSlack "
+        dbgs() << " RootDepth + RootLatency + RootSlack = "
                 << RootDepth + RootLatency + RootSlack << "\n";);
  
-  /// True when the new sequence does not lenghten the critical path.
-  return ((NewRootDepth + NewRootLatency) <=
-          (RootDepth + RootLatency + RootSlack));
+  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
+  unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
+  
+  return NewCycleCount <= OldCycleCount;
  }
  
  /// helper routine to convert instructions into SC
@@ -260,20 +292,24 @@ void MachineCombiner::instr2instrSC(
    for (auto *InstrPtr : Instrs) {
      unsigned Opc = InstrPtr->getOpcode();
      unsigned Idx = TII->get(Opc).getSchedClass();
-    const MCSchedClassDesc *SC = SchedModel->getSchedClassDesc(Idx);
+    const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx);
      InstrsSC.push_back(SC);
    }
  }
-/// preservesResourceLen - True when the new instructions do not increase
-/// resource length
+
+/// True when the new instructions do not increase resource length
  bool MachineCombiner::preservesResourceLen(
      MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
      SmallVectorImpl<MachineInstr *> &InsInstrs,
      SmallVectorImpl<MachineInstr *> &DelInstrs) {
+  if (!TSchedModel.hasInstrSchedModel())
+    return true;
  
    // Compute current resource length
  
-  ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+  //ArrayRef<const MachineBasicBlock *> MBBarr(MBB);
+  SmallVector <const MachineBasicBlock *, 1> MBBarr;
+  MBBarr.push_back(MBB);
    unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr);
  
    // Deal with SC rather than Instructions.
@@ -286,7 +322,7 @@ bool MachineCombiner::preservesResourceLen(
    ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC);
    ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC);
  
-  // Compute new resource length
+  // Compute new resource length.
    unsigned ResLenAfterCombine =
        BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr);
  
@@ -298,16 +334,16 @@ bool MachineCombiner::preservesResourceLen(
  }
  
  /// \returns true when new instruction sequence should be generated
-/// independent if it lenghtens critical path or not
+/// independent if it lengthens critical path or not
  bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
    if (OptSize && (NewSize < OldSize))
      return true;
-  if (!TSchedModel.hasInstrSchedModel())
+  if (!TSchedModel.hasInstrSchedModelOrItineraries())
      return true;
    return false;
  }
  
-/// combineInstructions - substitute a slow code sequence with a faster one by
+/// Substitute a slow code sequence with a faster one by
  /// evaluating instruction combining pattern.
  /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
  /// combining based on machine trace metrics. Only combine a sequence of
@@ -324,7 +360,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
      auto &MI = *BlockIter++;
  
      DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";);
-    SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern;
+    SmallVector<MachineCombinerPattern, 16> Patterns;
      // The motivating example is:
      //
      //     MUL  Other        MUL_op1 MUL_op2  Other
@@ -347,56 +383,58 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
      //
      // The algorithm does not try to evaluate all patterns and pick the best.
      // This is only an artificial restriction though. In practice there is
-    // mostly one pattern and hasPattern() can order patterns based on an
-    // internal cost heuristic.
-
-    if (TII->hasPattern(MI, Pattern)) {
-      for (auto P : Pattern) {
-        SmallVector<MachineInstr *, 16> InsInstrs;
-        SmallVector<MachineInstr *, 16> DelInstrs;
-        DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
-        if (!MinInstr)
-          MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
-        MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+    // mostly one pattern, and getMachineCombinerPatterns() can order patterns
+    // based on an internal cost heuristic.
+
+    if (!TII->getMachineCombinerPatterns(MI, Patterns))
+      continue;
+
+    for (auto P : Patterns) {
+      SmallVector<MachineInstr *, 16> InsInstrs;
+      SmallVector<MachineInstr *, 16> DelInstrs;
+      DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
+      if (!MinInstr)
+        MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+      MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+      Traces->verifyAnalysis();
+      TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
+                                      InstrIdxForVirtReg);
+      unsigned NewInstCount = InsInstrs.size();
+      unsigned OldInstCount = DelInstrs.size();
+      // Found pattern, but did not generate alternative sequence.
+      // This can happen e.g. when an immediate could not be materialized
+      // in a single instruction.
+      if (!NewInstCount)
+        continue;
+
+      // Substitute when we optimize for codesize and the new sequence has
+      // fewer instructions OR
+      // the new sequence neither lengthens the critical path nor increases
+      // resource pressure.
+      if (doSubstitute(NewInstCount, OldInstCount) ||
+          (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
+                                   InstrIdxForVirtReg, P) &&
+           preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
+        for (auto *InstrPtr : InsInstrs)
+          MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
+        for (auto *InstrPtr : DelInstrs)
+          InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+
+        Changed = true;
+        ++NumInstCombined;
+
+        Traces->invalidate(MBB);
          Traces->verifyAnalysis();
-        TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
-                                        InstrIdxForVirtReg);
-        // Found pattern, but did not generate alternative sequence.
-        // This can happen e.g. when an immediate could not be materialized
-        // in a single instruction.
-        if (!InsInstrs.size())
-          continue;
-        // Substitute when we optimize for codesize and the new sequence has
-        // fewer instructions OR
-        // the new sequence neither lenghten the critical path nor increases
-        // resource pressure.
-        if (doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
-            (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
-                                      InstrIdxForVirtReg) &&
-             preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
-          for (auto *InstrPtr : InsInstrs)
-            MBB->insert((MachineBasicBlock::iterator) & MI,
-                        (MachineInstr *)InstrPtr);
-          for (auto *InstrPtr : DelInstrs)
-            InstrPtr->eraseFromParent();
-
-          Changed = true;
-          ++NumInstCombined;
-
-          Traces->invalidate(MBB);
-          Traces->verifyAnalysis();
-          // Eagerly stop after the first pattern fired
-          break;
-        } else {
-          // Cleanup instructions of the alternative code sequence. There is no
-          // use for them.
-          for (auto *InstrPtr : InsInstrs) {
-            MachineFunction *MF = MBB->getParent();
-            MF->DeleteMachineInstr((MachineInstr *)InstrPtr);
-          }
-        }
-        InstrIdxForVirtReg.clear();
+        // Eagerly stop after the first pattern fires.
+        break;
+      } else {
+        // Cleanup instructions of the alternative code sequence. There is no
+        // use for them.
+        MachineFunction *MF = MBB->getParent();
+        for (auto *InstrPtr : InsInstrs)
+          MF->DeleteMachineInstr(InstrPtr);
        }
+      InstrIdxForVirtReg.clear();
      }
    }
  
@@ -404,18 +442,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
  }
  
  bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
-  TRI = MF.getTarget().getRegisterInfo();
-  const TargetSubtargetInfo &STI =
-      MF.getTarget().getSubtarget<TargetSubtargetInfo>();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  TII = STI.getInstrInfo();
+  TRI = STI.getRegisterInfo();
    SchedModel = STI.getSchedModel();
-  TSchedModel.init(*SchedModel, &STI, TII);
+  TSchedModel.init(SchedModel, &STI, TII);
    MRI = &MF.getRegInfo();
    Traces = &getAnalysis<MachineTraceMetrics>();
-  MinInstr = 0;
-
-  OptSize = MF.getFunction()->getAttributes().hasAttribute(
-      AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+  MinInstr = nullptr;
+  OptSize = MF.getFunction()->optForSize();
  
    DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
    if (!TII->useMachineCombiner()) {