misched: API for minimum vs. expected latency.

author Andrew Trick <atrick@apple.com>

Tue, 5 Jun 2012 21:11:27 +0000 (21:11 +0000)

committer Andrew Trick <atrick@apple.com>

Tue, 5 Jun 2012 21:11:27 +0000 (21:11 +0000)
author Andrew Trick <atrick@apple.com>
Tue, 5 Jun 2012 21:11:27 +0000 (21:11 +0000)
committer Andrew Trick <atrick@apple.com>
Tue, 5 Jun 2012 21:11:27 +0000 (21:11 +0000)
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h

index f4de6933b317cce2b2127d70c3dbb6245fbaadf1..3dd3c0c46739b22995d6f1a74f2b7a3af48cd770 100644 (file)
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -272,6 +272,9 @@ namespace llvm {
      unsigned Depth;                     // Node depth.
      unsigned Height;                    // Node height.
    public:
+    unsigned TopReadyCycle; // Cycle relative to start when node is ready.
+    unsigned BotReadyCycle; // Cycle relative to end when node is ready.
+
      const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null.
      const TargetRegisterClass *CopySrcRC;
  
@@ -287,7 +290,7 @@ namespace llvm {
          isScheduleHigh(false), isScheduleLow(false), isCloned(false),
          SchedulingPref(Sched::None),
          isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
-        CopyDstRC(NULL), CopySrcRC(NULL) {}
+        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
  
      /// SUnit - Construct an SUnit for post-regalloc scheduling to represent
      /// a MachineInstr.
@@ -301,7 +304,7 @@ namespace llvm {
          isScheduleHigh(false), isScheduleLow(false), isCloned(false),
          SchedulingPref(Sched::None),
          isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
-        CopyDstRC(NULL), CopySrcRC(NULL) {}
+        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
  
      /// SUnit - Construct a placeholder SUnit.
      SUnit()
@@ -314,7 +317,7 @@ namespace llvm {
          isScheduleHigh(false), isScheduleLow(false), isCloned(false),
          SchedulingPref(Sched::None),
          isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
-        CopyDstRC(NULL), CopySrcRC(NULL) {}
+        TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
  
      /// setNode - Assign the representative SDNode for this SUnit.
      /// This may be used during pre-regalloc scheduling.
@@ -552,12 +555,6 @@ namespace llvm {
      ///
      virtual void computeLatency(SUnit *SU) = 0;
  
-    /// ComputeOperandLatency - Override dependence edge latency using
-    /// operand use/def information
-    ///
-    virtual void computeOperandLatency(SUnit *, SUnit *,
-                                       SDep&) const { }
-
      /// ForceUnitLatencies - Return true if all scheduling edges should be given
      /// a latency value of one.  The default is to return false; schedulers may
      /// override this as needed.
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h

index 968cc56d5cff187a02ce68d81bb484863f452f93..874f9f1b80e925b9ebaa7d6e44d7b97410f7868d 100644 (file)
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -291,11 +291,15 @@ namespace llvm {
      ///
      virtual void computeLatency(SUnit *SU);
  
-    /// computeOperandLatency - Override dependence edge latency using
+    /// computeOperandLatency - Return dependence edge latency using
      /// operand use/def information
      ///
-    virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
-                                       SDep& dep) const;
+    /// FindMin may be set to get the minimum vs. expected latency. Minimum
+    /// latency is used for scheduling groups, while expected latency is for
+    /// instruction cost and critical path.
+    virtual unsigned computeOperandLatency(SUnit *Def, SUnit *Use,
+                                           const SDep& dep,
+                                           bool FindMin = false) const;
  
      /// schedule - Order nodes according to selected style, filling
      /// in the Sequence member.
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h

index 199489599e4d4a0643d6d0d048eff685e3c284df..62e19143482de5e3f4d69a222c9fd83d0f78cc9f 100644 (file)
--- a/include/llvm/MC/MCInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -214,6 +214,12 @@ public:
    /// class.  The latency is the maximum completion time for any stage
    /// in the itinerary.
    ///
+  /// InstrStages override the itinerary's MinLatency property. In fact, if the
+  /// stage latencies, which may be zero, are less than MinLatency,
+  /// getStageLatency returns a value less than MinLatency.
+  ///
+  /// If no stages exist, MinLatency is used. If MinLatency is invalid (<0),
+  /// then it defaults to one cycle.
    unsigned getStageLatency(unsigned ItinClassIndx) const {
      // If the target doesn't provide itinerary information, use a simple
      // non-zero default value for all instructions.  Some target's provide a
@@ -222,7 +228,7 @@ public:
      // stage). This is different from beginStage == endStage != 0, which could
      // be used for zero-latency pseudo ops.
      if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0)
-      return 1;
+      return (Props.MinLatency < 0) ? 1 : Props.MinLatency;
  
      // Calculate the maximum completion time for any stage.
      unsigned Latency = 0, StartCycle = 0;
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h

index 896c152f9381c1f17ac51db2f3c87971dfe1e9dc..693166f55225db729cf799dc6ae40486d4a8a2b4 100644 (file)
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -668,18 +668,36 @@ public:
      return Opcode <= TargetOpcode::COPY;
    }
  
+  virtual int getOperandLatency(const InstrItineraryData *ItinData,
+                                SDNode *DefNode, unsigned DefIdx,
+                                SDNode *UseNode, unsigned UseIdx) const = 0;
+
    /// getOperandLatency - Compute and return the use operand latency of a given
    /// pair of def and use.
    /// In most cases, the static scheduling itinerary was enough to determine the
    /// operand latency. But it may not be possible for instructions with variable
    /// number of defs / uses.
+  ///
+  /// This is a raw interface to the itinerary that may be directly overriden by
+  /// a target. Use computeOperandLatency to get the best estimate of latency.
    virtual int getOperandLatency(const InstrItineraryData *ItinData,
-                              const MachineInstr *DefMI, unsigned DefIdx,
-                              const MachineInstr *UseMI, unsigned UseIdx) const;
-
-  virtual int getOperandLatency(const InstrItineraryData *ItinData,
-                                SDNode *DefNode, unsigned DefIdx,
-                                SDNode *UseNode, unsigned UseIdx) const = 0;
+                                const MachineInstr *DefMI, unsigned DefIdx,
+                                const MachineInstr *UseMI,
+                                unsigned UseIdx) const;
+
+  /// computeOperandLatency - Compute and return the latency of the given data
+  /// dependent def and use. DefMI must be a valid def. UseMI may be NULL for
+  /// an unknown use. If the subtarget allows, this may or may not need to call
+  /// getOperandLatency().
+  ///
+  /// FindMin may be set to get the minimum vs. expected latency. Minimum
+  /// latency is used for scheduling groups, while expected latency is for
+  /// instruction cost and critical path.
+  unsigned computeOperandLatency(const InstrItineraryData *ItinData,
+                                 const TargetRegisterInfo *TRI,
+                                 const MachineInstr *DefMI,
+                                 const MachineInstr *UseMI,
+                                 unsigned Reg, bool FindMin) const;
  
    /// getOutputLatency - Compute and return the output dependency latency of a
    /// a given pair of defs which both target the same register. This is usually
@@ -693,13 +711,17 @@ public:
    /// getInstrLatency - Compute the instruction latency of a given instruction.
    /// If the instruction has higher cost when predicated, it's returned via
    /// PredCost.
-  virtual int getInstrLatency(const InstrItineraryData *ItinData,
-                              const MachineInstr *MI,
-                              unsigned *PredCost = 0) const;
+  virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
+                                   const MachineInstr *MI,
+                                   unsigned *PredCost = 0) const;
  
    virtual int getInstrLatency(const InstrItineraryData *ItinData,
                                SDNode *Node) const = 0;
  
+  /// Return the default expected latency for a def based on it's opcode.
+  unsigned defaultDefLatency(const InstrItineraryData *ItinData,
+                             const MachineInstr *DefMI) const;
+
    /// isHighLatencyDef - Return true if this opcode has high latency to its
    /// result.
    virtual bool isHighLatencyDef(int opc) const { return false; }
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp

index c318d84617185469f652592f86e5aba4a63a605f..aa591779ba6a5047831943693dd84860975b302d 100644 (file)
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -21,8 +21,9 @@
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
@@ -394,6 +395,12 @@ public:
      return RegionCriticalPSets;
    }
  
+  /// getIssueWidth - Return the max instructions per scheduling group.
+  ///
+  unsigned getIssueWidth() const {
+    return InstrItins ? InstrItins->Props.IssueWidth : 1;
+  }
+
  protected:
    void initRegPressure();
    void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
@@ -787,13 +794,16 @@ class ConvergingScheduler : public MachineSchedStrategy {
      /// MinReadyCycle - Cycle of the soonest available instruction.
      unsigned MinReadyCycle;
  
+    // Remember the greatest min operand latency.
+    unsigned MaxMinLatency;
+
      /// Pending queues extend the ready queues with the same ID and the
      /// PendingFlag set.
      SchedBoundary(unsigned ID, const Twine &Name):
        Available(ID, Name+".A"),
        Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
        CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
-      MinReadyCycle(UINT_MAX) {}
+      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
  
      ~SchedBoundary() { delete HazardRec; }
  
@@ -805,6 +815,8 @@ class ConvergingScheduler : public MachineSchedStrategy {
  
      void bumpCycle();
  
+    void bumpNode(SUnit *SU, unsigned IssueWidth);
+
      void releasePending();
  
      void removeReady(SUnit *SU);
@@ -868,25 +880,53 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  }
  
  void ConvergingScheduler::releaseTopNode(SUnit *SU) {
-  Top.releaseNode(SU, SU->getDepth());
+  if (SU->isScheduled)
+    return;
+
+  for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+    unsigned Latency =
+      DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
+#ifndef NDEBUG
+    Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+#endif
+    if (SU->TopReadyCycle < PredReadyCycle + Latency)
+      SU->TopReadyCycle = PredReadyCycle + Latency;
+  }
+  Top.releaseNode(SU, SU->TopReadyCycle);
  }
  
  void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
-  Bot.releaseNode(SU, SU->getHeight());
+  if (SU->isScheduled)
+    return;
+
+  assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+    unsigned Latency =
+      DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
+#ifndef NDEBUG
+    Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+#endif
+    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
+      SU->BotReadyCycle = SuccReadyCycle + Latency;
+  }
+  Bot.releaseNode(SU, SU->BotReadyCycle);
  }
  
  void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                       unsigned ReadyCycle) {
-  if (SU->isScheduled)
-    return;
-
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;
  
    // Check for interlocks first. For the purpose of other heuristics, an
    // instruction that cannot issue appears as if it's not in the ReadyQueue.
-  if (HazardRec->isEnabled()
-      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard)
+  if (ReadyCycle > CurrCycle
+      || (HazardRec->isEnabled() && (HazardRec->getHazardType(SU)
+                                     != ScheduleHazardRecognizer::NoHazard)))
      Pending.push(SU);
    else
      Available.push(SU);
@@ -900,10 +940,11 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
    unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
  
    if (!HazardRec->isEnabled()) {
-    // Bypass lots of virtual calls in case of long latency.
+    // Bypass HazardRec virtual calls.
      CurrCycle = NextCycle;
    }
    else {
+    // Bypass getHazardType calls in case of long latency.
      for (; CurrCycle != NextCycle; ++CurrCycle) {
        if (isTop())
          HazardRec->AdvanceCycle();
@@ -917,6 +958,26 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
          << CurrCycle << '\n');
  }
  
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU,
+                                                  unsigned IssueWidth) {
+  // Update the reservation table.
+  if (HazardRec->isEnabled()) {
+    if (!isTop() && SU->isCall) {
+      // Calls are scheduled with their preceding instructions. For bottom-up
+      // scheduling, clear the pipeline state before emitting.
+      HazardRec->Reset();
+    }
+    HazardRec->EmitInstruction(SU);
+  }
+  // Check the instruction group size limit.
+  ++IssueCount;
+  if (IssueCount == IssueWidth) {
+    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+    bumpCycle();
+  }
+}
+
  /// Release pending ready nodes in to the available queue. This makes them
  /// visible to heuristics.
  void ConvergingScheduler::SchedBoundary::releasePending() {
@@ -928,7 +989,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
    // so, add them to the available queue.
    for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
      SUnit *SU = *(Pending.begin()+i);
-    unsigned ReadyCycle = isTop() ? SU->getHeight() : SU->getDepth();
+    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
  
      if (ReadyCycle < MinReadyCycle)
        MinReadyCycle = ReadyCycle;
@@ -965,7 +1026,8 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
      releasePending();
  
    for (unsigned i = 0; Available.empty(); ++i) {
-    assert(i <= HazardRec->getMaxLookAhead() && "permanent hazard"); (void)i;
+    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+           "permanent hazard"); (void)i;
      bumpCycle();
      releasePending();
    }
@@ -1205,27 +1267,15 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  
  /// Update the scheduler's state after scheduling a node. This is the same node
  /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
-/// it's state based on the current cycle before MachineSchedStrategy.
+/// it's state based on the current cycle before MachineSchedStrategy does.
  void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
-  // Update the reservation table.
-  if (IsTopNode && Top.HazardRec->isEnabled()) {
-    Top.HazardRec->EmitInstruction(SU);
-    if (Top.HazardRec->atIssueLimit()) {
-      DEBUG(dbgs() << "*** Max instrs at cycle " << Top.CurrCycle << '\n');
-      Top.bumpCycle();
-    }
+  if (IsTopNode) {
+    SU->TopReadyCycle = Top.CurrCycle;
+    Top.bumpNode(SU, DAG->getIssueWidth());
    }
-  else if (Bot.HazardRec->isEnabled()) {
-    if (SU->isCall) {
-      // Calls are scheduled with their preceding instructions. For bottom-up
-      // scheduling, clear the pipeline state before emitting.
-      Bot.HazardRec->Reset();
-    }
-    Bot.HazardRec->EmitInstruction(SU);
-    if (Bot.HazardRec->atIssueLimit()) {
-      DEBUG(dbgs() << "*** Max instrs at cycle " << Bot.CurrCycle << '\n');
-      Bot.bumpCycle();
-    }
+  else {
+    SU->BotReadyCycle = Bot.CurrCycle;
+    Bot.bumpNode(SU, DAG->getIssueWidth());
    }
  }
  
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp

index 773a29d0c23e5054810ce08a12beeed4aea9c18e..875e012ed12384a249b89ad37cd257310a0dcbbc 100644 (file)
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -271,10 +271,12 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
        // Adjust the dependence latency using operand def/use
        // information (if any), and then allow the target to
        // perform its own adjustments.
-      const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+      SDep dep(SU, SDep::Data, LDataLatency, *Alias);
        if (!UnitLatencies) {
-        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
-        ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+        unsigned Latency = computeOperandLatency(SU, UseSU, dep);
+        dep.setLatency(Latency);
+
+        ST.adjustSchedDependency(SU, UseSU, dep);
        }
        UseSU->addPred(dep);
      }
@@ -461,11 +463,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
        // Create a data dependence.
        //
        // TODO: Handle "special" address latencies cleanly.
-      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+      SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
        if (!UnitLatencies) {
          // Adjust the dependence latency using operand def/use information, then
          // allow the target to perform its own adjustments.
-        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+        unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+        dep.setLatency(Latency);
+
          const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
          ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
        }
@@ -970,8 +974,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
  }
  
  void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
-  // Compute the latency for the node.
-  if (!InstrItins || InstrItins->isEmpty()) {
+  // Compute the latency for the node. We only provide a default for missing
+  // itineraries. Empty itineraries still have latency properties.
+  if (!InstrItins) {
      SU->Latency = 1;
  
      // Simplistic target-independent heuristic: assume that loads take
@@ -983,63 +988,15 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
    }
  }
  
-void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
-                                              SDep& dep) const {
-  if (!InstrItins || InstrItins->isEmpty())
-    return;
-
+unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
+                                                  const SDep& dep,
+                                                  bool FindMin) const {
    // For a data dependency with a known register...
    if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
-    return;
+    return 1;
  
-  const unsigned Reg = dep.getReg();
-
-  // ... find the definition of the register in the defining
-  // instruction
-  MachineInstr *DefMI = Def->getInstr();
-  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
-  if (DefIdx != -1) {
-    const MachineOperand &MO = DefMI->getOperand(DefIdx);
-    if (MO.isReg() && MO.isImplicit() &&
-        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
-      // This is an implicit def, getOperandLatency() won't return the correct
-      // latency. e.g.
-      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
-      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
-      // What we want is to compute latency between def of %D6/%D7 and use of
-      // %Q3 instead.
-      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
-      if (DefMI->getOperand(Op2).isReg())
-        DefIdx = Op2;
-    }
-    MachineInstr *UseMI = Use->getInstr();
-    // For all uses of the register, calculate the maxmimum latency
-    int Latency = -1;
-    if (UseMI) {
-      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
-        const MachineOperand &MO = UseMI->getOperand(i);
-        if (!MO.isReg() || !MO.isUse())
-          continue;
-        unsigned MOReg = MO.getReg();
-        if (MOReg != Reg)
-          continue;
-
-        int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
-                                              UseMI, i);
-        Latency = std::max(Latency, UseCycle);
-      }
-    } else {
-      // UseMI is null, then it must be a scheduling barrier.
-      if (!InstrItins || InstrItins->isEmpty())
-        return;
-      unsigned DefClass = DefMI->getDesc().getSchedClass();
-      Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
-    }
-
-    // If we found a latency, then replace the existing dependence latency.
-    if (Latency >= 0)
-      dep.setLatency(Latency);
-  }
+  return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
+                                    Use->getInstr(), dep.getReg(), FindMin);
  }
  
  void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h

index 75940ec33ddc0526a521a2e23c2a5b1e886282dc..5384576a0c91b51e7d81f1b9dbd249e948b12b84 100644 (file)
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -98,12 +98,6 @@ namespace llvm {
      ///
      virtual void computeLatency(SUnit *SU);
  
-    /// computeOperandLatency - Override dependence edge latency using
-    /// operand use/def information
-    ///
-    virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
-                                       SDep& dep) const { }
-
      virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
                                         unsigned OpIdx, SDep& dep) const;
  
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp

index 5218aa1f7a8be4ee802f6456dfdb259dfc061521..ec2b5772308b4532dd9f92c3e555ef1825e942fd 100644 (file)
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1046,7 +1046,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
        return true;  // Below MI
      unsigned DefDist = DDI->second;
      assert(Dist > DefDist && "Visited def already?");
-    if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+    if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
        return true;
    }
    return false;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp

index 398e0c9ec34fb199afc4a05b0122afff2d55efa0..bcc4f9c9d20525c8cef67771f8b868d26061de4d 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2567,12 +2567,13 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
  
  int
  ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
-                             const MachineInstr *DefMI, unsigned DefIdx,
-                             const MachineInstr *UseMI, unsigned UseIdx) const {
+                                    const MachineInstr *DefMI, unsigned DefIdx,
+                                    const MachineInstr *UseMI,
+                                    unsigned UseIdx) const {
    if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
-      DefMI->isRegSequence() || DefMI->isImplicitDef())
+      DefMI->isRegSequence() || DefMI->isImplicitDef()) {
      return 1;
-
+  }
    if (!ItinData || ItinData->isEmpty())
      return DefMI->mayLoad() ? 3 : 1;
  
@@ -2983,14 +2984,16 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
                             DepMI->getNumOperands());
  }
  
-int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
-                                      const MachineInstr *MI,
-                                      unsigned *PredCost) const {
+unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                           const MachineInstr *MI,
+                                           unsigned *PredCost) const {
    if (MI->isCopyLike() || MI->isInsertSubreg() ||
        MI->isRegSequence() || MI->isImplicitDef())
      return 1;
  
-  if (!ItinData || ItinData->isEmpty())
+  // Be sure to call getStageLatency for an empty itinerary in case it has a
+  // valid MinLatency property.
+  if (!ItinData)
      return 1;
  
    if (MI->isBundle()) {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h

index 2fe85072a33080bccdc72f51ec834f90ae2fd4e7..8217f239d19087a8d155da8ce335b6c286f4da5f 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -249,8 +249,9 @@ private:
                          const MCInstrDesc &UseMCID,
                          unsigned UseIdx, unsigned UseAlign) const;
  
-  int getInstrLatency(const InstrItineraryData *ItinData,
-                      const MachineInstr *MI, unsigned *PredCost = 0) const;
+  unsigned getInstrLatency(const InstrItineraryData *ItinData,
+                           const MachineInstr *MI,
+                           unsigned *PredCost = 0) const;
  
    int getInstrLatency(const InstrItineraryData *ItinData,
                        SDNode *Node) const;
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp

index 6088ba5cc35f8b468e8ccd2d54d46d8b4a8fecda..ae38732065c780b128a49299827826c992e155ec 100644 (file)
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -61,22 +61,125 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
    return 1;
  }
  
+/// Return the default expected latency for a def based on it's opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData,
+                                            const MachineInstr *DefMI) const {
+  if (DefMI->mayLoad())
+    return ItinData->Props.LoadLatency;
+  if (isHighLatencyDef(DefMI->getOpcode()))
+    return ItinData->Props.HighLatency;
+  return 1;
+}
+
+/// Both DefMI and UseMI must be valid.  By default, call directly to the
+/// itinerary. This may be overriden by the target.
  int
  TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
-                             const MachineInstr *DefMI, unsigned DefIdx,
-                             const MachineInstr *UseMI, unsigned UseIdx) const {
-  if (!ItinData || ItinData->isEmpty())
-    return -1;
-
+                                   const MachineInstr *DefMI, unsigned DefIdx,
+                                   const MachineInstr *UseMI,
+                                   unsigned UseIdx) const {
    unsigned DefClass = DefMI->getDesc().getSchedClass();
    unsigned UseClass = UseMI->getDesc().getSchedClass();
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
  }
  
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
-                                     const MachineInstr *MI,
-                                     unsigned *PredCost) const {
-  if (!ItinData || ItinData->isEmpty())
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
+/// unknown use. Depending on the subtarget's itinerary properties, this may or
+/// may not need to call getOperandLatency().
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
+/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+                      const TargetRegisterInfo *TRI,
+                      const MachineInstr *DefMI, const MachineInstr *UseMI,
+                      unsigned Reg, bool FindMin) const {
+
+  // Default to one cycle for missing itinerary. Empty itineraries still have
+  // a properties. We have one hard-coded exception for loads, to preserve
+  // existing behavior.
+  if (!ItinData)
+    return DefMI->mayLoad() ? 2 : 1;
+
+  // Return a latency based on the itinerary properties and defining instruction
+  // if possible. Some common subtargets don't require per-operand latency,
+  // especially for minimum latencies.
+  if (FindMin) {
+    // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+    // it exists before defaulting to MinLatency.
+    if (ItinData->Props.MinLatency >= 0)
+      return getInstrLatency(ItinData, DefMI);
+
+    // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+    // For empty itineraries, short-cirtuit the check and default to one cycle.
+    if (ItinData->isEmpty())
+      return 1;
+  }
+  else if(ItinData->isEmpty())
+    return defaultDefLatency(ItinData, DefMI);
+
+  // ...operand lookup required
+
+  // Find the definition of the register in the defining instruction.
+  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+  if (DefIdx != -1) {
+    const MachineOperand &MO = DefMI->getOperand(DefIdx);
+    if (MO.isReg() && MO.isImplicit() &&
+        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+      // This is an implicit def, getOperandLatency() won't return the correct
+      // latency. e.g.
+      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+      // What we want is to compute latency between def of %D6/%D7 and use of
+      // %Q3 instead.
+      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+      if (DefMI->getOperand(Op2).isReg())
+        DefIdx = Op2;
+    }
+    // For all uses of the register, calculate the maxmimum latency
+    int OperLatency = -1;
+
+    // UseMI is null, then it must be a scheduling barrier.
+    if (!UseMI) {
+      unsigned DefClass = DefMI->getDesc().getSchedClass();
+      OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+    }
+    else {
+      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+        const MachineOperand &MO = UseMI->getOperand(i);
+        if (!MO.isReg() || !MO.isUse())
+          continue;
+        unsigned MOReg = MO.getReg();
+        if (MOReg != Reg)
+          continue;
+
+        int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
+        OperLatency = std::max(OperLatency, UseCycle);
+      }
+    }
+    // If we found an operand latency, we're done.
+    if (OperLatency >= 0)
+      return OperLatency;
+  }
+  // No operand latency was found.
+  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+  // Expected latency is the max of the stage latency and itinerary props.
+  if (!FindMin)
+    InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
+  return InstrLatency;
+}
+
+unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                          const MachineInstr *MI,
+                                          unsigned *PredCost) const {
+  // Default to one cycle for no itinerary. However, an "empty" itinerary may
+  // still have a MinLatency property, which getStageLatency checks.
+  if (!ItinData)
      return 1;
  
    return ItinData->getStageLatency(MI->getDesc().getSchedClass());
author	Andrew Trick <atrick@apple.com>
	Tue, 5 Jun 2012 21:11:27 +0000 (21:11 +0000)
committer	Andrew Trick <atrick@apple.com>
	Tue, 5 Jun 2012 21:11:27 +0000 (21:11 +0000)
include/llvm/CodeGen/ScheduleDAG.h		patch \| blob \| history
include/llvm/CodeGen/ScheduleDAGInstrs.h		patch \| blob \| history
include/llvm/MC/MCInstrItineraries.h		patch \| blob \| history
include/llvm/Target/TargetInstrInfo.h		patch \| blob \| history
lib/CodeGen/MachineScheduler.cpp		patch \| blob \| history
lib/CodeGen/ScheduleDAGInstrs.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h		patch \| blob \| history
lib/CodeGen/TwoAddressInstructionPass.cpp		patch \| blob \| history
lib/Target/ARM/ARMBaseInstrInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMBaseInstrInfo.h		patch \| blob \| history
lib/Target/TargetInstrInfo.cpp		patch \| blob \| history