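Two sets of changes. Sorry they are intermingled:
1. SUnit gains an isCall flag, and the hybrid and ILP list-scheduler sort functions fall back to BURRSort when either node is a call, because there is no way to compute the latency of calls.
2. The if-converter profitability hooks now take accumulated instruction latencies (plus an extra cost for instructions that are slower when predicated), obtained from the new TargetInstrInfo::getInstrLatency, instead of instruction / micro-op counts.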

author Evan Cheng <evan.cheng@apple.com>
Wed, 3 Nov 2010 00:45:17 +0000 (00:45 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Wed, 3 Nov 2010 00:45:17 +0000 (00:45 +0000)
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h

index 076268b99c2061f6fb716996689a170432cd4ff9..a86ba831853c3371052425e6019a01b866082665 100644 (file)
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -247,6 +247,7 @@ namespace llvm {
      unsigned NumSuccs;                  // # of SDep::Data sucss.
      unsigned NumPredsLeft;              // # of preds not scheduled.
      unsigned NumSuccsLeft;              // # of succs not scheduled.
+    bool isCall           : 1;          // Is a function call.
      bool isTwoAddress     : 1;          // Is a two-address instruction.
      bool isCommutable     : 1;          // Is a commutable instruction.
      bool hasPhysRegDefs   : 1;          // Has physreg defs that are being used.
@@ -273,7 +274,8 @@ namespace llvm {
      SUnit(SDNode *node, unsigned nodenum)
        : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
          NodeQueueId(0),  Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
-        NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
+        NumSuccsLeft(0),
+        isCall(false), isTwoAddress(false), isCommutable(false),
          hasPhysRegDefs(false), hasPhysRegClobbers(false),
          isPending(false), isAvailable(false), isScheduled(false),
          isScheduleHigh(false), isCloned(false),
@@ -286,7 +288,8 @@ namespace llvm {
      SUnit(MachineInstr *instr, unsigned nodenum)
        : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
          NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
-        NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
+        NumSuccsLeft(0),
+        isCall(false), isTwoAddress(false), isCommutable(false),
          hasPhysRegDefs(false), hasPhysRegClobbers(false),
          isPending(false), isAvailable(false), isScheduled(false),
          isScheduleHigh(false), isCloned(false),
@@ -298,7 +301,8 @@ namespace llvm {
      SUnit()
        : Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
          NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
-        NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
+        NumSuccsLeft(0),
+        isCall(false), isTwoAddress(false), isCommutable(false),
          hasPhysRegDefs(false), hasPhysRegClobbers(false),
          isPending(false), isAvailable(false), isScheduled(false),
          isScheduleHigh(false), isCloned(false),
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h

index 0ecfc79c6b99e8c9df19fd071c27dddd62e3a0ed..7e77dba761f09a6676bb7284f8d162ae70dfc0ca 100644 (file)
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -304,12 +304,14 @@ public:
      return true;
    }
  
-  /// isProfitableToIfCvt - Return true if it's profitable to first "NumInstrs"
+  /// isProfitableToIfCvt - Return true if it's profitable to predicate
+  /// instructions with accumulated instruction latency of "NumCycles"
    /// of the specified basic block, where the probability of the instructions
    /// being executed is given by Probability, and Confidence is a measure
    /// of our confidence that it will be properly predicted.
    virtual
-  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+                           unsigned ExtraPredCycles,
                             float Probability, float Confidence) const {
      return false;
    }
@@ -321,19 +323,22 @@ public:
    /// by Probability, and Confidence is a measure of our confidence that it
    /// will be properly predicted.
    virtual bool
-  isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
-                      MachineBasicBlock &FMBB, unsigned NumFInstrs,
+  isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                      unsigned NumTCycles, unsigned ExtraTCycles,
+                      MachineBasicBlock &FMBB,
+                      unsigned NumFCycles, unsigned ExtraFCycles,
                        float Probability, float Confidence) const {
      return false;
    }
  
    /// isProfitableToDupForIfCvt - Return true if it's profitable for
-  /// if-converter to duplicate a specific number of instructions in the
-  /// specified MBB to enable if-conversion, where the probability of the 
-  /// instructions being executed is given by Probability, and Confidence is
-  /// a measure of our confidence that it will be properly predicted.
+  /// if-converter to duplicate instructions of specified accumulated
+  /// instruction latencies in the specified MBB to enable if-conversion.
+  /// The probability of the instructions being executed is given by
+  /// Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
    virtual bool
-  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
+  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
                              float Probability, float Confidence) const {
      return false;
    }
@@ -608,24 +613,31 @@ public:
  
    /// getNumMicroOps - Return the number of u-operations the given machine
    /// instruction will be decoded to on the target cpu.
-  virtual unsigned getNumMicroOps(const MachineInstr *MI,
-                                  const InstrItineraryData *ItinData) const;
+  virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+                                  const MachineInstr *MI) const;
  
    /// getOperandLatency - Compute and return the use operand latency of a given
-  /// itinerary class and operand index if the value is produced by an
-  /// instruction of the specified itinerary class and def operand index.
+  /// pair of def and use.
    /// In most cases, the static scheduling itinerary was enough to determine the
    /// operand latency. But it may not be possible for instructions with variable
    /// number of defs / uses.
-  virtual
-  int getOperandLatency(const InstrItineraryData *ItinData,
-                        const MachineInstr *DefMI, unsigned DefIdx,
-                        const MachineInstr *UseMI, unsigned UseIdx) const;
-
-  virtual
-  int getOperandLatency(const InstrItineraryData *ItinData,
-                        SDNode *DefNode, unsigned DefIdx,
-                        SDNode *UseNode, unsigned UseIdx) const;
+  virtual int getOperandLatency(const InstrItineraryData *ItinData,
+                              const MachineInstr *DefMI, unsigned DefIdx,
+                              const MachineInstr *UseMI, unsigned UseIdx) const;
+
+  virtual int getOperandLatency(const InstrItineraryData *ItinData,
+                                SDNode *DefNode, unsigned DefIdx,
+                                SDNode *UseNode, unsigned UseIdx) const;
+
+  /// getInstrLatency - Compute the instruction latency of a given instruction.
+  /// If the instruction has higher cost when predicated, it's returned via
+  /// PredCost.
+  virtual int getInstrLatency(const InstrItineraryData *ItinData,
+                              const MachineInstr *MI,
+                              unsigned *PredCost = 0) const;
+
+  virtual int getInstrLatency(const InstrItineraryData *ItinData,
+                              SDNode *Node) const;
  
    /// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
    /// and an use in the current loop, return true if the target considered
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp

index e90985bf89942f16839f33f3cd7ad72439df7823..c05aa40b6742548facce62b348c07a11aa211873 100644 (file)
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -93,7 +93,8 @@ namespace {
      /// ClobbersPred    - True if BB could modify predicates (e.g. has
      ///                   cmp, call, etc.)
      /// NonPredSize     - Number of non-predicated instructions.
-    /// ExtraCost       - Extra cost for microcoded instructions.
+    /// ExtraCost       - Extra cost for multi-cycle instructions.
+    /// ExtraCost2      - Some instructions are slower when predicated
      /// BB              - Corresponding MachineBasicBlock.
      /// TrueBB / FalseBB- See AnalyzeBranch().
      /// BrCond          - Conditions for end of block conditional branches.
@@ -110,6 +111,7 @@ namespace {
        bool ClobbersPred    : 1;
        unsigned NonPredSize;
        unsigned ExtraCost;
+      unsigned ExtraCost2;
        MachineBasicBlock *BB;
        MachineBasicBlock *TrueBB;
        MachineBasicBlock *FalseBB;
@@ -119,7 +121,7 @@ namespace {
                   IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
                   HasFallThrough(false), IsUnpredicable(false),
                   CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
-                 ExtraCost(0), BB(0), TrueBB(0), FalseBB(0) {}
+                 ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
      };
  
      /// IfcvtToken - Record information about pending if-conversions to attempt:
@@ -203,17 +205,20 @@ namespace {
                                 bool IgnoreBr = false);
      void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
  
-    bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size,
+    bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+                            unsigned Cycle, unsigned Extra,
                              float Prediction, float Confidence) const {
-      return Size > 0 && TII->isProfitableToIfCvt(BB, Size,
-                                                  Prediction, Confidence);
+      return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+                                                   Prediction, Confidence);
      }
  
-    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
-                            MachineBasicBlock &FBB, unsigned FSize,
+    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+                            unsigned TCycle, unsigned TExtra,
+                            MachineBasicBlock &FBB,
+                            unsigned FCycle, unsigned FExtra,
                              float Prediction, float Confidence) const {
-      return TSize > 0 && FSize > 0 &&
-        TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize,
+      return TCycle > 0 && FCycle > 0 &&
+        TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
                                   Prediction, Confidence);
      }
  
@@ -649,6 +654,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
    // Then scan all the instructions.
    BBI.NonPredSize = 0;
    BBI.ExtraCost = 0;
+  BBI.ExtraCost2 = 0;
    BBI.ClobbersPred = false;
    for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
         I != E; ++I) {
@@ -665,9 +671,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
      if (!isCondBr) {
        if (!isPredicated) {
          BBI.NonPredSize++;
-        unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins);
-        if (NumOps > 1)
-          BBI.ExtraCost += NumOps-1;
+        unsigned ExtraPredCost = 0;
+        unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+                                                  &ExtraPredCost);
+        if (NumCycles > 1)
+          BBI.ExtraCost += NumCycles-1;
+        BBI.ExtraCost2 += ExtraPredCost;
        } else if (!AlreadyPredicated) {
          // FIXME: This instruction is already predicated before the
          // if-conversion pass. It's probably something like a conditional move.
@@ -815,9 +824,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
    
    if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
        MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
-                                       TrueBBI.ExtraCost),
+                                       TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
                           *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
-                                        FalseBBI.ExtraCost),
+                                        FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
                           Prediction, Confidence) &&
        FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
        FeasibilityAnalysis(FalseBBI, RevCond)) {
@@ -836,7 +845,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
  
    if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
        MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
        FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
      // Triangle:
      //   EBB
@@ -851,7 +860,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
  
    if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
        MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
        FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
      Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
      Enqueued = true;
@@ -859,7 +868,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
  
    if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
        MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
        FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
      // Simple (split, no rejoin):
      //   EBB
@@ -878,7 +887,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
                        1.0-Prediction, Confidence) &&
          MeetIfcvtSizeLimit(*FalseBBI.BB,
                             FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
          FeasibilityAnalysis(FalseBBI, RevCond, true)) {
        Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
        Enqueued = true;
@@ -888,7 +897,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
                        1.0-Prediction, Confidence) &&
          MeetIfcvtSizeLimit(*FalseBBI.BB,
                             FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
          FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
        Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
        Enqueued = true;
@@ -897,7 +906,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
      if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
          MeetIfcvtSizeLimit(*FalseBBI.BB,
                             FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
          FeasibilityAnalysis(FalseBBI, RevCond)) {
        Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
        Enqueued = true;
@@ -1427,9 +1436,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
      MachineInstr *MI = MF.CloneMachineInstr(I);
      ToBBI.BB->insert(ToBBI.BB->end(), MI);
      ToBBI.NonPredSize++;
-    unsigned NumOps = TII->getNumMicroOps(MI, InstrItins);
-    if (NumOps > 1)
-      ToBBI.ExtraCost += NumOps-1;
+    unsigned ExtraPredCost = 0;
+    unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+    if (NumCycles > 1)
+      ToBBI.ExtraCost += NumCycles-1;
+    ToBBI.ExtraCost2 += ExtraPredCost;
  
      if (!TII->isPredicated(I) && !MI->isDebugValue()) {
        if (!TII->PredicateInstruction(MI, Cond)) {
@@ -1504,8 +1515,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
  
    ToBBI.NonPredSize += FromBBI.NonPredSize;
    ToBBI.ExtraCost += FromBBI.ExtraCost;
+  ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
    FromBBI.NonPredSize = 0;
    FromBBI.ExtraCost = 0;
+  FromBBI.ExtraCost2 = 0;
  
    ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
    ToBBI.HasFallThrough = FromBBI.HasFallThrough;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp

index abd68caf12244ccde73d2c64075953e7e24a7cc6..e86a78c691957d1af8c98597c9a03a605594c2b9 100644 (file)
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -238,6 +238,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
             "Cannot schedule terminators or labels!");
      // Create the SUnit for this MI.
      SUnit *SU = NewSUnit(MI);
+    SU->isCall = TID.isCall();
+    SU->isCommutable = TID.isCommutable();
  
      // Assign the Latency field of SU using target-provided information.
      if (UnitLatencies)
@@ -564,9 +566,9 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
      // extra time.
      if (SU->getInstr()->getDesc().mayLoad())
        SU->Latency += 2;
-  } else
-    SU->Latency =
-      InstrItins->getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+  } else {
+    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
+  }
  }
  
  void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp

index ea1aaa1e05c78b95fd9e2b8ecc2d059d37cb74bc..9978d00f20f0e4897ad317aee2ed570fa28b4119 100644 (file)
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1589,6 +1589,10 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  }
  
  bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+  if (left->isCall || right->isCall)
+    // No way to compute latency of calls.
+    return BURRSort(left, right, SPQ);
+
    bool LHigh = SPQ->HighRegPressure(left);
    bool RHigh = SPQ->HighRegPressure(right);
    // Avoid causing spills. If register pressure is high, schedule for
@@ -1648,6 +1652,10 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
  
  bool ilp_ls_rr_sort::operator()(const SUnit *left,
                                  const SUnit *right) const {
+  if (left->isCall || right->isCall)
+    // No way to compute latency of calls.
+    return BURRSort(left, right, SPQ);
+
    bool LHigh = SPQ->HighRegPressure(left);
    bool RHigh = SPQ->HighRegPressure(right);
    // Avoid causing spills. If register pressure is high, schedule for
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp

index 7d01bd31b960148517eaffc4a4f5654938ef8c47..429b1152b076db1c9444e071b72fd672a5e8c681 100644 (file)
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
    SUnit *SU = NewSUnit(Old->getNode());
    SU->OrigNode = Old->OrigNode;
    SU->Latency = Old->Latency;
+  SU->isCall = Old->isCall;
    SU->isTwoAddress = Old->isTwoAddress;
    SU->isCommutable = Old->isCommutable;
    SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -300,6 +301,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
        N = N->getOperand(N->getNumOperands()-1).getNode();
        assert(N->getNodeId() == -1 && "Node already inserted!");
        N->setNodeId(NodeSUnit->NodeNum);
+      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+        NodeSUnit->isCall = true;
      }
      
      // Scan down to find any flagged succs.
@@ -316,6 +319,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
            assert(N->getNodeId() == -1 && "Node already inserted!");
            N->setNodeId(NodeSUnit->NodeNum);
            N = *UI;
+          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+            NodeSUnit->isCall = true;
            break;
          }
        if (!HasFlagUse) break;
@@ -438,10 +443,8 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
    // all nodes flagged together into this SUnit.
    SU->Latency = 0;
    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
-    if (N->isMachineOpcode()) {
-      SU->Latency += InstrItins->
-        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
-    }
+    if (N->isMachineOpcode())
+      SU->Latency += TII->getInstrLatency(InstrItins, N);
  }
  
  void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp

index 32570650086514517dbc29c2ccd90c17cc1d441f..b023379e7ba8c8ab877fa3ee615bac24b1b79718 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -40,10 +40,6 @@ static cl::opt<bool>
  EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                 cl::desc("Enable ARM 2-addr to 3-addr conv"));
  
-static cl::opt<bool>
-OldARMIfCvt("old-arm-ifcvt", cl::Hidden,
-             cl::desc("Use old-style ARM if-conversion heuristics"));
-
  ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
    : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
      Subtarget(STI) {
@@ -1205,53 +1201,36 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
  }
  
  bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                           unsigned NumInstrs,
+                                           unsigned NumCyles,
+                                           unsigned ExtraPredCycles,
                                             float Probability,
                                             float Confidence) const {
-  if (!NumInstrs)
+  if (!NumCyles)
      return false;
  
-  // Use old-style heuristics
-  if (OldARMIfCvt) {
-    if (Subtarget.getCPUString() == "generic")
-      // Generic (and overly aggressive) if-conversion limits for testing.
-      return NumInstrs <= 10;
-    if (Subtarget.hasV7Ops())
-      return NumInstrs <= 3;
-    return NumInstrs <= 2;
-  }
-
    // Attempt to estimate the relative costs of predication versus branching.
-  float UnpredCost = Probability * NumInstrs;
+  float UnpredCost = Probability * NumCyles;
    UnpredCost += 1.0; // The branch itself
    UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
  
-  float PredCost = NumInstrs;
-
-  return PredCost < UnpredCost;
-
+  return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
  }
  
  bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
-                    MachineBasicBlock &FMBB, unsigned NumF,
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                    unsigned TCycles, unsigned TExtra,
+                    MachineBasicBlock &FMBB,
+                    unsigned FCycles, unsigned FExtra,
                      float Probability, float Confidence) const {
-  // Use old-style if-conversion heuristics
-  if (OldARMIfCvt) {
-    return NumT && NumF && NumT <= 2 && NumF <= 2;
-  }
-
-  if (!NumT || !NumF)
+  if (!TCycles || !FCycles)
      return false;
  
    // Attempt to estimate the relative costs of predication versus branching.
-  float UnpredCost = Probability * NumT + (1.0 - Probability) * NumF;
+  float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
    UnpredCost += 1.0; // The branch itself
    UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
  
-  float PredCost = NumT + NumF;
-
-  return PredCost < UnpredCost;
+  return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
  }
  
  /// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -1591,8 +1570,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
  }
  
  unsigned
-ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
-                                 const InstrItineraryData *ItinData) const {
+ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+                                 const MachineInstr *MI) const {
    if (!ItinData || ItinData->isEmpty())
      return 1;
  
@@ -1649,9 +1628,14 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
    case ARM::t2STM_UPD: {
      unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
      if (Subtarget.isCortexA8()) {
-      // 4 registers would be issued: 1, 2, 1.
-      // 5 registers would be issued: 1, 2, 2.
-      return 1 + (NumRegs / 2);
+      if (NumRegs < 4)
+        return 2;
+      // 4 registers would be issued: 2, 2.
+      // 5 registers would be issued: 2, 2, 1.
+      UOps = (NumRegs / 2);
+      if (NumRegs % 2)
+        ++UOps;
+      return UOps;
      } else if (Subtarget.isCortexA9()) {
        UOps = (NumRegs / 2);
        // If there are odd number of registers or if it's not 64-bit aligned,
@@ -2025,6 +2009,46 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    return Latency;
  }
  
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                      const MachineInstr *MI,
+                                      unsigned *PredCost) const {
+  if (MI->isCopyLike() || MI->isInsertSubreg() ||
+      MI->isRegSequence() || MI->isImplicitDef())
+    return 1;
+
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  const TargetInstrDesc &TID = MI->getDesc();
+  unsigned Class = TID.getSchedClass();
+  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
+    // When predicated, CPSR is an additional source operand for CPSR updating
+    // instructions, this apparently increases their latencies.
+    *PredCost = 1;
+  if (UOps)
+    return ItinData->getStageLatency(Class);
+  return getNumMicroOps(ItinData, MI);
+}
+
+int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                      SDNode *Node) const {
+  if (!Node->isMachineOpcode())
+    return 1;
+
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  unsigned Opcode = Node->getMachineOpcode();
+  switch (Opcode) {
+  default:
+    return ItinData->getStageLatency(get(Opcode).getSchedClass());
+  case ARM::VLDMQ:
+  case ARM::VSTMQ:
+    return 2;
+  }  
+}
+
  bool ARMBaseInstrInfo::
  hasHighOperandLatency(const InstrItineraryData *ItinData,
                        const MachineRegisterInfo *MRI,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h

index e19bd9c566d91737ce0035ee83ed229d3bd3d939..c11f02ccb109aa23def260c391900f1df5d73ae7 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -318,18 +318,20 @@ public:
                                      const MachineFunction &MF) const;
  
    virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                   unsigned NumInstrs,
+                                   unsigned NumCyles, unsigned ExtraPredCycles,
                                     float Prob, float Confidence) const;
  
-  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT,
-                                   MachineBasicBlock &FMBB,unsigned NumF,
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                                   unsigned NumT, unsigned ExtraT,
+                                   MachineBasicBlock &FMBB,
+                                   unsigned NumF, unsigned ExtraF,
                                     float Probability, float Confidence) const;
  
    virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
-                                         unsigned NumInstrs,
+                                         unsigned NumCyles,
                                           float Probability,
                                           float Confidence) const {
-    return NumInstrs == 1;
+    return NumCyles == 1;
    }
  
    /// AnalyzeCompare - For a comparison instruction, return the source register
@@ -345,8 +347,8 @@ public:
                                      const MachineRegisterInfo *MRI,
                                      MachineBasicBlock::iterator &MII) const;
  
-  virtual unsigned getNumMicroOps(const MachineInstr *MI,
-                                  const InstrItineraryData *ItinData) const;
+  virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+                                  const MachineInstr *MI) const;
  
    virtual
    int getOperandLatency(const InstrItineraryData *ItinData,
@@ -379,6 +381,12 @@ private:
                          const TargetInstrDesc &UseTID,
                          unsigned UseIdx, unsigned UseAlign) const;
  
+  int getInstrLatency(const InstrItineraryData *ItinData,
+                      const MachineInstr *MI, unsigned *PredCost = 0) const;
+
+  int getInstrLatency(const InstrItineraryData *ItinData,
+                      SDNode *Node) const;
+
    bool hasHighOperandLatency(const InstrItineraryData *ItinData,
                               const MachineRegisterInfo *MRI,
                               const MachineInstr *DefMI, unsigned DefIdx,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp

index 0a0f3146efdc91400387ee46318ed21db62b98e5..719b140ce9fa90289ecad45471db17a92f23b2ab 100644 (file)
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -42,33 +42,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
    return 0;
  }
  
-bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
-                                          unsigned NumInstrs,
-                                          float Prediction,
-                                          float Confidence) const {
-  if (!OldT2IfCvt)
-    return ARMBaseInstrInfo::isProfitableToIfCvt(MBB, NumInstrs,
-                                                 Prediction, Confidence);
-  return NumInstrs && NumInstrs <= 3;
-}
-  
-bool Thumb2InstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
-                    MachineBasicBlock &FMBB, unsigned NumF,
-                    float Prediction, float Confidence) const {
-  if (!OldT2IfCvt)
-    return ARMBaseInstrInfo::isProfitableToIfCvt(TMBB, NumT,
-                                                 FMBB, NumF,
-                                                 Prediction, Confidence);
-    
-  // FIXME: Catch optimization such as:
-  //        r0 = movne
-  //        r0 = moveq
-  return NumT && NumF &&
-    NumT <= 3 && NumF <= 3;
-}
-
-
  void
  Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
                                           MachineBasicBlock *NewDest) const {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h

index b348ad01911a0aa5c99e7db63f647e3662afc847..9ed7eea7e2dbfe7174fb8293be54224dfb38248f 100644 (file)
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -38,12 +38,6 @@ public:
    bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI) const;
  
-  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
-                           float Prediction, float Confidence) const;
-  bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
-                           MachineBasicBlock &FMBB, unsigned NumFInstrs,
-                           float Prediction, float Confidence) const;
-
    void copyPhysReg(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator I, DebugLoc DL,
                     unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp

index 62818d0cae9de5b91a98978c9d07f9131f6eff49..eca97ab09669397d44407e9d5de68686f1b28794 100644 (file)
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -50,8 +50,8 @@ TargetInstrInfo::~TargetInstrInfo() {
  }
  
  unsigned
-TargetInstrInfo::getNumMicroOps(const MachineInstr *MI,
-                                const InstrItineraryData *ItinData) const {
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+                                const MachineInstr *MI) const {
    if (!ItinData || ItinData->isEmpty())
      return 1;
  
@@ -94,6 +94,26 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
  }
  
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                     const MachineInstr *MI,
+                                     unsigned *PredCost) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                     SDNode *N) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  if (!N->isMachineOpcode())
+    return 1;
+
+  return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
  bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
                                         const MachineInstr *DefMI,
                                         unsigned DefIdx) const {
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll

index 7f299aa1ceb2235c4fd0980d1103608f80f159e6..0198908f06499b9986d9b2a01abf0f1fd728d199 100644 (file)
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -8,8 +8,9 @@
  define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
  entry:
  ; Make sure to use base-updating stores for saving callee-saved registers.
+; CHECK: push
  ; CHECK-NOT: sub sp
-; CHECK: vpush 
+; CHECK: push 
         %predicted_block = alloca [4 x [4 x i32]], align 4              ; <[4 x [4 x i32]]*> [#uses=1]
         br label %cond_next489
  
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll

index 707f0510b91862f7469d0eb7fc098041fe8d4c80..75428ac2165510a9851d4928bc65bedadfb094c9 100644 (file)
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -4,27 +4,40 @@
  ; micro-coded and would have long issue latency even if predicated on
  ; false predicate.
  
-%0 = type { float, float, float, float }
-%pln = type { %vec, float }
-%vec = type { [4 x float] }
-
-define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
-; CHECK: aaa:
-; CHECK: vldr.32
-; CHECK-NOT: vldrne
-; CHECK-NOT: vpopne
-; CHECK-NOT: popne
-; CHECK: vpop
-; CHECK: pop
+define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind {
  entry:
-  br i1 undef, label %bb81, label %bb48
+; CHECK: t:
+; CHECK: vpop {d8}
+; CHECK-NOT: vpopne
+; CHECK: ldmia sp!, {r7, pc}
+; CHECK: vpop {d8}
+; CHECK: ldmia sp!, {r7, pc}
+  br i1 undef, label %if.else, label %if.then
  
-bb48:                                             ; preds = %entry
-  %0 = call arm_aapcs_vfpcc  %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
-  ret float 0.000000e+00
+if.then:                                          ; preds = %entry
+  %mul73 = fmul double undef, 0.000000e+00
+  %sub76 = fsub double %mul73, undef
+  store double %sub76, double* undef, align 4
+  %call88 = tail call double @cos(double 0.000000e+00) nounwind
+  %mul89 = fmul double undef, %call88
+  %sub92 = fsub double %mul89, undef
+  store double %sub92, double* undef, align 4
+  ret void
  
-bb81:                                             ; preds = %entry
-  ret float 0.000000e+00
+if.else:                                          ; preds = %entry
+  %tmp101 = tail call double @llvm.pow.f64(double undef, double 0x3FD5555555555555)
+  %add112 = fadd double %tmp101, undef
+  %mul118 = fmul double %add112, undef
+  store double 0.000000e+00, double* %x, align 4
+  ret void
  }
  
-declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
+declare double @acos(double)
+
+declare double @sqrt(double) readnone
+
+declare double @cos(double) readnone
+
+declare double @fabs(double)
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll

new file mode 100644 (file)

index 0000000..63f8557
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; rdar://8598427
+; Adjust if-converter heuristics to avoid predicating vmrs which can cause
+; significant regression.
+
+%struct.xyz_t = type { double, double, double }
+
+define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline {
+; CHECK: effie:
+entry:
+  %0 = icmp sgt i32 %tsets, 0
+  br i1 %0, label %bb.nph, label %bb6
+
+bb.nph:                                           ; preds = %entry
+  %1 = add nsw i32 %b, %a
+  %2 = add nsw i32 %1, %c
+  br label %bb
+
+bb:                                               ; preds = %bb4, %bb.nph
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+  %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
+  %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
+  %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
+  %scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1
+  %3 = load double* %scevgep10, align 4
+  %4 = load double* %scevgep11, align 4
+  %5 = fcmp uge double %3, %4
+  br i1 %5, label %bb3, label %bb1
+
+bb1:                                              ; preds = %bb
+; CHECK-NOT: it
+; CHECK-NOT: vcmpemi
+; CHECK-NOT: vmrsmi
+; CHECK: vcmpe.f64
+; CHECK: vmrs apsr_nzcv, fpscr
+  %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
+  %6 = load double* %scevgep12, align 4
+  %7 = fcmp uge double %3, %6
+  br i1 %7, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %8 = add nsw i32 %2, %r.19
+  br label %bb4
+
+bb3:                                              ; preds = %bb1, %bb
+  %9 = add nsw i32 %r.19, 1
+  br label %bb4
+
+bb4:                                              ; preds = %bb3, %bb2
+  %r.0 = phi i32 [ %9, %bb3 ], [ %8, %bb2 ]
+  %10 = add nsw i32 %n.08, 1
+  %exitcond = icmp eq i32 %10, %tsets
+  br i1 %exitcond, label %bb6, label %bb
+
+bb6:                                              ; preds = %bb4, %entry
+  %r.1.lcssa = phi i32 [ 0, %entry ], [ %r.0, %bb4 ]
+  ret i32 %r.1.lcssa
+}
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll

index 3bc8fee6d339401650870110180afad8b0851f97..52e40b234ba826b4b1550a59f27665515ffc505f 100644 (file)
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,14 +4,14 @@
  ; constant offset addressing, so that each of the following stores
  ; uses the same register.
  
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
-; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
+; CHECK: vstr.32 s{{.*}}, [lr, #-128]
+; CHECK: vstr.32 s{{.*}}, [lr, #-96]
+; CHECK: vstr.32 s{{.*}}, [lr, #-64]
+; CHECK: vstr.32 s{{.*}}, [lr, #-32]
+; CHECK: vstr.32 s{{.*}}, [lr]
+; CHECK: vstr.32 s{{.*}}, [lr, #32]
+; CHECK: vstr.32 s{{.*}}, [lr, #64]
+; CHECK: vstr.32 s{{.*}}, [lr, #96]
  
  target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
  
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll

index 390955472ec0b28ee9288b571fa2963e2241023c..24eb3a88fb0403105bc6ffca833a74055c967983 100644 (file)
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -271,7 +271,6 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
  entry:
  ; CHECK: t10:
  ; CHECK: vmov.i32 q9, #0x3F000000
-; CHECK: vmov d0, d17
  ; CHECK: vmla.f32 q8, q8, d0[0]
    %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
    %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll

index 080341c8df412fbeb3eb8f90097f41346fe679c2..9ed6a01255f822ed68bded3f7ffc041018d1b227 100644 (file)
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -23,8 +23,6 @@ entry:
    %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
  ; Constant pool load followed by add.
  ; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64
-; CHECK: vadd.f64
  ; CHECK: vldr.64 [[LDR:d.*]],
  ; CHECK: LPC0_0:
  ; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
author	Evan Cheng <evan.cheng@apple.com>
	Wed, 3 Nov 2010 00:45:17 +0000 (00:45 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Wed, 3 Nov 2010 00:45:17 +0000 (00:45 +0000)
include/llvm/CodeGen/ScheduleDAG.h		patch \| blob \| history
include/llvm/Target/TargetInstrInfo.h		patch \| blob \| history
lib/CodeGen/IfConversion.cpp		patch \| blob \| history
lib/CodeGen/ScheduleDAGInstrs.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp		patch \| blob \| history
lib/Target/ARM/ARMBaseInstrInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMBaseInstrInfo.h		patch \| blob \| history
lib/Target/ARM/Thumb2InstrInfo.cpp		patch \| blob \| history
lib/Target/ARM/Thumb2InstrInfo.h		patch \| blob \| history
lib/Target/TargetInstrInfo.cpp		patch \| blob \| history
test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll		patch \| blob \| history
test/CodeGen/ARM/ifcvt10.ll		patch \| blob \| history
test/CodeGen/ARM/ifcvt11.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/ARM/lsr-on-unrolled-loops.ll		patch \| blob \| history
test/CodeGen/ARM/reg_sequence.ll		patch \| blob \| history
test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll		patch \| blob \| history