- Allow target to specify when register pressure is "too high". In most cases,
authorEvan Cheng <evan.cheng@apple.com>
Fri, 23 Jul 2010 22:39:59 +0000 (22:39 +0000)
committerEvan Cheng <evan.cheng@apple.com>
Fri, 23 Jul 2010 22:39:59 +0000 (22:39 +0000)
  it's too late to start backing off aggressive latency scheduling when most
  of the registers are in use so the threshold should be a bit tighter.
- Correctly handle live-outs, extract_subreg, etc.
- Enable register pressure aware scheduling by default for hybrid scheduler.
  For ARM, this is almost always a win on # of instructions. It's runtime
  neutral for most of the tests. But for some kernels with high register
  pressure it can be a huge win. e.g. 464.h264ref reduced number of spills by
  54 and sped up by 20%.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109279 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Target/TargetLowering.h
lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
lib/Target/ARM/ARMISelLowering.cpp
lib/Target/ARM/ARMISelLowering.h
test/CodeGen/ARM/lsr-on-unrolled-loops.ll

index 285c4be5bff379285641def52f8d96f2177e4e27..2d8838c520b3ac3d203effc5626b360697bd0487 100644 (file)
@@ -186,6 +186,14 @@ public:
     return RepRegClassCostForVT[VT.getSimpleVT().SimpleTy];
   }
 
+  /// getRegPressureLimit - Return the register pressure "high water mark" for
+  /// register class RC in function MF. The scheduler is in high register
+  /// pressure mode (for that class) once pressure reaches or exceeds the limit.
+  virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                       MachineFunction &MF) const {
+    return 0;  // Default for targets that do not override this hook.
+  }
+
   /// isTypeLegal - Return true if the target has native support for the
   /// specified value type.  This means that it has a register that directly
   /// holds it without promotions or expansions.
index 334ce58ac7c275dde4eb6133d2d455e0becbec50..2ffd35034a9fb0b0f46108acce1fdef6f3d23997 100644 (file)
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <climits>
 using namespace llvm;
 
-static cl::opt<bool> RegPressureAware("reg-pressure-aware-sched",
-                                      cl::init(false), cl::Hidden);
-
 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
 STATISTIC(NumUnfolds,    "Number of nodes unfolded");
 STATISTIC(NumDups,       "Number of duplicated nodes");
@@ -1075,7 +1071,7 @@ namespace {
         std::fill(RegPressure.begin(), RegPressure.end(), 0);
         for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
                E = TRI->regclass_end(); I != E; ++I)
-          RegLimit[(*I)->getID()] = tri->getAllocatableSet(MF, *I).count() - 1;
+          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
       }
     }
     
@@ -1172,10 +1168,12 @@ namespace {
       SU->NodeQueueId = 0;
     }
 
-    bool HighRegPressure(const SUnit *SU) const {
+    bool HighRegPressure(const SUnit *SU, unsigned &Excess) const {
       if (!TLI)
         return false;
 
+      bool High = false;
+      Excess = 0;
       for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
            I != E; ++I) {
         if (I->isCtrl())
@@ -1183,12 +1181,41 @@ namespace {
         SUnit *PredSU = I->getSUnit();
         const SDNode *PN = PredSU->getNode();
         if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyToReg) {
-            EVT VT = PN->getOperand(1).getValueType();
+          if (PN->getOpcode() == ISD::CopyFromReg) {
+            EVT VT = PN->getValueType(0);
             unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
             unsigned Cost = TLI->getRepRegClassCostFor(VT);
-            if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
-              return true;
+            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
+              High = true;
+              Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
+            }
+          }
+          continue;
+        }
+        unsigned POpc = PN->getMachineOpcode();
+        if (POpc == TargetOpcode::IMPLICIT_DEF)
+          continue;
+        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+          EVT VT = PN->getOperand(0).getValueType();
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          unsigned Cost = TLI->getRepRegClassCostFor(VT);
+          // Check if this increases register pressure of the specific register
+          // class to the point where it would cause spills.
+          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
+            High = true;
+            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
+          }
+          continue;            
+        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+                   POpc == TargetOpcode::SUBREG_TO_REG) {
+          EVT VT = PN->getValueType(0);
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          unsigned Cost = TLI->getRepRegClassCostFor(VT);
+          // Check if this increases register pressure of the specific register
+          // class to the point where it would cause spills.
+          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
+            High = true;
+            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
           }
           continue;
         }
@@ -1201,12 +1228,14 @@ namespace {
           unsigned Cost = TLI->getRepRegClassCostFor(VT);
           // Check if this increases register pressure of the specific register
           // class to the point where it would cause spills.
-          if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
-            return true;
+          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
+            High = true;
+            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
+          }
         }
       }
 
-      return false;
+      return High;
     }
 
     void ScheduledNode(SUnit *SU) {
@@ -1214,13 +1243,18 @@ namespace {
         return;
 
       const SDNode *N = SU->getNode();
-      if (!N->isMachineOpcode())
-        return;
-      unsigned Opc = N->getMachineOpcode();
-      if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
-          Opc == TargetOpcode::REG_SEQUENCE ||
-          Opc == TargetOpcode::IMPLICIT_DEF)
-        return;
+      if (!N->isMachineOpcode()) {
+        if (N->getOpcode() != ISD::CopyToReg)
+          return;
+      } else {
+        unsigned Opc = N->getMachineOpcode();
+        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+            Opc == TargetOpcode::INSERT_SUBREG ||
+            Opc == TargetOpcode::SUBREG_TO_REG ||
+            Opc == TargetOpcode::REG_SEQUENCE ||
+            Opc == TargetOpcode::IMPLICIT_DEF)
+          return;
+      }
 
       for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
            I != E; ++I) {
@@ -1231,8 +1265,8 @@ namespace {
           continue;
         const SDNode *PN = PredSU->getNode();
         if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyToReg) {
-            EVT VT = PN->getOperand(1).getValueType();
+          if (PN->getOpcode() == ISD::CopyFromReg) {
+            EVT VT = PN->getValueType(0);
             unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
             RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
           }
@@ -1241,6 +1275,18 @@ namespace {
         unsigned POpc = PN->getMachineOpcode();
         if (POpc == TargetOpcode::IMPLICIT_DEF)
           continue;
+        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+          EVT VT = PN->getOperand(0).getValueType();
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          continue;            
+        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+                   POpc == TargetOpcode::SUBREG_TO_REG) {
+          EVT VT = PN->getValueType(0);
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          continue;
+        }
         unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
         for (unsigned i = 0; i != NumDefs; ++i) {
           EVT VT = PN->getValueType(i);
@@ -1251,19 +1297,19 @@ namespace {
         }
       }
 
-      if (!SU->NumSuccs)
-        return;
-      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-      for (unsigned i = 0; i != NumDefs; ++i) {
-        EVT VT = N->getValueType(i);
-        if (!N->hasAnyUseOfValue(i))
-          continue;
-        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
-          // Register pressure tracking is imprecise. This can happen.
-          RegPressure[RCId] = 0;
-        else
-          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+      if (SU->NumSuccs) {
+        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+        for (unsigned i = 0; i != NumDefs; ++i) {
+          EVT VT = N->getValueType(i);
+          if (!N->hasAnyUseOfValue(i))
+            continue;
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+            // Register pressure tracking is imprecise. This can happen.
+            RegPressure[RCId] = 0;
+          else
+            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+        }
       }
 
       dumpRegPressure();
@@ -1274,10 +1320,14 @@ namespace {
         return;
 
       const SDNode *N = SU->getNode();
-      if (!N->isMachineOpcode())
-        return;
+      if (!N->isMachineOpcode()) {
+        if (N->getOpcode() != ISD::CopyToReg)
+          return;
+      }
       unsigned Opc = N->getMachineOpcode();
-      if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
+      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+          Opc == TargetOpcode::INSERT_SUBREG ||
+          Opc == TargetOpcode::SUBREG_TO_REG ||
           Opc == TargetOpcode::REG_SEQUENCE ||
           Opc == TargetOpcode::IMPLICIT_DEF)
         return;
@@ -1291,8 +1341,8 @@ namespace {
           continue;
         const SDNode *PN = PredSU->getNode();
         if (!PN->isMachineOpcode()) {
-          if (PN->getOpcode() == ISD::CopyToReg) {
-            EVT VT = PN->getOperand(1).getValueType();
+          if (PN->getOpcode() == ISD::CopyFromReg) {
+            EVT VT = PN->getValueType(0);
             unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
             RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
           }
@@ -1301,6 +1351,18 @@ namespace {
         unsigned POpc = PN->getMachineOpcode();
         if (POpc == TargetOpcode::IMPLICIT_DEF)
           continue;
+        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+          EVT VT = PN->getOperand(0).getValueType();
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          continue;            
+        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+                   POpc == TargetOpcode::SUBREG_TO_REG) {
+          EVT VT = PN->getValueType(0);
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+          continue;
+        }
         unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
         for (unsigned i = 0; i != NumDefs; ++i) {
           EVT VT = PN->getValueType(i);
@@ -1315,17 +1377,17 @@ namespace {
         }
       }
 
-      if (!SU->NumSuccs)
-        return;
-      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-      for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
-        EVT VT = N->getValueType(i);
-        if (VT == MVT::Flag || VT == MVT::Other)
-          continue;
-        if (!N->hasAnyUseOfValue(i))
-          continue;
-        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
-        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      if (SU->NumSuccs) {
+        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+          EVT VT = N->getValueType(i);
+          if (VT == MVT::Flag || VT == MVT::Other)
+            continue;
+          if (!N->hasAnyUseOfValue(i))
+            continue;
+          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+        }
       }
 
       dumpRegPressure();
@@ -1464,13 +1526,20 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
 }
 
 bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
-  bool LHigh = SPQ->HighRegPressure(left);
-  bool RHigh = SPQ->HighRegPressure(right);
+  unsigned LExcess, RExcess;
+  bool LHigh = SPQ->HighRegPressure(left, LExcess);
+  bool RHigh = SPQ->HighRegPressure(right, RExcess);
   if (LHigh && !RHigh)
     return true;
   else if (!LHigh && RHigh)
     return false;
-  else if (!LHigh && !RHigh) {
+  else if (LHigh && RHigh) {
+    if (LExcess > RExcess)
+      return true;
+    else if (LExcess < RExcess)
+      return false;
+    // Otherwise schedule for register pressure reduction.
+  } else {
     // Low register pressure situation, schedule for latency if possible.
     bool LStall = left->SchedulingPref == Sched::Latency &&
       SPQ->getCurCycle() < left->getHeight();
@@ -1889,8 +1958,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
   const TargetLowering *TLI = &IS->getTargetLowering();
   
   HybridBURRPriorityQueue *PQ =
-    new HybridBURRPriorityQueue(*IS->MF, RegPressureAware, TII, TRI,
-                                (RegPressureAware ? TLI : 0));
+    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
   ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
   PQ->setScheduleDAG(SD);
   return SD;  
index 0e33758508fc8d7fecce955d6625ff210384397c..1f9908c6f6c1343a71aed2467c2fb198fc08b799 100644 (file)
@@ -166,6 +166,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     : TargetLowering(TM, createTLOF(TM)) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
+  RegInfo = TM.getRegisterInfo();
 
   if (Subtarget->isTargetDarwin()) {
     // Uses VFP for Thumb libfuncs if available.
@@ -729,6 +730,23 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   return Sched::RegPressure;
 }
 
+unsigned
+ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+                                       MachineFunction &MF) const {
+  unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;  // Subtracted from GPR limits.
+  switch (RC->getID()) {
+  default:
+    return 0;  // No specific limit for any other register class.
+  case ARM::tGPRRegClassID:
+    return 5 - FPDiff;
+  case ARM::GPRRegClassID:
+    return 10 - FPDiff - (Subtarget->isR9Reserved() ? 1 : 0);
+  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
+  case ARM::DPRRegClassID:
+    return 32 - 10;  // Same limit for both SPR and DPR (case fall-through).
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Lowering Code
 //===----------------------------------------------------------------------===//
index 05d7d5f1cf145eb2c8219c003336fccd7e3019df..b544b5eee2baa8ce2c42b4d28020163205e34920 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "ARMSubtarget.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -268,6 +269,9 @@ namespace llvm {
 
     Sched::Preference getSchedulingPreference(SDNode *N) const;
 
+    unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                 MachineFunction &MF) const;
+
     bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
@@ -285,6 +289,8 @@ namespace llvm {
     /// make the right decision when generating code for different targets.
     const ARMSubtarget *Subtarget;
 
+    const TargetRegisterInfo *RegInfo;
+
     /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
     ///
     unsigned ARMPCLabelIndex;
index fc43ff487780d4297984e224c629ad8f61f308fb..99eed79c86635a30d387669c002eb70d915d875f 100644 (file)
@@ -4,14 +4,14 @@
 ; constant offset addressing, so that each of the following stores
 ; uses the same register.
 
-; CHECK: vstr.32 s0, [r{{.*}}, #-128]
-; CHECK: vstr.32 s0, [r{{.*}}, #-96]
-; CHECK: vstr.32 s0, [r{{.*}}, #-64]
-; CHECK: vstr.32 s0, [r{{.*}}, #-32]
-; CHECK: vstr.32 s0, [r{{.*}}]
-; CHECK: vstr.32 s0, [r{{.*}}, #32]
-; CHECK: vstr.32 s0, [r{{.*}}, #64]
-; CHECK: vstr.32 s0, [r{{.*}}, #96]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"