ARM scheduler model: Swift has varying latencies, uops for simple ALU ops

[oota-llvm.git] / lib / Target / ARM / ARMBaseInstrInfo.cpp
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp

index 0076910c95aaa9f183ad2624332072c144b96ddd..9e68ff44890e1d9cb455eb8a2ca22617f95071bb 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
    // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
    // widened to VMOVD.  We prefer the VMOVD when possible because it may be
    // changed into a VORR that can go down the NEON pipeline.
-  if (!WidenVMOVS || !MI->isCopy())
+  if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
      return false;
  
    // Look for a copy between even S-registers.  That is where we keep floats
@@ -2719,7 +2719,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
    case ARM::t2STMDB_UPD: {
      unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
      if (Subtarget.isSwift()) {
-      // rdar://8402126
        int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
        switch (Opc) {
        default: break;
@@ -3735,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
    if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
      return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
  
-  // A9-like cores are particularly picky about mixing the two and want these
+  // CortexA9 is particularly picky about mixing the two and wants these
    // converted.
-  if (Subtarget.isLikeA9() && !isPredicated(MI) &&
+  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
        (MI->getOpcode() == ARM::VMOVRS ||
         MI->getOpcode() == ARM::VMOVSR ||
         MI->getOpcode() == ARM::VMOVS))
@@ -4024,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
  // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
  //
  // FCONSTD can be used as a dependency-breaking instruction.
-
-
  unsigned ARMBaseInstrInfo::
  getPartialRegUpdateClearance(const MachineInstr *MI,
                               unsigned OpNum,
                               const TargetRegisterInfo *TRI) const {
-  // Only Swift has partial register update problems.
-  if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+  if (!SwiftPartialUpdateClearance ||
+      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
      return 0;
  
    assert(TRI && "Need TRI instance");
@@ -4047,7 +4044,6 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
    case ARM::VLDRS:
    case ARM::FCONSTS:
    case ARM::VMOVSR:
-    // rdar://problem/8791586
    case ARM::VMOVv8i8:
    case ARM::VMOVv4i16:
    case ARM::VMOVv2i32:
@@ -4058,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
  
      // Explicitly reads the dependency.
    case ARM::VLD1LNd32:
-    UseOp = 1;
+    UseOp = 3;
      break;
    default:
      return 0;
@@ -4127,3 +4123,15 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
  bool ARMBaseInstrInfo::hasNOP() const {
    return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
  }
+
+bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+  unsigned ShOpVal = MI->getOperand(3).getImm(); // assumed shifter imm
+  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); // shift amount
+  // Swift's faster shifts are exactly: lsr #1, lsl #1 and lsl #2.
+  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
+      ((ShImm == 1 || ShImm == 2) &&
+       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
+    return true;
+  // Any other shift opcode or shift amount does not qualify.
+  return false;
+}