Add a new attribute called 'jumptable' that creates jump-instruction tables for funct...

[oota-llvm.git] / lib / Target / ARM / ARMBaseInstrInfo.cpp
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp

index b4266693c1a9402af89b3521f5198f4b8850a0f0..1c6c210dae8d93ef7e0f47827c96883f398870cb 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,16 +32,19 @@
  #include "llvm/IR/Function.h"
  #include "llvm/IR/GlobalValue.h"
  #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
  #include "llvm/Support/BranchProbability.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
  
-#define GET_INSTRINFO_CTOR
-#include "ARMGenInstrInfo.inc"
-
  using namespace llvm;
  
+#define DEBUG_TYPE "arm-instrinfo"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "ARMGenInstrInfo.inc"
+
  static cl::opt<bool>
  EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                 cl::desc("Enable ARM 2-addr to 3-addr conv"));
@@ -125,14 +128,14 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
    // FIXME: Thumb2 support.
  
    if (!EnableARM3Addr)
-    return NULL;
+    return nullptr;
  
    MachineInstr *MI = MBBI;
    MachineFunction &MF = *MI->getParent()->getParent();
    uint64_t TSFlags = MI->getDesc().TSFlags;
    bool isPre = false;
    switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
-  default: return NULL;
+  default: return nullptr;
    case ARMII::IndexModePre:
      isPre = true;
      break;
@@ -144,10 +147,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
    // operation.
    unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
    if (MemOpc == 0)
-    return NULL;
+    return nullptr;
  
-  MachineInstr *UpdateMI = NULL;
-  MachineInstr *MemMI = NULL;
+  MachineInstr *UpdateMI = nullptr;
+  MachineInstr *MemMI = nullptr;
    unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
    const MCInstrDesc &MCID = MI->getDesc();
    unsigned NumOps = MCID.getNumOperands();
@@ -169,7 +172,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
        if (ARM_AM::getSOImmVal(Amt) == -1)
          // Can't encode it in a so_imm operand. This transformation will
          // add more than 1 instruction. Abandon!
-        return NULL;
+        return nullptr;
        UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                           get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
          .addReg(BaseReg).addImm(Amt)
@@ -273,8 +276,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const {
-  TBB = 0;
-  FBB = 0;
+  TBB = nullptr;
+  FBB = nullptr;
  
    MachineBasicBlock::iterator I = MBB.end();
    if (I == MBB.begin())
@@ -283,7 +286,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
  
    // Walk backwards from the end of the basic block until the branch is
    // analyzed or we give up.
-  while (isPredicated(I) || I->isTerminator()) {
+  while (isPredicated(I) || I->isTerminator() || I->isDebugValue()) {
  
      // Flag to be raised on unanalyzeable instructions. This is useful in cases
      // where we want to clean up on the end of the basic block before we bail
@@ -331,12 +334,12 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
             I->isReturn())) {
        // Forget any previous condition branch information - it no longer applies.
        Cond.clear();
-      FBB = 0;
+      FBB = nullptr;
  
        // If we can modify the function, delete everything below this
        // unconditional branch.
        if (AllowModify) {
-        MachineBasicBlock::iterator DI = llvm::next(I);
+        MachineBasicBlock::iterator DI = std::next(I);
          while (DI != MBB.end()) {
            MachineInstr *InstToDelete = DI;
            ++DI;
@@ -405,7 +408,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
    assert((Cond.size() == 2 || Cond.size() == 0) &&
           "ARM branch conditions have two components!");
  
-  if (FBB == 0) {
+  if (!FBB) {
      if (Cond.empty()) { // Unconditional branch?
        if (isThumb)
          BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
@@ -525,7 +528,7 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
  
    if (AFI->isThumb2Function()) {
-    if (getSubtarget().hasV8Ops())
+    if (getSubtarget().restrictIT())
        return isV8EligibleForIT(MI);
    } else { // non-Thumb
      if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
@@ -535,6 +538,22 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
    return true;
  }
  
+namespace llvm {
+template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.isUndef() || MO.isUse())
+      continue; // Only register operands that are non-undef defs matter.
+    if (MO.getReg() != ARM::CPSR)
+      continue; // A def of some register other than CPSR.
+    if (!MO.isDead())
+      return false; // MI has a CPSR def that is not marked dead.
+  }
+  // Every CPSR def on MI is marked dead (vacuously so if MI defines none).
+  return true;
+}
+} // end namespace llvm
+
  /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
  LLVM_ATTRIBUTE_NOINLINE
  static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
@@ -559,15 +578,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
    // If this machine instr is an inline asm, measure it.
    if (MI->getOpcode() == ARM::INLINEASM)
      return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
-  if (MI->isLabel())
-    return 0;
    unsigned Opc = MI->getOpcode();
    switch (Opc) {
-  case TargetOpcode::IMPLICIT_DEF:
-  case TargetOpcode::KILL:
-  case TargetOpcode::PROLOG_LABEL:
-  case TargetOpcode::EH_LABEL:
-  case TargetOpcode::DBG_VALUE:
+  default:
+    // pseudo-instruction sizes are zero.
      return 0;
    case TargetOpcode::BUNDLE:
      return getInstBundleLength(MI);
@@ -611,7 +625,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
        MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
      unsigned JTI = JTOP.getIndex();
      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-    assert(MJTI != 0);
+    assert(MJTI != nullptr);
      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
      assert(JTI < JT.size());
      // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
@@ -630,9 +644,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
        ++NumEntries;
      return NumEntries * EntrySize + InstSize;
    }
-  default:
-    // Otherwise, pseudo-instruction sizes are zero.
-    return 0;
    }
  }
  
@@ -652,16 +663,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                     unsigned DestReg, unsigned SrcReg,
                                     bool KillSrc) const {
    bool GPRDest = ARM::GPRRegClass.contains(DestReg);
-  bool GPRSrc  = ARM::GPRRegClass.contains(SrcReg);
+  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
  
    if (GPRDest && GPRSrc) {
      AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
-                                  .addReg(SrcReg, getKillRegState(KillSrc))));
+                                    .addReg(SrcReg, getKillRegState(KillSrc))));
      return;
    }
  
    bool SPRDest = ARM::SPRRegClass.contains(DestReg);
-  bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);
+  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
  
    unsigned Opc = 0;
    if (SPRDest && SPRSrc)
@@ -690,26 +701,47 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
    int Spacing = 1;
  
    // Use VORRq when possible.
-  if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
-  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
+    Opc = ARM::VORRq;
+    BeginIdx = ARM::qsub_0;
+    SubRegs = 2;
+  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+    Opc = ARM::VORRq;
+    BeginIdx = ARM::qsub_0;
+    SubRegs = 4;
    // Fall back to VMOVD.
-  else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
-  else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
-  else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
-  else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
-
-  else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
-  else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
-  else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
-    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
+    Opc = ARM::VMOVD;
+    BeginIdx = ARM::dsub_0;
+    SubRegs = 2;
+  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
+    Opc = ARM::VMOVD;
+    BeginIdx = ARM::dsub_0;
+    SubRegs = 3;
+  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
+    Opc = ARM::VMOVD;
+    BeginIdx = ARM::dsub_0;
+    SubRegs = 4;
+  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
+    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
+    BeginIdx = ARM::gsub_0;
+    SubRegs = 2;
+  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
+    Opc = ARM::VMOVD;
+    BeginIdx = ARM::dsub_0;
+    SubRegs = 2;
+    Spacing = 2;
+  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
+    Opc = ARM::VMOVD;
+    BeginIdx = ARM::dsub_0;
+    SubRegs = 3;
+    Spacing = 2;
+  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
+    Opc = ARM::VMOVD;
+    BeginIdx = ARM::dsub_0;
+    SubRegs = 4;
+    Spacing = 2;
+  }
  
    assert(Opc && "Impossible reg-to-reg copy");
  
@@ -718,22 +750,21 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
  
    // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
    if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
-    BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
      Spacing = -Spacing;
    }
  #ifndef NDEBUG
    SmallSet<unsigned, 4> DstRegs;
  #endif
    for (unsigned i = 0; i != SubRegs; ++i) {
-    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
-    unsigned Src = TRI->getSubReg(SrcReg,  BeginIdx + i*Spacing);
+    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
      assert(Dst && Src && "Bad sub-register");
  #ifndef NDEBUG
      assert(!DstRegs.count(Src) && "destructive vector copy");
      DstRegs.insert(Dst);
  #endif
-    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
-      .addReg(Src);
+    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
      // VORR takes two source operands.
      if (Opc == ARM::VORRq)
        Mov.addReg(Src);
@@ -1222,7 +1253,8 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
      static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
  
    unsigned PCLabelId = AFI->createPICLabelUId();
-  ARMConstantPoolValue *NewCPV = 0;
+  ARMConstantPoolValue *NewCPV = nullptr;
+
    // FIXME: The below assumes PIC relocation model and that the function
    // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
    // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
@@ -1305,10 +1337,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
        Opcode == ARM::t2LDRpci_pic ||
        Opcode == ARM::tLDRpci ||
        Opcode == ARM::tLDRpci_pic ||
-      Opcode == ARM::MOV_ga_dyn ||
+      Opcode == ARM::LDRLIT_ga_pcrel ||
+      Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+      Opcode == ARM::tLDRLIT_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel_ldr ||
-      Opcode == ARM::t2MOV_ga_dyn ||
        Opcode == ARM::t2MOV_ga_pcrel) {
      if (MI1->getOpcode() != Opcode)
        return false;
@@ -1320,10 +1353,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
      if (MO0.getOffset() != MO1.getOffset())
        return false;
  
-    if (Opcode == ARM::MOV_ga_dyn ||
+    if (Opcode == ARM::LDRLIT_ga_pcrel ||
+        Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+        Opcode == ARM::tLDRLIT_ga_pcrel ||
          Opcode == ARM::MOV_ga_pcrel ||
          Opcode == ARM::MOV_ga_pcrel_ldr ||
-        Opcode == ARM::t2MOV_ga_dyn ||
          Opcode == ARM::t2MOV_ga_pcrel)
        // Ignore the PC labels.
        return MO0.getGlobal() == MO1.getGlobal();
@@ -1514,7 +1548,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
      return false;
  
    // Terminators and labels can't be scheduled around.
-  if (MI->isTerminator() || MI->isLabel())
+  if (MI->isTerminator() || MI->isPosition())
      return true;
  
    // Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1630,10 +1664,10 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
      ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
      // MOVCC AL can't be inverted. Shouldn't happen.
      if (CC == ARMCC::AL || PredReg != ARM::CPSR)
-      return NULL;
+      return nullptr;
      MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
      if (!MI)
-      return NULL;
+      return nullptr;
      // After swapping the MOVCC operands, also invert the condition.
      MI->getOperand(MI->findFirstPredOperandIdx())
        .setImm(ARMCC::getOppositeCondition(CC));
@@ -1649,35 +1683,36 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
                                        const MachineRegisterInfo &MRI,
                                        const TargetInstrInfo *TII) {
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
-    return 0;
+    return nullptr;
    if (!MRI.hasOneNonDBGUse(Reg))
-    return 0;
+    return nullptr;
    MachineInstr *MI = MRI.getVRegDef(Reg);
    if (!MI)
-    return 0;
+    return nullptr;
    // MI is folded into the MOVCC by predicating it.
    if (!MI->isPredicable())
-    return 0;
+    return nullptr;
    // Check if MI has any non-dead defs or physreg uses. This also detects
    // predicated instructions which will be reading CPSR.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      // Reject frame index operands, PEI can't handle the predicated pseudos.
      if (MO.isFI() || MO.isCPI() || MO.isJTI())
-      return 0;
+      return nullptr;
      if (!MO.isReg())
        continue;
      // MI can't have any tied operands, that would conflict with predication.
      if (MO.isTied())
-      return 0;
+      return nullptr;
      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
-      return 0;
+      return nullptr;
      if (MO.isDef() && !MO.isDead())
-      return 0;
+      return nullptr;
    }
    bool DontMoveAcrossStores = true;
-  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
-    return 0;
+  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr,
+                        DontMoveAcrossStores))
+    return nullptr;
    return MI;
  }
  
@@ -1706,19 +1741,25 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
                                                 bool PreferFalse) const {
    assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
           "Unknown select instruction");
-  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
    MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
    bool Invert = !DefMI;
    if (!DefMI)
      DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
    if (!DefMI)
-    return 0;
+    return nullptr;
+
+  // Find new register class to use.
+  MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+  unsigned       DestReg  = MI->getOperand(0).getReg();
+  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+  if (!MRI.constrainRegClass(DestReg, PreviousClass))
+    return nullptr;
  
    // Create a new predicated version of DefMI.
    // Rfalse is the first use.
    MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
-                                      DefMI->getDesc(),
-                                      MI->getOperand(0).getReg());
+                                      DefMI->getDesc(), DestReg);
  
    // Copy all the DefMI operands, excluding its (null) predicate.
    const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1741,7 +1782,6 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
    // register operand tied to the first def.
    // The tie makes the register allocator ensure the FalseReg is allocated the
    // same register as operand 0.
-  MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
    FalseReg.setImplicit();
    NewMI.addOperand(FalseReg);
    NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
@@ -1801,6 +1841,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                                 unsigned DestReg, unsigned BaseReg, int NumBytes,
                                 ARMCC::CondCodes Pred, unsigned PredReg,
                                 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+  if (NumBytes == 0 && DestReg != BaseReg) {
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
+      .addReg(BaseReg, RegState::Kill)
+      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+      .setMIFlags(MIFlags);
+    return;
+  }
+
    bool isSub = NumBytes < 0;
    if (isSub) NumBytes = -NumBytes;
  
@@ -1824,6 +1872,125 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
    }
  }
  
+static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI,
+                      MachineInstr *MI) { // Queried via MI's parent block.
+  for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true);
+       Subreg.isValid(); ++Subreg)
+    if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) !=
+        MachineBasicBlock::LQR_Dead)
+      return true; // Any answer other than LQR_Dead is treated as live.
+  return false; // Every register visited by the iterator was LQR_Dead.
+}
+bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
+                                      MachineFunction &MF, MachineInstr *MI,
+                                      unsigned NumBytes) { // True iff MI was rewritten.
+  // This optimisation potentially adds lots of load and store
+  // micro-operations; it is only really a great benefit to code size.
+  if (!Subtarget.isMinSize())
+    return false;
+
+  // If only one register is pushed/popped, LLVM can use an LDR/STR
+  // instead. We can't modify those so make sure we're dealing with an
+  // instruction we understand.
+  bool IsPop = isPopOpcode(MI->getOpcode());
+  bool IsPush = isPushOpcode(MI->getOpcode());
+  if (!IsPush && !IsPop)
+    return false;
+
+  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
+                      MI->getOpcode() == ARM::VLDMDIA_UPD;
+  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
+                     MI->getOpcode() == ARM::tPOP ||
+                     MI->getOpcode() == ARM::tPOP_RET;
+
+  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
+                          MI->getOperand(1).getReg() == ARM::SP)) &&
+         "trying to fold sp update into non-sp-updating push/pop");
+
+  // The VFP push & pop act on D-registers, so we can only fold in an
+  // adjustment that is a multiple of 8 bytes. Similarly, each rN is 4 bytes.
+  // Don't try if this is violated.
+  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
+    return false;
+
+  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ pred),
+  // so the register list starts at operand 4. Thumb1 starts after the pred.
+  int RegListIdx = IsT1PushPop ? 2 : 4;
+
+  // Calculate the space we'll need in terms of registers.
+  unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
+  unsigned RD0Reg, RegsNeeded;
+  if (IsVFPPushPop) {
+    RD0Reg = ARM::D0; // Lowest register the search below may reach.
+    RegsNeeded = NumBytes / 8;
+  } else {
+    RD0Reg = ARM::R0; // Lowest register the search below may reach.
+    RegsNeeded = NumBytes / 4;
+  }
+
+  // We're going to have to strip all list operands off before
+  // re-adding them since the order matters, so save the existing ones
+  // for later.
+  SmallVector<MachineOperand, 4> RegList;
+  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+    RegList.push_back(MI->getOperand(i)); // Saved last-to-first.
+
+  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+
+  // Now try to find enough space in the reglist to allocate NumBytes.
+  for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
+       --CurReg) { // NOTE(review): assumes consecutive reg numbers, RD0Reg > 0.
+    if (!IsPop) {
+      // Pushing any register is completely harmless; mark the
+      // register involved as undef since we don't care about it in
+      // the slightest.
+      RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
+                                                  false, false, true));
+      --RegsNeeded;
+      continue;
+    }
+
+    // However, we can only pop an extra register if it's not live. For
+    // registers live within the function we might clobber a return value
+    // register; the other way a register can be live here is if it's
+    // callee-saved.
+    // TODO: Currently, computeRegisterLiveness() does not report "live" if a
+    // sub reg is live. When computeRegisterLiveness() works for sub reg, it
+    // can replace isAnySubRegLive().
+    if (isCalleeSavedRegister(CurReg, CSRegs) ||
+        isAnySubRegLive(CurReg, TRI, MI)) {
+      // VFP pops don't allow holes in the register list, so any skip is fatal
+      // for our transformation. GPR pops do, so we should just keep looking.
+      if (IsVFPPushPop)
+        return false;
+      else
+        continue;
+    }
+
+    // Mark the unimportant registers as <def,dead> in the POP.
+    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
+                                                true));
+    --RegsNeeded;
+  }
+
+  if (RegsNeeded > 0)
+    return false; // Not enough usable registers below FirstReg; MI untouched.
+
+  // Finally we know we can profitably perform the optimisation so go
+  // ahead: strip all existing registers off and add them back again
+  // in the right order.
+  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+    MI->RemoveOperand(i);
+
+  // Re-add in reverse: new lower regs first, then originals in old order.
+  MachineInstrBuilder MIB(MF, &*MI);
+  for (int i = RegList.size() - 1; i >= 0; --i)
+    MIB.addOperand(RegList[i]);
+
+  return true;
+}
+
  bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                  unsigned FrameReg, int &Offset,
                                  const ARMBaseInstrInfo &TII) {
@@ -2017,7 +2184,7 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
        // Walk down one instruction which is potentially an 'and'.
        const MachineInstr &Copy = *MI;
        MachineBasicBlock::iterator AND(
-        llvm::next(MachineBasicBlock::iterator(MI)));
+        std::next(MachineBasicBlock::iterator(MI)));
        if (AND == MI->getParent()->end()) return false;
        MI = AND;
        return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
@@ -2093,9 +2260,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
    // Masked compares sometimes use the same register as the corresponding 'and'.
    if (CmpMask != ~0) {
      if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
-      MI = 0;
-      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
-           UE = MRI->use_end(); UI != UE; ++UI) {
+      MI = nullptr;
+      for (MachineRegisterInfo::use_instr_iterator
+           UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
+           UI != UE; ++UI) {
          if (UI->getParent() != CmpInstr->getParent()) continue;
          MachineInstr *PotentialAND = &*UI;
          if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
@@ -2119,17 +2287,17 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
    // One is MI, the other is a SUB instruction.
    // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
    // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
-  MachineInstr *Sub = NULL;
+  MachineInstr *Sub = nullptr;
    if (SrcReg2 != 0)
      // MI is not a candidate for CMPrr.
-    MI = NULL;
+    MI = nullptr;
    else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
      // Conservatively refuse to convert an instruction which isn't in the same
      // BB as the comparison.
      // For CMPri, we need to check Sub, thus we can't return here.
      if (CmpInstr->getOpcode() == ARM::CMPri ||
         CmpInstr->getOpcode() == ARM::t2CMPri)
-      MI = NULL;
+      MI = nullptr;
      else
        return false;
    }
@@ -2230,8 +2398,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
            isSafe = true;
            break;
          }
-        // Condition code is after the operand before CPSR.
-        ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+        // Condition code is after the operand before CPSR except for VSELs.
+        ARMCC::CondCodes CC;
+        bool IsInstrVSel = true;
+        switch (Instr.getOpcode()) {
+        default:
+          IsInstrVSel = false;
+          CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+          break;
+        case ARM::VSELEQD:
+        case ARM::VSELEQS:
+          CC = ARMCC::EQ;
+          break;
+        case ARM::VSELGTD:
+        case ARM::VSELGTS:
+          CC = ARMCC::GT;
+          break;
+        case ARM::VSELGED:
+        case ARM::VSELGES:
+          CC = ARMCC::GE;
+          break;
+        case ARM::VSELVSS:
+        case ARM::VSELVSD:
+          CC = ARMCC::VS;
+          break;
+        }
+
          if (Sub) {
            ARMCC::CondCodes NewCC = getSwappedCondition(CC);
            if (NewCC == ARMCC::AL)
@@ -2242,11 +2434,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
            // If it is safe to remove CmpInstr, the condition code of these
            // operands will be modified.
            if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
-              Sub->getOperand(2).getReg() == SrcReg)
-            OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
-                                                      NewCC));
-        }
-        else
+              Sub->getOperand(2).getReg() == SrcReg) {
+            // VSel doesn't support condition code update.
+            if (IsInstrVSel)
+              return false;
+            OperandsToUpdate.push_back(
+                std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
+          }
+        } else
            switch (CC) {
            default:
              // CPSR can be used multiple times, we should continue.
@@ -2778,7 +2973,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
          break;
        }
        return UOps;
-    } else if (Subtarget.isCortexA8()) {
+    } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
        if (NumRegs < 4)
          return 2;
        // 4 registers would be issued: 2, 2.
@@ -2815,7 +3010,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
      return ItinData->getOperandCycle(DefClass, DefIdx);
  
    int DefCycle;
-  if (Subtarget.isCortexA8()) {
+  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
      // (regno / 2) + (regno % 2) + 1
      DefCycle = RegNo / 2 + 1;
      if (RegNo % 2)
@@ -2856,7 +3051,7 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
      return ItinData->getOperandCycle(DefClass, DefIdx);
  
    int DefCycle;
-  if (Subtarget.isCortexA8()) {
+  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
      // 4 registers would be issued: 1, 2, 1.
      // 5 registers would be issued: 1, 2, 2.
      DefCycle = RegNo / 2;
@@ -2890,7 +3085,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
      return ItinData->getOperandCycle(UseClass, UseIdx);
  
    int UseCycle;
-  if (Subtarget.isCortexA8()) {
+  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
      // (regno / 2) + (regno % 2) + 1
      UseCycle = RegNo / 2 + 1;
      if (RegNo % 2)
@@ -2930,7 +3125,7 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
      return ItinData->getOperandCycle(UseClass, UseIdx);
  
    int UseCycle;
-  if (Subtarget.isCortexA8()) {
+  if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
      UseCycle = RegNo / 2;
      if (UseCycle < 2)
        UseCycle = 2;
@@ -3067,8 +3262,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
    Dist = 0;
  
    MachineBasicBlock::const_iterator I = MI; ++I;
-  MachineBasicBlock::const_instr_iterator II =
-    llvm::prior(I.getInstrIterator());
+  MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
    assert(II->isInsideBundle() && "Empty bundle?");
  
    int Idx = -1;
@@ -3107,7 +3301,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
  
    if (Idx == -1) {
      Dist = 0;
-    return 0;
+    return nullptr;
    }
  
    UseIdx = Idx;
@@ -3121,7 +3315,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
                              const MachineInstr *DefMI,
                              const MCInstrDesc *DefMCID, unsigned DefAlign) {
    int Adjust = 0;
-  if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
+  if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {
      // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
      // variants are one cycle cheaper.
      switch (DefMCID->getOpcode()) {
@@ -3422,7 +3616,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    UseMCID, UseIdx, UseAlign);
  
    if (Latency > 1 &&
-      (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
+      (Subtarget.isCortexA8() || Subtarget.isLikeA9() ||
+       Subtarget.isCortexA7())) {
      // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
      // variants are one cycle cheaper.
      switch (DefMCID.getOpcode()) {
@@ -3515,6 +3710,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
      case ARM::VLD3d16Pseudo:
      case ARM::VLD3d32Pseudo:
      case ARM::VLD1d64TPseudo:
+    case ARM::VLD1d64TPseudoWB_fixed:
      case ARM::VLD3d8Pseudo_UPD:
      case ARM::VLD3d16Pseudo_UPD:
      case ARM::VLD3d32Pseudo_UPD:
@@ -3531,6 +3727,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
      case ARM::VLD4d16Pseudo:
      case ARM::VLD4d32Pseudo:
      case ARM::VLD1d64QPseudo:
+    case ARM::VLD1d64QPseudoWB_fixed:
      case ARM::VLD4d8Pseudo_UPD:
      case ARM::VLD4d16Pseudo_UPD:
      case ARM::VLD4d32Pseudo_UPD:
@@ -4162,6 +4359,29 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
    MI->addRegisterKilled(DReg, TRI, true);
  }
  
+void ARMBaseInstrInfo::getUnconditionalBranch(
+    MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
+  if (Subtarget.isThumb()) // Opcode is chosen from the subtarget mode.
+    Branch.setOpcode(ARM::tB);
+  else if (Subtarget.isThumb2()) // NOTE(review): dead if Thumb2 implies Thumb.
+    Branch.setOpcode(ARM::t2B);
+  else
+    Branch.setOpcode(ARM::Bcc);
+
+  Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); // Branch target.
+  Branch.addOperand(MCOperand::CreateImm(ARMCC::AL)); // Presumably "always".
+  Branch.addOperand(MCOperand::CreateReg(0)); // Presumably no predicate reg.
+}
+
+void ARMBaseInstrInfo::getTrap(MCInst &MI) const { // Sets only MI's opcode.
+  if (Subtarget.isThumb()) // Checked first, so it wins over useNaClTrap().
+    MI.setOpcode(ARM::tTRAP);
+  else if (Subtarget.useNaClTrap())
+    MI.setOpcode(ARM::TRAPNaCl);
+  else
+    MI.setOpcode(ARM::TRAP);
+}
+
  bool ARMBaseInstrInfo::hasNOP() const {
    return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
  }