Add LICENSE.TXT covering contributions made by ARM.

[oota-llvm.git] / lib / Target / ARM / ARMFrameLowering.cpp
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp

index adff2399c78fe762d892835ac740778f4b7187a6..39d27c452df348c7f8e309de91bc6588a4323d9d 100644 (file)
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -12,42 +12,54 @@
  //===----------------------------------------------------------------------===//
  
  #include "ARMFrameLowering.h"
  //===----------------------------------------------------------------------===//
  
  #include "ARMFrameLowering.h"
-#include "ARMAddressingModes.h"
  #include "ARMBaseInstrInfo.h"
  #include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
  #include "ARMMachineFunctionInfo.h"
  #include "ARMMachineFunctionInfo.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/RegisterScavenging.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
  #include "llvm/Target/TargetOptions.h"
  
  using namespace llvm;
  
  #include "llvm/Target/TargetOptions.h"
  
  using namespace llvm;
  
+static cl::opt<bool>
+SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
+                     cl::desc("Align ARM NEON spills in prolog and epilog"));
+
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+                        unsigned NumAlignedDPRCS2Regs);
+
  /// hasFP - Return true if the specified function should have a dedicated frame
  /// pointer register.  This is true if the function has variable sized allocas
  /// or if frame pointer elimination is disabled.
  /// hasFP - Return true if the specified function should have a dedicated frame
  /// pointer register.  This is true if the function has variable sized allocas
  /// or if frame pointer elimination is disabled.
-///
  bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
    const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
  
  bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
    const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
  
-  // Mac OS X requires FP not to be clobbered for backtracing purpose.
-  if (STI.isTargetDarwin())
+  // iOS requires FP not to be clobbered for backtracing purpose.
+  if (STI.isTargetIOS())
      return true;
  
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    // Always eliminate non-leaf frame pointers.
      return true;
  
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    // Always eliminate non-leaf frame pointers.
-  return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+  return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+           MFI->hasCalls()) ||
            RegInfo->needsStackRealignment(MF) ||
            MFI->hasVarSizedObjects() ||
            MFI->isFrameAddressTaken());
  }
  
            RegInfo->needsStackRealignment(MF) ||
            MFI->hasVarSizedObjects() ||
            MFI->isFrameAddressTaken());
  }
  
-// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
-// not required, we reserve argument space for call sites in the function
-// immediately on entry to the current function. This eliminates the need for
-// add/sub sp brackets around call sites. Returns true if the call frame is
-// included as part of the stack frame.
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function.  This eliminates the need for
+/// add/sub sp brackets around call sites.  Returns true if the call frame is
+/// included as part of the stack frame.
  bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
    const MachineFrameInfo *FFI = MF.getFrameInfo();
    unsigned CFSize = FFI->getMaxCallFrameSize();
  bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
    const MachineFrameInfo *FFI = MF.getFrameInfo();
    unsigned CFSize = FFI->getMaxCallFrameSize();
@@ -61,15 +73,16 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
    return !MF.getFrameInfo()->hasVarSizedObjects();
  }
  
    return !MF.getFrameInfo()->hasVarSizedObjects();
  }
  
-// canSimplifyCallFramePseudos - If there is a reserved call frame, the
-// call frame pseudos can be simplified. Unlike most targets, having a FP
-// is not sufficient here since we still may reference some objects via SP
-// even when FP is available in Thumb2 mode.
-bool ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF)const {
+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+/// call frame pseudos can be simplified.  Unlike most targets, having a FP
+/// is not sufficient here since we still may reference some objects via SP
+/// even when FP is available in Thumb2 mode.
+bool
+ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
    return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
  }
  
    return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
  }
  
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
    for (unsigned i = 0; CSRegs[i]; ++i)
      if (Reg == CSRegs[i])
        return true;
    for (unsigned i = 0; CSRegs[i]; ++i)
      if (Reg == CSRegs[i])
        return true;
@@ -78,7 +91,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
  
  static bool isCSRestore(MachineInstr *MI,
                          const ARMBaseInstrInfo &TII,
  
  static bool isCSRestore(MachineInstr *MI,
                          const ARMBaseInstrInfo &TII,
-                        const unsigned *CSRegs) {
+                        const uint16_t *CSRegs) {
    // Integer spill area is handled with "pop".
    if (MI->getOpcode() == ARM::LDMIA_RET ||
        MI->getOpcode() == ARM::t2LDMIA_RET ||
    // Integer spill area is handled with "pop".
    if (MI->getOpcode() == ARM::LDMIA_RET ||
        MI->getOpcode() == ARM::t2LDMIA_RET ||
@@ -92,7 +105,8 @@ static bool isCSRestore(MachineInstr *MI,
          return false;
      return true;
    }
          return false;
      return true;
    }
-  if ((MI->getOpcode() == ARM::LDR_POST ||
+  if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
+       MI->getOpcode() == ARM::LDR_POST_REG ||
         MI->getOpcode() == ARM::t2LDR_POST) &&
        isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
        MI->getOperand(1).getReg() == ARM::SP)
         MI->getOpcode() == ARM::t2LDR_POST) &&
        isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
        MI->getOperand(1).getReg() == ARM::SP)
@@ -105,14 +119,13 @@ static void
  emitSPUpdate(bool isARM,
               MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
               DebugLoc dl, const ARMBaseInstrInfo &TII,
  emitSPUpdate(bool isARM,
               MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
               DebugLoc dl, const ARMBaseInstrInfo &TII,
-             int NumBytes,
-             ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+             int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
    if (isARM)
      emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
    if (isARM)
      emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
-                            Pred, PredReg, TII);
+                            ARMCC::AL, 0, TII, MIFlags);
    else
      emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
    else
      emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
-                           Pred, PredReg, TII);
+                           ARMCC::AL, 0, TII, MIFlags);
  }
  
  void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
  }
  
  void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -137,14 +150,22 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
    // belongs to which callee-save spill areas.
    unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
    int FramePtrSpillFI = 0;
    // belongs to which callee-save spill areas.
    unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
    int FramePtrSpillFI = 0;
+  int D8SpillFI = 0;
+
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+    return;
  
    // Allocate the vararg register save area. This is not counted in NumBytes.
    if (VARegSaveSize)
  
    // Allocate the vararg register save area. This is not counted in NumBytes.
    if (VARegSaveSize)
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+                 MachineInstr::FrameSetup);
  
    if (!AFI->hasStackFrame()) {
      if (NumBytes != 0)
  
    if (!AFI->hasStackFrame()) {
      if (NumBytes != 0)
-      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+                   MachineInstr::FrameSetup);
      return;
    }
  
      return;
    }
  
@@ -168,7 +189,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
      case ARM::R11:
        if (Reg == FramePtr)
          FramePtrSpillFI = FI;
      case ARM::R11:
        if (Reg == FramePtr)
          FramePtrSpillFI = FI;
-      if (STI.isTargetDarwin()) {
+      if (STI.isTargetIOS()) {
          AFI->addGPRCalleeSavedArea2Frame(FI);
          GPRCS2Size += 4;
        } else {
          AFI->addGPRCalleeSavedArea2Frame(FI);
          GPRCS2Size += 4;
        } else {
@@ -177,8 +198,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
        }
        break;
      default:
        }
        break;
      default:
-      AFI->addDPRCalleeSavedAreaFrame(FI);
-      DPRCSSize += 8;
+      // This is a DPR. Exclude the aligned DPRCS2 spills.
+      if (Reg == ARM::D8)
+        D8SpillFI = FI;
+      if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
+        AFI->addDPRCalleeSavedAreaFrame(FI);
+        DPRCSSize += 8;
+      }
      }
    }
  
      }
    }
  
@@ -186,8 +212,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
    if (GPRCS1Size > 0) MBBI++;
  
    // Set FP to point to the stack slot that contains the previous FP.
    if (GPRCS1Size > 0) MBBI++;
  
    // Set FP to point to the stack slot that contains the previous FP.
-  // For Darwin, FP is R7, which has now been stored in spill area 1.
-  // Otherwise, if this is not Darwin, all the callee-saved registers go
+  // For iOS, FP is R7, which has now been stored in spill area 1.
+  // Otherwise, if this is not iOS, all the callee-saved registers go
    // into spill area 1, including the FP in R11.  In either case, it is
    // now safe to emit this assignment.
    bool HasFP = hasFP(MF);
    // into spill area 1, including the FP in R11.  In either case, it is
    // now safe to emit this assignment.
    bool HasFP = hasFP(MF);
@@ -195,7 +221,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
      unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
      unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
-      .addFrameIndex(FramePtrSpillFI).addImm(0);
+      .addFrameIndex(FramePtrSpillFI).addImm(0)
+      .setMIFlag(MachineInstr::FrameSetup);
      AddDefaultCC(AddDefaultPred(MIB));
    }
  
      AddDefaultCC(AddDefaultPred(MIB));
    }
  
@@ -214,12 +241,29 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
    AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
  
    // Move past area 3.
    AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
  
    // Move past area 3.
-  if (DPRCSSize > 0) MBBI++;
+  if (DPRCSSize > 0) {
+    MBBI++;
+    // Since vpush register list cannot have gaps, there may be multiple vpush
+    // instructions in the prologue.
+    while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+      MBBI++;
+  }
+
+  // Move past the aligned DPRCS2 area.
+  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
+    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
+    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
+    // leaves the stack pointer pointing to the DPRCS2 area.
+    //
+    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
+    NumBytes += MFI->getObjectOffset(D8SpillFI);
+  } else
+    NumBytes = DPRCSOffset;
  
  
-  NumBytes = DPRCSOffset;
    if (NumBytes) {
      // Adjust SP after all the callee-save spills.
    if (NumBytes) {
      // Adjust SP after all the callee-save spills.
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+                 MachineInstr::FrameSetup);
      if (HasFP && isARM)
        // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
        // Note it's not safe to do this in Thumb2 mode because it would have
      if (HasFP && isARM)
        // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
        // Note it's not safe to do this in Thumb2 mode because it would have
@@ -242,7 +286,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
  
    // If we need dynamic stack realignment, do it here. Be paranoid and make
    // sure if we also have VLAs, we have a base pointer for frame access.
  
    // If we need dynamic stack realignment, do it here. Be paranoid and make
    // sure if we also have VLAs, we have a base pointer for frame access.
-  if (RegInfo->needsStackRealignment(MF)) {
+  // If aligned NEON registers were spilled, the stack has already been
+  // realigned.
+  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
      unsigned MaxAlign = MFI->getMaxAlignment();
      assert (!AFI->isThumb1OnlyFunction());
      if (!AFI->isThumbFunction()) {
      unsigned MaxAlign = MFI->getMaxAlignment();
      assert (!AFI->isThumb1OnlyFunction());
      if (!AFI->isThumbFunction()) {
@@ -258,14 +304,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
        // bic r4, r4, MaxAlign
        // mov sp, r4
        // FIXME: It will be better just to find spare register here.
        // bic r4, r4, MaxAlign
        // mov sp, r4
        // FIXME: It will be better just to find spare register here.
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4)
-        .addReg(ARM::SP, RegState::Kill);
+      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+        .addReg(ARM::SP, RegState::Kill));
        AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
                                            TII.get(ARM::t2BICri), ARM::R4)
                                    .addReg(ARM::R4, RegState::Kill)
                                    .addImm(MaxAlign-1)));
        AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
                                            TII.get(ARM::t2BICri), ARM::R4)
                                    .addReg(ARM::R4, RegState::Kill)
                                    .addImm(MaxAlign-1)));
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
-        .addReg(ARM::R4, RegState::Kill);
+      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+        .addReg(ARM::R4, RegState::Kill));
      }
  
      AFI->setShouldRestoreSPFromFP(true);
      }
  
      AFI->setShouldRestoreSPFromFP(true);
@@ -275,6 +321,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
    // of the stack pointer is at this point. Any variable size objects
    // will be allocated after this, so we can still use the base pointer
    // to reference locals.
    // of the stack pointer is at this point. Any variable size objects
    // will be allocated after this, so we can still use the base pointer
    // to reference locals.
+  // FIXME: Clarify FrameSetup flags here.
    if (RegInfo->hasBasePointer(MF)) {
      if (isARM)
        BuildMI(MBB, MBBI, dl,
    if (RegInfo->hasBasePointer(MF)) {
      if (isARM)
        BuildMI(MBB, MBBI, dl,
@@ -282,9 +329,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
          .addReg(ARM::SP)
          .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
      else
          .addReg(ARM::SP)
          .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
      else
-      BuildMI(MBB, MBBI, dl,
-              TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister())
-        .addReg(ARM::SP);
+      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+                             RegInfo->getBaseRegister())
+        .addReg(ARM::SP));
    }
  
    // If the frame has variable sized objects then the epilogue must restore
    }
  
    // If the frame has variable sized objects then the epilogue must restore
@@ -295,10 +342,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
  }
  
  void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
  }
  
  void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
-                                MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  assert(MBBI->getDesc().isReturn() &&
-         "Can only insert epilog into returning blocks");
+                                    MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
    unsigned RetOpcode = MBBI->getOpcode();
    DebugLoc dl = MBBI->getDebugLoc();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    unsigned RetOpcode = MBBI->getOpcode();
    DebugLoc dl = MBBI->getDebugLoc();
    MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -314,12 +360,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
    int NumBytes = (int)MFI->getStackSize();
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
  
    int NumBytes = (int)MFI->getStackSize();
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
  
+  // All calls are tail calls in GHC calling conv, and functions have no
+  // prologue/epilogue.
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
+    return;
+
    if (!AFI->hasStackFrame()) {
      if (NumBytes != 0)
        emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
    } else {
      // Unwind MBBI to point to first LDR / VLDRD.
    if (!AFI->hasStackFrame()) {
      if (NumBytes != 0)
        emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
    } else {
      // Unwind MBBI to point to first LDR / VLDRD.
-    const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+    const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
      if (MBBI != MBB.begin()) {
        do
          --MBBI;
      if (MBBI != MBB.begin()) {
        do
          --MBBI;
@@ -343,7 +394,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                    ARMCC::AL, 0, TII);
          else {
            // It's not possible to restore SP from FP in a single instruction.
                                    ARMCC::AL, 0, TII);
          else {
            // It's not possible to restore SP from FP in a single instruction.
-          // For Darwin, this looks like:
+          // For iOS, this looks like:
            // mov sp, r7
            // sub sp, #24
            // This is bad, if an interrupt is taken after the mov, sp is in an
            // mov sp, r7
            // sub sp, #24
            // This is bad, if an interrupt is taken after the mov, sp is in an
@@ -353,8 +404,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                   "No scratch register to restore SP from FP!");
            emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                   ARMCC::AL, 0, TII);
                   "No scratch register to restore SP from FP!");
            emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
                                   ARMCC::AL, 0, TII);
-          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
-            .addReg(ARM::R4);
+          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+                                 ARM::SP)
+            .addReg(ARM::R4));
          }
        } else {
          // Thumb2 or ARM.
          }
        } else {
          // Thumb2 or ARM.
@@ -362,29 +414,35 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
            BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
              .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
          else
            BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
              .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
          else
-          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
-            .addReg(FramePtr);
+          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+                                 ARM::SP)
+            .addReg(FramePtr));
        }
      } else if (NumBytes)
        emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
  
      // Increment past our save areas.
        }
      } else if (NumBytes)
        emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
  
      // Increment past our save areas.
-    if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+    if (AFI->getDPRCalleeSavedAreaSize()) {
+      MBBI++;
+      // Since vpop register list cannot have gaps, there may be multiple vpop
+      // instructions in the epilogue.
+      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+        MBBI++;
+    }
      if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
      if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
    }
  
      if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
      if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
    }
  
-  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
-      RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
      // Tail call return: adjust the stack pointer and jump to callee.
      // Tail call return: adjust the stack pointer and jump to callee.
-    MBBI = prior(MBB.end());
+    MBBI = MBB.getLastNonDebugInstr();
      MachineOperand &JumpTarget = MBBI->getOperand(0);
  
      // Jump to label or value in register.
      MachineOperand &JumpTarget = MBBI->getOperand(0);
  
      // Jump to label or value in register.
-    if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
-      unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
-        ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
-        : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+    if (RetOpcode == ARM::TCRETURNdi) {
+      unsigned TCOpcode = STI.isThumb() ?
+               (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+               ARM::TAILJMPd;
        MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
        if (JumpTarget.isGlobal())
          MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
        MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
        if (JumpTarget.isGlobal())
          MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
@@ -394,11 +452,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
          MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                                JumpTarget.getTargetFlags());
        }
          MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                                JumpTarget.getTargetFlags());
        }
+
+      // Add the default predicate in Thumb mode.
+      if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
      } else if (RetOpcode == ARM::TCRETURNri) {
      } else if (RetOpcode == ARM::TCRETURNri) {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
-        addReg(JumpTarget.getReg(), RegState::Kill);
-    } else if (RetOpcode == ARM::TCRETURNriND) {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+      BuildMI(MBB, MBBI, dl,
+              TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
          addReg(JumpTarget.getReg(), RegState::Kill);
      }
  
          addReg(JumpTarget.getReg(), RegState::Kill);
      }
  
@@ -408,27 +467,27 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
  
      // Delete the pseudo instruction TCRETURN.
      MBB.erase(MBBI);
  
      // Delete the pseudo instruction TCRETURN.
      MBB.erase(MBBI);
+    MBBI = NewMI;
    }
  
    if (VARegSaveSize)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
  }
  
    }
  
    if (VARegSaveSize)
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
  }
  
-// Provide a base+offset reference to an FI slot for debug info. It's the
-// same as what we use for resolving the code-gen references for now.
-// FIXME: This can go wrong when references are SP-relative and simple call
-//        frames aren't used.
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info.  It's the same as what we use for resolving the code-gen
+/// references for now.  FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
  int
  ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
  int
  ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
-                                     unsigned &FrameReg) const {
+                                         unsigned &FrameReg) const {
    return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
  }
  
  int
  ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
    return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
  }
  
  int
  ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
-                                         int FI,
-                                         unsigned &FrameReg,
-                                         int SPAdj) const {
+                                             int FI, unsigned &FrameReg,
+                                             int SPAdj) const {
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    const ARMBaseRegisterInfo *RegInfo =
      static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    const ARMBaseRegisterInfo *RegInfo =
      static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
@@ -446,6 +505,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
    else if (AFI->isDPRCalleeSavedAreaFrame(FI))
      return Offset - AFI->getDPRCalleeSavedAreaOffset();
  
    else if (AFI->isDPRCalleeSavedAreaFrame(FI))
      return Offset - AFI->getDPRCalleeSavedAreaOffset();
  
+  // SP can move around if there are allocas.  We may also lose track of SP
+  // when emergency spilling inside a non-reserved call frame setup.
+  bool hasMovingSP = !hasReservedCallFrame(MF);
+
    // When dynamically realigning the stack, use the frame pointer for
    // parameters, and the stack/base pointer for locals.
    if (RegInfo->needsStackRealignment(MF)) {
    // When dynamically realigning the stack, use the frame pointer for
    // parameters, and the stack/base pointer for locals.
    if (RegInfo->needsStackRealignment(MF)) {
@@ -453,7 +516,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
      if (isFixed) {
        FrameReg = RegInfo->getFrameRegister(MF);
        Offset = FPOffset;
      if (isFixed) {
        FrameReg = RegInfo->getFrameRegister(MF);
        Offset = FPOffset;
-    } else if (MFI->hasVarSizedObjects()) {
+    } else if (hasMovingSP) {
        assert(RegInfo->hasBasePointer(MF) &&
               "VLAs and dynamic stack alignment, but missing base pointer!");
        FrameReg = RegInfo->getBaseRegister();
        assert(RegInfo->hasBasePointer(MF) &&
               "VLAs and dynamic stack alignment, but missing base pointer!");
        FrameReg = RegInfo->getBaseRegister();
@@ -465,25 +528,28 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
    if (hasFP(MF) && AFI->hasStackFrame()) {
      // Use frame pointer to reference fixed objects. Use it for locals if
      // there are VLAs (and thus the SP isn't reliable as a base).
    if (hasFP(MF) && AFI->hasStackFrame()) {
      // Use frame pointer to reference fixed objects. Use it for locals if
      // there are VLAs (and thus the SP isn't reliable as a base).
-    if (isFixed || (MFI->hasVarSizedObjects() &&
-                    !RegInfo->hasBasePointer(MF))) {
+    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
        FrameReg = RegInfo->getFrameRegister(MF);
        return FPOffset;
-    } else if (MFI->hasVarSizedObjects()) {
+    } else if (hasMovingSP) {
        assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
        assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
-      // Try to use the frame pointer if we can, else use the base pointer
-      // since it's available. This is handy for the emergency spill slot, in
-      // particular.
        if (AFI->isThumb2Function()) {
        if (AFI->isThumb2Function()) {
+        // Try to use the frame pointer if we can, else use the base pointer
+        // since it's available. This is handy for the emergency spill slot, in
+        // particular.
          if (FPOffset >= -255 && FPOffset < 0) {
            FrameReg = RegInfo->getFrameRegister(MF);
            return FPOffset;
          }
          if (FPOffset >= -255 && FPOffset < 0) {
            FrameReg = RegInfo->getFrameRegister(MF);
            return FPOffset;
          }
-      } else
-        FrameReg = RegInfo->getBaseRegister();
+      }
      } else if (AFI->isThumb2Function()) {
      } else if (AFI->isThumb2Function()) {
+      // Use  add <rd>, sp, #<imm8>
+      //      ldr <rd>, [sp, #<imm8>]
+      // if at all possible to save space.
+      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
+        return Offset;
        // In Thumb2 mode, the negative offset is very limited. Try to avoid
        // In Thumb2 mode, the negative offset is very limited. Try to avoid
-      // out of range references.
+      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
        if (FPOffset >= -255 && FPOffset < 0) {
          FrameReg = RegInfo->getFrameRegister(MF);
          return FPOffset;
@@ -500,16 +566,20 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
    return Offset;
  }
  
    return Offset;
  }
  
-int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                          int FI) const {
    unsigned FrameReg;
    return getFrameIndexReference(MF, FI, FrameReg);
  }
  
  void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
    unsigned FrameReg;
    return getFrameIndexReference(MF, FI, FrameReg);
  }
  
  void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI,
-                                unsigned StmOpc, unsigned StrOpc, bool NoGap,
-                                bool(*Func)(unsigned, bool)) const {
+                                    MachineBasicBlock::iterator MI,
+                                    const std::vector<CalleeSavedInfo> &CSI,
+                                    unsigned StmOpc, unsigned StrOpc,
+                                    bool NoGap,
+                                    bool(*Func)(unsigned, bool),
+                                    unsigned NumAlignedDPRCS2Regs,
+                                    unsigned MIFlags) const {
    MachineFunction &MF = *MBB.getParent();
    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  
    MachineFunction &MF = *MBB.getParent();
    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  
@@ -522,7 +592,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
      unsigned LastReg = 0;
      for (; i != 0; --i) {
        unsigned Reg = CSI[i-1].getReg();
      unsigned LastReg = 0;
      for (; i != 0; --i) {
        unsigned Reg = CSI[i-1].getReg();
-      if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+      if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+      // D-registers in the aligned area DPRCS2 are NOT spilled here.
+      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+        continue;
  
        // Add the callee-saved register as live-in unless it's LR and
        // @llvm.returnaddress is called. If LR is returned for
  
        // Add the callee-saved register as live-in unless it's LR and
        // @llvm.returnaddress is called. If LR is returned for
@@ -552,21 +626,15 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
      if (Regs.size() > 1 || StrOpc== 0) {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
      if (Regs.size() > 1 || StrOpc== 0) {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
-                       .addReg(ARM::SP));
+                       .addReg(ARM::SP).setMIFlags(MIFlags));
        for (unsigned i = 0, e = Regs.size(); i < e; ++i)
          MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
      } else if (Regs.size() == 1) {
        MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
                                          ARM::SP)
          .addReg(Regs[0].first, getKillRegState(Regs[0].second))
        for (unsigned i = 0, e = Regs.size(); i < e; ++i)
          MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
      } else if (Regs.size() == 1) {
        MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
                                          ARM::SP)
          .addReg(Regs[0].first, getKillRegState(Regs[0].second))
-        .addReg(ARM::SP);
-      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
-      // that refactoring is complete (eventually).
-      if (StrOpc == ARM::STR_PRE) {
-        MIB.addReg(0);
-        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift));
-      } else
-        MIB.addImm(-4);
+        .addReg(ARM::SP).setMIFlags(MIFlags)
+        .addImm(-4);
        AddDefaultPred(MIB);
      }
      Regs.clear();
        AddDefaultPred(MIB);
      }
      Regs.clear();
@@ -574,15 +642,19 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
  }
  
  void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
  }
  
  void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator MI,
-                               const std::vector<CalleeSavedInfo> &CSI,
-                               unsigned LdmOpc, unsigned LdrOpc,
-                               bool isVarArg, bool NoGap,
-                               bool(*Func)(unsigned, bool)) const {
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   unsigned LdmOpc, unsigned LdrOpc,
+                                   bool isVarArg, bool NoGap,
+                                   bool(*Func)(unsigned, bool),
+                                   unsigned NumAlignedDPRCS2Regs) const {
    MachineFunction &MF = *MBB.getParent();
    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    DebugLoc DL = MI->getDebugLoc();
    MachineFunction &MF = *MBB.getParent();
    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    DebugLoc DL = MI->getDebugLoc();
+  unsigned RetOpcode = MI->getOpcode();
+  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
+                     RetOpcode == ARM::TCRETURNri);
  
    SmallVector<unsigned, 4> Regs;
    unsigned i = CSI.size();
  
    SmallVector<unsigned, 4> Regs;
    unsigned i = CSI.size();
@@ -591,9 +663,13 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
      bool DeleteRet = false;
      for (; i != 0; --i) {
        unsigned Reg = CSI[i-1].getReg();
      bool DeleteRet = false;
      for (; i != 0; --i) {
        unsigned Reg = CSI[i-1].getReg();
-      if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+      if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+      // The aligned reloads from area DPRCS2 are not inserted here.
+      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+        continue;
  
  
-      if (Reg == ARM::LR && !isVarArg && STI.hasV5TOps()) {
+      if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
          Reg = ARM::PC;
          LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
          // Fold the return instruction into the LDM.
          Reg = ARM::PC;
          LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
          // Fold the return instruction into the LDM.
@@ -618,8 +694,10 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                         .addReg(ARM::SP));
        for (unsigned i = 0, e = Regs.size(); i < e; ++i)
          MIB.addReg(Regs[i], getDefRegState(true));
                         .addReg(ARM::SP));
        for (unsigned i = 0, e = Regs.size(); i < e; ++i)
          MIB.addReg(Regs[i], getDefRegState(true));
-      if (DeleteRet)
+      if (DeleteRet) {
+        MIB.copyImplicitOps(&*MI);
          MI->eraseFromParent();
          MI->eraseFromParent();
+      }
        MI = MIB;
      } else if (Regs.size() == 1) {
        // If we adjusted the reg to PC from LR above, switch it back here. We
        MI = MIB;
      } else if (Regs.size() == 1) {
        // If we adjusted the reg to PC from LR above, switch it back here. We
@@ -632,7 +710,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
            .addReg(ARM::SP);
        // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
        // that refactoring is complete (eventually).
            .addReg(ARM::SP);
        // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
        // that refactoring is complete (eventually).
-      if (LdrOpc == ARM::LDR_POST) {
+      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
          MIB.addReg(0);
          MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
        } else
          MIB.addReg(0);
          MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
        } else
@@ -643,47 +721,304 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
    }
  }
  
    }
  }
  
+/// Emit aligned spill instructions before MI for NumAlignedDPRCS2Regs
+/// D-registers starting from d8, using r4 as scratch.  Also insert stack
+/// realignment code and leave the stack pointer pointing to the d8 spill slot.
+static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned NumAlignedDPRCS2Regs,
+                                    const std::vector<CalleeSavedInfo> &CSI,
+                                    const TargetRegisterInfo *TRI) {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc DL = MI->getDebugLoc();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+  // Mark the D-register spill slots as properly aligned.  Since MFI computes
+  // stack slot layout backwards, this can actually mean that the d-reg stack
+  // slot offsets can be wrong. The offset for d8 will always be correct.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned DNum = CSI[i].getReg() - ARM::D8;
+    if (DNum >= 8) // Skip regs outside [D8, D8+8); lower ones wrap (unsigned).
+      continue;
+    int FI = CSI[i].getFrameIdx();
+    // The even-numbered registers will be 16-byte aligned, the odd-numbered
+    // registers will be 8-byte aligned.
+    MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
+
+    // The stack slot for D8 needs to be maximally aligned because this is
+    // actually the point where we align the stack pointer.  MachineFrameInfo
+    // computes all offsets relative to the incoming stack pointer which is a
+    // bit weird when realigning the stack.  Any extra padding for this
+    // over-alignment is not realized because the code inserted below adjusts
+    // the stack pointer by numregs * 8 before aligning the stack pointer.
+    if (DNum == 0)
+      MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
+  }
+
+  // Move the stack pointer to the d8 spill slot, and align it at the same
+  // time. Leave the stack slot address in the scratch register r4.
+  //
+  //   sub r4, sp, #numregs * 8
+  //   bic r4, r4, #align - 1
+  //   mov sp, r4
+  //
+  bool isThumb = AFI->isThumbFunction();
+  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+  AFI->setShouldRestoreSPFromFP(true);
+
+  // sub r4, sp, #numregs * 8
+  // The immediate is <= 64, so it doesn't need any special encoding.
+  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
+  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+                              .addReg(ARM::SP)
+                              .addImm(8 * NumAlignedDPRCS2Regs)));
+
+  // bic r4, r4, #align-1
+  Opc = isThumb ? ARM::t2BICri : ARM::BICri;
+  unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
+  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+                              .addReg(ARM::R4, RegState::Kill)
+                              .addImm(MaxAlign - 1)));
+
+  // mov sp, r4
+  // The stack pointer must be adjusted before spilling anything, otherwise
+  // the stack slots could be clobbered by an interrupt handler.
+  // Leave r4 live, it is used below.
+  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
+                            .addReg(ARM::R4);
+  MIB = AddDefaultPred(MIB);
+  if (!isThumb)
+    AddDefaultCC(MIB);
+
+  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
+  // r4 holds the stack slot address.
+  unsigned NextReg = ARM::D8;
+
+  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
+  // The writeback is only needed when emitting two vst1.64 instructions.
+  if (NumAlignedDPRCS2Regs >= 6) {
+    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+                                               &ARM::QQPRRegClass);
+    MBB.addLiveIn(SupReg);
+    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
+                           ARM::R4)
+                   .addReg(ARM::R4, RegState::Kill).addImm(16)
+                   .addReg(NextReg)
+                   .addReg(SupReg, RegState::ImplicitKill));
+    NextReg += 4;
+    NumAlignedDPRCS2Regs -= 4;
+  }
+
+  // We won't modify r4 beyond this point.  It currently points to the next
+  // register to be spilled.
+  unsigned R4BaseReg = NextReg;
+
+  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
+  if (NumAlignedDPRCS2Regs >= 4) {
+    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+                                               &ARM::QQPRRegClass);
+    MBB.addLiveIn(SupReg);
+    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+                   .addReg(ARM::R4).addImm(16).addReg(NextReg)
+                   .addReg(SupReg, RegState::ImplicitKill));
+    NextReg += 4;
+    NumAlignedDPRCS2Regs -= 4;
+  }
+
+  // 16-byte aligned vst1.64 with 2 d-regs.
+  if (NumAlignedDPRCS2Regs >= 2) {
+    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+                                               &ARM::QPRRegClass);
+    MBB.addLiveIn(SupReg);
+    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+                   .addReg(ARM::R4).addImm(16).addReg(SupReg));
+    NextReg += 2;
+    NumAlignedDPRCS2Regs -= 2;
+  }
+
+  // Finally, use a vanilla vstr.64 for the odd last register.
+  if (NumAlignedDPRCS2Regs) {
+    MBB.addLiveIn(NextReg);
+    // vstr.64 uses addrmode5 (offset scale 4), hence 2 units per 8-byte d-reg.
+    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+                   .addReg(NextReg)
+                   .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+  }
+
+  // The last spill instruction inserted should kill the scratch register r4.
+  llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
+/// Skip past the code inserted by emitAlignedDPRCS2Spills, which must start at
+/// MI, and return an iterator to the following instruction.
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+                        unsigned NumAlignedDPRCS2Regs) {
+  //   sub r4, sp, #numregs * 8
+  //   bic r4, r4, #align - 1
+  //   mov sp, r4
+  ++MI; ++MI; ++MI; // Step over the three realignment instructions above.
+  assert(MI->mayStore() && "Expecting spill instruction");
+
+  // Every case falls through.  MI is on the first store; skip the others.
+  switch(NumAlignedDPRCS2Regs) {
+  case 7: // Three stores were emitted.
+    ++MI;
+    assert(MI->mayStore() && "Expecting spill instruction");
+  default: // Two stores were emitted (expected for 3, 5, 6 or 8 registers).
+    ++MI;
+    assert(MI->mayStore() && "Expecting spill instruction");
+  case 1:
+  case 2:
+  case 4: // 1, 2 or 4 registers: a single store, which must kill r4.
+    assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
+    ++MI;
+  }
+  return MI;
+}
+
+/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8.  These instructions are assumed to execute while the
+/// stack is still aligned, unlike the code inserted by emitPopInst.
+static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MI,
+                                      unsigned NumAlignedDPRCS2Regs,
+                                      const std::vector<CalleeSavedInfo> &CSI,
+                                      const TargetRegisterInfo *TRI) {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc DL = MI->getDebugLoc();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  // Find the frame index assigned to d8.
+  int D8SpillFI = 0;
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+    if (CSI[i].getReg() == ARM::D8) {
+      D8SpillFI = CSI[i].getFrameIdx();
+      break;
+    }
+
+  // Materialize the address of the d8 spill slot into the scratch register r4.
+  // This can be fairly complicated if the stack frame is large, so just use
+  // the normal frame index elimination mechanism to do it.  This code runs as
+  // the initial part of the epilog where the stack and base pointers haven't
+  // been changed yet.
+  bool isThumb = AFI->isThumbFunction();
+  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+
+  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+                              .addFrameIndex(D8SpillFI).addImm(0)));
+
+  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
+  unsigned NextReg = ARM::D8;
+
+  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
+  if (NumAlignedDPRCS2Regs >= 6) {
+    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+                                               &ARM::QQPRRegClass);
+    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+                   .addReg(ARM::R4, RegState::Define)
+                   .addReg(ARM::R4, RegState::Kill).addImm(16)
+                   .addReg(SupReg, RegState::ImplicitDefine));
+    NextReg += 4;
+    NumAlignedDPRCS2Regs -= 4;
+  }
+
+  // We won't modify r4 beyond this point.  It currently points to the spill
+  // slot of the next register to be reloaded.
+  unsigned R4BaseReg = NextReg;
+
+  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
+  if (NumAlignedDPRCS2Regs >= 4) {
+    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+                                               &ARM::QQPRRegClass);
+    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+                   .addReg(ARM::R4).addImm(16)
+                   .addReg(SupReg, RegState::ImplicitDefine));
+    NextReg += 4;
+    NumAlignedDPRCS2Regs -= 4;
+  }
+
+  // 16-byte aligned vld1.64 with 2 d-regs.
+  if (NumAlignedDPRCS2Regs >= 2) {
+    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+                                               &ARM::QPRRegClass);
+    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+                   .addReg(ARM::R4).addImm(16));
+    NextReg += 2;
+    NumAlignedDPRCS2Regs -= 2;
+  }
+
+  // Finally, use a vanilla vldr.64 for the remaining odd register.
+  if (NumAlignedDPRCS2Regs)
+    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+                   .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+
+  // The last reload instruction inserted kills the scratch register r4.
+  llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
  bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
  bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                             MachineBasicBlock::iterator MI,
-                                       const std::vector<CalleeSavedInfo> &CSI,
-                                       const TargetRegisterInfo *TRI) const {
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
    if (CSI.empty())
      return false;
  
    MachineFunction &MF = *MBB.getParent();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    if (CSI.empty())
      return false;
  
    MachineFunction &MF = *MBB.getParent();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc DL = MI->getDebugLoc();
  
    unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  
    unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
-  unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
+  unsigned PushOneOpc = AFI->isThumbFunction() ?
+    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
    unsigned FltOpc = ARM::VSTMDDB_UPD;
    unsigned FltOpc = ARM::VSTMDDB_UPD;
-  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register);
-  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register);
-  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register);
+  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
+               MachineInstr::FrameSetup);
+  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
+               MachineInstr::FrameSetup);
+  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+               NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+
+  // The code above does not insert spill code for the aligned DPRCS2 registers.
+  // The stack realignment code will be inserted between the push instructions
+  // and these spills.
+  if (NumAlignedDPRCS2Regs)
+    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
  
    return true;
  }
  
  bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
  
    return true;
  }
  
  bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                               MachineBasicBlock::iterator MI,
-                                       const std::vector<CalleeSavedInfo> &CSI,
-                                         const TargetRegisterInfo *TRI) const {
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
    if (CSI.empty())
      return false;
  
    MachineFunction &MF = *MBB.getParent();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
    if (CSI.empty())
      return false;
  
    MachineFunction &MF = *MBB.getParent();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
-  DebugLoc DL = MI->getDebugLoc();
+  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+
+  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
+  // registers. Do that here instead.
+  if (NumAlignedDPRCS2Regs)
+    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
  
    unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  
    unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
-  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST;
+  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
    unsigned FltOpc = ARM::VLDMDIA_UPD;
    unsigned FltOpc = ARM::VLDMDIA_UPD;
-  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+              NumAlignedDPRCS2Regs);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-              &isARMArea2Register);
+              &isARMArea2Register, 0);
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
    emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
-              &isARMArea1Register);
+              &isARMArea1Register, 0);
  
    return true;
  }
  
    return true;
  }
@@ -705,20 +1040,52 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
  /// estimateStackSize - Estimate and return the size of the frame.
  /// FIXME: Make generic?
  static unsigned estimateStackSize(MachineFunction &MF) {
  /// estimateStackSize - Estimate and return the size of the frame.
  /// FIXME: Make generic?
  static unsigned estimateStackSize(MachineFunction &MF) {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  unsigned MaxAlign = MFI->getMaxAlignment();
    int Offset = 0;
    int Offset = 0;
-  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -FFI->getObjectOffset(i);
+
+  // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+  // It really should be refactored to share code. Until then, changes
+  // should keep in mind that there's tight coupling between the two.
+
+  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+    int FixedOff = -MFI->getObjectOffset(i);
      if (FixedOff > Offset) Offset = FixedOff;
    }
      if (FixedOff > Offset) Offset = FixedOff;
    }
-  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isDeadObjectIndex(i))
        continue;
        continue;
-    Offset += FFI->getObjectSize(i);
-    unsigned Align = FFI->getObjectAlignment(i);
+    Offset += MFI->getObjectSize(i);
+    unsigned Align = MFI->getObjectAlignment(i);
      // Adjust to alignment boundary
      Offset = (Offset+Align-1)/Align*Align;
      // Adjust to alignment boundary
      Offset = (Offset+Align-1)/Align*Align;
+
+    MaxAlign = std::max(Align, MaxAlign);
    }
    }
+
+  if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+    Offset += MFI->getMaxCallFrameSize();
+
+  // Round up the size to a multiple of the alignment.  If the function has
+  // any calls or alloca's, align to the target's StackAlignment value to
+  // ensure that the callee's frame or the alloca data is suitably aligned;
+  // otherwise, for leaf functions, align to the TransientStackAlignment
+  // value.
+  unsigned StackAlign;
+  if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+      (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+    StackAlign = TFI->getStackAlignment();
+  else
+    StackAlign = TFI->getTransientStackAlignment();
+
+  // If the frame pointer is eliminated, all frame offsets will be relative to
+  // SP not FP. Align to MaxAlign so this works.
+  StackAlign = std::max(StackAlign, MaxAlign);
+  unsigned AlignMask = StackAlign - 1;
+  Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
    return (unsigned)Offset;
  }
  
    return (unsigned)Offset;
  }
  
@@ -775,9 +1142,59 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
    return Limit;
  }
  
    return Limit;
  }
  
+// In functions that realign the stack, it can be an advantage to spill the
+// callee-saved vector registers after realigning the stack. The vst1 and vld1
+// instructions take alignment hints that can improve performance.
+// Records the count in ARMFunctionInfo; 0 means no aligned spills are used.
+static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
+  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
+  if (!SpillAlignedNEONRegs)
+    return;
+
+  // Naked functions don't spill callee-saved registers.
+  if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                                     Attribute::Naked))
+    return;
+
+  // We are planning to use NEON instructions vst1 / vld1.
+  if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
+    return;
+
+  // Don't bother if the default stack alignment is sufficiently high.
+  if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
+    return;
+
+  // Aligned spills require stack realignment.
+  const ARMBaseRegisterInfo *RegInfo =
+    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  if (!RegInfo->canRealignStack(MF))
+    return;
+
+  // We always spill contiguous d-registers starting from d8. Count how many
+  // need spilling.  The register allocator will almost always use the
+  // callee-saved registers in order, but it can happen that there are holes in
+  // the range.  Registers above the hole will be spilled to the standard DPRCS
+  // area.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  unsigned NumSpills = 0;
+  for (; NumSpills < 8; ++NumSpills)
+    if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
+      break;
+
+  // Don't do this for just one d-register. It's not worth it.
+  if (NumSpills < 2)
+    return;
+
+  // Spill the first NumSpills D-registers after realigning the stack.
+  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
+
+  // A scratch register is required for the vst1 / vld1 instructions.
+  MF.getRegInfo().setPhysRegUsed(ARM::R4);
+}
+
  void
  ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
  void
  ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                   RegScavenger *RS) const {
+                                                       RegScavenger *RS) const {
    // This tells PEI to spill the FP as if it is any other callee-save register
    // to take advantage the eliminateFrameIndex machinery. This also ensures it
    // is spilled in the order specified by getCalleeSavedRegs() to make it easier
    // This tells PEI to spill the FP as if it is any other callee-save register
    // to take advantage the eliminateFrameIndex machinery. This also ensures it
    // is spilled in the order specified by getCalleeSavedRegs() to make it easier
@@ -794,52 +1211,59 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
      *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    MachineFrameInfo *MFI = MF.getFrameInfo();
      *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
  
    // Spill R4 if Thumb2 function requires stack realignment - it will be used as
    // scratch register. Also spill R4 if Thumb2 function has varsized objects,
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
  
    // Spill R4 if Thumb2 function requires stack realignment - it will be used as
    // scratch register. Also spill R4 if Thumb2 function has varsized objects,
-  // since it's always posible to restore sp from fp in a single instruction.
+  // since it's not always possible to restore sp from fp in a single
+  // instruction.
    // FIXME: It will be better just to find spare register here.
    if (AFI->isThumb2Function() &&
        (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
    // FIXME: It will be better just to find spare register here.
    if (AFI->isThumb2Function() &&
        (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
-    MF.getRegInfo().setPhysRegUsed(ARM::R4);
+    MRI.setPhysRegUsed(ARM::R4);
+
+  if (AFI->isThumb1OnlyFunction()) {
+    // Spill LR if Thumb1 function uses variable length argument lists.
+    if (AFI->getVarArgsRegSaveSize() > 0)
+      MRI.setPhysRegUsed(ARM::LR);
+
+    // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
+    // for sure what the stack size will be, but for this, an estimate is good
+    // enough. If anything changes it, it'll be a spill, which implies
+    // we've used all the registers and so R4 is already used, so not marking
+    // it here will be OK.
+    // FIXME: It will be better just to find spare register here.
+    unsigned StackSize = estimateStackSize(MF);
+    if (MFI->hasVarSizedObjects() || StackSize > 508)
+      MRI.setPhysRegUsed(ARM::R4);
+  }
  
  
-  // Spill LR if Thumb1 function uses variable length argument lists.
-  if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
-    MF.getRegInfo().setPhysRegUsed(ARM::LR);
+  // See if we can spill vector registers to aligned stack.
+  checkNumAlignedDPRCS2Regs(MF);
  
    // Spill the BasePtr if it's used.
    if (RegInfo->hasBasePointer(MF))
  
    // Spill the BasePtr if it's used.
    if (RegInfo->hasBasePointer(MF))
-    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+    MRI.setPhysRegUsed(RegInfo->getBaseRegister());
  
    // Don't spill FP if the frame can be eliminated. This is determined
    // by scanning the callee-save registers to see if any is used.
  
    // Don't spill FP if the frame can be eliminated. This is determined
    // by scanning the callee-save registers to see if any is used.
-  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
    for (unsigned i = 0; CSRegs[i]; ++i) {
      unsigned Reg = CSRegs[i];
      bool Spilled = false;
    for (unsigned i = 0; CSRegs[i]; ++i) {
      unsigned Reg = CSRegs[i];
      bool Spilled = false;
-    if (MF.getRegInfo().isPhysRegUsed(Reg)) {
-      AFI->setCSRegisterIsSpilled(Reg);
+    if (MRI.isPhysRegUsed(Reg)) {
        Spilled = true;
        CanEliminateFrame = false;
        Spilled = true;
        CanEliminateFrame = false;
-    } else {
-      // Check alias registers too.
-      for (const unsigned *Aliases =
-             RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
-        if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
-          Spilled = true;
-          CanEliminateFrame = false;
-        }
-      }
      }
  
      }
  
-    if (!ARM::GPRRegisterClass->contains(Reg))
+    if (!ARM::GPRRegClass.contains(Reg))
        continue;
  
      if (Spilled) {
        NumGPRSpills++;
  
        continue;
  
      if (Spilled) {
        NumGPRSpills++;
  
-      if (!STI.isTargetDarwin()) {
+      if (!STI.isTargetIOS()) {
          if (Reg == ARM::LR)
            LRSpilled = true;
          CS1Spilled = true;
          if (Reg == ARM::LR)
            LRSpilled = true;
          CS1Spilled = true;
@@ -859,7 +1283,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
          break;
        }
      } else {
          break;
        }
      } else {
-      if (!STI.isTargetDarwin()) {
+      if (!STI.isTargetIOS()) {
          UnspilledCS1GPRs.push_back(Reg);
          continue;
        }
          UnspilledCS1GPRs.push_back(Reg);
          continue;
        }
@@ -917,8 +1341,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
      // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
      // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
      if (!LRSpilled && CS1Spilled) {
      // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
      // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
      if (!LRSpilled && CS1Spilled) {
-      MF.getRegInfo().setPhysRegUsed(ARM::LR);
-      AFI->setCSRegisterIsSpilled(ARM::LR);
+      MRI.setPhysRegUsed(ARM::LR);
        NumGPRSpills++;
        UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
                                      UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
        NumGPRSpills++;
        UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
                                      UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
@@ -927,7 +1350,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
      }
  
      if (hasFP(MF)) {
      }
  
      if (hasFP(MF)) {
-      MF.getRegInfo().setPhysRegUsed(FramePtr);
+      MRI.setPhysRegUsed(FramePtr);
        NumGPRSpills++;
      }
  
        NumGPRSpills++;
      }
  
@@ -942,18 +1365,16 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
            // Don't spill high register if the function is thumb1
            if (!AFI->isThumb1OnlyFunction() ||
                isARMLowRegister(Reg) || Reg == ARM::LR) {
            // Don't spill high register if the function is thumb1
            if (!AFI->isThumb1OnlyFunction() ||
                isARMLowRegister(Reg) || Reg == ARM::LR) {
-            MF.getRegInfo().setPhysRegUsed(Reg);
-            AFI->setCSRegisterIsSpilled(Reg);
-            if (!RegInfo->isReservedReg(MF, Reg))
+            MRI.setPhysRegUsed(Reg);
+            if (!MRI.isReserved(Reg))
                ExtraCSSpill = true;
              break;
            }
          }
        } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
          unsigned Reg = UnspilledCS2GPRs.front();
                ExtraCSSpill = true;
              break;
            }
          }
        } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
          unsigned Reg = UnspilledCS2GPRs.front();
-        MF.getRegInfo().setPhysRegUsed(Reg);
-        AFI->setCSRegisterIsSpilled(Reg);
-        if (!RegInfo->isReservedReg(MF, Reg))
+        MRI.setPhysRegUsed(Reg);
+        if (!MRI.isReserved(Reg))
            ExtraCSSpill = true;
        }
      }
            ExtraCSSpill = true;
        }
      }
@@ -971,7 +1392,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
        while (NumExtras && !UnspilledCS1GPRs.empty()) {
          unsigned Reg = UnspilledCS1GPRs.back();
          UnspilledCS1GPRs.pop_back();
        while (NumExtras && !UnspilledCS1GPRs.empty()) {
          unsigned Reg = UnspilledCS1GPRs.back();
          UnspilledCS1GPRs.pop_back();
-        if (!RegInfo->isReservedReg(MF, Reg) &&
+        if (!MRI.isReserved(Reg) &&
              (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
               Reg == ARM::LR)) {
            Extras.push_back(Reg);
              (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
               Reg == ARM::LR)) {
            Extras.push_back(Reg);
@@ -983,7 +1404,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
          while (NumExtras && !UnspilledCS2GPRs.empty()) {
            unsigned Reg = UnspilledCS2GPRs.back();
            UnspilledCS2GPRs.pop_back();
          while (NumExtras && !UnspilledCS2GPRs.empty()) {
            unsigned Reg = UnspilledCS2GPRs.back();
            UnspilledCS2GPRs.pop_back();
-          if (!RegInfo->isReservedReg(MF, Reg)) {
+          if (!MRI.isReserved(Reg)) {
              Extras.push_back(Reg);
              NumExtras--;
            }
              Extras.push_back(Reg);
              NumExtras--;
            }
@@ -991,13 +1412,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
        }
        if (Extras.size() && NumExtras == 0) {
          for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
        }
        if (Extras.size() && NumExtras == 0) {
          for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
-          MF.getRegInfo().setPhysRegUsed(Extras[i]);
-          AFI->setCSRegisterIsSpilled(Extras[i]);
+          MRI.setPhysRegUsed(Extras[i]);
          }
        } else if (!AFI->isThumb1OnlyFunction()) {
          // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
          // closest to SP or frame pointer.
          }
        } else if (!AFI->isThumb1OnlyFunction()) {
          // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
          // closest to SP or frame pointer.
-        const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+        const TargetRegisterClass *RC = &ARM::GPRRegClass;
          RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                             RC->getAlignment(),
                                                             false));
          RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
                                                             RC->getAlignment(),
                                                             false));
@@ -1006,8 +1426,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
    }
  
    if (ForceLRSpill) {
    }
  
    if (ForceLRSpill) {
-    MF.getRegInfo().setPhysRegUsed(ARM::LR);
-    AFI->setCSRegisterIsSpilled(ARM::LR);
+    MRI.setPhysRegUsed(ARM::LR);
      AFI->setLRIsSpilledForFarJump(true);
    }
  }
      AFI->setLRIsSpilledForFarJump(true);
    }
  }