[C++11] Add 'override' keyword to virtual methods that override their base class.

[oota-llvm.git] / lib / Target / ARM / ARMLoadStoreOptimizer.cpp
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp

index f75ac90b22ea2d9b934faca14604e2df0315fce2..098faaab2423702cd250c786ee91b298a82cdc9e 100644 (file)
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -18,8 +18,12 @@
  #include "ARMBaseRegisterInfo.h"
  #include "ARMMachineFunctionInfo.h"
  #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/CodeGen/MachineInstr.h"
@@ -27,19 +31,15 @@
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/RegisterScavenging.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
  using namespace llvm;
  
  STATISTIC(NumLDMGened , "Number of ldm instructions generated");
@@ -69,9 +69,9 @@ namespace {
      RegScavenger *RS;
      bool isThumb2;
  
-    virtual bool runOnMachineFunction(MachineFunction &Fn);
+    bool runOnMachineFunction(MachineFunction &Fn) override;
  
-    virtual const char *getPassName() const {
+    const char *getPassName() const override {
        return "ARM load / store optimization pass";
      }
  
@@ -90,6 +90,10 @@ namespace {
      typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
      typedef MemOpQueue::iterator MemOpQueueIter;
  
+    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
+                          const MemOpQueue &MemOps, unsigned DefReg,
+                          unsigned RangeBegin, unsigned RangeEnd);
+
      bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    int Offset, unsigned Base, bool BaseKill, int Opcode,
                    ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@@ -109,12 +113,12 @@ namespace {
                          unsigned PredReg,
                          unsigned Scratch,
                          DebugLoc dl,
-                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
      void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                        int Opcode, unsigned Size,
                        ARMCC::CondCodes Pred, unsigned PredReg,
                        unsigned Scratch, MemOpQueue &MemOps,
-                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
+                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
  
      void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
      bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -360,6 +364,62 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
    return true;
  }
  
+/// \brief Find all instructions using a given imp-def within a range.
+///
+/// We are trying to combine a range of instructions, one of which (located at
+/// position RangeBegin) implicitly defines a register. The final LDM/STM will
+/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
+/// (exclusive) and RangeEnd (inclusive) must be modified to use an undef value.
+///
+/// The live range continues until we find a second definition or one of the
+/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
+/// we must consider all uses and decide which are relevant in a second pass.
+void ARMLoadStoreOpt::findUsesOfImpDef(
+    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
+    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
+  std::map<unsigned, MachineOperand *> Uses;
+  unsigned LastLivePos = RangeEnd;
+
+  // First we find all uses of this register with Position between RangeBegin
+  // and RangeEnd, any or all of these could be uses of a definition at
+  // RangeBegin. We also record the latest position a definition at RangeBegin
+  // would be considered live.
+  for (unsigned i = 0; i < MemOps.size(); ++i) {
+    MachineInstr &MI = *MemOps[i].MBBI;
+    unsigned MIPosition = MemOps[i].Position;
+    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
+      continue;
+
+    // If this instruction defines the register, then any later use will be of
+    // that definition rather than ours.
+    if (MI.definesRegister(DefReg))
+      LastLivePos = std::min(LastLivePos, MIPosition);
+
+    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
+    if (!UseOp)
+      continue;
+
+    // If this instruction kills the register then (assuming liveness is
+    // correct when we start) we don't need to think about anything after here.
+    if (UseOp->isKill())
+      LastLivePos = std::min(LastLivePos, MIPosition);
+
+    Uses[MIPosition] = UseOp;
+  }
+
+  // Now we traverse the map of all uses, and append the ones that actually use
+  // our definition to the requested list.
+  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
+                                                      E = Uses.end();
+       I != E; ++I) {
+    // The map is ordered by position so once we've found one out of range
+    // there will be no more to consider.
+    if (I->first > LastLivePos)
+      break;
+    UsesOfImpDefs.push_back(I->second);
+  }
+}
+
  // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
  // success.
  void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
@@ -371,7 +431,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                       ARMCC::CondCodes Pred, unsigned PredReg,
                                       unsigned Scratch,
                                       DebugLoc dl,
-                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
    // First calculate which of the registers should be killed by the merged
    // instruction.
    const unsigned insertPos = memOps[insertAfter].Position;
@@ -392,6 +452,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
  
    SmallVector<std::pair<unsigned, bool>, 8> Regs;
    SmallVector<unsigned, 8> ImpDefs;
+  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
    for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
      unsigned Reg = memOps[i].Reg;
      // If we are inserting the merged operation after an operation that
@@ -406,6 +467,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
        unsigned DefReg = MO->getReg();
        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
          ImpDefs.push_back(DefReg);
+
+      // There may be other uses of the definition between this instruction and
+      // the eventual LDM/STM position. These should be marked undef if the
+      // merge takes place.
+      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
+                       insertPos);
      }
    }
  
@@ -417,7 +484,17 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
      return;
  
    // Merge succeeded, update records.
-  Merges.push_back(prior(Loc));
+  Merges.push_back(std::prev(Loc));
+
+  // In gathering loads together, we may have moved the imp-def of a register
+  // past one of its uses. This is OK, since we know better than the rest of
+  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
+  // affected uses.
+  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
+                                                   E = UsesOfImpDefs.end();
+       I != E; ++I)
+    (*I)->setIsUndef();
+
    for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
      // Remove kill flags from any memops that come before insertPos.
      if (Regs[i-memOpsBegin].second) {
@@ -444,10 +521,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
  /// load / store multiple instructions.
  void
  ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
-                          unsigned Base, int Opcode, unsigned Size,
-                          ARMCC::CondCodes Pred, unsigned PredReg,
-                          unsigned Scratch, MemOpQueue &MemOps,
-                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+                         unsigned Base, int Opcode, unsigned Size,
+                         ARMCC::CondCodes Pred, unsigned PredReg,
+                         unsigned Scratch, MemOpQueue &MemOps,
+                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
    bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
    int Offset = MemOps[SIndex].Offset;
    int SOffset = Offset;
@@ -456,8 +533,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
    DebugLoc dl = Loc->getDebugLoc();
    const MachineOperand &PMO = Loc->getOperand(0);
    unsigned PReg = PMO.getReg();
-  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
-    : getARMRegisterNumbering(PReg);
+  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Count = 1;
    unsigned Limit = ~0U;
  
@@ -483,15 +559,17 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
      int NewOffset = MemOps[i].Offset;
      const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
-    unsigned RegNum = MO.isUndef() ? UINT_MAX
-      : getARMRegisterNumbering(Reg);
+    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      // Register numbers must be in ascending order. For VFP / NEON load and
      // store multiples, the registers must also be consecutive and within the
      // limit on the number of registers per instruction.
      if (Reg != ARM::SP &&
          NewOffset == Offset + (int)Size &&
          ((isNotVFP && RegNum > PRegNum) ||
-         ((Count < Limit) && RegNum == PRegNum+1))) {
+         ((Count < Limit) && RegNum == PRegNum+1)) &&
+        // On Swift we don't want vldm/vstm to start with an odd register number
+        // because Q register unaligned vldm/vstm need more uops.
+        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
        Offset += Size;
        PRegNum = RegNum;
        ++Count;
@@ -734,7 +812,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
    // Try merging with the previous instruction.
    MachineBasicBlock::iterator BeginMBBI = MBB.begin();
    if (MBBI != BeginMBBI) {
-    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
      while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
        --PrevMBBI;
      if (Mode == ARM_AM::ia &&
@@ -753,7 +831,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
    // Try merging with the next instruction.
    MachineBasicBlock::iterator EndMBBI = MBB.end();
    if (!DoMerge && MBBI != EndMBBI) {
-    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
      while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
        ++NextMBBI;
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
@@ -867,7 +945,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
    bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
    // Can't do the merge if the destination register is the same as the would-be
    // writeback register.
-  if (isLd && MI->getOperand(0).getReg() == Base)
+  if (MI->getOperand(0).getReg() == Base)
      return false;
  
    unsigned PredReg = 0;
@@ -881,7 +959,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
    // Try merging with the previous instruction.
    MachineBasicBlock::iterator BeginMBBI = MBB.begin();
    if (MBBI != BeginMBBI) {
-    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
      while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
        --PrevMBBI;
      if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
@@ -900,7 +978,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
    // Try merging with the next instruction.
    MachineBasicBlock::iterator EndMBBI = MBB.end();
    if (!DoMerge && MBBI != EndMBBI) {
-    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
      while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
        ++NextMBBI;
      if (!isAM5 &&
@@ -1044,7 +1122,7 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
    }
  
    if (Loc != MBB.begin())
-    RS->forward(prior(Loc));
+    RS->forward(std::prev(Loc));
  }
  
  static int getMemoryOpOffset(const MachineInstr *MI) {
@@ -1154,12 +1232,17 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                    getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
          ++NumSTRD2STM;
        }
-      NewBBI = llvm::prior(MBBI);
+      NewBBI = std::prev(MBBI);
      } else {
        // Split into two instructions.
        unsigned NewOpc = (isLd)
          ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
          : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+      // so adjust and use t2LDRi12 here for that.
+      unsigned NewOpc2 = (isLd)
+        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
        DebugLoc dl = MBBI->getDebugLoc();
        // If this is a load and base register is killed, it may have been
        // re-defed by the load, make sure the first load does not clobber it.
@@ -1167,11 +1250,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
            (BaseKill || OffKill) &&
            (TRI->regsOverlap(EvenReg, BaseReg))) {
          assert(!TRI->regsOverlap(OddReg, BaseReg));
-        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                        OddReg, OddDeadKill, false,
                        BaseReg, false, BaseUndef, false, OffUndef,
                        Pred, PredReg, TII, isT2);
-        NewBBI = llvm::prior(MBBI);
+        NewBBI = std::prev(MBBI);
          InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                        EvenReg, EvenDeadKill, false,
                        BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1184,12 +1267,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
            EvenDeadKill = false;
            OddDeadKill = true;
          }
+        // Never kill the base register in the first instruction.
+        if (EvenReg == BaseReg)
+          EvenDeadKill = false;
          InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                        EvenReg, EvenDeadKill, EvenUndef,
                        BaseReg, false, BaseUndef, false, OffUndef,
                        Pred, PredReg, TII, isT2);
-        NewBBI = llvm::prior(MBBI);
-        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+        NewBBI = std::prev(MBBI);
+        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                        OddReg, OddDeadKill, OddUndef,
                        BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                        Pred, PredReg, TII, isT2);
@@ -1252,6 +1338,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
        // merge the ldr's so far, including this one. But don't try to
        // combine the following ldr(s).
        Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+
+      // Watch out for:
+      // r4 := ldr [r0, #8]
+      // r4 := ldr [r0, #4]
+      //
+      // The optimization may reorder the second ldr in front of the first
+      // ldr, which violates write after write(WAW) dependence. The same as
+      // str. Try to merge inst(s) already in MemOps.
+      bool Overlap = false;
+      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
+        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
+          Overlap = true;
+          break;
+        }
+      }
+
        if (CurrBase == 0 && !Clobber) {
          // Start of a new chain.
          CurrBase = Base;
@@ -1262,7 +1364,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
          MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
          ++NumMemOps;
          Advance = true;
-      } else {
+      } else if (!Overlap) {
          if (Clobber) {
            TryMerge = true;
            Advance = true;
@@ -1315,9 +1417,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
          // First advance to the instruction just before the start of the chain.
          AdvanceRS(MBB, MemOps);
          // Find a scratch register.
-        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
+        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
          // Process the load / store instructions.
-        RS->forward(prior(MBBI));
+        RS->forward(std::prev(MBBI));
  
          // Merge ops.
          Merges.clear();
@@ -1339,13 +1441,13 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
                ++NumMerges;
  
          // RS may be pointing to an instruction that's deleted.
-        RS->skipTo(prior(MBBI));
+        RS->skipTo(std::prev(MBBI));
        } else if (NumMemOps == 1) {
          // Try folding preceding/trailing base inc/dec into the single
          // load/store.
          if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
            ++NumMerges;
-          RS->forward(prior(MBBI));
+          RS->forward(std::prev(MBBI));
          }
        }
  
@@ -1388,7 +1490,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
        (MBBI->getOpcode() == ARM::BX_RET ||
         MBBI->getOpcode() == ARM::tBX_RET ||
         MBBI->getOpcode() == ARM::MOVPCLR)) {
-    MachineInstr *PrevMI = prior(MBBI);
+    MachineInstr *PrevMI = std::prev(MBBI);
      unsigned Opcode = PrevMI->getOpcode();
      if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
          Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
@@ -1401,7 +1503,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
                Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
        PrevMI->setDesc(TII->get(NewOpc));
        MO.setReg(ARM::PC);
-      PrevMI->copyImplicitOps(&*MBBI);
+      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
        MBB.erase(MBBI);
        return true;
      }
@@ -1441,16 +1543,16 @@ namespace {
      static char ID;
      ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
  
-    const TargetData *TD;
+    const DataLayout *TD;
      const TargetInstrInfo *TII;
      const TargetRegisterInfo *TRI;
      const ARMSubtarget *STI;
      MachineRegisterInfo *MRI;
      MachineFunction *MF;
  
-    virtual bool runOnMachineFunction(MachineFunction &Fn);
+    bool runOnMachineFunction(MachineFunction &Fn) override;
  
-    virtual const char *getPassName() const {
+    const char *getPassName() const override {
        return "ARM pre- register allocation load / store optimization pass";
      }
  
@@ -1462,7 +1564,7 @@ namespace {
                            unsigned &PredReg, ARMCC::CondCodes &Pred,
                            bool &isT2);
      bool RescheduleOps(MachineBasicBlock *MBB,
-                       SmallVector<MachineInstr*, 4> &Ops,
+                       SmallVectorImpl<MachineInstr *> &Ops,
                         unsigned Base, bool isLd,
                         DenseMap<MachineInstr*, unsigned> &MI2LocMap);
      bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
@@ -1471,7 +1573,7 @@ namespace {
  }
  
  bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
-  TD  = Fn.getTarget().getTargetData();
+  TD  = Fn.getTarget().getDataLayout();
    TII = Fn.getTarget().getInstrInfo();
    TRI = Fn.getTarget().getRegisterInfo();
    STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
@@ -1580,8 +1682,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
      return false;
  
    // Make sure the base address satisfies i64 ld / st alignment requirement.
+  // At the moment, we ignore the memoryoperand's value.
+  // If we want to use AliasAnalysis, we should check it accordingly.
    if (!Op0->hasOneMemOperand() ||
-      !(*Op0->memoperands_begin())->getValue() ||
        (*Op0->memoperands_begin())->isVolatile())
      return false;
  
@@ -1621,25 +1724,20 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
    return true;
  }
  
-namespace {
-  struct OffsetCompare {
-    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
-      int LOffset = getMemoryOpOffset(LHS);
-      int ROffset = getMemoryOpOffset(RHS);
-      assert(LHS == RHS || LOffset != ROffset);
-      return LOffset > ROffset;
-    }
-  };
-}
-
  bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
-                                 SmallVector<MachineInstr*, 4> &Ops,
+                                 SmallVectorImpl<MachineInstr *> &Ops,
                                   unsigned Base, bool isLd,
                                   DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
    bool RetVal = false;
  
    // Sort by offset (in reverse order).
-  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+  std::sort(Ops.begin(), Ops.end(),
+            [](const MachineInstr *LHS, const MachineInstr *RHS) {
+    int LOffset = getMemoryOpOffset(LHS);
+    int ROffset = getMemoryOpOffset(RHS);
+    assert(LHS == RHS || LOffset != ROffset);
+    return LOffset > ROffset;
+  });
  
    // The loads / stores of the same base are in order. Scan them from first to
    // last and check for the following:
@@ -1728,7 +1826,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
            Ops.pop_back();
  
            const MCInstrDesc &MCID = TII->get(NewOpc);
-          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
+          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
            MRI->constrainRegClass(EvenReg, TRC);
            MRI->constrainRegClass(OddReg, TRC);
  
@@ -1835,9 +1933,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
            if (!StopHere)
              BI->second.push_back(MI);
          } else {
-          SmallVector<MachineInstr*, 4> MIs;
-          MIs.push_back(MI);
-          Base2LdsMap[Base] = MIs;
+          Base2LdsMap[Base].push_back(MI);
            LdBases.push_back(Base);
          }
        } else {
@@ -1853,9 +1949,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
            if (!StopHere)
              BI->second.push_back(MI);
          } else {
-          SmallVector<MachineInstr*, 4> MIs;
-          MIs.push_back(MI);
-          Base2StsMap[Base] = MIs;
+          Base2StsMap[Base].push_back(MI);
            StBases.push_back(Base);
          }
        }
@@ -1871,7 +1965,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
      // Re-schedule loads.
      for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
        unsigned Base = LdBases[i];
-      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
        if (Lds.size() > 1)
          RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
      }
@@ -1879,7 +1973,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
      // Re-schedule stores.
      for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
        unsigned Base = StBases[i];
-      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
        if (Sts.size() > 1)
          RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
      }