//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMInstrInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "arm-cp-islands"
+
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
-// FIXME: This option should be removed once it has received sufficient testing.
-static cl::opt<bool>
-AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
- cl::desc("Align constant islands in code"));
-
/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits. This does not include alignment padding caused by
/// known offset bits.
return 0;
}
-/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
-/// add padding such that:
-///
-/// 1. The result is aligned to 1 << LogAlign.
-///
-/// 2. No other value of the unknown bits would require more padding.
-///
-/// This may add more padding than is required to satisfy just one of the
-/// constraints. It is necessary to compute alignment this way to guarantee
-/// that we don't underestimate the padding before an aligned block. If the
-/// real padding before a block is larger than we think, constant pool entries
-/// may go out of range.
-static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
- unsigned KnownBits) {
- // Add the worst possible padding that the unknown bits could cause.
- Offset += UnknownPadding(LogAlign, KnownBits);
-
- // Then align the result.
- return RoundUpToAlignment(Offset, 1u << LogAlign);
-}
-
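+// A worked example, assuming the usual worst case of
+// (1 << LogAlign) - (1 << KnownBits) bytes: with LogAlign == 3 (8-byte
+// alignment) and only the low two offset bits known, up to 8 - 4 == 4 bytes
+// of padding must be assumed; once KnownBits >= LogAlign, none is needed.
+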
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
/// Offset - Distance from the beginning of the function to the beginning
/// of this basic block.
///
- /// The offset is always aligned as required by the basic block.
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
unsigned Offset;
/// Size - Size of the basic block in bytes. If the block contains
/// This number should be used to predict worst case padding when
/// splitting the block.
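+ /// For example, a block of Size 6 whose start offset is known to 4 bits
+ /// guarantees only countTrailingZeros(6) == 1 known offset bit to code
+ /// inside or after it.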
unsigned internalKnownBits() const {
- return Unalign ? Unalign : KnownBits;
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known alignment (1 << Bits),
+ // offsets inside and after the block are only known to the trailing zero
+ // bits of Size; assume the worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = countTrailingZeros(Size);
+ return Bits;
}
/// Compute the offset immediately following this block. If LogAlign is
if (!LA)
return PO;
// Add alignment padding from the terminator.
- return WorstCaseAlign(PO, LA, internalKnownBits());
+ return PO + UnknownPadding(LA, internalKnownBits());
}
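+ // e.g. with PO == 0x102, LA == 2, and one known low offset bit, the
+ // worst-case padding above is UnknownPadding(2, 1) == 2 bytes, so
+ // postOffset conservatively reports 0x104.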
/// Compute the number of known low bits of postOffset. If this block
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
- private:
unsigned MaxDisp;
- public:
bool NegOk;
bool IsSoImm;
bool KnownAlignment;
}
/// getMaxDisp - Returns the maximum displacement supported by MI.
/// Correct for unknown alignment.
+ /// Conservatively subtract 2 bytes to handle weird alignment effects.
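+ /// For example, a t2LDRpci user with a nominal maximum displacement of
+ /// 4095 bytes is treated as reaching only 4095 - 2 - 2 == 4091 bytes when
+ /// its alignment is unknown.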
unsigned getMaxDisp() const {
- return KnownAlignment ? MaxDisp : MaxDisp - 2;
+ return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
}
};
};
/// CPEntries - Keep track of all of the constant pool entry machine
- /// instructions. For each original constpool index (i.e. those that
- /// existed upon entry to this pass), it keeps a vector of entries.
- /// Original elements are cloned as we go along; the clones are
- /// put in the vector of the original element, but have distinct CPIs.
+ /// instructions. For each original constpool index (i.e. those that existed
+ /// upon entry to this pass), it keeps a vector of entries. Original
+ /// elements are cloned as we go along; the clones are put in the vector of
+ /// the original element, but have distinct CPIs.
+ ///
+ /// The first half of CPEntries contains generic constants, the second half
+ /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
+ /// which vector it will be in here.
std::vector<std::vector<CPEntry> > CPEntries;
+ /// Maps a JT index to the offset in CPEntries containing copies of that
+ /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
+ DenseMap<int, int> JumpTableEntryIndices;
+
+ /// Maps a JT index to the index in CPUsers of the LEA that actually uses
+ /// the table index to calculate its base address.
+ DenseMap<int, int> JumpTableUserIndices;
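+ // For instance, the clones of jump table #1 live in
+ // CPEntries[JumpTableEntryIndices[1]], and the LEA computing its base
+ // address is CPUsers[JumpTableUserIndices[1]].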
+
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
/// and (if isCond is true) the corresponding unconditional branch
MachineInstr *MI;
unsigned MaxDisp : 31;
bool isCond : 1;
- int UncondBr;
- ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ unsigned UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
: MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
};
MachineFunction *MF;
MachineConstantPool *MCP;
- const ARMInstrInfo *TII;
+ const ARMBaseInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
static char ID;
ARMConstantIslands() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "ARM constant island placement and branch shortening pass";
}
private:
- void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
+ void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
+ bool BBHasFallthrough(MachineBasicBlock *MBB);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
unsigned getCPELogAlign(const MachineInstr *CPEMI);
void scanFunctionJumpTables();
void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
void adjustBBOffsetsAfter(MachineBasicBlock *BB);
bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ unsigned getCombinedIndex(const MachineInstr *CPEMI);
int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
bool findAvailableWater(CPUser&U, unsigned UserOffset,
water_iterator &WaterIter);
bool optimizeThumb2Instructions();
bool optimizeThumb2Branches();
bool reorderThumb2JumpTables();
+ bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
+ unsigned &DeadSize, bool &CanDeleteLEA,
+ bool &BaseRegKill);
bool optimizeThumb2JumpTables();
MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- unsigned Align = MBB->getAlignment();
unsigned MBBId = MBB->getNumber();
- assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
+ DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
unsigned UserOffset = getUserOffset(U);
- assert(isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp(),
- U.NegOk) && "Constant pool entry out of range!");
+ // Verify offset using the real max displacement without the safety
+ // adjustment.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
}
#endif
}
<< MCP->getConstants().size() << " CP entries, aligned to "
<< MCP->getConstantPoolAlignment() << " bytes *****\n");
- TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo();
+ STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
+ TII = STI->getInstrInfo();
AFI = MF->getInfo<ARMFunctionInfo>();
- STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
HasFarJump = false;
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
MF->RenumberBlocks();
MF->RenumberBlocks();
}
- // Thumb1 functions containing constant pools get 4-byte alignment.
- // This is so we can keep exact track of where the alignment padding goes.
-
- // ARM and Thumb2 functions need to be 4-byte aligned.
- if (!isThumb1)
- MF->EnsureAlignment(2); // 2 = log2(4)
-
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
if (!MCP->isEmpty())
- doInitialPlacement(CPEMIs);
+ doInitialConstPlacement(CPEMIs);
+
+ if (MF->getJumpTableInfo())
+ doInitialJumpTablePlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
CPEMIs.clear();
DEBUG(dumpBBs());
+ // Functions with jump tables need an alignment of 4 because they use the ADR
+ // instruction, which aligns the PC to 4 bytes before adding an offset.
+ if (!T2JumpTables.empty())
+ MF->ensureAlignment(2);
/// Remove dead constant pool entries.
MadeChange |= removeUnusedCPEntries();
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
const CPEntry & CPE = CPEntries[i][j];
- AFI->recordCPEClone(i, CPE.CPI);
+ if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
+ AFI->recordCPEClone(i, CPE.CPI);
}
}
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
+ JumpTableEntryIndices.clear();
+ JumpTableUserIndices.clear();
ImmBranches.clear();
PushPopMIs.clear();
T2JumpTables.clear();
return MadeChange;
}
-/// doInitialPlacement - Perform the initial placement of the constant pool
-/// entries. To start with, we put them all at the end of the function.
+/// \brief Perform the initial placement of the regular constant pool entries.
+/// To start with, we put them all at the end of the function.
void
-ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
MF->push_back(BB);
unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
// Mark the basic block as required by the const-pool.
- // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
- BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+ BB->setAlignment(MaxAlign);
// The function needs to be as aligned as the basic blocks. The linker may
// move functions around based on their alignment.
- MF->EnsureAlignment(BB->getAlignment());
+ MF->ensureAlignment(BB->getAlignment());
// Order the entries in BB by descending alignment. That ensures correct
// alignment of all entries as long as BB is sufficiently aligned. Keep
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF->getTarget().getTargetData();
+ const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
InsPoint[a] = CPEMI;
// Add a new CPEntry, but no corresponding CPUser yet.
- std::vector<CPEntry> CPEs;
- CPEs.push_back(CPEntry(CPEMI, i));
- CPEntries.push_back(CPEs);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, i));
++NumCPEs;
DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
<< Size << ", align = " << Align <<'\n');
DEBUG(BB->dump());
}
+/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
+/// instructions can be made more efficient if the jump table immediately
+/// follows the instruction, it's best to place them immediately next to their
+/// jumps to begin with. In almost all cases they'll never be moved from that
+/// position.
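+/// Concretely, each block ending in a BR_JT-style jump gets a new block
+/// inserted immediately after it, holding a single JUMPTABLE_* pseudo that
+/// records the island ID, the jump-table index, and the table's size in
+/// bytes.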
+void ARMConstantIslands::doInitialJumpTablePlacement(
+ std::vector<MachineInstr *> &CPEMIs) {
+ unsigned i = CPEntries.size();
+ auto MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+
+ MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
+ for (MachineBasicBlock &MBB : *MF) {
+ auto MI = MBB.getLastNonDebugInstr();
+ // A block with no real instructions (e.g. one holding only debug values)
+ // has no terminator to inspect; getLastNonDebugInstr returns end() there.
+ if (MI == MBB.end())
+ continue;
+
+ unsigned JTOpcode;
+ switch (MI->getOpcode()) {
+ default:
+ continue;
+ case ARM::BR_JTadd:
+ case ARM::BR_JTr:
+ case ARM::tBR_JTr:
+ case ARM::BR_JTm:
+ JTOpcode = ARM::JUMPTABLE_ADDRS;
+ break;
+ case ARM::t2BR_JT:
+ JTOpcode = ARM::JUMPTABLE_INSTS;
+ break;
+ case ARM::t2TBB_JT:
+ JTOpcode = ARM::JUMPTABLE_TBB;
+ break;
+ case ARM::t2TBH_JT:
+ JTOpcode = ARM::JUMPTABLE_TBH;
+ break;
+ }
+
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ MachineOperand JTOp =
+ MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
+ unsigned JTI = JTOp.getIndex();
+ unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
+ MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
+ MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
+ MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
+ DebugLoc(), TII->get(JTOpcode))
+ .addImm(i++)
+ .addJumpTableIndex(JTI)
+ .addImm(Size);
+ CPEMIs.push_back(CPEMI);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
+ JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
+ if (!LastCorrectlyNumberedBB)
+ LastCorrectlyNumberedBB = &MBB;
+ }
+
+ // If we did anything then we need to renumber the subsequent blocks.
+ if (LastCorrectlyNumberedBB)
+ MF->RenumberBlocks(LastCorrectlyNumberedBB);
+}
+
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
-static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
// Can't fall off end of function.
- if (llvm::next(MBBI) == MBB->getParent()->end())
+ if (std::next(MBBI) == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = llvm::next(MBBI);
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I == NextBB)
- return true;
+ MachineBasicBlock *NextBB = std::next(MBBI);
+ if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
+ return false;
- return false;
+ // Try to analyze the end of the block. A potential fallthrough may already
+ // have an unconditional branch for whatever reason.
+ MachineBasicBlock *TBB, *FBB;
+ SmallVector<MachineOperand, 4> Cond;
+ bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
+ return TooDifficult || FBB == nullptr;
}
/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
if (CPEs[i].CPEMI == CPEMI)
return &CPEs[i];
}
- return NULL;
+ return nullptr;
}
/// getCPELogAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI. Alignment is measured in log2(bytes) units.
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
- assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
-
- // Everything is 4-byte aligned unless AlignConstantIslands is set.
- if (!AlignConstantIslands)
+ switch (CPEMI->getOpcode()) {
+ case ARM::CONSTPOOL_ENTRY:
+ break;
+ case ARM::JUMPTABLE_TBB:
+ return 0;
+ case ARM::JUMPTABLE_TBH:
+ case ARM::JUMPTABLE_INSTS:
+ return 1;
+ case ARM::JUMPTABLE_ADDRS:
return 2;
+ default:
+ llvm_unreachable("unknown constpool entry kind");
+ }
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
unsigned Align = MCP->getConstants()[CPI].getAlignment();
assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
if (I->isDebugValue())
continue;
- int Opc = I->getOpcode();
+ unsigned Opc = I->getOpcode();
if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
PushPopMIs.push_back(I);
- if (Opc == ARM::CONSTPOOL_ENTRY)
+ if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
+ Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
+ Opc == ARM::JUMPTABLE_TBH)
continue;
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
- if (I->getOperand(op).isCPI()) {
+ if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
// Taking the address of a CP entry.
case ARM::LEApcrel:
+ case ARM::LEApcrelJT:
// This takes a SoImm, which is 8 bit immediate rotated. We'll
// pretend the maximum offset is 255 * 4. Since each instruction
// is 4 bytes wide, this is always correct. We'll check for other
IsSoImm = true;
break;
case ARM::t2LEApcrel:
+ case ARM::t2LEApcrelJT:
Bits = 12;
NegOk = true;
break;
case ARM::tLEApcrel:
+ case ARM::tLEApcrelJT:
Bits = 8;
Scale = 4;
break;
+ case ARM::LDRBi12:
case ARM::LDRi12:
case ARM::LDRcp:
case ARM::t2LDRpci:
// Remember that this is a user of a CP entry.
unsigned CPI = I->getOperand(op).getIndex();
+ if (I->getOperand(op).isJTI()) {
+ JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
+ CPI = JumpTableEntryIndices[CPI];
+ }
+
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
// tBR_JTr contains a .align 2 directive.
if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
BBI.PostAlign = 2;
- MBB->getParent()->EnsureAlignment(2);
+ MBB->getParent()->ensureAlignment(2);
}
}
CompareMBBNumbers);
MachineBasicBlock* WaterBB = *IP;
if (WaterBB == OrigBB)
- WaterList.insert(llvm::next(IP), NewBB);
+ WaterList.insert(std::next(IP), NewBB);
else
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = getOffsetOf(CPEMI);
- assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
DEBUG({
assert(CPE && "Unexpected!");
if (--CPE->RefCount == 0) {
removeDeadCPEMI(CPEMI);
- CPE->CPEMI = NULL;
+ CPE->CPEMI = nullptr;
--NumCPEs;
return true;
}
return false;
}
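+/// Return the index into CPEntries covering the given pseudo: a
+/// CONSTPOOL_ENTRY carries its constant pool index directly, while the
+/// JUMPTABLE_* pseudos must be translated through JumpTableEntryIndices.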
+unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
+ if (CPEMI->getOperand(1).isCPI())
+ return CPEMI->getOperand(1).getIndex();
+
+ return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
+}
+
/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE is in range, and if so,
/// change the data structures so the user references the clone. Returns:
}
// No. Look for previously created clones of the CPE that are in range.
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
std::vector<CPEntry> &CPEs = CPEntries[CPI];
for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
// We already tried this one
if (CPEs[i].CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries, skip
- if (CPEs[i].CPEMI == NULL)
+ if (CPEs[i].CPEMI == nullptr)
continue;
if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
U.NegOk)) {
return false;
unsigned BestGrowth = ~0u;
- for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
--IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
unsigned Growth;
if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) &&
+ Growth < BestGrowth) {
// This is the least amount of required padding seen so far.
BestGrowth = Growth;
WaterIter = IP;
if (BBHasFallthrough(UserMBB)) {
// Size of branch to insert.
unsigned Delta = isThumb1 ? 2 : 4;
- // End of UserBlock after adding a branch.
- unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
// Compute the offset where the CPE will begin.
- unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
- UserBBI.postKnownBits());
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
<< format(", expected CPE offset %#x\n", CPEOffset));
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ NewMBB = std::next(MachineFunction::iterator(UserMBB));
// Add an unconditional branch from UserMBB to fallthrough block. Record
// it for branch lengthening; this new branch will not get out of range,
// but if the preceding conditional branch is out of range, the targets
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
- BBInfo[UserMBB->getNumber()].Size += Delta;
+ computeBlockSize(UserMBB);
adjustBBOffsetsAfter(UserMBB);
return;
}
// up the insertion point.
// Try to split the block so it's fully aligned. Compute the latest split
- // point where we can add a 4-byte branch instruction, and then
- // WorstCaseAlign to LogAlign.
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign, which is the largest possible alignment in the function.
unsigned LogAlign = MF->getAlignment();
assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
unsigned KnownBits = UserBBI.internalKnownBits();
unsigned UPad = UnknownPadding(LogAlign, KnownBits);
- unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
DEBUG(dbgs() << format("Split in middle of big block before %#x",
BaseInsertOffset));
- // Account for alignment and unknown padding.
- BaseInsertOffset &= ~((1u << LogAlign) - 1);
- BaseInsertOffset -= UPad;
-
// The 4 in the following is for the unconditional branch we'll be inserting
// (allows for long branch on Thumb1). Alignment of the island is handled
// inside isOffsetInRange.
// pool entries following this block; only the last one is in the water list.
// Back past any possible branches (allow for a conditional and a maximally
// long unconditional).
- if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
- BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset =
- WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ // Ensure BaseInsertOffset is larger than the offset of the instruction
+ // following UserMI so that the loop which searches for the split point
+ // iterates at least once.
+ BaseInsertOffset =
+ std::max(UserBBI.postOffset() - UPad - 8,
+ UserOffset + TII->GetInstSizeInBytes(UserMI) + 1);
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
CPEMI->getOperand(2).getImm();
MachineBasicBlock::iterator MI = UserMI;
++MI;
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
+ MachineInstr *LastIT = nullptr;
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
+ Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
// reused within the block, but it doesn't matter much. Also assume CPEs
// are added in order with alignment padding. We may eventually be able
// to pack the aligned CPEs better.
- EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
- 1u << getCPELogAlign(U.CPEMI)) +
- U.CPEMI->getOperand(2).getImm();
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
CPUIndex++;
}
// Avoid splitting an IT block.
if (LastIT) {
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
if (CC != ARMCC::AL)
MI = LastIT;
}
+
+ // We really must not split an IT block.
+ DEBUG(unsigned PredReg;
+ assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL));
+
NewMBB = splitBlockBeforeInstr(MI);
}
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
unsigned Size = CPEMI->getOperand(2).getImm();
// Compute this only once, it's expensive.
unsigned UserOffset = getUserOffset(U);
// If the original WaterList entry was "new water" on this iteration,
// propagate that to the new island. This is just keeping NewWaterList
// updated to match the WaterList, which will be updated below.
- if (NewWaterList.count(WaterBB)) {
- NewWaterList.erase(WaterBB);
+ if (NewWaterList.erase(WaterBB))
NewWaterList.insert(NewIsland);
- }
+
// The new CPE goes before the following block (NewMBB).
- NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+ NewMBB = std::next(MachineFunction::iterator(WaterBB));
} else {
// No water found.
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
- MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
// Update internal data structures to account for the newly inserted MBB.
updateForInsertedWaterBlock(NewIsland);
- // Decrement the old entry, and remove it if refcount becomes 0.
- decrementCPEReferenceCount(CPI, CPEMI);
-
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
U.HighWaterMark = NewIsland;
- U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
- .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
+ .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
+
// Mark the basic block as aligned as required by the const-pool entry.
NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
// Increase the size of the island block to account for the new entry.
BBInfo[NewIsland->getNumber()].Size += Size;
- adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
+ adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
if (CPEBB->empty()) {
BBInfo[CPEBB->getNumber()].Size = 0;
- // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ // This block no longer needs to be aligned.
CPEBB->setAlignment(0);
} else
// Entries are sorted by descending alignment, so realign from the front.
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
removeDeadCPEMI(CPEs[j].CPEMI);
- CPEs[j].CPEMI = NULL;
+ CPEs[j].CPEMI = nullptr;
MadeChange = true;
}
}
++NumCBrFixed;
if (BMI != MI) {
- if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
BMI->getOpcode() == Br.UncondBr) {
// Last MI in the BB is an unconditional branch. Can we simply invert the
// condition and swap destinations:
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
// FIXME: Check if offset is multiple of scale if scale is not 4.
if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ DEBUG(dbgs() << "Shrink: " << *U.MI);
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
BBInfo[MBB->getNumber()].Size -= 2;
bool ARMConstantIslands::optimizeThumb2Branches() {
bool MadeChange = false;
- for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
- ImmBranch &Br = ImmBranches[i];
+ // The order in which branches appear in ImmBranches is approximately their
+ // order within the function body. By visiting later branches first, we reduce
+ // the distance between earlier forward branches and their targets, making it
+ // more likely that the cbn?z optimization, which can only apply to forward
+ // branches, will succeed.
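+ // (CBZ/CBNZ encode a 6-bit, halfword-scaled offset: they can only branch
+ // forward, at most 126 bytes past the PC.)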
+ for (unsigned i = ImmBranches.size(); i != 0; --i) {
+ ImmBranch &Br = ImmBranches[i-1];
unsigned Opcode = Br.MI->getOpcode();
unsigned NewOpc = 0;
unsigned Scale = 1;
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+ DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
BBInfo[MBB->getNumber()].Size -= 2;
NewOpc = 0;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
--CmpMI;
if (CmpMI->getOpcode() == ARM::tCMPi8) {
unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ Pred = getInstrPredicate(CmpMI, PredReg);
if (Pred == ARMCC::AL &&
CmpMI->getOperand(1).getImm() == 0 &&
isARMLowRegister(Reg)) {
MachineBasicBlock *MBB = Br.MI->getParent();
+ DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
MachineInstr *NewBR =
BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
.addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
return MadeChange;
}
+static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
+ unsigned BaseReg) {
+ if (I.getOpcode() != ARM::t2ADDrs)
+ return false;
+
+ if (I.getOperand(0).getReg() != EntryReg)
+ return false;
+
+ if (I.getOperand(1).getReg() != BaseReg)
+ return false;
+
+ // FIXME: what about CC and IdxReg?
+ return true;
+}
+
+/// \brief While trying to form a TBB/TBH instruction, we may (if the table
+/// doesn't immediately follow the BR_JT) need access to the start of the
+/// jump-table. We know one instruction that produces such a register; this
+/// function works out whether that definition can be preserved to the BR_JT,
+/// possibly by removing an intervening addition (which is usually needed to
+/// calculate the actual entry to jump to).
+bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
+ MachineInstr *LEAMI,
+ unsigned &DeadSize,
+ bool &CanDeleteLEA,
+ bool &BaseRegKill) {
+ if (JumpMI->getParent() != LEAMI->getParent())
+ return false;
+
+ // Now we hope that we have at least these instructions in the basic block:
+ // BaseReg = t2LEA ...
+ // [...]
+ // EntryReg = t2ADDrs BaseReg, ...
+ // [...]
+ // t2BR_JT EntryReg
+ //
+ // We have to be very conservative about what we recognise here though. The
+ // main perturbing factors to watch out for are:
+ // + Spills at any point in the chain: not direct problems but we would
+ // expect a blocking Def of the spilled register so in practice what we
+ // can do is limited.
+ // + EntryReg == BaseReg: this is the one situation we should allow a Def
+ // of BaseReg, but only if the t2ADDrs can be removed.
+ // + Some instruction other than t2ADDrs computing the entry. Not seen in
+ // the wild, but we should be careful.
+ unsigned EntryReg = JumpMI->getOperand(0).getReg();
+ unsigned BaseReg = LEAMI->getOperand(0).getReg();
+
+ CanDeleteLEA = true;
+ BaseRegKill = false;
+ MachineInstr *RemovableAdd = nullptr;
+ MachineBasicBlock::iterator I(LEAMI);
+ for (++I; &*I != JumpMI; ++I) {
+ if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
+ RemovableAdd = &*I;
+ break;
+ }
+
+ for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = I->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == BaseReg)
+ return false;
+ if (MO.isUse() && MO.getReg() == BaseReg) {
+ BaseRegKill = BaseRegKill || MO.isKill();
+ CanDeleteLEA = false;
+ }
+ }
+ }
+
+ if (!RemovableAdd)
+ return true;
+
+ // Check the add really is removable, and that nothing else in the block
+ // clobbers BaseReg.
+ for (++I; &*I != JumpMI; ++I) {
+ for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = I->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == BaseReg)
+ return false;
+ if (MO.isUse() && MO.getReg() == EntryReg)
+ RemovableAdd = nullptr;
+ }
+ }
+
+ if (RemovableAdd) {
+ RemovableAdd->eraseFromParent();
+ DeadSize += 4;
+ } else if (BaseReg == EntryReg) {
+ // The add wasn't removable, but clobbered the base for the TBB. So we can't
+ // preserve it.
+ return false;
+ }
+
+ // We reached the end of the block without seeing another definition of
+ // BaseReg (except possibly the t2ADDrs, which was removed). BaseReg can be
+ // used in the TBB/TBH if necessary.
+ return true;
+}
+
+/// \brief Returns whether CPEMI is the first instruction in the block
+/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
+/// we can switch the first register to PC and usually remove the address
+/// calculation that preceded it.
+static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
+ MachineFunction::iterator MBB = JTMI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ ++MBB;
+
+ return MBB != MF->end() && MBB->begin() != MBB->end() &&
+ &*MBB->begin() == CPEMI;
+}
+
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
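+/// TBB tables hold byte entries and TBH tables halfword entries, each
+/// implicitly multiplied by two and added to the PC, so a table of 32-bit
+/// absolute addresses can shrink to a quarter or half of its size.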
bool ARMConstantIslands::optimizeThumb2JumpTables() {
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return false;
+ if (!MJTI) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
break;
}
- if (ByteOk || HalfWordOk) {
- MachineBasicBlock *MBB = MI->getParent();
- unsigned BaseReg = MI->getOperand(0).getReg();
- bool BaseRegKill = MI->getOperand(0).isKill();
- if (!BaseRegKill)
- continue;
- unsigned IdxReg = MI->getOperand(1).getReg();
- bool IdxRegKill = MI->getOperand(1).isKill();
-
- // Scan backwards to find the instruction that defines the base
- // register. Due to post-RA scheduling, we can't count on it
- // immediately preceding the branch instruction.
- MachineBasicBlock::iterator PrevI = MI;
- MachineBasicBlock::iterator B = MBB->begin();
- while (PrevI != B && !PrevI->definesRegister(BaseReg))
- --PrevI;
-
- // If for some reason we didn't find it, we can't do anything, so
- // just skip this one.
- if (!PrevI->definesRegister(BaseReg))
- continue;
+ if (!ByteOk && !HalfWordOk)
+ continue;
- MachineInstr *AddrMI = PrevI;
- bool OptOk = true;
- // Examine the instruction that calculates the jumptable entry address.
- // Make sure it only defines the base register and kills any uses
- // other than the index register.
- for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
- const MachineOperand &MO = AddrMI->getOperand(k);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef() && MO.getReg() != BaseReg) {
- OptOk = false;
- break;
- }
- if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
- OptOk = false;
- break;
- }
- }
- if (!OptOk)
- continue;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!MI->getOperand(0).isKill()) // FIXME: needed now?
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
- // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
- // that gave us the initial base register definition.
- for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
- ;
+ CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
+ unsigned DeadSize = 0;
+ bool CanDeleteLEA = false;
+ bool BaseRegKill = false;
+ bool PreservedBaseReg =
+ preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
- // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
- // to delete it as well.
- MachineInstr *LeaMI = PrevI;
- if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
- LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
- LeaMI->getOperand(0).getReg() != BaseReg)
- OptOk = false;
+ if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
+ continue;
- if (!OptOk)
- continue;
+ DEBUG(dbgs() << "Shrink JT: " << *MI);
+ MachineInstr *CPEMI = User.CPEMI;
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineBasicBlock::iterator MI_JT = MI;
+ MachineInstr *NewJTMI =
+ BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(User.MI->getOperand(0).getReg(),
+ getKillRegState(BaseRegKill))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(CPEMI->getOperand(0).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
+
+ unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
+ CPEMI->setDesc(TII->get(JTOpc));
+
+ if (jumpTableFollowsTB(MI, User.CPEMI)) {
+ NewJTMI->getOperand(0).setReg(ARM::PC);
+ NewJTMI->getOperand(0).setIsKill(false);
+
+ if (CanDeleteLEA) {
+ User.MI->eraseFromParent();
+ DeadSize += 4;
+
+ // The LEA was eliminated; the TBB instruction becomes the only new
+ // user of the jump table.
+ User.MI = NewJTMI;
+ User.MaxDisp = 4;
+ User.NegOk = false;
+ User.IsSoImm = false;
+ User.KnownAlignment = false;
+ } else {
+ // The LEA couldn't be eliminated, so we must add another CPUser to
+ // record the TBB or TBH use.
+ int CPEntryIdx = JumpTableEntryIndices[JTI];
+ auto &CPEs = CPEntries[CPEntryIdx];
+ auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
+ return E.CPEMI == User.CPEMI;
+ });
+ ++Entry->RefCount;
+ CPUsers.emplace_back(NewJTMI, User.CPEMI, 4, false, false);
+ }
+ }
- unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
- MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
- .addReg(IdxReg, getKillRegState(IdxRegKill))
- .addJumpTableIndex(JTI, JTOP.getTargetFlags())
- .addImm(MI->getOperand(JTOpIdx+1).getImm());
- // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
- // is 2-byte aligned. For now, asm printer will fix it up.
- unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
- unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
- OrigSize += TII->GetInstSizeInBytes(LeaMI);
- OrigSize += TII->GetInstSizeInBytes(MI);
-
- AddrMI->eraseFromParent();
- LeaMI->eraseFromParent();
- MI->eraseFromParent();
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
- int delta = OrigSize - NewSize;
- BBInfo[MBB->getNumber()].Size -= delta;
- adjustBBOffsetsAfter(MBB);
+ int Delta = OrigSize - NewSize + DeadSize;
+ BBInfo[MBB->getNumber()].Size -= Delta;
+ adjustBBOffsetsAfter(MBB);
- ++NumTBs;
- MadeChange = true;
- }
+ ++NumTBs;
+ MadeChange = true;
}
return MadeChange;
bool MadeChange = false;
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return false;
+ if (!MJTI) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
// heuristic. FIXME: We can definitely improve it.
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
SmallVector<MachineOperand, 4> CondPrior;
MachineFunction::iterator BBi = BB;
- MachineFunction::iterator OldPrior = prior(BBi);
+ MachineFunction::iterator OldPrior = std::prev(BBi);
// If the block terminator isn't analyzable, don't try to move the block
bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
// Update numbering to account for the block being moved.
MF->RenumberBlocks();
++NumJTMoved;
- return NULL;
+ return nullptr;
}
// Create a new MBB for the code after the jump BB.