//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMInstrInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "arm-cp-islands"
+
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
-// FIXME: This option should be removed once it has received sufficient testing.
-static cl::opt<bool>
-AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
- cl::desc("Align constant islands in code"));
-
/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits. This does not include alignment padding caused by
/// known offset bits.
return 0;
}
-/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
-/// add padding such that:
-///
-/// 1. The result is aligned to 1 << LogAlign.
-///
-/// 2. No other value of the unknown bits would require more padding.
-///
-/// This may add more padding than is required to satisfy just one of the
-/// constraints. It is necessary to compute alignment this way to guarantee
-/// that we don't underestimate the padding before an aligned block. If the
-/// real padding before a block is larger than we think, constant pool entries
-/// may go out of range.
-static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
- unsigned KnownBits) {
- // Add the worst possible padding that the unknown bits could cause.
- Offset += UnknownPadding(LogAlign, KnownBits);
-
- // Then align the result.
- return RoundUpToAlignment(Offset, 1u << LogAlign);
-}
-
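+// A worked example, assuming the usual worst case of
+// (1 << LogAlign) - (1 << KnownBits) bytes: with LogAlign == 3 (8-byte
+// alignment) and only the low two offset bits known, up to 8 - 4 == 4 bytes
+// of padding must be assumed; once KnownBits >= LogAlign, none is needed.
+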
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
/// Offset - Distance from the beginning of the function to the beginning
/// of this basic block.
///
- /// The offset is always aligned as required by the basic block.
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
unsigned Offset;
/// Size - Size of the basic block in bytes. If the block contains
/// This number should be used to predict worst case padding when
/// splitting the block.
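+ /// For example, a block of Size 6 whose start offset is known to 4 bits
+ /// guarantees only countTrailingZeros(6) == 1 known offset bit to code
+ /// inside or after it.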
unsigned internalKnownBits() const {
- return Unalign ? Unalign : KnownBits;
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known alignment (1 << Bits),
+ // offsets inside and after the block are only known to the trailing zero
+ // bits of Size; assume the worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = countTrailingZeros(Size);
+ return Bits;
}
/// Compute the offset immediately following this block. If LogAlign is
if (!LA)
return PO;
// Add alignment padding from the terminator.
- return WorstCaseAlign(PO, LA, internalKnownBits());
+ return PO + UnknownPadding(LA, internalKnownBits());
}
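+ // e.g. with PO == 0x102, LA == 2, and one known low offset bit, the
+ // worst-case padding above is UnknownPadding(2, 1) == 2 bytes, so
+ // postOffset conservatively reports 0x104.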
/// Compute the number of known low bits of postOffset. If this block
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
- private:
unsigned MaxDisp;
- public:
bool NegOk;
bool IsSoImm;
bool KnownAlignment;
}
/// getMaxDisp - Returns the maximum displacement supported by MI.
/// Correct for unknown alignment.
+ /// Conservatively subtract 2 bytes to handle weird alignment effects.
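+ /// For example, a t2LDRpci user with a nominal maximum displacement of
+ /// 4095 bytes is treated as reaching only 4095 - 2 - 2 == 4091 bytes when
+ /// its alignment is unknown.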
unsigned getMaxDisp() const {
- return KnownAlignment ? MaxDisp : MaxDisp - 2;
+ return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
}
};
};
/// CPEntries - Keep track of all of the constant pool entry machine
- /// instructions. For each original constpool index (i.e. those that
- /// existed upon entry to this pass), it keeps a vector of entries.
- /// Original elements are cloned as we go along; the clones are
- /// put in the vector of the original element, but have distinct CPIs.
+ /// instructions. For each original constpool index (i.e. those that existed
+ /// upon entry to this pass), it keeps a vector of entries. Original
+ /// elements are cloned as we go along; the clones are put in the vector of
+ /// the original element, but have distinct CPIs.
+ ///
+ /// The first half of CPEntries contains generic constants, the second half
+ /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
+ /// which vector it will be in here.
std::vector<std::vector<CPEntry> > CPEntries;
+ /// Maps a JT index to the offset in CPEntries containing copies of that
+ /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
+ DenseMap<int, int> JumpTableEntryIndices;
+
+ /// Maps a JT index to the index in CPUsers of the LEA that actually uses
+ /// the table index to calculate its base address.
+ DenseMap<int, int> JumpTableUserIndices;
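+ // For instance, the clones of jump table #1 live in
+ // CPEntries[JumpTableEntryIndices[1]], and the LEA computing its base
+ // address is CPUsers[JumpTableUserIndices[1]].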
+
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
/// and (if isCond is true) the corresponding unconditional branch
MachineInstr *MI;
unsigned MaxDisp : 31;
bool isCond : 1;
- int UncondBr;
- ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ unsigned UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
: MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
};
MachineFunction *MF;
MachineConstantPool *MCP;
- const ARMInstrInfo *TII;
+ const ARMBaseInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
static char ID;
ARMConstantIslands() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "ARM constant island placement and branch shortening pass";
}
private:
- void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
+ void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
+ bool BBHasFallthrough(MachineBasicBlock *MBB);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
unsigned getCPELogAlign(const MachineInstr *CPEMI);
void scanFunctionJumpTables();
void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
void adjustBBOffsetsAfter(MachineBasicBlock *BB);
bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ unsigned getCombinedIndex(const MachineInstr *CPEMI);
int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
bool findAvailableWater(CPUser&U, unsigned UserOffset,
water_iterator &WaterIter);
bool optimizeThumb2Instructions();
bool optimizeThumb2Branches();
bool reorderThumb2JumpTables();
+ bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
+ unsigned &DeadSize, bool &CanDeleteLEA,
+ bool &BaseRegKill);
bool optimizeThumb2JumpTables();
MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- unsigned Align = MBB->getAlignment();
unsigned MBBId = MBB->getNumber();
- assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
+ DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
unsigned UserOffset = getUserOffset(U);
- assert(isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp(),
- U.NegOk) && "Constant pool entry out of range!");
+ // Verify offset using the real max displacement without the safety
+ // adjustment.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
}
#endif
}
<< MCP->getConstants().size() << " CP entries, aligned to "
<< MCP->getConstantPoolAlignment() << " bytes *****\n");
- TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo();
+ STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
+ TII = STI->getInstrInfo();
AFI = MF->getInfo<ARMFunctionInfo>();
- STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
HasFarJump = false;
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
MF->RenumberBlocks();
MF->RenumberBlocks();
}
- // Thumb1 functions containing constant pools get 4-byte alignment.
- // This is so we can keep exact track of where the alignment padding goes.
-
- // ARM and Thumb2 functions need to be 4-byte aligned.
- if (!isThumb1)
- MF->EnsureAlignment(2); // 2 = log2(4)
-
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
if (!MCP->isEmpty())
- doInitialPlacement(CPEMIs);
+ doInitialConstPlacement(CPEMIs);
+
+ if (MF->getJumpTableInfo())
+ doInitialJumpTablePlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
CPEMIs.clear();
DEBUG(dumpBBs());
+ // Functions with jump tables need an alignment of 4 because they use the ADR
+ // instruction, which aligns the PC to 4 bytes before adding an offset.
+ if (!T2JumpTables.empty())
+ MF->ensureAlignment(2);
/// Remove dead constant pool entries.
MadeChange |= removeUnusedCPEntries();
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
const CPEntry & CPE = CPEntries[i][j];
- AFI->recordCPEClone(i, CPE.CPI);
+ if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
+ AFI->recordCPEClone(i, CPE.CPI);
}
}
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
+ JumpTableEntryIndices.clear();
+ JumpTableUserIndices.clear();
ImmBranches.clear();
PushPopMIs.clear();
T2JumpTables.clear();
return MadeChange;
}
-/// doInitialPlacement - Perform the initial placement of the constant pool
-/// entries. To start with, we put them all at the end of the function.
+/// \brief Perform the initial placement of the regular constant pool entries.
+/// To start with, we put them all at the end of the function.
void
-ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
MF->push_back(BB);
unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
// Mark the basic block as required by the const-pool.
- // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
- BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+ BB->setAlignment(MaxAlign);
// The function needs to be as aligned as the basic blocks. The linker may
// move functions around based on their alignment.
- MF->EnsureAlignment(BB->getAlignment());
+ MF->ensureAlignment(BB->getAlignment());
// Order the entries in BB by descending alignment. That ensures correct
// alignment of all entries as long as BB is sufficiently aligned. Keep
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF->getTarget().getTargetData();
+ const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
InsPoint[a] = CPEMI;
// Add a new CPEntry, but no corresponding CPUser yet.
- std::vector<CPEntry> CPEs;
- CPEs.push_back(CPEntry(CPEMI, i));
- CPEntries.push_back(CPEs);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, i));
++NumCPEs;
DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
<< Size << ", align = " << Align <<'\n');
DEBUG(BB->dump());
}
+/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
+/// instructions can be made more efficient if the jump table immediately
+/// follows the instruction, it's best to place them immediately next to their
+/// jumps to begin with. In almost all cases they'll never be moved from that
+/// position.
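+/// Concretely, each block ending in a BR_JT-style jump gets a new block
+/// inserted immediately after it, holding a single JUMPTABLE_* pseudo that
+/// records the island ID, the jump-table index, and the table's size in
+/// bytes.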
+void ARMConstantIslands::doInitialJumpTablePlacement(
+ std::vector<MachineInstr *> &CPEMIs) {
+ unsigned i = CPEntries.size();
+ auto MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+
+ MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
+ for (MachineBasicBlock &MBB : *MF) {
+ auto MI = MBB.getLastNonDebugInstr();
+ // A block with no real instructions (e.g. one holding only debug values)
+ // has no terminator to inspect; getLastNonDebugInstr returns end() there.
+ if (MI == MBB.end())
+ continue;
+
+ unsigned JTOpcode;
+ switch (MI->getOpcode()) {
+ default:
+ continue;
+ case ARM::BR_JTadd:
+ case ARM::BR_JTr:
+ case ARM::tBR_JTr:
+ case ARM::BR_JTm:
+ JTOpcode = ARM::JUMPTABLE_ADDRS;
+ break;
+ case ARM::t2BR_JT:
+ JTOpcode = ARM::JUMPTABLE_INSTS;
+ break;
+ case ARM::t2TBB_JT:
+ JTOpcode = ARM::JUMPTABLE_TBB;
+ break;
+ case ARM::t2TBH_JT:
+ JTOpcode = ARM::JUMPTABLE_TBH;
+ break;
+ }
+
+ unsigned NumOps = MI->getDesc().getNumOperands();
+ MachineOperand JTOp =
+ MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
+ unsigned JTI = JTOp.getIndex();
+ unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
+ MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
+ MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
+ MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
+ DebugLoc(), TII->get(JTOpcode))
+ .addImm(i++)
+ .addJumpTableIndex(JTI)
+ .addImm(Size);
+ CPEMIs.push_back(CPEMI);
+ CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
+ JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
+ if (!LastCorrectlyNumberedBB)
+ LastCorrectlyNumberedBB = &MBB;
+ }
+
+ // If we did anything then we need to renumber the subsequent blocks.
+ if (LastCorrectlyNumberedBB)
+ MF->RenumberBlocks(LastCorrectlyNumberedBB);
+}
+
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
-static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
// Can't fall off end of function.
- if (llvm::next(MBBI) == MBB->getParent()->end())
+ if (std::next(MBBI) == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = llvm::next(MBBI);
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I == NextBB)
- return true;
+ MachineBasicBlock *NextBB = std::next(MBBI);
+ if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
+ return false;
- return false;
+ // Try to analyze the end of the block. A potential fallthrough may already
+ // have an unconditional branch for whatever reason.
+ MachineBasicBlock *TBB, *FBB;
+ SmallVector<MachineOperand, 4> Cond;
+ bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
+ return TooDifficult || FBB == nullptr;
}
/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
if (CPEs[i].CPEMI == CPEMI)
return &CPEs[i];
}
- return NULL;
+ return nullptr;
}
/// getCPELogAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI. Alignment is measured in log2(bytes) units.
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
- assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
-
- // Everything is 4-byte aligned unless AlignConstantIslands is set.
- if (!AlignConstantIslands)
+ switch (CPEMI->getOpcode()) {
+ case ARM::CONSTPOOL_ENTRY:
+ break;
+ case ARM::JUMPTABLE_TBB:
+ return 0;
+ case ARM::JUMPTABLE_TBH:
+ case ARM::JUMPTABLE_INSTS:
+ return 1;
+ case ARM::JUMPTABLE_ADDRS:
return 2;
+ default:
+ llvm_unreachable("unknown constpool entry kind");
+ }
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
unsigned Align = MCP->getConstants()[CPI].getAlignment();
assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
if (I->isDebugValue())
continue;
- int Opc = I->getOpcode();
+ unsigned Opc = I->getOpcode();
if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
PushPopMIs.push_back(I);
- if (Opc == ARM::CONSTPOOL_ENTRY)
+ if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
+ Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
+ Opc == ARM::JUMPTABLE_TBH)
continue;
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
- if (I->getOperand(op).isCPI()) {
+ if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
// Taking the address of a CP entry.
case ARM::LEApcrel:
+ case ARM::LEApcrelJT:
// This takes a SoImm, which is 8 bit immediate rotated. We'll
// pretend the maximum offset is 255 * 4. Since each instruction
// is 4 bytes wide, this is always correct. We'll check for other
IsSoImm = true;
break;
case ARM::t2LEApcrel:
+ case ARM::t2LEApcrelJT:
Bits = 12;
NegOk = true;
break;
case ARM::tLEApcrel:
+ case ARM::tLEApcrelJT:
Bits = 8;
Scale = 4;
break;
+ case ARM::LDRBi12:
case ARM::LDRi12:
case ARM::LDRcp:
case ARM::t2LDRpci:
// Remember that this is a user of a CP entry.
unsigned CPI = I->getOperand(op).getIndex();
+ if (I->getOperand(op).isJTI()) {
+ JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
+ CPI = JumpTableEntryIndices[CPI];
+ }
+
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
// tBR_JTr contains a .align 2 directive.
if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
BBI.PostAlign = 2;
- MBB->getParent()->EnsureAlignment(2);
+ MBB->getParent()->ensureAlignment(2);
}
}
CompareMBBNumbers);
MachineBasicBlock* WaterBB = *IP;
if (WaterBB == OrigBB)
- WaterList.insert(llvm::next(IP), NewBB);
+ WaterList.insert(std::next(IP), NewBB);
else
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = getOffsetOf(CPEMI);
- assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
DEBUG({
assert(CPE && "Unexpected!");
if (--CPE->RefCount == 0) {
removeDeadCPEMI(CPEMI);
- CPE->CPEMI = NULL;
+ CPE->CPEMI = nullptr;
--NumCPEs;
return true;
}
return false;
}
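+/// Return the index into CPEntries covering the given pseudo: a
+/// CONSTPOOL_ENTRY carries its constant pool index directly, while the
+/// JUMPTABLE_* pseudos must be translated through JumpTableEntryIndices.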
+unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
+ if (CPEMI->getOperand(1).isCPI())
+ return CPEMI->getOperand(1).getIndex();
+
+ return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
+}
+
/// LookForCPEntryInRange - see if the currently referenced CPE is in range;
/// if not, see if an in-range clone of the CPE is in range, and if so,
/// change the data structures so the user references the clone. Returns:
}
// No. Look for previously created clones of the CPE that are in range.
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
std::vector<CPEntry> &CPEs = CPEntries[CPI];
for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
// We already tried this one
if (CPEs[i].CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries, skip
- if (CPEs[i].CPEMI == NULL)
+ if (CPEs[i].CPEMI == nullptr)
continue;
if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
U.NegOk)) {
return false;
unsigned BestGrowth = ~0u;
- for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
--IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
unsigned Growth;
if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) &&
+ Growth < BestGrowth) {
// This is the least amount of required padding seen so far.
BestGrowth = Growth;
WaterIter = IP;
if (BBHasFallthrough(UserMBB)) {
// Size of branch to insert.
unsigned Delta = isThumb1 ? 2 : 4;
- // End of UserBlock after adding a branch.
- unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
// Compute the offset where the CPE will begin.
- unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
- UserBBI.postKnownBits());
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
<< format(", expected CPE offset %#x\n", CPEOffset));
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ NewMBB = std::next(MachineFunction::iterator(UserMBB));
// Add an unconditional branch from UserMBB to fallthrough block. Record
// it for branch lengthening; this new branch will not get out of range,
// but if the preceding conditional branch is out of range, the targets
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
- BBInfo[UserMBB->getNumber()].Size += Delta;
+ computeBlockSize(UserMBB);
adjustBBOffsetsAfter(UserMBB);
return;
}
// up the insertion point.
// Try to split the block so it's fully aligned. Compute the latest split
- // point where we can add a 4-byte branch instruction, and then
- // WorstCaseAlign to LogAlign.
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign, which is the largest possible alignment in the function.
unsigned LogAlign = MF->getAlignment();
assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
unsigned KnownBits = UserBBI.internalKnownBits();
unsigned UPad = UnknownPadding(LogAlign, KnownBits);
- unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
DEBUG(dbgs() << format("Split in middle of big block before %#x",
BaseInsertOffset));
- // Account for alignment and unknown padding.
- BaseInsertOffset &= ~((1u << LogAlign) - 1);
- BaseInsertOffset -= UPad;
-
// The 4 in the following is for the unconditional branch we'll be inserting
// (allows for long branch on Thumb1). Alignment of the island is handled
// inside isOffsetInRange.
// pool entries following this block; only the last one is in the water list.
// Back past any possible branches (allow for a conditional and a maximally
// long unconditional).
- if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
- BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset =
- WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ // Ensure BaseInsertOffset is larger than the offset of the instruction
+ // following UserMI so that the loop which searches for the split point
+ // iterates at least once.
+ BaseInsertOffset =
+ std::max(UserBBI.postOffset() - UPad - 8,
+ UserOffset + TII->GetInstSizeInBytes(UserMI) + 1);
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
+ unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
CPEMI->getOperand(2).getImm();
MachineBasicBlock::iterator MI = UserMI;
++MI;
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
+ MachineInstr *LastIT = nullptr;
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
+ Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
// reused within the block, but it doesn't matter much. Also assume CPEs
// are added in order with alignment padding. We may eventually be able
// to pack the aligned CPEs better.
- EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
- 1u << getCPELogAlign(U.CPEMI)) +
- U.CPEMI->getOperand(2).getImm();
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
CPUIndex++;
}
// Avoid splitting an IT block.
if (LastIT) {
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
if (CC != ARMCC::AL)
MI = LastIT;
}
+
+ // We really must not split an IT block.
+ DEBUG(unsigned PredReg;
+ assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL));
+
NewMBB = splitBlockBeforeInstr(MI);
}
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
- unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned CPI = getCombinedIndex(CPEMI);
unsigned Size = CPEMI->getOperand(2).getImm();
// Compute this only once, it's expensive.
unsigned UserOffset = getUserOffset(U);
// If the original WaterList entry was "new water" on this iteration,
// propagate that to the new island. This is just keeping NewWaterList
// updated to match the WaterList, which will be updated below.
- if (NewWaterList.count(WaterBB)) {
- NewWaterList.erase(WaterBB);
+ if (NewWaterList.erase(WaterBB))
NewWaterList.insert(NewIsland);
- }
+
// The new CPE goes before the following block (NewMBB).
- NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+ NewMBB = std::next(MachineFunction::iterator(WaterBB));
} else {
// No water found.
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
- MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
// Update internal data structures to account for the newly inserted MBB.
updateForInsertedWaterBlock(NewIsland);
- // Decrement the old entry, and remove it if refcount becomes 0.
- decrementCPEReferenceCount(CPI, CPEMI);
-
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
U.HighWaterMark = NewIsland;
- U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
- .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
+ .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
+
// Mark the basic block as aligned as required by the const-pool entry.
NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
// Increase the size of the island block to account for the new entry.
BBInfo[NewIsland->getNumber()].Size += Size;
- adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
+ adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
if (CPEBB->empty()) {
BBInfo[CPEBB->getNumber()].Size = 0;
- // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ // This block no longer needs to be aligned.
CPEBB->setAlignment(0);
} else
// Entries are sorted by descending alignment, so realign from the front.
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
removeDeadCPEMI(CPEs[j].CPEMI);
- CPEs[j].CPEMI = NULL;
+ CPEs[j].CPEMI = nullptr;
MadeChange = true;
}
}
++NumCBrFixed;
if (BMI != MI) {
- if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
BMI->getOpcode() == Br.UncondBr) {
// Last MI in the BB is an unconditional branch. Can we simply invert the
// condition and swap destinations:
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
// FIXME: Check if offset is multiple of scale if scale is not 4.
if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ DEBUG(dbgs() << "Shrink: " << *U.MI);
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
BBInfo[MBB->getNumber()].Size -= 2;
bool ARMConstantIslands::optimizeThumb2Branches() {
bool MadeChange = false;
- for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
- ImmBranch &Br = ImmBranches[i];
+ // The order in which branches appear in ImmBranches is approximately their
+ // order within the function body. By visiting later branches first, we reduce
+ // the distance between earlier forward branches and their targets, making it
+ // more likely that the cbn?z optimization, which can only apply to forward
+ // branches, will succeed.
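+ // (CBZ/CBNZ encode a 6-bit, halfword-scaled offset: they can only branch
+ // forward, at most 126 bytes past the PC.)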
+ for (unsigned i = ImmBranches.size(); i != 0; --i) {
+ ImmBranch &Br = ImmBranches[i-1];
unsigned Opcode = Br.MI->getOpcode();
unsigned NewOpc = 0;
unsigned Scale = 1;
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+ DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
BBInfo[MBB->getNumber()].Size -= 2;
NewOpc = 0;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
--CmpMI;
if (CmpMI->getOpcode() == ARM::tCMPi8) {
unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ Pred = getInstrPredicate(CmpMI, PredReg);
if (Pred == ARMCC::AL &&
CmpMI->getOperand(1).getImm() == 0 &&
isARMLowRegister(Reg)) {
MachineBasicBlock *MBB = Br.MI->getParent();
+ DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
MachineInstr *NewBR =
BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
.addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
return MadeChange;
}
+static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
+ unsigned BaseReg) {
+ if (I.getOpcode() != ARM::t2ADDrs)
+ return false;
+
+ if (I.getOperand(0).getReg() != EntryReg)
+ return false;
+
+ if (I.getOperand(1).getReg() != BaseReg)
+ return false;
+
+ // FIXME: what about CC and IdxReg?
+ return true;
+}
+
+/// \brief While trying to form a TBB/TBH instruction, we may (if the table
+/// doesn't immediately follow the BR_JT) need access to the start of the
+/// jump-table. We know one instruction that produces such a register; this
+/// function works out whether that definition can be preserved to the BR_JT,
+/// possibly by removing an intervening addition (which is usually needed to
+/// calculate the actual entry to jump to).
+bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
+ MachineInstr *LEAMI,
+ unsigned &DeadSize,
+ bool &CanDeleteLEA,
+ bool &BaseRegKill) {
+ if (JumpMI->getParent() != LEAMI->getParent())
+ return false;
+
+ // Now we hope that we have at least these instructions in the basic block:
+ // BaseReg = t2LEA ...
+ // [...]
+ // EntryReg = t2ADDrs BaseReg, ...
+ // [...]
+ // t2BR_JT EntryReg
+ //
+ // We have to be very conservative about what we recognise here though. The
+ // main perturbing factors to watch out for are:
+ // + Spills at any point in the chain: not direct problems but we would
+ // expect a blocking Def of the spilled register so in practice what we
+ // can do is limited.
+ // + EntryReg == BaseReg: this is the one situation we should allow a Def
+ // of BaseReg, but only if the t2ADDrs can be removed.
+ // + Some instruction other than t2ADDrs computing the entry. Not seen in
+ // the wild, but we should be careful.
+ unsigned EntryReg = JumpMI->getOperand(0).getReg();
+ unsigned BaseReg = LEAMI->getOperand(0).getReg();
+
+ CanDeleteLEA = true;
+ BaseRegKill = false;
+ MachineInstr *RemovableAdd = nullptr;
+ MachineBasicBlock::iterator I(LEAMI);
+ for (++I; &*I != JumpMI; ++I) {
+ if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
+ RemovableAdd = &*I;
+ break;
+ }
+
+ for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = I->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == BaseReg)
+ return false;
+ if (MO.isUse() && MO.getReg() == BaseReg) {
+ BaseRegKill = BaseRegKill || MO.isKill();
+ CanDeleteLEA = false;
+ }
+ }
+ }
+
+ if (!RemovableAdd)
+ return true;
+
+ // Check the add really is removable, and that nothing else in the block
+ // clobbers BaseReg.
+ for (++I; &*I != JumpMI; ++I) {
+ for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = I->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == BaseReg)
+ return false;
+ if (MO.isUse() && MO.getReg() == EntryReg)
+ RemovableAdd = nullptr;
+ }
+ }
+
+ if (RemovableAdd) {
+ RemovableAdd->eraseFromParent();
+ DeadSize += 4;
+ } else if (BaseReg == EntryReg) {
+ // The add wasn't removable, but clobbered the base for the TBB. So we can't
+ // preserve it.
+ return false;
+ }
+
+ // We reached the end of the block without seeing another definition of
+ // BaseReg (except possibly the t2ADDrs, which was removed). BaseReg can be
+ // used in the TBB/TBH if necessary.
+ return true;
+}
+
+/// \brief Returns whether CPEMI is the first instruction in the block
+/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
+/// we can switch the first register to PC and usually remove the address
+/// calculation that preceded it.
+static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
+ MachineFunction::iterator MBB = JTMI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ ++MBB;
+
+ return MBB != MF->end() && MBB->begin() != MBB->end() &&
+ &*MBB->begin() == CPEMI;
+}
+
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
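+/// TBB tables hold byte entries and TBH tables halfword entries, each
+/// implicitly multiplied by two and added to the PC, so a table of 32-bit
+/// absolute addresses can shrink to a quarter or half of its size.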
bool ARMConstantIslands::optimizeThumb2JumpTables() {
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return false;
+ if (!MJTI) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
break;
}
- if (ByteOk || HalfWordOk) {
- MachineBasicBlock *MBB = MI->getParent();
- unsigned BaseReg = MI->getOperand(0).getReg();
- bool BaseRegKill = MI->getOperand(0).isKill();
- if (!BaseRegKill)
- continue;
- unsigned IdxReg = MI->getOperand(1).getReg();
- bool IdxRegKill = MI->getOperand(1).isKill();
-
- // Scan backwards to find the instruction that defines the base
- // register. Due to post-RA scheduling, we can't count on it
- // immediately preceding the branch instruction.
- MachineBasicBlock::iterator PrevI = MI;
- MachineBasicBlock::iterator B = MBB->begin();
- while (PrevI != B && !PrevI->definesRegister(BaseReg))
- --PrevI;
-
- // If for some reason we didn't find it, we can't do anything, so
- // just skip this one.
- if (!PrevI->definesRegister(BaseReg))
- continue;
+ if (!ByteOk && !HalfWordOk)
+ continue;
- MachineInstr *AddrMI = PrevI;
- bool OptOk = true;
- // Examine the instruction that calculates the jumptable entry address.
- // Make sure it only defines the base register and kills any uses
- // other than the index register.
- for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
- const MachineOperand &MO = AddrMI->getOperand(k);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef() && MO.getReg() != BaseReg) {
- OptOk = false;
- break;
- }
- if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) {
- OptOk = false;
- break;
- }
- }
- if (!OptOk)
- continue;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!MI->getOperand(0).isKill()) // FIXME: needed now?
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
- // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
- // that gave us the initial base register definition.
- for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
- ;
+ CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
+ unsigned DeadSize = 0;
+ bool CanDeleteLEA = false;
+ bool BaseRegKill = false;
+ bool PreservedBaseReg =
+ preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
- // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
- // to delete it as well.
- MachineInstr *LeaMI = PrevI;
- if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
- LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
- LeaMI->getOperand(0).getReg() != BaseReg)
- OptOk = false;
+ if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
+ continue;
- if (!OptOk)
- continue;
+ DEBUG(dbgs() << "Shrink JT: " << *MI);
+ MachineInstr *CPEMI = User.CPEMI;
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ MachineBasicBlock::iterator MI_JT = MI;
+ MachineInstr *NewJTMI =
+ BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
+ .addReg(User.MI->getOperand(0).getReg(),
+ getKillRegState(BaseRegKill))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(CPEMI->getOperand(0).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
+
+ unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
+ CPEMI->setDesc(TII->get(JTOpc));
+
+ if (jumpTableFollowsTB(MI, User.CPEMI)) {
+ NewJTMI->getOperand(0).setReg(ARM::PC);
+ NewJTMI->getOperand(0).setIsKill(false);
+
+ if (CanDeleteLEA) {
+ User.MI->eraseFromParent();
+ DeadSize += 4;
+
+ // The LEA was eliminated; the TBB instruction becomes the only new
+ // user of the jump table.
+ User.MI = NewJTMI;
+ User.MaxDisp = 4;
+ User.NegOk = false;
+ User.IsSoImm = false;
+ User.KnownAlignment = false;
+ } else {
+ // The LEA couldn't be eliminated, so we must add another CPUser to
+ // record the TBB or TBH use.
+ int CPEntryIdx = JumpTableEntryIndices[JTI];
+ auto &CPEs = CPEntries[CPEntryIdx];
+ auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
+ return E.CPEMI == User.CPEMI;
+ });
+ ++Entry->RefCount;
+ CPUsers.emplace_back(NewJTMI, User.CPEMI, 4, false, false);
+ }
+ }
- unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
- MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
- .addReg(IdxReg, getKillRegState(IdxRegKill))
- .addJumpTableIndex(JTI, JTOP.getTargetFlags())
- .addImm(MI->getOperand(JTOpIdx+1).getImm());
- // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
- // is 2-byte aligned. For now, asm printer will fix it up.
- unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
- unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI);
- OrigSize += TII->GetInstSizeInBytes(LeaMI);
- OrigSize += TII->GetInstSizeInBytes(MI);
-
- AddrMI->eraseFromParent();
- LeaMI->eraseFromParent();
- MI->eraseFromParent();
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
- int delta = OrigSize - NewSize;
- BBInfo[MBB->getNumber()].Size -= delta;
- adjustBBOffsetsAfter(MBB);
+ int Delta = OrigSize - NewSize + DeadSize;
+ BBInfo[MBB->getNumber()].Size -= Delta;
+ adjustBBOffsetsAfter(MBB);
- ++NumTBs;
- MadeChange = true;
- }
+ ++NumTBs;
+ MadeChange = true;
}
return MadeChange;
bool MadeChange = false;
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return false;
+ if (!MJTI) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
// heuristic. FIXME: We can definitely improve it.
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
SmallVector<MachineOperand, 4> CondPrior;
MachineFunction::iterator BBi = BB;
- MachineFunction::iterator OldPrior = prior(BBi);
+ MachineFunction::iterator OldPrior = std::prev(BBi);
// If the block terminator isn't analyzable, don't try to move the block
bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
// Update numbering to account for the block being moved.
MF->RenumberBlocks();
++NumJTMoved;
- return NULL;
+ return nullptr;
}
// Create a new MBB for the code after the jump BB.