#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
using namespace llvm;
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumTBs, "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// Water - Potential places where an island could be formed.
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
- class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass {
+ class ARMConstantIslands : public MachineFunctionPass {
/// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
/// by MBB Number. The two-byte pads required for Thumb alignment are
/// counted as part of the following block (i.e., the offset and size for
/// to a return, unreachable, or unconditional branch).
std::vector<MachineBasicBlock*> WaterList;
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
/// CPUser - One user of a constant pool, keeping the machine instruction
/// pointer, the constant pool being referenced, and the max displacement
- /// allowed from the instruction to the CP.
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
struct CPUser {
MachineInstr *MI;
MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
unsigned MaxDisp;
bool NegOk;
bool IsSoImm;
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg, bool soimm)
- : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {}
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ HighWaterMark = CPEMI->getParent();
+ }
};
/// CPUsers - Keep track of all of the machine instructions that use various
/// the branch fix up pass.
bool HasFarJump;
- const TargetInstrInfo *TII;
+ /// HasInlineAsm - True if the function contains inline assembly.
+ bool HasInlineAsm;
+
+ const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
bool isThumb2;
public:
static char ID;
- ARMConstantIslands() : MachineFunctionPass(&ID) {}
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
void DoInitialPlacement(MachineFunction &MF,
std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
void InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
- bool LookForWater(CPUser&U, unsigned UserOffset,
- MachineBasicBlock *&NewMBB);
- MachineBasicBlock *AcceptWater(water_iterator IP);
+ bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
bool UndoLRSpillRestore();
bool OptimizeThumb2Instructions(MachineFunction &MF);
bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
if (!MBB->empty() &&
MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
unsigned MBBId = MBB->getNumber();
- assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ assert(HasInlineAsm ||
+ (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
(BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
}
}
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
+ unsigned CPEOffset = GetOffsetOf(U.CPEMI);
+ unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
+ UserOffset - CPEOffset;
+ assert(Disp <= U.MaxDisp || "Constant pool entry out of range!");
+ }
#endif
}
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
MachineConstantPool &MCP = *MF.getConstantPool();
- TII = MF.getTarget().getInstrInfo();
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
+ HasInlineAsm = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
MF.RenumberBlocks();
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
// Thumb1 functions containing constant pools get 4-byte alignment.
// This is so we can keep exact track of where the alignment padding goes.
- // Set default. Thumb1 function is 2-byte aligned, ARM and Thumb2 are 4-byte
- // aligned.
- AFI->setAlign(isThumb1 ? 1U : 2U);
+ // ARM and Thumb2 functions need to be 4-byte aligned.
+ if (!isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
if (!MCP.isEmpty()) {
DoInitialPlacement(MF, CPEMIs);
if (isThumb1)
- AFI->setAlign(2U);
+ MF.EnsureAlignment(2); // 2 = log2(4)
}
/// The next UID to take is the first unused one.
// constant pool users.
InitialFunctionScan(MF, CPEMIs);
CPEMIs.clear();
+ DEBUG(dumpBBs());
+
/// Remove dead constant pool entries.
RemoveUnusedCPEntries();
// Iteratively place constant pool entries and fix up branches until there
// is no change.
- bool MadeChange = false;
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
bool CPChange = false;
llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
// After a while, this might be made debug-only, but it is not expensive.
verify(MF);
- // If LR has been forced spilled and no far jumps (i.e. BL) has been issued.
- // Undo the spill / restore of LR if possible.
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
+ DEBUG(errs() << '\n'; dumpBBs());
+
BBSizes.clear();
BBOffsets.clear();
WaterList.clear();
// aligned.
assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
MachineInstr *CPEMI =
- BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
- .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
// Add a new CPEntry, but no corresponding CPUser yet.
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
- NumCPEs++;
+ ++NumCPEs;
DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
<< "\n");
}
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
- if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = next(MBBI);
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I)
if (*I == NextBB)
return NULL;
}
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs) {
+ // First thing, see if the function has any inline assembly in it. If so,
+ // we have to be conservative about alignment assumptions, as we don't
+ // know for sure the size of any instructions in the inline assembly.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getOpcode() == ARM::INLINEASM)
+ HasInlineAsm = true;
+ }
+
+ // Now go back through the instructions and build up our data structures
unsigned Offset = 0;
for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
MBBI != E; ++MBBI) {
unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
// Add instruction size to MBBSize.
MBBSize += TII->GetInstSizeInBytes(I);
case ARM::tBR_JTr:
// A Thumb1 table jump may involve padding; for the offsets to
// be right, functions containing these must be 4-byte aligned.
- AFI->setAlign(2U);
- if ((Offset+MBBSize)%4 != 0)
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
+ MF.EnsureAlignment(2U);
+ if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
// FIXME: Add a pseudo ALIGN instruction instead.
MBBSize += 2; // padding
continue; // Does not get an entry in ImmBranches
case ARM::LEApcrel:
// This takes a SoImm, which is 8 bit immediate rotated. We'll
// pretend the maximum offset is 255 * 4. Since each instruction
- // 4 byte wide, this is always correct. We'llc heck for other
+ // 4 byte wide, this is always correct. We'll check for other
// displacements that fits in a SoImm as well.
Bits = 8;
Scale = 4;
Scale = 4; // +(offset_8*4)
break;
- case ARM::FLDD:
- case ARM::FLDS:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
Bits = 8;
Scale = 4; // +-(offset_8*4)
NegOk = true;
if (isThumb &&
!MBB.empty() &&
MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4) != 0)
+ ((Offset%4) != 0 || HasInlineAsm))
MBBSize += 2;
BBSizes.push_back(MBBSize);
// alignment padding, and compensate if so.
if (isThumb &&
MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- Offset%4 != 0)
+ (Offset%4 != 0 || HasInlineAsm))
Offset += 2;
// Sum instructions before MI in MBB.
/// Split the basic block containing MI into two blocks, which are joined by
-/// an unconditional branch. Update datastructures and renumber blocks to
+/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond to anything in the source.
unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
- BuildMI(OrigBB, DebugLoc::getUnknownLoc(), TII->get(Opc)).addMBB(NewBB);
- NumSplit++;
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ ++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
while (!OrigBB->succ_empty()) {
// This pass should be run after register allocation, so there should be no
// PHI nodes to update.
- assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI)
+ assert((Succ->empty() || !Succ->begin()->isPHI())
&& "PHI nodes should be eliminated by now!");
}
CompareMBBNumbers);
MachineBasicBlock* WaterBB = *IP;
if (WaterBB == OrigBB)
- WaterList.insert(next(IP), NewBB);
+ WaterList.insert(llvm::next(IP), NewBB);
else
WaterList.insert(IP, OrigBB);
-
- // Figure out how large the first NewMBB is. (It cannot
- // contain a constpool_entry or tablejump.)
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
+ NewWaterList.insert(OrigBB);
unsigned OrigBBI = OrigBB->getNumber();
unsigned NewBBI = NewBB->getNumber();
- // Set the size of NewBB in BBSizes.
- BBSizes[NewBBI] = NewBBSize;
- // We removed instructions from UserMBB, subtract that off from its size.
- // Add 2 or 4 to the block to count the unconditional branch we added to it.
int delta = isThumb1 ? 2 : 4;
- BBSizes[OrigBBI] -= NewBBSize - delta;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
// ...and adjust BBOffsets for NewBB accordingly.
BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
// All BBOffsets following these blocks must be modified.
- AdjustBBOffsetsAfter(NewBB, delta);
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
return NewBB;
}
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert(CPEOffset%4 == 0 && "Misaligned CPE");
+ assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
if (DoDump) {
DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
+ MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
i < e; ++i) {
BBOffsets[i] += delta;
if (!isThumb)
continue;
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty()) {
+ if (!MBB->empty() && !HasInlineAsm) {
// Constant pool entries require padding.
if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
unsigned OldOffset = BBOffsets[i] - delta;
}
// Thumb1 jump tables require padding. They should be at the end;
// following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
MachineInstr *ThumbJTMI = prior(MBB->end());
if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
if (delta==0)
return;
}
- MBBI = next(MBBI);
+ MBBI = llvm::next(MBBI);
}
}
if (--CPE->RefCount == 0) {
RemoveDeadCPEMI(CPEMI);
CPE->CPEMI = NULL;
- NumCPEs--;
+ --NumCPEs;
return true;
}
return false;
return ((1<<23)-1)*4;
}
-/// AcceptWater - Small amount of common code factored out of the following.
-///
-MachineBasicBlock *ARMConstantIslands::AcceptWater(water_iterator IP) {
- DEBUG(errs() << "found water in range\n");
- MachineBasicBlock *WaterBB = *IP;
- // Remove the original WaterList entry; we want subsequent
- // insertions in this vicinity to go after the one we're
- // about to insert. This considerably reduces the number
- // of times we have to move the same CPE more than once.
- WaterList.erase(IP);
- // CPE goes before following block (NewMBB).
- return next(MachineFunction::iterator(WaterBB));
-}
-
-/// LookForWater - look for an existing entry in the WaterList in which
+/// LookForWater - Look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
-/// Returns true if found, false if not. If it returns true, NewMBB
+/// Returns true if found, false if not. If it returns true, WaterIter
/// is set to the WaterList entry. For Thumb, prefer water that will not
/// introduce padding to water that will. To ensure that this pass
/// terminates, the CPE location for a particular CPUser is only allowed to
/// move to a lower address, so search backward from the end of the list and
/// prefer the first water that is in range.
bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
- MachineBasicBlock *&NewMBB) {
+ water_iterator &WaterIter) {
if (WaterList.empty())
return false;
for (water_iterator IP = prior(WaterList.end()),
B = WaterList.begin();; --IP) {
MachineBasicBlock* WaterBB = *IP;
- // Check if water is in range and at a lower address than the current one.
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
if (WaterIsInRange(UserOffset, WaterBB, U) &&
- WaterBB->getNumber() < U.CPEMI->getParent()->getNumber()) {
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB))) {
unsigned WBBId = WaterBB->getNumber();
if (isThumb &&
(BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
IPThatWouldPad = IP;
}
} else {
- NewMBB = AcceptWater(IP);
+ WaterIter = IP;
return true;
}
}
break;
}
if (FoundWaterThatWouldPad) {
- NewMBB = AcceptWater(IPThatWouldPad);
+ WaterIter = IPThatWouldPad;
return true;
}
return false;
DEBUG(errs() << "Split at end of block\n");
if (&UserMBB->back() == UserMI)
assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = next(MachineFunction::iterator(UserMBB));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
// Add an unconditional branch from UserMBB to fallthrough block.
// Record it for branch lengthening; this new branch will not get out of
// range, but if the preceding conditional branch is out of range, the
// targets will be exchanged, and the altered branch may be out of
// range, so the machinery has to know about it.
int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- BuildMI(UserMBB, DebugLoc::getUnknownLoc(),
- TII->get(UncondBr)).addMBB(NewMBB);
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
- MI = next(MI)) {
+ MI = llvm::next(MI)) {
if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!OffsetIsInRange(Offset, EndInsertOffset,
MachineInstr *CPEMI = U.CPEMI;
unsigned CPI = CPEMI->getOperand(1).getIndex();
unsigned Size = CPEMI->getOperand(2).getImm();
- MachineBasicBlock *NewMBB;
// Compute this only once, it's expensive. The 4 or 8 is the value the
// hardware keeps in the PC.
unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
unsigned ID = AFI->createConstPoolEntryUId();
// Look for water where we can place this CPE.
- if (!LookForWater(U, UserOffset, NewMBB)) {
+ MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (LookForWater(U, UserOffset, IP)) {
+ DEBUG(errs() << "found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.count(WaterBB)) {
+ NewWaterList.erase(WaterBB);
+ NewWaterList.insert(NewIsland);
+ }
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
// No water found.
DEBUG(errs() << "No water found\n");
CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
}
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
// Okay, we know we can put an island before NewMBB now, do it!
- MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
MF.insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
- U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(),
- TII->get(ARM::CONSTPOOL_ENTRY))
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
- NumCPEs++;
+ ++NumCPEs;
BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
// Compensate for .align 2 in thumb mode.
- if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0)
+ if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
Size += 2;
// Increase the size of the island block to account for the new entry.
BBSizes[NewIsland->getNumber()] += Size;
BBSizes[MBB->getNumber()] += 2;
AdjustBBOffsetsAfter(MBB, 2);
HasFarJump = true;
- NumUBrFixed++;
+ ++NumUBrFixed;
DEBUG(errs() << " Changed B to long jump " << *MI);
MachineInstr *BMI = &MBB->back();
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
- NumCBrFixed++;
+ ++NumCBrFixed;
if (BMI != MI) {
- if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
BMI->getOpcode() == Br.UncondBr) {
// Last MI in the BB is an unconditional branch. Can we simply invert the
// condition and swap destinations:
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
AdjustBBOffsetsAfter(SplitBB, -delta);
MBB->back().eraseFromParent();
// BBOffsets[SplitBB] is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
// Insert a new conditional branch and a new unconditional branch.
// Also update the ImmBranch as well as adding a new entry for the new branch.
- BuildMI(MBB, DebugLoc::getUnknownLoc(),
- TII->get(MI->getOpcode()))
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
- BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
bool MadeChange = false;
for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
MachineInstr *MI = PushPopMIs[i];
- // First two operands are predicates, the third is a zero since there
- // is no writeback.
+ // First two operands are predicates.
if (MI->getOpcode() == ARM::tPOP_RET &&
- MI->getOperand(3).getReg() == ARM::PC &&
- MI->getNumExplicitOperands() == 4) {
+ MI->getOperand(2).getReg() == ARM::PC &&
+ MI->getNumExplicitOperands() == 3) {
BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
MI->eraseFromParent();
MadeChange = true;
Bits = 11;
Scale = 2;
break;
- case ARM::t2Bcc:
+ case ARM::t2Bcc: {
NewOpc = ARM::tBcc;
Bits = 8;
- Scale = 2;
+ Scale = 2;
break;
}
- if (!NewOpc)
+ }
+ if (NewOpc) {
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+ if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ Br.MI->setDesc(TII->get(NewOpc));
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumT2BrShrunk;
+ MadeChange = true;
+ }
+ }
+
+ Opcode = Br.MI->getOpcode();
+ if (Opcode != ARM::tBcc)
continue;
- unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ NewOpc = 0;
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ if (Pred == ARMCC::EQ)
+ NewOpc = ARM::tCBZ;
+ else if (Pred == ARMCC::NE)
+ NewOpc = ARM::tCBNZ;
+ if (!NewOpc)
+ continue;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
- if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
- Br.MI->setDesc(TII->get(NewOpc));
- MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
- ++NumT2BrShrunk;
- MadeChange = true;
+ // Check if the distance is within 126. Subtract starting offset by 2
+ // because the cmp will be eliminated.
+ unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
+ MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPzi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumCBZ;
+ MadeChange = true;
+ }
+ }
}
}
return MadeChange;
}
-
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
- const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
continue;
unsigned IdxReg = MI->getOperand(1).getReg();
bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
MachineBasicBlock::iterator PrevI = MI;
- if (PrevI == MBB->begin())
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
continue;
- MachineInstr *AddrMI = --PrevI;
+ MachineInstr *AddrMI = PrevI;
bool OptOk = true;
- // Examine the instruction that calculate the jumptable entry address.
- // If it's not the one just before the t2BR_JT, we won't delete it, then
- // it's not worth doing the optimization.
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
const MachineOperand &MO = AddrMI->getOperand(k);
if (!MO.isReg() || !MO.getReg())
if (!OptOk)
continue;
- // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
// to delete it as well.
- MachineInstr *LeaMI = --PrevI;
+ MachineInstr *LeaMI = PrevI;
if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
LeaMI->getOperand(0).getReg() != BaseReg)
return MadeChange;
}
+
+/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (MJTI == 0) return false;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ // We prefer if target blocks for the jump table come after the jump
+ // instruction so we can use TB[BH]. Loop through the target blocks
+ // and try to adjust them such that that's true.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ AdjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+MachineBasicBlock *ARMConstantIslands::
+AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
+{
+ MachineFunction &MF = *BB->getParent();
+
+ // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF.begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF.RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}