+
+/// mayOptimizeThumb2Instruction - Report whether \p MI carries one of the
+/// opcodes inspected by the Thumb2 shrinking routines below
+/// (optimizeThumb2Instructions, optimizeThumb2Branches and
+/// optimizeThumb2JumpTables), i.e. whether MI may later be shrunk.
+bool ARMConstantIslands::mayOptimizeThumb2Instruction(
+    const MachineInstr *MI) const {
+  const unsigned Opc = MI->getOpcode();
+
+  // Constant-pool users handled by optimizeThumb2Instructions.
+  if (Opc == ARM::t2LEApcrel || Opc == ARM::t2LDRpci)
+    return true;
+
+  // Branches handled by optimizeThumb2Branches.
+  if (Opc == ARM::t2B || Opc == ARM::t2Bcc || Opc == ARM::tBcc)
+    return true;
+
+  // Jump-table branch handled by optimizeThumb2JumpTables.
+  return Opc == ARM::t2BR_JT;
+}
+
+/// optimizeThumb2Instructions - Try to replace 32-bit Thumb2 instructions by
+/// their 16-bit counterparts. Constant-pool users (t2LEApcrel / t2LDRpci) are
+/// handled directly; branches and jump tables are delegated to
+/// optimizeThumb2Branches and optimizeThumb2JumpTables. Returns true if any
+/// of the three steps changed something.
+bool ARMConstantIslands::optimizeThumb2Instructions() {
+  bool Changed = false;
+
+  // Both replacements below use the same offset parameters: 8 bits, scaled
+  // by 4.
+  const unsigned NarrowBits = 8;
+  const unsigned NarrowScale = 4;
+
+  // Shrink ADR and LDR from constantpool.
+  for (unsigned Idx = 0, NumUsers = CPUsers.size(); Idx != NumUsers; ++Idx) {
+    CPUser &User = CPUsers[Idx];
+    MachineInstr *UserMI = User.MI;
+
+    // Pick the 16-bit opcode; anything else is not a candidate.
+    unsigned NarrowOpc = 0;
+    switch (UserMI->getOpcode()) {
+    case ARM::t2LEApcrel:
+      NarrowOpc = ARM::tLEApcrel;
+      break;
+    case ARM::t2LDRpci:
+      NarrowOpc = ARM::tLDRpci;
+      break;
+    default:
+      continue;
+    }
+
+    // Only shrink when the destination is a low register.
+    if (!isARMLowRegister(UserMI->getOperand(0).getReg()))
+      continue;
+
+    const unsigned UserOffset = getUserOffset(User);
+    unsigned MaxOffs = ((1 << NarrowBits) - 1) * NarrowScale;
+
+    // Be conservative with inline asm.
+    if (!User.KnownAlignment)
+      MaxOffs -= 2;
+
+    // FIXME: Check if offset is multiple of scale if scale is not 4.
+    if (!isCPEntryInRange(UserMI, UserOffset, User.CPEMI, MaxOffs, false,
+                          true))
+      continue;
+
+    DEBUG(dbgs() << "Shrink: " << *UserMI);
+    UserMI->setDesc(TII->get(NarrowOpc));
+
+    // The instruction is now 2 bytes shorter; propagate the new layout.
+    MachineBasicBlock *MBB = UserMI->getParent();
+    BBInfo[MBB->getNumber()].Size -= 2;
+    adjustBBOffsetsAfter(MBB);
+    ++NumT2CPShrunk;
+    Changed = true;
+  }
+
+  Changed |= optimizeThumb2Branches();
+  Changed |= optimizeThumb2JumpTables();
+  return Changed;
+}
+
+/// optimizeThumb2Branches - Shrink the branches recorded in ImmBranches.
+/// A t2B / t2Bcc whose destination is in range becomes tB / tBcc, and a
+/// tCMPi8 against zero directly followed by an EQ/NE tBcc is folded into a
+/// single tCBZ / tCBNZ when the destination is at most 126 bytes ahead.
+/// Returns true if anything was changed.
+bool ARMConstantIslands::optimizeThumb2Branches() {
+  bool Changed = false;
+
+  // ImmBranches is approximately in function-body order. Processing the
+  // later branches first shortens the distance between earlier forward
+  // branches and their targets, which makes the cbn?z folding (forward
+  // branches only) more likely to succeed.
+  for (unsigned Remaining = ImmBranches.size(); Remaining != 0; --Remaining) {
+    ImmBranch &Br = ImmBranches[Remaining - 1];
+
+    // Step 1: 32-bit branch -> 16-bit branch.
+    unsigned NarrowOpc = 0;
+    unsigned OffsetBits = 0;
+    const unsigned OffsetScale = 2;
+    switch (Br.MI->getOpcode()) {
+    case ARM::t2B:
+      NarrowOpc = ARM::tB;
+      OffsetBits = 11;
+      break;
+    case ARM::t2Bcc:
+      NarrowOpc = ARM::tBcc;
+      OffsetBits = 8;
+      break;
+    default:
+      break;
+    }
+
+    if (NarrowOpc) {
+      const unsigned MaxOffs = ((1 << (OffsetBits - 1)) - 1) * OffsetScale;
+      MachineBasicBlock *Target = Br.MI->getOperand(0).getMBB();
+      if (isBBInRange(Br.MI, Target, MaxOffs)) {
+        DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
+        Br.MI->setDesc(TII->get(NarrowOpc));
+        MachineBasicBlock *BrBB = Br.MI->getParent();
+        BBInfo[BrBB->getNumber()].Size -= 2;
+        adjustBBOffsetsAfter(BrBB);
+        ++NumT2BrShrunk;
+        Changed = true;
+      }
+    }
+
+    // Step 2: cmp + tBcc -> cbz / cbnz. Only a (possibly just narrowed)
+    // tBcc qualifies.
+    if (Br.MI->getOpcode() != ARM::tBcc)
+      continue;
+
+    // If the conditional branch doesn't kill CPSR, then CPSR can be liveout
+    // so this transformation is not safe.
+    if (!Br.MI->killsRegister(ARM::CPSR))
+      continue;
+
+    unsigned PredReg = 0;
+    ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
+    unsigned CBOpc = 0;
+    if (Pred == ARMCC::EQ)
+      CBOpc = ARM::tCBZ;
+    else if (Pred == ARMCC::NE)
+      CBOpc = ARM::tCBNZ;
+    else
+      continue;
+
+    MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
+    // Check if the distance is within 126. Subtract starting offset by 2
+    // because the cmp will be eliminated.
+    const unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
+    const unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+    if (BrOffset >= DestOffset || (DestOffset - BrOffset) > 126)
+      continue;
+
+    // The compare must be the instruction right before the branch.
+    MachineBasicBlock::iterator CmpMI = Br.MI;
+    if (CmpMI == Br.MI->getParent()->begin())
+      continue;
+    --CmpMI;
+    if (CmpMI->getOpcode() != ARM::tCMPi8)
+      continue;
+
+    // It has to be an unpredicated compare of a low register against zero.
+    unsigned Reg = CmpMI->getOperand(0).getReg();
+    Pred = getInstrPredicate(CmpMI, PredReg);
+    if (Pred != ARMCC::AL ||
+        CmpMI->getOperand(1).getImm() != 0 ||
+        !isARMLowRegister(Reg))
+      continue;
+
+    MachineBasicBlock *MBB = Br.MI->getParent();
+    DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
+    MachineInstr *NewBR =
+        BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(CBOpc))
+            .addReg(Reg)
+            .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
+    CmpMI->eraseFromParent();
+    Br.MI->eraseFromParent();
+    Br.MI = NewBR;
+    BBInfo[MBB->getNumber()].Size -= 2;
+    adjustBBOffsetsAfter(MBB);
+    ++NumCBZ;
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// Returns true if \p I is a t2ADDrs whose operand 0 is \p EntryReg and whose
+/// operand 1 is \p BaseReg. No other operand is inspected.
+static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
+                              unsigned BaseReg) {
+  // FIXME: what about CC and IdxReg?
+  return I.getOpcode() == ARM::t2ADDrs &&
+         I.getOperand(0).getReg() == EntryReg &&
+         I.getOperand(1).getReg() == BaseReg;
+}
+
+/// \brief Decide whether the register defined by \p LEAMI (the jump-table
+/// base) is still intact when control reaches \p JumpMI, so that a TBB/TBH
+/// can use it when the table does not immediately follow the branch.
+///
+/// An intervening t2ADDrs that computes the jump entry from the base is
+/// erased when nothing after it reads its result; its 4 bytes are then added
+/// to \p DeadSize. \p CanDeleteLEA is set to false as soon as some scanned
+/// instruction reads the base register, and \p BaseRegKill records whether
+/// such a read carried a kill flag. Returns false when the base register
+/// cannot be relied upon at the jump.
+bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
+                                              MachineInstr *LEAMI,
+                                              unsigned &DeadSize,
+                                              bool &CanDeleteLEA,
+                                              bool &BaseRegKill) {
+  if (LEAMI->getParent() != JumpMI->getParent())
+    return false;
+
+  // The shape we hope to find inside the basic block is:
+  //     BaseReg = t2LEA ...
+  //     [...]
+  //     EntryReg = t2ADDrs BaseReg, ...
+  //     [...]
+  //     t2BR_JT EntryReg
+  //
+  // We must stay very conservative about what we recognise. Things that can
+  // perturb the pattern:
+  //    + Spills anywhere in the chain: not a direct problem, but we would
+  //      expect a blocking Def of the spilled register, so in practice what
+  //      we can do is limited.
+  //    + EntryReg == BaseReg: the only case where a Def of BaseReg is
+  //      tolerated, and only if the t2ADDrs can be removed.
+  //    + An instruction other than t2ADDrs computing the entry. Not seen in
+  //      the wild, but we should be careful.
+  const unsigned EntryReg = JumpMI->getOperand(0).getReg();
+  const unsigned BaseReg = LEAMI->getOperand(0).getReg();
+
+  CanDeleteLEA = true;
+  BaseRegKill = false;
+
+  // Phase 1: walk from the LEA towards the jump until the index add shows up,
+  // watching every register operand that names BaseReg on the way.
+  MachineInstr *IndexAdd = nullptr;
+  MachineBasicBlock::iterator Cur(LEAMI);
+  ++Cur;
+  while (&*Cur != JumpMI) {
+    if (isSimpleIndexCalc(*Cur, EntryReg, BaseReg)) {
+      IndexAdd = &*Cur;
+      break;
+    }
+
+    for (unsigned OpNo = 0, NumOps = Cur->getNumOperands(); OpNo != NumOps;
+         ++OpNo) {
+      const MachineOperand &MO = Cur->getOperand(OpNo);
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (MO.getReg() != BaseReg)
+        continue;
+      // A redefinition destroys the base.
+      if (MO.isDef())
+        return false;
+      // A read means the LEA result is needed by someone else.
+      if (MO.isUse()) {
+        if (!BaseRegKill)
+          BaseRegKill = MO.isKill();
+        CanDeleteLEA = false;
+      }
+    }
+    ++Cur;
+  }
+
+  // No add in between: the base register reaches the jump untouched.
+  if (!IndexAdd)
+    return true;
+
+  // Phase 2: check the add really is removable, and that nothing else in the
+  // block clobbers BaseReg.
+  bool EntryRegRead = false;
+  for (++Cur; &*Cur != JumpMI; ++Cur) {
+    for (unsigned OpNo = 0, NumOps = Cur->getNumOperands(); OpNo != NumOps;
+         ++OpNo) {
+      const MachineOperand &MO = Cur->getOperand(OpNo);
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (MO.isDef() && MO.getReg() == BaseReg)
+        return false;
+      if (MO.isUse() && MO.getReg() == EntryReg)
+        EntryRegRead = true;
+    }
+  }
+
+  if (EntryRegRead) {
+    // The add wasn't removable. If it also wrote BaseReg it clobbered the
+    // base for the TBB, so we can't preserve it.
+    if (BaseReg == EntryReg)
+      return false;
+  } else {
+    IndexAdd->eraseFromParent();
+    DeadSize += 4;
+  }
+
+  // We reached the jump without seeing another definition of BaseReg (except,
+  // possibly, the t2ADDrs, which was removed). BaseReg can be used in the
+  // TBB/TBH if necessary.
+  return true;
+}
+
+/// \brief Returns whether \p CPEMI is the very first instruction of the block
+/// laid out right after the block containing \p JTMI (assumed to be a TBB or
+/// TBH terminator). If so, we can switch the first register to PC and usually
+/// remove the address calculation that preceded it.
+static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
+  MachineFunction::iterator Next = JTMI->getParent()->getIterator();
+  MachineFunction *Fn = Next->getParent();
+  ++Next;
+
+  // JTMI lives in the last block of the function.
+  if (Next == Fn->end())
+    return false;
+
+  // The following block has no instructions at all.
+  MachineBasicBlock::iterator First = Next->begin();
+  if (First == Next->end())
+    return false;
+
+  return &*First == CPEMI;
+}
+
+/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// jumptables when it's possible. Returns true if at least one jump-table branch in T2JumpTables was rewritten.
+bool ARMConstantIslands::optimizeThumb2JumpTables() {
+ bool MadeChange = false;
+
+ // FIXME: After the tables are shrunk, can we get rid of some of the
+ // constantpool tables?
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return false; // Nothing to do without jump-table info.
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1); // Position of the jump-table index operand, counted from the end.
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+ bool ByteOk = true; // All destinations fit the byte-table (TBB) limit.
+ bool HalfWordOk = true; // All destinations fit the halfword-table (TBH) limit.
+ unsigned JTOffset = getOffsetOf(MI) + 4; // Destination distances are measured from 4 bytes past MI's offset.
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
+ // Negative offset is not ok: the unsigned subtraction below wraps to a large value and fails the checks. FIXME: We should change BB layout to make
+ // sure all the branches are forward.
+ if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
+ ByteOk = false;
+ unsigned TBHLimit = ((1<<16)-1)*2;
+ if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
+ HalfWordOk = false;
+ if (!ByteOk && !HalfWordOk)
+ break; // Neither table width works; no need to look at the remaining destinations.
+ }
+
+ if (!ByteOk && !HalfWordOk)
+ continue;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!MI->getOperand(0).isKill()) // FIXME: needed now?
+ continue;
+ unsigned IdxReg = MI->getOperand(1).getReg();
+ bool IdxRegKill = MI->getOperand(1).isKill();
+
+ CPUser &User = CPUsers[JumpTableUserIndices[JTI]]; // NOTE: reference into CPUsers; it is not used again after the emplace_back further down.
+ unsigned DeadSize = 0; // Bytes of instructions erased in addition to the jump itself.
+ bool CanDeleteLEA = false;
+ bool BaseRegKill = false;
+ bool PreservedBaseReg =
+ preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill); // Side effect: may erase an intervening t2ADDrs and add its size to DeadSize.
+
+ if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg) // Need either the table right after MI (PC base) or an intact base register.
+ continue;
+
+ DEBUG(dbgs() << "Shrink JT: " << *MI);
+ MachineInstr *CPEMI = User.CPEMI;
+ unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; // Prefer the byte form whenever it is usable.
+ MachineBasicBlock::iterator MI_JT = MI;
+ MachineInstr *NewJTMI =
+ BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) // Inserted in front of MI; MI itself is erased below.
+ .addReg(User.MI->getOperand(0).getReg(),
+ getKillRegState(BaseRegKill))
+ .addReg(IdxReg, getKillRegState(IdxRegKill))
+ .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+ .addImm(CPEMI->getOperand(0).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
+
+ unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
+ CPEMI->setDesc(TII->get(JTOpc)); // Switch the table instruction to the matching TBB/TBH flavour.
+
+ if (jumpTableFollowsTB(MI, User.CPEMI)) {
+ NewJTMI->getOperand(0).setReg(ARM::PC); // Table is right behind the branch: use PC as the base instead.
+ NewJTMI->getOperand(0).setIsKill(false);
+
+ if (CanDeleteLEA) {
+ User.MI->eraseFromParent();
+ DeadSize += 4;
+
+ // The LEA was eliminated, the TBB instruction becomes the only new user
+ // of the jump table, so the existing CPUser record is repurposed for it.
+ User.MI = NewJTMI;
+ User.MaxDisp = 4;
+ User.NegOk = false;
+ User.IsSoImm = false;
+ User.KnownAlignment = false;
+ } else {
+ // The LEA couldn't be eliminated, so we must add another CPUser to
+ // record the TBB or TBH use (and bump the entry's reference count to match).
+ int CPEntryIdx = JumpTableEntryIndices[JTI];
+ auto &CPEs = CPEntries[CPEntryIdx];
+ auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
+ return E.CPEMI == User.CPEMI;
+ });
+ ++Entry->RefCount; // NOTE(review): Entry is dereferenced without an end() check; assumes the lookup always succeeds.
+ CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
+ }
+ }
+
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+ unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+
+ int Delta = OrigSize - NewSize + DeadSize; // Total bytes saved: size difference of the jump plus erased instructions.
+ BBInfo[MBB->getNumber()].Size -= Delta;
+ adjustBBOffsetsAfter(MBB);
+
+ ++NumTBs;
+ MadeChange = true;
+ }
+
+ return MadeChange;
+}
+
+/// reorderThumb2JumpTables - Adjust the function's block layout to ensure
+/// that jump tables always branch forwards, since that's what tbb and tbh
+/// need. Returns true if any destination was found in front of its jump
+/// table branch and handed to adjustJTTargetBlockForward.
+bool ARMConstantIslands::reorderThumb2JumpTables() {
+  MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (!MJTI)
+    return false;
+
+  bool Changed = false;
+  const std::vector<MachineJumpTableEntry> &Tables = MJTI->getJumpTables();
+  for (unsigned TblIdx = 0, NumTbls = T2JumpTables.size(); TblIdx != NumTbls;
+       ++TblIdx) {
+    MachineInstr *JumpMI = T2JumpTables[TblIdx];
+
+    // Locate the jump-table index operand of the branch.
+    const unsigned NumOps = JumpMI->getDesc().getNumOperands();
+    const unsigned JTOpIdx = NumOps - (JumpMI->isPredicable() ? 2 : 1);
+    const unsigned JTI = JumpMI->getOperand(JTOpIdx).getIndex();
+    assert(JTI < Tables.size());
+
+    // We prefer the target blocks of the jump table to come after the jump
+    // instruction so TB[BH] can be used. Visit every target and try to fix
+    // up those that come first. The branch's block number is sampled once,
+    // before any target is adjusted.
+    const int SwitchBBNumber = JumpMI->getParent()->getNumber();
+    const std::vector<MachineBasicBlock*> &Targets = Tables[JTI].MBBs;
+    for (unsigned DestIdx = 0, NumDests = Targets.size(); DestIdx != NumDests;
+         ++DestIdx) {
+      MachineBasicBlock *Dest = Targets[DestIdx];
+      if (Dest->getNumber() >= SwitchBBNumber)
+        continue;
+
+      // The destination precedes the switch. Try to move the block forward
+      // so we have a positive offset; a non-null result is a replacement
+      // block that must take Dest's place in the table.
+      if (MachineBasicBlock *Replacement =
+              adjustJTTargetBlockForward(Dest, JumpMI->getParent()))
+        MJTI->ReplaceMBBInJumpTable(JTI, Dest, Replacement);
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// adjustJTTargetBlockForward - Make the jump-table destination \p BB
+/// reachable by a forward branch from \p JTBB. Either BB itself is moved to
+/// just after JTBB (nullptr is returned), or a new block holding a single
+/// unconditional t2B to BB is inserted after JTBB and returned so the caller
+/// can redirect the jump-table entry to it.
+MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward(
+    MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
+  // Simple heuristic: a destination block that ends in an unconditional
+  // branch gets moved; otherwise a new block is created behind the jump
+  // table that branches back to the real target.
+  // FIXME: We can definitely improve it.
+  MachineBasicBlock *TrueDest = nullptr;
+  MachineBasicBlock *FalseDest = nullptr;
+  SmallVector<MachineOperand, 4> BBCond;
+  SmallVector<MachineOperand, 4> PriorCond;
+  MachineFunction::iterator PriorBB = std::prev(BB->getIterator());
+
+  // If the block terminator isn't analyzable, don't try to move the block.
+  const bool Unanalyzable =
+      TII->AnalyzeBranch(*BB, TrueDest, FalseDest, BBCond);
+
+  // Moving requires an unconditional terminator on BB and an analyzable
+  // terminator on the block in front of it. Be paranoid and make sure we're
+  // not trying to move the entry block of the function.
+  const bool CanRelocate =
+      !Unanalyzable && BBCond.empty() && BB != MF->begin() &&
+      !TII->AnalyzeBranch(*PriorBB, TrueDest, FalseDest, PriorCond);
+  if (CanRelocate) {
+    BB->moveAfter(JTBB);
+    PriorBB->updateTerminator();
+    BB->updateTerminator();
+    // Update numbering to account for the block being moved.
+    MF->RenumberBlocks();
+    ++NumJTMoved;
+    return nullptr;
+  }
+
+  // Create a new MBB for the code after the jump BB.
+  MachineBasicBlock *Trampoline =
+      MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
+  MF->insert(std::next(JTBB->getIterator()), Trampoline);
+
+  // Add an unconditional branch from the new block to BB.
+  // There doesn't seem to be meaningful DebugInfo available; this doesn't
+  // correspond directly to anything in the source.
+  assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+  BuildMI(Trampoline, DebugLoc(), TII->get(ARM::t2B))
+      .addMBB(BB)
+      .addImm(ARMCC::AL)
+      .addReg(0);
+
+  // Update internal data structures to account for the newly inserted MBB.
+  MF->RenumberBlocks(Trampoline);
+
+  // Update the CFG.
+  Trampoline->addSuccessor(BB);
+  JTBB->replaceSuccessor(BB, Trampoline);
+
+  ++NumJTInserted;
+  return Trampoline;
+}