X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMConstantIslandPass.cpp;h=55c1684028c2add04535d69441885371e21177db;hp=29405eb65d52757aaa14541ef4adac4d2637e6f3;hb=2921ff9ffcfd09db1c8d304188739d8d89de5611;hpb=8bca5de6a99a9c9ad682d844a34afeffb438ae41 diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 29405eb65d5..55c1684028c 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -53,11 +53,6 @@ static cl::opt AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); -// FIXME: This option should be removed once it has received sufficient testing. -static cl::opt -AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true), - cl::desc("Align constant islands in code")); - /// UnknownPadding - Return the worst case padding that could result from /// unknown offset bits. This does not include alignment padding caused by /// known offset bits. @@ -185,9 +180,7 @@ namespace { MachineInstr *MI; MachineInstr *CPEMI; MachineBasicBlock *HighWaterMark; - private: unsigned MaxDisp; - public: bool NegOk; bool IsSoImm; bool KnownAlignment; @@ -221,12 +214,24 @@ namespace { }; /// CPEntries - Keep track of all of the constant pool entry machine - /// instructions. For each original constpool index (i.e. those that - /// existed upon entry to this pass), it keeps a vector of entries. - /// Original elements are cloned as we go along; the clones are - /// put in the vector of the original element, but have distinct CPIs. + /// instructions. For each original constpool index (i.e. those that existed + /// upon entry to this pass), it keeps a vector of entries. Original + /// elements are cloned as we go along; the clones are put in the vector of + /// the original element, but have distinct CPIs. + /// + /// The first half of CPEntries contains generic constants, the second half + /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up + /// which vector it will be in here. std::vector > CPEntries; + /// Maps a JT index to the offset in CPEntries containing copies of that + /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity. + DenseMap JumpTableEntryIndices; + + /// Maps a JT index to the LEA that actually uses the index to calculate its + /// base address. + DenseMap JumpTableUserIndices; + /// ImmBranch - One per immediate branch, keeping the machine instruction /// pointer, conditional or unconditional, the max displacement, /// and (if isCond is true) the corresponding unconditional branch @@ -235,8 +240,8 @@ namespace { MachineInstr *MI; unsigned MaxDisp : 31; bool isCond : 1; - int UncondBr; - ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr) + unsigned UncondBr; + ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr) : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {} }; @@ -274,7 +279,8 @@ namespace { } private: - void doInitialPlacement(std::vector &CPEMIs); + void doInitialConstPlacement(std::vector &CPEMIs); + void doInitialJumpTablePlacement(std::vector &CPEMIs); bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); @@ -284,6 +290,7 @@ namespace { void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); void adjustBBOffsetsAfter(MachineBasicBlock *BB); bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + unsigned getCombinedIndex(const MachineInstr *CPEMI); int findInRangeCPEntry(CPUser& U, unsigned UserOffset); bool findAvailableWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); @@ -306,6 +313,9 @@ namespace { bool optimizeThumb2Instructions(); bool optimizeThumb2Branches(); bool reorderThumb2JumpTables(); + bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI, + unsigned &DeadSize, bool &CanDeleteLEA, + bool &BaseRegKill); bool optimizeThumb2JumpTables(); MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); @@ -330,12 +340,12 @@ namespace { /// verify - check BBOffsets, BBSizes, alignment of islands void ARMConstantIslands::verify() { #ifndef NDEBUG - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - unsigned MBBId = MBB->getNumber(); - assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); - } + assert(std::is_sorted(MF->begin(), MF->end(), + [this](const MachineBasicBlock &LHS, + const MachineBasicBlock &RHS) { + return BBInfo[LHS.getNumber()].postOffset() < + BBInfo[RHS.getNumber()].postOffset(); + })); DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; @@ -383,11 +393,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMBaseInstrInfo *)MF->getTarget() - .getSubtargetImpl() - ->getInstrInfo(); + STI = &static_cast(MF->getSubtarget()); + TII = STI->getInstrInfo(); AFI = MF->getInfo(); - STI = &MF->getTarget().getSubtarget(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); @@ -414,18 +422,14 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MF->RenumberBlocks(); } - // Thumb1 functions containing constant pools get 4-byte alignment. - // This is so we can keep exact track of where the alignment padding goes. - - // ARM and Thumb2 functions need to be 4-byte aligned. - if (!isThumb1) - MF->ensureAlignment(2); // 2 = log2(4) - // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. std::vector CPEMIs; if (!MCP->isEmpty()) - doInitialPlacement(CPEMIs); + doInitialConstPlacement(CPEMIs); + + if (MF->getJumpTableInfo()) + doInitialJumpTablePlacement(CPEMIs); /// The next UID to take is the first unused one. AFI->initPICLabelUId(CPEMIs.size()); @@ -437,6 +441,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { CPEMIs.clear(); DEBUG(dumpBBs()); + // Functions with jump tables need an alignment of 4 because they use the ADR + // instruction, which aligns the PC to 4 bytes before adding an offset. + if (!T2JumpTables.empty()) + MF->ensureAlignment(2); /// Remove dead constant pool entries. MadeChange |= removeUnusedCPEntries(); @@ -486,7 +494,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) { const CPEntry & CPE = CPEntries[i][j]; - AFI->recordCPEClone(i, CPE.CPI); + if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI()) + AFI->recordCPEClone(i, CPE.CPI); } } @@ -496,6 +505,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { WaterList.clear(); CPUsers.clear(); CPEntries.clear(); + JumpTableEntryIndices.clear(); + JumpTableUserIndices.clear(); ImmBranches.clear(); PushPopMIs.clear(); T2JumpTables.clear(); @@ -503,10 +514,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { return MadeChange; } -/// doInitialPlacement - Perform the initial placement of the constant pool -/// entries. To start with, we put them all at the end of the function. +/// \brief Perform the initial placement of the regular constant pool entries. +/// To start with, we put them all at the end of the function. void -ARMConstantIslands::doInitialPlacement(std::vector &CPEMIs) { +ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) { // Create the basic block to hold the CPE's. MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); @@ -515,8 +526,7 @@ ARMConstantIslands::doInitialPlacement(std::vector &CPEMIs) { unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); // Mark the basic block as required by the const-pool. - // If AlignConstantIslands isn't set, use 4-byte alignment for everything. - BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + BB->setAlignment(MaxAlign); // The function needs to be as aligned as the basic blocks. The linker may // move functions around based on their alignment. @@ -532,7 +542,7 @@ ARMConstantIslands::doInitialPlacement(std::vector &CPEMIs) { // identity mapping of CPI's to CPE's. const std::vector &CPs = MCP->getConstants(); - const DataLayout &TD = *MF->getSubtarget().getDataLayout(); + const DataLayout &TD = MF->getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -565,16 +575,78 @@ ARMConstantIslands::doInitialPlacement(std::vector &CPEMIs) { DEBUG(BB->dump()); } +/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH +/// instructions can be made more efficient if the jump table immediately +/// follows the instruction, it's best to place them immediately next to their +/// jumps to begin with. In almost all cases they'll never be moved from that +/// position. +void ARMConstantIslands::doInitialJumpTablePlacement( + std::vector &CPEMIs) { + unsigned i = CPEntries.size(); + auto MJTI = MF->getJumpTableInfo(); + const std::vector &JT = MJTI->getJumpTables(); + + MachineBasicBlock *LastCorrectlyNumberedBB = nullptr; + for (MachineBasicBlock &MBB : *MF) { + auto MI = MBB.getLastNonDebugInstr(); + if (MI == MBB.end()) + continue; + + unsigned JTOpcode; + switch (MI->getOpcode()) { + default: + continue; + case ARM::BR_JTadd: + case ARM::BR_JTr: + case ARM::tBR_JTr: + case ARM::BR_JTm: + JTOpcode = ARM::JUMPTABLE_ADDRS; + break; + case ARM::t2BR_JT: + JTOpcode = ARM::JUMPTABLE_INSTS; + break; + case ARM::t2TBB_JT: + JTOpcode = ARM::JUMPTABLE_TBB; + break; + case ARM::t2TBH_JT: + JTOpcode = ARM::JUMPTABLE_TBH; + break; + } + + unsigned NumOps = MI->getDesc().getNumOperands(); + MachineOperand JTOp = + MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1)); + unsigned JTI = JTOp.getIndex(); + unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t); + MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock(); + MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB); + MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(), + DebugLoc(), TII->get(JTOpcode)) + .addImm(i++) + .addJumpTableIndex(JTI) + .addImm(Size); + CPEMIs.push_back(CPEMI); + CPEntries.emplace_back(1, CPEntry(CPEMI, JTI)); + JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1)); + if (!LastCorrectlyNumberedBB) + LastCorrectlyNumberedBB = &MBB; + } + + // If we did anything then we need to renumber the subsequent blocks. + if (LastCorrectlyNumberedBB) + MF->RenumberBlocks(LastCorrectlyNumberedBB); +} + /// BBHasFallthrough - Return true if the specified basic block can fallthrough /// into the block immediately after it. bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; + MachineFunction::iterator MBBI = MBB->getIterator(); // Can't fall off end of function. if (std::next(MBBI) == MBB->getParent()->end()) return false; - MachineBasicBlock *NextBB = std::next(MBBI); + MachineBasicBlock *NextBB = &*std::next(MBBI); if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end()) return false; @@ -604,13 +676,21 @@ ARMConstantIslands::CPEntry /// getCPELogAlign - Returns the required alignment of the constant pool entry /// represented by CPEMI. Alignment is measured in log2(bytes) units. unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { - assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY); - - // Everything is 4-byte aligned unless AlignConstantIslands is set. - if (!AlignConstantIslands) + switch (CPEMI->getOpcode()) { + case ARM::CONSTPOOL_ENTRY: + break; + case ARM::JUMPTABLE_TBB: + return 0; + case ARM::JUMPTABLE_TBH: + case ARM::JUMPTABLE_INSTS: + return 1; + case ARM::JUMPTABLE_ADDRS: return 2; + default: + llvm_unreachable("unknown constpool entry kind"); + } - unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); unsigned Align = MCP->getConstants()[CPI].getAlignment(); assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); @@ -644,15 +724,15 @@ initializeFunctionInfo(const std::vector &CPEMIs) { // has any inline assembly in it. If so, we have to be conservative about // alignment assumptions, as we don't know for sure the size of any // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(I); + for (MachineBasicBlock &MBB : *MF) + computeBlockSize(&MBB); // The known bits of the entry block offset are determined by the function // alignment. BBInfo.front().KnownBits = MF->getAlignment(); // Compute block offsets and known bits. - adjustBBOffsetsAfter(MF->begin()); + adjustBBOffsetsAfter(&MF->front()); // Now go back through the instructions and build up our data structures. for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); @@ -669,7 +749,7 @@ initializeFunctionInfo(const std::vector &CPEMIs) { if (I->isDebugValue()) continue; - int Opc = I->getOpcode(); + unsigned Opc = I->getOpcode(); if (I->isBranch()) { bool isCond = false; unsigned Bits = 0; @@ -719,12 +799,14 @@ initializeFunctionInfo(const std::vector &CPEMIs) { if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) PushPopMIs.push_back(I); - if (Opc == ARM::CONSTPOOL_ENTRY) + if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS || + Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB || + Opc == ARM::JUMPTABLE_TBH) continue; // Scan the instructions for constant pool operands. for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) - if (I->getOperand(op).isCPI()) { + if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) { // We found one. The addressing mode tells us the max displacement // from the PC that this instruction permits. @@ -740,6 +822,7 @@ initializeFunctionInfo(const std::vector &CPEMIs) { // Taking the address of a CP entry. case ARM::LEApcrel: + case ARM::LEApcrelJT: // This takes a SoImm, which is 8 bit immediate rotated. We'll // pretend the maximum offset is 255 * 4. Since each instruction // 4 byte wide, this is always correct. We'll check for other @@ -750,10 +833,12 @@ initializeFunctionInfo(const std::vector &CPEMIs) { IsSoImm = true; break; case ARM::t2LEApcrel: + case ARM::t2LEApcrelJT: Bits = 12; NegOk = true; break; case ARM::tLEApcrel: + case ARM::tLEApcrelJT: Bits = 8; Scale = 4; break; @@ -781,6 +866,11 @@ initializeFunctionInfo(const std::vector &CPEMIs) { // Remember that this is a user of a CP entry. unsigned CPI = I->getOperand(op).getIndex(); + if (I->getOperand(op).isJTI()) { + JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size())); + CPI = JumpTableEntryIndices[CPI]; + } + MachineInstr *CPEMI = CPEMIs[CPI]; unsigned MaxOffs = ((1 << Bits)-1) * Scale; CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); @@ -880,7 +970,7 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MachineFunction::iterator MBBI = ++OrigBB->getIterator(); MF->insert(MBBI, NewBB); // Splice the instructions starting with MI over to NewBB. @@ -1000,7 +1090,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, unsigned CPELogAlign = getCPELogAlign(U.CPEMI); unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); unsigned NextBlockOffset, NextBlockAlignment; - MachineFunction::const_iterator NextBlock = Water; + MachineFunction::const_iterator NextBlock = Water->getIterator(); if (++NextBlock == MF->end()) { NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); NextBlockAlignment = 0; @@ -1114,6 +1204,13 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, return false; } +unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) { + if (CPEMI->getOperand(1).isCPI()) + return CPEMI->getOperand(1).getIndex(); + + return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()]; +} + /// LookForCPEntryInRange - see if the currently referenced CPE is in range; /// if not, see if an in-range clone of the CPE is in range, and if so, /// change the data structures so the user references the clone. Returns: @@ -1133,7 +1230,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) } // No. Look for previously created clones of the CPE that are in range. - unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); std::vector &CPEs = CPEntries[CPI]; for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { // We already tried this one @@ -1255,7 +1352,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (isOffsetInRange(UserOffset, CPEOffset, U)) { DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() << format(", expected CPE offset %#x\n", CPEOffset)); - NewMBB = std::next(MachineFunction::iterator(UserMBB)); + NewMBB = &*++UserMBB->getIterator(); // Add an unconditional branch from UserMBB to fallthrough block. Record // it for branch lengthening; this new branch will not get out of range, // but if the preceding conditional branch is out of range, the targets @@ -1270,7 +1367,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); - BBInfo[UserMBB->getNumber()].Size += Delta; + computeBlockSize(UserMBB); adjustBBOffsetsAfter(UserMBB); return; } @@ -1378,7 +1475,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; - unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned CPI = getCombinedIndex(CPEMI); unsigned Size = CPEMI->getOperand(2).getImm(); // Compute this only once, it's expensive. unsigned UserOffset = getUserOffset(U); @@ -1408,8 +1505,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { NewWaterList.insert(NewIsland); // The new CPE goes before the following block (NewMBB). - NewMBB = std::next(MachineFunction::iterator(WaterBB)); - + NewMBB = &*++WaterBB->getIterator(); } else { // No water found. DEBUG(dbgs() << "No water found\n"); @@ -1420,7 +1516,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // next iteration for constant pools, but in this context, we don't want // it. Check for this so it will be removed from the WaterList. // Also remove any entry from NewWaterList. - MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB)); + MachineBasicBlock *WaterBB = &*--NewMBB->getIterator(); IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); if (IP != WaterList.end()) NewWaterList.erase(WaterBB); @@ -1437,28 +1533,28 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { WaterList.erase(IP); // Okay, we know we can put an island before NewMBB now, do it! - MF->insert(NewMBB, NewIsland); + MF->insert(NewMBB->getIterator(), NewIsland); // Update internal data structures to account for the newly inserted MBB. updateForInsertedWaterBlock(NewIsland); - // Decrement the old entry, and remove it if refcount becomes 0. - decrementCPEReferenceCount(CPI, CPEMI); - // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. U.HighWaterMark = NewIsland; - U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) - .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); + U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc()) + .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size); CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; + // Decrement the old entry, and remove it if refcount becomes 0. + decrementCPEReferenceCount(CPI, CPEMI); + // Mark the basic block as aligned as required by the const-pool entry. NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); // Increase the size of the island block to account for the new entry. BBInfo[NewIsland->getNumber()].Size += Size; - adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland))); + adjustBBOffsetsAfter(&*--NewIsland->getIterator()); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1637,7 +1733,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { MBB->back().eraseFromParent(); // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = &*++MBB->getIterator(); DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" @@ -1764,8 +1860,13 @@ bool ARMConstantIslands::optimizeThumb2Instructions() { bool ARMConstantIslands::optimizeThumb2Branches() { bool MadeChange = false; - for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { - ImmBranch &Br = ImmBranches[i]; + // The order in which branches appear in ImmBranches is approximately their + // order within the function body. By visiting later branches first, we reduce + // the distance between earlier forward branches and their targets, making it + // more likely that the cbn?z optimization, which can only apply to forward + // branches, will succeed. + for (unsigned i = ImmBranches.size(); i != 0; --i) { + ImmBranch &Br = ImmBranches[i-1]; unsigned Opcode = Br.MI->getOpcode(); unsigned NewOpc = 0; unsigned Scale = 1; @@ -1852,6 +1953,122 @@ bool ARMConstantIslands::optimizeThumb2Branches() { return MadeChange; } +static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg, + unsigned BaseReg) { + if (I.getOpcode() != ARM::t2ADDrs) + return false; + + if (I.getOperand(0).getReg() != EntryReg) + return false; + + if (I.getOperand(1).getReg() != BaseReg) + return false; + + // FIXME: what about CC and IdxReg? + return true; +} + +/// \brief While trying to form a TBB/TBH instruction, we may (if the table +/// doesn't immediately follow the BR_JT) need access to the start of the +/// jump-table. We know one instruction that produces such a register; this +/// function works out whether that definition can be preserved to the BR_JT, +/// possibly by removing an intervening addition (which is usually needed to +/// calculate the actual entry to jump to). +bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI, + MachineInstr *LEAMI, + unsigned &DeadSize, + bool &CanDeleteLEA, + bool &BaseRegKill) { + if (JumpMI->getParent() != LEAMI->getParent()) + return false; + + // Now we hope that we have at least these instructions in the basic block: + // BaseReg = t2LEA ... + // [...] + // EntryReg = t2ADDrs BaseReg, ... + // [...] + // t2BR_JT EntryReg + // + // We have to be very conservative about what we recognise here though. The + // main perturbing factors to watch out for are: + // + Spills at any point in the chain: not direct problems but we would + // expect a blocking Def of the spilled register so in practice what we + // can do is limited. + // + EntryReg == BaseReg: this is the one situation we should allow a Def + // of BaseReg, but only if the t2ADDrs can be removed. + // + Some instruction other than t2ADDrs computing the entry. Not seen in + // the wild, but we should be careful. + unsigned EntryReg = JumpMI->getOperand(0).getReg(); + unsigned BaseReg = LEAMI->getOperand(0).getReg(); + + CanDeleteLEA = true; + BaseRegKill = false; + MachineInstr *RemovableAdd = nullptr; + MachineBasicBlock::iterator I(LEAMI); + for (++I; &*I != JumpMI; ++I) { + if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) { + RemovableAdd = &*I; + break; + } + + for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) { + const MachineOperand &MO = I->getOperand(K); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() == BaseReg) + return false; + if (MO.isUse() && MO.getReg() == BaseReg) { + BaseRegKill = BaseRegKill || MO.isKill(); + CanDeleteLEA = false; + } + } + } + + if (!RemovableAdd) + return true; + + // Check the add really is removable, and that nothing else in the block + // clobbers BaseReg. + for (++I; &*I != JumpMI; ++I) { + for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) { + const MachineOperand &MO = I->getOperand(K); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() == BaseReg) + return false; + if (MO.isUse() && MO.getReg() == EntryReg) + RemovableAdd = nullptr; + } + } + + if (RemovableAdd) { + RemovableAdd->eraseFromParent(); + DeadSize += 4; + } else if (BaseReg == EntryReg) { + // The add wasn't removable, but clobbered the base for the TBB. So we can't + // preserve it. + return false; + } + + // We reached the end of the block without seeing another definition of + // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be + // used in the TBB/TBH if necessary. + return true; +} + +/// \brief Returns whether CPEMI is the first instruction in the block +/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so, +/// we can switch the first register to PC and usually remove the address +/// calculation that preceded it. +static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) { + MachineFunction::iterator MBB = JTMI->getParent()->getIterator(); + MachineFunction *MF = MBB->getParent(); + ++MBB; + + return MBB != MF->end() && MBB->begin() != MBB->end() && + &*MBB->begin() == CPEMI; +} + /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. bool ARMConstantIslands::optimizeThumb2JumpTables() { @@ -1867,7 +2084,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1890,91 +2107,79 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { break; } - if (ByteOk || HalfWordOk) { - MachineBasicBlock *MBB = MI->getParent(); - unsigned BaseReg = MI->getOperand(0).getReg(); - bool BaseRegKill = MI->getOperand(0).isKill(); - if (!BaseRegKill) - continue; - unsigned IdxReg = MI->getOperand(1).getReg(); - bool IdxRegKill = MI->getOperand(1).isKill(); - - // Scan backwards to find the instruction that defines the base - // register. Due to post-RA scheduling, we can't count on it - // immediately preceding the branch instruction. - MachineBasicBlock::iterator PrevI = MI; - MachineBasicBlock::iterator B = MBB->begin(); - while (PrevI != B && !PrevI->definesRegister(BaseReg)) - --PrevI; - - // If for some reason we didn't find it, we can't do anything, so - // just skip this one. - if (!PrevI->definesRegister(BaseReg)) - continue; + if (!ByteOk && !HalfWordOk) + continue; - MachineInstr *AddrMI = PrevI; - bool OptOk = true; - // Examine the instruction that calculates the jumptable entry address. - // Make sure it only defines the base register and kills any uses - // other than the index register. - for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { - const MachineOperand &MO = AddrMI->getOperand(k); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && MO.getReg() != BaseReg) { - OptOk = false; - break; - } - if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) { - OptOk = false; - break; - } - } - if (!OptOk) - continue; + MachineBasicBlock *MBB = MI->getParent(); + if (!MI->getOperand(0).isKill()) // FIXME: needed now? + continue; + unsigned IdxReg = MI->getOperand(1).getReg(); + bool IdxRegKill = MI->getOperand(1).isKill(); - // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction - // that gave us the initial base register definition. - for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI) - ; + CPUser &User = CPUsers[JumpTableUserIndices[JTI]]; + unsigned DeadSize = 0; + bool CanDeleteLEA = false; + bool BaseRegKill = false; + bool PreservedBaseReg = + preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill); - // The instruction should be a tLEApcrel or t2LEApcrelJT; we want - // to delete it as well. - MachineInstr *LeaMI = PrevI; - if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && - LeaMI->getOpcode() != ARM::t2LEApcrelJT) || - LeaMI->getOperand(0).getReg() != BaseReg) - OptOk = false; + if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg) + continue; - if (!OptOk) - continue; + DEBUG(dbgs() << "Shrink JT: " << *MI); + MachineInstr *CPEMI = User.CPEMI; + unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; + MachineBasicBlock::iterator MI_JT = MI; + MachineInstr *NewJTMI = + BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) + .addReg(User.MI->getOperand(0).getReg(), + getKillRegState(BaseRegKill)) + .addReg(IdxReg, getKillRegState(IdxRegKill)) + .addJumpTableIndex(JTI, JTOP.getTargetFlags()) + .addImm(CPEMI->getOperand(0).getImm()); + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); + + unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH; + CPEMI->setDesc(TII->get(JTOpc)); + + if (jumpTableFollowsTB(MI, User.CPEMI)) { + NewJTMI->getOperand(0).setReg(ARM::PC); + NewJTMI->getOperand(0).setIsKill(false); + + if (CanDeleteLEA) { + User.MI->eraseFromParent(); + DeadSize += 4; + + // The LEA was eliminated, the TBB instruction becomes the only new user + // of the jump table. + User.MI = NewJTMI; + User.MaxDisp = 4; + User.NegOk = false; + User.IsSoImm = false; + User.KnownAlignment = false; + } else { + // The LEA couldn't be eliminated, so we must add another CPUser to + // record the TBB or TBH use. + int CPEntryIdx = JumpTableEntryIndices[JTI]; + auto &CPEs = CPEntries[CPEntryIdx]; + auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) { + return E.CPEMI == User.CPEMI; + }); + ++Entry->RefCount; + CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false)); + } + } - DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI - << " lea: " << *LeaMI); - unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; - MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) - .addReg(IdxReg, getKillRegState(IdxRegKill)) - .addJumpTableIndex(JTI, JTOP.getTargetFlags()) - .addImm(MI->getOperand(JTOpIdx+1).getImm()); - DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); - // FIXME: Insert an "ALIGN" instruction to ensure the next instruction - // is 2-byte aligned. For now, asm printer will fix it up. - unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); - unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI); - OrigSize += TII->GetInstSizeInBytes(LeaMI); - OrigSize += TII->GetInstSizeInBytes(MI); - - AddrMI->eraseFromParent(); - LeaMI->eraseFromParent(); - MI->eraseFromParent(); + unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); + unsigned OrigSize = TII->GetInstSizeInBytes(MI); + MI->eraseFromParent(); - int delta = OrigSize - NewSize; - BBInfo[MBB->getNumber()].Size -= delta; - adjustBBOffsetsAfter(MBB); + int Delta = OrigSize - NewSize + DeadSize; + BBInfo[MBB->getNumber()].Size -= Delta; + adjustBBOffsetsAfter(MBB); - ++NumTBs; - MadeChange = true; - } + ++NumTBs; + MadeChange = true; } return MadeChange; @@ -1993,7 +2198,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -2031,7 +2236,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; SmallVector CondPrior; - MachineFunction::iterator BBi = BB; + MachineFunction::iterator BBi = BB->getIterator(); MachineFunction::iterator OldPrior = std::prev(BBi); // If the block terminator isn't analyzable, don't try to move the block @@ -2054,7 +2259,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // Create a new MBB for the code after the jump BB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(JTBB->getBasicBlock()); - MachineFunction::iterator MBBI = JTBB; ++MBBI; + MachineFunction::iterator MBBI = ++JTBB->getIterator(); MF->insert(MBBI, NewBB); // Add an unconditional branch from NewBB to BB. @@ -2069,8 +2274,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // Update the CFG. NewBB->addSuccessor(BB); - JTBB->removeSuccessor(BB); - JTBB->addSuccessor(NewBB); + JTBB->replaceSuccessor(BB, NewBB); ++NumJTInserted; return NewBB;