X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FBranchFolding.cpp;h=618266731c067ad6219af8b25e96b82923a04b51;hb=8770f7af5f46c0d34a79cf0beeeef80b1a2ab690;hp=dc786a21f968b0520d515ef7860412c77f66c1de;hpb=20350db44807f6863c3c00345934d45763ed21d3;p=oota-llvm.git diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index dc786a21f96..618266731c0 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -16,29 +16,32 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "branchfolding" #include "BranchFolding.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; +#define DEBUG_TYPE "branchfolding" + STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); @@ -67,9 +70,11 @@ namespace { static char ID; explicit BranchFolderPass(): MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -83,23 +88,32 @@ INITIALIZE_PASS(BranchFolderPass, "branch-folder", "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + TargetPassConfig *PassConfig = &getAnalysis(); - BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true); - return Folder.OptimizeFunction(MF, - MF.getTarget().getInstrInfo(), - MF.getTarget().getRegisterInfo(), + // TailMerge can create jump into if branches that make CFG irreducible for + // HW that requires structurized CFG. + bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && + PassConfig->getEnableTailMerge(); + BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, + getAnalysis(), + getAnalysis()); + return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), + MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable()); } - -BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) { +BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + const MachineBlockFrequencyInfo &FreqInfo, + const MachineBranchProbabilityInfo &ProbInfo) + : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo), + MBPI(ProbInfo) { switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; case cl::BOU_TRUE: EnableTailMerge = true; break; case cl::BOU_FALSE: EnableTailMerge = false; break; } - - EnableHoistCommonCode = CommonHoist; } /// RemoveDeadBlock - Remove the specified dead machine basic block from the @@ -136,10 +150,9 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); - ImpDefRegs.insert(Reg); - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - ImpDefRegs.insert(SubReg); + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + ImpDefRegs.insert(*SubRegs); ++I; } if (ImpDefRegs.empty()) @@ -184,7 +197,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TII = tii; TRI = tri; MMI = mmi; - RS = NULL; + RS = nullptr; // Use a RegScavenger to help update liveness when required. MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -196,7 +209,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { - MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0; + MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); @@ -215,7 +228,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // See if any jump tables have become dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); - if (JTI == 0) { + if (!JTI) { delete RS; return MadeChange; } @@ -257,11 +270,17 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); - // Merge in bits from the operand if easy. + // Merge in bits from the operand if easy. We can't use MachineOperand's + // hash_code here because it's not deterministic and we sort by hash value + // later. unsigned OperandHash = 0; switch (Op.getType()) { - case MachineOperand::MO_Register: OperandHash = Op.getReg(); break; - case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break; + case MachineOperand::MO_Register: + OperandHash = Op.getReg(); + break; + case MachineOperand::MO_Immediate: + OperandHash = Op.getImm(); + break; case MachineOperand::MO_MachineBasicBlock: OperandHash = Op.getMBB()->getNumber(); break; @@ -276,27 +295,20 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { // pull in the offset. OperandHash = Op.getOffset(); break; - default: break; + default: + break; } - Hash += ((OperandHash << 3) | Op.getType()) << (i&31); + Hash += ((OperandHash << 3) | Op.getType()) << (i & 31); } return Hash; } /// HashEndOfMBB - Hash the last instruction in the MBB. static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { - MachineBasicBlock::const_iterator I = MBB->end(); - if (I == MBB->begin()) - return 0; // Empty MBB. - - --I; - // Skip debug info so it will not affect codegen. - while (I->isDebugValue()) { - if (I==MBB->begin()) - return 0; // MBB empty except for debug info. - --I; - } + MachineBasicBlock::const_iterator I = MBB->getLastNonDebugInstr(); + if (I == MBB->end()) + return 0; return HashMachineInstr(I); } @@ -358,9 +370,8 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; while (I2->isDebugValue()) { - if (I2 == MBB2->begin()) { + if (I2 == MBB2->begin()) return TailLen; - } --I2; } ++I2; @@ -382,11 +393,9 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, if (RS) { RS->enterBasicBlock(CurMBB); if (!CurMBB->empty()) - RS->forward(prior(CurMBB->end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) - if (RegsLiveAtExit[i]) + RS->forward(std::prev(CurMBB->end())); + for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++) + if (RS->isRegUsed(i, false)) NewMBB->addLiveIn(i); } } @@ -409,15 +418,16 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// MBB so that the part before the iterator falls into the part starting at the /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1) { + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB) { if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) - return 0; + return nullptr; MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. MachineFunction::iterator MBBI = &CurMBB; - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock()); + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -429,6 +439,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Splice the code over. NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + // NewMBB inherits CurMBB's block frequency. + MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); + // For targets that use the register scavenger, we must maintain LiveIns. MaintainLiveIns(&CurMBB, NewMBB); @@ -460,8 +473,8 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); - MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB)); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && @@ -470,12 +483,12 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); + TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, NULL, + TII->InsertBranch(*CurMBB, SuccBB, nullptr, SmallVector(), dl); } @@ -483,21 +496,34 @@ bool BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { if (getHash() < o.getHash()) return true; - else if (getHash() > o.getHash()) + if (getHash() > o.getHash()) return false; - else if (getBlock()->getNumber() < o.getBlock()->getNumber()) + if (getBlock()->getNumber() < o.getBlock()->getNumber()) return true; - else if (getBlock()->getNumber() > o.getBlock()->getNumber()) + if (getBlock()->getNumber() > o.getBlock()->getNumber()) return false; - else { - // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing - // an object with itself. + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. #ifndef _GLIBCXX_DEBUG - llvm_unreachable("Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); #else - return false; + return false; #endif - } +} + +BlockFrequency +BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const { + auto I = MergedBBFreq.find(MBB); + + if (I != MergedBBFreq.end()) + return I->second; + + return MBFI.getBlockFreq(MBB); +} + +void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, + BlockFrequency F) { + MergedBBFreq[MBB] = F; } /// CountTerminators - Count the number of terminators in the given @@ -575,7 +601,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; @@ -599,12 +625,11 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, unsigned maxCommonTailLength = 0U; SameTails.clear(); MachineBasicBlock::iterator TrialBBI1, TrialBBI2; - MPIterator HighestMPIter = prior(MergePotentials.end()); - for (MPIterator CurMPIter = prior(MergePotentials.end()), + MPIterator HighestMPIter = std::prev(MergePotentials.end()); + for (MPIterator CurMPIter = std::prev(MergePotentials.end()), B = MergePotentials.begin(); - CurMPIter != B && CurMPIter->getHash() == CurHash; - --CurMPIter) { - for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) { + CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) { + for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) { unsigned CommonTailLen; if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), minCommonTailLength, @@ -633,9 +658,9 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { MPIterator CurMPIter, B; - for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); - CurMPIter->getHash() == CurHash; - --CurMPIter) { + for (CurMPIter = std::prev(MergePotentials.end()), + B = MergePotentials.begin(); + CurMPIter->getHash() == CurHash; --CurMPIter) { // Put the unconditional branch back, if we need one. MachineBasicBlock *CurMBB = CurMPIter->getBlock(); if (SuccBB && CurMBB != PredBB) @@ -651,6 +676,7 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex) { commonTailIndex = 0; @@ -680,7 +706,12 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); - MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + // If the split block unconditionally falls-thru to SuccBB, it will be + // merged. In control flow terms it should then take SuccBB's name. e.g. If + // SuccBB is an inner loop, the common tail is still part of the inner loop. + const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ? + SuccBB->getBasicBlock() : MBB->getBasicBlock(); + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); if (!newMBB) { DEBUG(dbgs() << "... failed!"); return false; @@ -696,6 +727,62 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, return true; } +static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { + auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); + auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; + } + return true; +} + +static void +removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { + // Remove MMOs from memory operations in the common block + // when they do not match the ones from the block being tail-merged. + // This ensures later passes conservatively compute dependencies. + MachineBasicBlock *MBB = MBBIStartPos->getParent(); + // Note CommonTailLen does not necessarily matches the size of + // the common BB nor all its instructions because of debug + // instructions differences. + unsigned CommonTailLen = 0; + for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos) + ++CommonTailLen; + + MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(); + MachineBasicBlock::reverse_iterator MBBIE = MBB->rend(); + MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin(); + MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend(); + + while (CommonTailLen--) { + assert(MBBI != MBBIE && "Reached BB end within common tail length!"); + (void)MBBIE; + + if (MBBI->isDebugValue()) { + ++MBBI; + continue; + } + + while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + ++MBBICommon; + + assert(MBBICommon != MBBIECommon && + "Reached BB end within common tail length!"); + assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); + + if (MBBICommon->mayLoad() || MBBICommon->mayStore()) + if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) + MBBICommon->clearMemRefs(); + + ++MBBI; + ++MBBICommon; + } +} + // See if any of the blocks in MergePotentials (which all have a common single // successor, or all have no successor) can be tail-merged. If there is a // successor, any blocks in MergePotentials that are not tail-merged and @@ -730,7 +817,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // Sort by hash value so that blocks with identical end sequences sort // together. - std::stable_sort(MergePotentials.begin(), MergePotentials.end()); + array_pod_sort(MergePotentials.begin(), MergePotentials.end()); // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { @@ -788,7 +875,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - if (!CreateCommonTailOnlyBlock(PredBB, + if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, maxCommonTailLength, commonTailIndex)) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); continue; @@ -796,6 +883,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, } MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + // Recompute commont tail MBB's edge weights and block frequency. + setCommonTailEdgeWeights(*MBB); + // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() @@ -805,6 +896,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); + // Remove MMOs from memory operations as needed. + removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. @@ -840,7 +933,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // See if we can do any tail merging on those. if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(NULL, NULL); + MadeChange |= TryTailMergeBlocks(nullptr, nullptr); // Look at blocks (IBB) with multiple predecessors (PBB). // We change each predecessor to a canonical form, by @@ -861,12 +954,12 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a compile-time infinite loop repeatedly doing and undoing the same // transformations.) - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ++I) { - if (I->pred_size() >= 2) continue; + if (I->pred_size() < 2) continue; SmallPtrSet UniquePreds; MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = prior(I); + MachineBasicBlock *PredBB = std::prev(I); MergePotentials.clear(); for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); @@ -880,14 +973,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // Visit each predecessor only once. - if (!UniquePreds.insert(PBB)) + if (!UniquePreds.insert(PBB).second) continue; // Skip blocks which may jump to a landing pad. Can't tail merge these. if (PBB->getLandingPadSuccessor()) continue; - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { // Failing case: IBB is the target of a cbr, and we cannot reverse the @@ -898,7 +991,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // This is the QBB case described above if (!FBB) - FBB = llvm::next(MachineFunction::iterator(PBB)); + FBB = std::next(MachineFunction::iterator(PBB)); } // Failing case: the only way IBB can be reached from PBB is via @@ -906,10 +999,10 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a bit in the edge so we didn't have to do all this. if (IBB->isLandingPad()) { MachineFunction::iterator IP = PBB; IP++; - MachineBasicBlock *PredNextBB = NULL; + MachineBasicBlock *PredNextBB = nullptr; if (IP != MF.end()) PredNextBB = IP; - if (TBB == NULL) { + if (!TBB) { if (IBB != PredNextBB) // fallthrough continue; } else if (FBB) { @@ -930,7 +1023,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { TII->RemoveBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, + NewCond, dl); } MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); @@ -948,7 +1042,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Reinsert an unconditional branch if needed. The 1 below can occur as a // result of removing blocks in TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + PredBB = std::prev(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -957,6 +1051,44 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { return MadeChange; } +void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) { + SmallVector EdgeFreqLs(TailMBB.succ_size()); + BlockFrequency AccumulatedMBBFreq; + + // Aggregate edge frequency of successor edge j: + // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)), + // where bb is a basic block that is in SameTails. + for (const auto &Src : SameTails) { + const MachineBasicBlock *SrcMBB = Src.getBlock(); + BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB); + AccumulatedMBBFreq += BlockFreq; + + // It is not necessary to recompute edge weights if TailBB has less than two + // successors. + if (TailMBB.succ_size() <= 1) + continue; + + auto EdgeFreq = EdgeFreqLs.begin(); + + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) + *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI); + } + + MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq); + + if (TailMBB.succ_size() <= 1) + return; + + auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end()); + uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1; + auto EdgeFreq = EdgeFreqLs.begin(); + + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) + TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale); +} + //===----------------------------------------------------------------------===// // Branch Optimization //===----------------------------------------------------------------------===// @@ -967,7 +1099,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Make sure blocks are numbered in order MF.RenumberBlocks(); - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; MadeChange |= OptimizeBlock(MBB); @@ -985,25 +1117,15 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Blocks should be considered empty if they contain only debug info; // else the debug info would affect codegen. static bool IsEmptyBlock(MachineBasicBlock *MBB) { - if (MBB->empty()) - return true; - for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); - MBBI!=MBBE; ++MBBI) { - if (!MBBI->isDebugValue()) - return false; - } - return true; + return MBB->getFirstNonDebugInstr() == MBB->end(); } // Blocks with only debug info and branches should be considered the same // as blocks with only branches. static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { - MachineBasicBlock::iterator MBBI, MBBE; - for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) { - if (!MBBI->isDebugValue()) - break; - } - return (MBBI->isBranch()); + MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr(); + assert(I != MBB->end() && "empty block!"); + return I->isBranch(); } /// IsBetterFallthrough - Return true if it would be clearly better to @@ -1016,36 +1138,24 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to // optimize branches that branch to either a return block or an assert block // into a fallthrough to the return. - if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false; + MachineBasicBlock::iterator MBB1I = MBB1->getLastNonDebugInstr(); + MachineBasicBlock::iterator MBB2I = MBB2->getLastNonDebugInstr(); + if (MBB1I == MBB1->end() || MBB2I == MBB2->end()) + return false; // If there is a clear successor ordering we make sure that one block // will fall through to the next if (MBB1->isSuccessor(MBB2)) return true; if (MBB2->isSuccessor(MBB1)) return false; - // Neither block consists entirely of debug info (per IsEmptyBlock check), - // so we needn't test for falling off the beginning here. - MachineBasicBlock::iterator MBB1I = --MBB1->end(); - while (MBB1I->isDebugValue()) - --MBB1I; - MachineBasicBlock::iterator MBB2I = --MBB2->end(); - while (MBB2I->isDebugValue()) - --MBB2I; return MBB2I->isCall() && !MBB1I->isCall(); } /// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch -/// instructions on the block. Always use the DebugLoc of the first -/// branching instruction found unless its absent, in which case use the -/// DebugLoc of the second if present. +/// instructions on the block. static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return DebugLoc(); - --I; - while (I->isDebugValue() && I != MBB.begin()) - --I; - if (I->isBranch()) + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I != MBB.end() && I->isBranch()) return I->getDebugLoc(); return DebugLoc(); } @@ -1070,6 +1180,11 @@ ReoptimizeBlock: if (FallThrough == MF.end()) { // TODO: Simplify preds to not branch here if possible! + } else if (FallThrough->isLandingPad()) { + // Don't rewrite to a landing pad fallthough. That could lead to the case + // where a BB jumps to more than one landing pad. + // TODO: Is it ever worth rewriting predecessors which don't already + // jump to a landing pad, and so can safely jump to the fallthrough? } else { // Rewrite all predecessors of the old block to go to the fallthrough // instead. @@ -1088,9 +1203,9 @@ ReoptimizeBlock: // Check to see if we can simplify the terminator of the block before this // one. - MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB)); + MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB)); - MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector PriorCond; bool PriorUnAnalyzable = TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); @@ -1107,7 +1222,7 @@ ReoptimizeBlock: TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1151,7 +1266,7 @@ ReoptimizeBlock: // If the previous branch *only* branches to *this* block (conditional or // not) remove the branch. - if (PriorTBB == MBB && PriorFBB == 0) { + if (PriorTBB == MBB && !PriorFBB) { TII->RemoveBranch(PrevBB); MadeChange = true; ++NumBranchOpts; @@ -1163,7 +1278,7 @@ ReoptimizeBlock: if (PriorFBB == MBB) { DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1177,7 +1292,7 @@ ReoptimizeBlock: if (!TII->ReverseBranchCondition(NewPriorCond)) { DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); + TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1192,7 +1307,7 @@ ReoptimizeBlock: // We consider it more likely that execution will stay in the function (e.g. // due to loops) than it is to exit it. This asserts in loops etc, moving // the assert condition out of the loop body. - if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 && + if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB && MachineFunction::iterator(PriorTBB) == FallThrough && !MBB->canFallThrough()) { bool DoTransform = true; @@ -1215,7 +1330,7 @@ ReoptimizeBlock: DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); + TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(--MF.end()); @@ -1228,7 +1343,7 @@ ReoptimizeBlock: } // Analyze the branch in the current block. - MachineBasicBlock *CurTBB = 0, *CurFBB = 0; + MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; SmallVector CurCond; bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); if (!CurUnAnalyzable) { @@ -1254,7 +1369,7 @@ ReoptimizeBlock: // If this branch is the only thing in its block, see if we can forward // other blocks across it. - if (CurTBB && CurCond.empty() && CurFBB == 0 && + if (CurTBB && CurCond.empty() && !CurFBB && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { DebugLoc dl = getBranchDebugLoc(*MBB); @@ -1265,19 +1380,10 @@ ReoptimizeBlock: // If the only things remaining in the block are debug info, remove these // as well, so this will behave the same as an empty block in non-debug // mode. - if (!MBB->empty()) { - bool NonDebugInfoFound = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - if (!I->isDebugValue()) { - NonDebugInfoFound = true; - break; - } - } - if (!NonDebugInfoFound) - // Make the block empty, losing the debug info (we could probably - // improve this in some cases.) - MBB->erase(MBB->begin(), MBB->end()); + if (IsEmptyBlock(MBB)) { + // Make the block empty, losing the debug info (we could probably + // improve this in some cases.) + MBB->erase(MBB->begin(), MBB->end()); } // If this block is just an unconditional branch to CurTBB, we can // usually completely eliminate the block. The only case we cannot @@ -1292,12 +1398,12 @@ ReoptimizeBlock: // explicit branch to us to make updates simpler. if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && PriorTBB != MBB && PriorFBB != MBB) { - if (PriorTBB == 0) { - assert(PriorCond.empty() && PriorFBB == 0 && + if (!PriorTBB) { + assert(PriorCond.empty() && !PriorFBB && "Bad branch analysis"); PriorTBB = MBB; } else { - assert(PriorFBB == 0 && "Machine CFG out of date!"); + assert(!PriorFBB && "Machine CFG out of date!"); PriorFBB = MBB; } DebugLoc pdl = getBranchDebugLoc(PrevBB); @@ -1321,7 +1427,7 @@ ReoptimizeBlock: // If this change resulted in PMBB ending in a conditional // branch where both conditions go to the same destination, // change this to an unconditional branch (and fix the CFG). - MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0; + MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr; SmallVector NewCurCond; bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); @@ -1329,10 +1435,10 @@ ReoptimizeBlock: DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); + TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; - PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); + PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false); } } } @@ -1349,7 +1455,7 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. - TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl); } } @@ -1370,7 +1476,7 @@ ReoptimizeBlock: // Analyze the branch at the end of the pred. MachineBasicBlock *PredBB = *PI; MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; - MachineBasicBlock *PredTBB = 0, *PredFBB = 0; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) @@ -1387,9 +1493,10 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = + std::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); + TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1422,7 +1529,7 @@ ReoptimizeBlock: // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were // removed, move this block to the end of the function. - MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0; + MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector PrevCond; if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && @@ -1463,11 +1570,11 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, if (SuccBB != TrueBB) return SuccBB; } - return NULL; + return nullptr; } /// findHoistingInsertPosAndDeps - Find the location to move common instructions -/// in successors to. The location is ususally just before the terminator, +/// in successors to. The location is usually just before the terminator, /// however if the terminator is a conditional branch and its previous /// instruction is the flag setting instruction, the previous instruction is /// the preferred location. This function also gathers uses and defs of the @@ -1491,13 +1598,19 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!Reg) continue; if (MO.isUse()) { - Uses.insert(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.insert(*AS); - } else if (!MO.isDead()) - // Don't try to hoist code in the rare case the terminator defines a - // register that is later used. - return MBB->end(); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Uses.insert(*AI); + } else { + if (!MO.isDead()) + // Don't try to hoist code in the rare case the terminator defines a + // register that is later used. + return MBB->end(); + + // If the terminator defines a register, make sure we don't hoist + // the instruction whose def might be clobbered by the terminator. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Defs.insert(*AI); + } } if (Uses.empty()) @@ -1509,7 +1622,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // branch from condition setting instruction. MachineBasicBlock::iterator PI = Loc; --PI; - while (PI != MBB->begin() && Loc->isDebugValue()) + while (PI != MBB->begin() && PI->isDebugValue()) --PI; bool IsDef = false; @@ -1538,8 +1651,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Also avoid moving code above predicated instruction since it's hard to // reason about register liveness with predicated instruction. bool DontMoveAcrossStore = true; - if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) || - TII->isPredicated(PI)) + if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(PI)) return MBB->end(); @@ -1553,18 +1665,15 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!Reg) continue; if (MO.isUse()) { - Uses.insert(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.insert(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Uses.insert(*AI); } else { - if (Uses.count(Reg)) { - Uses.erase(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - Uses.erase(*SR); // Use getSubRegisters to be conservative + if (Uses.erase(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative } - Defs.insert(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Defs.insert(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Defs.insert(*AI); } } @@ -1575,7 +1684,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, /// sequence at the start of the function, move the instructions before MBB /// terminator if it's legal. bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty()) return false; @@ -1680,7 +1789,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; bool DontMoveAcrossStore = true; - if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore)) + if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore)) break; // Remove kills from LocalDefsSet, these registers had short live ranges. @@ -1691,8 +1800,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) - LocalDefsSet.erase(*OR); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); } // Track local defs so we can update liveins. @@ -1704,8 +1813,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg) continue; LocalDefs.push_back(Reg); - for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) - LocalDefsSet.insert(*OR); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.insert(*AI); } HasDups = true;