X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FCodeGen%2FMachineBlockPlacement.cpp;h=779b84e99b8ddff0cf6029f07c22d6f14e5af4db;hb=56e9c5f92b8fa33c29b9b9789d9280c4f7e36522;hp=bfba503b351ea9bea9f3f6f015cabc9edd5ce442;hpb=33a47d66e9f9126017d051bfba35cb9be6609b3e;p=oota-llvm.git diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index bfba503b351..779b84e99b8 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -25,7 +25,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "block-placement2" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -43,9 +42,12 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; +#define DEBUG_TYPE "block-placement2" + STATISTIC(NumCondBranches, "Number of conditional branches"); STATISTIC(NumUncondBranches, "Number of uncondittional branches"); STATISTIC(CondBranchTakenFreq, @@ -58,6 +60,24 @@ static cl::opt AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); +static cl::opt OnlyHotBadCFGConflictCheck( + "only-hot-bad-cfg-conflict-check", + cl::desc("Only check that a hot successor doesn't have a hot predecessor."), + cl::init(false), cl::Hidden); + +static cl::opt NoBadCFGConflictCheck( + "no-bad-cfg-conflict-check", + cl::desc("Don't check whether a hot successor has a more important " + "predecessor."), + cl::init(false), cl::Hidden); + +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt +ExitBlockBias("block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -145,7 +165,7 @@ public: #ifndef NDEBUG /// \brief Dump the blocks in this chain. - void dump() LLVM_ATTRIBUTE_USED { + LLVM_DUMP_METHOD void dump() { for (iterator I = begin(), E = end(); I != E; ++I) (*I)->dump(); } @@ -199,7 +219,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl &BlockWorkList, - const BlockFilterSet *BlockFilter = 0); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); @@ -213,7 +233,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &BlockWorkList, - const BlockFilterSet *BlockFilter = 0); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineFunction &F, @@ -230,9 +250,9 @@ public: initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -327,7 +347,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% - MachineBasicBlock *BestSucc = 0; + MachineBasicBlock *BestSucc = nullptr; // FIXME: Due to the performance of the probability and weight routines in // the MBPI analysis, we manually compute probabilities using the edge // weights. This is suboptimal as it means that the somewhat subtle @@ -360,33 +380,38 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // any CFG constraints. if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (CFG conflict)\n"); continue; } - // Make sure that a hot successor doesn't have a globally more important - // predecessor. - BlockFrequency CandidateEdgeFreq - = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); - bool BadCFGConflict = false; - for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), - PE = (*SI)->pred_end(); - PI != PE; ++PI) { - if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || - BlockToChain[*PI] == &Chain) + if (!NoBadCFGConflictCheck) { + // Make sure that a hot successor doesn't have a globally more + // important predecessor. + BlockFrequency CandidateEdgeFreq = + OnlyHotBadCFGConflictCheck + ? MBFI->getBlockFreq(BB) * SuccProb + : MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + bool BadCFGConflict = false; + for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), + PE = (*SI)->pred_end(); + PI != PE; ++PI) { + if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || + BlockToChain[*PI] == &Chain) + continue; + BlockFrequency PredEdgeFreq = + MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + if (PredEdgeFreq >= CandidateEdgeFreq) { + BadCFGConflict = true; + break; + } + } + if (BadCFGConflict) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob) (non-cold CFG conflict)\n"); continue; - BlockFrequency PredEdgeFreq - = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); - if (PredEdgeFreq >= CandidateEdgeFreq) { - BadCFGConflict = true; - break; } } - if (BadCFGConflict) { - DEBUG(dbgs() << " " << getBlockName(*SI) - << " -> non-cold CFG conflict\n"); - continue; - } } DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb @@ -401,23 +426,6 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( return BestSucc; } -namespace { -/// \brief Predicate struct to detect blocks already placed. -class IsBlockPlaced { - const BlockChain &PlacedChain; - const BlockToChainMapType &BlockToChain; - -public: - IsBlockPlaced(const BlockChain &PlacedChain, - const BlockToChainMapType &BlockToChain) - : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} - - bool operator()(MachineBasicBlock *BB) const { - return BlockToChain.lookup(BB) == &PlacedChain; - } -}; -} - /// \brief Select the best block from a worklist. /// /// This looks through the provided worklist as a list of candidate basic @@ -436,10 +444,12 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( // FIXME: If this shows up on profiles, it could be folded (at the cost of // some code complexity) into the loop below. WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), - IsBlockPlaced(Chain, BlockToChain)), + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), WorkList.end()); - MachineBasicBlock *BestBlock = 0; + MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; for (SmallVectorImpl::iterator WBI = WorkList.begin(), WBE = WorkList.end(); @@ -453,8 +463,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq - << " (freq)\n"); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); if (BestBlock && BestFreq >= CandidateFreq) continue; BestBlock = *WBI; @@ -486,7 +496,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( return *BlockToChain[I]->begin(); } } - return 0; + return nullptr; } void MachineBlockPlacement::buildChain( @@ -501,11 +511,11 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); for (;;) { assert(BB); assert(BlockToChain[BB] == &Chain); - assert(*llvm::prior(Chain.end()) == BB); + assert(*std::prev(Chain.end()) == BB); // Look for the best viable successor if there is one to place immediately // after this block. @@ -536,7 +546,7 @@ void MachineBlockPlacement::buildChain( << " to " << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); - BB = *llvm::prior(Chain.end()); + BB = *std::prev(Chain.end()); } DEBUG(dbgs() << "Finished forming chain for header block " @@ -567,7 +577,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, << getBlockName(L.getHeader()) << "\n"); BlockFrequency BestPredFreq; - MachineBasicBlock *BestPred = 0; + MachineBasicBlock *BestPred = nullptr; for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), PE = L.getHeader()->pred_end(); PI != PE; ++PI) { @@ -575,8 +585,8 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, if (!LoopBlockSet.count(Pred)) continue; DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " - << Pred->succ_size() << " successors, " - << MBFI->getBlockFreq(Pred) << " freq\n"); + << Pred->succ_size() << " successors, "; + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); if (Pred->succ_size() > 1) continue; @@ -623,11 +633,11 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // header and only rotate if safe. BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; if (!LoopBlockSet.count(*HeaderChain.begin())) - return 0; + return nullptr; BlockFrequency BestExitEdgeFreq; unsigned BestExitLoopDepth = 0; - MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *ExitingBB = nullptr; // If there are exits to outer loops, loop rotation can severely limit // fallthrough opportunites unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop. @@ -641,7 +651,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockChain &Chain = *BlockToChain[*I]; // Ensure that this block is at the end of a chain; otherwise it could be // mid-way through an inner loop or a successor of an analyzable branch. - if (*I != *llvm::prior(Chain.end())) + if (*I != *std::prev(Chain.end())) continue; // Now walk the successors. We need to establish whether this has a viable @@ -690,14 +700,17 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " [L:" << SuccLoopDepth - << "] (" << ExitEdgeFreq << ")\n"); - // Note that we slightly bias this toward an existing layout successor to - // retain incoming order in the absence of better information. - // FIXME: Should we bias this more strongly? It's pretty weak. + << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + // Note that we bias this toward an existing layout successor to retain + // incoming order in the absence of better information. The exit must have + // a frequency higher than the current exit before we consider breaking + // the layout. + BranchProbability Bias(100 - ExitBlockBias, 100); if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && - !(ExitEdgeFreq < BestExitEdgeFreq))) { + !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } @@ -713,14 +726,14 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. if (!ExitingBB || L.getNumBlocks() == 1) - return 0; + return nullptr; // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop // rotation altogether. if (!BlocksExitingToOuterLoop.empty() && !BlocksExitingToOuterLoop.count(ExitingBB)) - return 0; + return nullptr; DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); return ExitingBB; @@ -745,7 +758,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, PI != PE; ++PI) { BlockChain *PredChain = BlockToChain[*PI]; if (!LoopBlockSet.count(*PI) && - (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + (!PredChain || *PI == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } @@ -755,7 +768,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), SE = Bottom->succ_end(); SI != SE; ++SI) { @@ -771,7 +784,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (ExitIt == LoopChain.end()) return; - std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); + std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } /// \brief Forms basic block chains from the natural loop structures. @@ -799,7 +812,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. - MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *ExitingBB = nullptr; if (LoopTop == L.getHeader()) ExitingBB = findBestLoopExit(F, L, LoopBlockSet); @@ -815,7 +828,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BE = L.block_end(); BI != BE; ++BI) { BlockChain &Chain = *BlockToChain[*BI]; - if (!UpdatedPreds.insert(&Chain)) + if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); @@ -887,11 +900,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // the exact fallthrough behavior for. for (;;) { Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; - MachineFunction::iterator NextFI(llvm::next(FI)); + MachineFunction::iterator NextFI(std::next(FI)); MachineBasicBlock *NextBB = NextFI; // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. @@ -899,7 +912,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " << getBlockName(BB) << " -> " << getBlockName(NextBB) << "\n"); - Chain->merge(NextBB, 0); + Chain->merge(NextBB, nullptr); FI = NextFI; BB = NextBB; } @@ -916,7 +929,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = &*FI; BlockChain &Chain = *BlockToChain[BB]; - if (!UpdatedPreds.insert(&Chain)) + if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); @@ -939,7 +952,9 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { BlockChain &FunctionChain = *BlockToChain[&F.front()]; buildChain(&F.front(), FunctionChain, BlockWorkList); +#ifndef NDEBUG typedef SmallPtrSet FunctionBlockSetType; +#endif DEBUG({ // Crash at the end so we get all of the debugging output first. bool BadFunc = false; @@ -983,14 +998,36 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Update the terminator of the previous block. if (BI == FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this // boiler plate. Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // The "PrevBB" is not yet updated to reflect current code layout, so, + // o. it may fall-through to a block without explict "goto" instruction + // before layout, and no longer fall-through it after layout; or + // o. just opposite. + // + // AnalyzeBranch() may return erroneous value for FBB when these two + // situations take place. For the first scenario FBB is mistakenly set + // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, + // is mistakenly pointing to "*BI". + // + bool needUpdateBr = true; + if (!Cond.empty() && (!FBB || FBB == *BI)) { + PrevBB->updateTerminator(); + needUpdateBr = false; + Cond.clear(); + TBB = FBB = nullptr; + if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // FIXME: This should never take place. + TBB = FBB = nullptr; + } + } + // If PrevBB has a two-way branch, try to re-order the branches // such that we branch to the successor with higher weight first. if (TBB && !Cond.empty() && FBB && @@ -1003,14 +1040,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + needUpdateBr = true; } - PrevBB->updateTerminator(); + if (needUpdateBr) + PrevBB->updateTerminator(); } } // Fixup the last block. Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) F.back().updateTerminator(); @@ -1022,16 +1061,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (F.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) return; - unsigned Align = TLI->getPrefLoopAlignment(); - if (!Align) - return; // Don't care about loop alignment. if (FunctionChain.begin() == FunctionChain.end()) return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; - for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), + for (BlockChain::iterator BI = std::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { // Don't align non-looping basic blocks. These are unlikely to execute @@ -1042,6 +1078,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!L) continue; + unsigned Align = TLI->getPrefLoopAlignment(L); + if (!Align) + continue; // Don't care about loop alignment. + // If the block is cold relative to the function entry don't waste space // aligning it. BlockFrequency Freq = MBFI->getBlockFreq(*BI); @@ -1057,7 +1097,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. - MachineBasicBlock *LayoutPred = *llvm::prior(BI); + MachineBasicBlock *LayoutPred = *std::prev(BI); // Force alignment if all the predecessors are jumps. We already checked // that the block isn't cold above. @@ -1079,14 +1119,17 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. - if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) + return false; + + if (skipOptnoneFunction(*F.getFunction())) return false; MBPI = &getAnalysis(); MBFI = &getAnalysis(); MLI = &getAnalysis(); - TII = F.getTarget().getInstrInfo(); - TLI = F.getTarget().getTargetLowering(); + TII = F.getSubtarget().getInstrInfo(); + TLI = F.getSubtarget().getTargetLowering(); assert(BlockToChain.empty()); buildCFGChains(F); @@ -1125,9 +1168,9 @@ public: initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.setPreservesAll(); @@ -1147,7 +1190,7 @@ INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. - if (llvm::next(F.begin()) == F.end()) + if (std::next(F.begin()) == F.end()) return false; MBPI = &getAnalysis();