//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "block-placement2"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "block-placement2"
+
STATISTIC(NumCondBranches, "Number of conditional branches");
STATISTIC(NumUncondBranches, "Number of uncondittional branches");
STATISTIC(CondBranchTakenFreq,
STATISTIC(UncondBranchTakenFreq,
"Potential frequency of taking unconditional branches");
+static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
+ cl::desc("Force the alignment of all "
+ "blocks in the function."),
+ cl::init(0), cl::Hidden);
+
+// FIXME: Find a good default for this flag and remove the flag.
+static cl::opt<unsigned>
+ExitBlockBias("block-placement-exit-block-bias",
+ cl::desc("Block frequency percentage a loop exit block needs "
+ "over the original exit to be considered the new exit."),
+ cl::init(0), cl::Hidden);
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
#ifndef NDEBUG
/// \brief Dump the blocks in this chain.
- void dump() LLVM_ATTRIBUTE_USED {
+ LLVM_DUMP_METHOD void dump() {
for (iterator I = begin(), E = end(); I != E; ++I)
(*I)->dump();
}
const TargetInstrInfo *TII;
/// \brief A handle to the target's lowering info.
- const TargetLowering *TLI;
+ const TargetLoweringBase *TLI;
/// \brief Allocator and owner of BlockChain structures.
///
void markChainSuccessors(BlockChain &Chain,
MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
- const BlockFilterSet *BlockFilter = 0);
+ const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
const BlockFilterSet *BlockFilter);
const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
- const BlockFilterSet *BlockFilter = 0);
+ const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(MachineFunction &F,
initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineLoopInfo>();
const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(4, 5); // 80%
- MachineBasicBlock *BestSucc = 0;
+ MachineBasicBlock *BestSucc = nullptr;
// FIXME: Due to the performance of the probability and weight routines in
// the MBPI analysis, we manually compute probabilities using the edge
// weights. This is suboptimal as it means that the somewhat subtle
// any CFG constraints.
if (SuccChain.LoopPredecessors != 0) {
if (SuccProb < HotProb) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ << " (prob) (CFG conflict)\n");
continue;
}
}
}
if (BadCFGConflict) {
- DEBUG(dbgs() << " " << getBlockName(*SI)
- << " -> non-cold CFG conflict\n");
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ << " (prob) (non-cold CFG conflict)\n");
continue;
}
}
return BestSucc;
}
-namespace {
-/// \brief Predicate struct to detect blocks already placed.
-class IsBlockPlaced {
- const BlockChain &PlacedChain;
- const BlockToChainMapType &BlockToChain;
-
-public:
- IsBlockPlaced(const BlockChain &PlacedChain,
- const BlockToChainMapType &BlockToChain)
- : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
-
- bool operator()(MachineBasicBlock *BB) const {
- return BlockToChain.lookup(BB) == &PlacedChain;
- }
-};
-}
-
/// \brief Select the best block from a worklist.
///
/// This looks through the provided worklist as a list of candidate basic
// FIXME: If this shows up on profiles, it could be folded (at the cost of
// some code complexity) into the loop below.
WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
- IsBlockPlaced(Chain, BlockToChain)),
+ [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
- MachineBasicBlock *BestBlock = 0;
+ MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
WBE = WorkList.end();
assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
- DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
- << " (freq)\n");
+ DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> ";
+ MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
if (BestBlock && BestFreq >= CandidateFreq)
continue;
BestBlock = *WBI;
return *BlockToChain[I]->begin();
}
}
- return 0;
+ return nullptr;
}
void MachineBlockPlacement::buildChain(
MachineBasicBlock *LoopHeaderBB = BB;
markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
- BB = *llvm::prior(Chain.end());
+ BB = *std::prev(Chain.end());
for (;;) {
assert(BB);
assert(BlockToChain[BB] == &Chain);
- assert(*llvm::prior(Chain.end()) == BB);
+ assert(*std::prev(Chain.end()) == BB);
// Look for the best viable successor if there is one to place immediately
// after this block.
<< " to " << getBlockNum(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
- BB = *llvm::prior(Chain.end());
+ BB = *std::prev(Chain.end());
}
DEBUG(dbgs() << "Finished forming chain for header block "
<< getBlockName(L.getHeader()) << "\n");
BlockFrequency BestPredFreq;
- MachineBasicBlock *BestPred = 0;
+ MachineBasicBlock *BestPred = nullptr;
for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
PE = L.getHeader()->pred_end();
PI != PE; ++PI) {
if (!LoopBlockSet.count(Pred))
continue;
DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
- << Pred->succ_size() << " successors, "
- << MBFI->getBlockFreq(Pred) << " freq\n");
+ << Pred->succ_size() << " successors, ";
+ MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
if (Pred->succ_size() > 1)
continue;
// header and only rotate if safe.
BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
if (!LoopBlockSet.count(*HeaderChain.begin()))
- return 0;
+ return nullptr;
BlockFrequency BestExitEdgeFreq;
unsigned BestExitLoopDepth = 0;
- MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *ExitingBB = nullptr;
// If there are exits to outer loops, loop rotation can severely limit
// fallthrough opportunites unless it selects such an exit. Keep a set of
// blocks where rotating to exit with that block will reach an outer loop.
BlockChain &Chain = *BlockToChain[*I];
// Ensure that this block is at the end of a chain; otherwise it could be
// mid-way through an inner loop or a successor of an analyzable branch.
- if (*I != *llvm::prior(Chain.end()))
+ if (*I != *std::prev(Chain.end()))
continue;
// Now walk the successors. We need to establish whether this has a viable
BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
<< getBlockName(*SI) << " [L:" << SuccLoopDepth
- << "] (" << ExitEdgeFreq << ")\n");
- // Note that we slightly bias this toward an existing layout successor to
- // retain incoming order in the absence of better information.
- // FIXME: Should we bias this more strongly? It's pretty weak.
+ << "] (";
+ MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ // Note that we bias this toward an existing layout successor to retain
+ // incoming order in the absence of better information. The exit must have
+ // a frequency higher than the current exit before we consider breaking
+ // the layout.
+ BranchProbability Bias(100 - ExitBlockBias, 100);
if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
ExitEdgeFreq > BestExitEdgeFreq ||
((*I)->isLayoutSuccessor(*SI) &&
- !(ExitEdgeFreq < BestExitEdgeFreq))) {
+ !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
BestExitEdgeFreq = ExitEdgeFreq;
ExitingBB = *I;
}
// Without a candidate exiting block or with only a single block in the
// loop, just use the loop header to layout the loop.
if (!ExitingBB || L.getNumBlocks() == 1)
- return 0;
+ return nullptr;
// Also, if we have exit blocks which lead to outer loops but didn't select
// one of them as the exiting block we are rotating toward, disable loop
// rotation altogether.
if (!BlocksExitingToOuterLoop.empty() &&
!BlocksExitingToOuterLoop.count(ExitingBB))
- return 0;
+ return nullptr;
DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
return ExitingBB;
PI != PE; ++PI) {
BlockChain *PredChain = BlockToChain[*PI];
if (!LoopBlockSet.count(*PI) &&
- (!PredChain || *PI == *llvm::prior(PredChain->end()))) {
+ (!PredChain || *PI == *std::prev(PredChain->end()))) {
ViableTopFallthrough = true;
break;
}
// bottom is a viable exiting block. If so, bail out as rotating will
// introduce an unnecessary branch.
if (ViableTopFallthrough) {
- MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end());
+ MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
SE = Bottom->succ_end();
SI != SE; ++SI) {
if (ExitIt == LoopChain.end())
return;
- std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end());
+ std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
}
/// \brief Forms basic block chains from the natural loop structures.
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
// branches by placing an exit edge at the bottom.
- MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *ExitingBB = nullptr;
if (LoopTop == L.getHeader())
ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
// the exact fallthrough behavior for.
for (;;) {
Cond.clear();
- MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
break;
- MachineFunction::iterator NextFI(llvm::next(FI));
+ MachineFunction::iterator NextFI(std::next(FI));
MachineBasicBlock *NextBB = NextFI;
// Ensure that the layout successor is a viable block, as we know that
// fallthrough is a possibility.
DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
<< getBlockName(BB) << " -> " << getBlockName(NextBB)
<< "\n");
- Chain->merge(NextBB, 0);
+ Chain->merge(NextBB, nullptr);
FI = NextFI;
BB = NextBB;
}
BlockChain &FunctionChain = *BlockToChain[&F.front()];
buildChain(&F.front(), FunctionChain, BlockWorkList);
+#ifndef NDEBUG
typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+#endif
DEBUG({
// Crash at the end so we get all of the debugging output first.
bool BadFunc = false;
// Update the terminator of the previous block.
if (BI == FunctionChain.begin())
continue;
- MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
+ MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI));
// FIXME: It would be awesome of updateTerminator would just return rather
// than assert when the branch cannot be analyzed in order to remove this
// boiler plate.
Cond.clear();
- MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // The "PrevBB" is not yet updated to reflect current code layout, so,
+ // o. it may fall-through to a block without explict "goto" instruction
+ // before layout, and no longer fall-through it after layout; or
+ // o. just opposite.
+ //
+ // AnalyzeBranch() may return erroneous value for FBB when these two
+ // situations take place. For the first scenario FBB is mistakenly set
+ // NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
+ // is mistakenly pointing to "*BI".
+ //
+ bool needUpdateBr = true;
+ if (!Cond.empty() && (!FBB || FBB == *BI)) {
+ PrevBB->updateTerminator();
+ needUpdateBr = false;
+ Cond.clear();
+ TBB = FBB = nullptr;
+ if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
+ // FIXME: This should never take place.
+ TBB = FBB = nullptr;
+ }
+ }
+
// If PrevBB has a two-way branch, try to re-order the branches
// such that we branch to the successor with higher weight first.
if (TBB && !Cond.empty() && FBB &&
DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PrevBB);
TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
+ needUpdateBr = true;
}
- PrevBB->updateTerminator();
+ if (needUpdateBr)
+ PrevBB->updateTerminator();
}
}
// Fixup the last block.
Cond.clear();
- MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
F.back().updateTerminator();
const BranchProbability ColdProb(1, 5); // 20%
BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
- for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
+ for (BlockChain::iterator BI = std::next(FunctionChain.begin()),
BE = FunctionChain.end();
BI != BE; ++BI) {
// Don't align non-looping basic blocks. These are unlikely to execute
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
- MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+ MachineBasicBlock *LayoutPred = *std::prev(BI);
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
}
// Align this block if the layout predecessor's edge into this block is
- // cold relative to the block. When this is true, othe predecessors make up
+ // cold relative to the block. When this is true, other predecessors make up
// all of the hot entries into the block and thus alignment is likely to be
// important.
BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
// Check for single-block functions and skip them.
- if (llvm::next(F.begin()) == F.end())
+ if (std::next(F.begin()) == F.end())
+ return false;
+
+ if (skipOptnoneFunction(*F.getFunction()))
return false;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();
- TII = F.getTarget().getInstrInfo();
- TLI = F.getTarget().getTargetLowering();
+ TII = F.getSubtarget().getInstrInfo();
+ TLI = F.getSubtarget().getTargetLowering();
assert(BlockToChain.empty());
buildCFGChains(F);
BlockToChain.clear();
ChainAllocator.DestroyAll();
+ if (AlignAllBlock)
+ // Align all of the blocks in the function to a specific alignment.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ FI->setAlignment(AlignAllBlock);
+
// We always return true as we have no way to track whether the final order
// differs from the original order.
return true;
initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.setPreservesAll();
bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
// Check for single-block functions and skip them.
- if (llvm::next(F.begin()) == F.end())
+ if (std::next(F.begin()) == F.end())
return false;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();