void markChainSuccessors(BlockChain &Chain,
MachineBasicBlock *LoopHeaderBB,
- SmallVectorImpl<MachineBasicBlock *> &Blocks,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ MachineFunction &F,
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &Blocks,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = 0);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void buildCFGChains(MachineFunction &F);
}
#endif
+/// \brief Mark a chain's successors as having one fewer preds.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the worklist passed in.
void MachineBlockPlacement::markChainSuccessors(
BlockChain &Chain,
MachineBasicBlock *LoopHeaderBB,
}
}
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
+ MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(4, 5); // 80%
+
+ MachineBasicBlock *BestSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we manually compute probabilities using the edge
+ // weights. This is suboptimal as it means that the somewhat subtle
+ // definition of edge weight semantics is encoded here as well. We should
+ // improve the MBPI interface to effeciently support query patterns such as
+ // this.
+ uint32_t BestWeight = 0;
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
+ continue;
+ }
+ if (*SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+
+ // Only consider successors which are either "hot", or wouldn't violate
+ // any CFG constraints.
+ if (SuccChain.LoopPredecessors != 0) {
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
+ continue;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more important
+ // predecessor.
+ BlockFrequency CandidateEdgeFreq
+ = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+ bool BadCFGConflict = false;
+ for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+ PE = (*SI)->pred_end();
+ PI != PE; ++PI) {
+ if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+ BlockToChain[*PI] == &Chain)
+ continue;
+ BlockFrequency PredEdgeFreq
+ = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+ if (PredEdgeFreq >= CandidateEdgeFreq) {
+ BadCFGConflict = true;
+ break;
+ }
+ }
+ if (BadCFGConflict) {
+ DEBUG(dbgs() << " " << getBlockName(*SI)
+ << " -> non-cold CFG conflict\n");
+ continue;
+ }
+ }
+
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
+ if (BestSucc && BestWeight >= SuccWeight)
+ continue;
+ BestSucc = *SI;
+ BestWeight = SuccWeight;
+ }
+ return BestSucc;
+}
+
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+ const BlockChain &PlacedChain;
+ const BlockToChainMapType &BlockToChain;
+
+public:
+ IsBlockPlaced(const BlockChain &PlacedChain,
+ const BlockToChainMapType &BlockToChain)
+ : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+ bool operator()(MachineBasicBlock *BB) const {
+ return BlockToChain.lookup(BB) == &PlacedChain;
+ }
+};
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and select the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve icache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Once we need to walk the worklist looking for a candidate, cleanup the
+ // worklist of already placed entries.
+ // FIXME: If this shows up on profiles, it could be folded (at the cost of
+ // some code complexity) into the loop below.
+ WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+ IsBlockPlaced(Chain, BlockToChain)),
+ WorkList.end());
+
+ MachineBasicBlock *BestBlock = 0;
+ BlockFrequency BestFreq;
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
+ WBE = WorkList.end();
+ WBI != WBE; ++WBI) {
+ assert(!BlockFilter || BlockFilter->count(*WBI));
+ BlockChain &SuccChain = *BlockToChain[*WBI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*WBI)
+ << " -> Already merged!\n");
+ continue;
+ }
+ assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
+ DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
+ << " (freq)\n");
+ if (BestBlock && BestFreq >= CandidateFreq)
+ continue;
+ BestBlock = *WBI;
+ BestFreq = CandidateFreq;
+ }
+ return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from the
+/// LastUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(I))
+ continue;
+ if (BlockToChain[I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ // Now select the head of the chain to which the unplaced block belongs
+ // as the block to place. This will force the entire chain to be placed,
+ // and satisfies the requirements of merging chains.
+ return *BlockToChain[I]->begin();
+ }
+ }
+ return 0;
+}
+
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB,
BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
- const BranchProbability HotProb(4, 5); // 80%
assert(BB);
assert(BlockToChain[BB] == &Chain);
- assert(*Chain.begin() == BB);
+ MachineFunction &F = *BB->getParent();
+ MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+
MachineBasicBlock *LoopHeaderBB = BB;
markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
BB = *llvm::prior(Chain.end());
assert(BB);
assert(BlockToChain[BB] == &Chain);
assert(*llvm::prior(Chain.end()) == BB);
+ MachineBasicBlock *BestSucc = 0;
// Look for the best viable successor if there is one to place immediately
// after this block.
- MachineBasicBlock *BestSucc = 0;
- BranchProbability BestProb = BranchProbability::getZero();
- DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end();
- SI != SE; ++SI) {
- if (BlockFilter && !BlockFilter->count(*SI))
- continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
- if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
- continue;
- }
-
- BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI);
-
- // Only consider successors which are either "hot", or wouldn't violate
- // any CFG constraints.
- if (SuccChain.LoopPredecessors != 0 && SuccProb < HotProb) {
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
- continue;
- }
-
- DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
- << " (prob)"
- << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
- << "\n");
- if (BestSucc && BestProb >= SuccProb)
- continue;
- BestSucc = *SI;
- BestProb = SuccProb;
- }
+ BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
// this point. This won't be a fallthrough, but it will increase locality.
- if (!BestSucc) {
- BlockFrequency BestFreq;
- for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = BlockWorkList.begin(),
- WBE = BlockWorkList.end();
- WBI != WBE; ++WBI) {
- if (BlockFilter && !BlockFilter->count(*WBI))
- continue;
- BlockChain &SuccChain = *BlockToChain[*WBI];
- if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(*WBI)
- << " -> Already merged!\n");
- continue;
- }
- assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
- BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
- DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
- << " (freq)\n");
- if (BestSucc && BestFreq >= CandidateFreq)
- continue;
- BestSucc = *WBI;
- BestFreq = CandidateFreq;
- }
- }
if (!BestSucc) {
- DEBUG(dbgs() << "Finished forming chain for header block "
- << getBlockNum(*Chain.begin()) << "\n");
- return;
+ BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+ BlockFilter);
+ if (!BestSucc)
+ break;
+
+ DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
}
// Place this block, updating the datastructures to reflect its placement.
BlockChain &SuccChain = *BlockToChain[BestSucc];
+ // Zero out LoopPredecessors for the successor we're about to merge in case
+ // we selected a successor that didn't fit naturally into the CFG.
+ SuccChain.LoopPredecessors = 0;
DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
<< " to " << getBlockNum(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *llvm::prior(Chain.end());
- }
+ };
+
+ DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockNum(*Chain.begin()) << "\n");
}
/// \brief Forms basic block chains from the natural loop structures.
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+ BlockChain &LoopChain = *BlockToChain[L.getHeader()];
// FIXME: This is a really lame way of walking the chains in the loop: we
// walk the blocks, and use a set to prevent visiting a particular chain
BlockWorkList.push_back(*BI);
}
- BlockChain &LoopChain = *BlockToChain[L.getHeader()];
buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet);
DEBUG({
- if (LoopChain.LoopPredecessors)
+ // Crash at the end so we get all of the debugging output first.
+ bool BadLoop = false;
+ if (LoopChain.LoopPredecessors) {
+ BadLoop = true;
dbgs() << "Loop chain contains a block without its preds placed!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+ }
for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
BCI != BCE; ++BCI)
- if (!LoopBlockSet.erase(*BCI))
+ if (!LoopBlockSet.erase(*BCI)) {
+ // We don't mark the loop as bad here because there are real situations
+ // where this can occur. For example, with an unanalyzable fallthrough
+ // from a loop block to a non-loop block or vice versa.
dbgs() << "Loop chain contains a block not contained by the loop!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
<< " Bad block: " << getBlockName(*BCI) << "\n";
+ }
- if (!LoopBlockSet.empty())
- for (SmallPtrSet<MachineBasicBlock *, 16>::iterator LBI = LoopBlockSet.begin(), LBE = LoopBlockSet.end();
+ if (!LoopBlockSet.empty()) {
+ BadLoop = true;
+ for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
+ LBE = LoopBlockSet.end();
LBI != LBE; ++LBI)
dbgs() << "Loop contains blocks never placed into a chain!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
<< " Bad block: " << getBlockName(*LBI) << "\n";
+ }
+ assert(!BadLoop && "Detected problems with the placement of this loop.");
});
}
void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- BlockToChain[&*FI] =
- new (ChainAllocator.Allocate()) BlockChain(BlockToChain, &*FI);
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = FI;
+ BlockChain *&Chain = BlockToChain[BB];
+ Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ // Also, merge any blocks which we cannot reason about and must preserve
+ // the exact fallthrough behavior for.
+ for (;;) {
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ break;
+
+ MachineFunction::iterator NextFI(llvm::next(FI));
+ MachineBasicBlock *NextBB = NextFI;
+ // Ensure that the layout successor is a viable block, as we know that
+ // fallthrough is a possibility.
+ assert(NextFI != FE && "Can't fallthrough past the last block.");
+ DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
+ Chain->merge(NextBB, 0);
+ FI = NextFI;
+ BB = NextBB;
+ }
+ }
// Build any loop-based chains.
for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadFunc = false;
FunctionBlockSetType FunctionBlockSet;
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
FunctionBlockSet.insert(FI);
- for (BlockChain::iterator BCI = FunctionChain.begin(), BCE = FunctionChain.end();
+ for (BlockChain::iterator BCI = FunctionChain.begin(),
+ BCE = FunctionChain.end();
BCI != BCE; ++BCI)
- if (!FunctionBlockSet.erase(*BCI))
+ if (!FunctionBlockSet.erase(*BCI)) {
+ BadFunc = true;
dbgs() << "Function chain contains a block not in the function!\n"
<< " Bad block: " << getBlockName(*BCI) << "\n";
+ }
- if (!FunctionBlockSet.empty())
- for (SmallPtrSet<MachineBasicBlock *, 16>::iterator FBI = FunctionBlockSet.begin(),
- FBE = FunctionBlockSet.end(); FBI != FBE; ++FBI)
+ if (!FunctionBlockSet.empty()) {
+ BadFunc = true;
+ for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
+ FBE = FunctionBlockSet.end();
+ FBI != FBE; ++FBI)
dbgs() << "Function contains blocks never placed into a chain!\n"
<< " Bad block: " << getBlockName(*FBI) << "\n";
+ }
+ assert(!BadFunc && "Detected problems with the block placement.");
});
// Splice the blocks into place.
MachineFunction::iterator InsertPos = F.begin();
- SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
- for (BlockChain::iterator BI = FunctionChain.begin(), BE = FunctionChain.end();
+ for (BlockChain::iterator BI = FunctionChain.begin(),
+ BE = FunctionChain.end();
BI != BE; ++BI) {
DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
: " ... ")
AlignLoops(F);
BlockToChain.clear();
+ ChainAllocator.DestroyAll();
// We always return true as we have no way to track whether the final order
// differs from the original order.