#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
+static cl::opt<bool>
+LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to access PGO "
+ "heuristics to minimize code growth in cold regions."));
+
+static cl::opt<unsigned>
+ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden,
+ cl::desc("Coldness threshold in percentage. The loop header frequency "
+ "(relative to the entry frequency) is compared with this "
+ "threshold to determine if non-trivial unswitching should be "
+ "enabled."));
+
namespace {
class LUAnalysisCache {
LUAnalysisCache BranchesInfo;
+ bool EnabledPGO;
+
+ // BFI and ColdEntryFreq are only used when PGO and
+ // LoopUnswitchWithBlockFrequency are enabled.
+ BlockFrequencyInfo BFI;
+ BlockFrequency ColdEntryFreq;
+
bool OptimizeForSize;
bool redoLoop;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
+
+ EnabledPGO = F->getEntryCount().hasValue();
+
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ BranchProbabilityInfo BPI(*F, *LI);
+ BFI.calculate(*L->getHeader()->getParent(), BPI, *LI);
+
+ // Use BranchProbability to compute a minimum frequency based on
+ // function entry baseline frequency. Loops with headers below this
+ // frequency are considered as cold.
+ const BranchProbability ColdProb(ColdnessThreshold, 100);
+ ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
+ }
+
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
LLVMContext &Context = loopHeader->getContext();
- // Probably we reach the quota of branches for this loop. If so
- // stop unswitching.
+ // Analyze loop cost, and stop unswitching if loop content can not be duplicated.
if (!BranchesInfo.countLoop(
currentLoop, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*currentLoop->getHeader()->getParent()),
return true;
}
+ // Do not unswitch loops containing convergent operations, as we might be
+ // making them control dependent on the unswitch value when they were not
+ // before.
+ // FIXME: This could be refined to only bail if the convergent operation is
+ // not already control-dependent on the unswitch value.
+ for (const auto BB : currentLoop->blocks()) {
+ for (auto &I : *BB) {
+ auto CS = CallSite(&I);
+ if (!CS) continue;
+ if (CS.hasFnAttr(Attribute::Convergent))
+ return false;
+ }
+ }
+
// Do not do non-trivial unswitch while optimizing for size.
// FIXME: Use Function::optForSize().
if (OptimizeForSize ||
loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
return false;
+ if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+ // Compute the weighted frequency of the hottest block in the
+ // loop (loopHeader in this case since inner loops should be
+ // processed before outer loop). If it is less than ColdFrequency,
+ // we should not unswitch.
+ BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
+ if (LoopEntryFreq < ColdEntryFreq)
+ return false;
+ }
+
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
/// mapping the blocks with the specified map.
static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM) {
- Loop *New = new Loop();
- LPM->insertLoop(New, PL);
+ Loop &New = LPM->addLoop(PL);
// Add all of the blocks in L to the new loop.
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
if (LI->getLoopFor(*I) == L)
- New->addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);
// Add all of the subloops to the new loop.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- CloneLoop(*I, New, VM, LI, LPM);
+ CloneLoop(*I, &New, VM, LI, LPM);
- return New;
+ return &New;
}
static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
- BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), DT, LI);
+ BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI);
// Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
// Check if this loop will execute any side-effecting instructions (e.g.
// stores, calls, volatile loads) in the part of the loop that the code
// *would* execute. Check the header first.
- for (BasicBlock::iterator I : *CurrentBB)
- if (I->mayHaveSideEffects())
+ for (Instruction &I : *CurrentBB)
+ if (I.mayHaveSideEffects())
return false;
// FIXME: add check for constant foldable switch instructions.
// Splice the newly inserted blocks into the function right before the
// original preheader.
- F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
- NewBlocks[0], F->end());
+ F->getBasicBlockList().splice(NewPreheader->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F->end());
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
PHINode *PN = PHINode::Create(LPad->getType(), 0, "",
- ExitSucc->getFirstInsertionPt());
+ &*ExitSucc->getFirstInsertionPt());
for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
I != E; ++I) {
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ RemapInstruction(&*I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
Succ->replaceAllUsesWith(Pred);
// Move all of the successor contents from Succ to Pred.
- Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
- Succ->end());
+ Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
+ Succ->begin(), Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);