X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FUtils%2FLoopUnroll.cpp;h=3b0e813072de918380c8a1b4a184f701cf7a534a;hp=162807d03c624a6dfeb1949ca738acbb1be5f5bb;hb=a4697dad1926a8c91c12cd6663f5ddc7c4cd16c7;hpb=80ccd9ea59b8911f12836da98aceedce4ebc6a6f diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 162807d03c6..3b0e813072d 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -16,22 +16,29 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + // TODO: Should these be here or in LoopUnroll? STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); @@ -58,18 +65,23 @@ static inline void RemapInstruction(Instruction *I, /// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it /// only has one predecessor, and that predecessor only has one successor. -/// The LoopInfo Analysis that is passed will be kept consistent. -/// Returns the new combined block. -static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, - LPPassManager *LPM) { +/// The LoopInfo Analysis that is passed will be kept consistent. If folding is +/// successful references to the containing loop must be removed from +/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have +/// references to the eliminated BB. The argument ForgottenLoops contains a set +/// of loops that have already been forgotten to prevent redundant, expensive +/// calls to ScalarEvolution::forgetLoop. Returns the new combined block. +static BasicBlock * +FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, + SmallPtrSetImpl &ForgottenLoops) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. BasicBlock *OnlyPred = BB->getSinglePredecessor(); - if (!OnlyPred) return 0; + if (!OnlyPred) return nullptr; if (OnlyPred->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); @@ -98,8 +110,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // ScalarEvolution holds references to loop exit blocks. if (LPM) { if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable()) { - if (Loop *L = LI->getLoopFor(BB)) - SE->forgetLoop(L); + if (Loop *L = LI->getLoopFor(BB)) { + if (ForgottenLoops.insert(L)) + SE->forgetLoop(L); + } } } LI->removeBlock(BB); @@ -137,10 +151,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, /// removed from the LoopPassManager as well. LPM can also be NULL. /// /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are -/// available it must also preserve those analyses. +/// available from the Pass it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, unsigned TripMultiple, - LoopInfo *LI, LPPassManager *LPM) { + LoopInfo *LI, Pass *PP, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -208,8 +222,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - if (LPM) { - ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); + if (PP) { + ScalarEvolution *SE = PP->getAnalysisIfAvailable(); if (SE) SE->forgetLoop(L); } @@ -225,18 +239,35 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } + // Report the unrolling decision. + DebugLoc LoopLoc = L->getStartLoc(); + Function *F = Header->getParent(); + LLVMContext &Ctx = F->getContext(); + if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + Twine("completely unrolled loop with ") + + Twine(TripCount) + " iterations"); } else { + auto EmitDiag = [&](const Twine &T) { + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + "unrolled loop by a factor of " + Twine(Count) + + T); + }; + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); + EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + EmitDiag(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); + EmitDiag(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); } @@ -297,11 +328,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, L->addBasicBlockToLoop(New, LI->getBase()); // Add phi entries for newly created values to all exit blocks. - for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); - SI != SE; ++SI) { - if (L->contains(*SI)) + for (BasicBlock *Succ : successors(*BB)) { + if (L->contains(Succ)) continue; - for (BasicBlock::iterator BBI = (*SI)->begin(); + for (BasicBlock::iterator BBI = Succ->begin(); PHINode *phi = dyn_cast(BBI); ++BBI) { Value *Incoming = phi->getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); @@ -383,11 +413,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Remove phi operands at this loop exit if (Dest != LoopExit) { BasicBlock *BB = Latches[i]; - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); - SI != SE; ++SI) { - if (*SI == Headers[i]) + for (BasicBlock *Succ : successors(BB)) { + if (Succ == Headers[i]) continue; - for (BasicBlock::iterator BBI = (*SI)->begin(); + for (BasicBlock::iterator BBI = Succ->begin(); PHINode *Phi = dyn_cast(BBI); ++BBI) { Phi->removeIncomingValue(BB, false); } @@ -400,23 +429,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, } // Merge adjacent basic blocks, if possible. + SmallPtrSet ForgottenLoops; for (unsigned i = 0, e = Latches.size(); i != e; ++i) { BranchInst *Term = cast(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM)) + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM, + ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } } - if (LPM) { + DominatorTree *DT = nullptr; + if (PP) { // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. - if (DominatorTree *DT = LPM->getAnalysisIfAvailable()) - DT->runOnFunction(*L->getHeader()->getParent()); + if (DominatorTreeWrapperPass *DTWP = + PP->getAnalysisIfAvailable()) { + DT = &DTWP->getDomTree(); + DT->recalculate(*L->getHeader()->getParent()); + } // Simplify any new induction variables in the partially unrolled loop. - ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); + ScalarEvolution *SE = PP->getAnalysisIfAvailable(); if (SE && !CompletelyUnroll) { SmallVector DeadInsts; simplifyLoopIVs(L, SE, LPM, DeadInsts); @@ -449,9 +484,36 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; + + Loop *OuterL = L->getParentLoop(); // Remove the loop from the LoopPassManager if it's completely removed. - if (CompletelyUnroll && LPM != NULL) + if (CompletelyUnroll && LPM != nullptr) LPM->deleteLoopFromQueue(L); + // If we have a pass and a DominatorTree we should re-simplify impacted loops + // to ensure subsequent analyses can rely on this form. We want to simplify + // at least one layer outside of the loop that was unrolled so that any + // changes to the parent loop exposed by the unrolling are considered. + if (PP && DT) { + if (!OuterL && !CompletelyUnroll) + OuterL = L; + if (OuterL) { + DataLayoutPass *DLP = PP->getAnalysisIfAvailable(); + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; + ScalarEvolution *SE = PP->getAnalysisIfAvailable(); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL); + + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop after + // deleteLoopFromQueue updates LoopInfo. + Loop *LatchLoop = LI->getLoopFor(Latches.back()); + if (!OuterL->contains(LatchLoop)) + while (OuterL->getParentLoop() != LatchLoop) + OuterL = OuterL->getParentLoop(); + + formLCSSARecursively(*OuterL, *DT, SE); + } + } + return true; }