X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopRotation.cpp;h=aeb5b36b4b8cfb402119447cebd226b6aed59b3a;hb=9025a1f8609d0f472e07cc8dfd568921cab347f7;hp=fde6bacb4921659f6aa3b3f3f2a63b63b2562700;hpb=36b699f2b139a30a2dfa4448223d6985b55daa8a;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index fde6bacb492..aeb5b36b4b8 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -11,27 +11,38 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-rotate" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; -#define MAX_HEADER_SIZE 16 +#define DEBUG_TYPE "loop-rotate" + +static cl::opt +DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, + cl::desc("The default maximum header size for automatic loop rotation")); STATISTIC(NumRotated, "Number of loops rotated"); namespace { @@ -39,21 +50,30 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(ID) { + LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { initializeLoopRotatePass(*PassRegistry::getPassRegistry()); + if (SpecifiedMaxHeaderSize == -1) + MaxHeaderSize = DefaultRotationThreshold; + else + MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); } // LCSSA form makes instruction renaming easier. void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); - AU.addPreserved(); - AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -61,20 +81,29 @@ namespace { bool rotateLoop(Loop *L, bool SimplifiedLatch); private: + unsigned MaxHeaderSize; LoopInfo *LI; const TargetTransformInfo *TTI; + AssumptionCache *AC; + DominatorTree *DT; }; } char LoopRotate::ID = 0; INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) -INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) -Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } +Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { + return new LoopRotate(MaxHeaderSize); +} /// Rotate Loop L as many times as possible. Return true if /// the loop is rotated at least once. @@ -82,8 +111,16 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; - LI = &getAnalysis(); - TTI = &getAnalysis(); + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + + Function &F = *L->getHeader()->getParent(); + + LI = &getAnalysis().getLoopInfo(); + TTI = &getAnalysis().getTTI(F); + AC = &getAnalysis().getAssumptionCache(F); + auto *DTWP = getAnalysisIfAvailable(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; // Simplify the loop latch before attempting to rotate the header // upward. Rotation may not be needed if the loop tail can be folded into the @@ -96,6 +133,12 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { MadeChange = true; SimplifiedLatch = false; } + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + return MadeChange; } @@ -164,13 +207,18 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, } } -/// Determine whether the instructions in this range my be safely and cheaply +/// Determine whether the instructions in this range may be safely and cheaply /// speculated. This is not an important enough situation to develop complex /// heuristics. We handle a single arithmetic instruction along with any type /// conversions. static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, - BasicBlock::iterator End) { + BasicBlock::iterator End, Loop *L) { bool seenIncrement = false; + bool MultiExitLoop = false; + + if (!L->getExitingBlock()) + MultiExitLoop = true; + for (BasicBlock::iterator I = Begin; I != End; ++I) { if (!isSafeToSpeculativelyExecute(I)) @@ -194,11 +242,30 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, case Instruction::Xor: case Instruction::Shl: case Instruction::LShr: - case Instruction::AShr: + case Instruction::AShr: { + Value *IVOpnd = !isa(I->getOperand(0)) + ? I->getOperand(0) + : !isa(I->getOperand(1)) + ? I->getOperand(1) + : nullptr; + if (!IVOpnd) + return false; + + // If increment operand is used outside of the loop, this speculation + // could cause extra live range interference. + if (MultiExitLoop) { + for (User *UseI : IVOpnd->users()) { + auto *UserInst = cast(UseI); + if (!L->contains(UserInst)) + return false; + } + } + if (seenIncrement) return false; seenIncrement = true; break; + } case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -212,7 +279,7 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, /// Fold the loop tail into the loop exit by speculating the loop tail /// instructions. Typically, this is a single post-increment. In the case of a /// simple 2-block loop, hoisting the increment can be much better than -/// duplicating the entire loop header. In the cast of loops with early exits, +/// duplicating the entire loop header. In the case of loops with early exits, /// rotation will not work anyway, but simplifyLoopLatch will put the loop in /// canonical form so downstream passes can handle it. /// @@ -234,7 +301,7 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) { if (!BI) return false; - if (!shouldSpeculateInstrs(Latch->begin(), Jmp)) + if (!shouldSpeculateInstrs(Latch->begin(), Jmp, L)) return false; DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " @@ -255,9 +322,8 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) { // Nuke the Latch block. assert(Latch->empty() && "unable to evacuate Latch"); LI->removeBlock(Latch); - if (DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable()) - DTWP->getDomTree().eraseNode(Latch); + if (DT) + DT->eraseNode(Latch); Latch->eraseFromParent(); return true; } @@ -281,7 +347,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast(OrigHeader->getTerminator()); - if (BI == 0 || BI->isUnconditional()) + if (!BI || BI->isUnconditional()) return false; // If the loop header is not one of the loop exiting blocks then @@ -292,7 +358,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. - if (OrigLatch == 0) + if (!OrigLatch) return false; // Rotate if either the loop latch does *not* exit the loop, or if the loop @@ -303,14 +369,17 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // Check size of original header and reject loop if it is very big or we can't // duplicate blocks inside it. { + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, AC, EphValues); + CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader, *TTI); + Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); if (Metrics.notDuplicatable) { DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" << " instructions: "; L->dump()); return false; } - if (Metrics.NumInsts > MAX_HEADER_SIZE) + if (Metrics.NumInsts > MaxHeaderSize) return false; } @@ -319,13 +388,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0) + if (!OrigPreheader) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. - if (ScalarEvolution *SE = getAnalysisIfAvailable()) - SE->forgetLoop(L); + if (auto *SEWP = getAnalysisIfAvailable()) + SEWP->getSE().forgetLoop(L); DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); @@ -356,6 +425,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { for (; PHINode *PN = dyn_cast(I); ++I) ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); @@ -386,7 +457,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - Value *V = SimplifyInstruction(C); + // FIXME: Provide TLI, DT, AC to SimplifyInstruction. + Value *V = SimplifyInstruction(C, DL); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. @@ -404,8 +476,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. TerminatorInst *TI = OrigHeader->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin(); + for (BasicBlock *SuccBB : TI->successors()) + for (BasicBlock::iterator BI = SuccBB->begin(); PHINode *PN = dyn_cast(BI); ++BI) PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); @@ -438,31 +510,31 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // The conditional branch can't be folded, handle the general case. // Update DominatorTree to reflect the CFG change we just made. Then split // edges as necessary to preserve LoopSimplify form. - if (DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable()) { - DominatorTree &DT = DTWP->getDomTree(); + if (DT) { // Everything that was dominated by the old loop header is now dominated // by the original loop preheader. Conceptually the header was merged // into the preheader, even though we reuse the actual block as a new // loop latch. - DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader); + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); SmallVector HeaderChildren(OrigHeaderNode->begin(), OrigHeaderNode->end()); - DomTreeNode *OrigPreheaderNode = DT.getNode(OrigPreheader); + DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader); for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) - DT.changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); + DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); - assert(DT.getNode(Exit)->getIDom() == OrigPreheaderNode); - assert(DT.getNode(NewHeader)->getIDom() == OrigPreheaderNode); + assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode); + assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode); // Update OrigHeader to be dominated by the new header block. - DT.changeImmediateDominator(OrigHeader, OrigLatch); + DT->changeImmediateDominator(OrigHeader, OrigLatch); } // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and // thus is not a preheader anymore. // Split the edge to form a real preheader. - BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this); + BasicBlock *NewPH = SplitCriticalEdge( + OrigPreheader, NewHeader, + CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); NewPH->setName(NewHeader->getName() + ".lr.ph"); // Preserve canonical loop form, which means that 'Exit' should have only @@ -478,8 +550,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { Loop *PredLoop = LI->getLoopFor(*PI); if (!PredLoop || PredLoop->contains(Exit)) continue; + if (isa((*PI)->getTerminator())) + continue; SplitLatchEdge |= L->getLoopLatch() == *PI; - BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this); + BasicBlock *ExitSplit = SplitCriticalEdge( + *PI, Exit, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); ExitSplit->moveBefore(Exit); } assert(SplitLatchEdge && @@ -493,17 +568,15 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { PHBI->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. - if (DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable()) { - DominatorTree &DT = DTWP->getDomTree(); + if (DT) { // Update OrigHeader to be dominated by the new header block. - DT.changeImmediateDominator(NewHeader, OrigPreheader); - DT.changeImmediateDominator(OrigHeader, OrigLatch); + DT->changeImmediateDominator(NewHeader, OrigPreheader); + DT->changeImmediateDominator(OrigHeader, OrigLatch); // Brute force incremental dominator tree update. Call // findNearestCommonDominator on all CFG predecessors of each child of the // original header. - DomTreeNode *OrigHeaderNode = DT.getNode(OrigHeader); + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); SmallVector HeaderChildren(OrigHeaderNode->begin(), OrigHeaderNode->end()); bool Changed; @@ -516,11 +589,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { pred_iterator PI = pred_begin(BB); BasicBlock *NearestDom = *PI; for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) - NearestDom = DT.findNearestCommonDominator(NearestDom, *PI); + NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); // Remember if this changes the DomTree. if (Node->getIDom()->getBlock() != NearestDom) { - DT.changeImmediateDominator(BB, NearestDom); + DT->changeImmediateDominator(BB, NearestDom); Changed = true; } } @@ -538,7 +611,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // the OrigHeader block into OrigLatch. This will succeed if they are // connected by an unconditional branch. This is just a cleanup so the // emitted code isn't too gross in this common case. - MergeBlockIntoPredecessor(OrigHeader, this); + MergeBlockIntoPredecessor(OrigHeader, DT, LI); DEBUG(dbgs() << "LoopRotation: into "; L->dump());