From: Sanjoy Das Date: Thu, 26 Feb 2015 08:19:31 +0000 (+0000) Subject: IRCE: generalize to handle loops with decreasing induction variables. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=21b2edfeae46694e3c01005ed4d3bb5c1fc58996;p=oota-llvm.git IRCE: generalize to handle loops with decreasing induction variables. IRCE can now split the iteration space for loops like: for (i = n; i >= 0; i--) a[i + k] = 42; // bounds check on access git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230618 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index e2de965a12e..da4e0fe0fa5 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -399,6 +399,52 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, namespace { +// Keeps track of the structure of a loop. This is similar to llvm::Loop, +// except that it is more lightweight and can track the state of a loop through +// changing and potentially invalid IR. This structure also formalizes the +// kinds of loops we can deal with -- ones that have a single latch that is also +// an exiting block *and* have a canonical induction variable. +struct LoopStructure { + const char *Tag; + + BasicBlock *Header; + BasicBlock *Latch; + + // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th + // successor is `LatchExit', the exit block of the loop. + BranchInst *LatchBr; + BasicBlock *LatchExit; + unsigned LatchBrExitIdx; + + Value *IndVarNext; + Value *IndVarStart; + Value *LoopExitAt; + bool IndVarIncreasing; + + LoopStructure() + : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr), + LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr), + IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false) {} + + template LoopStructure map(M Map) const { + LoopStructure Result; + Result.Tag = Tag; + Result.Header = cast(Map(Header)); + Result.Latch = cast(Map(Latch)); + Result.LatchBr = cast(Map(LatchBr)); + Result.LatchExit = cast(Map(LatchExit)); + Result.LatchBrExitIdx = LatchBrExitIdx; + Result.IndVarNext = Map(IndVarNext); + Result.IndVarStart = Map(IndVarStart); + Result.LoopExitAt = Map(LoopExitAt); + Result.IndVarIncreasing = IndVarIncreasing; + return Result; + } + + static Optional parseLoopStructure(ScalarEvolution &, Loop &, + const char *&); +}; + /// This class is used to constrain loops to run within a given iteration space. /// The algorithm this class implements is given a Loop and a range [Begin, /// End). The algorithm then tries to break out a "main loop" out of the loop @@ -409,51 +455,6 @@ namespace { /// iterations in which the induction variable is >= End. /// class LoopConstrainer { - - // Keeps track of the structure of a loop. This is similar to llvm::Loop, - // except that it is more lightweight and can track the state of a loop - // through changing and potentially invalid IR. This structure also - // formalizes the kinds of loops we can deal with -- ones that have a single - // latch that is also an exiting block *and* have a canonical induction - // variable. - struct LoopStructure { - const char *Tag; - - BasicBlock *Header; - BasicBlock *Latch; - - // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th - // successor is `LatchExit', the exit block of the loop. - BranchInst *LatchBr; - BasicBlock *LatchExit; - unsigned LatchBrExitIdx; - - // The canonical induction variable. It's value is `CIVStart` on the 0th - // itertion and `CIVNext` for all iterations after that. - PHINode *CIV; - Value *CIVStart; - Value *CIVNext; - - LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr), - LatchBr(nullptr), LatchExit(nullptr), - LatchBrExitIdx(-1), CIV(nullptr), - CIVStart(nullptr), CIVNext(nullptr) { } - - template LoopStructure map(M Map) const { - LoopStructure Result; - Result.Tag = Tag; - Result.Header = cast(Map(Header)); - Result.Latch = cast(Map(Latch)); - Result.LatchBr = cast(Map(LatchBr)); - Result.LatchExit = cast(Map(LatchExit)); - Result.LatchBrExitIdx = LatchBrExitIdx; - Result.CIV = cast(Map(CIV)); - Result.CIVNext = Map(CIVNext); - Result.CIVStart = Map(CIVStart); - return Result; - } - }; - // The representation of a clone of the original loop we started out with. struct ClonedLoop { // The cloned blocks @@ -472,17 +473,22 @@ class LoopConstrainer { BasicBlock *PseudoExit; BasicBlock *ExitSelector; std::vector PHIValuesAtPseudoExit; + PHINode *IndVarEnd; - RewrittenRangeInfo() : PseudoExit(nullptr), ExitSelector(nullptr) { } + RewrittenRangeInfo() + : PseudoExit(nullptr), ExitSelector(nullptr), IndVarEnd(nullptr) {} }; // Calculated subranges we restrict the iteration space of the main loop to. // See the implementation of `calculateSubRanges' for more details on how - // these fields are computed. `ExitPreLoopAt' is `None' if we don't need a - // pre loop. `ExitMainLoopAt' is `None' if we don't need a post loop. + // these fields are computed. `LowLimit` is None if there is no restriction + // on low end of the restricted iteration space of the main loop. `HighLimit` + // is None if there is no restriction on high end of the restricted iteration + // space of the main loop. + struct SubRanges { - Optional ExitPreLoopAt; - Optional ExitMainLoopAt; + Optional LowLimit; + Optional HighLimit; }; // A utility function that does a `replaceUsesOfWith' on the incoming block @@ -491,19 +497,11 @@ class LoopConstrainer { static void replacePHIBlock(PHINode *PN, BasicBlock *Block, BasicBlock *ReplaceBy); - // Try to "parse" `OriginalLoop' and populate the various out parameters. - // Returns true on success, false on failure. - // - bool recognizeLoop(LoopStructure &LoopStructureOut, - const SCEV *&LatchCountOut, BasicBlock *&PreHeaderOut, - const char *&FailureReasonOut) const; - // Compute a safe set of limits for the main loop to run in -- effectively the // intersection of `Range' and the iteration space of the original loop. - // Return the header count (1 + the latch taken count) in `HeaderCount'. // Return None if unable to compute the set of subranges. // - Optional calculateSubRanges(Value *&HeaderCount) const; + Optional calculateSubRanges() const; // Clone `OriginalLoop' and return the result in CLResult. The IR after // running `cloneLoop' is well formed except for the PHI nodes in CLResult -- @@ -542,16 +540,15 @@ class LoopConstrainer { // The loop denoted by `LS' has `OldPreheader' as its preheader. This // function creates a new preheader for `LS' and returns it. // - BasicBlock *createPreheader(const LoopConstrainer::LoopStructure &LS, - BasicBlock *OldPreheader, const char *Tag) const; + BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader, + const char *Tag) const; // `ContinuationBlockAndPreheader' was the continuation block for some call to // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'. // This function rewrites the PHI nodes in `LS.Header' to start with the // correct value. void rewriteIncomingValuesForPHIs( - LoopConstrainer::LoopStructure &LS, - BasicBlock *ContinuationBlockAndPreheader, + LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader, const LoopConstrainer::RewrittenRangeInfo &RRI) const; // Even though we do not preserve any passes at this time, we at least need to @@ -570,7 +567,6 @@ class LoopConstrainer { LoopInfo &OriginalLoopInfo; const SCEV *LatchTakenCount; BasicBlock *OriginalPreheader; - Value *OriginalHeaderCount; // The preheader of the main loop. This may or may not be different from // `OriginalPreheader'. @@ -584,12 +580,12 @@ class LoopConstrainer { LoopStructure MainLoopStructure; public: - LoopConstrainer(Loop &L, LoopInfo &LI, ScalarEvolution &SE, - InductiveRangeCheck::Range R) - : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE), - OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr), - OriginalPreheader(nullptr), OriginalHeaderCount(nullptr), - MainLoopPreheader(nullptr), Range(R) { } + LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS, + ScalarEvolution &SE, InductiveRangeCheck::Range R) + : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), + SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr), + OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R), + MainLoopStructure(LS) {} // Entry point for the algorithm. Returns true on success. bool run(); @@ -604,155 +600,246 @@ void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block, PN->setIncomingBlock(i, ReplaceBy); } -bool LoopConstrainer::recognizeLoop(LoopStructure &LoopStructureOut, - const SCEV *&LatchCountOut, - BasicBlock *&PreheaderOut, - const char *&FailureReason) const { - using namespace llvm::PatternMatch; +static bool CanBeSMax(ScalarEvolution &SE, const SCEV *S) { + APInt SMax = + APInt::getSignedMaxValue(cast(S->getType())->getBitWidth()); + return SE.getSignedRange(S).contains(SMax) && + SE.getUnsignedRange(S).contains(SMax); +} + +static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) { + APInt SMin = + APInt::getSignedMinValue(cast(S->getType())->getBitWidth()); + return SE.getSignedRange(S).contains(SMin) && + SE.getUnsignedRange(S).contains(SMin); +} - assert(OriginalLoop.isLoopSimplifyForm() && - "should follow from addRequired<>"); +Optional +LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L, + const char *&FailureReason) { + assert(L.isLoopSimplifyForm() && "should follow from addRequired<>"); - BasicBlock *Latch = OriginalLoop.getLoopLatch(); - if (!OriginalLoop.isLoopExiting(Latch)) { + BasicBlock *Latch = L.getLoopLatch(); + if (!L.isLoopExiting(Latch)) { FailureReason = "no loop latch"; - return false; + return None; } - PHINode *CIV = OriginalLoop.getCanonicalInductionVariable(); - assert(CIV && "precondition"); - - BasicBlock *Header = OriginalLoop.getHeader(); - BasicBlock *Preheader = OriginalLoop.getLoopPreheader(); + BasicBlock *Header = L.getHeader(); + BasicBlock *Preheader = L.getLoopPreheader(); if (!Preheader) { FailureReason = "no preheader"; - return false; + return None; } - Value *CIVNext = CIV->getIncomingValueForBlock(Latch); - Value *CIVStart = CIV->getIncomingValueForBlock(Preheader); + BranchInst *LatchBr = dyn_cast(&*Latch->rbegin()); + if (!LatchBr || LatchBr->isUnconditional()) { + FailureReason = "latch terminator not conditional branch"; + return None; + } - const SCEV *LatchCount = SE.getExitCount(&OriginalLoop, Latch); + unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0; + + ICmpInst *ICI = dyn_cast(LatchBr->getCondition()); + if (!ICI || !isa(ICI->getOperand(0)->getType())) { + FailureReason = "latch terminator branch not conditional on integral icmp"; + return None; + } + + const SCEV *LatchCount = SE.getExitCount(&L, Latch); if (isa(LatchCount)) { FailureReason = "could not compute latch count"; - return false; + return None; } - // While SCEV does most of the analysis for us, we still have to - // modify the latch; and currently we can only deal with certain - // kinds of latches. This can be made more sophisticated as needed. + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *LeftValue = ICI->getOperand(0); + const SCEV *LeftSCEV = SE.getSCEV(LeftValue); + IntegerType *IndVarTy = cast(LeftValue->getType()); + + Value *RightValue = ICI->getOperand(1); + const SCEV *RightSCEV = SE.getSCEV(RightValue); + + // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence. + if (!isa(LeftSCEV)) { + if (isa(RightSCEV)) { + std::swap(LeftSCEV, RightSCEV); + std::swap(LeftValue, RightValue); + Pred = ICmpInst::getSwappedPredicate(Pred); + } else { + FailureReason = "no add recurrences in the icmp"; + return None; + } + } - BranchInst *LatchBr = dyn_cast(&*Latch->rbegin()); + auto IsInductionVar = [&SE](const SCEVAddRecExpr *AR, bool &IsIncreasing) { + if (!AR->isAffine()) + return false; - if (!LatchBr || LatchBr->isUnconditional()) { - FailureReason = "latch terminator not conditional branch"; - return false; - } + IntegerType *Ty = cast(AR->getType()); + IntegerType *WideTy = + IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2); - // Currently we only support a latch condition of the form: - // - // %condition = icmp slt %civNext, %limit - // br i1 %condition, label %header, label %exit + // Currently we only work with induction variables that have been proved to + // not wrap. This restriction can potentially be lifted in the future. - if (LatchBr->getSuccessor(0) != Header) { - FailureReason = "unknown latch form (header not first successor)"; - return false; - } + const SCEVAddRecExpr *ExtendAfterOp = + dyn_cast(SE.getSignExtendExpr(AR, WideTy)); + if (!ExtendAfterOp) + return false; - Value *CIVComparedTo = nullptr; - ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - if (!(match(LatchBr->getCondition(), - m_ICmp(Pred, m_Specific(CIVNext), m_Value(CIVComparedTo))) && - Pred == ICmpInst::ICMP_SLT)) { - FailureReason = "unknown latch form (not slt)"; - return false; - } + const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); + const SCEV *ExtendedStep = + SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); + + bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && + ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; + + if (!NoSignedWrap) + return false; + + if (const SCEVConstant *StepExpr = + dyn_cast(AR->getStepRecurrence(SE))) { + ConstantInt *StepCI = StepExpr->getValue(); + if (StepCI->isOne() || StepCI->isMinusOne()) { + IsIncreasing = StepCI->isOne(); + return true; + } + } - // IndVarSimplify will sometimes leave behind (in SCEV's cache) backedge-taken - // counts that are narrower than the canonical induction variable. These - // values are still accurate, and we could probably use them after sign/zero - // extension; but for now we just bail out of the transformation to keep - // things simple. - const SCEV *CIVComparedToSCEV = SE.getSCEV(CIVComparedTo); - if (isa(CIVComparedToSCEV) || - CIVComparedToSCEV->getType() != LatchCount->getType()) { - FailureReason = "could not relate CIV to latch expression"; return false; + }; + + // `ICI` is interpreted as taking the backedge if the *next* value of the + // induction variable satisfies some constraint. + + const SCEVAddRecExpr *IndVarNext = cast(LeftSCEV); + bool IsIncreasing = false; + if (!IsInductionVar(IndVarNext, IsIncreasing)) { + FailureReason = "LHS in icmp not induction variable"; + return None; } - const SCEV *ShouldBeOne = SE.getMinusSCEV(CIVComparedToSCEV, LatchCount); - const SCEVConstant *SCEVOne = dyn_cast(ShouldBeOne); - if (!SCEVOne || SCEVOne->getValue()->getValue() != 1) { - FailureReason = "unexpected header count in latch"; - return false; + ConstantInt *One = ConstantInt::get(IndVarTy, 1); + // TODO: generalize the predicates here to also match their unsigned variants. + if (IsIncreasing) { + bool FoundExpectedPred = + (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) || + (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0); + + if (!FoundExpectedPred) { + FailureReason = "expected icmp slt semantically, found something else"; + return None; + } + + if (LatchBrExitIdx == 0) { + if (CanBeSMax(SE, RightSCEV)) { + // TODO: this restriction is easily removable -- we just have to + // remember that the icmp was an slt and not an sle. + FailureReason = "limit may overflow when coercing sle to slt"; + return None; + } + + IRBuilder<> B(&*Preheader->rbegin()); + RightValue = B.CreateAdd(RightValue, One); + } + + } else { + bool FoundExpectedPred = + (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) || + (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0); + + if (!FoundExpectedPred) { + FailureReason = "expected icmp sgt semantically, found something else"; + return None; + } + + if (LatchBrExitIdx == 0) { + if (CanBeSMin(SE, RightSCEV)) { + // TODO: this restriction is easily removable -- we just have to + // remember that the icmp was an sgt and not an sge. + FailureReason = "limit may overflow when coercing sge to sgt"; + return None; + } + + IRBuilder<> B(&*Preheader->rbegin()); + RightValue = B.CreateSub(RightValue, One); + } } - unsigned LatchBrExitIdx = 1; + const SCEV *StartNext = IndVarNext->getStart(); + const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE)); + const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); + BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx); - assert(SE.getLoopDisposition(LatchCount, &OriginalLoop) == + assert(SE.getLoopDisposition(LatchCount, &L) == ScalarEvolution::LoopInvariant && "loop variant exit count doesn't make sense!"); - assert(!OriginalLoop.contains(LatchExit) && "expected an exit block!"); + assert(!L.contains(LatchExit) && "expected an exit block!"); + + Value *IndVarStartV = SCEVExpander(SE, "irce").expandCodeFor( + IndVarStart, IndVarTy, &*Preheader->rbegin()); + IndVarStartV->setName("indvar.start"); + + LoopStructure Result; - LoopStructureOut.Tag = "main"; - LoopStructureOut.Header = Header; - LoopStructureOut.Latch = Latch; - LoopStructureOut.LatchBr = LatchBr; - LoopStructureOut.LatchExit = LatchExit; - LoopStructureOut.LatchBrExitIdx = LatchBrExitIdx; - LoopStructureOut.CIV = CIV; - LoopStructureOut.CIVNext = CIVNext; - LoopStructureOut.CIVStart = CIVStart; + Result.Tag = "main"; + Result.Header = Header; + Result.Latch = Latch; + Result.LatchBr = LatchBr; + Result.LatchExit = LatchExit; + Result.LatchBrExitIdx = LatchBrExitIdx; + Result.IndVarStart = IndVarStartV; + Result.IndVarNext = LeftValue; + Result.IndVarIncreasing = IsIncreasing; + Result.LoopExitAt = RightValue; - LatchCountOut = LatchCount; - PreheaderOut = Preheader; FailureReason = nullptr; - return true; + return Result; } Optional -LoopConstrainer::calculateSubRanges(Value *&HeaderCountOut) const { +LoopConstrainer::calculateSubRanges() const { IntegerType *Ty = cast(LatchTakenCount->getType()); if (Range.getType() != Ty) return None; - SCEVExpander Expander(SE, "irce"); - Instruction *InsertPt = OriginalPreheader->getTerminator(); - LoopConstrainer::SubRanges Result; // I think we can be more aggressive here and make this nuw / nsw if the // addition that feeds into the icmp for the latch's terminating branch is nuw // / nsw. In any case, a wrapping 2's complement addition is safe. ConstantInt *One = ConstantInt::get(Ty, 1); - const SCEV *HeaderCountSCEV = SE.getAddExpr(LatchTakenCount, SE.getSCEV(One)); - HeaderCountOut = Expander.expandCodeFor(HeaderCountSCEV, Ty, InsertPt); - - const SCEV *Zero = SE.getConstant(Ty, 0); + const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart); + const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt); + + bool Increasing = MainLoopStructure.IndVarIncreasing; + // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the + // range of values the induction variable takes. + const SCEV *Smallest = + Increasing ? Start : SE.getAddExpr(End, SE.getSCEV(One)); + const SCEV *Greatest = + Increasing ? End : SE.getAddExpr(Start, SE.getSCEV(One)); + + auto Clamp = [this, Smallest, Greatest](const SCEV *S) { + return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S)); + }; // In some cases we can prove that we don't need a pre or post loop bool ProvablyNoPreloop = - SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Zero); - if (!ProvablyNoPreloop) { - const SCEV *ExitPreLoopAtSCEV = - SE.getSMinExpr(HeaderCountSCEV, Range.getBegin()); - Result.ExitPreLoopAt = - Expander.expandCodeFor(ExitPreLoopAtSCEV, Ty, InsertPt); - } + SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Smallest); + if (!ProvablyNoPreloop) + Result.LowLimit = Clamp(Range.getBegin()); bool ProvablyNoPostLoop = - SE.isKnownPredicate(ICmpInst::ICMP_SLE, HeaderCountSCEV, Range.getEnd()); - if (!ProvablyNoPostLoop) { - const SCEV *ExitMainLoopAtSCEV = - SE.getSMinExpr(HeaderCountSCEV, Range.getEnd()); - Result.ExitMainLoopAt = - Expander.expandCodeFor(ExitMainLoopAtSCEV, Ty, InsertPt); - } + SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd()); + if (!ProvablyNoPostLoop) + Result.HighLimit = Clamp(Range.getEnd()); return Result; } @@ -809,7 +896,7 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, } LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( - const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt, + const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt, BasicBlock *ContinuationBlock) const { // We start with a loop with a single latch: @@ -893,32 +980,37 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( BBInsertLocation); BranchInst *PreheaderJump = cast(&*Preheader->rbegin()); + bool Increasing = LS.IndVarIncreasing; IRBuilder<> B(PreheaderJump); // EnterLoopCond - is it okay to start executing this `LS'? - Value *EnterLoopCond = B.CreateICmpSLT(LS.CIVStart, ExitLoopAt); + Value *EnterLoopCond = Increasing + ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt) + : B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt); + B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); PreheaderJump->eraseFromParent(); - assert(LS.LatchBrExitIdx == 1 && "generalize this as needed!"); - + LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); B.SetInsertPoint(LS.LatchBr); + Value *TakeBackedgeLoopCond = + Increasing ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt) + : B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt); + Value *CondForBranch = LS.LatchBrExitIdx == 1 + ? TakeBackedgeLoopCond + : B.CreateNot(TakeBackedgeLoopCond); - // ContinueCond - is it okay to execute the next iteration in `LS'? - Value *ContinueCond = B.CreateICmpSLT(LS.CIVNext, ExitLoopAt); - - LS.LatchBr->setCondition(ContinueCond); - assert(LS.LatchBr->getSuccessor(LS.LatchBrExitIdx) == LS.LatchExit && - "invariant!"); - LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); + LS.LatchBr->setCondition(CondForBranch); B.SetInsertPoint(RRI.ExitSelector); // IterationsLeft - are there any more iterations left, given the original // upper bound on the induction variable? If not, we branch to the "real" // exit. - Value *IterationsLeft = B.CreateICmpSLT(LS.CIVNext, OriginalHeaderCount); + Value *IterationsLeft = Increasing + ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt) + : B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt); B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); BranchInst *BranchToContinuation = @@ -942,6 +1034,11 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( RRI.PHIValuesAtPseudoExit.push_back(NewPHI); } + RRI.IndVarEnd = PHINode::Create(LS.IndVarNext->getType(), 2, "indvar.end", + BranchToContinuation); + RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader); + RRI.IndVarEnd->addIncoming(LS.IndVarNext, RRI.ExitSelector); + // The latch exit now has a branch from `RRI.ExitSelector' instead of // `LS.Latch'. The PHI nodes need to be updated to reflect that. for (Instruction &I : *LS.LatchExit) { @@ -955,7 +1052,7 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( } void LoopConstrainer::rewriteIncomingValuesForPHIs( - LoopConstrainer::LoopStructure &LS, BasicBlock *ContinuationBlock, + LoopStructure &LS, BasicBlock *ContinuationBlock, const LoopConstrainer::RewrittenRangeInfo &RRI) const { unsigned PHIIndex = 0; @@ -970,13 +1067,12 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs( PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]); } - LS.CIVStart = LS.CIV->getIncomingValueForBlock(ContinuationBlock); + LS.IndVarStart = RRI.IndVarEnd; } -BasicBlock * -LoopConstrainer::createPreheader(const LoopConstrainer::LoopStructure &LS, - BasicBlock *OldPreheader, - const char *Tag) const { +BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS, + BasicBlock *OldPreheader, + const char *Tag) const { BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header); BranchInst::Create(LS.Header, Preheader); @@ -1004,30 +1100,79 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef BBs) { bool LoopConstrainer::run() { BasicBlock *Preheader = nullptr; - const char *CouldNotProceedBecause = nullptr; - if (!recognizeLoop(MainLoopStructure, LatchTakenCount, Preheader, - CouldNotProceedBecause)) { - DEBUG(dbgs() << "irce: could not recognize loop, " << CouldNotProceedBecause - << "\n";); - return false; - } + LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch); + Preheader = OriginalLoop.getLoopPreheader(); + assert(!isa(LatchTakenCount) && Preheader != nullptr && + "preconditions!"); OriginalPreheader = Preheader; MainLoopPreheader = Preheader; - Optional MaybeSR = calculateSubRanges(OriginalHeaderCount); + Optional MaybeSR = calculateSubRanges(); if (!MaybeSR.hasValue()) { DEBUG(dbgs() << "irce: could not compute subranges\n"); return false; } + SubRanges SR = MaybeSR.getValue(); + bool Increasing = MainLoopStructure.IndVarIncreasing; + IntegerType *IVTy = + cast(MainLoopStructure.IndVarNext->getType()); + + SCEVExpander Expander(SE, "irce"); + Instruction *InsertPt = OriginalPreheader->getTerminator(); // It would have been better to make `PreLoop' and `PostLoop' // `Optional's, but `ValueToValueMapTy' does not have a copy // constructor. ClonedLoop PreLoop, PostLoop; - bool NeedsPreLoop = SR.ExitPreLoopAt.hasValue(); - bool NeedsPostLoop = SR.ExitMainLoopAt.hasValue(); + bool NeedsPreLoop = + Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue(); + bool NeedsPostLoop = + Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue(); + + Value *ExitPreLoopAt = nullptr; + Value *ExitMainLoopAt = nullptr; + const SCEVConstant *MinusOneS = + cast(SE.getConstant(IVTy, -1, true /* isSigned */)); + + if (NeedsPreLoop) { + const SCEV *ExitPreLoopAtSCEV = nullptr; + + if (Increasing) + ExitPreLoopAtSCEV = *SR.LowLimit; + else { + if (CanBeSMin(SE, *SR.HighLimit)) { + DEBUG(dbgs() << "irce: could not prove no-overflow when computing " + << "preloop exit limit. HighLimit = " << *(*SR.HighLimit) + << "\n"); + return false; + } + ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); + } + + ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt); + ExitPreLoopAt->setName("exit.preloop.at"); + } + + if (NeedsPostLoop) { + const SCEV *ExitMainLoopAtSCEV = nullptr; + + if (Increasing) + ExitMainLoopAtSCEV = *SR.HighLimit; + else { + if (CanBeSMin(SE, *SR.LowLimit)) { + DEBUG(dbgs() << "irce: could not prove no-overflow when computing " + << "mainloop exit limit. LowLimit = " << *(*SR.LowLimit) + << "\n"); + return false; + } + ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); + } + + ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt); + ExitMainLoopAt->setName("exit.mainloop.at"); + } // We clone these ahead of time so that we don't have to deal with changing // and temporarily invalid IR as we transform the loops. @@ -1044,9 +1189,8 @@ bool LoopConstrainer::run() { MainLoopPreheader = createPreheader(MainLoopStructure, Preheader, "mainloop"); - PreLoopRRI = - changeIterationSpaceEnd(PreLoop.Structure, Preheader, - SR.ExitPreLoopAt.getValue(), MainLoopPreheader); + PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader, + ExitPreLoopAt, MainLoopPreheader); rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader, PreLoopRRI); } @@ -1058,8 +1202,7 @@ bool LoopConstrainer::run() { PostLoopPreheader = createPreheader(PostLoop.Structure, Preheader, "postloop"); PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader, - SR.ExitMainLoopAt.getValue(), - PostLoopPreheader); + ExitMainLoopAt, PostLoopPreheader); rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader, PostLoopRRI); } @@ -1179,13 +1322,6 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { ScalarEvolution &SE = getAnalysis(); BranchProbabilityInfo &BPI = getAnalysis(); - PHINode *CIV = L->getCanonicalInductionVariable(); - if (!CIV) { - DEBUG(dbgs() << "irce: loop has no canonical induction variable\n"); - return false; - } - const SCEVAddRecExpr *IndVar = cast(SE.getSCEV(CIV)); - for (auto BBI : L->getBlocks()) if (BranchInst *TBI = dyn_cast(BBI->getTerminator())) if (InductiveRangeCheck *IRC = @@ -1202,6 +1338,21 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { IRC->print(dbgs()); ); + const char *FailureReason = nullptr; + Optional MaybeLoopStructure = + LoopStructure::parseLoopStructure(SE, *L, FailureReason); + if (!MaybeLoopStructure.hasValue()) { + DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason + << "\n";); + return false; + } + LoopStructure LS = MaybeLoopStructure.getValue(); + bool Increasing = LS.IndVarIncreasing; + const SCEV *MinusOne = + SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true); + const SCEVAddRecExpr *IndVar = + cast(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne)); + Optional SafeIterRange; Instruction *ExprInsertPt = Preheader->getTerminator(); @@ -1223,8 +1374,8 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { if (!SafeIterRange.hasValue()) return false; - LoopConstrainer LC(*L, getAnalysis().getLoopInfo(), SE, - SafeIterRange.getValue()); + LoopConstrainer LC(*L, getAnalysis().getLoopInfo(), LS, + SE, SafeIterRange.getValue()); bool Changed = LC.run(); if (Changed) { diff --git a/test/Transforms/IRCE/decrementing-loop.ll b/test/Transforms/IRCE/decrementing-loop.ll new file mode 100644 index 00000000000..877a2c20c4d --- /dev/null +++ b/test/Transforms/IRCE/decrementing-loop.ll @@ -0,0 +1,43 @@ +; RUN: opt -irce -S < %s | FileCheck %s + +define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) { + entry: + %len = load i32* %a_len_ptr, !range !0 + %first.itr.check = icmp sgt i32 %n, 0 + %start = sub i32 %n, 1 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ %start, %entry ] , [ %idx.dec, %in.bounds ] + %idx.dec = sub i32 %idx, 1 + %abc.high = icmp slt i32 %idx, %len + %abc.low = icmp sge i32 %idx, 0 + %abc = and i1 %abc.low, %abc.high + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + + in.bounds: + %addr = getelementptr i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp sgt i32 %idx.dec, -1 + br i1 %next, label %loop, label %exit + + out.of.bounds: + ret void + + exit: + ret void + +; CHECK: loop.preheader: +; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1 +; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len +; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]] +; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]] +; CHECK: [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]] +; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0 +; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0 +; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1 +} + +!0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4} diff --git a/test/Transforms/IRCE/multiple-access-no-preloop.ll b/test/Transforms/IRCE/multiple-access-no-preloop.ll index 1dfb70fe5e7..304bb4d7727 100644 --- a/test/Transforms/IRCE/multiple-access-no-preloop.ll +++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll @@ -42,9 +42,11 @@ define void @multiple_access_no_preloop( ; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] ; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_upper_limit_cond:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] -; CHECK: [[not_upper_limit:[^ ]+]] = select i1 [[not_upper_limit_cond]], i32 [[smax_not_len]], i32 [[not_n]] -; CHECK: [[upper_limit:[^ ]+]] = sub i32 -1, [[not_upper_limit]] +; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] +; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] +; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] +; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 +; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 ; CHECK-LABEL: loop: ; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds diff --git a/test/Transforms/IRCE/single-access-no-preloop.ll b/test/Transforms/IRCE/single-access-no-preloop.ll index 0252e437ee8..4d47ba895e5 100644 --- a/test/Transforms/IRCE/single-access-no-preloop.ll +++ b/test/Transforms/IRCE/single-access-no-preloop.ll @@ -36,6 +36,7 @@ define void @single_access_no_preloop_no_offset(i32 *%arr, i32 *%a_len_ptr, i32 ; CHECK-LABEL: main.pseudo.exit: ; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ] +; CHECK-NEXT: %indvar.end = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ] ; CHECK-NEXT: br label %postloop ; CHECK-LABEL: postloop: @@ -85,17 +86,19 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3 ; CHECK-LABEL: loop.preheader: ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n ; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len -; CHECK: [[not_exit_main_loop_at_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] -; CHECK: [[not_exit_main_loop_at:[^ ]+]] = select i1 [[not_exit_main_loop_at_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] -; CHECK: [[exit_main_loop_at:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at]] -; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at]] +; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] +; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] +; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] +; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 +; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 +; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] ; CHECK: br i1 [[enter_main_loop]], label %loop, label %main.pseudo.exit ; CHECK-LABEL: loop: ; CHECK: br i1 true, label %in.bounds, label %out.of.bounds ; CHECK-LABEL: in.bounds: -; CHECK: [[continue_main_loop:[^ ]+]] = icmp slt i32 %idx.next, [[exit_main_loop_at]] +; CHECK: [[continue_main_loop:[^ ]+]] = icmp slt i32 %idx.next, [[exit_main_loop_at_loclamp]] ; CHECK: br i1 [[continue_main_loop]], label %loop, label %main.exit.selector ; CHECK-LABEL: main.pseudo.exit: diff --git a/test/Transforms/IRCE/single-access-with-preloop.ll b/test/Transforms/IRCE/single-access-with-preloop.ll index c220efa50a6..16426b8c7d9 100644 --- a/test/Transforms/IRCE/single-access-with-preloop.ll +++ b/test/Transforms/IRCE/single-access-with-preloop.ll @@ -31,14 +31,21 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 ; CHECK-LABEL: loop.preheader: ; CHECK: [[not_safe_start:[^ ]+]] = add i32 %offset, -1 ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] -; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cond]], i32 [[not_safe_start]], i32 [[not_n]] -; CHECK: [[exit_preloop_at:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at]] +; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] +; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]] +; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]] +; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0 +; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 + + +; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 %offset, -1 +; CHECK: [[not_safe_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len +; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_end]], [[not_n]] +; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_end]], i32 [[not_n]] +; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_loclamp]] +; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_loclamp]], 0 +; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_loclamp]], i32 0 -; CHECK: [[not_safe_end:[^ ]+]] = sub i32 [[not_safe_start]], %len -; CHECK: [[not_exit_mainloop_at_cond:[^ ]+]] = icmp sgt i32 [[not_safe_end]], [[not_n]] -; CHECK: [[not_exit_mainloop_at:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond]], i32 [[not_safe_end]], i32 [[not_n]] -; CHECK: [[exit_mainloop_at:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at]] ; CHECK-LABEL: in.bounds: ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]