"disable-iv-rewrite", cl::Hidden,
cl::desc("Disable canonical induction variable rewriting"));
+// Temporary flag for use with -disable-iv-rewrite to force a canonical IV for
+// LFTR purposes.
+static cl::opt<bool> ForceLFTR(
+ "force-lftr", cl::Hidden,
+ cl::desc("Enable forced linear function test replacement"));
+
namespace {
class IndVarSimplify : public LoopPass {
IVUsers *IU;
void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
- ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- SCEVExpander &Rewriter);
+ Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+ PHINode *IndVar, SCEVExpander &Rewriter);
void SinkUnusedInvariants(Loop *L);
};
NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
return 0;
}
- // We assume that block terminators are not SCEVable. We wouldn't want to
+ // Assume block terminators cannot evaluate to a recurrence. We can't to
// insert a Trunc after a terminator if there happens to be a critical edge.
assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
"SCEV is not expected to evaluate a block terminator");
// Get the symbolic expression for this instruction.
const SCEV *S = SE->getSCEV(I);
- // We assume that terminators are not SCEVable.
- assert((!S || I != I->getParent()->getTerminator()) &&
- "can't fold terminators");
-
// Only consider affine recurrences.
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
if (AR && AR->getLoop() == L)
}
}
- if (!DisableIVRewrite)
+ if (!DisableIVRewrite || ForceLFTR)
return false;
// Recurse past add expressions, which commonly occur in the
/// getBackedgeIVType - Get the widest type used by the loop test after peeking
/// through Truncs.
///
-/// TODO: Unnecessary if LFTR does not force a canonical IV.
+/// TODO: Unnecessary when ForceLFTR is removed.
static Type *getBackedgeIVType(Loop *L) {
if (!L->getExitingBlock())
return 0;
return Ty;
}
+/// isLoopInvariant - Perform a quick domtree based check for loop invariance
+/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
+/// gratuitous for this purpose.
+static bool isLoopInvariant(Value *V, Loop *L, DominatorTree *DT) {
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst)
+ return true;
+
+ return DT->properlyDominates(Inst->getParent(), L->getHeader());
+}
+
+/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
+/// invariant value to the phi.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+ Instruction *IncI = dyn_cast<Instruction>(IncV);
+ if (!IncI)
+ return 0;
+
+ switch (IncI->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ break;
+ case Instruction::GetElementPtr:
+ // An IV counter must preserve its type.
+ if (IncI->getNumOperands() == 2)
+ break;
+ default:
+ return 0;
+ }
+
+ PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
+ if (Phi && Phi->getParent() == L->getHeader()) {
+ if (isLoopInvariant(IncI->getOperand(1), L, DT))
+ return Phi;
+ return 0;
+ }
+ if (IncI->getOpcode() == Instruction::GetElementPtr)
+ return 0;
+
+ // Allow add/sub to be commuted.
+ Phi = dyn_cast<PHINode>(IncI->getOperand(1));
+ if (Phi && Phi->getParent() == L->getHeader()) {
+ if (isLoopInvariant(IncI->getOperand(0), L, DT))
+ return Phi;
+ }
+ return 0;
+}
+
+/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show
+/// that the current exit test is already sufficiently canonical.
+static bool needsLFTR(Loop *L, DominatorTree *DT) {
+ assert(L->getExitingBlock() && "expected loop exit");
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ // Don't bother with LFTR if the loop is not properly simplified.
+ if (!LatchBlock)
+ return false;
+
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ assert(BI && "expected exit branch");
+
+ // Do LFTR to simplify the exit condition to an ICMP.
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return true;
+
+ // Do LFTR to simplify the exit ICMP to EQ/NE
+ ICmpInst::Predicate Pred = Cond->getPredicate();
+ if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+ return true;
+
+ // Look for a loop invariant RHS
+ Value *LHS = Cond->getOperand(0);
+ Value *RHS = Cond->getOperand(1);
+ if (!isLoopInvariant(RHS, L, DT)) {
+ if (!isLoopInvariant(LHS, L, DT))
+ return true;
+ std::swap(LHS, RHS);
+ }
+ // Look for a simple IV counter LHS
+ PHINode *Phi = dyn_cast<PHINode>(LHS);
+ if (!Phi)
+ Phi = getLoopPhiForCounter(LHS, L, DT);
+
+ if (!Phi)
+ return true;
+
+ // Do LFTR if the exit condition's IV is *not* a simple counter.
+ Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
+ return Phi != getLoopPhiForCounter(IncV, L, DT);
+}
+
+/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to
+/// be rewritten) loop exit test.
+static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
+ int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+
+ for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
+ UI != UE; ++UI) {
+ if (*UI != Cond && *UI != IncV) return false;
+ }
+
+ for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
+ UI != UE; ++UI) {
+ if (*UI != Cond && *UI != Phi) return false;
+ }
+ return true;
+}
+
+/// FindLoopCounter - Find an affine IV in canonical form.
+///
+/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
+///
+/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
+/// This is difficult in general for SCEV because of potential overflow. But we
+/// could at least handle constant BECounts.
+static PHINode *
+FindLoopCounter(Loop *L, const SCEV *BECount,
+ ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
+ // I'm not sure how BECount could be a pointer type, but we definitely don't
+ // want to LFTR that.
+ if (BECount->getType()->isPointerTy())
+ return 0;
+
+ uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
+
+ Value *Cond =
+ cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+
+ // Loop over all of the PHI nodes, looking for a simple counter.
+ PHINode *BestPhi = 0;
+ const SCEV *BestInit = 0;
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+ if (!AR || AR->getLoop() != L || !AR->isAffine())
+ continue;
+
+ // AR may be a pointer type, while BECount is an integer type.
+ // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
+ // AR may not be a narrower type, or we may never exit.
+ uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
+ if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+ continue;
+
+ const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!Step || !Step->isOne())
+ continue;
+
+ int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+ Value *IncV = Phi->getIncomingValue(LatchIdx);
+ if (getLoopPhiForCounter(IncV, L, DT) != Phi)
+ continue;
+
+ const SCEV *Init = AR->getStart();
+
+ if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
+ // Don't force a live loop counter if another IV can be used.
+ if (AlmostDeadIV(Phi, LatchBlock, Cond))
+ continue;
+
+ // Prefer to count-from-zero. This is a more "canonical" counter form. It
+ // also prefers integer to pointer IVs.
+ if (BestInit->isZero() != Init->isZero()) {
+ if (BestInit->isZero())
+ continue;
+ }
+ // If two IVs both count from zero or both count from nonzero then the
+ // narrower is likely a dead phi that has been widened. Use the wider phi
+ // to allow the other to be eliminated.
+ if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
+ continue;
+ }
+ BestPhi = Phi;
+ BestInit = Init;
+ }
+ return BestPhi;
+}
+
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
/// SCEV analysis can determine a loop-invariant trip count of the loop, which
/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
+Value *IndVarSimplify::
LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ // In DisableIVRewrite mode, IndVar is not necessarily a canonical IV. In this
+ // mode, LFTR can ignore IV overflow and truncate to the width of
+ // BECount. This avoids materializing the add(zext(add)) expression.
+ Type *CntTy = DisableIVRewrite ?
+ BackedgeTakenCount->getType() : IndVar->getType();
+
+ const SCEV *IVLimit = BackedgeTakenCount;
+
// If the exiting block is not the same as the backedge block, we must compare
// against the preincremented value, otherwise we prefer to compare against
// the post-incremented value.
Value *CmpIndVar;
- const SCEV *RHS = BackedgeTakenCount;
if (L->getExitingBlock() == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
- const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
const SCEV *N =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- if ((isa<SCEVConstant>(N) && !N->isZero()) ||
- SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
- // No overflow. Cast the sum.
- RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
- } else {
- // Potential overflow. Cast before doing the add.
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- RHS = SE->getAddExpr(RHS,
- SE->getConstant(IndVar->getType(), 1));
+ SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
+ if (CntTy == IVLimit->getType())
+ IVLimit = N;
+ else {
+ const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
+ } else {
+ // Potential overflow. Cast before doing the add.
+ IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
+ IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
+ }
}
-
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
} else {
// We have to use the preincremented value...
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
+ IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
CmpIndVar = IndVar;
}
+ // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
+ // So for, non-zero start compute the IVLimit here.
+ bool isPtrIV = false;
+ Type *CmpTy = CntTy;
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+ assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+ if (!AR->getStart()->isZero()) {
+ assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+ const SCEV *IVInit = AR->getStart();
+
+ // For pointer types, sign extend BECount in order to materialize a GEP.
+ // Note that for DisableIVRewrite, we never run SCEVExpander on a
+ // pointer type, because we must preserve the existing GEPs. Instead we
+ // directly generate a GEP later.
+ if (IVInit->getType()->isPointerTy()) {
+ isPtrIV = true;
+ CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
+ IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
+ }
+ // For integer types, truncate the IV before computing IVInit + BECount.
+ else {
+ if (SE->getTypeSizeInBits(IVInit->getType())
+ > SE->getTypeSizeInBits(CmpTy))
+ IVInit = SE->getTruncateExpr(IVInit, CmpTy);
+
+ IVLimit = SE->getAddExpr(IVInit, IVLimit);
+ }
+ }
// Expand the code for the iteration count.
- assert(SE->isLoopInvariant(RHS, L) &&
+ IRBuilder<> Builder(BI);
+
+ assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
- Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+ Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
+
+ // Create a gep for IVInit + IVLimit from on an existing pointer base.
+ assert(isPtrIV == IndVar->getType()->isPointerTy() &&
+ "IndVar type must match IVInit type");
+ if (isPtrIV) {
+ Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+ assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
+ const PointerType *PointerTy = cast<PointerType>(IVStart->getType());
+ assert(SE->getSizeOfExpr(PointerTy->getElementType())->isOne() &&
+ "unit stride pointer IV must be i8*");
+
+ Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+ ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
+ Builder.SetInsertPoint(BI);
+ }
// Insert a new icmp_ne or icmp_eq instruction before the branch.
- ICmpInst::Predicate Opcode;
+ ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
- Opcode = ICmpInst::ICMP_NE;
+ P = ICmpInst::ICMP_NE;
else
- Opcode = ICmpInst::ICMP_EQ;
+ P = ICmpInst::ICMP_EQ;
DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n'
<< " op:\t"
- << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
- << " RHS:\t" << *RHS << "\n");
+ << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *ExitCnt << "\n"
+ << " Expr:\t" << *IVLimit << "\n");
- ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
- Cond->setDebugLoc(BI->getDebugLoc());
+ if (SE->getTypeSizeInBits(CmpIndVar->getType())
+ > SE->getTypeSizeInBits(CmpTy)) {
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
+ }
+
+ Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
// comparison, but that's not immediately safe, since users of the old
// a canonical induction variable should be inserted.
Type *LargestType = 0;
bool NeedCannIV = false;
+ bool ReuseIVForExit = DisableIVRewrite && !ForceLFTR;
bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
- if (ExpandBECount) {
+ if (ExpandBECount && !ReuseIVForExit) {
// If we have a known trip count and a single exit block, we'll be
// rewriting the loop exit test condition below, which requires a
// canonical induction variable.
OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
}
}
-
+ else if (ExpandBECount && ReuseIVForExit && needsLFTR(L, DT)) {
+ IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ }
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- ICmpInst *NewICmp = 0;
- if (ExpandBECount) {
- assert(canExpandBackedgeTakenCount(L, SE) &&
- "canonical IV disrupted BackedgeTaken expansion");
- assert(NeedCannIV &&
- "LinearFunctionTestReplace requires a canonical induction variable");
+ Value *NewICmp = 0;
+ if (ExpandBECount && IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
// pass that uses the SCEVExpander must do it. This does not work well for
// For completeness, inform IVUsers of the IV use in the newly-created
// loop exit test instruction.
- if (NewICmp && IU)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
-
+ if (IU && NewICmp) {
+ ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
+ if (NewICmpInst)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
+ }
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
// Check a post-condition.
--- /dev/null
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
+;
+; Make sure that indvars can perform LFTR without a canonical IV.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Perform LFTR using the original pointer-type IV.
+
+; for(char* p = base; p < base + n; ++p) {
+; *p = p-base;
+; }
+define void @ptriv(i8* %base, i32 %n) nounwind {
+entry:
+ %idx.ext = sext i32 %n to i64
+ %add.ptr = getelementptr inbounds i8* %base, i64 %idx.ext
+ %cmp1 = icmp ult i8* %base, %add.ptr
+ br i1 %cmp1, label %for.body, label %for.end
+
+; CHECK: for.body:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK-NOT: add
+; CHECK: icmp ne i8*
+; CHECK: br i1
+for.body:
+ %p.02 = phi i8* [ %base, %entry ], [ %incdec.ptr, %for.body ]
+ ; cruft to make the IV useful
+ %sub.ptr.lhs.cast = ptrtoint i8* %p.02 to i64
+ %sub.ptr.rhs.cast = ptrtoint i8* %base to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ %conv = trunc i64 %sub.ptr.sub to i8
+ store i8 %conv, i8* %p.02
+ %incdec.ptr = getelementptr inbounds i8* %p.02, i32 1
+ %cmp = icmp ult i8* %incdec.ptr, %add.ptr
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; It would be nice if SCEV and any loop analysis could assume that
+; preheaders exist. Unfortunately it is not always the case. This test
+; checks that SCEVExpander can handle an outer loop that has not yet
+; been simplified. As a result, the inner loop's exit test will not be
+; rewritten.
+define void @expandOuterRecurrence(i32 %arg) nounwind {
+entry:
+ %sub1 = sub nsw i32 %arg, 1
+ %cmp1 = icmp slt i32 0, %sub1
+ br i1 %cmp1, label %outer, label %exit
+
+outer:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
+ %sub2 = sub nsw i32 %arg, %i
+ %sub3 = sub nsw i32 %sub2, 1
+ %cmp2 = icmp slt i32 0, %sub3
+ br i1 %cmp2, label %inner.ph, label %outer.inc
+
+inner.ph:
+ br label %inner
+
+; CHECK: inner:
+; CHECK: icmp slt
+; CHECK: br i1
+inner:
+ %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
+ %j.inc = add nsw i32 %j, 1
+ %cmp3 = icmp slt i32 %j.inc, %sub3
+ br i1 %cmp3, label %inner, label %outer.inc
+
+; CHECK: outer.inc:
+; CHECK: icmp ne
+; CHECK: br i1
+outer.inc:
+ %i.inc = add nsw i32 %i, 1
+ %cmp4 = icmp slt i32 %i.inc, %sub1
+ br i1 %cmp4, label %outer, label %exit
+
+exit:
+ ret void
+}
+
+; Force SCEVExpander to look for an existing well-formed phi.
+; Perform LFTR without generating extra preheader code.
+define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
+ i32 %irow, i32 %ilead) nounwind {
+; CHECK: entry:
+; CHECK-NOT: zext
+; CHECK-NOT: add
+; CHECK: loop:
+; CHECK: phi i64
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: icmp ne
+; CHECK: br i1
+entry:
+ %cmp = icmp slt i32 1, %irow
+ br i1 %cmp, label %loop, label %return
+
+loop:
+ %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+ %diagidx = add nsw i32 %rowidx, %i
+ %diagidxw = sext i32 %diagidx to i64
+ %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw
+ %v1 = load double* %matrixp
+ %iw = sext i32 %i to i64
+ %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw
+ %v2 = load double* %vectorp
+ %row.inc = add nsw i32 %rowidx, %ilead
+ %i.inc = add nsw i32 %i, 1
+ %cmp196 = icmp slt i32 %i.inc, %irow
+ br i1 %cmp196, label %loop, label %return
+
+return:
+ ret void
+}
+
+; Avoid generating extra code to materialize a trip count. Skip LFTR.
+define void @unguardedloop([0 x double]* %matrix, [0 x double]* %vector,
+ i32 %irow, i32 %ilead) nounwind {
+entry:
+ br label %loop
+
+; CHECK: entry:
+; CHECK-NOT: zext
+; CHECK-NOT: add
+; CHECK: loop:
+; CHECK: phi i64
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: icmp slt
+; CHECK: br i1
+loop:
+ %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+ %diagidx = add nsw i32 %rowidx, %i
+ %diagidxw = sext i32 %diagidx to i64
+ %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw
+ %v1 = load double* %matrixp
+ %iw = sext i32 %i to i64
+ %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw
+ %v2 = load double* %vectorp
+ %row.inc = add nsw i32 %rowidx, %ilead
+ %i.inc = add nsw i32 %i, 1
+ %cmp196 = icmp slt i32 %i.inc, %irow
+ br i1 %cmp196, label %loop, label %return
+
+return:
+ ret void
+}
+
+; Remove %i which is only used by the exit test.
+; Verify that SCEV can still compute a backedge count from the sign
+; extended %n, used for pointer comparison by LFTR.
+define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind {
+entry:
+ %x.ext = sext i32 %x to i64
+ %add.ptr = getelementptr inbounds i8* %base, i64 %x.ext
+ %y.ext = sext i32 %y to i64
+ %add.ptr10 = getelementptr inbounds i8* %add.ptr, i64 %y.ext
+ %lim = add i32 %x, %n
+ %cmp.ph = icmp ult i32 %x, %lim
+ br i1 %cmp.ph, label %loop, label %exit
+
+; CHECK: loop:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK: getelementptr
+; CHECK: store
+; CHECK: icmp ne i8*
+; CHECK: br i1
+loop:
+ %i = phi i32 [ %x, %entry ], [ %inc, %loop ]
+ %aptr = phi i8* [ %add.ptr10, %entry ], [ %incdec.ptr, %loop ]
+ %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1
+ store i8 3, i8* %aptr
+ %inc = add i32 %i, 1
+ %cmp = icmp ult i32 %inc, %lim
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Exercise backedge taken count verification with a never-taken loop.
+define void @nevertaken() nounwind uwtable ssp {
+entry:
+ br label %loop
+
+; CHECK: loop:
+; CHECK-NOT: phi
+; CHECK-NOT: add
+; CHECK-NOT: icmp
+; CHECK: exit:
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp sle i32 %inc, 0
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Test LFTR on an IV whose recurrence start is a non-unit pointer type.
+define void @aryptriv([256 x i8]* %base, i32 %n) nounwind {
+entry:
+ %ivstart = getelementptr inbounds [256 x i8]* %base, i32 0, i32 0
+ %ivend = getelementptr inbounds [256 x i8]* %base, i32 0, i32 %n
+ %cmp.ph = icmp ult i8* %ivstart, %ivend
+ br i1 %cmp.ph, label %loop, label %exit
+
+; CHECK: loop:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK: getelementptr
+; CHECK: store
+; CHECK: icmp ne i8*
+; CHECK: br i1
+loop:
+ %aptr = phi i8* [ %ivstart, %entry ], [ %incdec.ptr, %loop ]
+ %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1
+ store i8 3, i8* %aptr
+ %cmp = icmp ult i8* %incdec.ptr, %ivend
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}