indvars: LinearFunctionTestReplace for non-canonical IVs.

author Andrew Trick <atrick@apple.com>

Mon, 18 Jul 2011 20:32:31 +0000 (20:32 +0000)

committer Andrew Trick <atrick@apple.com>

Mon, 18 Jul 2011 20:32:31 +0000 (20:32 +0000)
author Andrew Trick <atrick@apple.com>
Mon, 18 Jul 2011 20:32:31 +0000 (20:32 +0000)
committer Andrew Trick <atrick@apple.com>
Mon, 18 Jul 2011 20:32:31 +0000 (20:32 +0000)
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp

index 0078abd4d5297eb320f7774eb3dae9145eee7e3a..d7b11b8546028d3966fcf0c8fb3747e02f07b9fc 100644 (file)
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -79,6 +79,12 @@ static cl::opt<bool> DisableIVRewrite(
    "disable-iv-rewrite", cl::Hidden,
    cl::desc("Disable canonical induction variable rewriting"));
  
+// Temporary flag for use with -disable-iv-rewrite to force a canonical IV for
+// LFTR purposes.
+static cl::opt<bool> ForceLFTR(
+  "force-lftr", cl::Hidden,
+  cl::desc("Enable forced linear function test replacement"));
+
  namespace {
    class IndVarSimplify : public LoopPass {
      IVUsers         *IU;
@@ -140,9 +146,8 @@ namespace {
  
      void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
  
-    ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
-                                        PHINode *IndVar,
-                                        SCEVExpander &Rewriter);
+    Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
+                                     PHINode *IndVar, SCEVExpander &Rewriter);
  
      void SinkUnusedInvariants(Loop *L);
    };
@@ -1014,7 +1019,7 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
      NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
      return 0;
    }
-  // We assume that block terminators are not SCEVable. We wouldn't want to
+  // Assume block terminators cannot evaluate to a recurrence. We can't
    // insert a Trunc after a terminator if there happens to be a critical edge.
    assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
           "SCEV is not expected to evaluate a block terminator");
@@ -1302,10 +1307,6 @@ static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
    // Get the symbolic expression for this instruction.
    const SCEV *S = SE->getSCEV(I);
  
-  // We assume that terminators are not SCEVable.
-  assert((!S || I != I->getParent()->getTerminator()) &&
-         "can't fold terminators");
-
    // Only consider affine recurrences.
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
    if (AR && AR->getLoop() == L)
@@ -1471,7 +1472,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
      }
    }
  
-  if (!DisableIVRewrite)
+  if (!DisableIVRewrite || ForceLFTR)
      return false;
  
    // Recurse past add expressions, which commonly occur in the
@@ -1522,7 +1523,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
  /// getBackedgeIVType - Get the widest type used by the loop test after peeking
  /// through Truncs.
  ///
-/// TODO: Unnecessary if LFTR does not force a canonical IV.
+/// TODO: Unnecessary when ForceLFTR is removed.
  static Type *getBackedgeIVType(Loop *L) {
    if (!L->getExitingBlock())
      return 0;
@@ -1549,12 +1550,198 @@ static Type *getBackedgeIVType(Loop *L) {
    return Ty;
  }
  
+/// isLoopInvariant - Perform a quick domtree based check for loop invariance
+/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems
+/// gratuitous for this purpose.
+static bool isLoopInvariant(Value *V, Loop *L, DominatorTree *DT) {
+  Instruction *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return true; // Not an instruction: treated as loop invariant.
+
+  return DT->properlyDominates(Inst->getParent(), L->getHeader());
+}
+
+/// getLoopPhiForCounter - Return the loop header phi IFF IncV is an add, sub,
+/// or two-operand GEP of that phi and a loop invariant value; otherwise null.
+static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
+  Instruction *IncI = dyn_cast<Instruction>(IncV);
+  if (!IncI)
+    return 0;
+
+  switch (IncI->getOpcode()) {
+  case Instruction::Add:
+  case Instruction::Sub:
+    break;
+  case Instruction::GetElementPtr:
+    // An IV counter must preserve its type.
+    if (IncI->getNumOperands() == 2)
+      break;
+  default: // Also reached by fall-through from a GEP with != 2 operands.
+    return 0;
+  }
+
+  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
+  if (Phi && Phi->getParent() == L->getHeader()) {
+    if (isLoopInvariant(IncI->getOperand(1), L, DT))
+      return Phi;
+    return 0; // Header phi, but the other operand varies in the loop.
+  }
+  if (IncI->getOpcode() == Instruction::GetElementPtr)
+    return 0; // Only add/sub may carry the phi as operand 1.
+
+  // Allow add/sub to be commuted. NOTE(review): this also accepts (inv - phi).
+  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
+  if (Phi && Phi->getParent() == L->getHeader()) {
+    if (isLoopInvariant(IncI->getOperand(0), L, DT))
+      return Phi;
+  }
+  return 0;
+}
+
+/// needsLFTR - LinearFunctionTestReplace policy. Return false when the loop
+/// has no latch or its exit test is already sufficiently canonical, else true.
+static bool needsLFTR(Loop *L, DominatorTree *DT) {
+  assert(L->getExitingBlock() && "expected loop exit");
+
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  // Don't bother with LFTR if the loop is not properly simplified.
+  if (!LatchBlock)
+    return false;
+
+  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  assert(BI && "expected exit branch");
+
+  // Do LFTR to simplify the exit condition to an ICMP.
+  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond)
+    return true;
+
+  // Do LFTR to simplify the exit ICMP to EQ/NE
+  ICmpInst::Predicate Pred = Cond->getPredicate();
+  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
+    return true;
+
+  // Look for a loop invariant RHS
+  Value *LHS = Cond->getOperand(0);
+  Value *RHS = Cond->getOperand(1);
+  if (!isLoopInvariant(RHS, L, DT)) {
+    if (!isLoopInvariant(LHS, L, DT))
+      return true;
+    std::swap(LHS, RHS); // Keep the loop invariant operand on the RHS.
+  }
+  // Look for a simple IV counter LHS
+  PHINode *Phi = dyn_cast<PHINode>(LHS);
+  if (!Phi)
+    Phi = getLoopPhiForCounter(LHS, L, DT);
+
+  if (!Phi)
+    return true;
+
+  // Do LFTR if the exit condition's IV is *not* a simple counter.
+  Value *IncV = Phi->getIncomingValueForBlock(L->getLoopLatch());
+  return Phi != getLoopPhiForCounter(IncV, L, DT);
+}
+
+/// AlmostDeadIV - Return true if this IV has no uses other than the (soon to
+/// be rewritten) loop exit test and its own increment.
+static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
+  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+  Value *IncV = Phi->getIncomingValue(LatchIdx);
+
+  for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
+       UI != UE; ++UI) {
+    if (*UI != Cond && *UI != IncV) return false;
+  }
+
+  for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
+       UI != UE; ++UI) {
+    if (*UI != Cond && *UI != Phi) return false;
+  }
+  return true;
+}
+
+/// FindLoopCounter - Find an affine IV in canonical form.
+///
+/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
+///
+/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
+/// This is difficult in general for SCEV because of potential overflow. But we
+/// could at least handle constant BECounts.
+static PHINode *
+FindLoopCounter(Loop *L, const SCEV *BECount,
+                ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
+  // I'm not sure how BECount could be a pointer type, but we definitely don't
+  // want to LFTR that.
+  if (BECount->getType()->isPointerTy())
+    return 0;
+
+  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
+
+  Value *Cond =
+    cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
+
+  // Loop over all of the PHI nodes, looking for a simple counter.
+  PHINode *BestPhi = 0;
+  const SCEV *BestInit = 0;
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  assert(LatchBlock && "needsLFTR should guarantee a loop latch");
+
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    PHINode *Phi = cast<PHINode>(I);
+    if (!SE->isSCEVable(Phi->getType()))
+      continue;
+
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
+    if (!AR || AR->getLoop() != L || !AR->isAffine())
+      continue;
+
+    // AR may be a pointer type, while BECount is an integer type.
+    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
+    // AR may not be a narrower type, or we may never exit.
+    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
+    if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
+      continue;
+
+    const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+    if (!Step || !Step->isOne())
+      continue;
+
+    int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
+    Value *IncV = Phi->getIncomingValue(LatchIdx);
+    if (getLoopPhiForCounter(IncV, L, DT) != Phi)
+      continue;
+
+    const SCEV *Init = AR->getStart();
+
+    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
+      // Don't force a live loop counter if another IV can be used.
+      if (AlmostDeadIV(Phi, LatchBlock, Cond))
+        continue;
+
+      // Prefer to count-from-zero. This is a more "canonical" counter form. It
+      // also prefers integer to pointer IVs.
+      if (BestInit->isZero() != Init->isZero()) {
+        if (BestInit->isZero())
+          continue;
+      }
+      // If two IVs both count from zero or both count from nonzero then the
+      // narrower is likely a dead phi that has been widened. Use the wider phi
+      // to allow the other to be eliminated.
+      if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
+        continue;
+    }
+    BestPhi = Phi;
+    BestInit = Init;
+  }
+  return BestPhi;
+}
+
  /// LinearFunctionTestReplace - This method rewrites the exit condition of the
  /// loop to be a canonical != comparison against the incremented loop induction
  /// variable.  This pass is able to rewrite the exit tests of any loop where the
  /// SCEV analysis can determine a loop-invariant trip count of the loop, which
  /// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
+Value *IndVarSimplify::
  LinearFunctionTestReplace(Loop *L,
                            const SCEV *BackedgeTakenCount,
                            PHINode *IndVar,
@@ -1562,62 +1749,118 @@ LinearFunctionTestReplace(Loop *L,
    assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
    BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
  
+  // In DisableIVRewrite mode, IndVar is not necessarily a canonical IV. In this
+  // mode, LFTR can ignore IV overflow and truncate to the width of
+  // BECount. This avoids materializing the add(zext(add)) expression.
+  Type *CntTy = DisableIVRewrite ?
+    BackedgeTakenCount->getType() : IndVar->getType();
+
+  const SCEV *IVLimit = BackedgeTakenCount;
+
    // If the exiting block is not the same as the backedge block, we must compare
    // against the preincremented value, otherwise we prefer to compare against
    // the post-incremented value.
    Value *CmpIndVar;
-  const SCEV *RHS = BackedgeTakenCount;
    if (L->getExitingBlock() == L->getLoopLatch()) {
      // Add one to the "backedge-taken" count to get the trip count.
      // If this addition may overflow, we have to be more pessimistic and
      // cast the induction variable before doing the add.
-    const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
      const SCEV *N =
-      SE->getAddExpr(BackedgeTakenCount,
-                     SE->getConstant(BackedgeTakenCount->getType(), 1));
-    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
-        SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
-      // No overflow. Cast the sum.
-      RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
-    } else {
-      // Potential overflow. Cast before doing the add.
-      RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
-                                        IndVar->getType());
-      RHS = SE->getAddExpr(RHS,
-                           SE->getConstant(IndVar->getType(), 1));
+      SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
+    if (CntTy == IVLimit->getType())
+      IVLimit = N;
+    else {
+      const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
+      if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+          SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+        // No overflow. Cast the sum.
+        IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
+      } else {
+        // Potential overflow. Cast before doing the add.
+        IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
+        IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
+      }
      }
-
      // The BackedgeTaken expression contains the number of times that the
      // backedge branches to the loop header.  This is one less than the
      // number of times the loop executes, so use the incremented indvar.
      CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
    } else {
      // We have to use the preincremented value...
-    RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
-                                      IndVar->getType());
+    IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
      CmpIndVar = IndVar;
    }
  
+  // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
+  // So for non-zero start, compute the IVLimit here.
+  bool isPtrIV = false;
+  Type *CmpTy = CntTy;
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+  assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+  if (!AR->getStart()->isZero()) {
+    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+    const SCEV *IVInit = AR->getStart();
+
+    // For pointer types, sign extend BECount in order to materialize a GEP.
+    // Note that for DisableIVRewrite, we never run SCEVExpander on a
+    // pointer type, because we must preserve the existing GEPs. Instead we
+    // directly generate a GEP later.
+    if (IVInit->getType()->isPointerTy()) {
+      isPtrIV = true;
+      CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
+      IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
+    }
+    // For integer types, truncate the IV before computing IVInit + BECount.
+    else {
+      if (SE->getTypeSizeInBits(IVInit->getType())
+          > SE->getTypeSizeInBits(CmpTy))
+        IVInit = SE->getTruncateExpr(IVInit, CmpTy);
+
+      IVLimit = SE->getAddExpr(IVInit, IVLimit);
+    }
+  }
    // Expand the code for the iteration count.
-  assert(SE->isLoopInvariant(RHS, L) &&
+  IRBuilder<> Builder(BI);
+
+  assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+  Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
+
+  // Create a gep for IVInit + IVLimit from an existing pointer base.
+  assert(isPtrIV == IndVar->getType()->isPointerTy() &&
+         "IndVar type must match IVInit type");
+  if (isPtrIV) {
+      Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+      assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
+      const PointerType *PointerTy = cast<PointerType>(IVStart->getType());
+      assert(SE->getSizeOfExpr(PointerTy->getElementType())->isOne() &&
+             "unit stride pointer IV must be i8*");
+
+      Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
+      ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
+      Builder.SetInsertPoint(BI);
+  }
  
    // Insert a new icmp_ne or icmp_eq instruction before the branch.
-  ICmpInst::Predicate Opcode;
+  ICmpInst::Predicate P;
    if (L->contains(BI->getSuccessor(0)))
-    Opcode = ICmpInst::ICMP_NE;
+    P = ICmpInst::ICMP_NE;
    else
-    Opcode = ICmpInst::ICMP_EQ;
+    P = ICmpInst::ICMP_EQ;
  
    DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                 << "      LHS:" << *CmpIndVar << '\n'
                 << "       op:\t"
-               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
-               << "      RHS:\t" << *RHS << "\n");
+               << (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+               << "      RHS:\t" << *ExitCnt << "\n"
+               << "     Expr:\t" << *IVLimit << "\n");
  
-  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
-  Cond->setDebugLoc(BI->getDebugLoc());
+  if (SE->getTypeSizeInBits(CmpIndVar->getType())
+      > SE->getTypeSizeInBits(CmpTy)) {
+    CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
+  }
+
+  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
    Value *OrigCond = BI->getCondition();
    // It's tempting to use replaceAllUsesWith here to fully replace the old
    // comparison, but that's not immediately safe, since users of the old
@@ -1784,8 +2027,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
    // a canonical induction variable should be inserted.
    Type *LargestType = 0;
    bool NeedCannIV = false;
+  bool ReuseIVForExit = DisableIVRewrite && !ForceLFTR;
    bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
-  if (ExpandBECount) {
+  if (ExpandBECount && !ReuseIVForExit) {
      // If we have a known trip count and a single exit block, we'll be
      // rewriting the loop exit test condition below, which requires a
      // canonical induction variable.
@@ -1848,15 +2092,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
        OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
      }
    }
-
+  else if (ExpandBECount && ReuseIVForExit && needsLFTR(L, DT)) {
+    IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+  }
    // If we have a trip count expression, rewrite the loop's exit condition
    // using it.  We can currently only handle loops with a single exit.
-  ICmpInst *NewICmp = 0;
-  if (ExpandBECount) {
-    assert(canExpandBackedgeTakenCount(L, SE) &&
-           "canonical IV disrupted BackedgeTaken expansion");
-    assert(NeedCannIV &&
-           "LinearFunctionTestReplace requires a canonical induction variable");
+  Value *NewICmp = 0;
+  if (ExpandBECount && IndVar) {
      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
      // pass that uses the SCEVExpander must do it. This does not work well for
@@ -1894,9 +2136,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
  
    // For completeness, inform IVUsers of the IV use in the newly-created
    // loop exit test instruction.
-  if (NewICmp && IU)
-    IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
-
+  if (IU && NewICmp) {
+    ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
+    if (NewICmpInst)
+      IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
+  }
    // Clean up dead instructions.
    Changed |= DeleteDeadPHIs(L->getHeader());
    // Check a post-condition.
diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll

index 9e635fdc0067b135351b8ae78277edfff2c47140..da7ecb66c5488bb090a5c91e654afa496034aa49 100644 (file)
--- a/test/Transforms/IndVarSimplify/ada-loops.ll
+++ b/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -9,10 +9,9 @@
  ; Note that all four functions should actually be converted to
  ; memset. However, this test case validates indvars behavior.  We
  ; don't check that phis are "folded together" because that is a job
-; for loop strength reduction. But indvars must remove sext, zext,
-; trunc, and add i8.
+; for loop strength reduction. But indvars must remove sext, zext, and add i8.
  ;
-; CHECK-NOT: {{sext|zext|trunc|add i8}}
+; CHECK-NOT: {{sext|zext|add i8}}
  
  ; ModuleID = 'ada.bc'
  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32"
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll

new file mode 100644 (file)

index 0000000..6ccd1a4
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -0,0 +1,230 @@
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
+;
+; Make sure that indvars can perform LFTR without a canonical IV.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Perform LFTR using the original pointer-type IV.
+
+;  for(char* p = base; p < base + n; ++p) {
+;    *p = p-base;
+;  }
+define void @ptriv(i8* %base, i32 %n) nounwind {
+entry:
+  %idx.ext = sext i32 %n to i64
+  %add.ptr = getelementptr inbounds i8* %base, i64 %idx.ext
+  %cmp1 = icmp ult i8* %base, %add.ptr
+  br i1 %cmp1, label %for.body, label %for.end
+
+; CHECK: for.body:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK-NOT: add
+; CHECK: icmp ne i8*
+; CHECK: br i1
+for.body:
+  %p.02 = phi i8* [ %base, %entry ], [ %incdec.ptr, %for.body ]
+  ; cruft to make the IV useful
+  %sub.ptr.lhs.cast = ptrtoint i8* %p.02 to i64
+  %sub.ptr.rhs.cast = ptrtoint i8* %base to i64
+  %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %conv = trunc i64 %sub.ptr.sub to i8
+  store i8 %conv, i8* %p.02
+  %incdec.ptr = getelementptr inbounds i8* %p.02, i32 1
+  %cmp = icmp ult i8* %incdec.ptr, %add.ptr
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; It would be nice if SCEV and any loop analysis could assume that
+; preheaders exist. Unfortunately it is not always the case. This test
+; checks that SCEVExpander can handle an outer loop that has not yet
+; been simplified. As a result, the inner loop's exit test will not be
+; rewritten.
+define void @expandOuterRecurrence(i32 %arg) nounwind {
+entry:
+  %sub1 = sub nsw i32 %arg, 1
+  %cmp1 = icmp slt i32 0, %sub1
+  br i1 %cmp1, label %outer, label %exit
+
+outer:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
+  %sub2 = sub nsw i32 %arg, %i
+  %sub3 = sub nsw i32 %sub2, 1
+  %cmp2 = icmp slt i32 0, %sub3
+  br i1 %cmp2, label %inner.ph, label %outer.inc
+
+inner.ph:
+  br label %inner
+
+; CHECK: inner:
+; CHECK: icmp slt
+; CHECK: br i1
+inner:
+  %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
+  %j.inc = add nsw i32 %j, 1
+  %cmp3 = icmp slt i32 %j.inc, %sub3
+  br i1 %cmp3, label %inner, label %outer.inc
+
+; CHECK: outer.inc:
+; CHECK: icmp ne
+; CHECK: br i1
+outer.inc:
+  %i.inc = add nsw i32 %i, 1
+  %cmp4 = icmp slt i32 %i.inc, %sub1
+  br i1 %cmp4, label %outer, label %exit
+
+exit:
+  ret void
+}
+
+; Force SCEVExpander to look for an existing well-formed phi.
+; Perform LFTR without generating extra preheader code.
+define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector,
+                         i32 %irow, i32 %ilead) nounwind {
+; CHECK: entry:
+; CHECK-NOT: zext
+; CHECK-NOT: add
+; CHECK: loop:
+; CHECK: phi i64
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: icmp ne
+; CHECK: br i1
+entry:
+  %cmp = icmp slt i32 1, %irow
+  br i1 %cmp, label %loop, label %return
+
+loop:
+  %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+  %diagidx = add nsw i32 %rowidx, %i
+  %diagidxw = sext i32 %diagidx to i64
+  %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw
+  %v1 = load double* %matrixp
+  %iw = sext i32 %i to i64
+  %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw
+  %v2 = load double* %vectorp
+  %row.inc = add nsw i32 %rowidx, %ilead
+  %i.inc = add nsw i32 %i, 1
+  %cmp196 = icmp slt i32 %i.inc, %irow
+  br i1 %cmp196, label %loop, label %return
+
+return:
+  ret void
+}
+
+; Avoid generating extra code to materialize a trip count. Skip LFTR.
+define void @unguardedloop([0 x double]* %matrix, [0 x double]* %vector,
+                           i32 %irow, i32 %ilead) nounwind {
+entry:
+  br label %loop
+
+; CHECK: entry:
+; CHECK-NOT: zext
+; CHECK-NOT: add
+; CHECK: loop:
+; CHECK: phi i64
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: icmp slt
+; CHECK: br i1
+loop:
+  %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ]
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+  %diagidx = add nsw i32 %rowidx, %i
+  %diagidxw = sext i32 %diagidx to i64
+  %matrixp = getelementptr inbounds [0 x double]* %matrix, i32 0, i64 %diagidxw
+  %v1 = load double* %matrixp
+  %iw = sext i32 %i to i64
+  %vectorp = getelementptr inbounds [0 x double]* %vector, i32 0, i64 %iw
+  %v2 = load double* %vectorp
+  %row.inc = add nsw i32 %rowidx, %ilead
+  %i.inc = add nsw i32 %i, 1
+  %cmp196 = icmp slt i32 %i.inc, %irow
+  br i1 %cmp196, label %loop, label %return
+
+return:
+  ret void
+}
+
+; Remove %i which is only used by the exit test.
+; Verify that SCEV can still compute a backedge count from the sign
+; extended %n, used for pointer comparison by LFTR.
+define void @geplftr(i8* %base, i32 %x, i32 %y, i32 %n) nounwind {
+entry:
+  %x.ext = sext i32 %x to i64
+  %add.ptr = getelementptr inbounds i8* %base, i64 %x.ext
+  %y.ext = sext i32 %y to i64
+  %add.ptr10 = getelementptr inbounds i8* %add.ptr, i64 %y.ext
+  %lim = add i32 %x, %n
+  %cmp.ph = icmp ult i32 %x, %lim
+  br i1 %cmp.ph, label %loop, label %exit
+
+; CHECK: loop:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK: getelementptr
+; CHECK: store
+; CHECK: icmp ne i8*
+; CHECK: br i1
+loop:
+  %i = phi i32 [ %x, %entry ], [ %inc, %loop ]
+  %aptr = phi i8* [ %add.ptr10, %entry ], [ %incdec.ptr, %loop ]
+  %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1
+  store i8 3, i8* %aptr
+  %inc = add i32 %i, 1
+  %cmp = icmp ult i32 %inc, %lim
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Exercise backedge taken count verification with a never-taken loop.
+define void @nevertaken() nounwind uwtable ssp {
+entry:
+  br label %loop
+
+; CHECK: loop:
+; CHECK-NOT: phi
+; CHECK-NOT: add
+; CHECK-NOT: icmp
+; CHECK: exit:
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp sle i32 %inc, 0
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Test LFTR on an IV whose recurrence start is a non-unit pointer type.
+define void @aryptriv([256 x i8]* %base, i32 %n) nounwind {
+entry:
+  %ivstart = getelementptr inbounds [256 x i8]* %base, i32 0, i32 0
+  %ivend = getelementptr inbounds [256 x i8]* %base, i32 0, i32 %n
+  %cmp.ph = icmp ult i8* %ivstart, %ivend
+  br i1 %cmp.ph, label %loop, label %exit
+
+; CHECK: loop:
+; CHECK: phi i8*
+; CHECK-NOT: phi
+; CHECK: getelementptr
+; CHECK: store
+; CHECK: icmp ne i8*
+; CHECK: br i1
+loop:
+  %aptr = phi i8* [ %ivstart, %entry ], [ %incdec.ptr, %loop ]
+  %incdec.ptr = getelementptr inbounds i8* %aptr, i32 1
+  store i8 3, i8* %aptr
+  %cmp = icmp ult i8* %incdec.ptr, %ivend
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
author	Andrew Trick <atrick@apple.com>
	Mon, 18 Jul 2011 20:32:31 +0000 (20:32 +0000)
committer	Andrew Trick <atrick@apple.com>
	Mon, 18 Jul 2011 20:32:31 +0000 (20:32 +0000)
lib/Transforms/Scalar/IndVarSimplify.cpp		patch \| blob \| history
test/Transforms/IndVarSimplify/ada-loops.ll		patch \| blob \| history
test/Transforms/IndVarSimplify/lftr-reuse.ll	[new file with mode: 0644]	patch \| blob