ScalarEvolution: fix hang in @llvm.assume handling (isLoopBackedgeGuardedByCond)

[oota-llvm.git] / lib / Analysis / ScalarEvolution.cpp
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp

index 73f0dc1e44c68d4f0388dee27cd80d0b8cb43680..ef695234b661b20ed22ed667ae2ed0f886077453 100644 (file)
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -114,16 +114,6 @@ static cl::opt<bool>
  VerifySCEV("verify-scev",
             cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
  
-INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
-                "Scalar Evolution Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
-                "Scalar Evolution Analysis", false, true)
-char ScalarEvolution::ID = 0;
-
  //===----------------------------------------------------------------------===//
  //                           SCEV class definitions
  //===----------------------------------------------------------------------===//
@@ -1983,7 +1973,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
    Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
  
    // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, LI);
+  GroupByComplexity(Ops, &LI);
  
    // If there are any constants, fold them together.
    unsigned Idx = 0;
@@ -2391,7 +2381,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
    Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
  
    // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, LI);
+  GroupByComplexity(Ops, &LI);
  
    // If there are any constants, fold them together.
    unsigned Idx = 0;
@@ -2859,10 +2849,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
    // Canonicalize nested AddRecs in by nesting them in order of loop depth.
    if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
      const Loop *NestedLoop = NestedAR->getLoop();
-    if (L->contains(NestedLoop) ?
-        (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
-        (!NestedLoop->contains(L) &&
-         DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
+    if (L->contains(NestedLoop)
+            ? (L->getLoopDepth() < NestedLoop->getLoopDepth())
+            : (!NestedLoop->contains(L) &&
+               DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {
        SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                    NestedAR->op_end());
        Operands[0] = NestedAR->getStart();
@@ -2997,7 +2987,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  #endif
  
    // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, LI);
+  GroupByComplexity(Ops, &LI);
  
    // If there are any constants, fold them together.
    unsigned Idx = 0;
@@ -3101,7 +3091,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
  #endif
  
    // Sort by complexity, this groups all similar expression types together.
-  GroupByComplexity(Ops, LI);
+  GroupByComplexity(Ops, &LI);
  
    // If there are any constants, fold them together.
    unsigned Idx = 0;
@@ -3202,7 +3192,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
    // constant expression and then folding it back into a ConstantInt.
    // This is just a compile-time optimization.
    return getConstant(IntTy,
-                     F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
+                     F.getParent()->getDataLayout().getTypeAllocSize(AllocTy));
  }
  
  const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
@@ -3213,7 +3203,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
    // This is just a compile-time optimization.
    return getConstant(
        IntTy,
-      F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
+      F.getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
            FieldNo));
  }
  
@@ -3256,7 +3246,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {
  /// for which isSCEVable must return true.
  uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
    assert(isSCEVable(Ty) && "Type is not SCEVable!");
-  return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
+  return F.getParent()->getDataLayout().getTypeSizeInBits(Ty);
  }
  
  /// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -3272,11 +3262,11 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  
    // The only other support type is pointer.
    assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
-  return F->getParent()->getDataLayout().getIntPtrType(Ty);
+  return F.getParent()->getDataLayout().getIntPtrType(Ty);
  }
  
  const SCEV *ScalarEvolution::getCouldNotCompute() {
-  return &CouldNotCompute;
+  return CouldNotCompute.get();
  }
  
  namespace {
@@ -3339,15 +3329,16 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
  
  /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
  ///
-const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
+                                             SCEV::NoWrapFlags Flags) {
    if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
      return getConstant(
                 cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
  
    Type *Ty = V->getType();
    Ty = getEffectiveSCEVType(Ty);
-  return getMulExpr(V,
-                  getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+  return getMulExpr(
+      V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
  }
  
  /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
@@ -3366,15 +3357,40 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
  /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.
  const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
                                            SCEV::NoWrapFlags Flags) {
-  assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
-
    // Fast path: X - X --> 0.
    if (LHS == RHS)
      return getConstant(LHS->getType(), 0);
  
-  // X - Y --> X + -Y.
-  // X -(nsw || nuw) Y --> X + -Y.
-  return getAddExpr(LHS, getNegativeSCEV(RHS));
+  // We represent LHS - RHS as LHS + (-1)*RHS. This transformation
+  // makes it so that we cannot make much use of NUW.
+  auto AddFlags = SCEV::FlagAnyWrap;
+  const bool RHSIsNotMinSigned =
+      !getSignedRange(RHS).getSignedMin().isMinSignedValue();
+  if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+    // Let M be the minimum representable signed value. Then (-1)*RHS
+    // signed-wraps if and only if RHS is M. That can happen even for
+    // a NSW subtraction because e.g. (-1)*M signed-wraps even though
+    // -1 - M does not. So to transfer NSW from LHS - RHS to LHS +
+    // (-1)*RHS, we need to prove that RHS != M.
+    //
+    // If LHS is non-negative and we know that LHS - RHS does not
+    // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap
+    // either by proving that RHS > M or that LHS >= 0.
+    if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) {
+      AddFlags = SCEV::FlagNSW;
+    }
+  }
+
+  // FIXME: Find a correct way to transfer NSW to (-1)*RHS when LHS -
+  // RHS is NSW and LHS >= 0.
+  //
+  // The difficulty here is that the NSW flag may have been proven
+  // relative to a loop that is to be found in a recurrence in LHS and
+  // not in RHS. Applying NSW to (-1)*RHS may then let the NSW have a
+  // larger scope than intended.
+  auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+  return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);
  }
  
  /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
@@ -3595,7 +3611,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
  /// a loop header, making it a potential recurrence, or it doesn't.
  ///
  const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
-  if (const Loop *L = LI->getLoopFor(PN->getParent()))
+  if (const Loop *L = LI.getLoopFor(PN->getParent()))
      if (L->getHeader() == PN->getParent()) {
        // The loop may have multiple entrances or multiple exits; we can analyze
        // this phi as an addrec if it has a unique entry value and a unique
@@ -3741,9 +3757,9 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
    // PHI's incoming blocks are in a different loop, in which case doing so
    // risks breaking LCSSA form. Instcombine would normally zap these, but
    // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V =
-          SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
-    if (LI->replacementPreservesLCSSAForm(PN, V))
+  if (Value *V = SimplifyInstruction(PN, F.getParent()->getDataLayout(), &TLI,
+                                     &DT, &AC))
+    if (LI.replacementPreservesLCSSAForm(PN, V))
        return getSCEV(V);
  
    // If it's not a loop phi, we can't handle it yet.
@@ -3838,8 +3854,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
      // For a SCEVUnknown, ask ValueTracking.
      unsigned BitWidth = getTypeSizeInBits(U->getType());
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    computeKnownBits(U->getValue(), Zeros, Ones,
-                     F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
+    computeKnownBits(U->getValue(), Zeros, Ones, F.getParent()->getDataLayout(),
+                     0, &AC, nullptr, &DT);
      return Zeros.countTrailingOnes();
    }
  
@@ -4069,18 +4085,18 @@ ScalarEvolution::getRange(const SCEV *S,
      // Split here to avoid paying the compile-time cost of calling both
      // computeKnownBits and ComputeNumSignBits.  This restriction can be lifted
      // if needed.
-    const DataLayout &DL = F->getParent()->getDataLayout();
+    const DataLayout &DL = F.getParent()->getDataLayout();
      if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
        // For a SCEVUnknown, ask ValueTracking.
        APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-      computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+      computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
        if (Ones != ~Zeros + 1)
          ConservativeResult =
              ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
      } else {
        assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
               "generalize as needed!");
-      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
+      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
        if (NS > 1)
          ConservativeResult = ConservativeResult.intersectWith(
              ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
@@ -4094,6 +4110,7 @@ ScalarEvolution::getRange(const SCEV *S,
  }
  
  SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
+  if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap;
    const BinaryOperator *BinOp = cast<BinaryOperator>(V);
  
    // Return early if there are no flags to propagate to the SCEV.
@@ -4112,7 +4129,7 @@ SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
    // recurrence, but getting that requires computing the SCEV of the operands,
    // which can be expensive. This check we can do cheaply to rule out some
    // cases early.
-  Loop *innermostContainingLoop = LI->getLoopFor(BinOp->getParent());
+  Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent());
    if (innermostContainingLoop == nullptr ||
        innermostContainingLoop->getHeader() != BinOp->getParent())
      return SCEV::FlagAnyWrap;
@@ -4163,7 +4180,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
      // reachable. Such instructions don't matter, and they aren't required
      // to obey basic rules for definitions dominating uses which this
      // analysis depends on.
-    if (!DT->isReachableFromEntry(I->getParent()))
+    if (!DT.isReachableFromEntry(I->getParent()))
        return getUnknown(V);
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      Opcode = CE->getOpcode();
@@ -4185,9 +4202,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
      // because it leads to N-1 getAddExpr calls for N ultimate operands.
      // Instead, gather up all the operands and make a single getAddExpr call.
      // LLVM IR canonical form means we need only traverse the left operands.
-    //
-    // FIXME: Expand this handling of NSW and NUW to other instructions, like
-    // sub and mul.
      SmallVector<const SCEV *, 4> AddOps;
      for (Value *Op = U;; Op = U->getOperand(0)) {
        U = dyn_cast<Operator>(Op);
@@ -4198,7 +4212,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
          break;
        }
  
-      if (auto *OpSCEV = getExistingSCEV(Op)) {
+      if (auto *OpSCEV = getExistingSCEV(U)) {
          AddOps.push_back(OpSCEV);
          break;
        }
@@ -4210,45 +4224,57 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
        // since the flags are only known to apply to this particular
        // addition - they may not apply to other additions that can be
        // formed with operands from AddOps.
-      //
-      // FIXME: Expand this to sub instructions.
-      if (Opcode == Instruction::Add && isa<BinaryOperator>(U)) {
-        SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
-        if (Flags != SCEV::FlagAnyWrap) {
-          AddOps.push_back(getAddExpr(getSCEV(U->getOperand(0)),
-                                      getSCEV(U->getOperand(1)), Flags));
-          break;
-        }
+      const SCEV *RHS = getSCEV(U->getOperand(1));
+      SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+      if (Flags != SCEV::FlagAnyWrap) {
+        const SCEV *LHS = getSCEV(U->getOperand(0));
+        if (Opcode == Instruction::Sub)
+          AddOps.push_back(getMinusSCEV(LHS, RHS, Flags));
+        else
+          AddOps.push_back(getAddExpr(LHS, RHS, Flags));
+        break;
        }
  
-      const SCEV *Op1 = getSCEV(U->getOperand(1));
        if (Opcode == Instruction::Sub)
-        AddOps.push_back(getNegativeSCEV(Op1));
+        AddOps.push_back(getNegativeSCEV(RHS));
        else
-        AddOps.push_back(Op1);
+        AddOps.push_back(RHS);
      }
      return getAddExpr(AddOps);
    }
  
    case Instruction::Mul: {
-    // FIXME: Transfer NSW/NUW as in AddExpr.
      SmallVector<const SCEV *, 4> MulOps;
-    MulOps.push_back(getSCEV(U->getOperand(1)));
-    for (Value *Op = U->getOperand(0);
-         Op->getValueID() == Instruction::Mul + Value::InstructionVal;
-         Op = U->getOperand(0)) {
-      U = cast<Operator>(Op);
+    for (Value *Op = U;; Op = U->getOperand(0)) {
+      U = dyn_cast<Operator>(Op);
+      if (!U || U->getOpcode() != Instruction::Mul) {
+        assert(Op != V && "V should be a mul");
+        MulOps.push_back(getSCEV(Op));
+        break;
+      }
+
+      if (auto *OpSCEV = getExistingSCEV(U)) {
+        MulOps.push_back(OpSCEV);
+        break;
+      }
+
+      SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U);
+      if (Flags != SCEV::FlagAnyWrap) {
+        MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)),
+                                    getSCEV(U->getOperand(1)), Flags));
+        break;
+      }
+
        MulOps.push_back(getSCEV(U->getOperand(1)));
      }
-    MulOps.push_back(getSCEV(U->getOperand(0)));
      return getMulExpr(MulOps);
    }
    case Instruction::UDiv:
      return getUDivExpr(getSCEV(U->getOperand(0)),
                         getSCEV(U->getOperand(1)));
    case Instruction::Sub:
-    return getMinusSCEV(getSCEV(U->getOperand(0)),
-                        getSCEV(U->getOperand(1)));
+    return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)),
+                        getNoWrapFlagsFromUB(U));
    case Instruction::And:
      // For an expression like x&255 that merely masks off the high bits,
      // use zext(trunc(x)) as the SCEV expression.
@@ -4268,7 +4294,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
        unsigned BitWidth = A.getBitWidth();
        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
        computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
-                       F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
+                       F.getParent()->getDataLayout(), 0, &AC, nullptr, &DT);
  
        APInt EffectiveMask =
            APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4368,9 +4394,18 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
        if (SA->getValue().uge(BitWidth))
          break;
  
+      // It is currently not resolved how to interpret NSW for left
+      // shift by BitWidth - 1, so we avoid applying flags in that
+      // case. Remove this check (or this comment) once the situation
+      // is resolved. See
+      // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html
+      // and http://reviews.llvm.org/D8890 .
+      auto Flags = SCEV::FlagAnyWrap;
+      if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U);
+
        Constant *X = ConstantInt::get(getContext(),
          APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
-      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);
      }
      break;
  
@@ -4967,7 +5002,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
      // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
      // considered greater than any computable EL.Max.
      if (EL.Max != getCouldNotCompute() && Latch &&
-        DT->dominates(ExitBB, Latch)) {
+        DT.dominates(ExitBB, Latch)) {
        if (!MustExitMaxBECount)
          MustExitMaxBECount = EL.Max;
        else {
@@ -5580,7 +5615,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
  
    unsigned NumIterations = BEs.getZExtValue(); // must be in range
    unsigned IterationNum = 0;
-  const DataLayout &DL = F->getParent()->getDataLayout();
+  const DataLayout &DL = F.getParent()->getDataLayout();
    for (; ; ++IterationNum) {
      if (IterationNum == NumIterations)
        return RetVal = CurrentIterVals[PN];  // Got exit value!
@@ -5589,7 +5624,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
      // EvaluateExpression adds non-phi values to the CurrentIterVals map.
      DenseMap<Instruction *, Constant *> NextIterVals;
      Constant *NextPHI =
-        EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+        EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
      if (!NextPHI)
        return nullptr;        // Couldn't evaluate!
      NextIterVals[PN] = NextPHI;
@@ -5614,7 +5649,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
        Constant *&NextPHI = NextIterVals[PHI];
        if (!NextPHI) {   // Not already computed.
          Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
-        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
        }
        if (NextPHI != I->second)
          StoppedEvolving = false;
@@ -5666,10 +5701,10 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
    // the loop symbolically to determine when the condition gets a value of
    // "ExitWhen".
    unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
-  const DataLayout &DL = F->getParent()->getDataLayout();
+  const DataLayout &DL = F.getParent()->getDataLayout();
    for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
      ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
-        EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
+        EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));
  
      // Couldn't symbolically evaluate.
      if (!CondVal) return getCouldNotCompute();
@@ -5699,7 +5734,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
        if (NextPHI) continue;    // Already computed!
  
        Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
-      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
      }
      CurrentIterVals.swap(NextIterVals);
    }
@@ -5844,7 +5879,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
    // exit value from the loop without using SCEVs.
    if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
      if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
-      const Loop *LI = (*this->LI)[I->getParent()];
+      const Loop *LI = this->LI[I->getParent()];
        if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
          if (PHINode *PN = dyn_cast<PHINode>(I))
            if (PN->getParent() == LI->getHeader()) {
@@ -5902,16 +5937,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
          // Check to see if getSCEVAtScope actually made an improvement.
          if (MadeImprovement) {
            Constant *C = nullptr;
-          const DataLayout &DL = F->getParent()->getDataLayout();
+          const DataLayout &DL = F.getParent()->getDataLayout();
            if (const CmpInst *CI = dyn_cast<CmpInst>(I))
              C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
-                                                Operands[1], DL, TLI);
+                                                Operands[1], DL, &TLI);
            else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
              if (!LI->isVolatile())
                C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
            } else
              C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
-                                         DL, TLI);
+                                         DL, &TLI);
            if (!C) return V;
            return getSCEV(C);
          }
@@ -6332,7 +6367,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
    // A loop's header is defined to be a block that dominates the loop.
    // If the header has a unique predecessor outside the loop, it must be
    // a block that has exactly one successor that can reach the loop.
-  if (Loop *L = LI->getLoopFor(BB))
+  if (Loop *L = LI.getLoopFor(BB))
      return std::make_pair(L->getLoopPredecessor(), L->getHeader());
  
    return std::pair<BasicBlock *, BasicBlock *>();
@@ -6721,8 +6756,16 @@ bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS,
  bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
                                                 ICmpInst::Predicate Pred,
                                                 bool &Increasing) {
-  SCEV::NoWrapFlags FlagsRequired = SCEV::FlagAnyWrap;
-  bool IncreasingOnNonNegativeStep = false;
+
+  // A zero step value for LHS means the induction variable is essentially a
+  // loop invariant value. We don't really depend on the predicate actually
+  // flipping from false to true (for increasing predicates, and the other way
+  // around for decreasing predicates), all we care about is that *if* the
+  // predicate changes then it only changes from false to true.
+  //
+  // A zero step value in itself is not very useful, but there may be places
+  // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
+  // as general as possible.
  
    switch (Pred) {
    default:
@@ -6730,53 +6773,39 @@ bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
  
    case ICmpInst::ICMP_UGT:
    case ICmpInst::ICMP_UGE:
-    FlagsRequired = SCEV::FlagNUW;
-    IncreasingOnNonNegativeStep = true;
-    break;
-
    case ICmpInst::ICMP_ULT:
    case ICmpInst::ICMP_ULE:
-    FlagsRequired = SCEV::FlagNUW;
-    IncreasingOnNonNegativeStep = false;
-    break;
+    if (!LHS->getNoWrapFlags(SCEV::FlagNUW))
+      return false;
+
+    Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
+    return true;
  
    case ICmpInst::ICMP_SGT:
    case ICmpInst::ICMP_SGE:
-    FlagsRequired = SCEV::FlagNSW;
-    IncreasingOnNonNegativeStep = true;
-    break;
-
    case ICmpInst::ICMP_SLT:
-  case ICmpInst::ICMP_SLE:
-    FlagsRequired = SCEV::FlagNSW;
-    IncreasingOnNonNegativeStep = false;
-    break;
-  }
+  case ICmpInst::ICMP_SLE: {
+    if (!LHS->getNoWrapFlags(SCEV::FlagNSW))
+      return false;
  
-  if (!LHS->getNoWrapFlags(FlagsRequired))
-    return false;
+    const SCEV *Step = LHS->getStepRecurrence(*this);
  
-  // A zero step value for LHS means the induction variable is essentially a
-  // loop invariant value. We don't really depend on the predicate actually
-  // flipping from false to true (for increasing predicates, and the other way
-  // around for decreasing predicates), all we care about is that *if* the
-  // predicate changes then it only changes from false to true.
-  //
-  // A zero step value in itself is not very useful, but there may be places
-  // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
-  // as general as possible.
+    if (isKnownNonNegative(Step)) {
+      Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
+      return true;
+    }
  
-  if (isKnownNonNegative(LHS->getStepRecurrence(*this))) {
-    Increasing = IncreasingOnNonNegativeStep;
-    return true;
+    if (isKnownNonPositive(Step)) {
+      Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
+      return true;
+    }
+
+    return false;
    }
  
-  if (isKnownNonPositive(LHS->getStepRecurrence(*this))) {
-    Increasing = !IncreasingOnNonNegativeStep;
-    return true;
    }
  
-  return false;
+  llvm_unreachable("switch has default clause!");
  }
  
  bool ScalarEvolution::isLoopInvariantPredicate(
@@ -6929,18 +6958,6 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
                      LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
      return true;
  
-  // Check conditions due to any @llvm.assume intrinsics.
-  for (auto &AssumeVH : AC->assumptions()) {
-    if (!AssumeVH)
-      continue;
-    auto *CI = cast<CallInst>(AssumeVH);
-    if (!DT->dominates(CI, Latch->getTerminator()))
-      continue;
-
-    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
-      return true;
-  }
-
    struct ClearWalkingBEDominatingCondsOnExit {
      ScalarEvolution &SE;
  
@@ -6952,7 +6969,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
      }
    };
  
-  // We don't want more than one activation of the following loop on the stack
+  // We don't want more than one activation of the following loops on the stack
    // -- that can lead to O(n!) time complexity.
    if (WalkingBEDominatingConds)
      return false;
@@ -6960,15 +6977,26 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
    WalkingBEDominatingConds = true;
    ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
  
+  // Check conditions due to any @llvm.assume intrinsics.
+  for (auto &AssumeVH : AC.assumptions()) {
+    if (!AssumeVH)
+      continue;
+    auto *CI = cast<CallInst>(AssumeVH);
+    if (!DT.dominates(CI, Latch->getTerminator()))
+      continue;
+
+    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+      return true;
+  }
+
    // If the loop is not reachable from the entry block, we risk running into an
    // infinite loop as we walk up into the dom tree.  These loops do not matter
    // anyway, so we just return a conservative answer when we see them.
-  if (!DT->isReachableFromEntry(L->getHeader()))
+  if (!DT.isReachableFromEntry(L->getHeader()))
      return false;
  
-  for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()];
-       DTN != HeaderDTN;
-       DTN = DTN->getIDom()) {
+  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
+       DTN != HeaderDTN; DTN = DTN->getIDom()) {
  
      assert(DTN && "should reach the loop header before reaching the root!");
  
@@ -6992,7 +7020,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
        // We're constructively (and conservatively) enumerating edges within the
        // loop body that dominate the latch.  The dominator tree better agree
        // with us on this:
-      assert(DT->dominates(DominatingEdge, Latch) && "should be!");
+      assert(DT.dominates(DominatingEdge, Latch) && "should be!");
  
        if (isImpliedCond(Pred, LHS, RHS, Condition,
                          BB != ContinuePredicate->getSuccessor(0)))
@@ -7037,11 +7065,11 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
    }
  
    // Check conditions due to any @llvm.assume intrinsics.
-  for (auto &AssumeVH : AC->assumptions()) {
+  for (auto &AssumeVH : AC.assumptions()) {
      if (!AssumeVH)
        continue;
      auto *CI = cast<CallInst>(AssumeVH);
-    if (!DT->dominates(CI, L->getHeader()))
+    if (!DT.dominates(CI, L->getHeader()))
        continue;
  
      if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
@@ -7298,6 +7326,38 @@ static bool IsMinConsistingOf(ScalarEvolution &SE,
    return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
  }
  
+static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
+                                           ICmpInst::Predicate Pred,
+                                           const SCEV *LHS, const SCEV *RHS) {
+
+  // If both sides are affine addrecs for the same loop, with equal
+  // steps, and we know the recurrences don't wrap, then we only
+  // need to check the predicate on the starting values.
+
+  if (!ICmpInst::isRelational(Pred))
+    return false;
+
+  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!LAR)
+    return false;
+  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+  if (!RAR)
+    return false;
+  if (LAR->getLoop() != RAR->getLoop())
+    return false;
+  if (!LAR->isAffine() || !RAR->isAffine())
+    return false;
+
+  if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
+    return false;
+
+  SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
+                         SCEV::FlagNSW : SCEV::FlagNUW;
+  if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
+    return false;
+
+  return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
+}
  
  /// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max
  /// expression?
@@ -7343,7 +7403,8 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
    auto IsKnownPredicateFull =
        [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
      return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
-        IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS);
+        IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
+        IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS);
    };
  
    switch (Pred) {
@@ -8273,22 +8334,34 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  //                   ScalarEvolution Class Implementation
  //===----------------------------------------------------------------------===//
  
-ScalarEvolution::ScalarEvolution()
-    : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
-      LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
-  initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
-}
-
-bool ScalarEvolution::runOnFunction(Function &F) {
-  this->F = &F;
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  return false;
-}
-
-void ScalarEvolution::releaseMemory() {
+ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
+                                 AssumptionCache &AC, DominatorTree &DT,
+                                 LoopInfo &LI)
+    : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
+      CouldNotCompute(new SCEVCouldNotCompute()),
+      WalkingBEDominatingConds(false), ValuesAtScopes(64), LoopDispositions(64),
+      BlockDispositions(64), FirstUnknown(nullptr) {}
+
+ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
+    : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
+      CouldNotCompute(std::move(Arg.CouldNotCompute)),
+      ValueExprMap(std::move(Arg.ValueExprMap)),
+      WalkingBEDominatingConds(false),
+      BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+      ConstantEvolutionLoopExitValue(
+          std::move(Arg.ConstantEvolutionLoopExitValue)),
+      ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
+      LoopDispositions(std::move(Arg.LoopDispositions)),
+      BlockDispositions(std::move(Arg.BlockDispositions)),
+      UnsignedRanges(std::move(Arg.UnsignedRanges)),
+      SignedRanges(std::move(Arg.SignedRanges)),
+      UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
+      SCEVAllocator(std::move(Arg.SCEVAllocator)),
+      FirstUnknown(Arg.FirstUnknown) {
+  Arg.FirstUnknown = nullptr;
+}
+
+ScalarEvolution::~ScalarEvolution() {
    // Iterate through all the SCEVUnknown instances and call their
    // destructors, so that they release their references to their values.
    for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
@@ -8307,24 +8380,6 @@ void ScalarEvolution::releaseMemory() {
  
    assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
    assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
-
-  BackedgeTakenCounts.clear();
-  ConstantEvolutionLoopExitValue.clear();
-  ValuesAtScopes.clear();
-  LoopDispositions.clear();
-  BlockDispositions.clear();
-  UnsignedRanges.clear();
-  SignedRanges.clear();
-  UniqueSCEVs.clear();
-  SCEVAllocator.Reset();
-}
-
-void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequiredTransitive<AssumptionCacheTracker>();
-  AU.addRequiredTransitive<LoopInfoWrapperPass>();
-  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
-  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
  }
  
  bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -8366,7 +8421,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
    OS << "\n";
  }
  
-void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+void ScalarEvolution::print(raw_ostream &OS) const {
    // ScalarEvolution's implementation of the print method is to print
    // out SCEV values of all instructions that are interesting. Doing
    // this potentially causes it to create new SCEV objects though,
@@ -8376,7 +8431,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
    ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  
    OS << "Classifying expressions for: ";
-  F->printAsOperand(OS, /*PrintType=*/false);
+  F.printAsOperand(OS, /*PrintType=*/false);
    OS << "\n";
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
      if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
@@ -8391,7 +8446,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
          SE.getSignedRange(SV).print(OS);
        }
  
-      const Loop *L = LI->getLoopFor((*I).getParent());
+      const Loop *L = LI.getLoopFor((*I).getParent());
  
        const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
        if (AtUse != SV) {
@@ -8419,9 +8474,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
      }
  
    OS << "Determining loop execution counts for: ";
-  F->printAsOperand(OS, /*PrintType=*/false);
+  F.printAsOperand(OS, /*PrintType=*/false);
    OS << "\n";
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+  for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I)
      PrintLoopInfo(OS, &SE, *I);
  }
  
@@ -8565,7 +8620,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
      // produces the addrec's value is a PHI, and a PHI effectively properly
      // dominates its entire containing block.
      const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
-    if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+    if (!DT.dominates(AR->getLoop()->getHeader(), BB))
        return DoesNotDominateBlock;
    }
    // FALL THROUGH into SCEVNAryExpr handling.
@@ -8602,7 +8657,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
            dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
        if (I->getParent() == BB)
          return DominatesBlock;
-      if (DT->properlyDominates(I->getParent(), BB))
+      if (DT.properlyDominates(I->getParent(), BB))
          return ProperlyDominatesBlock;
        return DoesNotDominateBlock;
      }
@@ -8696,24 +8751,21 @@ getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
    }
  }
  
-void ScalarEvolution::verifyAnalysis() const {
-  if (!VerifySCEV)
-    return;
-
+void ScalarEvolution::verify() const {
    ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  
    // Gather stringified backedge taken counts for all loops using SCEV's caches.
    // FIXME: It would be much better to store actual values instead of strings,
    //        but SCEV pointers will change if we drop the caches.
    VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
-  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
      getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
  
-  // Gather stringified backedge taken counts for all loops without using
-  // SCEV's caches.
-  SE.releaseMemory();
-  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
-    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+  // Gather stringified backedge taken counts for all loops using a fresh
+  // ScalarEvolution object.
+  ScalarEvolution SE2(F, TLI, AC, DT, LI);
+  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
+    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
  
    // Now compare whether they're the same with and without caches. This allows
    // verifying that no pass changed the cache.
@@ -8746,3 +8798,63 @@ void ScalarEvolution::verifyAnalysis() const {
  
    // TODO: Verify more things.
  }
+
+char ScalarEvolutionAnalysis::PassID;
+
+ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
+                                             AnalysisManager<Function> *AM) {
+  return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F),
+                         AM->getResult<AssumptionAnalysis>(F),
+                         AM->getResult<DominatorTreeAnalysis>(F),
+                         AM->getResult<LoopAnalysis>(F));
+}
+
+PreservedAnalyses
+ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) {
+  AM->getResult<ScalarEvolutionAnalysis>(F).print(OS);
+  return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
+                      "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
+                    "Scalar Evolution Analysis", false, true)
+char ScalarEvolutionWrapperPass::ID = 0;
+
+ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
+  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
+  SE.reset(new ScalarEvolution(
+      F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+      getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+      getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
+  return false;
+}
+
+void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
+
+void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
+  SE->print(OS);
+}
+
+void ScalarEvolutionWrapperPass::verifyAnalysis() const {
+  if (!VerifySCEV)
+    return;
+
+  SE->verify();
+}
+
+void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AssumptionCacheTracker>();
+  AU.addRequiredTransitive<LoopInfoWrapperPass>();
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
+}