[Modules] Fix potential ODR violations by sinking the DEBUG_TYPE

[oota-llvm.git] / lib / Analysis / ScalarEvolution.cpp
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp

index f876748af3dc1cab39a166d3be2861cf815cb134..d28aa58a058b447869586dbea38e8fd5149757f4 100644 (file)
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -58,38 +58,38 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "scalar-evolution"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
  #include "llvm/IR/GlobalAlias.h"
  #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Operator.h"
  #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ConstantRange.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstIterator.h"
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  #include <algorithm>
  using namespace llvm;
  
+#define DEBUG_TYPE "scalar-evolution"
+
  STATISTIC(NumArrayLenItCounts,
            "Number of trip counts computed with array length");
  STATISTIC(NumTripCountsComputed,
@@ -114,7 +114,7 @@ VerifySCEV("verify-scev",
  INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
                  "Scalar Evolution Analysis", false, true)
  INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
  INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
                  "Scalar Evolution Analysis", false, true)
@@ -136,9 +136,9 @@ void SCEV::dump() const {
  #endif
  
  void SCEV::print(raw_ostream &OS) const {
-  switch (getSCEVType()) {
+  switch (static_cast<SCEVTypes>(getSCEVType())) {
    case scConstant:
-    WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+    cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
      return;
    case scTruncate: {
      const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
@@ -174,7 +174,7 @@ void SCEV::print(raw_ostream &OS) const {
      if (AR->getNoWrapFlags(FlagNW) &&
          !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
        OS << "nw><";
-    WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+    AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
      OS << ">";
      return;
    }
@@ -183,7 +183,7 @@ void SCEV::print(raw_ostream &OS) const {
    case scUMaxExpr:
    case scSMaxExpr: {
      const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
-    const char *OpStr = 0;
+    const char *OpStr = nullptr;
      switch (NAry->getSCEVType()) {
      case scAddExpr: OpStr = " + "; break;
      case scMulExpr: OpStr = " * "; break;
@@ -194,7 +194,7 @@ void SCEV::print(raw_ostream &OS) const {
      for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
           I != E; ++I) {
        OS << **I;
-      if (llvm::next(I) != E)
+      if (std::next(I) != E)
          OS << OpStr;
      }
      OS << ")";
@@ -229,25 +229,24 @@ void SCEV::print(raw_ostream &OS) const {
      Constant *FieldNo;
      if (U->isOffsetOf(CTy, FieldNo)) {
        OS << "offsetof(" << *CTy << ", ";
-      WriteAsOperand(OS, FieldNo, false);
+      FieldNo->printAsOperand(OS, false);
        OS << ")";
        return;
      }
  
      // Otherwise just print it normally.
-    WriteAsOperand(OS, U->getValue(), false);
+    U->getValue()->printAsOperand(OS, false);
      return;
    }
    case scCouldNotCompute:
      OS << "***COULDNOTCOMPUTE***";
      return;
-  default: break;
    }
    llvm_unreachable("Unknown SCEV kind!");
  }
  
  Type *SCEV::getType() const {
-  switch (getSCEVType()) {
+  switch (static_cast<SCEVTypes>(getSCEVType())) {
    case scConstant:
      return cast<SCEVConstant>(this)->getType();
    case scTruncate:
@@ -267,9 +266,8 @@ Type *SCEV::getType() const {
      return cast<SCEVUnknown>(this)->getType();
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default:
-    llvm_unreachable("Unknown SCEV kind!");
    }
+  llvm_unreachable("Unknown SCEV kind!");
  }
  
  bool SCEV::isZero() const {
@@ -315,14 +313,14 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
    FoldingSetNodeID ID;
    ID.AddInteger(scConstant);
    ID.AddPointer(V);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
    UniqueSCEVs.InsertNode(S, IP);
    return S;
  }
  
-const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
    return getConstant(ConstantInt::get(getContext(), Val));
  }
  
@@ -368,7 +366,7 @@ void SCEVUnknown::deleted() {
    SE->UniqueSCEVs.RemoveNode(this);
  
    // Release the value.
-  setValPtr(0);
+  setValPtr(nullptr);
  }
  
  void SCEVUnknown::allUsesReplacedWith(Value *New) {
@@ -482,7 +480,7 @@ namespace {
        // Aside from the getSCEVType() ordering, the particular ordering
        // isn't very important except that it's beneficial to be consistent,
        // so that (a + b) and (b + a) don't end up as different expressions.
-      switch (LType) {
+      switch (static_cast<SCEVTypes>(LType)) {
        case scUnknown: {
          const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
          const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
@@ -585,6 +583,9 @@ namespace {
  
          // Lexicographically compare n-ary expressions.
          unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+        if (LNumOps != RNumOps)
+          return (int)LNumOps - (int)RNumOps;
+
          for (unsigned i = 0; i != LNumOps; ++i) {
            if (i >= RNumOps)
              return 1;
@@ -616,9 +617,10 @@ namespace {
          return compare(LC->getOperand(), RC->getOperand());
        }
  
-      default:
-        llvm_unreachable("Unknown SCEV kind!");
+      case scCouldNotCompute:
+        llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
        }
+      llvm_unreachable("Unknown SCEV kind!");
      }
    };
  }
@@ -758,7 +760,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
    unsigned CalculationBits = W + T;
  
    // Calculate 2^T, at width T+W.
-  APInt DivFactor = APInt(CalculationBits, 1).shl(T);
+  APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
  
    // Calculate the multiplicative inverse of K! / 2^T;
    // this multiplication factor will perform the exact division by
@@ -828,7 +830,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
    ID.AddInteger(scTruncate);
    ID.AddPointer(Op);
    ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  
    // Fold if the operand is constant.
@@ -918,7 +920,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
    ID.AddInteger(scZeroExtend);
    ID.AddPointer(Op);
    ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  
    // zext(trunc(x)) --> zext(x) or x or trunc(x)
@@ -1071,7 +1073,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
      return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
                         SE->getSignedRange(Step).getSignedMin());
    }
-  return 0;
+  return nullptr;
  }
  
  // The recurrence AR has been shown to have no signed wrap. Typically, if we can
@@ -1090,7 +1092,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
    // Check for a simple looking step prior to loop entry.
    const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
    if (!SA)
-    return 0;
+    return nullptr;
  
    // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
    // subtraction is expensive. For this purpose, perform a quick and dirty
@@ -1102,7 +1104,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
        DiffOps.push_back(*I);
    }
    if (DiffOps.size() == SA->getNumOperands())
-    return 0;
+    return nullptr;
  
    // This is a postinc AR. Check for overflow on the preinc recurrence using the
    // same three conditions that getSignExtendedExpr checks.
@@ -1138,7 +1140,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
        SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
      return PreStart;
    }
-  return 0;
+  return nullptr;
  }
  
  // Get the normalized sign-extended expression for this AddRec's Start.
@@ -1180,7 +1182,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
    ID.AddInteger(scSignExtend);
    ID.AddPointer(Op);
    ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  
    // If the input value is provably positive, build a zext instead.
@@ -1359,7 +1361,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
  /// what it does, given a sequence of operands that would form an add
  /// expression like this:
  ///
-///    m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///    m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
  ///
  /// where A and B are constants, update the map with these values:
  ///
@@ -1380,7 +1382,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
  ///
  static bool
  CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
-                             SmallVector<const SCEV *, 8> &NewOps,
+                             SmallVectorImpl<const SCEV *> &NewOps,
                               APInt &AccumulatedConstant,
                               const SCEV *const *Ops, size_t NumOperands,
                               const APInt &Scale,
@@ -1628,7 +1630,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
        // re-generate the operands list. Group the operands by constant scale,
        // to avoid multiplying by the same constant scale multiple times.
        std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
-      for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
+      for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
             E = NewOps.end(); I != E; ++I)
          MulOpLists[M.find(*I)->second].push_back(*I);
        // Re-generate the operands list.
@@ -1810,7 +1812,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
    ID.AddInteger(scAddExpr);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
    SCEVAddExpr *S =
      static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
    if (!S) {
@@ -2104,7 +2106,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
    ID.AddInteger(scMulExpr);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
    SCEVMulExpr *S =
      static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
    if (!S) {
@@ -2229,7 +2231,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
    ID.AddInteger(scUDivExpr);
    ID.AddPointer(LHS);
    ID.AddPointer(RHS);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
                                               LHS, RHS);
@@ -2237,6 +2239,77 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
    return S;
  }
  
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+  APInt A = C1->getValue()->getValue().abs();
+  APInt B = C2->getValue()->getValue().abs();
+  uint32_t ABW = A.getBitWidth();
+  uint32_t BBW = B.getBitWidth();
+
+  if (ABW > BBW)
+    B = B.zext(ABW);
+  else if (ABW < BBW)
+    A = A.zext(BBW);
+
+  return APIntOps::GreatestCommonDivisor(A, B);
+}
+
+/// getUDivExactExpr - Get a canonical unsigned division expression, or
+/// something simpler if possible. There is no representation for an exact udiv
+/// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
+/// We can't do this when it's not exact because the udiv may be clearing bits.
+const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
+                                              const SCEV *RHS) {
+  // TODO: we could try to find factors in all sorts of things, but for now we
+  // just deal with u/exact (multiply, constant). See SCEVDivision towards the
+  // end of this file for inspiration.
+
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
+  if (!Mul)
+    return getUDivExpr(LHS, RHS);
+
+  if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
+    // If the mulexpr multiplies by a constant, then that constant must be the
+    // first element of the mulexpr.
+    if (const SCEVConstant *LHSCst =
+            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+      if (LHSCst == RHSCst) {
+        SmallVector<const SCEV *, 2> Operands;
+        Operands.append(Mul->op_begin() + 1, Mul->op_end());
+        return getMulExpr(Operands);
+      }
+
+      // We can't just assume that LHSCst divides RHSCst cleanly, it could be
+      // that there's a factor provided by one of the other terms. We need to
+      // check.
+      APInt Factor = gcd(LHSCst, RHSCst);
+      if (!Factor.isIntN(1)) {
+        LHSCst = cast<SCEVConstant>(
+            getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
+        RHSCst = cast<SCEVConstant>(
+            getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
+        SmallVector<const SCEV *, 2> Operands;
+        Operands.push_back(LHSCst);
+        Operands.append(Mul->op_begin() + 1, Mul->op_end());
+        LHS = getMulExpr(Operands);
+        RHS = RHSCst;
+        Mul = dyn_cast<SCEVMulExpr>(LHS);
+        if (!Mul)
+          return getUDivExactExpr(LHS, RHS);
+      }
+    }
+  }
+
+  for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
+    if (Mul->getOperand(i) == RHS) {
+      SmallVector<const SCEV *, 2> Operands;
+      Operands.append(Mul->op_begin(), Mul->op_begin() + i);
+      Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
+      return getMulExpr(Operands);
+    }
+  }
+
+  return getUDivExpr(LHS, RHS);
+}
  
  /// getAddRecExpr - Get an add recurrence expression for the specified loop.
  /// Simplify the expression as much as possible.
@@ -2353,7 +2426,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
      ID.AddPointer(Operands[i]);
    ID.AddPointer(L);
-  void *IP = 0;
+  void *IP = nullptr;
    SCEVAddRecExpr *S =
      static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
    if (!S) {
@@ -2461,7 +2534,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
    ID.AddInteger(scSMaxExpr);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2565,7 +2638,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
    ID.AddInteger(scUMaxExpr);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2587,55 +2660,39 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
    return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
  }
  
-const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
    // If we have DataLayout, we can bypass creating a target-independent
    // constant expression and then folding it back into a ConstantInt.
    // This is just a compile-time optimization.
-  if (TD)
-    return getConstant(TD->getIntPtrType(getContext()),
-                       TD->getTypeAllocSize(AllocTy));
+  if (DL)
+    return getConstant(IntTy, DL->getTypeAllocSize(AllocTy));
  
    Constant *C = ConstantExpr::getSizeOf(AllocTy);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
        C = Folded;
    Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  assert(Ty == IntTy && "Effective SCEV type doesn't match");
    return getTruncateOrZeroExtend(getSCEV(C), Ty);
  }
  
-const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
-  Constant *C = ConstantExpr::getAlignOf(AllocTy);
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
-      C = Folded;
-  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
-  return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-
-const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
+                                             StructType *STy,
                                               unsigned FieldNo) {
    // If we have DataLayout, we can bypass creating a target-independent
    // constant expression and then folding it back into a ConstantInt.
    // This is just a compile-time optimization.
-  if (TD)
-    return getConstant(TD->getIntPtrType(getContext()),
-                       TD->getStructLayout(STy)->getElementOffset(FieldNo));
+  if (DL) {
+    return getConstant(IntTy,
+                       DL->getStructLayout(STy)->getElementOffset(FieldNo));
+  }
  
    Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
        C = Folded;
-  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
-  return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
  
-const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
-                                             Constant *FieldNo) {
-  Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
-      C = Folded;
-  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+  Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
    return getTruncateOrZeroExtend(getSCEV(C), Ty);
  }
  
@@ -2648,7 +2705,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
    FoldingSetNodeID ID;
    ID.AddInteger(scUnknown);
    ID.AddPointer(V);
-  void *IP = 0;
+  void *IP = nullptr;
    if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
      assert(cast<SCEVUnknown>(S)->getValue() == V &&
             "Stale SCEVUnknown in uniquing map!");
@@ -2680,8 +2737,8 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
    assert(isSCEVable(Ty) && "Type is not SCEVable!");
  
    // If we have a DataLayout, use it!
-  if (TD)
-    return TD->getTypeSizeInBits(Ty);
+  if (DL)
+    return DL->getTypeSizeInBits(Ty);
  
    // Integer types have fixed sizes.
    if (Ty->isIntegerTy())
@@ -2700,12 +2757,15 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
  Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
    assert(isSCEVable(Ty) && "Type is not SCEVable!");
  
-  if (Ty->isIntegerTy())
+  if (Ty->isIntegerTy()) {
      return Ty;
+  }
  
    // The only other support type is pointer.
    assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
-  if (TD) return TD->getIntPtrType(getContext());
+
+  if (DL)
+    return DL->getIntPtrType(Ty);
  
    // Without DataLayout, conservatively assume pointers are 64-bit.
    return Type::getInt64Ty(getContext());
@@ -2715,13 +2775,51 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
    return &CouldNotCompute;
  }
  
+namespace {
+  // Helper class working with SCEVTraversal to figure out if a SCEV contains
+  // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
+  // is set iff if find such SCEVUnknown.
+  //
+  struct FindInvalidSCEVUnknown {
+    bool FindOne;
+    FindInvalidSCEVUnknown() { FindOne = false; }
+    bool follow(const SCEV *S) {
+      switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+      case scConstant:
+        return false;
+      case scUnknown:
+        if (!cast<SCEVUnknown>(S)->getValue())
+          FindOne = true;
+        return false;
+      default:
+        return true;
+      }
+    }
+    bool isDone() const { return FindOne; }
+  };
+}
+
+bool ScalarEvolution::checkValidity(const SCEV *S) const {
+  FindInvalidSCEVUnknown F;
+  SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
+  ST.visitAll(S);
+
+  return !F.FindOne;
+}
+
  /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
  /// expression and create a new one.
  const SCEV *ScalarEvolution::getSCEV(Value *V) {
    assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
  
-  ValueExprMapType::const_iterator I = ValueExprMap.find_as(V);
-  if (I != ValueExprMap.end()) return I->second;
+  ValueExprMapType::iterator I = ValueExprMap.find_as(V);
+  if (I != ValueExprMap.end()) {
+    const SCEV *S = I->second;
+    if (checkValidity(S))
+      return S;
+    else
+      ValueExprMap.erase(I);
+  }
    const SCEV *S = createSCEV(V);
  
    // The process of creating a SCEV for V may have caused other SCEVs
@@ -2913,7 +3011,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
      return getPointerBase(Cast->getOperand());
    }
    else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
-    const SCEV *PtrOp = 0;
+    const SCEV *PtrOp = nullptr;
      for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
           I != E; ++I) {
        if ((*I)->getType()->isPointerTy()) {
@@ -2936,9 +3034,8 @@ static void
  PushDefUseChildren(Instruction *I,
                     SmallVectorImpl<Instruction *> &Worklist) {
    // Push the def-use children onto the Worklist stack.
-  for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-       UI != UE; ++UI)
-    Worklist.push_back(cast<Instruction>(*UI));
+  for (User *U : I->users())
+    Worklist.push_back(cast<Instruction>(U));
  }
  
  /// ForgetSymbolicValue - This looks up computed SCEV values for all
@@ -2994,20 +3091,20 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
        // The loop may have multiple entrances or multiple exits; we can analyze
        // this phi as an addrec if it has a unique entry value and a unique
        // backedge value.
-      Value *BEValueV = 0, *StartValueV = 0;
+      Value *BEValueV = nullptr, *StartValueV = nullptr;
        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
          Value *V = PN->getIncomingValue(i);
          if (L->contains(PN->getIncomingBlock(i))) {
            if (!BEValueV) {
              BEValueV = V;
            } else if (BEValueV != V) {
-            BEValueV = 0;
+            BEValueV = nullptr;
              break;
            }
          } else if (!StartValueV) {
            StartValueV = V;
          } else if (StartValueV != V) {
-          StartValueV = 0;
+          StartValueV = nullptr;
            break;
          }
        }
@@ -3060,15 +3157,26 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
                    Flags = setFlags(Flags, SCEV::FlagNUW);
                  if (OBO->hasNoSignedWrap())
                    Flags = setFlags(Flags, SCEV::FlagNSW);
-              } else if (const GEPOperator *GEP =
-                         dyn_cast<GEPOperator>(BEValueV)) {
+              } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
                  // If the increment is an inbounds GEP, then we know the address
                  // space cannot be wrapped around. We cannot make any guarantee
                  // about signed or unsigned overflow because pointers are
                  // unsigned but we may have a negative index from the base
-                // pointer.
-                if (GEP->isInBounds())
+                // pointer. We can guarantee that no unsigned wrap occurs if the
+                // indices form a positive value.
+                if (GEP->isInBounds()) {
                    Flags = setFlags(Flags, SCEV::FlagNW);
+
+                  const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+                  if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+                    Flags = setFlags(Flags, SCEV::FlagNUW);
+                }
+              } else if (const SubOperator *OBO =
+                           dyn_cast<SubOperator>(BEValueV)) {
+                if (OBO->hasNoUnsignedWrap())
+                  Flags = setFlags(Flags, SCEV::FlagNUW);
+                if (OBO->hasNoSignedWrap())
+                  Flags = setFlags(Flags, SCEV::FlagNSW);
                }
  
                const SCEV *StartVal = getSCEV(StartValueV);
@@ -3124,7 +3232,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
    // PHI's incoming blocks are in a different loop, in which case doing so
    // risks breaking LCSSA form. Instcombine would normally zap these, but
    // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
+  if (Value *V = SimplifyInstruction(PN, DL, TLI, DT))
      if (LI->replacementPreservesLCSSAForm(PN, V))
        return getSCEV(V);
  
@@ -3136,21 +3244,21 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
  /// operations. This allows them to be analyzed by regular SCEV code.
  ///
  const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+  Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+  Value *Base = GEP->getOperand(0);
+  // Don't attempt to analyze GEPs over unsized objects.
+  if (!Base->getType()->getPointerElementType()->isSized())
+    return getUnknown(GEP);
  
    // Don't blindly transfer the inbounds flag from the GEP instruction to the
    // Add expression, because the Instruction may be guarded by control flow
    // and the no-overflow bits may not be valid for the expression in any
    // context.
-  bool isInBounds = GEP->isInBounds();
+  SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
  
-  Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
-  Value *Base = GEP->getOperand(0);
-  // Don't attempt to analyze GEPs over unsized objects.
-  if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
-    return getUnknown(GEP);
    const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
    gep_type_iterator GTI = gep_type_begin(GEP);
-  for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+  for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()),
                                        E = GEP->op_end();
         I != E; ++I) {
      Value *Index = *I;
@@ -3158,21 +3266,19 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
      if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
        // For a struct, add the member offset.
        unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
-      const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+      const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
  
        // Add the field offset to the running total offset.
        TotalOffset = getAddExpr(TotalOffset, FieldOffset);
      } else {
        // For an array, add the element offset, explicitly scaled.
-      const SCEV *ElementSize = getSizeOfExpr(*GTI);
+      const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
        const SCEV *IndexS = getSCEV(Index);
        // Getelementptr indices are signed.
        IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
  
        // Multiply the index by the element size to compute the element offset.
-      const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
-                                           isInBounds ? SCEV::FlagNSW :
-                                           SCEV::FlagAnyWrap);
+      const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
  
        // Add the element offset to the running total offset.
        TotalOffset = getAddExpr(TotalOffset, LocalOffset);
@@ -3183,8 +3289,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
    const SCEV *BaseS = getSCEV(Base);
  
    // Add the total offset from all the GEP indices to the base.
-  return getAddExpr(BaseS, TotalOffset,
-                    isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+  return getAddExpr(BaseS, TotalOffset, Wrap);
  }
  
  /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3398,7 +3503,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // For a SCEVUnknown, ask ValueTracking.
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
+    ComputeMaskedBits(U->getValue(), Zeros, Ones, DL);
      if (Ones == ~Zeros + 1)
        return setUnsignedRange(U, ConservativeResult);
      return setUnsignedRange(U,
@@ -3548,10 +3653,10 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
  
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // For a SCEVUnknown, ask ValueTracking.
-    if (!U->getValue()->getType()->isIntegerTy() && !TD)
+    if (!U->getValue()->getType()->isIntegerTy() && !DL)
        return setSignedRange(U, ConservativeResult);
-    unsigned NS = ComputeNumSignBits(U->getValue(), TD);
-    if (NS == 1)
+    unsigned NS = ComputeNumSignBits(U->getValue(), DL);
+    if (NS <= 1)
        return setSignedRange(U, ConservativeResult);
      return setSignedRange(U, ConservativeResult.intersectWith(
        ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
@@ -3654,17 +3759,24 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
        // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
        // knew about to reconstruct a low-bits mask value.
        unsigned LZ = A.countLeadingZeros();
+      unsigned TZ = A.countTrailingZeros();
        unsigned BitWidth = A.getBitWidth();
        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
-
-      APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
-
-      if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
-        return
-          getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
-                                IntegerType::get(getContext(), BitWidth - LZ)),
-                            U->getType());
+      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, DL);
+
+      APInt EffectiveMask =
+          APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
+      if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
+        const SCEV *MulCount = getConstant(
+            ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
+        return getMulExpr(
+            getZeroExtendExpr(
+                getTruncateExpr(
+                    getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
+                    IntegerType::get(getContext(), BitWidth - LZ - TZ)),
+                U->getType()),
+            MulCount);
+      }
      }
      break;
  
@@ -3751,7 +3863,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
          break;
  
        Constant *X = ConstantInt::get(getContext(),
-        APInt(BitWidth, 1).shl(SA->getZExtValue()));
+        APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
        return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
      }
      break;
@@ -3769,7 +3881,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
          break;
  
        Constant *X = ConstantInt::get(getContext(),
-        APInt(BitWidth, 1).shl(SA->getZExtValue()));
+        APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
        return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
      }
      break;
@@ -3947,7 +4059,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
  /// depends on a NSW assumption, and we would only fall back to a conservative
  /// trip count in that case.
  unsigned ScalarEvolution::
-getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) {
+getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
    const SCEVConstant *ExitCount =
      dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
    if (!ExitCount)
@@ -3976,7 +4088,7 @@ getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) {
  /// As explained in the comments for getSmallConstantTripCount, this assumes
  /// that control exits the loop via ExitingBlock.
  unsigned ScalarEvolution::
-getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) {
+getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) {
    const SCEV *ExitCount = getBackedgeTakenCount(L);
    if (ExitCount == getCouldNotCompute())
      return 1;
@@ -4205,9 +4317,9 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
    if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
    assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
  
-  const SCEV *BECount = 0;
+  const SCEV *BECount = nullptr;
    for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
  
      assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
  
@@ -4225,7 +4337,7 @@ const SCEV *
  ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
                                               ScalarEvolution *SE) const {
    for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
  
      if (ENT->ExitingBlock == ExitingBlock)
        return ENT->ExactNotTaken;
@@ -4248,7 +4360,7 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
      return false;
  
    for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
  
      if (ENT->ExactNotTaken != SE->getCouldNotCompute()
          && SE->hasOperand(ENT->ExactNotTaken, S)) {
@@ -4287,8 +4399,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
  
  /// clear - Invalidate this result and free the ExitNotTakenInfo array.
  void ScalarEvolution::BackedgeTakenInfo::clear() {
-  ExitNotTaken.ExitingBlock = 0;
-  ExitNotTaken.ExactNotTaken = 0;
+  ExitNotTaken.ExitingBlock = nullptr;
+  ExitNotTaken.ExactNotTaken = nullptr;
    delete[] ExitNotTaken.getNextExit();
  }
  
@@ -4302,6 +4414,8 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
    // Examine all exits and pick the most conservative values.
    const SCEV *MaxBECount = getCouldNotCompute();
    bool CouldComputeBECount = true;
+  BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
+  const SCEV *LatchMaxCount = nullptr;
    SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
@@ -4318,13 +4432,17 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
        // We cannot take the "min" MaxBECount, because non-unit stride loops may
        // skip some loop tests. Taking the max over the exits is sufficiently
        // conservative.  TODO: We could do better taking into consideration
-      // that (1) the loop has unit stride (2) the last loop test is
-      // less-than/greater-than (3) any loop test is less-than/greater-than AND
-      // falls-through some constant times less then the other tests.
-      MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
+      // non-latch exits that dominate the latch.
+      if (EL.MustExit && ExitingBlocks[i] == Latch)
+        LatchMaxCount = EL.Max;
+      else
+        MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
      }
    }
-
+  // Be more precise in the easy case of a loop latch that must exit.
+  if (LatchMaxCount) {
+    MaxBECount = getUMinFromMismatchedTypes(MaxBECount, LatchMaxCount);
+  }
    return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
  }
  
@@ -4334,12 +4452,19 @@ ScalarEvolution::ExitLimit
  ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
  
    // Okay, we've chosen an exiting block.  See what condition causes us to
-  // exit at this block.
-  //
-  // FIXME: we should be able to handle switch instructions (with a single exit)
-  BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
-  if (ExitBr == 0) return getCouldNotCompute();
-  assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+  // exit at this block and remember the exit block and whether all other targets
+  // lead to the loop header.
+  bool MustExecuteLoopHeader = true;
+  BasicBlock *Exit = nullptr;
+  for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
+       SI != SE; ++SI)
+    if (!L->contains(*SI)) {
+      if (Exit) // Multiple exit successors.
+        return getCouldNotCompute();
+      Exit = *SI;
+    } else if (*SI != L->getHeader()) {
+      MustExecuteLoopHeader = false;
+    }
  
    // At this point, we know we have a conditional branch that determines whether
    // the loop is exited.  However, we don't know if the branch is executed each
@@ -4358,13 +4483,11 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
    //
    //  More extensive analysis could be done to handle more cases here.
    //
-  if (ExitBr->getSuccessor(0) != L->getHeader() &&
-      ExitBr->getSuccessor(1) != L->getHeader() &&
-      ExitBr->getParent() != L->getHeader()) {
+  if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
      // The simple checks failed, try climbing the unique predecessor chain
      // up to the header.
      bool Ok = false;
-    for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+    for (BasicBlock *BB = ExitingBlock; BB; ) {
        BasicBlock *Pred = BB->getUniquePredecessor();
        if (!Pred)
          return getCouldNotCompute();
@@ -4388,11 +4511,20 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
        return getCouldNotCompute();
    }
  
-  // Proceed to the next level to examine the exit condition expression.
-  return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
-                                  ExitBr->getSuccessor(0),
-                                  ExitBr->getSuccessor(1),
-                                  /*IsSubExpr=*/false);
+  TerminatorInst *Term = ExitingBlock->getTerminator();
+  if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
+    assert(BI->isConditional() && "If unconditional, it can't be in loop!");
+    // Proceed to the next level to examine the exit condition expression.
+    return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
+                                    BI->getSuccessor(1),
+                                    /*IsSubExpr=*/false);
+  }
+
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
+    return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
+                                                /*IsSubExpr=*/false);
+
+  return getCouldNotCompute();
  }
  
  /// ComputeExitLimitFromCond - Compute the number of times the
@@ -4420,6 +4552,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
                                                 IsSubExpr || EitherMayExit);
        const SCEV *BECount = getCouldNotCompute();
        const SCEV *MaxBECount = getCouldNotCompute();
+      bool MustExit = false;
        if (EitherMayExit) {
          // Both conditions must be true for the loop to continue executing.
          // Choose the less conservative count.
@@ -4434,6 +4567,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
            MaxBECount = EL0.Max;
          else
            MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+        MustExit = EL0.MustExit || EL1.MustExit;
        } else {
          // Both conditions must be true at the same time for the loop to exit.
          // For now, be conservative.
@@ -4442,9 +4576,10 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
            MaxBECount = EL0.Max;
          if (EL0.Exact == EL1.Exact)
            BECount = EL0.Exact;
+        MustExit = EL0.MustExit && EL1.MustExit;
        }
  
-      return ExitLimit(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount, MustExit);
      }
      if (BO->getOpcode() == Instruction::Or) {
        // Recurse on the operands of the or.
@@ -4455,6 +4590,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
                                                 IsSubExpr || EitherMayExit);
        const SCEV *BECount = getCouldNotCompute();
        const SCEV *MaxBECount = getCouldNotCompute();
+      bool MustExit = false;
        if (EitherMayExit) {
          // Both conditions must be false for the loop to continue executing.
          // Choose the less conservative count.
@@ -4469,6 +4605,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
            MaxBECount = EL0.Max;
          else
            MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+        MustExit = EL0.MustExit || EL1.MustExit;
        } else {
          // Both conditions must be false at the same time for the loop to exit.
          // For now, be conservative.
@@ -4477,9 +4614,10 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
            MaxBECount = EL0.Max;
          if (EL0.Exact == EL1.Exact)
            BECount = EL0.Exact;
+        MustExit = EL0.MustExit && EL1.MustExit;
        }
  
-      return ExitLimit(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount, MustExit);
      }
    }
  
@@ -4575,25 +4713,17 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
      if (EL.hasAnyInfo()) return EL;
      break;
    }
-  case ICmpInst::ICMP_SLT: {
-    ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
-    if (EL.hasAnyInfo()) return EL;
-    break;
-  }
-  case ICmpInst::ICMP_SGT: {
-    ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
-                                    getNotSCEV(RHS), L, true, IsSubExpr);
-    if (EL.hasAnyInfo()) return EL;
-    break;
-  }
-  case ICmpInst::ICMP_ULT: {
-    ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
+  case ICmpInst::ICMP_SLT:
+  case ICmpInst::ICMP_ULT: {                    // while (X < Y)
+    bool IsSigned = Cond == ICmpInst::ICMP_SLT;
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr);
      if (EL.hasAnyInfo()) return EL;
      break;
    }
-  case ICmpInst::ICMP_UGT: {
-    ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
-                                    getNotSCEV(RHS), L, false, IsSubExpr);
+  case ICmpInst::ICMP_SGT:
+  case ICmpInst::ICMP_UGT: {                    // while (X > Y)
+    bool IsSigned = Cond == ICmpInst::ICMP_SGT;
+    ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr);
      if (EL.hasAnyInfo()) return EL;
      break;
    }
@@ -4611,6 +4741,30 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
    return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
  }
  
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
+                                                      SwitchInst *Switch,
+                                                      BasicBlock *ExitingBlock,
+                                                      bool IsSubExpr) {
+  assert(!L->contains(ExitingBlock) && "Not an exiting block!");
+
+  // Give up if the exit is the default dest of a switch.
+  if (Switch->getDefaultDest() == ExitingBlock)
+    return getCouldNotCompute();
+
+  assert(L->contains(Switch->getDefaultDest()) &&
+         "Default case must not exit the loop!");
+  const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
+  const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
+
+  // while (X != Y) --> while (X-Y != 0)
+  ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
+  if (EL.hasAnyInfo())
+    return EL;
+
+  return getCouldNotCompute();
+}
+
  static ConstantInt *
  EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
                                  ScalarEvolution &SE) {
@@ -4647,7 +4801,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
      return getCouldNotCompute();
  
    // Okay, we allow one non-constant index into the GEP instruction.
-  Value *VarIdx = 0;
+  Value *VarIdx = nullptr;
    std::vector<Constant*> Indexes;
    unsigned VarIdxNum = 0;
    for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
@@ -4657,7 +4811,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
        if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
        VarIdx = GEP->getOperand(i);
        VarIdxNum = i-2;
-      Indexes.push_back(0);
+      Indexes.push_back(nullptr);
      }
  
    // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
@@ -4688,7 +4842,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
  
      Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
                                                           Indexes);
-    if (Result == 0) break;  // Cannot compute!
+    if (!Result) break;  // Cannot compute!
  
      // Evaluate the condition for this iteration.
      Result = ConstantExpr::getICmp(predicate, Result, RHS);
@@ -4749,14 +4903,14 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
  
    // Otherwise, we can evaluate this instruction if all of its operands are
    // constant or derived from a PHI node themselves.
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
    for (Instruction::op_iterator OpI = UseInst->op_begin(),
           OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
  
      if (isa<Constant>(*OpI)) continue;
  
      Instruction *OpInst = dyn_cast<Instruction>(*OpI);
-    if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+    if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
  
      PHINode *P = dyn_cast<PHINode>(OpInst);
      if (!P)
@@ -4770,8 +4924,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
        P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
        PHIMap[OpInst] = P;
      }
-    if (P == 0) return 0;        // Not evolving from PHI
-    if (PHI && PHI != P) return 0;  // Evolving from multiple different PHIs.
+    if (!P)
+      return nullptr;  // Not evolving from PHI
+    if (PHI && PHI != P)
+      return nullptr;  // Evolving from multiple different PHIs.
      PHI = P;
    }
    // This is a expression evolving from a constant PHI!
@@ -4785,7 +4941,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
  /// constraints, return null.
  static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
    Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !canConstantEvolve(I, L)) return 0;
+  if (!I || !canConstantEvolve(I, L)) return nullptr;
  
    if (PHINode *PN = dyn_cast<PHINode>(I)) {
      return PN;
@@ -4802,23 +4958,23 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
  /// reason, return null.
  static Constant *EvaluateExpression(Value *V, const Loop *L,
                                      DenseMap<Instruction *, Constant *> &Vals,
-                                    const DataLayout *TD,
+                                    const DataLayout *DL,
                                      const TargetLibraryInfo *TLI) {
    // Convenient constant check, but redundant for recursive calls.
    if (Constant *C = dyn_cast<Constant>(V)) return C;
    Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return 0;
+  if (!I) return nullptr;
  
    if (Constant *C = Vals.lookup(I)) return C;
  
    // An instruction inside the loop depends on a value outside the loop that we
    // weren't given a mapping for, or a value such as a call inside the loop.
-  if (!canConstantEvolve(I, L)) return 0;
+  if (!canConstantEvolve(I, L)) return nullptr;
  
    // An unmapped PHI can be due to a branch or another loop inside this loop,
    // or due to this not being the initial iteration through a loop where we
    // couldn't compute the evolution of this particular PHI last time.
-  if (isa<PHINode>(I)) return 0;
+  if (isa<PHINode>(I)) return nullptr;
  
    std::vector<Constant*> Operands(I->getNumOperands());
  
@@ -4826,23 +4982,23 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
      Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
      if (!Operand) {
        Operands[i] = dyn_cast<Constant>(I->getOperand(i));
-      if (!Operands[i]) return 0;
+      if (!Operands[i]) return nullptr;
        continue;
      }
-    Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
+    Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
      Vals[Operand] = C;
-    if (!C) return 0;
+    if (!C) return nullptr;
      Operands[i] = C;
    }
  
    if (CmpInst *CI = dyn_cast<CmpInst>(I))
      return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
-                                           Operands[1], TD, TLI);
+                                           Operands[1], DL, TLI);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(Operands[0], TD);
+      return ConstantFoldLoadFromConstPtr(Operands[0], DL);
    }
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
                                    TLI);
  }
  
@@ -4860,7 +5016,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
      return I->second;
  
    if (BEs.ugt(MaxBruteForceIterations))
-    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
+    return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.
  
    Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
  
@@ -4872,22 +5028,22 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
    // entry must be a constant (coming in from outside of the loop), and the
    // second must be derived from the same PHI.
    bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
    for (BasicBlock::iterator I = Header->begin();
         (PHI = dyn_cast<PHINode>(I)); ++I) {
      Constant *StartCST =
        dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
-    if (StartCST == 0) continue;
+    if (!StartCST) continue;
      CurrentIterVals[PHI] = StartCST;
    }
    if (!CurrentIterVals.count(PN))
-    return RetVal = 0;
+    return RetVal = nullptr;
  
    Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
  
    // Execute the loop symbolically to determine the exit value.
    if (BEs.getActiveBits() >= 32)
-    return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+    return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
  
    unsigned NumIterations = BEs.getZExtValue(); // must be in range
    unsigned IterationNum = 0;
@@ -4898,10 +5054,10 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
      // Compute the value of the PHIs for the next iteration.
      // EvaluateExpression adds non-phi values to the CurrentIterVals map.
      DenseMap<Instruction *, Constant *> NextIterVals;
-    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
                                             TLI);
-    if (NextPHI == 0)
-      return 0;        // Couldn't evaluate!
+    if (!NextPHI)
+      return nullptr;        // Couldn't evaluate!
      NextIterVals[PN] = NextPHI;
  
      bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
@@ -4924,7 +5080,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
        Constant *&NextPHI = NextIterVals[PHI];
        if (!NextPHI) {   // Not already computed.
          Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
-        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
        }
        if (NextPHI != I->second)
          StoppedEvolving = false;
@@ -4948,7 +5104,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
                                                            Value *Cond,
                                                            bool ExitWhen) {
    PHINode *PN = getConstantEvolvingPHI(Cond, L);
-  if (PN == 0) return getCouldNotCompute();
+  if (!PN) return getCouldNotCompute();
  
    // If the loop is canonicalized, the PHI will have exactly two entries.
    // That's the only form we support here.
@@ -4961,12 +5117,12 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
    // One entry must be a constant (coming in from outside of the loop), and the
    // second must be derived from the same PHI.
    bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
    for (BasicBlock::iterator I = Header->begin();
         (PHI = dyn_cast<PHINode>(I)); ++I) {
      Constant *StartCST =
        dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
-    if (StartCST == 0) continue;
+    if (!StartCST) continue;
      CurrentIterVals[PHI] = StartCST;
    }
    if (!CurrentIterVals.count(PN))
@@ -4980,7 +5136,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
    for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
      ConstantInt *CondVal =
        dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
-                                                       TD, TLI));
+                                                       DL, TLI));
  
      // Couldn't symbolically evaluate.
      if (!CondVal) return getCouldNotCompute();
@@ -5010,7 +5166,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
        if (NextPHI) continue;    // Already computed!
  
        Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
-      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
      }
      CurrentIterVals.swap(NextIterVals);
    }
@@ -5031,15 +5187,21 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
  /// original value V is returned.
  const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
    // Check to see if we've folded this expression at this loop before.
-  std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
-  std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
-    Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
-  if (!Pair.second)
-    return Pair.first->second ? Pair.first->second : V;
-
+  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V];
+  for (unsigned u = 0; u < Values.size(); u++) {
+    if (Values[u].first == L)
+      return Values[u].second ? Values[u].second : V;
+  }
+  Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
    // Otherwise compute it.
    const SCEV *C = computeSCEVAtScope(V, L);
-  ValuesAtScopes[V][L] = C;
+  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
+  for (unsigned u = Values2.size(); u > 0; u--) {
+    if (Values2[u - 1].first == L) {
+      Values2[u - 1].second = C;
+      break;
+    }
+  }
    return C;
  }
  
@@ -5048,8 +5210,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
  /// Returns NULL if the SCEV isn't representable as a Constant.
  static Constant *BuildConstantFromSCEV(const SCEV *V) {
-  switch (V->getSCEVType()) {
-    default:  // TODO: smax, umax.
+  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
      case scCouldNotCompute:
      case scAddRecExpr:
        break;
@@ -5078,27 +5239,32 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
      case scAddExpr: {
        const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
        if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
-        if (C->getType()->isPointerTy())
-          C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+        if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+          unsigned AS = PTy->getAddressSpace();
+          Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+          C = ConstantExpr::getBitCast(C, DestPtrTy);
+        }
          for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
            Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
-          if (!C2) return 0;
+          if (!C2) return nullptr;
  
            // First pointer!
            if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+            unsigned AS = C2->getType()->getPointerAddressSpace();
              std::swap(C, C2);
+            Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
              // The offsets have been converted to bytes.  We can add bytes to an
              // i8* by GEP with the byte count in the first index.
-            C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+            C = ConstantExpr::getBitCast(C, DestPtrTy);
            }
  
            // Don't bother trying to sum two pointers. We probably can't
            // statically compute a load that results from it anyway.
            if (C2->getType()->isPointerTy())
-            return 0;
+            return nullptr;
  
-          if (C->getType()->isPointerTy()) {
-            if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+          if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+            if (PTy->getElementType()->isStructTy())
                C2 = ConstantExpr::getIntegerCast(
                    C2, Type::getInt32Ty(C->getContext()), true);
              C = ConstantExpr::getGetElementPtr(C, C2);
@@ -5113,10 +5279,10 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
        const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
        if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
          // Don't bother with pointers at all.
-        if (C->getType()->isPointerTy()) return 0;
+        if (C->getType()->isPointerTy()) return nullptr;
          for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
            Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
-          if (!C2 || C2->getType()->isPointerTy()) return 0;
+          if (!C2 || C2->getType()->isPointerTy()) return nullptr;
            C = ConstantExpr::getMul(C, C2);
          }
          return C;
@@ -5131,8 +5297,11 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
              return ConstantExpr::getUDiv(LHS, RHS);
        break;
      }
+    case scSMaxExpr:
+    case scUMaxExpr:
+      break; // TODO: smax, umax.
    }
-  return 0;
+  return nullptr;
  }
  
  const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
@@ -5199,17 +5368,17 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  
          // Check to see if getSCEVAtScope actually made an improvement.
          if (MadeImprovement) {
-          Constant *C = 0;
+          Constant *C = nullptr;
            if (const CmpInst *CI = dyn_cast<CmpInst>(I))
              C = ConstantFoldCompareInstOperands(CI->getPredicate(),
-                                                Operands[0], Operands[1], TD,
+                                                Operands[0], Operands[1], DL,
                                                  TLI);
            else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
              if (!LI->isVolatile())
-              C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
+              C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
            } else
              C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
-                                         Operands, TD, TLI);
+                                         Operands, DL, TLI);
            if (!C) return V;
            return getSCEV(C);
          }
@@ -5531,7 +5700,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
    // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
    // We have not yet seen any such cases.
    const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
-  if (StepC == 0 || StepC->getValue()->equalsInt(0))
+  if (!StepC || StepC->getValue()->equalsInt(0))
      return getCouldNotCompute();
  
    // For positive steps (counting up until unsigned overflow):
@@ -5556,7 +5725,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
      else
        MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
                                           : -CR.getUnsignedMin());
-    return ExitLimit(Distance, MaxBECount);
+    return ExitLimit(Distance, MaxBECount, /*MustExit=*/true);
    }
  
    // If the recurrence is known not to wraparound, unsigned divide computes the
@@ -5564,15 +5733,29 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
    // that the value will either become zero (and thus the loop terminates), that
    // the loop will terminate through some other exit condition first, or that
    // the loop has undefined behavior.  This means we can't "miss" the exit
-  // value, even with nonunit stride.
+  // value, even with nonunit stride, and exit later via the same branch. Note
+  // that we can skip this exit if loop later exits via a different
+  // branch. Hence MustExit=false.
    //
    // This is only valid for expressions that directly compute the loop exit. It
    // is invalid for subexpressions in which the loop may exit through this
    // branch even if this subexpression is false. In that case, the trip count
    // computed by this udiv could be smaller than the number of well-defined
    // iterations.
-  if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW))
-    return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+  if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
+    const SCEV *Exact =
+      getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+    return ExitLimit(Exact, Exact, /*MustExit=*/false);
+  }
+
+  // If Step is a power of two that evenly divides Start we know that the loop
+  // will always terminate.  Start may not be a constant so we just have the
+  // number of trailing zeros available.  This is safe even in presence of
+  // overflow as the recurrence will overflow to exactly 0.
+  const APInt &StepV = StepC->getValue()->getValue();
+  if (StepV.isPowerOf2() &&
+      GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros())
+    return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
  
    // Then, try to solve the above equation provided that Start is constant.
    if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
@@ -5985,7 +6168,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
    default:
      llvm_unreachable("Unexpected ICmpInst::Predicate value!");
    case ICmpInst::ICMP_SGT:
-    Pred = ICmpInst::ICMP_SLT;
      std::swap(LHS, RHS);
    case ICmpInst::ICMP_SLT: {
      ConstantRange LHSRange = getSignedRange(LHS);
@@ -5997,7 +6179,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
      break;
    }
    case ICmpInst::ICMP_SGE:
-    Pred = ICmpInst::ICMP_SLE;
      std::swap(LHS, RHS);
    case ICmpInst::ICMP_SLE: {
      ConstantRange LHSRange = getSignedRange(LHS);
@@ -6009,7 +6190,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
      break;
    }
    case ICmpInst::ICMP_UGT:
-    Pred = ICmpInst::ICMP_ULT;
      std::swap(LHS, RHS);
    case ICmpInst::ICMP_ULT: {
      ConstantRange LHSRange = getUnsignedRange(LHS);
@@ -6021,7 +6201,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
      break;
    }
    case ICmpInst::ICMP_UGE:
-    Pred = ICmpInst::ICMP_ULE;
      std::swap(LHS, RHS);
    case ICmpInst::ICMP_ULE: {
      ConstantRange LHSRange = getUnsignedRange(LHS);
@@ -6179,7 +6358,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
    // LHS' type is checked for above.
    if (getTypeSizeInBits(LHS->getType()) >
        getTypeSizeInBits(FoundLHS->getType())) {
-    if (CmpInst::isSigned(Pred)) {
+    if (CmpInst::isSigned(FoundPred)) {
        FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
        FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
      } else {
@@ -6295,45 +6474,72 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
    return false;
  }
  
-/// getBECount - Subtract the end and start values and divide by the step,
-/// rounding up, to get the number of times the backedge is executed. Return
-/// CouldNotCompute if an intermediate computation overflows.
-const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
-                                        const SCEV *End,
-                                        const SCEV *Step,
-                                        bool NoWrap) {
-  assert(!isKnownNegative(Step) &&
-         "This code doesn't handle negative strides yet!");
-
-  Type *Ty = Start->getType();
-
-  // When Start == End, we have an exact BECount == 0. Short-circuit this case
-  // here because SCEV may not be able to determine that the unsigned division
-  // after rounding is zero.
-  if (Start == End)
-    return getConstant(Ty, 0);
-
-  const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
-  const SCEV *Diff = getMinusSCEV(End, Start);
-  const SCEV *RoundUp = getAddExpr(Step, NegOne);
-
-  // Add an adjustment to the difference between End and Start so that
-  // the division will effectively round up.
-  const SCEV *Add = getAddExpr(Diff, RoundUp);
-
-  if (!NoWrap) {
-    // Check Add for unsigned overflow.
-    // TODO: More sophisticated things could be done here.
-    Type *WideTy = IntegerType::get(getContext(),
-                                          getTypeSizeInBits(Ty) + 1);
-    const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
-    const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
-    const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
-    if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
-      return getCouldNotCompute();
+// Verify if an linear IV with positive stride can overflow when in a 
+// less-than comparison, knowing the invariant term of the comparison, the 
+// stride and the knowledge of NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
+                                         bool IsSigned, bool NoWrap) {
+  if (NoWrap) return false;
+
+  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+  const SCEV *One = getConstant(Stride->getType(), 1);
+
+  if (IsSigned) {
+    APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+    APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+    APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+                                .getSignedMax();
+
+    // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
+    return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
+  }
+
+  APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+  APInt MaxValue = APInt::getMaxValue(BitWidth);
+  APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+                              .getUnsignedMax();
+
+  // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
+  return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+}
+
+// Verify if an linear IV with negative stride can overflow when in a 
+// greater-than comparison, knowing the invariant term of the comparison,
+// the stride and the knowledge of NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
+                                         bool IsSigned, bool NoWrap) {
+  if (NoWrap) return false;
+
+  unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+  const SCEV *One = getConstant(Stride->getType(), 1);
+
+  if (IsSigned) {
+    APInt MinRHS = getSignedRange(RHS).getSignedMin();
+    APInt MinValue = APInt::getSignedMinValue(BitWidth);
+    APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+                               .getSignedMax();
+
+    // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
+    return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
    }
  
-  return getUDivExpr(Add, Step);
+  APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+  APInt MinValue = APInt::getMinValue(BitWidth);
+  APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+                            .getUnsignedMax();
+
+  // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
+  return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+}
+
+// Compute the backedge taken count knowing the interval difference, the
+// stride and presence of the equality in the comparison.
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, 
+                                            bool Equality) {
+  const SCEV *One = getConstant(Step->getType(), 1);
+  Delta = Equality ? getAddExpr(Delta, Step)
+                   : getAddExpr(Delta, getMinusSCEV(Step, One));
+  return getUDivExpr(Delta, Step);
  }
  
  /// HowManyLessThans - Return the number of times a backedge containing the
@@ -6345,119 +6551,144 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
  /// a subexpression that cannot overflow before evaluating true.
  ScalarEvolution::ExitLimit
  ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                                  const Loop *L, bool isSigned,
+                                  const Loop *L, bool IsSigned,
                                    bool IsSubExpr) {
-  // Only handle:  "ADDREC < LoopInvariant".
-  if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+  // We handle only IV < Invariant
+  if (!isLoopInvariant(RHS, L))
+    return getCouldNotCompute();
  
-  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
-  if (!AddRec || AddRec->getLoop() != L)
+  const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // Avoid weird loops
+  if (!IV || IV->getLoop() != L || !IV->isAffine())
      return getCouldNotCompute();
  
-  // Check to see if we have a flag which makes analysis easy.
-  bool NoWrap = false;
-  if (!IsSubExpr) {
-    NoWrap = AddRec->getNoWrapFlags(
-      (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
-                          | SCEV::FlagNW));
-  }
-  if (AddRec->isAffine()) {
-    unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
-    const SCEV *Step = AddRec->getStepRecurrence(*this);
+  bool NoWrap = !IsSubExpr &&
+                IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
  
-    if (Step->isZero())
-      return getCouldNotCompute();
-    if (Step->isOne()) {
-      // With unit stride, the iteration never steps past the limit value.
-    } else if (isKnownPositive(Step)) {
-      // Test whether a positive iteration can step past the limit
-      // value and past the maximum value for its type in a single step.
-      // Note that it's not sufficient to check NoWrap here, because even
-      // though the value after a wrap is undefined, it's not undefined
-      // behavior, so if wrap does occur, the loop could either terminate or
-      // loop infinitely, but in either case, the loop is guaranteed to
-      // iterate at least until the iteration where the wrapping occurs.
-      const SCEV *One = getConstant(Step->getType(), 1);
-      if (isSigned) {
-        APInt Max = APInt::getSignedMaxValue(BitWidth);
-        if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
-              .slt(getSignedRange(RHS).getSignedMax()))
-          return getCouldNotCompute();
-      } else {
-        APInt Max = APInt::getMaxValue(BitWidth);
-        if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
-              .ult(getUnsignedRange(RHS).getUnsignedMax()))
-          return getCouldNotCompute();
-      }
-    } else
-      // TODO: Handle negative strides here and below.
-      return getCouldNotCompute();
+  const SCEV *Stride = IV->getStepRecurrence(*this);
  
-    // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
-    // m.  So, we count the number of iterations in which {n,+,s} < m is true.
-    // Note that we cannot simply return max(m-n,0)/s because it's not safe to
-    // treat m-n as signed nor unsigned due to overflow possibility.
-
-    // First, we get the value of the LHS in the first iteration: n
-    const SCEV *Start = AddRec->getOperand(0);
-
-    // Determine the minimum constant start value.
-    const SCEV *MinStart = getConstant(isSigned ?
-      getSignedRange(Start).getSignedMin() :
-      getUnsignedRange(Start).getUnsignedMin());
-
-    // If we know that the condition is true in order to enter the loop,
-    // then we know that it will run exactly (m-n)/s times. Otherwise, we
-    // only know that it will execute (max(m,n)-n)/s times. In both cases,
-    // the division must round up.
-    const SCEV *End = RHS;
-    if (!isLoopEntryGuardedByCond(L,
-                                  isSigned ? ICmpInst::ICMP_SLT :
-                                             ICmpInst::ICMP_ULT,
-                                  getMinusSCEV(Start, Step), RHS))
-      End = isSigned ? getSMaxExpr(RHS, Start)
-                     : getUMaxExpr(RHS, Start);
-
-    // Determine the maximum constant end value.
-    const SCEV *MaxEnd = getConstant(isSigned ?
-      getSignedRange(End).getSignedMax() :
-      getUnsignedRange(End).getUnsignedMax());
-
-    // If MaxEnd is within a step of the maximum integer value in its type,
-    // adjust it down to the minimum value which would produce the same effect.
-    // This allows the subsequent ceiling division of (N+(step-1))/step to
-    // compute the correct value.
-    const SCEV *StepMinusOne = getMinusSCEV(Step,
-                                            getConstant(Step->getType(), 1));
-    MaxEnd = isSigned ?
-      getSMinExpr(MaxEnd,
-                  getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
-                               StepMinusOne)) :
-      getUMinExpr(MaxEnd,
-                  getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
-                               StepMinusOne));
-
-    // Finally, we subtract these two values and divide, rounding up, to get
-    // the number of times the backedge is executed.
-    const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
-
-    // The maximum backedge count is similar, except using the minimum start
-    // value and the maximum end value.
-    // If we already have an exact constant BECount, use it instead.
-    const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
-      : getBECount(MinStart, MaxEnd, Step, NoWrap);
-
-    // If the stride is nonconstant, and NoWrap == true, then
-    // getBECount(MinStart, MaxEnd) may not compute. This would result in an
-    // exact BECount and invalid MaxBECount, which should be avoided to catch
-    // more optimization opportunities.
-    if (isa<SCEVCouldNotCompute>(MaxBECount))
-      MaxBECount = BECount;
-
-    return ExitLimit(BECount, MaxBECount);
-  }
+  // Avoid negative or zero stride values
+  if (!isKnownPositive(Stride))
+    return getCouldNotCompute();
  
-  return getCouldNotCompute();
+  // Avoid proven overflow cases: this will ensure that the backedge taken count
+  // will not generate any unsigned overflow. Relaxed no-overflow conditions
+  // exploit NoWrapFlags, allowing to optimize in presence of undefined 
+  // behaviors like the case of C language.
+  if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+    return getCouldNotCompute();
+
+  ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
+                                      : ICmpInst::ICMP_ULT;
+  const SCEV *Start = IV->getStart();
+  const SCEV *End = RHS;
+  if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+    End = IsSigned ? getSMaxExpr(RHS, Start)
+                   : getUMaxExpr(RHS, Start);
+
+  const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+
+  APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+                            : getUnsignedRange(Start).getUnsignedMin();
+
+  APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+                             : getUnsignedRange(Stride).getUnsignedMin();
+
+  unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+  APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
+                         : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+  // Although End can be a MAX expression we estimate MaxEnd considering only
+  // the case End = RHS. This is safe because in the other case (End - Start)
+  // is zero, leading to a zero maximum backedge taken count.
+  APInt MaxEnd =
+    IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+             : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+
+  const SCEV *MaxBECount;
+  if (isa<SCEVConstant>(BECount))
+    MaxBECount = BECount;
+  else
+    MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
+                                getConstant(MinStride), false);
+
+  if (isa<SCEVCouldNotCompute>(MaxBECount))
+    MaxBECount = BECount;
+
+  return ExitLimit(BECount, MaxBECount, /*MustExit=*/true);
+}
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
+                                     const Loop *L, bool IsSigned,
+                                     bool IsSubExpr) {
+  // We handle only IV > Invariant
+  if (!isLoopInvariant(RHS, L))
+    return getCouldNotCompute();
+
+  const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // Avoid weird loops
+  if (!IV || IV->getLoop() != L || !IV->isAffine())
+    return getCouldNotCompute();
+
+  bool NoWrap = !IsSubExpr &&
+                IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+
+  const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
+
+  // Avoid negative or zero stride values
+  if (!isKnownPositive(Stride))
+    return getCouldNotCompute();
+
+  // Avoid proven overflow cases: this will ensure that the backedge taken count
+  // will not generate any unsigned overflow. Relaxed no-overflow conditions
+  // exploit NoWrapFlags, allowing to optimize in presence of undefined 
+  // behaviors like the case of C language.
+  if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
+    return getCouldNotCompute();
+
+  ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
+                                      : ICmpInst::ICMP_UGT;
+
+  const SCEV *Start = IV->getStart();
+  const SCEV *End = RHS;
+  if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
+    End = IsSigned ? getSMinExpr(RHS, Start)
+                   : getUMinExpr(RHS, Start);
+
+  const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
+
+  APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
+                            : getUnsignedRange(Start).getUnsignedMax();
+
+  APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+                             : getUnsignedRange(Stride).getUnsignedMin();
+
+  unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+  APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
+                         : APInt::getMinValue(BitWidth) + (MinStride - 1);
+
+  // Although End can be a MIN expression we estimate MinEnd considering only
+  // the case End = RHS. This is safe because in the other case (Start - End)
+  // is zero, leading to a zero maximum backedge taken count.
+  APInt MinEnd =
+    IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
+             : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+
+
+  const SCEV *MaxBECount = getCouldNotCompute();
+  if (isa<SCEVConstant>(BECount))
+    MaxBECount = BECount;
+  else
+    MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), 
+                                getConstant(MinStride), false);
+
+  if (isa<SCEVCouldNotCompute>(MaxBECount))
+    MaxBECount = BECount;
+
+  return ExitLimit(BECount, MaxBECount, /*MustExit=*/true);
  }
  
  /// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -6586,7 +6817,517 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
    return SE.getCouldNotCompute();
  }
  
+static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
+  APInt A = C1->getValue()->getValue();
+  APInt B = C2->getValue()->getValue();
+  uint32_t ABW = A.getBitWidth();
+  uint32_t BBW = B.getBitWidth();
+
+  if (ABW > BBW)
+    B = B.sext(ABW);
+  else if (ABW < BBW)
+    A = A.sext(BBW);
+
+  return APIntOps::srem(A, B);
+}
+
+static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
+  APInt A = C1->getValue()->getValue();
+  APInt B = C2->getValue()->getValue();
+  uint32_t ABW = A.getBitWidth();
+  uint32_t BBW = B.getBitWidth();
  
+  if (ABW > BBW)
+    B = B.sext(ABW);
+  else if (ABW < BBW)
+    A = A.sext(BBW);
+
+  return APIntOps::sdiv(A, B);
+}
+
+namespace {
+struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
+public:
+  // Pattern match Step into Start. When Step is a multiply expression, find
+  // the largest subexpression of Step that appears in Start. When Start is an
+  // add expression, try to match Step in the subexpressions of Start, non
+  // matching subexpressions are returned under Remainder.
+  static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
+                             const SCEV *Step, const SCEV **Remainder) {
+    assert(Remainder && "Remainder should not be NULL");
+    SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
+    const SCEV *Res = R.visit(Start);
+    *Remainder = R.Remainder;
+    return Res;
+  }
+
+  SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
+      : SE(S), GCD(G), Remainder(R) {
+    Zero = SE.getConstant(GCD->getType(), 0);
+    One = SE.getConstant(GCD->getType(), 1);
+  }
+
+  const SCEV *visitConstant(const SCEVConstant *Constant) {
+    if (GCD == Constant || Constant == Zero)
+      return GCD;
+
+    if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
+      const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
+      if (Res != One)
+        return Res;
+
+      Remainder = SE.getConstant(srem(Constant, CGCD));
+      Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
+      Res = SE.getConstant(gcd(Constant, CGCD));
+      return Res;
+    }
+
+    // When GCD is not a constant, it could be that the GCD is an Add, Mul,
+    // AddRec, etc., in which case we want to find out how many times the
+    // Constant divides the GCD: we then return that as the new GCD.
+    const SCEV *Rem = Zero;
+    const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
+
+    if (Res == One || Rem != Zero) {
+      Remainder = Constant;
+      return One;
+    }
+
+    assert(isa<SCEVConstant>(Res) && "Res should be a constant");
+    Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
+    return Res;
+  }
+
+  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+    if (GCD != Expr)
+      Remainder = Expr;
+    return GCD;
+  }
+
+  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+    if (GCD != Expr)
+      Remainder = Expr;
+    return GCD;
+  }
+
+  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+    if (GCD != Expr)
+      Remainder = Expr;
+    return GCD;
+  }
+
+  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+    if (GCD == Expr)
+      return GCD;
+
+    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+      const SCEV *Rem = Zero;
+      const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
+
+      // FIXME: There may be ambiguous situations: for instance,
+      // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
+      // The order in which the AddExpr is traversed computes a different GCD
+      // and Remainder.
+      if (Res != One)
+        GCD = Res;
+      if (Rem != Zero)
+        Remainder = SE.getAddExpr(Remainder, Rem);
+    }
+
+    return GCD;
+  }
+
+  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+    if (GCD == Expr)
+      return GCD;
+
+    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+      if (Expr->getOperand(i) == GCD)
+        return GCD;
+    }
+
+    // If we have not returned yet, it means that GCD is not part of Expr.
+    const SCEV *PartialGCD = One;
+    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+      const SCEV *Rem = Zero;
+      const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+      if (Rem != Zero)
+        // GCD does not divide Expr->getOperand(i).
+        continue;
+
+      if (Res == GCD)
+        return GCD;
+      PartialGCD = SE.getMulExpr(PartialGCD, Res);
+      if (PartialGCD == GCD)
+        return GCD;
+    }
+
+    if (PartialGCD != One)
+      return PartialGCD;
+
+    // Failed to find a PartialGCD: set the Remainder to the full expression,
+    // and return the GCD.
+    Remainder = Expr;
+    const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
+    if (!Mul)
+      return GCD;
+
+    // When the GCD is a multiply expression, try to decompose it:
+    // this occurs when Step does not divide the Start expression
+    // as in: {(-4 + (3 * %m)),+,(2 * %m)}
+    for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
+      const SCEV *Rem = Zero;
+      const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
+      if (Rem == Zero) {
+        Remainder = Rem;
+        return Res;
+      }
+    }
+
+    return GCD;
+  }
+
+  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+    if (GCD != Expr)
+      Remainder = Expr;
+    return GCD;
+  }
+
+  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+    if (GCD == Expr)
+      return GCD;
+
+    if (!Expr->isAffine()) {
+      Remainder = Expr;
+      return GCD;
+    }
+
+    const SCEV *Rem = Zero;
+    const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
+    if (Res == One || Res->isAllOnesValue()) {
+      Remainder = Expr;
+      return GCD;
+    }
+
+    if (Rem != Zero)
+      Remainder = SE.getAddExpr(Remainder, Rem);
+
+    Rem = Zero;
+    Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
+    if (Rem != Zero || Res == One || Res->isAllOnesValue()) {
+      Remainder = Expr;
+      return GCD;
+    }
+
+    return Res;
+  }
+
+  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+    if (GCD != Expr)
+      Remainder = Expr;
+    return GCD;
+  }
+
+  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+    if (GCD != Expr)
+      Remainder = Expr;
+    return GCD;
+  }
+
+  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+    if (GCD != Expr)
+      Remainder = Expr;
+    return GCD;
+  }
+
+  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+    return One;
+  }
+
+private:
+  ScalarEvolution &SE;
+  const SCEV *GCD, *Remainder, *Zero, *One;
+};
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
+public:
+  // Remove from Start all multiples of Step.
+  static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
+                            const SCEV *Step) {
+    SCEVDivision D(SE, Step);
+    const SCEV *Rem = D.Zero;
+    (void)Rem;
+    // The division is guaranteed to succeed: Step should divide Start with no
+    // remainder.
+    assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
+           "Step should divide Start with no remainder.");
+    return D.visit(Start);
+  }
+
+  SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
+    Zero = SE.getConstant(GCD->getType(), 0);
+    One = SE.getConstant(GCD->getType(), 1);
+  }
+
+  const SCEV *visitConstant(const SCEVConstant *Constant) {
+    if (GCD == Constant)
+      return One;
+
+    if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
+      return SE.getConstant(sdiv(Constant, CGCD));
+    return Constant;
+  }
+
+  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+    return Expr;
+  }
+
+  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+    return Expr;
+  }
+
+  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+    return Expr;
+  }
+
+  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+
+    SmallVector<const SCEV *, 2> Operands;
+    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+      Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+
+    if (Operands.size() == 1)
+      return Operands[0];
+    return SE.getAddExpr(Operands);
+  }
+
+  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+
+    bool FoundGCDTerm = false;
+    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+      if (Expr->getOperand(i) == GCD)
+        FoundGCDTerm = true;
+
+    SmallVector<const SCEV *, 2> Operands;
+    if (FoundGCDTerm) {
+      FoundGCDTerm = false;
+      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+        if (FoundGCDTerm)
+          Operands.push_back(Expr->getOperand(i));
+        else if (Expr->getOperand(i) == GCD)
+          FoundGCDTerm = true;
+        else
+          Operands.push_back(Expr->getOperand(i));
+      }
+    } else {
+      const SCEV *PartialGCD = One;
+      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+        if (PartialGCD == GCD) {
+          Operands.push_back(Expr->getOperand(i));
+          continue;
+        }
+
+        const SCEV *Rem = Zero;
+        const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+        if (Rem == Zero) {
+          PartialGCD = SE.getMulExpr(PartialGCD, Res);
+          Operands.push_back(divide(SE, Expr->getOperand(i), Res));
+        } else {
+          Operands.push_back(Expr->getOperand(i));
+        }
+      }
+    }
+
+    if (Operands.size() == 1)
+      return Operands[0];
+    return SE.getMulExpr(Operands);
+  }
+
+  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+    return Expr;
+  }
+
+  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+
+    assert(Expr->isAffine() && "Expr should be affine");
+
+    const SCEV *Start = divide(SE, Expr->getStart(), GCD);
+    const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
+
+    return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
+                            Expr->getNoWrapFlags());
+  }
+
+  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+    return Expr;
+  }
+
+  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+    if (GCD == Expr)
+      return One;
+    return Expr;
+  }
+
+  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+    if (GCD == Expr)
+      return One;
+    return Expr;
+  }
+
+  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+    return Expr;
+  }
+
+private:
+  ScalarEvolution &SE;
+  const SCEV *GCD, *Zero, *One;
+};
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access. Returns the remainder of the delinearization that
+/// is the offset start of the array.  The SCEV->delinearize algorithm computes
+/// the multiples of SCEV coefficients: that is a pattern matching of sub
+/// expressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride.  When
+/// SCEV->delinearize fails, it returns the SCEV unchanged.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+///  void foo(long n, long m, long o, double A[n][m][o]) {
+///
+///    for (long i = 0; i < n; i++)
+///      for (long j = 0; j < m; j++)
+///        for (long k = 0; k < o; k++)
+///          A[i][j][k] = 1.0;
+///  }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+///  AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that does not divide any of the strides in
+/// the loops:
+///
+///  CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array as these are the multiples by which the strides are happening:
+///
+///  CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+///
+/// Note that the outermost dimension remains of UnknownSize because there are
+/// no strides that would help identifying the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the known
+/// dimensions: %m * %o * 8.
+///
+/// Finally delinearize provides the access functions for the array reference
+/// that does correspond to A[i][j][k] of the above C testcase:
+///
+///  CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases are checking the output of a function pass:
+/// DelinearizationPass that walks through all loads and stores of a function
+/// asking for the SCEV of the memory access with respect to all enclosing
+/// loops, calling SCEV->delinearize on that and printing the results.
+
+const SCEV *
+SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+                            SmallVectorImpl<const SCEV *> &Subscripts,
+                            SmallVectorImpl<const SCEV *> &Sizes) const {
+  // Early exit in case this SCEV is not an affine multivariate function.
+  if (!this->isAffine())
+    return this;
+
+  const SCEV *Start = this->getStart();
+  const SCEV *Step = this->getStepRecurrence(SE);
+
+  // Build the SCEV representation of the canonical induction variable in the
+  // loop of this SCEV.
+  const SCEV *Zero = SE.getConstant(this->getType(), 0);
+  const SCEV *One = SE.getConstant(this->getType(), 1);
+  const SCEV *IV =
+      SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
+
+  DEBUG(dbgs() << "(delinearize: " << *this << "\n");
+
+  // When the stride of this SCEV is 1, do not compute the GCD: the size of this
+  // subscript is 1, and this same SCEV for the access function.
+  const SCEV *Remainder = Zero;
+  const SCEV *GCD = One;
+
+  // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
+  if (Step != One && !Step->isAllOnesValue())
+    GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
+
+  DEBUG(dbgs() << "GCD: " << *GCD << "\n");
+  DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
+
+  const SCEV *Quotient = Start;
+  if (GCD != One && !GCD->isAllOnesValue())
+    // As findGCD computed Remainder, GCD divides "Start - Remainder." The
+    // Quotient is then this SCEV without Remainder, scaled down by the GCD.  The
+    // Quotient is what will be used in the next subscript delinearization.
+    Quotient = SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
+
+  DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
+
+  const SCEV *Rem = Quotient;
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
+    // Recursively call delinearize on the Quotient until there are no more
+    // multiples that can be recognized.
+    Rem = AR->delinearize(SE, Subscripts, Sizes);
+
+  // Scale up the canonical induction variable IV by whatever remains from the
+  // Step after division by the GCD: the GCD is the size of all the sub-array.
+  if (Step != One && !Step->isAllOnesValue() && GCD != One &&
+      !GCD->isAllOnesValue() && Step != GCD) {
+    Step = SCEVDivision::divide(SE, Step, GCD);
+    IV = SE.getMulExpr(IV, Step);
+  }
+  // The access function in the current subscript is computed as the canonical
+  // induction variable IV (potentially scaled up by the step) and offset by
+  // Rem, the offset of delinearization in the sub-array.
+  const SCEV *Index = SE.getAddExpr(IV, Rem);
+
+  // Record the access function and the size of the current subscript.
+  Subscripts.push_back(Index);
+  Sizes.push_back(GCD);
+
+#ifndef NDEBUG
+  int Size = Sizes.size();
+  DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
+  DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
+  for (int i = 0; i < Size - 1; i++)
+    DEBUG(dbgs() << "[" << *Sizes[i] << "]");
+  DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
+
+  DEBUG(dbgs() << "ArrayRef");
+  for (int i = 0; i < Size; i++)
+    DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
+  DEBUG(dbgs() << "\n)\n");
+#endif
+
+  return Remainder;
+}
  
  //===----------------------------------------------------------------------===//
  //                   SCEVCallbackVH Class Implementation
@@ -6607,11 +7348,8 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
    // so that future queries will recompute the expressions using the new
    // value.
    Value *Old = getValPtr();
-  SmallVector<User *, 16> Worklist;
+  SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
    SmallPtrSet<User *, 8> Visited;
-  for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
-       UI != UE; ++UI)
-    Worklist.push_back(*UI);
    while (!Worklist.empty()) {
      User *U = Worklist.pop_back_val();
      // Deleting the Old value will cause this to dangle. Postpone
@@ -6623,9 +7361,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
      if (PHINode *PN = dyn_cast<PHINode>(U))
        SE->ConstantEvolutionLoopExitValue.erase(PN);
      SE->ValueExprMap.erase(U);
-    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
-         UI != UE; ++UI)
-      Worklist.push_back(*UI);
+    Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
    }
    // Delete the Old value.
    if (PHINode *PN = dyn_cast<PHINode>(Old))
@@ -6642,16 +7378,18 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  //===----------------------------------------------------------------------===//
  
  ScalarEvolution::ScalarEvolution()
-  : FunctionPass(ID), FirstUnknown(0) {
+  : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
+    BlockDispositions(64), FirstUnknown(nullptr) {
    initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
  }
  
  bool ScalarEvolution::runOnFunction(Function &F) {
    this->F = &F;
    LI = &getAnalysis<LoopInfo>();
-  TD = getAnalysisIfAvailable<DataLayout>();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
    TLI = &getAnalysis<TargetLibraryInfo>();
-  DT = &getAnalysis<DominatorTree>();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    return false;
  }
  
@@ -6660,7 +7398,7 @@ void ScalarEvolution::releaseMemory() {
    // destructors, so that they release their references to their values.
    for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
      U->~SCEVUnknown();
-  FirstUnknown = 0;
+  FirstUnknown = nullptr;
  
    ValueExprMap.clear();
  
@@ -6688,7 +7426,7 @@ void ScalarEvolution::releaseMemory() {
  void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesAll();
    AU.addRequiredTransitive<LoopInfo>();
-  AU.addRequiredTransitive<DominatorTree>();
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfo>();
  }
  
@@ -6703,7 +7441,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
      PrintLoopInfo(OS, SE, *I);
  
    OS << "Loop ";
-  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ": ";
  
    SmallVector<BasicBlock *, 8> ExitBlocks;
@@ -6719,7 +7457,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
  
    OS << "\n"
          "Loop ";
-  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ": ";
  
    if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
@@ -6741,7 +7479,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
    ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  
    OS << "Classifying expressions for: ";
-  WriteAsOperand(OS, F, /*PrintType=*/false);
+  F->printAsOperand(OS, /*PrintType=*/false);
    OS << "\n";
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
      if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
@@ -6772,7 +7510,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
      }
  
    OS << "Determining loop execution counts for: ";
-  WriteAsOperand(OS, F, /*PrintType=*/false);
+  F->printAsOperand(OS, /*PrintType=*/false);
    OS << "\n";
    for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
      PrintLoopInfo(OS, &SE, *I);
@@ -6780,19 +7518,26 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
  
  ScalarEvolution::LoopDisposition
  ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
-  std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
-  std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
-    Values.insert(std::make_pair(L, LoopVariant));
-  if (!Pair.second)
-    return Pair.first->second;
-
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S];
+  for (unsigned u = 0; u < Values.size(); u++) {
+    if (Values[u].first == L)
+      return Values[u].second;
+  }
+  Values.push_back(std::make_pair(L, LoopVariant));
    LoopDisposition D = computeLoopDisposition(S, L);
-  return LoopDispositions[S][L] = D;
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S];
+  for (unsigned u = Values2.size(); u > 0; u--) {
+    if (Values2[u - 1].first == L) {
+      Values2[u - 1].second = D;
+      break;
+    }
+  }
+  return D;
  }
  
  ScalarEvolution::LoopDisposition
  ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
-  switch (S->getSCEVType()) {
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
    case scConstant:
      return LoopInvariant;
    case scTruncate:
@@ -6865,8 +7610,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
      return LoopInvariant;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default: llvm_unreachable("Unknown SCEV kind!");
    }
+  llvm_unreachable("Unknown SCEV kind!");
  }
  
  bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
@@ -6879,19 +7624,26 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
  
  ScalarEvolution::BlockDisposition
  ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-  std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
-  std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
-    Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
-  if (!Pair.second)
-    return Pair.first->second;
-
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S];
+  for (unsigned u = 0; u < Values.size(); u++) {
+    if (Values[u].first == BB)
+      return Values[u].second;
+  }
+  Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
    BlockDisposition D = computeBlockDisposition(S, BB);
-  return BlockDispositions[S][BB] = D;
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S];
+  for (unsigned u = Values2.size(); u > 0; u--) {
+    if (Values2[u - 1].first == BB) {
+      Values2[u - 1].second = D;
+      break;
+    }
+  }
+  return D;
  }
  
  ScalarEvolution::BlockDisposition
  ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-  switch (S->getSCEVType()) {
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
    case scConstant:
      return ProperlyDominatesBlock;
    case scTruncate:
@@ -6948,9 +7700,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
      return ProperlyDominatesBlock;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default:
-    llvm_unreachable("Unknown SCEV kind!");
    }
+  llvm_unreachable("Unknown SCEV kind!");
  }
  
  bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
@@ -7005,7 +7756,7 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
  
  typedef DenseMap<const Loop *, std::string> VerifyMap;
  
-/// replaceSubString - Replaces all occurences of From in Str with To.
+/// replaceSubString - Replaces all occurrences of From in Str with To.
  static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
    size_t Pos = 0;
    while ((Pos = Str.find(From, Pos)) != std::string::npos) {