split delinearization pass in 3 steps

[oota-llvm.git] / lib / Analysis / ScalarEvolution.cpp
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp

index 064aafd01e4e0730f4862d194b11b8539d0858b4..7d84dbee5c596ccca75f46e382c6184b6f1c2462 100644 (file)
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -58,7 +58,6 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "scalar-evolution"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallPtrSet.h"
@@ -68,27 +67,29 @@
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
  #include "llvm/IR/GlobalAlias.h"
  #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Operator.h"
  #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ConstantRange.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstIterator.h"
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  #include <algorithm>
  using namespace llvm;
  
+#define DEBUG_TYPE "scalar-evolution"
+
  STATISTIC(NumArrayLenItCounts,
            "Number of trip counts computed with array length");
  STATISTIC(NumTripCountsComputed,
@@ -135,7 +136,7 @@ void SCEV::dump() const {
  #endif
  
  void SCEV::print(raw_ostream &OS) const {
-  switch (getSCEVType()) {
+  switch (static_cast<SCEVTypes>(getSCEVType())) {
    case scConstant:
      cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
      return;
@@ -182,7 +183,7 @@ void SCEV::print(raw_ostream &OS) const {
    case scUMaxExpr:
    case scSMaxExpr: {
      const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
-    const char *OpStr = 0;
+    const char *OpStr = nullptr;
      switch (NAry->getSCEVType()) {
      case scAddExpr: OpStr = " + "; break;
      case scMulExpr: OpStr = " * "; break;
@@ -193,7 +194,7 @@ void SCEV::print(raw_ostream &OS) const {
      for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
           I != E; ++I) {
        OS << **I;
-      if (llvm::next(I) != E)
+      if (std::next(I) != E)
          OS << OpStr;
      }
      OS << ")";
@@ -240,13 +241,12 @@ void SCEV::print(raw_ostream &OS) const {
    case scCouldNotCompute:
      OS << "***COULDNOTCOMPUTE***";
      return;
-  default: break;
    }
    llvm_unreachable("Unknown SCEV kind!");
  }
  
  Type *SCEV::getType() const {
-  switch (getSCEVType()) {
+  switch (static_cast<SCEVTypes>(getSCEVType())) {
    case scConstant:
      return cast<SCEVConstant>(this)->getType();
    case scTruncate:
@@ -266,9 +266,8 @@ Type *SCEV::getType() const {
      return cast<SCEVUnknown>(this)->getType();
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default:
-    llvm_unreachable("Unknown SCEV kind!");
    }
+  llvm_unreachable("Unknown SCEV kind!");
  }
  
  bool SCEV::isZero() const {
@@ -314,14 +313,14 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
    FoldingSetNodeID ID;
    ID.AddInteger(scConstant);
    ID.AddPointer(V);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
    UniqueSCEVs.InsertNode(S, IP);
    return S;
  }
  
-const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
    return getConstant(ConstantInt::get(getContext(), Val));
  }
  
@@ -367,7 +366,7 @@ void SCEVUnknown::deleted() {
    SE->UniqueSCEVs.RemoveNode(this);
  
    // Release the value.
-  setValPtr(0);
+  setValPtr(nullptr);
  }
  
  void SCEVUnknown::allUsesReplacedWith(Value *New) {
@@ -481,7 +480,7 @@ namespace {
        // Aside from the getSCEVType() ordering, the particular ordering
        // isn't very important except that it's beneficial to be consistent,
        // so that (a + b) and (b + a) don't end up as different expressions.
-      switch (LType) {
+      switch (static_cast<SCEVTypes>(LType)) {
        case scUnknown: {
          const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
          const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
@@ -618,9 +617,10 @@ namespace {
          return compare(LC->getOperand(), RC->getOperand());
        }
  
-      default:
-        llvm_unreachable("Unknown SCEV kind!");
+      case scCouldNotCompute:
+        llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
        }
+      llvm_unreachable("Unknown SCEV kind!");
      }
    };
  }
@@ -830,7 +830,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
    ID.AddInteger(scTruncate);
    ID.AddPointer(Op);
    ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  
    // Fold if the operand is constant.
@@ -920,7 +920,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
    ID.AddInteger(scZeroExtend);
    ID.AddPointer(Op);
    ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  
    // zext(trunc(x)) --> zext(x) or x or trunc(x)
@@ -1073,7 +1073,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
      return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
                         SE->getSignedRange(Step).getSignedMin());
    }
-  return 0;
+  return nullptr;
  }
  
  // The recurrence AR has been shown to have no signed wrap. Typically, if we can
@@ -1092,19 +1092,18 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
    // Check for a simple looking step prior to loop entry.
    const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
    if (!SA)
-    return 0;
+    return nullptr;
  
    // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
    // subtraction is expensive. For this purpose, perform a quick and dirty
    // difference, by checking for Step in the operand list.
    SmallVector<const SCEV *, 4> DiffOps;
-  for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
-       I != E; ++I) {
-    if (*I != Step)
-      DiffOps.push_back(*I);
-  }
+  for (const SCEV *Op : SA->operands())
+    if (Op != Step)
+      DiffOps.push_back(Op);
+
    if (DiffOps.size() == SA->getNumOperands())
-    return 0;
+    return nullptr;
  
    // This is a postinc AR. Check for overflow on the preinc recurrence using the
    // same three conditions that getSignExtendedExpr checks.
@@ -1140,7 +1139,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
        SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
      return PreStart;
    }
-  return 0;
+  return nullptr;
  }
  
  // Get the normalized sign-extended expression for this AddRec's Start.
@@ -1182,7 +1181,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
    ID.AddInteger(scSignExtend);
    ID.AddPointer(Op);
    ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
  
    // If the input value is provably positive, build a zext instead.
@@ -1341,9 +1340,8 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
    // Force the cast to be folded into the operands of an addrec.
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
      SmallVector<const SCEV *, 4> Ops;
-    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
-         I != E; ++I)
-      Ops.push_back(getAnyExtendExpr(*I, Ty));
+    for (const SCEV *Op : AR->operands())
+      Ops.push_back(getAnyExtendExpr(Op, Ty));
      return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
    }
  
@@ -1361,7 +1359,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
  /// what it does, given a sequence of operands that would form an add
  /// expression like this:
  ///
-///    m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///    m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
  ///
  /// where A and B are constants, update the map with these values:
  ///
@@ -1812,7 +1810,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
    ID.AddInteger(scAddExpr);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
    SCEVAddExpr *S =
      static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
    if (!S) {
@@ -2106,7 +2104,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
    ID.AddInteger(scMulExpr);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
    SCEVMulExpr *S =
      static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
    if (!S) {
@@ -2231,7 +2229,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
    ID.AddInteger(scUDivExpr);
    ID.AddPointer(LHS);
    ID.AddPointer(RHS);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
                                               LHS, RHS);
@@ -2239,6 +2237,77 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
    return S;
  }
  
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+  APInt A = C1->getValue()->getValue().abs();
+  APInt B = C2->getValue()->getValue().abs();
+  uint32_t ABW = A.getBitWidth();
+  uint32_t BBW = B.getBitWidth();
+
+  if (ABW > BBW)
+    B = B.zext(ABW);
+  else if (ABW < BBW)
+    A = A.zext(BBW);
+
+  return APIntOps::GreatestCommonDivisor(A, B);
+}
+
+/// getUDivExactExpr - Get a canonical unsigned division expression, or
+/// something simpler if possible. There is no representation for an exact udiv
+/// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
+/// We can't do this when it's not exact because the udiv may be clearing bits.
+const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
+                                              const SCEV *RHS) {
+  // TODO: we could try to find factors in all sorts of things, but for now we
+  // just deal with u/exact (multiply, constant). See SCEVDivision towards the
+  // end of this file for inspiration.
+
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
+  if (!Mul)
+    return getUDivExpr(LHS, RHS);
+
+  if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
+    // If the mulexpr multiplies by a constant, then that constant must be the
+    // first element of the mulexpr.
+    if (const SCEVConstant *LHSCst =
+            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+      if (LHSCst == RHSCst) {
+        SmallVector<const SCEV *, 2> Operands;
+        Operands.append(Mul->op_begin() + 1, Mul->op_end());
+        return getMulExpr(Operands);
+      }
+
+      // We can't just assume that LHSCst divides RHSCst cleanly, it could be
+      // that there's a factor provided by one of the other terms. We need to
+      // check.
+      APInt Factor = gcd(LHSCst, RHSCst);
+      if (!Factor.isIntN(1)) {
+        LHSCst = cast<SCEVConstant>(
+            getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
+        RHSCst = cast<SCEVConstant>(
+            getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
+        SmallVector<const SCEV *, 2> Operands;
+        Operands.push_back(LHSCst);
+        Operands.append(Mul->op_begin() + 1, Mul->op_end());
+        LHS = getMulExpr(Operands);
+        RHS = RHSCst;
+        Mul = dyn_cast<SCEVMulExpr>(LHS);
+        if (!Mul)
+          return getUDivExactExpr(LHS, RHS);
+      }
+    }
+  }
+
+  for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
+    if (Mul->getOperand(i) == RHS) {
+      SmallVector<const SCEV *, 2> Operands;
+      Operands.append(Mul->op_begin(), Mul->op_begin() + i);
+      Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
+      return getMulExpr(Operands);
+    }
+  }
+
+  return getUDivExpr(LHS, RHS);
+}
  
  /// getAddRecExpr - Get an add recurrence expression for the specified loop.
  /// Simplify the expression as much as possible.
@@ -2355,7 +2424,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
      ID.AddPointer(Operands[i]);
    ID.AddPointer(L);
-  void *IP = 0;
+  void *IP = nullptr;
    SCEVAddRecExpr *S =
      static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
    if (!S) {
@@ -2463,7 +2532,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
    ID.AddInteger(scSMaxExpr);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2567,7 +2636,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
    ID.AddInteger(scUMaxExpr);
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
    const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
    std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2593,12 +2662,12 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
    // If we have DataLayout, we can bypass creating a target-independent
    // constant expression and then folding it back into a ConstantInt.
    // This is just a compile-time optimization.
-  if (TD)
-    return getConstant(IntTy, TD->getTypeAllocSize(AllocTy));
+  if (DL)
+    return getConstant(IntTy, DL->getTypeAllocSize(AllocTy));
  
    Constant *C = ConstantExpr::getSizeOf(AllocTy);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
        C = Folded;
    Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
    assert(Ty == IntTy && "Effective SCEV type doesn't match");
@@ -2611,14 +2680,14 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
    // If we have DataLayout, we can bypass creating a target-independent
    // constant expression and then folding it back into a ConstantInt.
    // This is just a compile-time optimization.
-  if (TD) {
+  if (DL) {
      return getConstant(IntTy,
-                       TD->getStructLayout(STy)->getElementOffset(FieldNo));
+                       DL->getStructLayout(STy)->getElementOffset(FieldNo));
    }
  
    Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
        C = Folded;
  
    Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
@@ -2634,7 +2703,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
    FoldingSetNodeID ID;
    ID.AddInteger(scUnknown);
    ID.AddPointer(V);
-  void *IP = 0;
+  void *IP = nullptr;
    if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
      assert(cast<SCEVUnknown>(S)->getValue() == V &&
             "Stale SCEVUnknown in uniquing map!");
@@ -2666,8 +2735,8 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
    assert(isSCEVable(Ty) && "Type is not SCEVable!");
  
    // If we have a DataLayout, use it!
-  if (TD)
-    return TD->getTypeSizeInBits(Ty);
+  if (DL)
+    return DL->getTypeSizeInBits(Ty);
  
    // Integer types have fixed sizes.
    if (Ty->isIntegerTy())
@@ -2693,8 +2762,8 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
    // The only other support type is pointer.
    assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
  
-  if (TD)
-    return TD->getIntPtrType(Ty);
+  if (DL)
+    return DL->getIntPtrType(Ty);
  
    // Without DataLayout, conservatively assume pointers are 64-bit.
    return Type::getInt64Ty(getContext());
@@ -2713,7 +2782,7 @@ namespace {
      bool FindOne;
      FindInvalidSCEVUnknown() { FindOne = false; }
      bool follow(const SCEV *S) {
-      switch (S->getSCEVType()) {
+      switch (static_cast<SCEVTypes>(S->getSCEVType())) {
        case scConstant:
          return false;
        case scUnknown:
@@ -2940,7 +3009,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
      return getPointerBase(Cast->getOperand());
    }
    else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
-    const SCEV *PtrOp = 0;
+    const SCEV *PtrOp = nullptr;
      for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
           I != E; ++I) {
        if ((*I)->getType()->isPointerTy()) {
@@ -2963,9 +3032,8 @@ static void
  PushDefUseChildren(Instruction *I,
                     SmallVectorImpl<Instruction *> &Worklist) {
    // Push the def-use children onto the Worklist stack.
-  for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-       UI != UE; ++UI)
-    Worklist.push_back(cast<Instruction>(*UI));
+  for (User *U : I->users())
+    Worklist.push_back(cast<Instruction>(U));
  }
  
  /// ForgetSymbolicValue - This looks up computed SCEV values for all
@@ -3021,20 +3089,20 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
        // The loop may have multiple entrances or multiple exits; we can analyze
        // this phi as an addrec if it has a unique entry value and a unique
        // backedge value.
-      Value *BEValueV = 0, *StartValueV = 0;
+      Value *BEValueV = nullptr, *StartValueV = nullptr;
        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
          Value *V = PN->getIncomingValue(i);
          if (L->contains(PN->getIncomingBlock(i))) {
            if (!BEValueV) {
              BEValueV = V;
            } else if (BEValueV != V) {
-            BEValueV = 0;
+            BEValueV = nullptr;
              break;
            }
          } else if (!StartValueV) {
            StartValueV = V;
          } else if (StartValueV != V) {
-          StartValueV = 0;
+          StartValueV = nullptr;
            break;
          }
        }
@@ -3162,7 +3230,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
    // PHI's incoming blocks are in a different loop, in which case doing so
    // risks breaking LCSSA form. Instcombine would normally zap these, but
    // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
+  if (Value *V = SimplifyInstruction(PN, DL, TLI, DT))
      if (LI->replacementPreservesLCSSAForm(PN, V))
        return getSCEV(V);
  
@@ -3188,7 +3256,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
  
    const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
    gep_type_iterator GTI = gep_type_begin(GEP);
-  for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+  for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()),
                                        E = GEP->op_end();
         I != E; ++I) {
      Value *Index = *I;
@@ -3433,7 +3501,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // For a SCEVUnknown, ask ValueTracking.
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
+    ComputeMaskedBits(U->getValue(), Zeros, Ones, DL);
      if (Ones == ~Zeros + 1)
        return setUnsignedRange(U, ConservativeResult);
      return setUnsignedRange(U,
@@ -3583,9 +3651,9 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
  
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // For a SCEVUnknown, ask ValueTracking.
-    if (!U->getValue()->getType()->isIntegerTy() && !TD)
+    if (!U->getValue()->getType()->isIntegerTy() && !DL)
        return setSignedRange(U, ConservativeResult);
-    unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+    unsigned NS = ComputeNumSignBits(U->getValue(), DL);
      if (NS <= 1)
        return setSignedRange(U, ConservativeResult);
      return setSignedRange(U, ConservativeResult.intersectWith(
@@ -3689,17 +3757,24 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
        // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
        // knew about to reconstruct a low-bits mask value.
        unsigned LZ = A.countLeadingZeros();
+      unsigned TZ = A.countTrailingZeros();
        unsigned BitWidth = A.getBitWidth();
        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
-
-      APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
-
-      if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
-        return
-          getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
-                                IntegerType::get(getContext(), BitWidth - LZ)),
-                            U->getType());
+      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, DL);
+
+      APInt EffectiveMask =
+          APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
+      if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
+        const SCEV *MulCount = getConstant(
+            ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
+        return getMulExpr(
+            getZeroExtendExpr(
+                getTruncateExpr(
+                    getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
+                    IntegerType::get(getContext(), BitWidth - LZ - TZ)),
+                U->getType()),
+            MulCount);
+      }
      }
      break;
  
@@ -4240,9 +4315,9 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
    if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
    assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
  
-  const SCEV *BECount = 0;
+  const SCEV *BECount = nullptr;
    for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
  
      assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
  
@@ -4260,7 +4335,7 @@ const SCEV *
  ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
                                               ScalarEvolution *SE) const {
    for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
  
      if (ENT->ExitingBlock == ExitingBlock)
        return ENT->ExactNotTaken;
@@ -4283,7 +4358,7 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
      return false;
  
    for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
  
      if (ENT->ExactNotTaken != SE->getCouldNotCompute()
          && SE->hasOperand(ENT->ExactNotTaken, S)) {
@@ -4322,8 +4397,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
  
  /// clear - Invalidate this result and free the ExitNotTakenInfo array.
  void ScalarEvolution::BackedgeTakenInfo::clear() {
-  ExitNotTaken.ExitingBlock = 0;
-  ExitNotTaken.ExactNotTaken = 0;
+  ExitNotTaken.ExitingBlock = nullptr;
+  ExitNotTaken.ExactNotTaken = nullptr;
    delete[] ExitNotTaken.getNextExit();
  }
  
@@ -4338,7 +4413,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
    const SCEV *MaxBECount = getCouldNotCompute();
    bool CouldComputeBECount = true;
    BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
-  const SCEV *LatchMaxCount = 0;
+  const SCEV *LatchMaxCount = nullptr;
    SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
      ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
@@ -4375,12 +4450,19 @@ ScalarEvolution::ExitLimit
  ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
  
    // Okay, we've chosen an exiting block.  See what condition causes us to
-  // exit at this block.
-  //
-  // FIXME: we should be able to handle switch instructions (with a single exit)
-  BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
-  if (ExitBr == 0) return getCouldNotCompute();
-  assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+  // exit at this block and remember the exit block and whether all other targets
+  // lead to the loop header.
+  bool MustExecuteLoopHeader = true;
+  BasicBlock *Exit = nullptr;
+  for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
+       SI != SE; ++SI)
+    if (!L->contains(*SI)) {
+      if (Exit) // Multiple exit successors.
+        return getCouldNotCompute();
+      Exit = *SI;
+    } else if (*SI != L->getHeader()) {
+      MustExecuteLoopHeader = false;
+    }
  
    // At this point, we know we have a conditional branch that determines whether
    // the loop is exited.  However, we don't know if the branch is executed each
@@ -4399,13 +4481,11 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
    //
    //  More extensive analysis could be done to handle more cases here.
    //
-  if (ExitBr->getSuccessor(0) != L->getHeader() &&
-      ExitBr->getSuccessor(1) != L->getHeader() &&
-      ExitBr->getParent() != L->getHeader()) {
+  if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
      // The simple checks failed, try climbing the unique predecessor chain
      // up to the header.
      bool Ok = false;
-    for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+    for (BasicBlock *BB = ExitingBlock; BB; ) {
        BasicBlock *Pred = BB->getUniquePredecessor();
        if (!Pred)
          return getCouldNotCompute();
@@ -4429,11 +4509,20 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
        return getCouldNotCompute();
    }
  
-  // Proceed to the next level to examine the exit condition expression.
-  return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
-                                  ExitBr->getSuccessor(0),
-                                  ExitBr->getSuccessor(1),
-                                  /*IsSubExpr=*/false);
+  TerminatorInst *Term = ExitingBlock->getTerminator();
+  if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
+    assert(BI->isConditional() && "If unconditional, it can't be in loop!");
+    // Proceed to the next level to examine the exit condition expression.
+    return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
+                                    BI->getSuccessor(1),
+                                    /*IsSubExpr=*/false);
+  }
+
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
+    return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
+                                                /*IsSubExpr=*/false);
+
+  return getCouldNotCompute();
  }
  
  /// ComputeExitLimitFromCond - Compute the number of times the
@@ -4650,6 +4739,30 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
    return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
  }
  
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
+                                                      SwitchInst *Switch,
+                                                      BasicBlock *ExitingBlock,
+                                                      bool IsSubExpr) {
+  assert(!L->contains(ExitingBlock) && "Not an exiting block!");
+
+  // Give up if the exit is the default dest of a switch.
+  if (Switch->getDefaultDest() == ExitingBlock)
+    return getCouldNotCompute();
+
+  assert(L->contains(Switch->getDefaultDest()) &&
+         "Default case must not exit the loop!");
+  const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
+  const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
+
+  // while (X != Y) --> while (X-Y != 0)
+  ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
+  if (EL.hasAnyInfo())
+    return EL;
+
+  return getCouldNotCompute();
+}
+
  static ConstantInt *
  EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
                                  ScalarEvolution &SE) {
@@ -4686,7 +4799,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
      return getCouldNotCompute();
  
    // Okay, we allow one non-constant index into the GEP instruction.
-  Value *VarIdx = 0;
+  Value *VarIdx = nullptr;
    std::vector<Constant*> Indexes;
    unsigned VarIdxNum = 0;
    for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
@@ -4696,7 +4809,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
        if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
        VarIdx = GEP->getOperand(i);
        VarIdxNum = i-2;
-      Indexes.push_back(0);
+      Indexes.push_back(nullptr);
      }
  
    // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
@@ -4727,7 +4840,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
  
      Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
                                                           Indexes);
-    if (Result == 0) break;  // Cannot compute!
+    if (!Result) break;  // Cannot compute!
  
      // Evaluate the condition for this iteration.
      Result = ConstantExpr::getICmp(predicate, Result, RHS);
@@ -4788,14 +4901,14 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
  
    // Otherwise, we can evaluate this instruction if all of its operands are
    // constant or derived from a PHI node themselves.
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
    for (Instruction::op_iterator OpI = UseInst->op_begin(),
           OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
  
      if (isa<Constant>(*OpI)) continue;
  
      Instruction *OpInst = dyn_cast<Instruction>(*OpI);
-    if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+    if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
  
      PHINode *P = dyn_cast<PHINode>(OpInst);
      if (!P)
@@ -4809,8 +4922,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
        P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
        PHIMap[OpInst] = P;
      }
-    if (P == 0) return 0;        // Not evolving from PHI
-    if (PHI && PHI != P) return 0;  // Evolving from multiple different PHIs.
+    if (!P)
+      return nullptr;  // Not evolving from PHI
+    if (PHI && PHI != P)
+      return nullptr;  // Evolving from multiple different PHIs.
      PHI = P;
    }
    // This is a expression evolving from a constant PHI!
@@ -4824,7 +4939,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
  /// constraints, return null.
  static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
    Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !canConstantEvolve(I, L)) return 0;
+  if (!I || !canConstantEvolve(I, L)) return nullptr;
  
    if (PHINode *PN = dyn_cast<PHINode>(I)) {
      return PN;
@@ -4841,23 +4956,23 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
  /// reason, return null.
  static Constant *EvaluateExpression(Value *V, const Loop *L,
                                      DenseMap<Instruction *, Constant *> &Vals,
-                                    const DataLayout *TD,
+                                    const DataLayout *DL,
                                      const TargetLibraryInfo *TLI) {
    // Convenient constant check, but redundant for recursive calls.
    if (Constant *C = dyn_cast<Constant>(V)) return C;
    Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return 0;
+  if (!I) return nullptr;
  
    if (Constant *C = Vals.lookup(I)) return C;
  
    // An instruction inside the loop depends on a value outside the loop that we
    // weren't given a mapping for, or a value such as a call inside the loop.
-  if (!canConstantEvolve(I, L)) return 0;
+  if (!canConstantEvolve(I, L)) return nullptr;
  
    // An unmapped PHI can be due to a branch or another loop inside this loop,
    // or due to this not being the initial iteration through a loop where we
    // couldn't compute the evolution of this particular PHI last time.
-  if (isa<PHINode>(I)) return 0;
+  if (isa<PHINode>(I)) return nullptr;
  
    std::vector<Constant*> Operands(I->getNumOperands());
  
@@ -4865,23 +4980,23 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
      Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
      if (!Operand) {
        Operands[i] = dyn_cast<Constant>(I->getOperand(i));
-      if (!Operands[i]) return 0;
+      if (!Operands[i]) return nullptr;
        continue;
      }
-    Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
+    Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
      Vals[Operand] = C;
-    if (!C) return 0;
+    if (!C) return nullptr;
      Operands[i] = C;
    }
  
    if (CmpInst *CI = dyn_cast<CmpInst>(I))
      return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
-                                           Operands[1], TD, TLI);
+                                           Operands[1], DL, TLI);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(Operands[0], TD);
+      return ConstantFoldLoadFromConstPtr(Operands[0], DL);
    }
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
                                    TLI);
  }
  
@@ -4899,7 +5014,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
      return I->second;
  
    if (BEs.ugt(MaxBruteForceIterations))
-    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
+    return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.
  
    Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
  
@@ -4911,22 +5026,22 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
    // entry must be a constant (coming in from outside of the loop), and the
    // second must be derived from the same PHI.
    bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
    for (BasicBlock::iterator I = Header->begin();
         (PHI = dyn_cast<PHINode>(I)); ++I) {
      Constant *StartCST =
        dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
-    if (StartCST == 0) continue;
+    if (!StartCST) continue;
      CurrentIterVals[PHI] = StartCST;
    }
    if (!CurrentIterVals.count(PN))
-    return RetVal = 0;
+    return RetVal = nullptr;
  
    Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
  
    // Execute the loop symbolically to determine the exit value.
    if (BEs.getActiveBits() >= 32)
-    return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+    return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
  
    unsigned NumIterations = BEs.getZExtValue(); // must be in range
    unsigned IterationNum = 0;
@@ -4937,10 +5052,10 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
      // Compute the value of the PHIs for the next iteration.
      // EvaluateExpression adds non-phi values to the CurrentIterVals map.
      DenseMap<Instruction *, Constant *> NextIterVals;
-    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
                                             TLI);
-    if (NextPHI == 0)
-      return 0;        // Couldn't evaluate!
+    if (!NextPHI)
+      return nullptr;        // Couldn't evaluate!
      NextIterVals[PN] = NextPHI;
  
      bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
@@ -4963,7 +5078,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
        Constant *&NextPHI = NextIterVals[PHI];
        if (!NextPHI) {   // Not already computed.
          Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
-        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
        }
        if (NextPHI != I->second)
          StoppedEvolving = false;
@@ -4987,7 +5102,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
                                                            Value *Cond,
                                                            bool ExitWhen) {
    PHINode *PN = getConstantEvolvingPHI(Cond, L);
-  if (PN == 0) return getCouldNotCompute();
+  if (!PN) return getCouldNotCompute();
  
    // If the loop is canonicalized, the PHI will have exactly two entries.
    // That's the only form we support here.
@@ -5000,12 +5115,12 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
    // One entry must be a constant (coming in from outside of the loop), and the
    // second must be derived from the same PHI.
    bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
    for (BasicBlock::iterator I = Header->begin();
         (PHI = dyn_cast<PHINode>(I)); ++I) {
      Constant *StartCST =
        dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
-    if (StartCST == 0) continue;
+    if (!StartCST) continue;
      CurrentIterVals[PHI] = StartCST;
    }
    if (!CurrentIterVals.count(PN))
@@ -5019,7 +5134,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
    for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
      ConstantInt *CondVal =
        dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
-                                                       TD, TLI));
+                                                       DL, TLI));
  
      // Couldn't symbolically evaluate.
      if (!CondVal) return getCouldNotCompute();
@@ -5049,7 +5164,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
        if (NextPHI) continue;    // Already computed!
  
        Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
-      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
      }
      CurrentIterVals.swap(NextIterVals);
    }
@@ -5075,7 +5190,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
      if (Values[u].first == L)
        return Values[u].second ? Values[u].second : V;
    }
-  Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0)));
+  Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
    // Otherwise compute it.
    const SCEV *C = computeSCEVAtScope(V, L);
    SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
@@ -5093,8 +5208,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
  /// Returns NULL if the SCEV isn't representable as a Constant.
  static Constant *BuildConstantFromSCEV(const SCEV *V) {
-  switch (V->getSCEVType()) {
-    default:  // TODO: smax, umax.
+  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
      case scCouldNotCompute:
      case scAddRecExpr:
        break;
@@ -5130,7 +5244,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
          }
          for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
            Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
-          if (!C2) return 0;
+          if (!C2) return nullptr;
  
            // First pointer!
            if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
@@ -5145,7 +5259,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
            // Don't bother trying to sum two pointers. We probably can't
            // statically compute a load that results from it anyway.
            if (C2->getType()->isPointerTy())
-            return 0;
+            return nullptr;
  
            if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
              if (PTy->getElementType()->isStructTy())
@@ -5163,10 +5277,10 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
        const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
        if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
          // Don't bother with pointers at all.
-        if (C->getType()->isPointerTy()) return 0;
+        if (C->getType()->isPointerTy()) return nullptr;
          for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
            Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
-          if (!C2 || C2->getType()->isPointerTy()) return 0;
+          if (!C2 || C2->getType()->isPointerTy()) return nullptr;
            C = ConstantExpr::getMul(C, C2);
          }
          return C;
@@ -5181,8 +5295,11 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
              return ConstantExpr::getUDiv(LHS, RHS);
        break;
      }
+    case scSMaxExpr:
+    case scUMaxExpr:
+      break; // TODO: smax, umax.
    }
-  return 0;
+  return nullptr;
  }
  
  const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
@@ -5249,17 +5366,17 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  
          // Check to see if getSCEVAtScope actually made an improvement.
          if (MadeImprovement) {
-          Constant *C = 0;
+          Constant *C = nullptr;
            if (const CmpInst *CI = dyn_cast<CmpInst>(I))
              C = ConstantFoldCompareInstOperands(CI->getPredicate(),
-                                                Operands[0], Operands[1], TD,
+                                                Operands[0], Operands[1], DL,
                                                  TLI);
            else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
              if (!LI->isVolatile())
-              C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
+              C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
            } else
              C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
-                                         Operands, TD, TLI);
+                                         Operands, DL, TLI);
            if (!C) return V;
            return getSCEV(C);
          }
@@ -5581,7 +5698,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
    // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
    // We have not yet seen any such cases.
    const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
-  if (StepC == 0 || StepC->getValue()->equalsInt(0))
+  if (!StepC || StepC->getValue()->equalsInt(0))
      return getCouldNotCompute();
  
    // For positive steps (counting up until unsigned overflow):
@@ -5628,6 +5745,16 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
        getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
      return ExitLimit(Exact, Exact, /*MustExit=*/false);
    }
+
+  // If Step is a power of two that evenly divides Start we know that the loop
+  // will always terminate.  Start may not be a constant so we just have the
+  // number of trailing zeros available.  This is safe even in presence of
+  // overflow as the recurrence will overflow to exactly 0.
+  const APInt &StepV = StepC->getValue()->getValue();
+  if (StepV.isPowerOf2() &&
+      GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros())
+    return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+
    // Then, try to solve the above equation provided that Start is constant.
    if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
      return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
@@ -6039,7 +6166,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
    default:
      llvm_unreachable("Unexpected ICmpInst::Predicate value!");
    case ICmpInst::ICMP_SGT:
-    Pred = ICmpInst::ICMP_SLT;
      std::swap(LHS, RHS);
    case ICmpInst::ICMP_SLT: {
      ConstantRange LHSRange = getSignedRange(LHS);
@@ -6051,7 +6177,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
      break;
    }
    case ICmpInst::ICMP_SGE:
-    Pred = ICmpInst::ICMP_SLE;
      std::swap(LHS, RHS);
    case ICmpInst::ICMP_SLE: {
      ConstantRange LHSRange = getSignedRange(LHS);
@@ -6063,7 +6188,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
      break;
    }
    case ICmpInst::ICMP_UGT:
-    Pred = ICmpInst::ICMP_ULT;
      std::swap(LHS, RHS);
    case ICmpInst::ICMP_ULT: {
      ConstantRange LHSRange = getUnsignedRange(LHS);
@@ -6075,7 +6199,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
      break;
    }
    case ICmpInst::ICMP_UGE:
-    Pred = ICmpInst::ICMP_ULE;
      std::swap(LHS, RHS);
    case ICmpInst::ICMP_ULE: {
      ConstantRange LHSRange = getUnsignedRange(LHS);
@@ -6481,7 +6604,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
      IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
               : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
  
-  const SCEV *MaxBECount = getCouldNotCompute();
+  const SCEV *MaxBECount;
    if (isa<SCEVConstant>(BECount))
      MaxBECount = BECount;
    else
@@ -6692,18 +6815,68 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
    return SE.getCouldNotCompute();
  }
  
-static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
-  APInt A = C1->getValue()->getValue().abs();
-  APInt B = C2->getValue()->getValue().abs();
-  uint32_t ABW = A.getBitWidth();
-  uint32_t BBW = B.getBitWidth();
+namespace {
+// Collect all steps of SCEV expressions.
+struct SCEVCollectStrides {
+  ScalarEvolution &SE;
+  SmallVectorImpl<const SCEV *> &Strides;
  
-  if (ABW > BBW)
-    B = B.zext(ABW);
-  else if (ABW < BBW)
-    A = A.zext(BBW);
+  SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
+      : SE(SE), Strides(S) {}
  
-  return APIntOps::GreatestCommonDivisor(A, B);
+  bool follow(const SCEV *S) {
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+      Strides.push_back(AR->getStepRecurrence(SE));
+    return true;
+  }
+  bool isDone() const { return false; }
+};
+
+// Collect all SCEVUnknown and SCEVMulExpr expressions.
+struct SCEVCollectTerms {
+  SmallVectorImpl<const SCEV *> &Terms;
+
+  SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
+      : Terms(T) {}
+
+  bool follow(const SCEV *S) {
+    if (isa<SCEVUnknown>(S) || isa<SCEVConstant>(S) || isa<SCEVMulExpr>(S)) {
+      Terms.push_back(S);
+
+      // Stop recursion: once we collected a term, do not walk its operands.
+      return false;
+    }
+
+    // Keep looking.
+    return true;
+  }
+  bool isDone() const { return false; }
+};
+}
+
+/// Find parametric terms in this SCEVAddRecExpr.
+void SCEVAddRecExpr::collectParametricTerms(
+    ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) const {
+  SmallVector<const SCEV *, 4> Strides;
+  SCEVCollectStrides StrideCollector(SE, Strides);
+  visitAll(this, StrideCollector);
+
+  DEBUG({
+      dbgs() << "Strides:\n";
+      for (const SCEV *S : Strides)
+        dbgs() << *S << "\n";
+    });
+
+  for (const SCEV *S : Strides) {
+    SCEVCollectTerms TermCollector(Terms);
+    visitAll(S, TermCollector);
+  }
+
+  DEBUG({
+      dbgs() << "Terms:\n";
+      for (const SCEV *T : Terms)
+        dbgs() << *T << "\n";
+    });
  }
  
  static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
@@ -6735,352 +6908,441 @@ static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
  }
  
  namespace {
-struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
-public:
-  // Pattern match Step into Start. When Step is a multiply expression, find
-  // the largest subexpression of Step that appears in Start. When Start is an
-  // add expression, try to match Step in the subexpressions of Start, non
-  // matching subexpressions are returned under Remainder.
-  static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
-                             const SCEV *Step, const SCEV **Remainder) {
-    assert(Remainder && "Remainder should not be NULL");
-    SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
-    const SCEV *Res = R.visit(Start);
-    *Remainder = R.Remainder;
-    return Res;
-  }
-
-  SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
-      : SE(S), GCD(G), Remainder(R) {
-    Zero = SE.getConstant(GCD->getType(), 0);
-    One = SE.getConstant(GCD->getType(), 1);
-  }
-
-  const SCEV *visitConstant(const SCEVConstant *Constant) {
-    if (GCD == Constant || Constant == Zero)
-      return GCD;
-
-    if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
-      const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
-      if (Res != One)
-        return Res;
-
-      Remainder = SE.getConstant(srem(Constant, CGCD));
-      Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
-      Res = SE.getConstant(gcd(Constant, CGCD));
-      return Res;
-    }
-
-    // When GCD is not a constant, it could be that the GCD is an Add, Mul,
-    // AddRec, etc., in which case we want to find out how many times the
-    // Constant divides the GCD: we then return that as the new GCD.
-    const SCEV *Rem = Zero;
-    const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
+struct FindSCEVSize {
+  int Size;
+  FindSCEVSize() : Size(0) {}
  
-    if (Res == One || Rem != Zero) {
-      Remainder = Constant;
-      return One;
-    }
-
-    assert(isa<SCEVConstant>(Res) && "Res should be a constant");
-    Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
-    return Res;
+  bool follow(const SCEV *S) {
+    ++Size;
+    // Keep looking at all operands of S.
+    return true;
    }
-
-  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
+  bool isDone() const {
+    return false;
    }
+};
+}
  
-  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
+// Returns the size of the SCEV S.
+static inline int sizeOfSCEV(const SCEV *S) {
+  FindSCEVSize F;
+  SCEVTraversal<FindSCEVSize> ST(F);
+  ST.visitAll(S);
+  return F.Size;
+}
  
-  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
+namespace {
  
-  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
-    if (GCD == Expr)
-      return GCD;
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
+public:
+  // Computes the Quotient and Remainder of the division of Numerator by
+  // Denominator.
+  static void divide(ScalarEvolution &SE, const SCEV *Numerator,
+                     const SCEV *Denominator, const SCEV **Quotient,
+                     const SCEV **Remainder) {
+    assert(Numerator && Denominator && *Quotient && *Remainder &&
+           "Uninitialized SCEV");
+
+    SCEVDivision D(SE, Numerator, Denominator);
+
+    // Check for the trivial case here to avoid having to check for it in the
+    // rest of the code.
+    if (Numerator == Denominator) {
+      *Quotient = D.One;
+      *Remainder = D.Zero;
+      return;
+    }
  
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-      const SCEV *Rem = Zero;
-      const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
+    if (Numerator == D.Zero) {
+      *Quotient = D.Zero;
+      *Remainder = D.Zero;
+      return;
+    }
  
-      // FIXME: There may be ambiguous situations: for instance,
-      // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
-      // The order in which the AddExpr is traversed computes a different GCD
-      // and Remainder.
-      if (Res != One)
-        GCD = Res;
-      if (Rem != Zero)
-        Remainder = SE.getAddExpr(Remainder, Rem);
+    // Split the Denominator when it is a product.
+    if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
+      const SCEV *Q, *R;
+      *Quotient = Numerator;
+      for (const SCEV *Op : T->operands()) {
+        divide(SE, *Quotient, Op, &Q, &R);
+        *Quotient = Q;
+
+        // Bail out when the Numerator is not divisible by one of the terms of
+        // the Denominator.
+        if (R != D.Zero) {
+          *Quotient = D.Zero;
+          *Remainder = Numerator;
+          return;
+        }
+      }
+      *Remainder = D.Zero;
+      return;
      }
  
-    return GCD;
+    D.visit(Numerator);
+    *Quotient = D.Quotient;
+    *Remainder = D.Remainder;
+  }
+
+  SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator)
+      : SE(S), Denominator(Denominator) {
+    Zero = SE.getConstant(Denominator->getType(), 0);
+    One = SE.getConstant(Denominator->getType(), 1);
+
+    // By default, we don't know how to divide Expr by Denominator.
+    // Providing the default here simplifies the rest of the code.
+    Quotient = Zero;
+    Remainder = Numerator;
+  }
+
+  // Except in the trivial case described above, we do not know how to divide
+  // Expr by Denominator for the following functions with empty implementation.
+  void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
+  void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
+  void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
+  void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
+  void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
+  void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+  void visitUnknown(const SCEVUnknown *Numerator) {}
+  void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+
+  void visitConstant(const SCEVConstant *Numerator) {
+    if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+      Quotient = SE.getConstant(sdiv(Numerator, D));
+      Remainder = SE.getConstant(srem(Numerator, D));
+      return;
+    }
    }
  
-  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
-    if (GCD == Expr)
-      return GCD;
+  void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
+    const SCEV *StartQ, *StartR, *StepQ, *StepR;
+    assert(Numerator->isAffine() && "Numerator should be affine");
+    divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
+    divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+    Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
+                                Numerator->getNoWrapFlags());
+    Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
+                                 Numerator->getNoWrapFlags());
+  }
+
+  void visitAddExpr(const SCEVAddExpr *Numerator) {
+    SmallVector<const SCEV *, 2> Qs, Rs;
+    for (const SCEV *Op : Numerator->operands()) {
+      const SCEV *Q, *R;
+      divide(SE, Op, Denominator, &Q, &R);
+      Qs.push_back(Q);
+      Rs.push_back(R);
+    }
  
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-      if (Expr->getOperand(i) == GCD)
-        return GCD;
+    if (Qs.size() == 1) {
+      Quotient = Qs[0];
+      Remainder = Rs[0];
+      return;
      }
  
-    // If we have not returned yet, it means that GCD is not part of Expr.
-    const SCEV *PartialGCD = One;
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-      const SCEV *Rem = Zero;
-      const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
-      if (Rem != Zero)
-        // GCD does not divide Expr->getOperand(i).
+    Quotient = SE.getAddExpr(Qs);
+    Remainder = SE.getAddExpr(Rs);
+  }
+
+  void visitMulExpr(const SCEVMulExpr *Numerator) {
+    SmallVector<const SCEV *, 2> Qs;
+
+    bool FoundDenominatorTerm = false;
+    for (const SCEV *Op : Numerator->operands()) {
+      if (FoundDenominatorTerm) {
+        Qs.push_back(Op);
          continue;
+      }
  
-      if (Res == GCD)
-        return GCD;
-      PartialGCD = SE.getMulExpr(PartialGCD, Res);
-      if (PartialGCD == GCD)
-        return GCD;
+      // Check whether Denominator divides one of the product operands.
+      const SCEV *Q, *R;
+      divide(SE, Op, Denominator, &Q, &R);
+      if (R != Zero) {
+        Qs.push_back(Op);
+        continue;
+      }
+      FoundDenominatorTerm = true;
+      Qs.push_back(Q);
      }
  
-    if (PartialGCD != One)
-      return PartialGCD;
-
-    Remainder = Expr;
-    const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
-    if (!Mul)
-      return PartialGCD;
-
-    // When the GCD is a multiply expression, try to decompose it:
-    // this occurs when Step does not divide the Start expression
-    // as in: {(-4 + (3 * %m)),+,(2 * %m)}
-    for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
-      const SCEV *Rem = Zero;
-      const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
-      if (Rem == Zero) {
-        Remainder = Rem;
-        return Res;
-      }
+    if (FoundDenominatorTerm) {
+      Remainder = Zero;
+      if (Qs.size() == 1)
+        Quotient = Qs[0];
+      else
+        Quotient = SE.getMulExpr(Qs);
+      return;
      }
  
-    return PartialGCD;
-  }
+    if (!isa<SCEVUnknown>(Denominator)) {
+      Quotient = Zero;
+      Remainder = Numerator;
+      return;
+    }
  
-  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
+    // The Remainder is obtained by replacing Denominator by 0 in Numerator.
+    ValueToValueMap RewriteMap;
+    RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+        cast<SCEVConstant>(Zero)->getValue();
+    Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+
+    // Quotient is (Numerator - Remainder) divided by Denominator.
+    const SCEV *Q, *R;
+    const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
+    if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
+      // This SCEV does not seem to simplify: fail the division here.
+      Quotient = Zero;
+      Remainder = Numerator;
+      return;
+    }
+    divide(SE, Diff, Denominator, &Q, &R);
+    assert(R == Zero &&
+           "(Numerator - Remainder) should evenly divide Denominator");
+    Quotient = Q;
    }
  
-  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
-    if (GCD == Expr)
-      return GCD;
+private:
+  ScalarEvolution &SE;
+  const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
+};
+}
  
-    if (!Expr->isAffine()) {
-      Remainder = Expr;
-      return GCD;
-    }
+// Find the Greatest Common Divisor of A and B.
+static const SCEV *
+findGCD(ScalarEvolution &SE, const SCEV *A, const SCEV *B) {
  
-    const SCEV *Rem = Zero;
-    const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
-    if (Rem != Zero)
-      Remainder = SE.getAddExpr(Remainder, Rem);
+  if (const SCEVConstant *CA = dyn_cast<SCEVConstant>(A))
+    if (const SCEVConstant *CB = dyn_cast<SCEVConstant>(B))
+      return SE.getConstant(gcd(CA, CB));
  
-    Rem = Zero;
-    Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
-    if (Rem != Zero) {
-      Remainder = Expr;
-      return GCD;
-    }
+  const SCEV *One = SE.getConstant(A->getType(), 1);
+  if (isa<SCEVConstant>(A) && isa<SCEVUnknown>(B))
+    return One;
+  if (isa<SCEVUnknown>(A) && isa<SCEVConstant>(B))
+    return One;
  
-    return Res;
+  const SCEV *Q, *R;
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(A)) {
+    SmallVector<const SCEV *, 2> Qs;
+    for (const SCEV *Op : M->operands())
+      Qs.push_back(findGCD(SE, Op, B));
+    return SE.getMulExpr(Qs);
    }
-
-  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(B)) {
+    SmallVector<const SCEV *, 2> Qs;
+    for (const SCEV *Op : M->operands())
+      Qs.push_back(findGCD(SE, A, Op));
+    return SE.getMulExpr(Qs);
    }
  
-  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
+  const SCEV *Zero = SE.getConstant(A->getType(), 0);
+  SCEVDivision::divide(SE, A, B, &Q, &R);
+  if (R == Zero)
+    return B;
  
-  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
+  SCEVDivision::divide(SE, B, A, &Q, &R);
+  if (R == Zero)
+    return A;
  
-  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
-    return One;
-  }
+  return One;
+}
  
-private:
-  ScalarEvolution &SE;
-  const SCEV *GCD, *Remainder, *Zero, *One;
-};
+// Find the Greatest Common Divisor of all the SCEVs in Terms.
+static const SCEV *
+findGCD(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) {
+  assert(Terms.size() > 0 && "Terms vector is empty");
  
-struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
-public:
-  // Remove from Start all multiples of Step.
-  static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
-                            const SCEV *Step) {
-    SCEVDivision D(SE, Step);
-    const SCEV *Rem = D.Zero;
-    (void)Rem;
-    // The division is guaranteed to succeed: Step should divide Start with no
-    // remainder.
-    assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
-           "Step should divide Start with no remainder.");
-    return D.visit(Start);
-  }
+  const SCEV *GCD = Terms[0];
+  for (const SCEV *T : Terms)
+    GCD = findGCD(SE, GCD, T);
  
-  SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
-    Zero = SE.getConstant(GCD->getType(), 0);
-    One = SE.getConstant(GCD->getType(), 1);
-  }
+  return GCD;
+}
  
-  const SCEV *visitConstant(const SCEVConstant *Constant) {
-    if (GCD == Constant)
-      return One;
+static void findArrayDimensionsRec(ScalarEvolution &SE,
+                                   SmallVectorImpl<const SCEV *> &Terms,
+                                   SmallVectorImpl<const SCEV *> &Sizes,
+                                   const SCEV *Zero, const SCEV *One) {
+  // The GCD of all Terms is the dimension of the innermost dimension.
+  const SCEV *GCD = findGCD(SE, Terms);
  
-    if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
-      return SE.getConstant(sdiv(Constant, CGCD));
-    return Constant;
-  }
+  // End of recursion.
+  if (Terms.size() == 1) {
+    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(GCD)) {
+      SmallVector<const SCEV *, 2> Qs;
+      for (const SCEV *Op : M->operands())
+        if (!isa<SCEVConstant>(Op))
+          Qs.push_back(Op);
  
-  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
+      GCD = SE.getMulExpr(Qs);
+    }
  
-  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
+    Sizes.push_back(GCD);
+    return;
    }
  
-  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
+  for (unsigned I = 0; I < Terms.size(); ++I) {
+    // Normalize the terms before the next call to findArrayDimensionsRec.
+    const SCEV *Q, *R;
+    SCEVDivision::divide(SE, Terms[I], GCD, &Q, &R);
+    assert(R == Zero && "GCD does not evenly divide one of the terms");
+    Terms[I] = Q;
    }
  
-  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
-    if (GCD == Expr)
-      return One;
+  // Remove all SCEVConstants.
+  for (unsigned I = 0; I < Terms.size();)
+    if (isa<SCEVConstant>(Terms[I]))
+      Terms.erase(Terms.begin() + I);
+    else
+      ++I;
+
+  if (Terms.size() > 0)
+    findArrayDimensionsRec(SE, Terms, Sizes, Zero, One);
+  Sizes.push_back(GCD);
+}
  
-    SmallVector<const SCEV *, 2> Operands;
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
-      Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+namespace {
+struct FindParameter {
+  bool FoundParameter;
+  FindParameter() : FoundParameter(false) {}
  
-    if (Operands.size() == 1)
-      return Operands[0];
-    return SE.getAddExpr(Operands);
+  bool follow(const SCEV *S) {
+    if (isa<SCEVUnknown>(S)) {
+      FoundParameter = true;
+      // Stop recursion: we found a parameter.
+      return false;
+    }
+    // Keep looking.
+    return true;
+  }
+  bool isDone() const {
+    // Stop recursion if we have found a parameter.
+    return FoundParameter;
    }
+};
+}
  
-  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
-    if (GCD == Expr)
-      return One;
+// Returns true when S contains at least a SCEVUnknown parameter.
+static inline bool
+containsParameters(const SCEV *S) {
+  FindParameter F;
+  SCEVTraversal<FindParameter> ST(F);
+  ST.visitAll(S);
  
-    bool FoundGCDTerm = false;
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
-      if (Expr->getOperand(i) == GCD)
-        FoundGCDTerm = true;
+  return F.FoundParameter;
+}
  
-    SmallVector<const SCEV *, 2> Operands;
-    if (FoundGCDTerm) {
-      FoundGCDTerm = false;
-      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-        if (FoundGCDTerm)
-          Operands.push_back(Expr->getOperand(i));
-        else if (Expr->getOperand(i) == GCD)
-          FoundGCDTerm = true;
-        else
-          Operands.push_back(Expr->getOperand(i));
-      }
-    } else {
-      FoundGCDTerm = false;
-      const SCEV *PartialGCD = One;
-      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-        if (PartialGCD == GCD) {
-          Operands.push_back(Expr->getOperand(i));
-          continue;
-        }
+// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
+static inline bool
+containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+  for (const SCEV *T : Terms)
+    if (containsParameters(T))
+      return true;
+  return false;
+}
  
-        const SCEV *Rem = Zero;
-        const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
-        if (Rem == Zero) {
-          PartialGCD = SE.getMulExpr(PartialGCD, Res);
-          Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
-        } else {
-          Operands.push_back(Expr->getOperand(i));
-        }
-      }
-    }
+// Return the number of product terms in S.
+static inline int numberOfTerms(const SCEV *S) {
+  if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
+    return Expr->getNumOperands();
+  return 1;
+}
  
-    if (Operands.size() == 1)
-      return Operands[0];
-    return SE.getMulExpr(Operands);
-  }
+/// Second step of delinearization: compute the array dimensions Sizes from the
+/// set of Terms extracted from the memory access function of this SCEVAddRec.
+void SCEVAddRecExpr::findArrayDimensions(
+    ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms,
+    SmallVectorImpl<const SCEV *> &Sizes) const {
  
-  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
+  if (Terms.size() < 2)
+    return;
  
-  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
-    if (GCD == Expr)
-      return One;
+  // Early return when Terms do not contain parameters: we do not delinearize
+  // non parametric SCEVs.
+  if (!containsParameters(Terms))
+    return;
  
-    assert(Expr->isAffine() && "Expr should be affine");
+  DEBUG({
+      dbgs() << "Terms:\n";
+      for (const SCEV *T : Terms)
+        dbgs() << *T << "\n";
+    });
  
-    const SCEV *Start = divide(SE, Expr->getStart(), GCD);
-    const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
+  // Remove duplicates.
+  std::sort(Terms.begin(), Terms.end());
+  Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
  
-    return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
-                            Expr->getNoWrapFlags());
-  }
+  // Put larger terms first.
+  std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+    return numberOfTerms(LHS) > numberOfTerms(RHS);
+  });
  
-  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
+  DEBUG({
+      dbgs() << "Terms after sorting:\n";
+      for (const SCEV *T : Terms)
+        dbgs() << *T << "\n";
+    });
  
-  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
+  const SCEV *Zero = SE.getConstant(this->getType(), 0);
+  const SCEV *One = SE.getConstant(this->getType(), 1);
+  findArrayDimensionsRec(SE, Terms, Sizes, Zero, One);
  
-  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
+  DEBUG({
+      dbgs() << "Sizes:\n";
+      for (const SCEV *S : Sizes)
+        dbgs() << *S << "\n";
+    });
+}
  
-  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
-    return Expr;
+/// Third step of delinearization: compute the access functions for the
+/// Subscripts based on the dimensions in Sizes.
+const SCEV *SCEVAddRecExpr::computeAccessFunctions(
+    ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts,
+    SmallVectorImpl<const SCEV *> &Sizes) const {
+  // Early exit in case this SCEV is not an affine multivariate function.
+  const SCEV *Zero = SE.getConstant(this->getType(), 0);
+  if (!this->isAffine())
+    return Zero;
+
+  const SCEV *Res = this, *Remainder = Zero;
+  int Last = Sizes.size() - 1;
+  for (int i = Last; i >= 0; i--) {
+    const SCEV *Q, *R;
+    SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
+
+    DEBUG({
+        dbgs() << "Res: " << *Res << "\n";
+        dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
+        dbgs() << "Res divided by Sizes[i]:\n";
+        dbgs() << "Quotient: " << *Q << "\n";
+        dbgs() << "Remainder: " << *R << "\n";
+      });
+
+    Res = Q;
+
+    if (i == Last) {
+      // Do not record the last subscript corresponding to the size of elements
+      // in the array.
+      Remainder = R;
+      continue;
+    }
+
+    // Record the access function for the current subscript.
+    Subscripts.push_back(R);
    }
  
-private:
-  ScalarEvolution &SE;
-  const SCEV *GCD, *Zero, *One;
-};
+  // Also push in last position the remainder of the last division: it will be
+  // the access function of the innermost dimension.
+  Subscripts.push_back(Res);
+
+  std::reverse(Subscripts.begin(), Subscripts.end());
+
+  DEBUG({
+      dbgs() << "Subscripts:\n";
+      for (const SCEV *S : Subscripts)
+        dbgs() << *S << "\n";
+    });
+  return Remainder;
  }
  
  /// Splits the SCEV into two vectors of SCEVs representing the subscripts and
@@ -7136,87 +7398,27 @@ const SCEV *
  SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
                              SmallVectorImpl<const SCEV *> &Subscripts,
                              SmallVectorImpl<const SCEV *> &Sizes) const {
-  // Early exit in case this SCEV is not an affine multivariate function.
-  if (!this->isAffine())
-    return this;
+  // First step: collect parametric terms.
+  SmallVector<const SCEV *, 4> Terms;
+  collectParametricTerms(SE, Terms);
  
-  const SCEV *Start = this->getStart();
-  const SCEV *Step = this->getStepRecurrence(SE);
+  // Second step: find subscript sizes.
+  findArrayDimensions(SE, Terms, Sizes);
  
-  // Build the SCEV representation of the cannonical induction variable in the
-  // loop of this SCEV.
-  const SCEV *Zero = SE.getConstant(this->getType(), 0);
-  const SCEV *One = SE.getConstant(this->getType(), 1);
-  const SCEV *IV =
-      SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
+  // Third step: compute the access functions for each subscript.
+  const SCEV *Remainder = computeAccessFunctions(SE, Subscripts, Sizes);
  
-  DEBUG(dbgs() << "(delinearize: " << *this << "\n");
+  DEBUG({
+      dbgs() << "succeeded to delinearize " << *this << "\n";
+      dbgs() << "ArrayDecl[UnknownSize]";
+      for (const SCEV *S : Sizes)
+        dbgs() << "[" << *S << "]";
  
-  // Currently we fail to delinearize when the stride of this SCEV is 1. We
-  // could decide to not fail in this case: we could just return 1 for the size
-  // of the subscript, and this same SCEV for the access function.
-  if (Step == One) {
-    DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
-    return this;
-  }
-
-  // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
-  const SCEV *Remainder = NULL;
-  const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
-
-  DEBUG(dbgs() << "GCD: " << *GCD << "\n");
-  DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
-
-  // Same remark as above: we currently fail the delinearization, although we
-  // can very well handle this special case.
-  if (GCD == One) {
-    DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
-    return this;
-  }
-
-  // As findGCD computed Remainder, GCD divides "Start - Remainder." The
-  // Quotient is then this SCEV without Remainder, scaled down by the GCD.  The
-  // Quotient is what will be used in the next subscript delinearization.
-  const SCEV *Quotient =
-      SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
-  DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
-
-  const SCEV *Rem;
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
-    // Recursively call delinearize on the Quotient until there are no more
-    // multiples that can be recognized.
-    Rem = AR->delinearize(SE, Subscripts, Sizes);
-  else
-    Rem = Quotient;
-
-  // Scale up the cannonical induction variable IV by whatever remains from the
-  // Step after division by the GCD: the GCD is the size of all the sub-array.
-  if (Step != GCD) {
-    Step = SCEVDivision::divide(SE, Step, GCD);
-    IV = SE.getMulExpr(IV, Step);
-  }
-  // The access function in the current subscript is computed as the cannonical
-  // induction variable IV (potentially scaled up by the step) and offset by
-  // Rem, the offset of delinearization in the sub-array.
-  const SCEV *Index = SE.getAddExpr(IV, Rem);
-
-  // Record the access function and the size of the current subscript.
-  Subscripts.push_back(Index);
-  Sizes.push_back(GCD);
-
-#ifndef NDEBUG
-  int Size = Sizes.size();
-  DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
-  DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
-  for (int i = 0; i < Size - 1; i++)
-    DEBUG(dbgs() << "[" << *Sizes[i] << "]");
-  DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
-
-  DEBUG(dbgs() << "ArrayRef");
-  for (int i = 0; i < Size; i++)
-    DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
-  DEBUG(dbgs() << "\n)\n");
-#endif
+      dbgs() << "ArrayRef";
+      for (const SCEV *S : Sizes)
+        dbgs() << "[" << *S << "]";
+      dbgs() << "\n";
+    });
  
    return Remainder;
  }
@@ -7240,11 +7442,8 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
    // so that future queries will recompute the expressions using the new
    // value.
    Value *Old = getValPtr();
-  SmallVector<User *, 16> Worklist;
+  SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
    SmallPtrSet<User *, 8> Visited;
-  for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
-       UI != UE; ++UI)
-    Worklist.push_back(*UI);
    while (!Worklist.empty()) {
      User *U = Worklist.pop_back_val();
      // Deleting the Old value will cause this to dangle. Postpone
@@ -7256,9 +7455,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
      if (PHINode *PN = dyn_cast<PHINode>(U))
        SE->ConstantEvolutionLoopExitValue.erase(PN);
      SE->ValueExprMap.erase(U);
-    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
-         UI != UE; ++UI)
-      Worklist.push_back(*UI);
+    Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
    }
    // Delete the Old value.
    if (PHINode *PN = dyn_cast<PHINode>(Old))
@@ -7275,14 +7472,16 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
  //===----------------------------------------------------------------------===//
  
  ScalarEvolution::ScalarEvolution()
-  : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) {
+  : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
+    BlockDispositions(64), FirstUnknown(nullptr) {
    initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
  }
  
  bool ScalarEvolution::runOnFunction(Function &F) {
    this->F = &F;
    LI = &getAnalysis<LoopInfo>();
-  TD = getAnalysisIfAvailable<DataLayout>();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
    TLI = &getAnalysis<TargetLibraryInfo>();
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    return false;
@@ -7293,7 +7492,7 @@ void ScalarEvolution::releaseMemory() {
    // destructors, so that they release their references to their values.
    for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
      U->~SCEVUnknown();
-  FirstUnknown = 0;
+  FirstUnknown = nullptr;
  
    ValueExprMap.clear();
  
@@ -7432,7 +7631,7 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
  
  ScalarEvolution::LoopDisposition
  ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
-  switch (S->getSCEVType()) {
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
    case scConstant:
      return LoopInvariant;
    case scTruncate:
@@ -7505,8 +7704,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
      return LoopInvariant;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default: llvm_unreachable("Unknown SCEV kind!");
    }
+  llvm_unreachable("Unknown SCEV kind!");
  }
  
  bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
@@ -7538,7 +7737,7 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  
  ScalarEvolution::BlockDisposition
  ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-  switch (S->getSCEVType()) {
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
    case scConstant:
      return ProperlyDominatesBlock;
    case scTruncate:
@@ -7595,9 +7794,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
      return ProperlyDominatesBlock;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default:
-    llvm_unreachable("Unknown SCEV kind!");
    }
+  llvm_unreachable("Unknown SCEV kind!");
  }
  
  bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
@@ -7652,7 +7850,7 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
  
  typedef DenseMap<const Loop *, std::string> VerifyMap;
  
-/// replaceSubString - Replaces all occurences of From in Str with To.
+/// replaceSubString - Replaces all occurrences of From in Str with To.
  static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
    size_t Pos = 0;
    while ((Pos = Str.find(From, Pos)) != std::string::npos) {