Add a basic verifier for SCEV's backedge taken counts.

[oota-llvm.git] / lib / Analysis / ScalarEvolution.cpp
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp

index daf7742b0453266305514d7ffe4961d884869bff..8c2ba6ae83a5e4da913c877ff52ee21191505e57 100644 (file)
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -73,7 +73,7 @@
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/Assembly/Writer.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/ConstantRange.h"
@@ -105,6 +105,11 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                                   "derived loop"),
                          cl::init(100));
  
+// FIXME: Enable this with XDEBUG when the test suite is clean.
+static cl::opt<bool>
+VerifySCEV("verify-scev",
+           cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
+
  INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
                  "Scalar Evolution Analysis", false, true)
  INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -122,10 +127,12 @@ char ScalarEvolution::ID = 0;
  // Implementation of the SCEV class.
  //
  
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void SCEV::dump() const {
    print(dbgs());
    dbgs() << '\n';
  }
+#endif
  
  void SCEV::print(raw_ostream &OS) const {
    switch (getSCEVType()) {
@@ -259,11 +266,9 @@ Type *SCEV::getType() const {
      return cast<SCEVUnknown>(this)->getType();
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return 0;
-  default: break;
+  default:
+    llvm_unreachable("Unknown SCEV kind!");
    }
-  llvm_unreachable("Unknown SCEV kind!");
-  return 0;
  }
  
  bool SCEV::isZero() const {
@@ -284,6 +289,20 @@ bool SCEV::isAllOnesValue() const {
    return false;
  }
  
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+bool SCEV::isNonConstantNegative() const {
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
+  if (!Mul) return false;
+
+  // If there is a constant factor, it will be first.
+  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+  if (!SC) return false;
+
+  // Return true if the value is negative, this matches things like (-42 * V).
+  return SC->getValue()->getValue().isNegative();
+}
+
  SCEVCouldNotCompute::SCEVCouldNotCompute() :
    SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
  
@@ -597,11 +616,8 @@ namespace {
        }
  
        default:
-        break;
+        llvm_unreachable("Unknown SCEV kind!");
        }
-
-      llvm_unreachable("Unknown SCEV kind!");
-      return 0;
      }
    };
  }
@@ -817,8 +833,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
    // Fold if the operand is constant.
    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
      return getConstant(
-      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(),
-                                               getEffectiveSCEVType(Ty))));
+      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
  
    // trunc(trunc(x)) --> trunc(x)
    if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
@@ -870,13 +885,6 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
      return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
    }
  
-  // As a special case, fold trunc(undef) to undef. We don't want to
-  // know too much about SCEVUnknowns, but this special case is handy
-  // and harmless.
-  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
-    if (isa<UndefValue>(U->getValue()))
-      return getSCEV(UndefValue::get(Ty));
-
    // The cast wasn't folded; create an explicit cast node. We can reuse
    // the existing insert position since if we get here, we won't have
    // made any changes which would invalidate it.
@@ -897,8 +905,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
    // Fold if the operand is constant.
    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
      return getConstant(
-      cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(),
-                                              getEffectiveSCEVType(Ty))));
+      cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
  
    // zext(zext(x)) --> zext(x)
    if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
@@ -967,12 +974,15 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
            Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
            // Check whether Start+Step*MaxBECount has no unsigned overflow.
            const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
-          const SCEV *Add = getAddExpr(Start, ZMul);
+          const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
+          const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
+          const SCEV *WideMaxBECount =
+            getZeroExtendExpr(CastedMaxBECount, WideTy);
            const SCEV *OperandExtendedAdd =
-            getAddExpr(getZeroExtendExpr(Start, WideTy),
-                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+            getAddExpr(WideStart,
+                       getMulExpr(WideMaxBECount,
                                    getZeroExtendExpr(Step, WideTy)));
-          if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+          if (ZAdd == OperandExtendedAdd) {
              // Cache knowledge of AR NUW, which is propagated to this AddRec.
              const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
              // Return the expression with the addrec on the outside.
@@ -982,13 +992,11 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
            }
            // Similar to above, only this time treat the step value as signed.
            // This covers loops that count down.
-          const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
-          Add = getAddExpr(Start, SMul);
            OperandExtendedAdd =
-            getAddExpr(getZeroExtendExpr(Start, WideTy),
-                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+            getAddExpr(WideStart,
+                       getMulExpr(WideMaxBECount,
                                    getSignExtendExpr(Step, WideTy)));
-          if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+          if (ZAdd == OperandExtendedAdd) {
              // Cache knowledge of AR NW, which is propagated to this AddRec.
              // Negative step causes unsigned wrap, but it still can't self-wrap.
              const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
@@ -1155,8 +1163,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
    // Fold if the operand is constant.
    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
      return getConstant(
-      cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(),
-                                              getEffectiveSCEVType(Ty))));
+      cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
  
    // sext(sext(x)) --> sext(x)
    if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
@@ -1233,12 +1240,15 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
            Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
            // Check whether Start+Step*MaxBECount has no signed overflow.
            const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
-          const SCEV *Add = getAddExpr(Start, SMul);
+          const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
+          const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
+          const SCEV *WideMaxBECount =
+            getZeroExtendExpr(CastedMaxBECount, WideTy);
            const SCEV *OperandExtendedAdd =
-            getAddExpr(getSignExtendExpr(Start, WideTy),
-                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+            getAddExpr(WideStart,
+                       getMulExpr(WideMaxBECount,
                                    getSignExtendExpr(Step, WideTy)));
-          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+          if (SAdd == OperandExtendedAdd) {
              // Cache knowledge of AR NSW, which is propagated to this AddRec.
              const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
              // Return the expression with the addrec on the outside.
@@ -1248,13 +1258,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
            }
            // Similar to above, only this time treat the step value as unsigned.
            // This covers loops that count up with an unsigned step.
-          const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
-          Add = getAddExpr(Start, UMul);
            OperandExtendedAdd =
-            getAddExpr(getSignExtendExpr(Start, WideTy),
-                       getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+            getAddExpr(WideStart,
+                       getMulExpr(WideMaxBECount,
                                    getZeroExtendExpr(Step, WideTy)));
-          if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) {
+          if (SAdd == OperandExtendedAdd) {
              // Cache knowledge of AR NSW, which is propagated to this AddRec.
              const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
              // Return the expression with the addrec on the outside.
@@ -1336,13 +1344,6 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
      return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
    }
  
-  // As a special case, fold anyext(undef) to undef. We don't want to
-  // know too much about SCEVUnknowns, but this special case is handy
-  // and harmless.
-  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
-    if (isa<UndefValue>(U->getValue()))
-      return getSCEV(UndefValue::get(Ty));
-
    // If the expression is obviously signed, use the sext cast value.
    if (isa<SCEVSMaxExpr>(Op))
      return SExt;
@@ -1830,7 +1831,7 @@ static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  
  /// Compute the result of "n choose k", the binomial coefficient.  If an
  /// intermediate computation overflows, Overflow will be set and the return will
-/// be garbage. Overflow is not cleared on absense of overflow.
+/// be garbage. Overflow is not cleared on absence of overflow.
  static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
    // We use the multiplicative formula:
    //     n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
@@ -2029,63 +2030,67 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
      for (unsigned OtherIdx = Idx+1;
           OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
           ++OtherIdx) {
-      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
-        // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
-        // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
-        //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
-        //   ]]],+,...up to x=2n}.
-        // Note that the arguments to choose() are always integers with values
-        // known at compile time, never SCEV objects.
-        //
-        // The implementation avoids pointless extra computations when the two
-        // addrec's are of different length (mathematically, it's equivalent to
-        // an infinite stream of zeros on the right).
-        bool OpsModified = false;
-        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-             ++OtherIdx)
-          if (const SCEVAddRecExpr *OtherAddRec =
-                dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
-            if (OtherAddRec->getLoop() == AddRecLoop) {
-              bool Overflow = false;
-              Type *Ty = AddRec->getType();
-              bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
-              SmallVector<const SCEV*, 7> AddRecOps;
-              for (int x = 0, xe = AddRec->getNumOperands() +
-                     OtherAddRec->getNumOperands() - 1;
-                   x != xe && !Overflow; ++x) {
-                const SCEV *Term = getConstant(Ty, 0);
-                for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
-                  uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
-                  for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
-                         ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
-                       z < ze && !Overflow; ++z) {
-                    uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
-                    uint64_t Coeff;
-                    if (LargerThan64Bits)
-                      Coeff = umul_ov(Coeff1, Coeff2, Overflow);
-                    else
-                      Coeff = Coeff1*Coeff2;
-                    const SCEV *CoeffTerm = getConstant(Ty, Coeff);
-                    const SCEV *Term1 = AddRec->getOperand(y-z);
-                    const SCEV *Term2 = OtherAddRec->getOperand(z);
-                    Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
-                  }
-                }
-                AddRecOps.push_back(Term);
-              }
-              if (!Overflow) {
-                const SCEV *NewAddRec = getAddRecExpr(AddRecOps,
-                                                      AddRec->getLoop(),
-                                                      SCEV::FlagAnyWrap);
-                if (Ops.size() == 2) return NewAddRec;
-                Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
-                Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
-                OpsModified = true;
-              }
+      if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
+        continue;
+
+      // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+      // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+      //       choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+      //   ]]],+,...up to x=2n}.
+      // Note that the arguments to choose() are always integers with values
+      // known at compile time, never SCEV objects.
+      //
+      // The implementation avoids pointless extra computations when the two
+      // addrec's are of different length (mathematically, it's equivalent to
+      // an infinite stream of zeros on the right).
+      bool OpsModified = false;
+      for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+           ++OtherIdx) {
+        const SCEVAddRecExpr *OtherAddRec =
+          dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+        if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
+          continue;
+
+        bool Overflow = false;
+        Type *Ty = AddRec->getType();
+        bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+        SmallVector<const SCEV*, 7> AddRecOps;
+        for (int x = 0, xe = AddRec->getNumOperands() +
+               OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
+          const SCEV *Term = getConstant(Ty, 0);
+          for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+            uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+            for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+                   ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+                 z < ze && !Overflow; ++z) {
+              uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+              uint64_t Coeff;
+              if (LargerThan64Bits)
+                Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+              else
+                Coeff = Coeff1*Coeff2;
+              const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+              const SCEV *Term1 = AddRec->getOperand(y-z);
+              const SCEV *Term2 = OtherAddRec->getOperand(z);
+              Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
              }
-        if (OpsModified)
-          return getMulExpr(Ops);
+          }
+          AddRecOps.push_back(Term);
+        }
+        if (!Overflow) {
+          const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+                                                SCEV::FlagAnyWrap);
+          if (Ops.size() == 2) return NewAddRec;
+          Ops[Idx] = NewAddRec;
+          Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+          OpsModified = true;
+          AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
+          if (!AddRec)
+            break;
+        }
        }
+      if (OpsModified)
+        return getMulExpr(Ops);
      }
  
      // Otherwise couldn't fold anything into this recurrence.  Move onto the
@@ -2581,13 +2586,12 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
    return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
  }
  
-const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
-  // If we have TargetData, we can bypass creating a target-independent
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy, Type *IntPtrTy) {
+  // If we have DataLayout, we can bypass creating a target-independent
    // constant expression and then folding it back into a ConstantInt.
    // This is just a compile-time optimization.
    if (TD)
-    return getConstant(TD->getIntPtrType(getContext()),
-                       TD->getTypeAllocSize(AllocTy));
+    return getConstant(IntPtrTy, TD->getTypeAllocSize(AllocTy));
  
    Constant *C = ConstantExpr::getSizeOf(AllocTy);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
@@ -2606,13 +2610,13 @@ const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
    return getTruncateOrZeroExtend(getSCEV(C), Ty);
  }
  
-const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, Type *IntPtrTy,
                                               unsigned FieldNo) {
-  // If we have TargetData, we can bypass creating a target-independent
+  // If we have DataLayout, we can bypass creating a target-independent
    // constant expression and then folding it back into a ConstantInt.
    // This is just a compile-time optimization.
    if (TD)
-    return getConstant(TD->getIntPtrType(getContext()),
+    return getConstant(IntPtrTy,
                         TD->getStructLayout(STy)->getElementOffset(FieldNo));
  
    Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
@@ -2673,7 +2677,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {
  uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
    assert(isSCEVable(Ty) && "Type is not SCEVable!");
  
-  // If we have a TargetData, use it!
+  // If we have a DataLayout, use it!
    if (TD)
      return TD->getTypeSizeInBits(Ty);
  
@@ -2681,7 +2685,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
    if (Ty->isIntegerTy())
      return Ty->getPrimitiveSizeInBits();
  
-  // The only other support type is pointer. Without TargetData, conservatively
+  // The only other support type is pointer. Without DataLayout, conservatively
    // assume pointers are 64-bit.
    assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
    return 64;
@@ -2699,9 +2703,9 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
  
    // The only other support type is pointer.
    assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
-  if (TD) return TD->getIntPtrType(getContext());
+  if (TD) return TD->getIntPtrType(Ty);
  
-  // Without TargetData, conservatively assume pointers are 64-bit.
+  // Without DataLayout, conservatively assume pointers are 64-bit.
    return Type::getInt64Ty(getContext());
  }
  
@@ -2714,7 +2718,7 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
  const SCEV *ScalarEvolution::getSCEV(Value *V) {
    assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
  
-  ValueExprMapType::const_iterator I = ValueExprMap.find(V);
+  ValueExprMapType::const_iterator I = ValueExprMap.find_as(V);
    if (I != ValueExprMap.end()) return I->second;
    const SCEV *S = createSCEV(V);
  
@@ -2951,7 +2955,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
      if (!Visited.insert(I)) continue;
  
      ValueExprMapType::iterator It =
-      ValueExprMap.find(static_cast<Value *>(I));
+      ValueExprMap.find_as(static_cast<Value *>(I));
      if (It != ValueExprMap.end()) {
        const SCEV *Old = It->second;
  
@@ -3008,7 +3012,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
        if (BEValueV && StartValueV) {
          // While we are analyzing this PHI node, handle its value symbolically.
          const SCEV *SymbolicName = getUnknown(PN);
-        assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
+        assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
                 "PHI node already processed?");
          ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
  
@@ -3152,13 +3156,13 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
      if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
        // For a struct, add the member offset.
        unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
-      const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+      const SCEV *FieldOffset = getOffsetOfExpr(STy, IntPtrTy, FieldNo);
  
        // Add the field offset to the running total offset.
        TotalOffset = getAddExpr(TotalOffset, FieldOffset);
      } else {
        // For an array, add the element offset, explicitly scaled.
-      const SCEV *ElementSize = getSizeOfExpr(*GTI);
+      const SCEV *ElementSize = getSizeOfExpr(*GTI, IntPtrTy);
        const SCEV *IndexS = getSCEV(Index);
        // Getelementptr indices are signed.
        IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
@@ -3252,9 +3256,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // For a SCEVUnknown, ask ValueTracking.
      unsigned BitWidth = getTypeSizeInBits(U->getType());
-    APInt Mask = APInt::getAllOnesValue(BitWidth);
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+    ComputeMaskedBits(U->getValue(), Zeros, Ones);
      return Zeros.countTrailingOnes();
    }
  
@@ -3392,9 +3395,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
  
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // For a SCEVUnknown, ask ValueTracking.
-    APInt Mask = APInt::getAllOnesValue(BitWidth);
      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+    ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
      if (Ones == ~Zeros + 1)
        return setUnsignedRange(U, ConservativeResult);
      return setUnsignedRange(U,
@@ -3651,9 +3653,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
        // knew about to reconstruct a low-bits mask value.
        unsigned LZ = A.countLeadingZeros();
        unsigned BitWidth = A.getBitWidth();
-      APInt AllOnes = APInt::getAllOnesValue(BitWidth);
        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
  
        APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
  
@@ -3925,13 +3926,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
  //
  
  /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
-/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// or not constant. Will also return 0 if the maximum trip count is very large
-/// (>= 2^32)
-unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
-                                                    BasicBlock *ExitBlock) {
+/// normal unsigned value. Returns 0 if the trip count is unknown or not
+/// constant. Will also return 0 if the maximum trip count is very large (>=
+/// 2^32).
+///
+/// This "trip count" assumes that control exits via ExitingBlock. More
+/// precisely, it is the number of times that control may reach ExitingBlock
+/// before taking the branch. For loops with multiple exits, it may not be the
+/// number times that the loop header executes because the loop may exit
+/// prematurely via another branch.
+unsigned ScalarEvolution::
+getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
    const SCEVConstant *ExitCount =
-    dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
+    dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
    if (!ExitCount)
      return 0;
  
@@ -3954,9 +3961,12 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
  /// multiple of a constant (which is also the case if the trip count is simply
  /// constant, use getSmallConstantTripCount for that case), Will also return 1
  /// if the trip count is very large (>= 2^32).
-unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
-                                                       BasicBlock *ExitBlock) {
-  const SCEV *ExitCount = getExitCount(L, ExitBlock);
+///
+/// As explained in the comments for getSmallConstantTripCount, this assumes
+/// that control exits the loop via ExitingBlock.
+unsigned ScalarEvolution::
+getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
+  const SCEV *ExitCount = getExitCount(L, ExitingBlock);
    if (ExitCount == getCouldNotCompute())
      return 1;
  
@@ -3974,8 +3984,11 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
  
    ConstantInt *Result = MulC->getValue();
  
-  // Guard against huge trip counts.
-  if (!Result || Result->getValue().getActiveBits() > 32)
+  // Guard against huge trip counts (this requires checking
+  // for zero to handle the case where the trip count == -1 and the
+  // addition wraps).
+  if (!Result || Result->getValue().getActiveBits() > 32 ||
+      Result->getValue().getActiveBits() == 0)
      return 1;
  
    return (unsigned)Result->getZExtValue();
@@ -4066,7 +4079,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
        if (!Visited.insert(I)) continue;
  
        ValueExprMapType::iterator It =
-        ValueExprMap.find(static_cast<Value *>(I));
+        ValueExprMap.find_as(static_cast<Value *>(I));
        if (It != ValueExprMap.end()) {
          const SCEV *Old = It->second;
  
@@ -4117,7 +4130,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
      Instruction *I = Worklist.pop_back_val();
      if (!Visited.insert(I)) continue;
  
-    ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+    ValueExprMapType::iterator It =
+      ValueExprMap.find_as(static_cast<Value *>(I));
      if (It != ValueExprMap.end()) {
        forgetMemoizedResults(It->second);
        ValueExprMap.erase(It);
@@ -4150,7 +4164,8 @@ void ScalarEvolution::forgetValue(Value *V) {
      I = Worklist.pop_back_val();
      if (!Visited.insert(I)) continue;
  
-    ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+    ValueExprMapType::iterator It =
+      ValueExprMap.find_as(static_cast<Value *>(I));
      if (It != ValueExprMap.end()) {
        forgetMemoizedResults(It->second);
        ValueExprMap.erase(It);
@@ -4562,40 +4577,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
    return cast<SCEVConstant>(Val)->getValue();
  }
  
-/// GetAddressedElementFromGlobal - Given a global variable with an initializer
-/// and a GEP expression (missing the pointer index) indexing into it, return
-/// the addressed element of the initializer or null if the index expression is
-/// invalid.
-static Constant *
-GetAddressedElementFromGlobal(GlobalVariable *GV,
-                              const std::vector<ConstantInt*> &Indices) {
-  Constant *Init = GV->getInitializer();
-  for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
-    uint64_t Idx = Indices[i]->getZExtValue();
-    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
-      assert(Idx < CS->getNumOperands() && "Bad struct index!");
-      Init = cast<Constant>(CS->getOperand(Idx));
-    } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
-      if (Idx >= CA->getNumOperands()) return 0;  // Bogus program
-      Init = cast<Constant>(CA->getOperand(Idx));
-    } else if (isa<ConstantAggregateZero>(Init)) {
-      if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
-        assert(Idx < STy->getNumElements() && "Bad struct index!");
-        Init = Constant::getNullValue(STy->getElementType(Idx));
-      } else if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
-        if (Idx >= ATy->getNumElements()) return 0;  // Bogus program
-        Init = Constant::getNullValue(ATy->getElementType());
-      } else {
-        llvm_unreachable("Unknown constant aggregate type!");
-      }
-      return 0;
-    } else {
-      return 0; // Unknown initializer type
-    }
-  }
-  return Init;
-}
-
  /// ComputeLoadConstantCompareExitLimit - Given an exit condition of
  /// 'icmp op load X, cst', try to see if we can compute the backedge
  /// execution count.
@@ -4623,7 +4604,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
  
    // Okay, we allow one non-constant index into the GEP instruction.
    Value *VarIdx = 0;
-  std::vector<ConstantInt*> Indexes;
+  std::vector<Constant*> Indexes;
    unsigned VarIdxNum = 0;
    for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
      if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
@@ -4635,6 +4616,10 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
        Indexes.push_back(0);
      }
  
+  // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
+  if (!VarIdx)
+    return getCouldNotCompute();
+
    // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
    // Check to see if X is a loop variant variable value now.
    const SCEV *Idx = getSCEV(VarIdx);
@@ -4657,7 +4642,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
      // Form the GEP offset.
      Indexes[VarIdxNum] = Val;
  
-    Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+    Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
+                                                         Indexes);
      if (Result == 0) break;  // Cannot compute!
  
      // Evaluate the condition for this iteration.
@@ -4772,7 +4758,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
  /// reason, return null.
  static Constant *EvaluateExpression(Value *V, const Loop *L,
                                      DenseMap<Instruction *, Constant *> &Vals,
-                                    const TargetData *TD,
+                                    const DataLayout *TD,
                                      const TargetLibraryInfo *TLI) {
    // Convenient constant check, but redundant for recursive calls.
    if (Constant *C = dyn_cast<Constant>(V)) return C;
@@ -5297,7 +5283,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
    }
  
    llvm_unreachable("Unknown SCEV type!");
-  return 0;
  }
  
  /// getSCEVAtScope - This is a convenience function which does
@@ -5394,6 +5379,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
      SqrtTerm *= B;
      SqrtTerm -= Four * (A * C);
  
+    if (SqrtTerm.isNegative()) {
+      // The loop is provably infinite.
+      const SCEV *CNC = SE.getCouldNotCompute();
+      return std::make_pair(CNC, CNC);
+    }
+
      // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
      // integer value or else APInt::sqrt() will assert.
      APInt SqrtVal(SqrtTerm.sqrt());
@@ -5496,7 +5487,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
    // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
    // We have not yet seen any such cases.
    const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
-  if (StepC == 0)
+  if (StepC == 0 || StepC->getValue()->equalsInt(0))
      return getCouldNotCompute();
  
    // For positive steps (counting up until unsigned overflow):
@@ -5617,9 +5608,14 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
  /// predicate Pred. Return true iff any changes were made.
  ///
  bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
-                                           const SCEV *&LHS, const SCEV *&RHS) {
+                                           const SCEV *&LHS, const SCEV *&RHS,
+                                           unsigned Depth) {
    bool Changed = false;
  
+  // If we hit the max recursion limit bail out.
+  if (Depth >= 3)
+    return false;
+
    // Canonicalize a constant to the right side.
    if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
      // Check for both operands constant.
@@ -5657,6 +5653,16 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
      default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
      case ICmpInst::ICMP_EQ:
      case ICmpInst::ICMP_NE:
+      // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
+      if (!RA)
+        if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
+          if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
+            if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
+                ME->getOperand(0)->isAllOnesValue()) {
+              RHS = AE->getOperand(1);
+              LHS = ME->getOperand(1);
+              Changed = true;
+            }
        break;
      case ICmpInst::ICMP_UGE:
        if ((RA - 1).isMinValue()) {
@@ -5858,6 +5864,11 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
  
    // TODO: More simplifications are possible here.
  
+  // Recursively simplify until we either hit a recursion limit or nothing
+  // changes.
+  if (Changed)
+    return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
+
    return Changed;
  
  trivially_true:
@@ -5928,7 +5939,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
    switch (Pred) {
    default:
      llvm_unreachable("Unexpected ICmpInst::Predicate value!");
-    break;
    case ICmpInst::ICMP_SGT:
      Pred = ICmpInst::ICMP_SLT;
      std::swap(LHS, RHS);
@@ -6056,12 +6066,34 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
    return false;
  }
  
+/// RAII wrapper to prevent recursive application of isImpliedCond.
+/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
+/// currently evaluating isImpliedCond.
+struct MarkPendingLoopPredicate {
+  Value *Cond;
+  DenseSet<Value*> &LoopPreds;
+  bool Pending;
+
+  MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
+    : Cond(C), LoopPreds(LP) {
+    Pending = !LoopPreds.insert(Cond).second;
+  }
+  ~MarkPendingLoopPredicate() {
+    if (!Pending)
+      LoopPreds.erase(Cond);
+  }
+};
+
  /// isImpliedCond - Test whether the condition described by Pred, LHS,
  /// and RHS is true whenever the given Cond value evaluates to true.
  bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
                                      const SCEV *LHS, const SCEV *RHS,
                                      Value *FoundCondValue,
                                      bool Inverse) {
+  MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
+  if (Mark.Pending)
+    return false;
+
    // Recursively handle And and Or conditions.
    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
      if (BO->getOpcode() == Instruction::And) {
@@ -6565,7 +6597,7 @@ ScalarEvolution::ScalarEvolution()
  bool ScalarEvolution::runOnFunction(Function &F) {
    this->F = &F;
    LI = &getAnalysis<LoopInfo>();
-  TD = getAnalysisIfAvailable<TargetData>();
+  TD = getAnalysisIfAvailable<DataLayout>();
    TLI = &getAnalysis<TargetLibraryInfo>();
    DT = &getAnalysis<DominatorTree>();
    return false;
@@ -6588,6 +6620,8 @@ void ScalarEvolution::releaseMemory() {
      I->second.clear();
    }
  
+  assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+
    BackedgeTakenCounts.clear();
    ConstantEvolutionLoopExitValue.clear();
    ValuesAtScopes.clear();
@@ -6779,11 +6813,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
      return LoopInvariant;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return LoopVariant;
-  default: break;
+  default: llvm_unreachable("Unknown SCEV kind!");
    }
-  llvm_unreachable("Unknown SCEV kind!");
-  return LoopVariant;
  }
  
  bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
@@ -6865,11 +6896,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
      return ProperlyDominatesBlock;
    case scCouldNotCompute:
      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return DoesNotDominateBlock;
-  default: break;
+  default:
+    llvm_unreachable("Unknown SCEV kind!");
    }
-  llvm_unreachable("Unknown SCEV kind!");
-  return DoesNotDominateBlock;
  }
  
  bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
@@ -6880,46 +6909,27 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
    return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
  }
  
-bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
-  switch (S->getSCEVType()) {
-  case scConstant:
-    return false;
-  case scTruncate:
-  case scZeroExtend:
-  case scSignExtend: {
-    const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S);
-    const SCEV *CastOp = Cast->getOperand();
-    return Op == CastOp || hasOperand(CastOp, Op);
-  }
-  case scAddRecExpr:
-  case scAddExpr:
-  case scMulExpr:
-  case scUMaxExpr:
-  case scSMaxExpr: {
-    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
-    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-         I != E; ++I) {
-      const SCEV *NAryOp = *I;
-      if (NAryOp == Op || hasOperand(NAryOp, Op))
-        return true;
-    }
-    return false;
-  }
-  case scUDivExpr: {
-    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
-    const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
-    return LHS == Op || hasOperand(LHS, Op) ||
-           RHS == Op || hasOperand(RHS, Op);
-  }
-  case scUnknown:
-    return false;
-  case scCouldNotCompute:
-    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-    return false;
-  default: break;
+namespace {
+// Search for a SCEV expression node within an expression tree.
+// Implements SCEVTraversal::Visitor.
+struct SCEVSearch {
+  const SCEV *Node;
+  bool IsFound;
+
+  SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+
+  bool follow(const SCEV *S) {
+    IsFound |= (S == Node);
+    return !IsFound;
    }
-  llvm_unreachable("Unknown SCEV kind!");
-  return false;
+  bool isDone() const { return IsFound; }
+};
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+  SCEVSearch Search(Op);
+  visitAll(S, Search);
+  return Search.IsFound;
  }
  
  void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
@@ -6929,3 +6939,66 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
    UnsignedRanges.erase(S);
    SignedRanges.erase(S);
  }
+
+typedef DenseMap<const Loop *, std::string> VerifyMap;
+/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
+static void
+getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
+  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
+    getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
+
+    std::string &S = Map[L];
+    if (S.empty()) {
+      raw_string_ostream OS(S);
+      SE.getBackedgeTakenCount(L)->print(OS);
+    }
+  }
+}
+
+void ScalarEvolution::verifyAnalysis() const {
+  if (!VerifySCEV)
+    return;
+
+  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+  // Gather stringified backedge taken counts for all loops using SCEV's caches.
+  // FIXME: It would be much better to store actual values instead of strings,
+  //        but SCEV pointers will change if we drop the caches.
+  VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
+  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+    getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
+
+  // Gather stringified backedge taken counts for all loops without using
+  // SCEV's caches.
+  SE.releaseMemory();
+  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+
+  // Now compare whether they're the same with and without caches. This allows
+  // verifying that no pass changed the cache.
+  assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
+         "New loops suddenly appeared!");
+
+  for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
+                           OldE = BackedgeDumpsOld.end(),
+                           NewI = BackedgeDumpsNew.begin();
+       OldI != OldE; ++OldI, ++NewI) {
+    assert(OldI->first == NewI->first && "Loop order changed!");
+
+    // Compare the stringified SCEVs. We don't care if undef backedgetaken count
+    // changes.
+    // FIXME: We currently ignore SCEV changes towards CouldNotCompute. This
+    // means that a pass is buggy or SCEV has to learn a new pattern but is
+    // usually not harmful.
+    if (OldI->second != NewI->second &&
+        OldI->second.find("undef") == std::string::npos &&
+        NewI->second != "***COULDNOTCOMPUTE***") {
+      dbgs() << "SCEVValidator: SCEV for Loop '"
+             << OldI->first->getHeader()->getName()
+             << "' from '" << OldI->second << "' to '" << NewI->second << "'!";
+      std::abort();
+    }
+  }
+
+  // TODO: Verify more things.
+}