What the loop unroller cares about, rather than just not unrolling loops with calls, is

[oota-llvm.git] / lib / Analysis / ValueTracking.cpp
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp

index f6a492f7676621407cac992f2bcfb1662adb8f6f..181c9b01980c33c7bdbff2fc76064c60470bfc61 100644 (file)
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -23,6 +23,7 @@
  #include "llvm/Target/TargetData.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
  #include <cstring>
  using namespace llvm;
  
@@ -49,11 +50,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
    assert(V && "No Value?");
    assert(Depth <= MaxDepth && "Limit Search Depth");
    unsigned BitWidth = Mask.getBitWidth();
-  assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) &&
-         "Not integer or pointer type!");
+  assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
+         && "Not integer or pointer type!");
    assert((!TD ||
            TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
-         (!V->getType()->isIntOrIntVector() ||
+         (!V->getType()->isIntOrIntVectorTy() ||
            V->getType()->getScalarSizeInBits() == BitWidth) &&
           KnownZero.getBitWidth() == BitWidth && 
           KnownOne.getBitWidth() == BitWidth &&
@@ -249,7 +250,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
      unsigned SrcBitWidth;
      // Note that we handle pointer operands here because of inttoptr/ptrtoint
      // which fall through here.
-    if (isa<PointerType>(SrcTy))
+    if (SrcTy->isPointerTy())
        SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
      else
        SrcBitWidth = SrcTy->getScalarSizeInBits();
@@ -269,10 +270,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
    }
    case Instruction::BitCast: {
      const Type *SrcTy = I->getOperand(0)->getType();
-    if ((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+    if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
          // TODO: For now, not handling conversions like:
          // (bitcast i64 %x to <2 x i32>)
-        !isa<VectorType>(I->getType())) {
+        !I->getType()->isVectorTy()) {
        ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
                          Depth+1);
        return;
@@ -421,20 +422,29 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
    }
    case Instruction::SRem:
      if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      APInt RA = Rem->getValue();
-      if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
-        APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+      APInt RA = Rem->getValue().abs();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = RA - 1;
          APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
          ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, 
                            Depth+1);
  
-        // If the sign bit of the first operand is zero, the sign bit of
-        // the result is zero. If the first operand has no one bits below
-        // the second operand's single 1 bit, its sign will be zero.
+        // The low bits of the first operand are unchanged by the srem.
+        KnownZero = KnownZero2 & LowBits;
+        KnownOne = KnownOne2 & LowBits;
+
+        // If the first operand is non-negative or has all low bits zero, then
+        // the upper bits are all zero.
          if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
-          KnownZero2 |= ~LowBits;
+          KnownZero |= ~LowBits;
  
-        KnownZero |= KnownZero2 & Mask;
+        // If the first operand is negative and not all low bits are zero, then
+        // the upper bits are all one.
+        if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+          KnownOne |= ~LowBits;
+
+        KnownZero &= Mask;
+        KnownOne &= Mask;
  
          assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
        }
@@ -640,7 +650,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
  ///
  unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
                                    unsigned Depth) {
-  assert((TD || V->getType()->isIntOrIntVector()) &&
+  assert((TD || V->getType()->isIntOrIntVectorTy()) &&
           "ComputeNumSignBits requires a TargetData object to operate "
           "on non-integer values!");
    const Type *Ty = V->getType();
@@ -659,7 +669,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
    switch (Operator::getOpcode(V)) {
    default: break;
    case Instruction::SExt:
-    Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth();
+    Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
      return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
      
    case Instruction::AShr:
@@ -726,8 +736,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
        
      Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
      if (Tmp2 == 1) return 1;
-      return std::min(Tmp, Tmp2)-1;
-    break;
+    return std::min(Tmp, Tmp2)-1;
      
    case Instruction::Sub:
      Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
@@ -757,8 +766,24 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
      // is, at worst, one more bit than the inputs.
      Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
      if (Tmp == 1) return 1;  // Early out.
-      return std::min(Tmp, Tmp2)-1;
-    break;
+    return std::min(Tmp, Tmp2)-1;
+      
+  case Instruction::PHI: {
+    PHINode *PN = cast<PHINode>(U);
+    // Don't analyze large in-degree PHIs.
+    if (PN->getNumIncomingValues() > 4) break;
+    
+    // Take the minimum of all incoming values.  This can't infinitely loop
+    // because of our depth threshold.
+    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+      if (Tmp == 1) return Tmp;
+      Tmp = std::min(Tmp,
+                     ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+    }
+    return Tmp;
+  }
+
    case Instruction::Trunc:
      // FIXME: it's tricky to do anything useful for this, but it is an important
      // case for targets like X86.
@@ -799,7 +824,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
  
    assert(V && "No Value?");
    assert(Depth <= MaxDepth && "Limit Search Depth");
-  assert(V->getType()->isInteger() && "Not integer or pointer type!");
+  assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
  
    const Type *T = V->getType();
  
@@ -855,19 +880,20 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
      }
  
      Value *Mul0 = NULL;
-    Value *Mul1 = NULL;
-    bool M0 = ComputeMultiple(Op0, Base, Mul0,
-                              LookThroughSExt, Depth+1);
-    bool M1 = ComputeMultiple(Op1, Base, Mul1,
-                              LookThroughSExt, Depth+1);
-
-    if (M0) {
-      if (isa<Constant>(Op1) && isa<Constant>(Mul0)) {
-        // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
-        Multiple = ConstantExpr::getMul(cast<Constant>(Mul0),
-                                        cast<Constant>(Op1));
-        return true;
-      }
+    if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+      if (Constant *Op1C = dyn_cast<Constant>(Op1))
+        if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+          if (Op1C->getType()->getPrimitiveSizeInBits() < 
+              MulC->getType()->getPrimitiveSizeInBits())
+            Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+          if (Op1C->getType()->getPrimitiveSizeInBits() > 
+              MulC->getType()->getPrimitiveSizeInBits())
+            MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+          
+          // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+          Multiple = ConstantExpr::getMul(MulC, Op1C);
+          return true;
+        }
  
        if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
          if (Mul0CI->getValue() == 1) {
@@ -877,13 +903,21 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
          }
      }
  
-    if (M1) {
-      if (isa<Constant>(Op0) && isa<Constant>(Mul1)) {
-        // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
-        Multiple = ConstantExpr::getMul(cast<Constant>(Mul1),
-                                        cast<Constant>(Op0));
-        return true;
-      }
+    Value *Mul1 = NULL;
+    if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+      if (Constant *Op0C = dyn_cast<Constant>(Op0))
+        if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+          if (Op0C->getType()->getPrimitiveSizeInBits() < 
+              MulC->getType()->getPrimitiveSizeInBits())
+            Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+          if (Op0C->getType()->getPrimitiveSizeInBits() > 
+              MulC->getType()->getPrimitiveSizeInBits())
+            MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+          
+          // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+          Multiple = ConstantExpr::getMul(MulC, Op0C);
+          return true;
+        }
  
        if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
          if (Mul1CI->getValue() == 1) {
@@ -928,7 +962,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      // sqrt(-0.0) = -0.0, no other negative results are possible.
      if (II->getIntrinsicID() == Intrinsic::sqrt)
-      return CannotBeNegativeZero(II->getOperand(1), Depth+1);
+      return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
    
    if (const CallInst *CI = dyn_cast<CallInst>(I))
      if (const Function *F = CI->getCalledFunction()) {
@@ -941,190 +975,13 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
          if (F->getName() == "fabsl") return true;
          if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
              F->getName() == "sqrtl")
-          return CannotBeNegativeZero(CI->getOperand(1), Depth+1);
+          return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
        }
      }
    
    return false;
  }
  
-
-/// GetLinearExpression - Analyze the specified value as a linear expression:
-/// "A*V + B", where A and B are constant integers.  Return the scale and offset
-/// values as APInts and return V as a Value*.  The incoming Value is known to
-/// have IntegerType.  Note that this looks through extends, so the high bits
-/// may not be represented in the result.
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
-                                  const TargetData *TD) {
-  assert(isa<IntegerType>(V->getType()) && "Not an integer value");
-  
-  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
-    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
-      switch (BOp->getOpcode()) {
-      default: break;
-      case Instruction::Or:
-        // X|C == X+C if all the bits in C are unset in X.  Otherwise we can't
-        // analyze it.
-        if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), TD))
-          break;
-        // FALL THROUGH.
-      case Instruction::Add:
-        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD);
-        Offset += RHSC->getValue();
-        return V;
-      case Instruction::Mul:
-        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD);
-        Offset *= RHSC->getValue();
-        Scale *= RHSC->getValue();
-        return V;
-      case Instruction::Shl:
-        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD);
-        Offset <<= RHSC->getValue().getLimitedValue();
-        Scale <<= RHSC->getValue().getLimitedValue();
-        return V;
-      }
-    }
-  }
-  
-  // Since clients don't care about the high bits of the value, just scales and
-  // offsets, we can look through extensions.
-  if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
-    Value *CastOp = cast<CastInst>(V)->getOperand(0);
-    unsigned OldWidth = Scale.getBitWidth();
-    unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
-    Scale.trunc(SmallWidth);
-    Offset.trunc(SmallWidth);
-    Value *Result = GetLinearExpression(CastOp, Scale, Offset, TD);
-    Scale.zext(OldWidth);
-    Offset.zext(OldWidth);
-    return Result;
-  }
-  
-  Scale = 1;
-  Offset = 0;
-  return V;
-}
-
-/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
-/// into a base pointer with a constant offset and a number of scaled symbolic
-/// offsets.
-///
-/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
-/// the VarIndices vector) are Value*'s that are known to be scaled by the
-/// specified amount, but which may have other unrepresented high bits. As such,
-/// the gep cannot necessarily be reconstructed from its decomposed form.
-///
-/// When TargetData is around, this function is capable of analyzing everything
-/// that Value::getUnderlyingObject() can look through.  When not, it just looks
-/// through pointer casts.
-///
-const Value *llvm::DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
-                 SmallVectorImpl<std::pair<const Value*, int64_t> > &VarIndices,
-                                          const TargetData *TD) {
-  // FIXME: Should limit depth like getUnderlyingObject?
-  BaseOffs = 0;
-  while (1) {
-    // See if this is a bitcast or GEP.
-    const Operator *Op = dyn_cast<Operator>(V);
-    if (Op == 0) {
-      // The only non-operator case we can handle are GlobalAliases.
-      if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
-        if (!GA->mayBeOverridden()) {
-          V = GA->getAliasee();
-          continue;
-        }
-      }
-      return V;
-    }
-    
-    if (Op->getOpcode() == Instruction::BitCast) {
-      V = Op->getOperand(0);
-      continue;
-    }
-    
-    const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
-    if (GEPOp == 0)
-      return V;
-    
-    // Don't attempt to analyze GEPs over unsized objects.
-    if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
-        ->getElementType()->isSized())
-      return V;
-    
-    // If we are lacking TargetData information, we can't compute the offets of
-    // elements computed by GEPs.  However, we can handle bitcast equivalent
-    // GEPs.
-    if (!TD) {
-      if (!GEPOp->hasAllZeroIndices())
-        return V;
-      V = GEPOp->getOperand(0);
-      continue;
-    }
-    
-    // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
-    gep_type_iterator GTI = gep_type_begin(GEPOp);
-    for (User::const_op_iterator I = GEPOp->op_begin()+1,
-         E = GEPOp->op_end(); I != E; ++I) {
-      Value *Index = *I;
-      // Compute the (potentially symbolic) offset in bytes for this index.
-      if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
-        // For a struct, add the member offset.
-        unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
-        if (FieldNo == 0) continue;
-        
-        BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
-        continue;
-      }
-      
-      // For an array/pointer, add the element offset, explicitly scaled.
-      if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
-        if (CIdx->isZero()) continue;
-        BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
-        continue;
-      }
-      
-      uint64_t Scale = TD->getTypeAllocSize(*GTI);
-      
-      // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
-      unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
-      APInt IndexScale(Width, 0), IndexOffset(Width, 0);
-      Index = GetLinearExpression(Index, IndexScale, IndexOffset, TD);
-      
-      // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
-      // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-      BaseOffs += IndexOffset.getZExtValue()*Scale;
-      Scale *= IndexScale.getZExtValue();
-      
-      
-      // If we already had an occurrance of this index variable, merge this
-      // scale into it.  For example, we want to handle:
-      //   A[x][x] -> x*16 + x*4 -> x*20
-      // This also ensures that 'x' only appears in the index list once.
-      for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
-        if (VarIndices[i].first == Index) {
-          Scale += VarIndices[i].second;
-          VarIndices.erase(VarIndices.begin()+i);
-          break;
-        }
-      }
-      
-      // Make sure that we have a scale that makes sense for this target's
-      // pointer size.
-      if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
-        Scale <<= ShiftBits;
-        Scale >>= ShiftBits;
-      }
-      
-      if (Scale)
-        VarIndices.push_back(std::make_pair(Index, Scale));
-    }
-    
-    // Analyze the base pointer next.
-    V = GEPOp->getOperand(0);
-  }
-}
-
-
  // This is the recursive version of BuildSubAggregate. It takes a few different
  // arguments. Idxs is the index within the nested struct From that we are
  // looking at now (which is of type IndexedType). IdxSkip is the number of
@@ -1217,7 +1074,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
    if (idx_begin == idx_end)
      return V;
    // We have indices, so V should have an indexable type
-  assert((isa<StructType>(V->getType()) || isa<ArrayType>(V->getType()))
+  assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
           && "Not looking at a struct or array?");
    assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
           && "Invalid indices for type?");
@@ -1305,22 +1162,23 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
  /// GetConstantStringInfo - This function computes the length of a
  /// null-terminated C string pointed to by V.  If successful, it returns true
  /// and returns the string in Str.  If unsuccessful, it returns false.
-bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
+bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
+                                 uint64_t Offset,
                                   bool StopAtNul) {
    // If V is NULL then return false;
    if (V == NULL) return false;
  
    // Look through bitcast instructions.
-  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+  if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
      return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
    
    // If the value is not a GEP instruction nor a constant expression with a
    // GEP instruction, then return false because ConstantArray can't occur
    // any other way
-  User *GEP = 0;
-  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+  const User *GEP = 0;
+  if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
      GEP = GEPI;
-  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
      if (CE->getOpcode() == Instruction::BitCast)
        return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
      if (CE->getOpcode() != Instruction::GetElementPtr)
@@ -1336,12 +1194,12 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
      // Make sure the index-ee is a pointer to array of i8.
      const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
      const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
-    if (AT == 0 || AT->getElementType() != Type::getInt8Ty(V->getContext()))
+    if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
        return false;
      
      // Check to make sure that the first operand of the GEP is an integer and
      // has value 0 so that we are sure we're indexing into the initializer.
-    ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
      if (FirstIdx == 0 || !FirstIdx->isZero())
        return false;
      
@@ -1349,7 +1207,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
      // into the array.  If this occurs, we can't say anything meaningful about
      // the string.
      uint64_t StartIdx = 0;
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
        StartIdx = CI->getZExtValue();
      else
        return false;
@@ -1357,18 +1215,13 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
                                   StopAtNul);
    }
    
-  if (MDString *MDStr = dyn_cast<MDString>(V)) {
-    Str = MDStr->getString();
-    return true;
-  }
-
    // The GEP instruction, constant or instruction, must reference a global
    // variable that is a constant and is initialized. The referenced constant
    // initializer is the array that we'll use for optimization.
-  GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+  const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
    if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
      return false;
-  Constant *GlobalInit = GV->getInitializer();
+  const Constant *GlobalInit = GV->getInitializer();
    
    // Handle the ConstantAggregateZero case
    if (isa<ConstantAggregateZero>(GlobalInit)) {
@@ -1379,9 +1232,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
    }
    
    // Must be a Constant Array
-  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (Array == 0 ||
-      Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
+  const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
      return false;
    
    // Get the number of elements in the array
@@ -1394,8 +1246,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
    // to in the array.
    Str.reserve(NumElts-Offset);
    for (unsigned i = Offset; i != NumElts; ++i) {
-    Constant *Elt = Array->getOperand(i);
-    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    const Constant *Elt = Array->getOperand(i);
+    const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
      if (!CI) // This array isn't suitable, non-int initializer.
        return false;
      if (StopAtNul && CI->isZero())
@@ -1406,3 +1258,131 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset,
    // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
    return true;
  }
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - Return 'len+1' for the string V points to, 0 if the
+/// length is unknown, or ~0ULL when only already-visited PHIs were reached.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+  // Look through noop bitcast instructions.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+    return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+  // If this is a PHI node, there are two cases: either we have already seen it
+  // or we haven't.
+  if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    if (!PHIs.insert(PN))
+      return ~0ULL;  // already in the set.
+
+    // If it was new, see if all the input strings are the same length.
+    uint64_t LenSoFar = ~0ULL;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+      if (Len == 0) return 0; // Unknown length -> unknown.
+
+      if (Len == ~0ULL) continue;
+
+      if (Len != LenSoFar && LenSoFar != ~0ULL)
+        return 0;    // Disagree -> unknown.
+      LenSoFar = Len;
+    }
+
+    // Success, all agree.
+    return LenSoFar;
+  }
+
+  // strlen(select(c,x,y)) is only known when strlen(x) == strlen(y).
+  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+    if (Len1 == 0) return 0;
+    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+    if (Len2 == 0) return 0;
+    if (Len1 == ~0ULL) return Len2;
+    if (Len2 == ~0ULL) return Len1;
+    if (Len1 != Len2) return 0;
+    return Len1;
+  }
+
+  // If the value is not a GEP instruction nor a constant expression with a
+  // GEP instruction, then return unknown.
+  User *GEP = 0;
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+    GEP = GEPI;
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() != Instruction::GetElementPtr)
+      return 0;
+    GEP = CE;
+  } else {
+    return 0;
+  }
+
+  // Make sure the GEP has exactly three arguments.
+  if (GEP->getNumOperands() != 3)
+    return 0;
+
+  // Check to make sure that the first operand of the GEP is an integer and
+  // has value 0 so that we are sure we're indexing into the initializer.
+  if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+    if (!Idx->isZero())
+      return 0;
+  } else
+    return 0;
+
+  // If the second index isn't a ConstantInt, then this is a variable index
+  // into the array.  If this occurs, we can't say anything meaningful about
+  // the string.
+  uint64_t StartIdx = 0;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+    StartIdx = CI->getZExtValue();
+  else
+    return 0;
+
+  // The GEP instruction, constant or instruction, must reference a global
+  // variable that is a constant and is initialized. The referenced constant
+  // initializer is the array that we'll use for optimization.
+  GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+      GV->mayBeOverridden())
+    return 0;
+  Constant *GlobalInit = GV->getInitializer();
+
+  // Handle the ConstantAggregateZero case, which is a degenerate case. The
+  // initializer is constant zero so the length of the string must be zero.
+  if (isa<ConstantAggregateZero>(GlobalInit))
+    return 1;  // Len = 0 offset by 1.
+
+  // Must be a Constant Array
+  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+    return 0;
+
+  // Get the number of elements in the array
+  uint64_t NumElts = Array->getType()->getNumElements();
+
+  // Traverse the constant array from StartIdx, the element the GEP refers to.
+  // A StartIdx at or past the end skips the loop and yields 'unknown'.
+  for (uint64_t i = StartIdx; i < NumElts; ++i) {
+    Constant *Elt = Array->getOperand(i);
+    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    if (!CI) // This array isn't suitable, non-int initializer.
+      return 0;
+    if (CI->isZero())
+      return i-StartIdx+1; // We found end of string, success!
+  }
+
+  return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - Return 'len+1' for the string that the pointer V refers
+/// to when its length can be computed, and 0 when it cannot.
+uint64_t llvm::GetStringLength(Value *V) {
+  if (!V->getType()->isPointerTy()) return 0;
+
+  SmallPtrSet<PHINode*, 32> VisitedPHIs;
+  uint64_t Result = GetStringLengthH(V, VisitedPHIs);
+  // ~0ULL means an infinite phi cycle: dead code, so report an empty string.
+  if (Result == ~0ULL) return 1;
+  return Result;
+}