Cut off crazy computation. This helps PR1622 slightly.

[oota-llvm.git] / lib / Transforms / Scalar / InstructionCombining.cpp
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index bf68d540e1f49d7c91bfe18f482f5214b7d6d0cd..218da9773d1ddb7e03294cf3d4d29f623d9c7984 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -870,11 +870,10 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const Type *Ty,
  // could have the specified known zero and known one bits, returning them in
  // min/max.
  static void ComputeUnsignedMinMaxValuesFromKnownBits(const Type *Ty,
-                                                     const APInt& KnownZero,
-                                                     const APInt& KnownOne,
-                                                     APInt& Min,
-                                                     APInt& Max) {
-  uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+                                                     const APInt &KnownZero,
+                                                     const APInt &KnownOne,
+                                                     APInt &Min, APInt &Max) {
+  uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth(); BitWidth = BitWidth;
    assert(KnownZero.getBitWidth() == BitWidth && 
           KnownOne.getBitWidth() == BitWidth &&
           Min.getBitWidth() == BitWidth && Max.getBitWidth() &&
@@ -1342,6 +1341,11 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
        InsertNewInstBefore(cast<Instruction>(NewVal), *I);
        return UpdateValueUsesWith(I, NewVal);
      }    
+
+    // If the sign bit is the only bit demanded by this ashr, then there is no
+    // need to do it, the shift doesn't change the high bit.
+    if (DemandedMask.isSignBit())
+      return UpdateValueUsesWith(I, I->getOperand(0));
      
      if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
        uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
@@ -1501,7 +1505,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
      break;
    }
    case Instruction::BitCast: {
-    // Packed->packed casts only.
+    // Vector->vector casts only.
      const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
      if (!VTy) break;
      unsigned InVWidth = VTy->getNumElements();
@@ -1509,7 +1513,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
      unsigned Ratio;
  
      if (VWidth == InVWidth) {
-      // If we are converting from <4x i32> -> <4 x f32>, we demand the same
+      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
        // elements as are demanded of us.
        Ratio = 1;
        InputDemandedElts = DemandedElts;
@@ -1880,7 +1884,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    if (I.getNumOperands() == 2) {
      Constant *C = cast<Constant>(I.getOperand(1));
      for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV;
+      Value *InV = 0;
        if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
          if (CmpInst *CI = dyn_cast<CmpInst>(&I))
            InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
@@ -2271,26 +2275,34 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
    return 0;
  }
  
-/// isSignBitCheck - Given an exploded icmp instruction, return true if it
-/// really just returns true if the most significant (sign) bit is set.
-static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS) {
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit.  If it only checks the sign bit, set
+/// TrueIfSigned if the result of the comparison is true when the input value is
+/// signed.
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+                           bool &TrueIfSigned) {
    switch (pred) {
-    case ICmpInst::ICMP_SLT: 
-      // True if LHS s< RHS and RHS == 0
-      return RHS->isZero();
-    case ICmpInst::ICMP_SLE: 
-      // True if LHS s<= RHS and RHS == -1
-      return RHS->isAllOnesValue();
-    case ICmpInst::ICMP_UGE: 
-      // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
-      return RHS->getValue() == 
-             APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits());
-    case ICmpInst::ICMP_UGT:
-      // True if LHS u> RHS and RHS == high-bit-mask - 1
-      return RHS->getValue() ==
-             APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
-    default:
-      return false;
+  case ICmpInst::ICMP_SLT:   // True if LHS s< 0
+    TrueIfSigned = true;
+    return RHS->isZero();
+  case ICmpInst::ICMP_SLE:   // True if LHS s<= RHS and RHS == -1
+    TrueIfSigned = true;
+    return RHS->isAllOnesValue();
+  case ICmpInst::ICMP_SGT:   // True if LHS s> -1
+    TrueIfSigned = false;
+    return RHS->isAllOnesValue();
+  case ICmpInst::ICMP_UGT:
+    // True if LHS u> RHS and RHS == high-bit-mask - 1
+    TrueIfSigned = true;
+    return RHS->getValue() ==
+      APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+  case ICmpInst::ICMP_UGE: 
+    // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+    TrueIfSigned = true;
+    return RHS->getValue() == 
+      APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits());
+  default:
+    return false;
    }
  }
  
@@ -2377,11 +2389,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
      if (ICmpInst *SCI = dyn_cast<ICmpInst>(BoolCast->getOperand(0))) {
        Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1);
        const Type *SCOpTy = SCIOp0->getType();
-
+      bool TIS = false;
+      
        // If the icmp is true iff the sign bit of X is set, then convert this
        // multiply into a shift/and combination.
        if (isa<ConstantInt>(SCIOp1) &&
-          isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1))) {
+          isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) &&
+          TIS) {
          // Shift the X value right to turn it into "all signbits".
          Constant *Amt = ConstantInt::get(SCIOp0->getType(),
                                            SCOpTy->getPrimitiveSizeInBits()-1);
@@ -2805,23 +2819,19 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
  // isMaxValueMinusOne - return true if this is Max-1
  static bool isMaxValueMinusOne(const ConstantInt *C, bool isSigned) {
    uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits();
-  if (isSigned) {
-    // Calculate 0111111111..11111
-    APInt Val(APInt::getSignedMaxValue(TypeBits));
-    return C->getValue() == Val-1;
-  }
-  return C->getValue() == APInt::getAllOnesValue(TypeBits) - 1;
+  if (!isSigned)
+    return C->getValue() == APInt::getAllOnesValue(TypeBits) - 1;
+  return C->getValue() == APInt::getSignedMaxValue(TypeBits)-1;
  }
  
  // isMinValuePlusOne - return true if this is Min+1
  static bool isMinValuePlusOne(const ConstantInt *C, bool isSigned) {
-  if (isSigned) {
-    // Calculate 1111111111000000000000
-    uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits();
-    APInt Val(APInt::getSignedMinValue(TypeBits));
-    return C->getValue() == Val+1;
-  }
-  return C->getValue() == 1; // unsigned
+  if (!isSigned)
+    return C->getValue() == 1; // unsigned
+    
+  // Calculate 1111111111000000000000
+  uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits();
+  return C->getValue() == APInt::getSignedMinValue(TypeBits)+1;
  }
  
  // isOneBitSet - Return true if there is exactly one bit set in the specified
@@ -3706,9 +3716,9 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
    for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
      if (ByteValues[i] != V)
        return 0;
-  const Type *Tys[] = { ITy, ITy };
+  const Type *Tys[] = { ITy };
    Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 2);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
    return new CallInst(F, V);
  }
  
@@ -3859,40 +3869,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
            InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I);
          return BinaryOperator::createAnd(V1, Or);
        }
-      
-      // (V1 & V3)|(V2 & ~V3) -> ((V1 ^ V2) & V3) ^ V2
-      if (isOnlyUse(Op0) && isOnlyUse(Op1)) {
-        // Try all combination of terms to find V3 and ~V3.
-        if (A->hasOneUse() && match(A, m_Not(m_Value(V3)))) {
-          if (V3 == B)
-            V1 = D, V2 = C;
-          else if (V3 == D)
-            V1 = B, V2 = C;
-        }
-        if (B->hasOneUse() && match(B, m_Not(m_Value(V3)))) {
-          if (V3 == A)
-            V1 = C, V2 = D;
-          else if (V3 == C)
-            V1 = A, V2 = D;
-        }
-        if (C->hasOneUse() && match(C, m_Not(m_Value(V3)))) {
-          if (V3 == B)
-            V1 = D, V2 = A;
-          else if (V3 == D)
-            V1 = B, V2 = A;
-        }
-        if (D->hasOneUse() && match(D, m_Not(m_Value(V3)))) {
-          if (V3 == A)
-            V1 = C, V2 = B;
-          else if (V3 == C)
-            V1 = A, V2 = B;
-        }
-        if (V1) {
-          A = InsertNewInstBefore(BinaryOperator::createXor(V1, V2, "tmp"), I);
-          A = InsertNewInstBefore(BinaryOperator::createAnd(A, V3, "tmp"), I);
-          return BinaryOperator::createXor(A, V2);
-        }
-      }
      }
    }
    
@@ -4118,7 +4094,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
  
    // xor X, X = 0, even if X is nested in a sequence of Xor's.
    if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
-    assert(Result == &I && "AssociativeOpt didn't work?");
+    assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result;
      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
    }
    
@@ -4158,12 +4134,17 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
    
    
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // xor (icmp A, B), true = not (icmp A, B) = !icmp A, B
-    if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
-      if (RHS == ConstantInt::getTrue() && ICI->hasOneUse())
+    // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+    if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) {
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
          return new ICmpInst(ICI->getInversePredicate(),
                              ICI->getOperand(0), ICI->getOperand(1));
  
+      if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
+        return new FCmpInst(FCI->getInversePredicate(),
+                            FCI->getOperand(0), FCI->getOperand(1));
+    }
+
      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
        // ~(c-X) == X-c-1 == X+(-c-1)
        if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
@@ -4848,22 +4829,29 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
      // already been handled above, this requires little checking.
      //
      switch (I.getPredicate()) {
-      default: break;
-      case ICmpInst::ICMP_ULE: 
-        return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
-      case ICmpInst::ICMP_SLE:
-        return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
-      case ICmpInst::ICMP_UGE:
-        return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI));
-      case ICmpInst::ICMP_SGE:
-        return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
+    default: break;
+    case ICmpInst::ICMP_ULE: 
+      return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
+    case ICmpInst::ICMP_SLE:
+      return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
+    case ICmpInst::ICMP_UGE:
+      return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI));
+    case ICmpInst::ICMP_SGE:
+      return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
      }
      
      // See if we can fold the comparison based on bits known to be zero or one
-    // in the input.
+    // in the input.  If this comparison is a normal comparison, it demands all
+    // bits, if it is a sign bit comparison, it only demands the sign bit.
+    
+    bool UnusedBit;
+    bool isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+    
      uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth();
      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-    if (SimplifyDemandedBits(Op0, APInt::getAllOnesValue(BitWidth),
+    if (SimplifyDemandedBits(Op0, 
+                             isSignBit ? APInt::getSignBit(BitWidth)
+                                       : APInt::getAllOnesValue(BitWidth),
                               KnownZero, KnownOne, 0))
        return &I;
          
@@ -5415,85 +5403,105 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
      }
      break;
      
-  case Instruction::Shl:         // (icmp pred (shl X, ShAmt), CI)
-    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-      if (ICI.isEquality()) {
-        uint32_t TypeBits = RHSV.getBitWidth();
-        
-        // Check that the shift amount is in range.  If not, don't perform
-        // undefined shifts.  When the shift is visited it will be
-        // simplified.
-        if (ShAmt->uge(TypeBits))
-          break;
-        
-        // If we are comparing against bits always shifted out, the
-        // comparison cannot succeed.
-        Constant *Comp =
-          ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
-        if (Comp != RHS) {// Comparing against a bit that we know is zero.
-          bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-          Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
-          return ReplaceInstUsesWith(ICI, Cst);
-        }
+  case Instruction::Shl: {       // (icmp pred (shl X, ShAmt), CI)
+    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!ShAmt) break;
+    
+    uint32_t TypeBits = RHSV.getBitWidth();
+    
+    // Check that the shift amount is in range.  If not, don't perform
+    // undefined shifts.  When the shift is visited it will be
+    // simplified.
+    if (ShAmt->uge(TypeBits))
+      break;
+    
+    if (ICI.isEquality()) {
+      // If we are comparing against bits always shifted out, the
+      // comparison cannot succeed.
+      Constant *Comp =
+        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
+      if (Comp != RHS) {// Comparing against a bit that we know is zero.
+        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+        Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+        return ReplaceInstUsesWith(ICI, Cst);
+      }
+      
+      if (LHSI->hasOneUse()) {
+        // Otherwise strength reduce the shift into an and.
+        uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+        Constant *Mask =
+          ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
          
-        if (LHSI->hasOneUse()) {
-          // Otherwise strength reduce the shift into an and.
-          uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-          Constant *Mask =
-            ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
-          
-          Instruction *AndI =
-            BinaryOperator::createAnd(LHSI->getOperand(0),
-                                      Mask, LHSI->getName()+".mask");
-          Value *And = InsertNewInstBefore(AndI, ICI);
-          return new ICmpInst(ICI.getPredicate(), And,
-                              ConstantInt::get(RHSV.lshr(ShAmtVal)));
-        }
+        Instruction *AndI =
+          BinaryOperator::createAnd(LHSI->getOperand(0),
+                                    Mask, LHSI->getName()+".mask");
+        Value *And = InsertNewInstBefore(AndI, ICI);
+        return new ICmpInst(ICI.getPredicate(), And,
+                            ConstantInt::get(RHSV.lshr(ShAmtVal)));
        }
      }
+    
+    // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+    bool TrueIfSigned = false;
+    if (LHSI->hasOneUse() &&
+        isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+      // (X << 31) <s 0  --> (X&1) != 0
+      Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) <<
+                                           (TypeBits-ShAmt->getZExtValue()-1));
+      Instruction *AndI =
+        BinaryOperator::createAnd(LHSI->getOperand(0),
+                                  Mask, LHSI->getName()+".mask");
+      Value *And = InsertNewInstBefore(AndI, ICI);
+      
+      return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+                          And, Constant::getNullValue(And->getType()));
+    }
      break;
+  }
      
    case Instruction::LShr:         // (icmp pred (shr X, ShAmt), CI)
-  case Instruction::AShr:
-    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-      if (ICI.isEquality()) {
-        // Check that the shift amount is in range.  If not, don't perform
-        // undefined shifts.  When the shift is visited it will be
-        // simplified.
-        uint32_t TypeBits = RHSV.getBitWidth();
-        if (ShAmt->uge(TypeBits))
-          break;
-        uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-        
-        // If we are comparing against bits always shifted out, the
-        // comparison cannot succeed.
-        APInt Comp = RHSV << ShAmtVal;
-        if (LHSI->getOpcode() == Instruction::LShr)
-          Comp = Comp.lshr(ShAmtVal);
-        else
-          Comp = Comp.ashr(ShAmtVal);
-        
-        if (Comp != RHSV) { // Comparing against a bit that we know is zero.
-          bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-          Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
-          return ReplaceInstUsesWith(ICI, Cst);
-        }
+  case Instruction::AShr: {
+    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!ShAmt) break;
+
+    if (ICI.isEquality()) {
+      // Check that the shift amount is in range.  If not, don't perform
+      // undefined shifts.  When the shift is visited it will be
+      // simplified.
+      uint32_t TypeBits = RHSV.getBitWidth();
+      if (ShAmt->uge(TypeBits))
+        break;
+      uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+      
+      // If we are comparing against bits always shifted out, the
+      // comparison cannot succeed.
+      APInt Comp = RHSV << ShAmtVal;
+      if (LHSI->getOpcode() == Instruction::LShr)
+        Comp = Comp.lshr(ShAmtVal);
+      else
+        Comp = Comp.ashr(ShAmtVal);
+      
+      if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+        Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+        return ReplaceInstUsesWith(ICI, Cst);
+      }
+      
+      if (LHSI->hasOneUse() || RHSV == 0) {
+        // Otherwise strength reduce the shift into an and.
+        APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+        Constant *Mask = ConstantInt::get(Val);
          
-        if (LHSI->hasOneUse() || RHSV == 0) {
-          // Otherwise strength reduce the shift into an and.
-          APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-          Constant *Mask = ConstantInt::get(Val);
-          
-          Instruction *AndI =
-            BinaryOperator::createAnd(LHSI->getOperand(0),
-                                      Mask, LHSI->getName()+".mask");
-          Value *And = InsertNewInstBefore(AndI, ICI);
-          return new ICmpInst(ICI.getPredicate(), And,
-                              ConstantExpr::getShl(RHS, ShAmt));
-        }
+        Instruction *AndI =
+          BinaryOperator::createAnd(LHSI->getOperand(0),
+                                    Mask, LHSI->getName()+".mask");
+        Value *And = InsertNewInstBefore(AndI, ICI);
+        return new ICmpInst(ICI.getPredicate(), And,
+                            ConstantExpr::getShl(RHS, ShAmt));
        }
      }
      break;
+  }
      
    case Instruction::SDiv:
    case Instruction::UDiv:
@@ -6309,7 +6317,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
  /// This is a truncation operation if Ty is smaller than V->getType(), or an
  /// extension operation if Ty is larger.
  static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
-                                       int &NumCastsRemoved) {
+                                       unsigned CastOpc, int &NumCastsRemoved) {
    // We can always evaluate constants in another type.
    if (isa<ConstantInt>(V))
      return true;
@@ -6319,30 +6327,48 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
    
    const IntegerType *OrigTy = cast<IntegerType>(V->getType());
    
+  // If this is an extension or truncate, we can often eliminate it.
+  if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+    // If this is a cast from the destination type, we can trivially eliminate
+    // it, and this will remove a cast overall.
+    if (I->getOperand(0)->getType() == Ty) {
+      // If the first operand is itself a cast, and is eliminable, do not count
+      // this as an eliminable cast.  We would prefer to eliminate those two
+      // casts first.
+      if (!isa<CastInst>(I->getOperand(0)))
+        ++NumCastsRemoved;
+      return true;
+    }
+  }
+
+  // We can't extend or shrink something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
+  if (!I->hasOneUse()) return false;
+
    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
-    if (!I->hasOneUse()) return false;
      // These operators can all arbitrarily be extended or truncated.
-    return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved) &&
-           CanEvaluateInDifferentType(I->getOperand(1), Ty, NumCastsRemoved);
+    return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                      NumCastsRemoved) &&
+           CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+                                      NumCastsRemoved);
  
    case Instruction::Shl:
-    if (!I->hasOneUse()) return false;
      // If we are truncating the result of this SHL, and if it's a shift of a
      // constant amount, we can always perform a SHL in a smaller type.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
        uint32_t BitWidth = Ty->getBitWidth();
        if (BitWidth < OrigTy->getBitWidth() && 
            CI->getLimitedValue(BitWidth) < BitWidth)
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved);
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved);
      }
      break;
    case Instruction::LShr:
-    if (!I->hasOneUse()) return false;
      // If this is a truncate of a logical shr, we can truncate it to a smaller
      // lshr iff we know that the bits we would otherwise be shifting in are
      // already zeros.
@@ -6353,25 +6379,19 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
            MaskedValueIsZero(I->getOperand(0),
              APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
            CI->getLimitedValue(BitWidth) < BitWidth) {
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved);
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved);
        }
      }
      break;
-  case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
-    // If this is a cast from the destination type, we can trivially eliminate
-    // it, and this will remove a cast overall.
-    if (I->getOperand(0)->getType() == Ty) {
-      // If the first operand is itself a cast, and is eliminable, do not count
-      // this as an eliminable cast.  We would prefer to eliminate those two
-      // casts first.
-      if (isa<CastInst>(I->getOperand(0)))
-        return true;
-      
-      ++NumCastsRemoved;
+  case Instruction::Trunc:
+    // If this is the same kind of case as our original (e.g. zext+zext), we
+    // can safely replace it.  Note that replacing it does not reduce the number
+    // of casts in the input.
+    if (I->getOpcode() == CastOpc)
        return true;
-    }
      break;
    default:
      // TODO: Can handle more cases here.
@@ -6410,14 +6430,16 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
-  case Instruction::BitCast:
      // If the source type of the cast is the type we're trying for then we can
-    // just return the source. There's no need to insert it because its not new.
+    // just return the source.  There's no need to insert it because it is not
+    // new.
      if (I->getOperand(0)->getType() == Ty)
        return I->getOperand(0);
      
-    // Some other kind of cast, which shouldn't happen, so just ..
-    // FALL THROUGH
+    // Otherwise, must be the same type of case, so just reinsert a new one.
+    Res = CastInst::create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
+                           Ty, I->getName());
+    break;
    default: 
      // TODO: Can handle more cases here.
      assert(0 && "Unreachable!");
@@ -6431,11 +6453,6 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
  Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
    Value *Src = CI.getOperand(0);
  
-  // Casting undef to anything results in undef so might as just replace it and
-  // get rid of the cast.
-  if (isa<UndefValue>(Src))   // cast undef -> undef
-    return ReplaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-
    // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
    // eliminate it now.
    if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
@@ -6598,14 +6615,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
    int NumCastsRemoved = 0;
    if (!isa<BitCastInst>(CI) &&
        CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
-                                 NumCastsRemoved)) {
+                                 CI.getOpcode(), NumCastsRemoved)) {
      // If this cast is a truncate, evaluting in a different type always
-    // eliminates the cast, so it is always a win.  If this is a noop-cast
-    // this just removes a noop cast which isn't pointful, but simplifies
-    // the code.  If this is a zero-extension, we need to do an AND to
-    // maintain the clear top-part of the computation, so we require that
-    // the input have eliminated at least one cast.  If this is a sign
-    // extension, we insert two new casts (to do the extension) so we
+    // eliminates the cast, so it is always a win.  If this is a zero-extension,
+    // we need to do an AND to maintain the clear top-part of the computation,
+    // so we require that the input have eliminated at least one cast.  If this
+    // is a sign extension, we insert two new casts (to do the extension) so we
      // require that two casts have been eliminated.
      bool DoXForm;
      switch (CI.getOpcode()) {
@@ -6622,9 +6637,6 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
      case Instruction::SExt:
        DoXForm = NumCastsRemoved >= 2;
        break;
-    case Instruction::BitCast:
-      DoXForm = false;
-      break;
      }
      
      if (DoXForm) {
@@ -7480,13 +7492,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    return 0;
  }
  
-/// GetKnownAlignment - If the specified pointer has an alignment that we can
-/// determine, return it, otherwise return 0.
-static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD,
+                                           unsigned PrefAlign = 0) {
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
      unsigned Align = GV->getAlignment();
      if (Align == 0 && TD) 
        Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+
+    // If there is a large requested alignment and we can, bump up the alignment
+    // of the global.
+    if (PrefAlign > Align && GV->hasInitializer()) {
+      GV->setAlignment(PrefAlign);
+      Align = PrefAlign;
+    }
      return Align;
    } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
      unsigned Align = AI->getAlignment();
@@ -7504,18 +7526,20 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
                     (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
        }
      }
+    
+    // If there is a requested alignment and if this is an alloca, round up.  We
+    // don't do this for malloc, because some systems can't respect the request.
+    if (PrefAlign > Align && isa<AllocaInst>(AI)) {
+      AI->setAlignment(PrefAlign);
+      Align = PrefAlign;
+    }
      return Align;
    } else if (isa<BitCastInst>(V) ||
               (isa<ConstantExpr>(V) && 
                cast<ConstantExpr>(V)->getOpcode() == Instruction::BitCast)) {
-    User *CI = cast<User>(V);
-    if (isa<PointerType>(CI->getOperand(0)->getType()))
-      return GetKnownAlignment(CI->getOperand(0), TD);
-    return 0;
+    return GetOrEnforceKnownAlignment(cast<User>(V)->getOperand(0),
+                                      TD, PrefAlign);
    } else if (User *GEPI = dyn_castGetElementPtr(V)) {
-    unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD);
-    if (BaseAlignment == 0) return 0;
-    
      // If all indexes are zero, it is just the alignment of the base pointer.
      bool AllZeroOperands = true;
      for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
@@ -7524,9 +7548,15 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
          AllZeroOperands = false;
          break;
        }
-    if (AllZeroOperands)
-      return BaseAlignment;
-    
+
+    if (AllZeroOperands) {
+      // Treat this like a bitcast.
+      return GetOrEnforceKnownAlignment(GEPI->getOperand(0), TD, PrefAlign);
+    }
+
+    unsigned BaseAlignment = GetOrEnforceKnownAlignment(GEPI->getOperand(0),TD);
+    if (BaseAlignment == 0) return 0;
+
      // Otherwise, if the base alignment is >= the alignment we expect for the
      // base pointer type, then we know that the resultant pointer is aligned at
      // least as much as its type requires.
@@ -7534,11 +7564,13 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
  
      const Type *BasePtrTy = GEPI->getOperand(0)->getType();
      const PointerType *PtrTy = cast<PointerType>(BasePtrTy);
-    if (TD->getABITypeAlignment(PtrTy->getElementType())
-        <= BaseAlignment) {
+    unsigned Align = TD->getABITypeAlignment(PtrTy->getElementType());
+    if (Align <= BaseAlignment) {
        const Type *GEPTy = GEPI->getType();
        const PointerType *GEPPtrTy = cast<PointerType>(GEPTy);
-      return TD->getABITypeAlignment(GEPPtrTy->getElementType());
+      Align = std::min(Align, (unsigned)
+                       TD->getABITypeAlignment(GEPPtrTy->getElementType()));
+      return Align;
      }
      return 0;
    }
@@ -7594,15 +7626,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      // If we can determine a pointer alignment that is bigger than currently
      // set, update the alignment.
      if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
-      unsigned Alignment1 = GetKnownAlignment(MI->getOperand(1), TD);
-      unsigned Alignment2 = GetKnownAlignment(MI->getOperand(2), TD);
+      unsigned Alignment1 = GetOrEnforceKnownAlignment(MI->getOperand(1), TD);
+      unsigned Alignment2 = GetOrEnforceKnownAlignment(MI->getOperand(2), TD);
        unsigned Align = std::min(Alignment1, Alignment2);
        if (MI->getAlignment()->getZExtValue() < Align) {
          MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align));
          Changed = true;
        }
      } else if (isa<MemSetInst>(MI)) {
-      unsigned Alignment = GetKnownAlignment(MI->getDest(), TD);
+      unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD);
        if (MI->getAlignment()->getZExtValue() < Alignment) {
          MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
          Changed = true;
@@ -7620,7 +7652,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::x86_sse2_loadu_dq:
        // Turn PPC lvx     -> load if the pointer is known aligned.
        // Turn X86 loadups -> load if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                        PointerType::get(II->getType()), CI);
          return new LoadInst(Ptr);
@@ -7629,7 +7661,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::ppc_altivec_stvx:
      case Intrinsic::ppc_altivec_stvxl:
        // Turn stvx -> store if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(2), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) {
          const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType());
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2),
                                        OpPtrTy, CI);
@@ -7641,7 +7673,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::x86_sse2_storeu_dq:
      case Intrinsic::x86_sse2_storel_dq:
        // Turn X86 storeu -> store if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
          const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                        OpPtrTy, CI);
@@ -7974,10 +8006,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    Instruction *NC;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
      NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(),
-                        &Args[0], Args.size(), Caller->getName(), Caller);
-    cast<InvokeInst>(II)->setCallingConv(II->getCallingConv());
+                        Args.begin(), Args.end(), Caller->getName(), Caller);
+    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    } else {
-    NC = new CallInst(Callee, &Args[0], Args.size(), Caller->getName(), Caller);
+    NC = new CallInst(Callee, Args.begin(), Args.end(),
+                      Caller->getName(), Caller);
      if (cast<CallInst>(Caller)->isTailCall())
        cast<CallInst>(NC)->setTailCall();
     cast<CallInst>(NC)->setCallingConv(cast<CallInst>(Caller)->getCallingConv());
@@ -8250,6 +8283,10 @@ static bool DeadPHICycle(PHINode *PN,
    // Remember this node, and if we find the cycle, return.
    if (!PotentiallyDeadPHIs.insert(PN))
      return true;
+  
+  // Don't scan crazily complex things.
+  if (PotentiallyDeadPHIs.size() == 16)
+    return false;
  
    if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
      return DeadPHICycle(PU, PotentiallyDeadPHIs);
@@ -8749,9 +8786,36 @@ static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
    return false;
  }
  
+/// GetUnderlyingObject - Trace through a series of getelementptrs and bitcasts
+/// until we find the underlying object a pointer is referring to or something
+/// we don't understand.  Note that the returned pointer may be offset from the
+/// input, because we ignore GEP indices.
+static Value *GetUnderlyingObject(Value *Ptr) {
+  while (1) {
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+      if (CE->getOpcode() == Instruction::BitCast ||
+          CE->getOpcode() == Instruction::GetElementPtr)
+        Ptr = CE->getOperand(0);
+      else
+        return Ptr;
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr)) {
+      Ptr = BCI->getOperand(0);
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+      Ptr = GEP->getOperand(0);
+    } else {
+      return Ptr;
+    }
+  }
+}
+
  Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
    Value *Op = LI.getOperand(0);
  
+  // Attempt to improve the alignment.
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD);
+  if (KnownAlign > LI.getAlignment())
+    LI.setAlignment(KnownAlign);
+
    // load (cast X) --> cast (load X) iff safe
    if (isa<CastInst>(Op))
      if (Instruction *Res = InstCombineLoadCast(*this, LI))
@@ -8822,6 +8886,17 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
            return Res;
        }
    }
+    
+  // If this load comes from anywhere in a constant global, and if the global
+  // is all undef or zero, we know what it loads.
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Op))) {
+    if (GV->isConstant() && GV->hasInitializer()) {
+      if (GV->getInitializer()->isNullValue())
+        return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
+      else if (isa<UndefValue>(GV->getInitializer()))
+        return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+    }
+  }
  
    if (Op->hasOneUse()) {
      // Change select and PHI nodes to select values instead of addresses: this
@@ -8947,6 +9022,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
        }
    }
  
+  // Attempt to improve the alignment.
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD);
+  if (KnownAlign > SI.getAlignment())
+    SI.setAlignment(KnownAlign);
+
    // Do really simple DSE, to catch cases where there are several consequtive
    // stores to the same location, separated by a few arithmetic operations. This
    // situation often occurs with bitfield accesses.
@@ -9316,16 +9396,16 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
  
  Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
  
-  // If packed val is undef, replace extract with scalar undef.
+  // If vector val is undef, replace extract with scalar undef.
    if (isa<UndefValue>(EI.getOperand(0)))
      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
  
-  // If packed val is constant 0, replace extract with scalar 0.
+  // If vector val is constant 0, replace extract with scalar 0.
    if (isa<ConstantAggregateZero>(EI.getOperand(0)))
      return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
    
    if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
-    // If packed val is constant with uniform operands, replace EI
+    // If vector val is constant with uniform operands, replace EI
      // with that operand
      Constant *op0 = C->getOperand(0);
      for (unsigned i = 1; i < C->getNumOperands(); ++i)
@@ -9840,7 +9920,7 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
          Inst->eraseFromParent();
          continue;
        }
-      
+     
        IC.AddToWorkList(Inst);
      }
  
@@ -10030,6 +10110,9 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
    }
  
    assert(WorklistMap.empty() && "Worklist empty, but map not?");
+    
+  // Do an explicit clear, this shrinks the map if needed.
+  WorklistMap.clear();
    return Changed;
  }