Cut off crazy computation. This helps PR1622 slightly.

[oota-llvm.git] / lib / Transforms / Scalar / InstructionCombining.cpp
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index 00b1bca656bd22c9bbb54968f208a691aaf2e860..218da9773d1ddb7e03294cf3d4d29f623d9c7984 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -76,6 +76,9 @@ namespace {
      TargetData *TD;
      bool MustPreserveLCSSA;
    public:
+    static char ID; // Pass identification, replacement for typeid
+    InstCombiner() : FunctionPass((intptr_t)&ID) {}
+
      /// AddToWorkList - Add the specified instruction to the worklist if it
      /// isn't already in it.
      void AddToWorkList(Instruction *I) {
@@ -186,6 +189,8 @@ namespace {
      Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
                                                  Instruction *LHS,
                                                  ConstantInt *RHS);
+    Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                ConstantInt *DivRHS);
  
      Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS,
                               ICmpInst::Predicate Cond, Instruction &I);
@@ -193,9 +198,10 @@ namespace {
                                       BinaryOperator &I);
      Instruction *commonCastTransforms(CastInst &CI);
      Instruction *commonIntCastTransforms(CastInst &CI);
-    Instruction *visitTrunc(CastInst &CI);
-    Instruction *visitZExt(CastInst &CI);
-    Instruction *visitSExt(CastInst &CI);
+    Instruction *commonPointerCastTransforms(CastInst &CI);
+    Instruction *visitTrunc(TruncInst &CI);
+    Instruction *visitZExt(ZExtInst &CI);
+    Instruction *visitSExt(SExtInst &CI);
      Instruction *visitFPTrunc(CastInst &CI);
      Instruction *visitFPExt(CastInst &CI);
      Instruction *visitFPToUI(CastInst &CI);
@@ -204,7 +210,7 @@ namespace {
      Instruction *visitSIToFP(CastInst &CI);
      Instruction *visitPtrToInt(CastInst &CI);
      Instruction *visitIntToPtr(CastInst &CI);
-    Instruction *visitBitCast(CastInst &CI);
+    Instruction *visitBitCast(BitCastInst &CI);
      Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                                  Instruction *FI);
      Instruction *visitSelectInst(SelectInst &CI);
@@ -350,12 +356,14 @@ namespace {
                                bool isSub, Instruction &I);
      Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                   bool isSigned, bool Inside, Instruction &IB);
-    Instruction *PromoteCastOfAllocation(CastInst &CI, AllocationInst &AI);
+    Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
      Instruction *MatchBSwap(BinaryOperator &I);
+    bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
  
      Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
    };
  
+  char InstCombiner::ID = 0;
    RegisterPass<InstCombiner> X("instcombine", "Combine redundant instructions");
  }
  
@@ -383,8 +391,7 @@ static const Type *getPromotedType(const Type *Ty) {
    if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
      if (ITy->getBitWidth() < 32)
        return Type::Int32Ty;
-  } else if (Ty == Type::FloatTy)
-    return Type::DoubleTy;
+  }
    return Ty;
  }
  
@@ -863,11 +870,10 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const Type *Ty,
  // could have the specified known zero and known one bits, returning them in
  // min/max.
  static void ComputeUnsignedMinMaxValuesFromKnownBits(const Type *Ty,
-                                                     const APInt& KnownZero,
-                                                     const APInt& KnownOne,
-                                                     APInt& Min,
-                                                     APInt& Max) {
-  uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+                                                     const APInt &KnownZero,
+                                                     const APInt &KnownOne,
+                                                     APInt &Min, APInt &Max) {
+  uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth(); BitWidth = BitWidth;
    assert(KnownZero.getBitWidth() == BitWidth && 
           KnownOne.getBitWidth() == BitWidth &&
           Min.getBitWidth() == BitWidth && Max.getBitWidth() &&
@@ -1335,12 +1341,21 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
        InsertNewInstBefore(cast<Instruction>(NewVal), *I);
        return UpdateValueUsesWith(I, NewVal);
      }    
+
+    // If the sign bit is the only bit demanded by this ashr, then there is no
+    // need to do it, the shift doesn't change the high bit.
+    if (DemandedMask.isSignBit())
+      return UpdateValueUsesWith(I, I->getOperand(0));
      
      if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
        uint32_t ShiftAmt = SA->getLimitedValue(BitWidth);
        
        // Signed shift right.
        APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+      // If any of the "high bits" are demanded, we should set the sign bit as
+      // demanded.
+      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+        DemandedMaskIn.set(BitWidth-1);
        if (SimplifyDemandedBits(I->getOperand(0),
                                 DemandedMaskIn,
                                 RHSKnownZero, RHSKnownOne, Depth+1))
@@ -1489,7 +1504,73 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
      UndefElts |= 1ULL << IdxNo;
      break;
    }
+  case Instruction::BitCast: {
+    // Vector->vector casts only.
+    const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+    if (!VTy) break;
+    unsigned InVWidth = VTy->getNumElements();
+    uint64_t InputDemandedElts = 0;
+    unsigned Ratio;
+
+    if (VWidth == InVWidth) {
+      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+      // elements as are demanded of us.
+      Ratio = 1;
+      InputDemandedElts = DemandedElts;
+    } else if (VWidth > InVWidth) {
+      // Untested so far.
+      break;
+      
+      // If there are more elements in the result than there are in the source,
+      // then an input element is live if any of the corresponding output
+      // elements are live.
+      Ratio = VWidth/InVWidth;
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+        if (DemandedElts & (1ULL << OutIdx))
+          InputDemandedElts |= 1ULL << (OutIdx/Ratio);
+      }
+    } else {
+      // Untested so far.
+      break;
+      
+      // If there are more elements in the source than there are in the result,
+      // then an input element is live if the corresponding output element is
+      // live.
+      Ratio = InVWidth/VWidth;
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if (DemandedElts & (1ULL << InIdx/Ratio))
+          InputDemandedElts |= 1ULL << InIdx;
+    }
+    
+    // div/rem demand all inputs, because they don't want divide by zero.
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+                                      UndefElts2, Depth+1);
+    if (TmpV) {
+      I->setOperand(0, TmpV);
+      MadeChange = true;
+    }
      
+    UndefElts = UndefElts2;
+    if (VWidth > InVWidth) {
+      assert(0 && "Unimp");
+      // If there are more elements in the result than there are in the source,
+      // then an output element is undef if the corresponding input element is
+      // undef.
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+        if (UndefElts2 & (1ULL << (OutIdx/Ratio)))
+          UndefElts |= 1ULL << OutIdx;
+    } else if (VWidth < InVWidth) {
+      assert(0 && "Unimp");
+      // If there are more elements in the source than there are in the result,
+      // then a result element is undef if all of the corresponding input
+      // elements are undef.
+      UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if ((UndefElts2 & (1ULL << InIdx)) == 0)    // Not undef?
+          UndefElts &= ~(1ULL << (InIdx/Ratio));    // Clear undef bit.
+    }
+    break;
+  }
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
@@ -1803,7 +1884,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    if (I.getNumOperands() == 2) {
      Constant *C = cast<Constant>(I.getOperand(1));
      for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV;
+      Value *InV = 0;
        if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
          if (CmpInst *CI = dyn_cast<CmpInst>(&I))
            InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
@@ -1975,9 +2056,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
      return BinaryOperator::createMul(LHS, AddOne(C2));
  
    // X + ~X --> -1   since   ~X = -X-1
-  if (dyn_castNotVal(LHS) == RHS ||
-      dyn_castNotVal(RHS) == LHS)
-    return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType()));
+  if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS)
+    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
    
  
    // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
@@ -2157,7 +2237,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
        // 0 - (X sdiv C)  -> (X sdiv -C)
        if (Op1I->getOpcode() == Instruction::SDiv)
          if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-          if (CSI->isNullValue())
+          if (CSI->isZero())
              if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
                return BinaryOperator::createSDiv(Op1I->getOperand(0),
                                                 ConstantExpr::getNeg(DivRHS));
@@ -2195,26 +2275,34 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
    return 0;
  }
  
-/// isSignBitCheck - Given an exploded icmp instruction, return true if it
-/// really just returns true if the most significant (sign) bit is set.
-static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS) {
+/// isSignBitCheck - Given an exploded icmp instruction, return true if the
+/// comparison only checks the sign bit.  If it only checks the sign bit, set
+/// TrueIfSigned if the result of the comparison is true when the input value is
+/// signed.
+static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
+                           bool &TrueIfSigned) {
    switch (pred) {
-    case ICmpInst::ICMP_SLT: 
-      // True if LHS s< RHS and RHS == 0
-      return RHS->isNullValue();
-    case ICmpInst::ICMP_SLE: 
-      // True if LHS s<= RHS and RHS == -1
-      return RHS->isAllOnesValue();
-    case ICmpInst::ICMP_UGE: 
-      // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
-      return RHS->getValue() == 
-             APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits());
-    case ICmpInst::ICMP_UGT:
-      // True if LHS u> RHS and RHS == high-bit-mask - 1
-      return RHS->getValue() ==
-             APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
-    default:
-      return false;
+  case ICmpInst::ICMP_SLT:   // True if LHS s< 0
+    TrueIfSigned = true;
+    return RHS->isZero();
+  case ICmpInst::ICMP_SLE:   // True if LHS s<= RHS and RHS == -1
+    TrueIfSigned = true;
+    return RHS->isAllOnesValue();
+  case ICmpInst::ICMP_SGT:   // True if LHS s> -1
+    TrueIfSigned = false;
+    return RHS->isAllOnesValue();
+  case ICmpInst::ICMP_UGT:
+    // True if LHS u> RHS and RHS == high-bit-mask - 1
+    TrueIfSigned = true;
+    return RHS->getValue() ==
+      APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+  case ICmpInst::ICMP_UGE: 
+    // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
+    TrueIfSigned = true;
+    return RHS->getValue() == 
+      APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits());
+  default:
+    return false;
    }
  }
  
@@ -2236,7 +2324,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
              return BinaryOperator::createMul(SI->getOperand(0),
                                               ConstantExpr::getShl(CI, ShOp));
  
-      if (CI->isNullValue())
+      if (CI->isZero())
          return ReplaceInstUsesWith(I, Op1);  // X * 0  == 0
        if (CI->equalsInt(1))                  // X * 1  == X
          return ReplaceInstUsesWith(I, Op0);
@@ -2301,11 +2389,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
      if (ICmpInst *SCI = dyn_cast<ICmpInst>(BoolCast->getOperand(0))) {
        Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1);
        const Type *SCOpTy = SCIOp0->getType();
-
+      bool TIS = false;
+      
        // If the icmp is true iff the sign bit of X is set, then convert this
        // multiply into a shift/and combination.
        if (isa<ConstantInt>(SCIOp1) &&
-          isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1))) {
+          isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) &&
+          TIS) {
          // Shift the X value right to turn it into "all signbits".
          Constant *Amt = ConstantInt::get(SCIOp0->getType(),
                                            SCOpTy->getPrimitiveSizeInBits()-1);
@@ -2729,23 +2819,19 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
  // isMaxValueMinusOne - return true if this is Max-1
  static bool isMaxValueMinusOne(const ConstantInt *C, bool isSigned) {
    uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits();
-  if (isSigned) {
-    // Calculate 0111111111..11111
-    APInt Val(APInt::getSignedMaxValue(TypeBits));
-    return C->getValue() == Val-1;
-  }
-  return C->getValue() == APInt::getAllOnesValue(TypeBits) - 1;
+  if (!isSigned)
+    return C->getValue() == APInt::getAllOnesValue(TypeBits) - 1;
+  return C->getValue() == APInt::getSignedMaxValue(TypeBits)-1;
  }
  
  // isMinValuePlusOne - return true if this is Min+1
  static bool isMinValuePlusOne(const ConstantInt *C, bool isSigned) {
-  if (isSigned) {
-    // Calculate 1111111111000000000000
-    uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits();
-    APInt Val(APInt::getSignedMinValue(TypeBits));
-    return C->getValue() == Val+1;
-  }
-  return C->getValue() == 1; // unsigned
+  if (!isSigned)
+    return C->getValue() == 1; // unsigned
+    
+  // Calculate 1111111111000000000000
+  uint32_t TypeBits = C->getType()->getPrimitiveSizeInBits();
+  return C->getValue() == APInt::getSignedMinValue(TypeBits)+1;
  }
  
  // isOneBitSet - Return true if there is exactly one bit set in the specified
@@ -3170,8 +3256,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
        return &I;
    } else {
      if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
-      if (CP->isAllOnesValue())
+      if (CP->isAllOnesValue())            // X & <-1,-1> -> X
          return ReplaceInstUsesWith(I, I.getOperand(0));
+    } else if (isa<ConstantAggregateZero>(Op1)) {
+      return ReplaceInstUsesWith(I, Op1);  // X & <0,0> -> <0,0>
      }
    }
    
@@ -3286,13 +3374,28 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
    }
    
    {
-    Value *A = 0, *B = 0;
-    if (match(Op0, m_Or(m_Value(A), m_Value(B))))
+    Value *A = 0, *B = 0, *C = 0, *D = 0;
+    if (match(Op0, m_Or(m_Value(A), m_Value(B)))) {
        if (A == Op1 || B == Op1)    // (A | ?) & A  --> A
          return ReplaceInstUsesWith(I, Op1);
-    if (match(Op1, m_Or(m_Value(A), m_Value(B))))
+    
+      // (A|B) & ~(A&B) -> A^B
+      if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
+        if ((A == C && B == D) || (A == D && B == C))
+          return BinaryOperator::createXor(A, B);
+      }
+    }
+    
+    if (match(Op1, m_Or(m_Value(A), m_Value(B)))) {
        if (A == Op0 || B == Op0)    // A & (A | ?)  --> A
          return ReplaceInstUsesWith(I, Op0);
+
+      // ~(A&B) & (A|B) -> A^B
+      if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
+        if ((A == C && B == D) || (A == D && B == C))
+          return BinaryOperator::createXor(A, B);
+      }
+    }
      
      if (Op0->hasOneUse() &&
          match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
@@ -3613,9 +3716,9 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
    for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
      if (ByteValues[i] != V)
        return 0;
-  const Type *Tys[] = { ITy, ITy };
+  const Type *Tys[] = { ITy };
    Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 2);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
    return new CallInst(F, V);
  }
  
@@ -3625,7 +3728,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  
    if (isa<UndefValue>(Op1))                       // X | undef -> -1
-    return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
    // or X, X = X
    if (Op0 == Op1)
@@ -3639,7 +3742,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
      if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth),
                               KnownZero, KnownOne))
        return &I;
+  } else if (isa<ConstantAggregateZero>(Op1)) {
+    return ReplaceInstUsesWith(I, Op0);  // X | <0,0> -> X
+  } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+    if (CP->isAllOnesValue())            // X | <-1,-1> -> <-1,-1>
+      return ReplaceInstUsesWith(I, I.getOperand(1));
    }
+    
+
    
    // or X, -1 == -1
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
@@ -3710,7 +3820,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
    }
  
    // (A & C)|(B & D)
-  Value *C, *D;
+  Value *C = 0, *D = 0;
    if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
        match(Op1, m_And(m_Value(B), m_Value(D)))) {
      Value *V1 = 0, *V2 = 0, *V3 = 0;
@@ -3759,40 +3869,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
            InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I);
          return BinaryOperator::createAnd(V1, Or);
        }
-      
-      // (V1 & V3)|(V2 & ~V3) -> ((V1 ^ V2) & V3) ^ V2
-      if (isOnlyUse(Op0) && isOnlyUse(Op1)) {
-        // Try all combination of terms to find V3 and ~V3.
-        if (A->hasOneUse() && match(A, m_Not(m_Value(V3)))) {
-          if (V3 == B)
-            V1 = D, V2 = C;
-          else if (V3 == D)
-            V1 = B, V2 = C;
-        }
-        if (B->hasOneUse() && match(B, m_Not(m_Value(V3)))) {
-          if (V3 == A)
-            V1 = C, V2 = D;
-          else if (V3 == C)
-            V1 = A, V2 = D;
-        }
-        if (C->hasOneUse() && match(C, m_Not(m_Value(V3)))) {
-          if (V3 == B)
-            V1 = D, V2 = A;
-          else if (V3 == D)
-            V1 = B, V2 = A;
-        }
-        if (D->hasOneUse() && match(D, m_Not(m_Value(V3)))) {
-          if (V3 == A)
-            V1 = C, V2 = B;
-          else if (V3 == C)
-            V1 = A, V2 = B;
-        }
-        if (V1) {
-          A = InsertNewInstBefore(BinaryOperator::createXor(V1, V2, "tmp"), I);
-          A = InsertNewInstBefore(BinaryOperator::createAnd(A, V3, "tmp"), I);
-          return BinaryOperator::createXor(A, V2);
-        }
-      }
      }
    }
    
@@ -3813,16 +3889,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
  
    if (match(Op0, m_Not(m_Value(A)))) {   // ~A | Op1
      if (A == Op1)   // ~A | A == -1
-      return ReplaceInstUsesWith(I,
-                                ConstantInt::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
    } else {
      A = 0;
    }
    // Note, A is still live here!
    if (match(Op1, m_Not(m_Value(B)))) {   // Op0 | ~B
      if (Op0 == B)
-      return ReplaceInstUsesWith(I,
-                                ConstantInt::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
      // (~A | ~B) == (~(A & B)) - De Morgan's Law
      if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
@@ -3847,13 +3921,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
              LHSCC != ICmpInst::ICMP_UGE && LHSCC != ICmpInst::ICMP_ULE &&
              RHSCC != ICmpInst::ICMP_UGE && RHSCC != ICmpInst::ICMP_ULE &&
              LHSCC != ICmpInst::ICMP_SGE && LHSCC != ICmpInst::ICMP_SLE &&
-            RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE) {
+            RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE &&
+            // We can't fold (ugt x, C) | (sgt x, C2).
+            PredicatesFoldable(LHSCC, RHSCC)) {
            // Ensure that the larger constant is on the RHS.
-          ICmpInst::Predicate GT = ICmpInst::isSignedPredicate(LHSCC) ? 
-            ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
-          Constant *Cmp = ConstantExpr::getICmp(GT, LHSCst, RHSCst);
            ICmpInst *LHS = cast<ICmpInst>(Op0);
-          if (cast<ConstantInt>(Cmp)->getZExtValue()) {
+          bool NeedsSwap;
+          if (ICmpInst::isSignedPredicate(LHSCC))
+            NeedsSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+          else
+            NeedsSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+            
+          if (NeedsSwap) {
              std::swap(LHS, RHS);
              std::swap(LHSCst, RHSCst);
              std::swap(LHSCC, RHSCC);
@@ -4015,7 +4094,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
  
    // xor X, X = 0, even if X is nested in a sequence of Xor's.
    if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
-    assert(Result == &I && "AssociativeOpt didn't work?");
+    assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result;
      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
    }
    
@@ -4027,15 +4106,45 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
      if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth),
                               KnownZero, KnownOne))
        return &I;
+  } else if (isa<ConstantAggregateZero>(Op1)) {
+    return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
    }
  
+  // Is this a ~ operation?
+  if (Value *NotOp = dyn_castNotVal(&I)) {
+    // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+    // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+      if (Op0I->getOpcode() == Instruction::And || 
+          Op0I->getOpcode() == Instruction::Or) {
+        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
+        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+          Instruction *NotY =
+            BinaryOperator::createNot(Op0I->getOperand(1),
+                                      Op0I->getOperand(1)->getName()+".not");
+          InsertNewInstBefore(NotY, I);
+          if (Op0I->getOpcode() == Instruction::And)
+            return BinaryOperator::createOr(Op0NotVal, NotY);
+          else
+            return BinaryOperator::createAnd(Op0NotVal, NotY);
+        }
+      }
+    }
+  }
+  
+  
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // xor (icmp A, B), true = not (icmp A, B) = !icmp A, B
-    if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
-      if (RHS == ConstantInt::getTrue() && ICI->hasOneUse())
+    // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+    if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) {
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
          return new ICmpInst(ICI->getInversePredicate(),
                              ICI->getOperand(0), ICI->getOperand(1));
  
+      if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
+        return new FCmpInst(FCI->getInversePredicate(),
+                            FCI->getOperand(0), FCI->getOperand(1));
+    }
+
      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
        // ~(c-X) == X-c-1 == X+(-c-1)
        if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
@@ -4045,18 +4154,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
                                                ConstantInt::get(I.getType(), 1));
            return BinaryOperator::createAdd(Op0I->getOperand(1), ConstantRHS);
          }
-
-      // ~(~X & Y) --> (X | ~Y)
-      if (Op0I->getOpcode() == Instruction::And && RHS->isAllOnesValue()) {
-        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
-        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
-          Instruction *NotY =
-            BinaryOperator::createNot(Op0I->getOperand(1),
-                                      Op0I->getOperand(1)->getName()+".not");
-          InsertNewInstBefore(NotY, I);
-          return BinaryOperator::createOr(Op0NotVal, NotY);
-        }
-      }
            
        if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
          if (Op0I->getOpcode() == Instruction::Add) {
@@ -4101,12 +4198,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
  
    if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1
      if (X == Op1)
-      return ReplaceInstUsesWith(I,
-                                ConstantInt::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
    if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1
      if (X == Op0)
-      return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
    
    BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
@@ -4269,38 +4365,66 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
    Value *Result = Constant::getNullValue(IntPtrTy);
  
    // Build a mask for high order bits.
-  uint64_t PtrSizeMask = ~0ULL >> (64-TD.getPointerSize()*8);
+  unsigned IntPtrWidth = TD.getPointerSize()*8;
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
  
    for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
      Value *Op = GEP->getOperand(i);
      uint64_t Size = TD.getTypeSize(GTI.getIndexedType()) & PtrSizeMask;
-    Constant *Scale = ConstantInt::get(IntPtrTy, Size);
-    if (Constant *OpC = dyn_cast<Constant>(Op)) {
-      if (!OpC->isNullValue()) {
-        OpC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
-        Scale = ConstantExpr::getMul(OpC, Scale);
-        if (Constant *RC = dyn_cast<Constant>(Result))
-          Result = ConstantExpr::getAdd(RC, Scale);
-        else {
-          // Emit an add instruction.
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+      if (OpC->isZero()) continue;
+      
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+        
+        if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
+          Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
+        else
            Result = IC.InsertNewInstBefore(
-             BinaryOperator::createAdd(Result, Scale,
-                                       GEP->getName()+".offs"), I);
-        }
+                   BinaryOperator::createAdd(Result,
+                                             ConstantInt::get(IntPtrTy, Size),
+                                             GEP->getName()+".offs"), I);
+        continue;
        }
-    } else {
-      // Convert to correct type.
-      Op = IC.InsertNewInstBefore(CastInst::createSExtOrBitCast(Op, IntPtrTy,
-                                               Op->getName()+".c"), I);
-      if (Size != 1)
-        // We'll let instcombine(mul) convert this to a shl if possible.
+      
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+      Scale = ConstantExpr::getMul(OC, Scale);
+      if (Constant *RC = dyn_cast<Constant>(Result))
+        Result = ConstantExpr::getAdd(RC, Scale);
+      else {
+        // Emit an add instruction.
+        Result = IC.InsertNewInstBefore(
+           BinaryOperator::createAdd(Result, Scale,
+                                     GEP->getName()+".offs"), I);
+      }
+      continue;
+    }
+    // Convert to correct type.
+    if (Op->getType() != IntPtrTy) {
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getSExt(OpC, IntPtrTy);
+      else
+        Op = IC.InsertNewInstBefore(new SExtInst(Op, IntPtrTy,
+                                                 Op->getName()+".c"), I);
+    }
+    if (Size != 1) {
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getMul(OpC, Scale);
+      else    // We'll let instcombine(mul) convert this to a shl if possible.
          Op = IC.InsertNewInstBefore(BinaryOperator::createMul(Op, Scale,
-                                                    GEP->getName()+".idx"), I);
+                                                  GEP->getName()+".idx"), I);
+    }
  
-      // Emit an add instruction.
+    // Emit an add instruction.
+    if (isa<Constant>(Op) && isa<Constant>(Result))
+      Result = ConstantExpr::getAdd(cast<Constant>(Op),
+                                    cast<Constant>(Result));
+    else
        Result = IC.InsertNewInstBefore(BinaryOperator::createAdd(Op, Result,
-                                                    GEP->getName()+".offs"), I);
-    }
+                                                  GEP->getName()+".offs"), I);
    }
    return Result;
  }
@@ -4705,22 +4829,29 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
      // already been handled above, this requires little checking.
      //
      switch (I.getPredicate()) {
-      default: break;
-      case ICmpInst::ICMP_ULE: 
-        return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
-      case ICmpInst::ICMP_SLE:
-        return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
-      case ICmpInst::ICMP_UGE:
-        return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI));
-      case ICmpInst::ICMP_SGE:
-        return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
+    default: break;
+    case ICmpInst::ICMP_ULE: 
+      return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI));
+    case ICmpInst::ICMP_SLE:
+      return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI));
+    case ICmpInst::ICMP_UGE:
+      return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI));
+    case ICmpInst::ICMP_SGE:
+      return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI));
      }
      
      // See if we can fold the comparison based on bits known to be zero or one
-    // in the input.
+    // in the input.  If this comparison is a normal comparison, it demands all
+    // bits, if it is a sign bit comparison, it only demands the sign bit.
+    
+    bool UnusedBit;
+    bool isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
+    
      uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth();
      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-    if (SimplifyDemandedBits(Op0, APInt::getAllOnesValue(BitWidth),
+    if (SimplifyDemandedBits(Op0, 
+                             isSignBit ? APInt::getSignBit(BitWidth)
+                                       : APInt::getAllOnesValue(BitWidth),
                               KnownZero, KnownOne, 0))
        return &I;
          
@@ -4968,6 +5099,150 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
    return Changed ? &I : 0;
  }
  
+
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                          ConstantInt *DivRHS) {
+  ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+  const APInt &CmpRHSV = CmpRHS->getValue();
+  
+  // FIXME: If the operand types don't match the type of the divide 
+  // then don't attempt this transform. The code below doesn't have the
+  // logic to deal with a signed divide and an unsigned compare (and
+  // vice versa). This is because (x /s C1) <s C2  produces different 
+  // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+  // (x /u C1) <u C2.  Simply casting the operands and result won't 
+  // work. :(  The if statement below tests that condition and bails 
+  // if it finds it. 
+  bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+  if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate())
+    return 0;
+  if (DivRHS->isZero())
+    return 0; // The ProdOV computation fails on divide by zero.
+
+  // Compute Prod = CI * DivRHS. We are essentially solving an equation
+  // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and 
+  // C2 (CI). By solving for X we can turn this into a range check 
+  // instead of computing a divide. 
+  ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+
+  // Determine if the product overflows by seeing if the product is
+  // not equal to the divide. Make sure we do the same kind of divide
+  // as in the LHS instruction that we're folding. 
+  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+  // Get the ICmp opcode
+  ICmpInst::Predicate Pred = ICI.getPredicate();
+
+  // Figure out the interval that is being checked.  For example, a comparison
+  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). 
+  // Compute this interval based on the constants involved and the signedness of
+  // the compare/divide.  This computes a half-open interval, keeping track of
+  // whether either value in the interval overflows.  After analysis each
+  // overflow variable is set to 0 if it's corresponding bound variable is valid
+  // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+  int LoOverflow = 0, HiOverflow = 0;
+  ConstantInt *LoBound = 0, *HiBound = 0;
+  
+  
+  if (!DivIsSigned) {  // udiv
+    // e.g. X/5 op 3  --> [15, 20)
+    LoBound = Prod;
+    HiOverflow = LoOverflow = ProdOV;
+    if (!HiOverflow)
+      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+  } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0.
+    if (CmpRHSV == 0) {       // (X / pos) op 0
+      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
+      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
+      HiBound = DivRHS;
+    } else if (CmpRHSV.isPositive()) {   // (X / pos) op pos
+      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
+      HiOverflow = LoOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+    } else {                       // (X / pos) op neg
+      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
+      Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS));
+      LoOverflow = AddWithOverflow(LoBound, Prod,
+                                   cast<ConstantInt>(DivRHSH), true) ? -1 : 0;
+      HiBound = AddOne(Prod);
+      HiOverflow = ProdOV ? -1 : 0;
+    }
+  } else {                         // Divisor is < 0.
+    if (CmpRHSV == 0) {       // (X / neg) op 0
+      // e.g. X/-5 op 0  --> [-4, 5)
+      LoBound = AddOne(DivRHS);
+      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+      if (HiBound == DivRHS) {     // -INTMIN = INTMIN
+        HiOverflow = 1;            // [INTMIN+1, overflow)
+        HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
+      }
+    } else if (CmpRHSV.isPositive()) {   // (X / neg) op pos
+      // e.g. X/-5 op 3  --> [-19, -14)
+      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow)
+        LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), true) ?-1:0;
+      HiBound = AddOne(Prod);
+    } else {                       // (X / neg) op neg
+      // e.g. X/-5 op -3  --> [15, 20)
+      LoBound = Prod;
+      LoOverflow = HiOverflow = ProdOV ? 1 : 0;
+      HiBound = Subtract(Prod, DivRHS);
+    }
+    
+    // Dividing by a negative swaps the condition.  LT <-> GT
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  Value *X = DivI->getOperand(0);
+  switch (Pred) {
+  default: assert(0 && "Unhandled icmp opcode!");
+  case ICmpInst::ICMP_EQ:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    else if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 
+                          ICmpInst::ICMP_UGE, X, LoBound);
+    else if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 
+                          ICmpInst::ICMP_ULT, X, HiBound);
+    else
+      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
+  case ICmpInst::ICMP_NE:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    else if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 
+                          ICmpInst::ICMP_ULT, X, LoBound);
+    else if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 
+                          ICmpInst::ICMP_UGE, X, HiBound);
+    else
+      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_SLT:
+    if (LoOverflow == +1)   // Low bound is greater than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    if (LoOverflow == -1)   // Low bound is less than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    return new ICmpInst(Pred, X, LoBound);
+  case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_SGT:
+    if (HiOverflow == +1)       // High bound greater than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    else if (HiOverflow == -1)  // High bound less than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    if (Pred == ICmpInst::ICMP_UGT)
+      return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+    else
+      return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+  }
+}
+
+
  /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
  ///
  Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
@@ -5128,85 +5403,105 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
      }
      break;
      
-  case Instruction::Shl:         // (icmp pred (shl X, ShAmt), CI)
-    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-      if (ICI.isEquality()) {
-        uint32_t TypeBits = RHSV.getBitWidth();
-        
-        // Check that the shift amount is in range.  If not, don't perform
-        // undefined shifts.  When the shift is visited it will be
-        // simplified.
-        if (ShAmt->uge(TypeBits))
-          break;
-        
-        // If we are comparing against bits always shifted out, the
-        // comparison cannot succeed.
-        Constant *Comp =
-          ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
-        if (Comp != RHS) {// Comparing against a bit that we know is zero.
-          bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-          Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
-          return ReplaceInstUsesWith(ICI, Cst);
-        }
+  case Instruction::Shl: {       // (icmp pred (shl X, ShAmt), CI)
+    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!ShAmt) break;
+    
+    uint32_t TypeBits = RHSV.getBitWidth();
+    
+    // Check that the shift amount is in range.  If not, don't perform
+    // undefined shifts.  When the shift is visited it will be
+    // simplified.
+    if (ShAmt->uge(TypeBits))
+      break;
+    
+    if (ICI.isEquality()) {
+      // If we are comparing against bits always shifted out, the
+      // comparison cannot succeed.
+      Constant *Comp =
+        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
+      if (Comp != RHS) {// Comparing against a bit that we know is zero.
+        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+        Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+        return ReplaceInstUsesWith(ICI, Cst);
+      }
+      
+      if (LHSI->hasOneUse()) {
+        // Otherwise strength reduce the shift into an and.
+        uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+        Constant *Mask =
+          ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
          
-        if (LHSI->hasOneUse()) {
-          // Otherwise strength reduce the shift into an and.
-          uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-          Constant *Mask =
-            ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
-          
-          Instruction *AndI =
-            BinaryOperator::createAnd(LHSI->getOperand(0),
-                                      Mask, LHSI->getName()+".mask");
-          Value *And = InsertNewInstBefore(AndI, ICI);
-          return new ICmpInst(ICI.getPredicate(), And,
-                              ConstantInt::get(RHSV.lshr(ShAmtVal)));
-        }
+        Instruction *AndI =
+          BinaryOperator::createAnd(LHSI->getOperand(0),
+                                    Mask, LHSI->getName()+".mask");
+        Value *And = InsertNewInstBefore(AndI, ICI);
+        return new ICmpInst(ICI.getPredicate(), And,
+                            ConstantInt::get(RHSV.lshr(ShAmtVal)));
        }
      }
+    
+    // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+    bool TrueIfSigned = false;
+    if (LHSI->hasOneUse() &&
+        isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
+      // (X << 31) <s 0  --> (X&1) != 0
+      Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) <<
+                                           (TypeBits-ShAmt->getZExtValue()-1));
+      Instruction *AndI =
+        BinaryOperator::createAnd(LHSI->getOperand(0),
+                                  Mask, LHSI->getName()+".mask");
+      Value *And = InsertNewInstBefore(AndI, ICI);
+      
+      return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+                          And, Constant::getNullValue(And->getType()));
+    }
      break;
+  }
      
    case Instruction::LShr:         // (icmp pred (shr X, ShAmt), CI)
-  case Instruction::AShr:
-    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-      if (ICI.isEquality()) {
-        // Check that the shift amount is in range.  If not, don't perform
-        // undefined shifts.  When the shift is visited it will be
-        // simplified.
-        uint32_t TypeBits = RHSV.getBitWidth();
-        if (ShAmt->uge(TypeBits))
-          break;
-        uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-        
-        // If we are comparing against bits always shifted out, the
-        // comparison cannot succeed.
-        APInt Comp = RHSV << ShAmtVal;
-        if (LHSI->getOpcode() == Instruction::LShr)
-          Comp = Comp.lshr(ShAmtVal);
-        else
-          Comp = Comp.ashr(ShAmtVal);
-        
-        if (Comp != RHSV) { // Comparing against a bit that we know is zero.
-          bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-          Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
-          return ReplaceInstUsesWith(ICI, Cst);
-        }
+  case Instruction::AShr: {
+    ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+    if (!ShAmt) break;
+
+    if (ICI.isEquality()) {
+      // Check that the shift amount is in range.  If not, don't perform
+      // undefined shifts.  When the shift is visited it will be
+      // simplified.
+      uint32_t TypeBits = RHSV.getBitWidth();
+      if (ShAmt->uge(TypeBits))
+        break;
+      uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+      
+      // If we are comparing against bits always shifted out, the
+      // comparison cannot succeed.
+      APInt Comp = RHSV << ShAmtVal;
+      if (LHSI->getOpcode() == Instruction::LShr)
+        Comp = Comp.lshr(ShAmtVal);
+      else
+        Comp = Comp.ashr(ShAmtVal);
+      
+      if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+        Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+        return ReplaceInstUsesWith(ICI, Cst);
+      }
+      
+      if (LHSI->hasOneUse() || RHSV == 0) {
+        // Otherwise strength reduce the shift into an and.
+        APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+        Constant *Mask = ConstantInt::get(Val);
          
-        if (LHSI->hasOneUse() || RHSV == 0) {
-          // Otherwise strength reduce the shift into an and.
-          APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-          Constant *Mask = ConstantInt::get(Val);
-          
-          Instruction *AndI =
-            BinaryOperator::createAnd(LHSI->getOperand(0),
-                                      Mask, LHSI->getName()+".mask");
-          Value *And = InsertNewInstBefore(AndI, ICI);
-          return new ICmpInst(ICI.getPredicate(), And,
-                              ConstantExpr::getShl(RHS, ShAmt));
-        }
+        Instruction *AndI =
+          BinaryOperator::createAnd(LHSI->getOperand(0),
+                                    Mask, LHSI->getName()+".mask");
+        Value *And = InsertNewInstBefore(AndI, ICI);
+        return new ICmpInst(ICI.getPredicate(), And,
+                            ConstantExpr::getShl(RHS, ShAmt));
        }
      }
      break;
+  }
      
    case Instruction::SDiv:
    case Instruction::UDiv:
@@ -5216,129 +5511,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
      // checked.  If there is an overflow on the low or high side, remember 
      // it, otherwise compute the range [low, hi) bounding the new value.
      // See: InsertRangeTest above for the kinds of replacements possible.
-    if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-      // FIXME: If the operand types don't match the type of the divide 
-      // then don't attempt this transform. The code below doesn't have the
-      // logic to deal with a signed divide and an unsigned compare (and
-      // vice versa). This is because (x /s C1) <s C2  produces different 
-      // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
-      // (x /u C1) <u C2.  Simply casting the operands and result won't 
-      // work. :(  The if statement below tests that condition and bails 
-      // if it finds it. 
-      bool DivIsSigned = LHSI->getOpcode() == Instruction::SDiv;
-      if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate())
-        break;
-      if (DivRHS->isZero())
-        break; // Don't hack on div by zero
-      
-      // Initialize the variables that will indicate the nature of the
-      // range check.
-      bool LoOverflow = false, HiOverflow = false;
-      ConstantInt *LoBound = 0, *HiBound = 0;
-      
-      // Compute Prod = CI * DivRHS. We are essentially solving an equation
-      // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and 
-      // C2 (CI). By solving for X we can turn this into a range check 
-      // instead of computing a divide. 
-      ConstantInt *Prod = Multiply(RHS, DivRHS);
-      
-      // Determine if the product overflows by seeing if the product is
-      // not equal to the divide. Make sure we do the same kind of divide
-      // as in the LHS instruction that we're folding. 
-      bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
-                     ConstantExpr::getUDiv(Prod, DivRHS)) != RHS;
-      
-      // Get the ICmp opcode
-      ICmpInst::Predicate predicate = ICI.getPredicate();
-      
-      if (!DivIsSigned) {  // udiv
-        LoBound = Prod;
-        LoOverflow = ProdOV;
-        HiOverflow = ProdOV || 
-          AddWithOverflow(HiBound, LoBound, DivRHS, false);
-      } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0.
-        if (RHSV == 0) {       // (X / pos) op 0
-                               // Can't overflow.
-          LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
-          HiBound = DivRHS;
-        } else if (RHSV.isPositive()) {   // (X / pos) op pos
-          LoBound = Prod;
-          LoOverflow = ProdOV;
-          HiOverflow = ProdOV || 
-            AddWithOverflow(HiBound, Prod, DivRHS, true);
-        } else {                       // (X / pos) op neg
-          Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS));
-          LoOverflow = AddWithOverflow(LoBound, Prod,
-                                       cast<ConstantInt>(DivRHSH), true);
-          HiBound = AddOne(Prod);
-          HiOverflow = ProdOV;
-        }
-      } else {                         // Divisor is < 0.
-        if (RHSV == 0) {       // (X / neg) op 0
-          LoBound = AddOne(DivRHS);
-          HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
-          if (HiBound == DivRHS)
-            LoBound = 0;               // - INTMIN = INTMIN
-        } else if (RHSV.isPositive()) {   // (X / neg) op pos
-          HiOverflow = LoOverflow = ProdOV;
-          if (!LoOverflow)
-            LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS),
-                                         true);
-          HiBound = AddOne(Prod);
-        } else {                       // (X / neg) op neg
-          LoBound = Prod;
-          LoOverflow = HiOverflow = ProdOV;
-          HiBound = Subtract(Prod, DivRHS);
-        }
-        
-        // Dividing by a negate swaps the condition.
-        predicate = ICmpInst::getSwappedPredicate(predicate);
-      }
-      
-      if (LoBound) {
-        Value *X = LHSI->getOperand(0);
-        switch (predicate) {
-          default: assert(0 && "Unhandled icmp opcode!");
-          case ICmpInst::ICMP_EQ:
-            if (LoOverflow && HiOverflow)
-              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
-            else if (HiOverflow)
-              return new ICmpInst(DivIsSigned ?  ICmpInst::ICMP_SGE : 
-                                  ICmpInst::ICMP_UGE, X, LoBound);
-            else if (LoOverflow)
-              return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 
-                                  ICmpInst::ICMP_ULT, X, HiBound);
-            else
-              return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, 
-                                     true, ICI);
-          case ICmpInst::ICMP_NE:
-            if (LoOverflow && HiOverflow)
-              return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
-            else if (HiOverflow)
-              return new ICmpInst(DivIsSigned ?  ICmpInst::ICMP_SLT : 
-                                  ICmpInst::ICMP_ULT, X, LoBound);
-            else if (LoOverflow)
-              return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 
-                                  ICmpInst::ICMP_UGE, X, HiBound);
-            else
-              return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, 
-                                     false, ICI);
-          case ICmpInst::ICMP_ULT:
-          case ICmpInst::ICMP_SLT:
-            if (LoOverflow)
-              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
-            return new ICmpInst(predicate, X, LoBound);
-          case ICmpInst::ICMP_UGT:
-          case ICmpInst::ICMP_SGT:
-            if (HiOverflow)
-              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
-            if (predicate == ICmpInst::ICMP_UGT)
-              return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
-            else
-              return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
-        }
-      }
-    }
+    if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+      if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+                                          DivRHS))
+        return R;
      break;
    }
    
@@ -5457,8 +5633,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
        }
      }
    } else {  // Not a ICMP_EQ/ICMP_NE
-            // If the LHS is a cast from an integral value of the same size, then 
-            // since we know the RHS is a constant, try to simlify.
+            // If the LHS is a cast from an integral value of the same size, 
+            // then since we know the RHS is a constant, try to simlify.
      if (CastInst *Cast = dyn_cast<CastInst>(LHSI)) {
        Value *CastOp = Cast->getOperand(0);
        const Type *SrcTy = CastOp->getType();
@@ -5493,7 +5669,28 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
    const Type *DestTy    = LHSCI->getType();
    Value *RHSCIOp;
  
-  // We only handle extension cast instructions, so far. Enforce this.
+  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the 
+  // integer type is the same size as the pointer type.
+  if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+      getTargetData().getPointerSizeInBits() == 
+         cast<IntegerType>(DestTy)->getBitWidth()) {
+    Value *RHSOp = 0;
+    if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+    } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+      RHSOp = RHSC->getOperand(0);
+      // If the pointer types don't match, insert a bitcast.
+      if (LHSCIOp->getType() != RHSOp->getType())
+        RHSOp = InsertCastBefore(Instruction::BitCast, RHSOp,
+                                 LHSCIOp->getType(), ICI);
+    }
+
+    if (RHSOp)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+  }
+  
+  // The code below only handles extension cast instructions, so far.
+  // Enforce this.
    if (LHSCI->getOpcode() != Instruction::ZExt &&
        LHSCI->getOpcode() != Instruction::SExt)
      return 0;
@@ -6018,10 +6215,9 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
  
  /// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
  /// try to eliminate the cast by moving the type information into the alloc.
-Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI,
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
                                                     AllocationInst &AI) {
-  const PointerType *PTy = dyn_cast<PointerType>(CI.getType());
-  if (!PTy) return 0;   // Not casting the allocation to a pointer type.
+  const PointerType *PTy = cast<PointerType>(CI.getType());
    
    // Remove any uses of AI that are dead.
    assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
@@ -6121,7 +6317,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI,
  /// This is a truncation operation if Ty is smaller than V->getType(), or an
  /// extension operation if Ty is larger.
  static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
-                                       int &NumCastsRemoved) {
+                                       unsigned CastOpc, int &NumCastsRemoved) {
    // We can always evaluate constants in another type.
    if (isa<ConstantInt>(V))
      return true;
@@ -6131,30 +6327,48 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
    
    const IntegerType *OrigTy = cast<IntegerType>(V->getType());
    
+  // If this is an extension or truncate, we can often eliminate it.
+  if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+    // If this is a cast from the destination type, we can trivially eliminate
+    // it, and this will remove a cast overall.
+    if (I->getOperand(0)->getType() == Ty) {
+      // If the first operand is itself a cast, and is eliminable, do not count
+      // this as an eliminable cast.  We would prefer to eliminate those two
+      // casts first.
+      if (!isa<CastInst>(I->getOperand(0)))
+        ++NumCastsRemoved;
+      return true;
+    }
+  }
+
+  // We can't extend or shrink something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
+  if (!I->hasOneUse()) return false;
+
    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
-    if (!I->hasOneUse()) return false;
      // These operators can all arbitrarily be extended or truncated.
-    return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved) &&
-           CanEvaluateInDifferentType(I->getOperand(1), Ty, NumCastsRemoved);
+    return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                      NumCastsRemoved) &&
+           CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+                                      NumCastsRemoved);
  
    case Instruction::Shl:
-    if (!I->hasOneUse()) return false;
      // If we are truncating the result of this SHL, and if it's a shift of a
      // constant amount, we can always perform a SHL in a smaller type.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
        uint32_t BitWidth = Ty->getBitWidth();
        if (BitWidth < OrigTy->getBitWidth() && 
            CI->getLimitedValue(BitWidth) < BitWidth)
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved);
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved);
      }
      break;
    case Instruction::LShr:
-    if (!I->hasOneUse()) return false;
      // If this is a truncate of a logical shr, we can truncate it to a smaller
      // lshr iff we know that the bits we would otherwise be shifting in are
      // already zeros.
@@ -6165,25 +6379,19 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
            MaskedValueIsZero(I->getOperand(0),
              APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
            CI->getLimitedValue(BitWidth) < BitWidth) {
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved);
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved);
        }
      }
      break;
-  case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
-    // If this is a cast from the destination type, we can trivially eliminate
-    // it, and this will remove a cast overall.
-    if (I->getOperand(0)->getType() == Ty) {
-      // If the first operand is itself a cast, and is eliminable, do not count
-      // this as an eliminable cast.  We would prefer to eliminate those two
-      // casts first.
-      if (isa<CastInst>(I->getOperand(0)))
-        return true;
-      
-      ++NumCastsRemoved;
+  case Instruction::Trunc:
+    // If this is the same kind of case as our original (e.g. zext+zext), we
+    // can safely replace it.  Note that replacing it does not reduce the number
+    // of casts in the input.
+    if (I->getOpcode() == CastOpc)
        return true;
-    }
      break;
    default:
      // TODO: Can handle more cases here.
@@ -6222,14 +6430,16 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
-  case Instruction::BitCast:
      // If the source type of the cast is the type we're trying for then we can
-    // just return the source. There's no need to insert it because its not new.
+    // just return the source.  There's no need to insert it because it is not
+    // new.
      if (I->getOperand(0)->getType() == Ty)
        return I->getOperand(0);
      
-    // Some other kind of cast, which shouldn't happen, so just ..
-    // FALL THROUGH
+    // Otherwise, must be the same type of case, so just reinsert a new one.
+    Res = CastInst::create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
+                           Ty, I->getName());
+    break;
    default: 
      // TODO: Can handle more cases here.
      assert(0 && "Unreachable!");
@@ -6243,12 +6453,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
  Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
    Value *Src = CI.getOperand(0);
  
-  // Casting undef to anything results in undef so might as just replace it and
-  // get rid of the cast.
-  if (isa<UndefValue>(Src))   // cast undef -> undef
-    return ReplaceInstUsesWith(CI, UndefValue::get(CI.getType()));
-
-  // Many cases of "cast of a cast" are eliminable. If its eliminable we just
+  // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
    // eliminate it now.
    if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
      if (Instruction::CastOps opc = 
@@ -6259,32 +6464,6 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
      }
    }
  
-  // If casting the result of a getelementptr instruction with no offset, turn
-  // this into a cast of the original pointer!
-  //
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
-    bool AllZeroOperands = true;
-    for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i)
-      if (!isa<Constant>(GEP->getOperand(i)) ||
-          !cast<Constant>(GEP->getOperand(i))->isNullValue()) {
-        AllZeroOperands = false;
-        break;
-      }
-    if (AllZeroOperands) {
-      // Changing the cast operand is usually not a good idea but it is safe
-      // here because the pointer operand is being replaced with another 
-      // pointer operand so the opcode doesn't need to change.
-      CI.setOperand(0, GEP->getOperand(0));
-      return &CI;
-    }
-  }
-    
-  // If we are casting a malloc or alloca to a pointer to a type of the same
-  // size, rewrite the allocation instruction to allocate the "right" type.
-  if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
-    if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
-      return V;
-
    // If we are casting a select then fold the cast into the select
    if (SelectInst *SI = dyn_cast<SelectInst>(Src))
      if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
@@ -6298,6 +6477,113 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
    return 0;
  }
  
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+  Value *Src = CI.getOperand(0);
+  
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+    // If casting the result of a getelementptr instruction with no offset, turn
+    // this into a cast of the original pointer!
+    if (GEP->hasAllZeroIndices()) {
+      // Changing the cast operand is usually not a good idea but it is safe
+      // here because the pointer operand is being replaced with another 
+      // pointer operand so the opcode doesn't need to change.
+      AddToWorkList(GEP);
+      CI.setOperand(0, GEP->getOperand(0));
+      return &CI;
+    }
+    
+    // If the GEP has a single use, and the base pointer is a bitcast, and the
+    // GEP computes a constant offset, see if we can convert these three
+    // instructions into fewer.  This typically happens with unions and other
+    // non-type-safe code.
+    if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
+      if (GEP->hasAllConstantIndices()) {
+        // We are guaranteed to get a constant from EmitGEPOffset.
+        ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, CI, *this));
+        int64_t Offset = OffsetV->getSExtValue();
+        
+        // Get the base pointer input of the bitcast, and the type it points to.
+        Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+        const Type *GEPIdxTy =
+          cast<PointerType>(OrigBase->getType())->getElementType();
+        if (GEPIdxTy->isSized()) {
+          SmallVector<Value*, 8> NewIndices;
+          
+          // Start with the index over the outer type.  Note that the type size
+          // might be zero (even if the offset isn't zero) if the indexed type
+          // is something like [0 x {int, int}]
+          const Type *IntPtrTy = TD->getIntPtrType();
+          int64_t FirstIdx = 0;
+          if (int64_t TySize = TD->getTypeSize(GEPIdxTy)) {
+            FirstIdx = Offset/TySize;
+            Offset %= TySize;
+          
+            // Handle silly modulus not returning values values [0..TySize).
+            if (Offset < 0) {
+              --FirstIdx;
+              Offset += TySize;
+              assert(Offset >= 0);
+            }
+            assert((uint64_t)Offset < (uint64_t)TySize &&"Out of range offset");
+          }
+          
+          NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+          // Index into the types.  If we fail, set OrigBase to null.
+          while (Offset) {
+            if (const StructType *STy = dyn_cast<StructType>(GEPIdxTy)) {
+              const StructLayout *SL = TD->getStructLayout(STy);
+              if (Offset < (int64_t)SL->getSizeInBytes()) {
+                unsigned Elt = SL->getElementContainingOffset(Offset);
+                NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
+              
+                Offset -= SL->getElementOffset(Elt);
+                GEPIdxTy = STy->getElementType(Elt);
+              } else {
+                // Otherwise, we can't index into this, bail out.
+                Offset = 0;
+                OrigBase = 0;
+              }
+            } else if (isa<ArrayType>(GEPIdxTy) || isa<VectorType>(GEPIdxTy)) {
+              const SequentialType *STy = cast<SequentialType>(GEPIdxTy);
+              if (uint64_t EltSize = TD->getTypeSize(STy->getElementType())) {
+                NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+                Offset %= EltSize;
+              } else {
+                NewIndices.push_back(ConstantInt::get(IntPtrTy, 0));
+              }
+              GEPIdxTy = STy->getElementType();
+            } else {
+              // Otherwise, we can't index into this, bail out.
+              Offset = 0;
+              OrigBase = 0;
+            }
+          }
+          if (OrigBase) {
+            // If we were able to index down into an element, create the GEP
+            // and bitcast the result.  This eliminates one bitcast, potentially
+            // two.
+            Instruction *NGEP = new GetElementPtrInst(OrigBase, &NewIndices[0],
+                                                      NewIndices.size(), "");
+            InsertNewInstBefore(NGEP, CI);
+            NGEP->takeName(GEP);
+            
+            if (isa<BitCastInst>(CI))
+              return new BitCastInst(NGEP, CI.getType());
+            assert(isa<PtrToIntInst>(CI));
+            return new PtrToIntInst(NGEP, CI.getType());
+          }
+        }
+      }      
+    }
+  }
+    
+  return commonCastTransforms(CI);
+}
+
+
+
  /// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as
  /// integer types. This function implements the common transforms for all those
  /// cases.
@@ -6329,14 +6615,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
    int NumCastsRemoved = 0;
    if (!isa<BitCastInst>(CI) &&
        CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
-                                 NumCastsRemoved)) {
+                                 CI.getOpcode(), NumCastsRemoved)) {
      // If this cast is a truncate, evaluting in a different type always
-    // eliminates the cast, so it is always a win.  If this is a noop-cast
-    // this just removes a noop cast which isn't pointful, but simplifies
-    // the code.  If this is a zero-extension, we need to do an AND to
-    // maintain the clear top-part of the computation, so we require that
-    // the input have eliminated at least one cast.  If this is a sign
-    // extension, we insert two new casts (to do the extension) so we
+    // eliminates the cast, so it is always a win.  If this is a zero-extension,
+    // we need to do an AND to maintain the clear top-part of the computation,
+    // so we require that the input have eliminated at least one cast.  If this
+    // is a sign extension, we insert two new casts (to do the extension) so we
      // require that two casts have been eliminated.
      bool DoXForm;
      switch (CI.getOpcode()) {
@@ -6353,9 +6637,6 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
      case Instruction::SExt:
        DoXForm = NumCastsRemoved >= 2;
        break;
-    case Instruction::BitCast:
-      DoXForm = false;
-      break;
      }
      
      if (DoXForm) {
@@ -6471,7 +6752,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
    return 0;
  }
  
-Instruction *InstCombiner::visitTrunc(CastInst &CI) {
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
    if (Instruction *Result = commonIntCastTransforms(CI))
      return Result;
    
@@ -6528,7 +6809,7 @@ Instruction *InstCombiner::visitTrunc(CastInst &CI) {
    return 0;
  }
  
-Instruction *InstCombiner::visitZExt(CastInst &CI) {
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
    // If one of the common conversion will work ..
    if (Instruction *Result = commonIntCastTransforms(CI))
      return Result;
@@ -6579,7 +6860,7 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
          Value *Sh = ConstantInt::get(In->getType(),
                                      In->getType()->getPrimitiveSizeInBits()-1);
          In = InsertNewInstBefore(BinaryOperator::createLShr(In, Sh,
-                                                        In->getName()+".lobit"), 
+                                                        In->getName()+".lobit"),
                                   CI);
          if (In->getType() != CI.getType())
            In = CastInst::createIntegerCast(In, CI.getType(),
@@ -6588,7 +6869,7 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
          if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
            Constant *One = ConstantInt::get(In->getType(), 1);
            In = InsertNewInstBefore(BinaryOperator::createXor(In, One,
-                                                          In->getName()+".not"), 
+                                                          In->getName()+".not"),
                                     CI);
          }
  
@@ -6653,13 +6934,44 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
    return 0;
  }
  
-Instruction *InstCombiner::visitSExt(CastInst &CI) {
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
    if (Instruction *I = commonIntCastTransforms(CI))
      return I;
    
-  // (x <s 0) ? -1 : 0 -> ashr x, 31
-  // (x >u 2147483647) ? -1 : 0 -> ashr x, 31
-
+  Value *Src = CI.getOperand(0);
+  
+  // sext (x <s 0) -> ashr x, 31   -> all ones if signed
+  // sext (x >s -1) -> ashr x, 31  -> all ones if not signed
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) {
+    // If we are just checking for a icmp eq of a single bit and zext'ing it
+    // to an integer, then shift the bit to the appropriate place and then
+    // cast to integer to avoid the comparison.
+    if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+      const APInt &Op1CV = Op1C->getValue();
+      
+      // sext (x <s  0) to i32 --> x>>s31      true if signbit set.
+      // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
+      if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+          (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())){
+        Value *In = ICI->getOperand(0);
+        Value *Sh = ConstantInt::get(In->getType(),
+                                     In->getType()->getPrimitiveSizeInBits()-1);
+        In = InsertNewInstBefore(BinaryOperator::createAShr(In, Sh,
+                                                        In->getName()+".lobit"),
+                                 CI);
+        if (In->getType() != CI.getType())
+          In = CastInst::createIntegerCast(In, CI.getType(),
+                                           true/*SExt*/, "tmp", &CI);
+        
+        if (ICI->getPredicate() == ICmpInst::ICMP_SGT)
+          In = InsertNewInstBefore(BinaryOperator::createNot(In,
+                                     In->getName()+".not"), CI);
+        
+        return ReplaceInstUsesWith(CI, In);
+      }
+    }
+  }
+      
    return 0;
  }
  
@@ -6688,15 +7000,14 @@ Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
  }
  
  Instruction *InstCombiner::visitPtrToInt(CastInst &CI) {
-  return commonCastTransforms(CI);
+  return commonPointerCastTransforms(CI);
  }
  
  Instruction *InstCombiner::visitIntToPtr(CastInst &CI) {
    return commonCastTransforms(CI);
  }
  
-Instruction *InstCombiner::visitBitCast(CastInst &CI) {
-
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
    // If the operands are integer typed then apply the integer transforms,
    // otherwise just apply the common ones.
    Value *Src = CI.getOperand(0);
@@ -6706,6 +7017,9 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) {
    if (SrcTy->isInteger() && DestTy->isInteger()) {
      if (Instruction *Result = commonIntCastTransforms(CI))
        return Result;
+  } else if (isa<PointerType>(SrcTy)) {
+    if (Instruction *I = commonPointerCastTransforms(CI))
+      return I;
    } else {
      if (Instruction *Result = commonCastTransforms(CI))
        return Result;
@@ -6717,28 +7031,33 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) {
    if (DestTy == Src->getType())
      return ReplaceInstUsesWith(CI, Src);
  
-  // If the source and destination are pointers, and this cast is equivalent to
-  // a getelementptr X, 0, 0, 0...  turn it into the appropriate getelementptr.
-  // This can enhance SROA and other transforms that want type-safe pointers.
    if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
-    if (const PointerType *SrcPTy = dyn_cast<PointerType>(SrcTy)) {
-      const Type *DstElTy = DstPTy->getElementType();
-      const Type *SrcElTy = SrcPTy->getElementType();
-      
-      Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
-      unsigned NumZeros = 0;
-      while (SrcElTy != DstElTy && 
-             isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
-             SrcElTy->getNumContainedTypes() /* not "{}" */) {
-        SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
-        ++NumZeros;
-      }
+    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+    const Type *DstElTy = DstPTy->getElementType();
+    const Type *SrcElTy = SrcPTy->getElementType();
+    
+    // If we are casting a malloc or alloca to a pointer to a type of the same
+    // size, rewrite the allocation instruction to allocate the "right" type.
+    if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
+      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+        return V;
+    
+    // If the source and destination are pointers, and this cast is equivalent
+    // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
+    // This can enhance SROA and other transforms that want type-safe pointers.
+    Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
+    unsigned NumZeros = 0;
+    while (SrcElTy != DstElTy && 
+           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+           SrcElTy->getNumContainedTypes() /* not "{}" */) {
+      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+      ++NumZeros;
+    }
  
-      // If we found a path from the src to dest, create the getelementptr now.
-      if (SrcElTy == DstElTy) {
-        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-        return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
-      }
+    // If we found a path from the src to dest, create the getelementptr now.
+    if (SrcElTy == DstElTy) {
+      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+      return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
      }
    }
  
@@ -6814,7 +7133,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) {
    case Instruction::AShr:
      return Constant::getNullValue(I->getType());
    case Instruction::And:
-    return ConstantInt::getAllOnesValue(I->getType());
+    return Constant::getAllOnesValue(I->getType());
    case Instruction::Mul:
      return ConstantInt::get(I->getType(), 1);
    }
@@ -7173,13 +7492,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    return 0;
  }
  
-/// GetKnownAlignment - If the specified pointer has an alignment that we can
-/// determine, return it, otherwise return 0.
-static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD,
+                                           unsigned PrefAlign = 0) {
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
      unsigned Align = GV->getAlignment();
      if (Align == 0 && TD) 
        Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+
+    // If there is a large requested alignment and we can, bump up the alignment
+    // of the global.
+    if (PrefAlign > Align && GV->hasInitializer()) {
+      GV->setAlignment(PrefAlign);
+      Align = PrefAlign;
+    }
      return Align;
    } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
      unsigned Align = AI->getAlignment();
@@ -7197,21 +7526,20 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
                     (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
        }
      }
+    
+    // If there is a requested alignment and if this is an alloca, round up.  We
+    // don't do this for malloc, because some systems can't respect the request.
+    if (PrefAlign > Align && isa<AllocaInst>(AI)) {
+      AI->setAlignment(PrefAlign);
+      Align = PrefAlign;
+    }
      return Align;
    } else if (isa<BitCastInst>(V) ||
               (isa<ConstantExpr>(V) && 
                cast<ConstantExpr>(V)->getOpcode() == Instruction::BitCast)) {
-    User *CI = cast<User>(V);
-    if (isa<PointerType>(CI->getOperand(0)->getType()))
-      return GetKnownAlignment(CI->getOperand(0), TD);
-    return 0;
-  } else if (isa<GetElementPtrInst>(V) ||
-             (isa<ConstantExpr>(V) && 
-              cast<ConstantExpr>(V)->getOpcode()==Instruction::GetElementPtr)) {
-    User *GEPI = cast<User>(V);
-    unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD);
-    if (BaseAlignment == 0) return 0;
-    
+    return GetOrEnforceKnownAlignment(cast<User>(V)->getOperand(0),
+                                      TD, PrefAlign);
+  } else if (User *GEPI = dyn_castGetElementPtr(V)) {
      // If all indexes are zero, it is just the alignment of the base pointer.
      bool AllZeroOperands = true;
      for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
@@ -7220,9 +7548,15 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
          AllZeroOperands = false;
          break;
        }
-    if (AllZeroOperands)
-      return BaseAlignment;
-    
+
+    if (AllZeroOperands) {
+      // Treat this like a bitcast.
+      return GetOrEnforceKnownAlignment(GEPI->getOperand(0), TD, PrefAlign);
+    }
+
+    unsigned BaseAlignment = GetOrEnforceKnownAlignment(GEPI->getOperand(0),TD);
+    if (BaseAlignment == 0) return 0;
+
      // Otherwise, if the base alignment is >= the alignment we expect for the
      // base pointer type, then we know that the resultant pointer is aligned at
      // least as much as its type requires.
@@ -7230,11 +7564,13 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
  
      const Type *BasePtrTy = GEPI->getOperand(0)->getType();
      const PointerType *PtrTy = cast<PointerType>(BasePtrTy);
-    if (TD->getABITypeAlignment(PtrTy->getElementType())
-        <= BaseAlignment) {
+    unsigned Align = TD->getABITypeAlignment(PtrTy->getElementType());
+    if (Align <= BaseAlignment) {
        const Type *GEPTy = GEPI->getType();
        const PointerType *GEPPtrTy = cast<PointerType>(GEPTy);
-      return TD->getABITypeAlignment(GEPPtrTy->getElementType());
+      Align = std::min(Align, (unsigned)
+                       TD->getABITypeAlignment(GEPPtrTy->getElementType()));
+      return Align;
      }
      return 0;
    }
@@ -7290,15 +7626,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      // If we can determine a pointer alignment that is bigger than currently
      // set, update the alignment.
      if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
-      unsigned Alignment1 = GetKnownAlignment(MI->getOperand(1), TD);
-      unsigned Alignment2 = GetKnownAlignment(MI->getOperand(2), TD);
+      unsigned Alignment1 = GetOrEnforceKnownAlignment(MI->getOperand(1), TD);
+      unsigned Alignment2 = GetOrEnforceKnownAlignment(MI->getOperand(2), TD);
        unsigned Align = std::min(Alignment1, Alignment2);
        if (MI->getAlignment()->getZExtValue() < Align) {
          MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align));
          Changed = true;
        }
      } else if (isa<MemSetInst>(MI)) {
-      unsigned Alignment = GetKnownAlignment(MI->getDest(), TD);
+      unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD);
        if (MI->getAlignment()->getZExtValue() < Alignment) {
          MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
          Changed = true;
@@ -7316,7 +7652,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::x86_sse2_loadu_dq:
        // Turn PPC lvx     -> load if the pointer is known aligned.
        // Turn X86 loadups -> load if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                        PointerType::get(II->getType()), CI);
          return new LoadInst(Ptr);
@@ -7325,7 +7661,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::ppc_altivec_stvx:
      case Intrinsic::ppc_altivec_stvxl:
        // Turn stvx -> store if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(2), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) {
          const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType());
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2),
                                        OpPtrTy, CI);
@@ -7337,7 +7673,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::x86_sse2_storeu_dq:
      case Intrinsic::x86_sse2_storel_dq:
        // Turn X86 storeu -> store if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
          const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                        OpPtrTy, CI);
@@ -7387,7 +7723,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
            for (unsigned i = 0; i != 16; ++i) {
              if (isa<UndefValue>(Mask->getOperand(i)))
                continue;
-            unsigned Idx =cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+            unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
              Idx &= 31;  // Match the hardware behavior.
              
              if (ExtractedElts[Idx] == 0) {
@@ -7532,6 +7868,14 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    const FunctionType *FT = Callee->getFunctionType();
    const Type *OldRetTy = Caller->getType();
  
+  const FunctionType *ActualFT =
+    cast<FunctionType>(cast<PointerType>(CE->getType())->getElementType());
+  
+  // If the parameter attributes don't match up, don't do the xform.  We don't
+  // want to lose an sret attribute or something.
+  if (FT->getParamAttrs() != ActualFT->getParamAttrs())
+    return false;
+  
    // Check to see if we are changing the return type...
    if (OldRetTy != FT->getReturnType()) {
      if (Callee->isDeclaration() && !Caller->use_empty() && 
@@ -7662,10 +8006,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    Instruction *NC;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
      NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(),
-                        &Args[0], Args.size(), Caller->getName(), Caller);
-    cast<InvokeInst>(II)->setCallingConv(II->getCallingConv());
+                        Args.begin(), Args.end(), Caller->getName(), Caller);
+    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    } else {
-    NC = new CallInst(Callee, &Args[0], Args.size(), Caller->getName(), Caller);
+    NC = new CallInst(Callee, Args.begin(), Args.end(),
+                      Caller->getName(), Caller);
      if (cast<CallInst>(Caller)->isTailCall())
        cast<CallInst>(NC)->setTailCall();
     cast<CallInst>(NC)->setCallingConv(cast<CallInst>(Caller)->getCallingConv());
@@ -7938,6 +8283,10 @@ static bool DeadPHICycle(PHINode *PN,
    // Remember this node, and if we find the cycle, return.
    if (!PotentiallyDeadPHIs.insert(PN))
      return true;
+  
+  // Don't scan crazily complex things.
+  if (PotentiallyDeadPHIs.size() == 16)
+    return false;
  
    if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
      return DeadPHICycle(PU, PotentiallyDeadPHIs);
@@ -8006,7 +8355,7 @@ static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
  
  Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    Value *PtrOp = GEP.getOperand(0);
-  // Is it 'getelementptr %P, long 0'  or 'getelementptr %P'
+  // Is it 'getelementptr %P, i32 0'  or 'getelementptr %P'
    // If so, eliminate the noop.
    if (GEP.getNumOperands() == 1)
      return ReplaceInstUsesWith(GEP, PtrOp);
@@ -8021,22 +8370,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
      return ReplaceInstUsesWith(GEP, PtrOp);
  
-  // Keep track of whether all indices are zero constants integers.
-  bool AllZeroIndices = true;
-  
    // Eliminate unneeded casts for indices.
    bool MadeChange = false;
    
    gep_type_iterator GTI = gep_type_begin(GEP);
    for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) {
-    // Track whether this GEP has all zero indices, if so, it doesn't move the
-    // input pointer, it just changes its type.
-    if (AllZeroIndices) {
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(i)))
-        AllZeroIndices = CI->isNullValue();
-      else
-        AllZeroIndices = false;
-    }
      if (isa<SequentialType>(*GTI)) {
        if (CastInst *CI = dyn_cast<CastInst>(GEP.getOperand(i))) {
          if (CI->getOpcode() == Instruction::ZExt ||
@@ -8072,7 +8410,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    // If this GEP instruction doesn't move the pointer, and if the input operand
    // is a bitcast of another pointer, just replace the GEP with a bitcast of the
    // real input to the dest type.
-  if (AllZeroIndices && isa<BitCastInst>(GEP.getOperand(0)))
+  if (GEP.hasAllZeroIndices() && isa<BitCastInst>(GEP.getOperand(0)))
      return new BitCastInst(cast<BitCastInst>(GEP.getOperand(0))->getOperand(0),
                             GEP.getType());
      
@@ -8336,13 +8674,6 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
  Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
    Value *Op = FI.getOperand(0);
  
-  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
-  if (CastInst *CI = dyn_cast<CastInst>(Op))
-    if (isa<PointerType>(CI->getOperand(0)->getType())) {
-      FI.setOperand(0, CI->getOperand(0));
-      return &FI;
-    }
-
    // free undef -> unreachable.
    if (isa<UndefValue>(Op)) {
      // Insert a new store to null because we cannot modify the CFG here.
@@ -8350,11 +8681,33 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
                    UndefValue::get(PointerType::get(Type::Int1Ty)), &FI);
      return EraseInstFromFunction(FI);
    }
-
+  
    // If we have 'free null' delete the instruction.  This can happen in stl code
    // when lots of inlining happens.
    if (isa<ConstantPointerNull>(Op))
      return EraseInstFromFunction(FI);
+  
+  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
+  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op)) {
+    FI.setOperand(0, CI->getOperand(0));
+    return &FI;
+  }
+  
+  // Change free (gep X, 0,0,0,0) into free(X)
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+    if (GEPI->hasAllZeroIndices()) {
+      AddToWorkList(GEPI);
+      FI.setOperand(0, GEPI->getOperand(0));
+      return &FI;
+    }
+  }
+  
+  // Change free(malloc) into nothing, if the malloc has a single use.
+  if (MallocInst *MI = dyn_cast<MallocInst>(Op))
+    if (MI->hasOneUse()) {
+      EraseInstFromFunction(FI);
+      return EraseInstFromFunction(*MI);
+    }
  
    return 0;
  }
@@ -8433,9 +8786,36 @@ static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
    return false;
  }
  
+/// GetUnderlyingObject - Trace through a series of getelementptrs and bitcasts
+/// until we find the underlying object a pointer is referring to or something
+/// we don't understand.  Note that the returned pointer may be offset from the
+/// input, because we ignore GEP indices.
+static Value *GetUnderlyingObject(Value *Ptr) {
+  while (1) {
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+      if (CE->getOpcode() == Instruction::BitCast ||
+          CE->getOpcode() == Instruction::GetElementPtr)
+        Ptr = CE->getOperand(0);
+      else
+        return Ptr;
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr)) {
+      Ptr = BCI->getOperand(0);
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+      Ptr = GEP->getOperand(0);
+    } else {
+      return Ptr;
+    }
+  }
+}
+
  Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
    Value *Op = LI.getOperand(0);
  
+  // Attempt to improve the alignment.
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD);
+  if (KnownAlign > LI.getAlignment())
+    LI.setAlignment(KnownAlign);
+
    // load (cast X) --> cast (load X) iff safe
    if (isa<CastInst>(Op))
      if (Instruction *Res = InstCombineLoadCast(*this, LI))
@@ -8457,8 +8837,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
    }
  
    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op))
-    if (isa<ConstantPointerNull>(GEPI->getOperand(0)) ||
-        isa<UndefValue>(GEPI->getOperand(0))) {
+    if (isa<ConstantPointerNull>(GEPI->getOperand(0))) {
        // Insert a new store to null instruction before the load to indicate
        // that this code is not reachable.  We do this instead of inserting
        // an unreachable instruction directly because we cannot modify the
@@ -8507,6 +8886,17 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
            return Res;
        }
    }
+    
+  // If this load comes from anywhere in a constant global, and if the global
+  // is all undef or zero, we know what it loads.
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Op))) {
+    if (GV->isConstant() && GV->hasInitializer()) {
+      if (GV->getInitializer()->isNullValue())
+        return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
+      else if (isa<UndefValue>(GV->getInitializer()))
+        return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+    }
+  }
  
    if (Op->hasOneUse()) {
      // Change select and PHI nodes to select values instead of addresses: this
@@ -8632,6 +9022,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
        }
    }
  
+  // Attempt to improve the alignment.
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD);
+  if (KnownAlign > SI.getAlignment())
+    SI.setAlignment(KnownAlign);
+
    // Do really simple DSE, to catch cases where there are several consequtive
    // stores to the same location, separated by a few arithmetic operations. This
    // situation often occurs with bitfield accesses.
@@ -8706,66 +9101,118 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
    // ends with an unconditional branch, try to move it to the successor block.
    BBI = &SI; ++BBI;
    if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
-    if (BI->isUnconditional()) {
-      // Check to see if the successor block has exactly two incoming edges.  If
-      // so, see if the other predecessor contains a store to the same location.
-      // if so, insert a PHI node (if needed) and move the stores down.
-      BasicBlock *Dest = BI->getSuccessor(0);
-
-      pred_iterator PI = pred_begin(Dest);
-      BasicBlock *Other = 0;
-      if (*PI != BI->getParent())
-        Other = *PI;
-      ++PI;
-      if (PI != pred_end(Dest)) {
-        if (*PI != BI->getParent())
-          if (Other)
-            Other = 0;
-          else
-            Other = *PI;
-        if (++PI != pred_end(Dest))
-          Other = 0;
-      }
-      if (Other) {  // If only one other pred...
-        BBI = Other->getTerminator();
-        // Make sure this other block ends in an unconditional branch and that
-        // there is an instruction before the branch.
-        if (isa<BranchInst>(BBI) && cast<BranchInst>(BBI)->isUnconditional() &&
-            BBI != Other->begin()) {
-          --BBI;
-          StoreInst *OtherStore = dyn_cast<StoreInst>(BBI);
-          
-          // If this instruction is a store to the same location.
-          if (OtherStore && OtherStore->getOperand(1) == SI.getOperand(1)) {
-            // Okay, we know we can perform this transformation.  Insert a PHI
-            // node now if we need it.
-            Value *MergedVal = OtherStore->getOperand(0);
-            if (MergedVal != SI.getOperand(0)) {
-              PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
-              PN->reserveOperandSpace(2);
-              PN->addIncoming(SI.getOperand(0), SI.getParent());
-              PN->addIncoming(OtherStore->getOperand(0), Other);
-              MergedVal = InsertNewInstBefore(PN, Dest->front());
-            }
-            
-            // Advance to a place where it is safe to insert the new store and
-            // insert it.
-            BBI = Dest->begin();
-            while (isa<PHINode>(BBI)) ++BBI;
-            InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
-                                              OtherStore->isVolatile()), *BBI);
-
-            // Nuke the old stores.
-            EraseInstFromFunction(SI);
-            EraseInstFromFunction(*OtherStore);
-            ++NumCombined;
-            return 0;
-          }
-        }
+    if (BI->isUnconditional())
+      if (SimplifyStoreAtEndOfBlock(SI))
+        return 0;  // xform done!
+  
+  return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+///   if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+///   *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+  BasicBlock *StoreBB = SI.getParent();
+  
+  // Check to see if the successor block has exactly two incoming edges.  If
+  // so, see if the other predecessor contains a store to the same location.
+  // if so, insert a PHI node (if needed) and move the stores down.
+  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+  
+  // Determine whether Dest has exactly two predecessors and, if so, compute
+  // the other predecessor.
+  pred_iterator PI = pred_begin(DestBB);
+  BasicBlock *OtherBB = 0;
+  if (*PI != StoreBB)
+    OtherBB = *PI;
+  ++PI;
+  if (PI == pred_end(DestBB))
+    return false;
+  
+  if (*PI != StoreBB) {
+    if (OtherBB)
+      return false;
+    OtherBB = *PI;
+  }
+  if (++PI != pred_end(DestBB))
+    return false;
+  
+  
+  // Verify that the other block ends in a branch and is not otherwise empty.
+  BasicBlock::iterator BBI = OtherBB->getTerminator();
+  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+  if (!OtherBr || BBI == OtherBB->begin())
+    return false;
+  
+  // If the other block ends in an unconditional branch, check for the 'if then
+  // else' case.  there is an instruction before the branch.
+  StoreInst *OtherStore = 0;
+  if (OtherBr->isUnconditional()) {
+    // If this isn't a store, or isn't a store to the same location, bail out.
+    --BBI;
+    OtherStore = dyn_cast<StoreInst>(BBI);
+    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1))
+      return false;
+  } else {
+    // Otherwise, the other block ended with a conditional branch. If one of the
+    // destinations is StoreBB, then we have the if/then case.
+    if (OtherBr->getSuccessor(0) != StoreBB && 
+        OtherBr->getSuccessor(1) != StoreBB)
+      return false;
+    
+    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+    // if/then triangle.  See if there is a store to the same ptr as SI that
+    // lives in OtherBB.
+    for (;; --BBI) {
+      // Check to see if we find the matching store.
+      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+        if (OtherStore->getOperand(1) != SI.getOperand(1))
+          return false;
+        break;
        }
+      // If we find something that may be using the stored value, or if we run
+      // out of instructions, we can't do the xform.
+      if (isa<LoadInst>(BBI) || BBI->mayWriteToMemory() ||
+          BBI == OtherBB->begin())
+        return false;
      }
+    
+    // In order to eliminate the store in OtherBr, we have to
+    // make sure nothing reads the stored value in StoreBB.
+    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+      // FIXME: This should really be AA driven.
+      if (isa<LoadInst>(I) || I->mayWriteToMemory())
+        return false;
+    }
+  }
    
-  return 0;
+  // Insert a PHI node now if we need it.
+  Value *MergedVal = OtherStore->getOperand(0);
+  if (MergedVal != SI.getOperand(0)) {
+    PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
+    PN->reserveOperandSpace(2);
+    PN->addIncoming(SI.getOperand(0), SI.getParent());
+    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+    MergedVal = InsertNewInstBefore(PN, DestBB->front());
+  }
+  
+  // Advance to a place where it is safe to insert the new store and
+  // insert it.
+  BBI = DestBB->begin();
+  while (isa<PHINode>(BBI)) ++BBI;
+  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
+                                    OtherStore->isVolatile()), *BBI);
+  
+  // Nuke the old stores.
+  EraseInstFromFunction(SI);
+  EraseInstFromFunction(*OtherStore);
+  ++NumCombined;
+  return true;
  }
  
  
@@ -8949,16 +9396,16 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
  
  Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
  
-  // If packed val is undef, replace extract with scalar undef.
+  // If vector val is undef, replace extract with scalar undef.
    if (isa<UndefValue>(EI.getOperand(0)))
      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
  
-  // If packed val is constant 0, replace extract with scalar 0.
+  // If vector val is constant 0, replace extract with scalar 0.
    if (isa<ConstantAggregateZero>(EI.getOperand(0)))
      return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
    
    if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
-    // If packed val is constant with uniform operands, replace EI
+    // If vector val is constant with uniform operands, replace EI
      // with that operand
      Constant *op0 = C->getOperand(0);
      for (unsigned i = 1; i < C->getNumOperands(); ++i)
@@ -8997,6 +9444,17 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
      
      if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
        return ReplaceInstUsesWith(EI, Elt);
+    
+    // If the this extractelement is directly using a bitcast from a vector of
+    // the same number of elements, see if we can find the source element from
+    // it.  In this case, we will end up needing to bitcast the scalars.
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+      if (const VectorType *VT = 
+              dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+        if (VT->getNumElements() == VectorWidth)
+          if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+            return new BitCastInst(Elt, EI.getType());
+    }
    }
    
    if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -9208,7 +9666,8 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
          EI->getOperand(0)->getType() == IE.getType()) {
        unsigned NumVectorElts = IE.getType()->getNumElements();
-      unsigned ExtractedIdx=cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+      unsigned ExtractedIdx =
+        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
        
        if (ExtractedIdx >= NumVectorElts) // Out of range extract.
@@ -9461,7 +9920,7 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
          Inst->eraseFromParent();
          continue;
        }
-      
+     
        IC.AddToWorkList(Inst);
      }
  
@@ -9651,6 +10110,9 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
    }
  
    assert(WorklistMap.empty() && "Worklist empty, but map not?");
+    
+  // Do an explicit clear, this shrinks the map if needed.
+  WorklistMap.clear();
    return Changed;
  }