Significantly improve the documentation of the instcombine divide/compare

[oota-llvm.git] / lib / Transforms / Scalar / InstructionCombining.cpp
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index 2735dff48f4f4a99a30ce97b0392ef9d72c36677..bf68d540e1f49d7c91bfe18f482f5214b7d6d0cd 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -76,6 +76,9 @@ namespace {
      TargetData *TD;
      bool MustPreserveLCSSA;
    public:
+    static char ID; // Pass identification, replacement for typeid
+    InstCombiner() : FunctionPass((intptr_t)&ID) {}
+
      /// AddToWorkList - Add the specified instruction to the worklist if it
      /// isn't already in it.
      void AddToWorkList(Instruction *I) {
@@ -183,6 +186,11 @@ namespace {
      Instruction *visitFCmpInst(FCmpInst &I);
      Instruction *visitICmpInst(ICmpInst &I);
      Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
+    Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+                                                Instruction *LHS,
+                                                ConstantInt *RHS);
+    Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                ConstantInt *DivRHS);
  
      Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS,
                               ICmpInst::Predicate Cond, Instruction &I);
@@ -190,9 +198,10 @@ namespace {
                                       BinaryOperator &I);
      Instruction *commonCastTransforms(CastInst &CI);
      Instruction *commonIntCastTransforms(CastInst &CI);
-    Instruction *visitTrunc(CastInst &CI);
-    Instruction *visitZExt(CastInst &CI);
-    Instruction *visitSExt(CastInst &CI);
+    Instruction *commonPointerCastTransforms(CastInst &CI);
+    Instruction *visitTrunc(TruncInst &CI);
+    Instruction *visitZExt(ZExtInst &CI);
+    Instruction *visitSExt(SExtInst &CI);
      Instruction *visitFPTrunc(CastInst &CI);
      Instruction *visitFPExt(CastInst &CI);
      Instruction *visitFPToUI(CastInst &CI);
@@ -201,7 +210,7 @@ namespace {
      Instruction *visitSIToFP(CastInst &CI);
      Instruction *visitPtrToInt(CastInst &CI);
      Instruction *visitIntToPtr(CastInst &CI);
-    Instruction *visitBitCast(CastInst &CI);
+    Instruction *visitBitCast(BitCastInst &CI);
      Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                                  Instruction *FI);
      Instruction *visitSelectInst(SelectInst &CI);
@@ -347,12 +356,14 @@ namespace {
                                bool isSub, Instruction &I);
      Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                   bool isSigned, bool Inside, Instruction &IB);
-    Instruction *PromoteCastOfAllocation(CastInst &CI, AllocationInst &AI);
+    Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
      Instruction *MatchBSwap(BinaryOperator &I);
+    bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
  
      Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
    };
  
+  char InstCombiner::ID = 0;
    RegisterPass<InstCombiner> X("instcombine", "Combine redundant instructions");
  }
  
@@ -380,8 +391,7 @@ static const Type *getPromotedType(const Type *Ty) {
    if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
      if (ITy->getBitWidth() < 32)
        return Type::Int32Ty;
-  } else if (Ty == Type::FloatTy)
-    return Type::DoubleTy;
+  }
    return Ty;
  }
  
@@ -522,7 +532,7 @@ static inline Value *dyn_castNotVal(Value *V) {
  
    // Constants can be considered to be not'ed values...
    if (ConstantInt *C = dyn_cast<ConstantInt>(V))
-    return ConstantExpr::getNot(C);
+    return ConstantInt::get(~C->getValue());
    return 0;
  }
  
@@ -844,16 +854,14 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const Type *Ty,
           "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
    APInt UnknownBits = ~(KnownZero|KnownOne);
  
-  APInt SignBit(APInt::getSignBit(BitWidth));
-  
    // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
    // bit if it is unknown.
    Min = KnownOne;
    Max = KnownOne|UnknownBits;
    
    if (UnknownBits[BitWidth-1]) { // Sign bit is unknown
-    Min |= SignBit;
-    Max &= ~SignBit;
+    Min.set(BitWidth-1);
+    Max.clear(BitWidth-1);
    }
  }
  
@@ -1133,7 +1141,6 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
      const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
      uint32_t SrcBitWidth = SrcTy->getBitWidth();
      
-    DemandedMask &= SrcTy->getMask().zext(BitWidth);
      DemandedMask.trunc(SrcBitWidth);
      RHSKnownZero.trunc(SrcBitWidth);
      RHSKnownOne.trunc(SrcBitWidth);
@@ -1154,9 +1161,6 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
      const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
      uint32_t SrcBitWidth = SrcTy->getBitWidth();
      
-    // Get the sign bit for the source type
-    APInt InSignBit(APInt::getSignBit(SrcBitWidth));
-    InSignBit.zext(BitWidth);
      APInt InputDemandedBits = DemandedMask & 
                                APInt::getLowBitsSet(BitWidth, SrcBitWidth);
  
@@ -1164,7 +1168,7 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
      // If any of the sign extended bits are demanded, we know that the sign
      // bit is demanded.
      if ((NewBits & DemandedMask) != 0)
-      InputDemandedBits |= InSignBit;
+      InputDemandedBits.set(SrcBitWidth-1);
        
      InputDemandedBits.trunc(SrcBitWidth);
      RHSKnownZero.trunc(SrcBitWidth);
@@ -1279,7 +1283,7 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
      if (DemandedMask[BitWidth-1] == 0) {
        // Right fill the mask of bits for this SUB to demand the most
        // significant bit and all those below it.
-      unsigned NLZ = DemandedMask.countLeadingZeros();
+      uint32_t NLZ = DemandedMask.countLeadingZeros();
        APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
        if (SimplifyDemandedBits(I->getOperand(0), DemandedFromOps,
                                 LHSKnownZero, LHSKnownOne, Depth+1))
@@ -1344,6 +1348,10 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
        
        // Signed shift right.
        APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
+      // If any of the "high bits" are demanded, we should set the sign bit as
+      // demanded.
+      if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+        DemandedMaskIn.set(BitWidth-1);
        if (SimplifyDemandedBits(I->getOperand(0),
                                 DemandedMaskIn,
                                 RHSKnownZero, RHSKnownOne, Depth+1))
@@ -1492,7 +1500,73 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
      UndefElts |= 1ULL << IdxNo;
      break;
    }
+  case Instruction::BitCast: {
+    // Packed->packed casts only.
+    const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+    if (!VTy) break;
+    unsigned InVWidth = VTy->getNumElements();
+    uint64_t InputDemandedElts = 0;
+    unsigned Ratio;
+
+    if (VWidth == InVWidth) {
+      // If we are converting from <4x i32> -> <4 x f32>, we demand the same
+      // elements as are demanded of us.
+      Ratio = 1;
+      InputDemandedElts = DemandedElts;
+    } else if (VWidth > InVWidth) {
+      // Untested so far.
+      break;
+      
+      // If there are more elements in the result than there are in the source,
+      // then an input element is live if any of the corresponding output
+      // elements are live.
+      Ratio = VWidth/InVWidth;
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+        if (DemandedElts & (1ULL << OutIdx))
+          InputDemandedElts |= 1ULL << (OutIdx/Ratio);
+      }
+    } else {
+      // Untested so far.
+      break;
+      
+      // If there are more elements in the source than there are in the result,
+      // then an input element is live if the corresponding output element is
+      // live.
+      Ratio = InVWidth/VWidth;
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if (DemandedElts & (1ULL << InIdx/Ratio))
+          InputDemandedElts |= 1ULL << InIdx;
+    }
+    
+    // div/rem demand all inputs, because they don't want divide by zero.
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+                                      UndefElts2, Depth+1);
+    if (TmpV) {
+      I->setOperand(0, TmpV);
+      MadeChange = true;
+    }
      
+    UndefElts = UndefElts2;
+    if (VWidth > InVWidth) {
+      assert(0 && "Unimp");
+      // If there are more elements in the result than there are in the source,
+      // then an output element is undef if the corresponding input element is
+      // undef.
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+        if (UndefElts2 & (1ULL << (OutIdx/Ratio)))
+          UndefElts |= 1ULL << OutIdx;
+    } else if (VWidth < InVWidth) {
+      assert(0 && "Unimp");
+      // If there are more elements in the source than there are in the result,
+      // then a result element is undef if all of the corresponding input
+      // elements are undef.
+      UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if ((UndefElts2 & (1ULL << InIdx)) == 0)    // Not undef?
+          UndefElts &= ~(1ULL << (InIdx/Ratio));    // Clear undef bit.
+    }
+    break;
+  }
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
@@ -1870,8 +1944,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  
      if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) {
        // X + (signbit) --> X ^ signbit
-      APInt Val(CI->getValue());
-      unsigned BitWidth = Val.getBitWidth();
+      const APInt& Val = CI->getValue();
+      uint32_t BitWidth = Val.getBitWidth();
        if (Val == APInt::getSignBit(BitWidth))
          return BinaryOperator::createXor(LHS, RHS);
        
@@ -1893,10 +1967,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
      Value *XorLHS = 0;
      if (isa<ConstantInt>(RHSC) &&
          match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
-      unsigned TySizeBits = I.getType()->getPrimitiveSizeInBits();
-      APInt RHSVal(cast<ConstantInt>(RHSC)->getValue());
+      uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits();
+      const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
        
-      unsigned Size = TySizeBits / 2;
+      uint32_t Size = TySizeBits / 2;
        APInt C0080Val(APInt(TySizeBits, 1ULL).shl(Size - 1));
        APInt CFF80Val(-C0080Val);
        do {
@@ -1978,9 +2052,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
      return BinaryOperator::createMul(LHS, AddOne(C2));
  
    // X + ~X --> -1   since   ~X = -X-1
-  if (dyn_castNotVal(LHS) == RHS ||
-      dyn_castNotVal(RHS) == LHS)
-    return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType()));
+  if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS)
+    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
    
  
    // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0
@@ -1999,14 +2072,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
        if (Anded == CRHS) {
          // See if all bits from the first bit set in the Add RHS up are included
          // in the mask.  First, get the rightmost bit.
-        APInt AddRHSV(CRHS->getValue());
+        const APInt& AddRHSV = CRHS->getValue();
  
          // Form a mask of all bits from the lowest bit added through the top.
-        APInt AddRHSHighBits = ~((AddRHSV & -AddRHSV)-1);
-        AddRHSHighBits &= C2->getType()->getMask();
+        APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1));
  
          // See if the and mask includes all of these bits.
-        APInt AddRHSHighBitsAnd = AddRHSHighBits & C2->getValue();
+        APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue());
  
          if (AddRHSHighBits == AddRHSHighBitsAnd) {
            // Okay, the xform is safe.  Insert the new add pronto.
@@ -2050,7 +2122,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
  // isSignBit - Return true if the value represented by the constant only has the
  // highest order bit set.
  static bool isSignBit(ConstantInt *CI) {
-  unsigned NumBits = CI->getType()->getPrimitiveSizeInBits();
+  uint32_t NumBits = CI->getType()->getPrimitiveSizeInBits();
    return CI->getValue() == APInt::getSignBit(NumBits);
  }
  
@@ -2161,7 +2233,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
        // 0 - (X sdiv C)  -> (X sdiv -C)
        if (Op1I->getOpcode() == Instruction::SDiv)
          if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-          if (CSI->isNullValue())
+          if (CSI->isZero())
              if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
                return BinaryOperator::createSDiv(Op1I->getOperand(0),
                                                 ConstantExpr::getNeg(DivRHS));
@@ -2205,7 +2277,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS) {
    switch (pred) {
      case ICmpInst::ICMP_SLT: 
        // True if LHS s< RHS and RHS == 0
-      return RHS->isNullValue();
+      return RHS->isZero();
      case ICmpInst::ICMP_SLE: 
        // True if LHS s<= RHS and RHS == -1
        return RHS->isAllOnesValue();
@@ -2240,7 +2312,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
              return BinaryOperator::createMul(SI->getOperand(0),
                                               ConstantExpr::getShl(CI, ShOp));
  
-      if (CI->isNullValue())
+      if (CI->isZero())
          return ReplaceInstUsesWith(I, Op1);  // X * 0  == 0
        if (CI->equalsInt(1))                  // X * 1  == X
          return ReplaceInstUsesWith(I, Op0);
@@ -2322,8 +2394,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
          // If the multiply type is not the same as the source type, sign extend
          // or truncate to the multiply type.
          if (I.getType() != V->getType()) {
-          unsigned SrcBits = V->getType()->getPrimitiveSizeInBits();
-          unsigned DstBits = I.getType()->getPrimitiveSizeInBits();
+          uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits();
+          uint32_t DstBits = I.getType()->getPrimitiveSizeInBits();
            Instruction::CastOps opcode = 
              (SrcBits == DstBits ? Instruction::BitCast : 
               (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc));
@@ -2451,7 +2523,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
    if (BinaryOperator *RHSI = dyn_cast<BinaryOperator>(I.getOperand(1))) {
      if (RHSI->getOpcode() == Instruction::Shl &&
          isa<ConstantInt>(RHSI->getOperand(0))) {
-      APInt C1(cast<ConstantInt>(RHSI->getOperand(0))->getValue());
+      const APInt& C1 = cast<ConstantInt>(RHSI->getOperand(0))->getValue();
        if (C1.isPowerOf2()) {
          Value *N = RHSI->getOperand(1);
          const Type *NTy = N->getType();
@@ -2469,7 +2541,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 
      if (ConstantInt *STO = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if (ConstantInt *SFO = dyn_cast<ConstantInt>(SI->getOperand(2)))  {
-        APInt TVA(STO->getValue()), FVA(SFO->getValue());
+        const APInt &TVA = STO->getValue(), &FVA = SFO->getValue();
          if (TVA.isPowerOf2() && FVA.isPowerOf2()) {
            // Compute the shift amounts
            uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
@@ -2934,14 +3006,14 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
        // Adding a one to a single bit bit-field should be turned into an XOR
        // of the bit.  First thing to check is to see if this AND is with a
        // single bit constant.
-      APInt AndRHSV(cast<ConstantInt>(AndRHS)->getValue());
+      const APInt& AndRHSV = cast<ConstantInt>(AndRHS)->getValue();
  
        // If there is only one bit set...
        if (isOneBitSet(cast<ConstantInt>(AndRHS))) {
          // Ok, at this point, we know that we are masking the result of the
          // ADD down to exactly one bit.  If the constant we are adding has
          // no bits set below this bit, then we can eliminate the ADD.
-        APInt AddRHS(cast<ConstantInt>(OpRHS)->getValue());
+        const APInt& AddRHS = cast<ConstantInt>(OpRHS)->getValue();
  
          // Check to see if any bits below the one bit set in AndRHSV are set.
          if ((AddRHS & (AndRHSV-1)) == 0) {
@@ -3082,8 +3154,8 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
  // any number of 0s on either side.  The 1s are allowed to wrap from LSB to
  // MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
  // not, since all 1s are not contiguous.
-static bool isRunOfOnes(ConstantInt *Val, unsigned &MB, unsigned &ME) {
-  APInt V = Val->getValue();
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+  const APInt& V = Val->getValue();
    uint32_t BitWidth = Val->getType()->getBitWidth();
    if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
  
@@ -3126,7 +3198,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
        // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
        // part, we don't need any explicit masks to take them out of A.  If that
        // is all N is, ignore it.
-      unsigned MB = 0, ME = 0;
+      uint32_t MB = 0, ME = 0;
        if (isRunOfOnes(Mask, MB, ME)) {  // begin/end bit of run, inclusive
          uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
          APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
@@ -3174,15 +3246,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
        return &I;
    } else {
      if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
-      if (CP->isAllOnesValue())
+      if (CP->isAllOnesValue())            // X & <-1,-1> -> X
          return ReplaceInstUsesWith(I, I.getOperand(0));
+    } else if (isa<ConstantAggregateZero>(Op1)) {
+      return ReplaceInstUsesWith(I, Op1);  // X & <0,0> -> <0,0>
      }
    }
    
    if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
-    APInt AndRHSMask(AndRHS->getValue());
-    APInt TypeMask(cast<IntegerType>(Op0->getType())->getMask());
-    APInt NotAndRHS = AndRHSMask^TypeMask;
+    const APInt& AndRHSMask = AndRHS->getValue();
+    APInt NotAndRHS(~AndRHSMask);
  
      // Optimize a variety of ((val OP C1) & C2) combinations...
      if (isa<BinaryOperator>(Op0)) {
@@ -3291,13 +3364,28 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
    }
    
    {
-    Value *A = 0, *B = 0;
-    if (match(Op0, m_Or(m_Value(A), m_Value(B))))
+    Value *A = 0, *B = 0, *C = 0, *D = 0;
+    if (match(Op0, m_Or(m_Value(A), m_Value(B)))) {
        if (A == Op1 || B == Op1)    // (A | ?) & A  --> A
          return ReplaceInstUsesWith(I, Op1);
-    if (match(Op1, m_Or(m_Value(A), m_Value(B))))
+    
+      // (A|B) & ~(A&B) -> A^B
+      if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
+        if ((A == C && B == D) || (A == D && B == C))
+          return BinaryOperator::createXor(A, B);
+      }
+    }
+    
+    if (match(Op1, m_Or(m_Value(A), m_Value(B)))) {
        if (A == Op0 || B == Op0)    // A & (A | ?)  --> A
          return ReplaceInstUsesWith(I, Op0);
+
+      // ~(A&B) & (A|B) -> A^B
+      if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
+        if ((A == C && B == D) || (A == D && B == C))
+          return BinaryOperator::createXor(A, B);
+      }
+    }
      
      if (Op0->hasOneUse() &&
          match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
@@ -3596,14 +3684,14 @@ static bool CollectBSwapParts(Value *V, SmallVector<Value*, 8> &ByteValues) {
  /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
  /// If so, insert the new bswap intrinsic and return it.
  Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
-  // We cannot bswap one byte.
-  if (I.getType() == Type::Int8Ty)
-    return 0;
+  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+  if (!ITy || ITy->getBitWidth() % 16) 
+    return 0;   // Can only bswap pairs of bytes.  Can't do vectors.
    
    /// ByteValues - For each byte of the result, we keep track of which value
    /// defines each byte.
    SmallVector<Value*, 8> ByteValues;
-  ByteValues.resize(TD->getTypeSize(I.getType()));
+  ByteValues.resize(ITy->getBitWidth()/8);
      
    // Try to find all the pieces corresponding to the bswap.
    if (CollectBSwapParts(I.getOperand(0), ByteValues) ||
@@ -3618,20 +3706,9 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
    for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
      if (ByteValues[i] != V)
        return 0;
-    
-  // If they do then *success* we can turn this into a bswap.  Figure out what
-  // bswap to make it into.
+  const Type *Tys[] = { ITy, ITy };
    Module *M = I.getParent()->getParent()->getParent();
-  const char *FnName = 0;
-  if (I.getType() == Type::Int16Ty)
-    FnName = "llvm.bswap.i16";
-  else if (I.getType() == Type::Int32Ty)
-    FnName = "llvm.bswap.i32";
-  else if (I.getType() == Type::Int64Ty)
-    FnName = "llvm.bswap.i64";
-  else
-    assert(0 && "Unknown integer type!");
-  Constant *F = M->getOrInsertFunction(FnName, I.getType(), I.getType(), NULL);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 2);
    return new CallInst(F, V);
  }
  
@@ -3641,7 +3718,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  
    if (isa<UndefValue>(Op1))                       // X | undef -> -1
-    return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType()));
+    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
    // or X, X = X
    if (Op0 == Op1)
@@ -3655,7 +3732,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
      if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth),
                               KnownZero, KnownOne))
        return &I;
+  } else if (isa<ConstantAggregateZero>(Op1)) {
+    return ReplaceInstUsesWith(I, Op0);  // X | <0,0> -> X
+  } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+    if (CP->isAllOnesValue())            // X | <-1,-1> -> <-1,-1>
+      return ReplaceInstUsesWith(I, I.getOperand(1));
    }
+    
+
    
    // or X, -1 == -1
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
@@ -3665,7 +3749,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
        Instruction *Or = BinaryOperator::createOr(X, RHS);
        InsertNewInstBefore(Or, I);
        Or->takeName(Op0);
-      return BinaryOperator::createAnd(Or, ConstantExpr::getOr(RHS, C1));
+      return BinaryOperator::createAnd(Or, 
+               ConstantInt::get(RHS->getValue() | C1->getValue()));
      }
  
      // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
@@ -3674,7 +3759,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
        InsertNewInstBefore(Or, I);
        Or->takeName(Op0);
        return BinaryOperator::createXor(Or,
-                 ConstantExpr::getAnd(C1, ConstantExpr::getNot(RHS)));
+                 ConstantInt::get(C1->getValue() & ~RHS->getValue()));
      }
  
      // Try to fold constant and into select arguments.
@@ -3724,35 +3809,89 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
      return BinaryOperator::createXor(NOr, C1);
    }
  
-  // (A & C1)|(B & C2)
-  if (match(Op0, m_And(m_Value(A), m_ConstantInt(C1))) &&
-      match(Op1, m_And(m_Value(B), m_ConstantInt(C2)))) {
-
-    if (A == B)  // (A & C1)|(A & C2) == A & (C1|C2)
-      return BinaryOperator::createAnd(A, ConstantExpr::getOr(C1, C2));
-
-
-    // If we have: ((V + N) & C1) | (V & C2)
-    // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
-    // replace with V+N.
-    if (C1 == ConstantExpr::getNot(C2)) {
-      Value *V1 = 0, *V2 = 0;
-      if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
-          match(A, m_Add(m_Value(V1), m_Value(V2)))) {
-        // Add commutes, try both ways.
-        if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
-          return ReplaceInstUsesWith(I, A);
-        if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
-          return ReplaceInstUsesWith(I, A);
+  // (A & C)|(B & D)
+  Value *C = 0, *D = 0;
+  if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+      match(Op1, m_And(m_Value(B), m_Value(D)))) {
+    Value *V1 = 0, *V2 = 0, *V3 = 0;
+    C1 = dyn_cast<ConstantInt>(C);
+    C2 = dyn_cast<ConstantInt>(D);
+    if (C1 && C2) {  // (A & C1)|(B & C2)
+      // If we have: ((V + N) & C1) | (V & C2)
+      // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+      // replace with V+N.
+      if (C1->getValue() == ~C2->getValue()) {
+        if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
+            match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+          // Add commutes, try both ways.
+          if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+            return ReplaceInstUsesWith(I, A);
+          if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+            return ReplaceInstUsesWith(I, A);
+        }
+        // Or commutes, try both ways.
+        if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
+            match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+          // Add commutes, try both ways.
+          if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+            return ReplaceInstUsesWith(I, B);
+          if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+            return ReplaceInstUsesWith(I, B);
+        }
+      }
+      V1 = 0; V2 = 0; V3 = 0;
+    }
+    
+    // Check to see if we have any common things being and'ed.  If so, find the
+    // terms for V1 & (V2|V3).
+    if (isOnlyUse(Op0) || isOnlyUse(Op1)) {
+      if (A == B)      // (A & C)|(A & D) == A & (C|D)
+        V1 = A, V2 = C, V3 = D;
+      else if (A == D) // (A & C)|(B & A) == A & (B|C)
+        V1 = A, V2 = B, V3 = C;
+      else if (C == B) // (A & C)|(C & D) == C & (A|D)
+        V1 = C, V2 = A, V3 = D;
+      else if (C == D) // (A & C)|(B & C) == C & (A|B)
+        V1 = C, V2 = A, V3 = B;
+      
+      if (V1) {
+        Value *Or =
+          InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I);
+        return BinaryOperator::createAnd(V1, Or);
        }
-      // Or commutes, try both ways.
-      if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
-          match(B, m_Add(m_Value(V1), m_Value(V2)))) {
-        // Add commutes, try both ways.
-        if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
-          return ReplaceInstUsesWith(I, B);
-        if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
-          return ReplaceInstUsesWith(I, B);
+      
+      // (V1 & V3)|(V2 & ~V3) -> ((V1 ^ V2) & V3) ^ V2
+      if (isOnlyUse(Op0) && isOnlyUse(Op1)) {
+        // Try all combination of terms to find V3 and ~V3.
+        if (A->hasOneUse() && match(A, m_Not(m_Value(V3)))) {
+          if (V3 == B)
+            V1 = D, V2 = C;
+          else if (V3 == D)
+            V1 = B, V2 = C;
+        }
+        if (B->hasOneUse() && match(B, m_Not(m_Value(V3)))) {
+          if (V3 == A)
+            V1 = C, V2 = D;
+          else if (V3 == C)
+            V1 = A, V2 = D;
+        }
+        if (C->hasOneUse() && match(C, m_Not(m_Value(V3)))) {
+          if (V3 == B)
+            V1 = D, V2 = A;
+          else if (V3 == D)
+            V1 = B, V2 = A;
+        }
+        if (D->hasOneUse() && match(D, m_Not(m_Value(V3)))) {
+          if (V3 == A)
+            V1 = C, V2 = B;
+          else if (V3 == C)
+            V1 = A, V2 = B;
+        }
+        if (V1) {
+          A = InsertNewInstBefore(BinaryOperator::createXor(V1, V2, "tmp"), I);
+          A = InsertNewInstBefore(BinaryOperator::createAnd(A, V3, "tmp"), I);
+          return BinaryOperator::createXor(A, V2);
+        }
        }
      }
    }
@@ -3774,16 +3913,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
  
    if (match(Op0, m_Not(m_Value(A)))) {   // ~A | Op1
      if (A == Op1)   // ~A | A == -1
-      return ReplaceInstUsesWith(I,
-                                ConstantInt::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
    } else {
      A = 0;
    }
    // Note, A is still live here!
    if (match(Op1, m_Not(m_Value(B)))) {   // Op0 | ~B
      if (Op0 == B)
-      return ReplaceInstUsesWith(I,
-                                ConstantInt::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
      // (~A | ~B) == (~(A & B)) - De Morgan's Law
      if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
@@ -3808,13 +3945,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
              LHSCC != ICmpInst::ICMP_UGE && LHSCC != ICmpInst::ICMP_ULE &&
              RHSCC != ICmpInst::ICMP_UGE && RHSCC != ICmpInst::ICMP_ULE &&
              LHSCC != ICmpInst::ICMP_SGE && LHSCC != ICmpInst::ICMP_SLE &&
-            RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE) {
+            RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE &&
+            // We can't fold (ugt x, C) | (sgt x, C2).
+            PredicatesFoldable(LHSCC, RHSCC)) {
            // Ensure that the larger constant is on the RHS.
-          ICmpInst::Predicate GT = ICmpInst::isSignedPredicate(LHSCC) ? 
-            ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
-          Constant *Cmp = ConstantExpr::getICmp(GT, LHSCst, RHSCst);
            ICmpInst *LHS = cast<ICmpInst>(Op0);
-          if (cast<ConstantInt>(Cmp)->getZExtValue()) {
+          bool NeedsSwap;
+          if (ICmpInst::isSignedPredicate(LHSCC))
+            NeedsSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+          else
+            NeedsSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+            
+          if (NeedsSwap) {
              std::swap(LHS, RHS);
              std::swap(LHSCst, RHSCst);
              std::swap(LHSCC, RHSCC);
@@ -3839,7 +3981,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
                  Instruction *Add = BinaryOperator::createAdd(LHSVal, AddCST,
                                                        LHSVal->getName()+".off");
                  InsertNewInstBefore(Add, I);
-                AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+                AddCST = Subtract(AddOne(RHSCst), LHSCst);
                  return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
                }
                break;                         // (X == 13 | X == 15) -> no change
@@ -3988,8 +4130,33 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
      if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth),
                               KnownZero, KnownOne))
        return &I;
+  } else if (isa<ConstantAggregateZero>(Op1)) {
+    return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
    }
  
+  // Is this a ~ operation?
+  if (Value *NotOp = dyn_castNotVal(&I)) {
+    // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+    // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
+      if (Op0I->getOpcode() == Instruction::And || 
+          Op0I->getOpcode() == Instruction::Or) {
+        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
+        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
+          Instruction *NotY =
+            BinaryOperator::createNot(Op0I->getOperand(1),
+                                      Op0I->getOperand(1)->getName()+".not");
+          InsertNewInstBefore(NotY, I);
+          if (Op0I->getOpcode() == Instruction::And)
+            return BinaryOperator::createOr(Op0NotVal, NotY);
+          else
+            return BinaryOperator::createAnd(Op0NotVal, NotY);
+        }
+      }
+    }
+  }
+  
+  
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
      // xor (icmp A, B), true = not (icmp A, B) = !icmp A, B
      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
@@ -4006,19 +4173,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
                                                ConstantInt::get(I.getType(), 1));
            return BinaryOperator::createAdd(Op0I->getOperand(1), ConstantRHS);
          }
-
-      // ~(~X & Y) --> (X | ~Y)
-      if (Op0I->getOpcode() == Instruction::And && RHS->isAllOnesValue()) {
-        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
-        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
-          Instruction *NotY =
-            BinaryOperator::createNot(Op0I->getOperand(1),
-                                      Op0I->getOperand(1)->getName()+".not");
-          InsertNewInstBefore(NotY, I);
-          return BinaryOperator::createOr(Op0NotVal, NotY);
-        }
-      }
-
+          
        if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
          if (Op0I->getOpcode() == Instruction::Add) {
            // ~(X-c) --> (-c-1)-X
@@ -4028,6 +4183,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
                             ConstantExpr::getSub(NegOp0CI,
                                               ConstantInt::get(I.getType(), 1)),
                                            Op0I->getOperand(0));
+          } else if (RHS->getValue().isSignBit()) {
+            // (X + C) ^ signbit -> (X + C + signbit)
+            Constant *C = ConstantInt::get(RHS->getValue() + Op0CI->getValue());
+            return BinaryOperator::createAdd(Op0I->getOperand(0), C);
+
            }
          } else if (Op0I->getOpcode() == Instruction::Or) {
            // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
@@ -4035,7 +4195,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
              Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
              // Anything in both C1 and C2 is known to be zero, remove it from
              // NewRHS.
-            Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+            Constant *CommonBits = And(Op0CI, RHS);
              NewRHS = ConstantExpr::getAnd(NewRHS, 
                                            ConstantExpr::getNot(CommonBits));
              AddToWorkList(Op0I);
@@ -4057,12 +4217,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
  
    if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1
      if (X == Op1)
-      return ReplaceInstUsesWith(I,
-                                ConstantInt::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
    if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1
      if (X == Op0)
-      return ReplaceInstUsesWith(I, ConstantInt::getAllOnesValue(I.getType()));
+      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
  
    
    BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
@@ -4204,7 +4363,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
  /// overflowed for this type.
  static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
                              ConstantInt *In2, bool IsSigned = false) {
-  Result = cast<ConstantInt>(ConstantExpr::getAdd(In1, In2));
+  Result = cast<ConstantInt>(Add(In1, In2));
  
    if (IsSigned)
      if (In2->getValue().isNegative())
@@ -4225,38 +4384,66 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
    Value *Result = Constant::getNullValue(IntPtrTy);
  
    // Build a mask for high order bits.
-  uint64_t PtrSizeMask = ~0ULL >> (64-TD.getPointerSize()*8);
+  unsigned IntPtrWidth = TD.getPointerSize()*8;
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
  
    for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
      Value *Op = GEP->getOperand(i);
      uint64_t Size = TD.getTypeSize(GTI.getIndexedType()) & PtrSizeMask;
-    Constant *Scale = ConstantInt::get(IntPtrTy, Size);
-    if (Constant *OpC = dyn_cast<Constant>(Op)) {
-      if (!OpC->isNullValue()) {
-        OpC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
-        Scale = ConstantExpr::getMul(OpC, Scale);
-        if (Constant *RC = dyn_cast<Constant>(Result))
-          Result = ConstantExpr::getAdd(RC, Scale);
-        else {
-          // Emit an add instruction.
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+      if (OpC->isZero()) continue;
+      
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+        
+        if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
+          Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
+        else
            Result = IC.InsertNewInstBefore(
-             BinaryOperator::createAdd(Result, Scale,
-                                       GEP->getName()+".offs"), I);
-        }
+                   BinaryOperator::createAdd(Result,
+                                             ConstantInt::get(IntPtrTy, Size),
+                                             GEP->getName()+".offs"), I);
+        continue;
        }
-    } else {
-      // Convert to correct type.
-      Op = IC.InsertNewInstBefore(CastInst::createSExtOrBitCast(Op, IntPtrTy,
-                                               Op->getName()+".c"), I);
-      if (Size != 1)
-        // We'll let instcombine(mul) convert this to a shl if possible.
+      
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+      Scale = ConstantExpr::getMul(OC, Scale);
+      if (Constant *RC = dyn_cast<Constant>(Result))
+        Result = ConstantExpr::getAdd(RC, Scale);
+      else {
+        // Emit an add instruction.
+        Result = IC.InsertNewInstBefore(
+           BinaryOperator::createAdd(Result, Scale,
+                                     GEP->getName()+".offs"), I);
+      }
+      continue;
+    }
+    // Convert to correct type.
+    if (Op->getType() != IntPtrTy) {
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getSExt(OpC, IntPtrTy);
+      else
+        Op = IC.InsertNewInstBefore(new SExtInst(Op, IntPtrTy,
+                                                 Op->getName()+".c"), I);
+    }
+    if (Size != 1) {
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getMul(OpC, Scale);
+      else    // We'll let instcombine(mul) convert this to a shl if possible.
          Op = IC.InsertNewInstBefore(BinaryOperator::createMul(Op, Scale,
-                                                    GEP->getName()+".idx"), I);
+                                                  GEP->getName()+".idx"), I);
+    }
  
-      // Emit an add instruction.
+    // Emit an add instruction.
+    if (isa<Constant>(Op) && isa<Constant>(Result))
+      Result = ConstantExpr::getAdd(cast<Constant>(Op),
+                                    cast<Constant>(Result));
+    else
        Result = IC.InsertNewInstBefore(BinaryOperator::createAdd(Op, Result,
-                                                    GEP->getName()+".offs"), I);
-    }
+                                                  GEP->getName()+".offs"), I);
    }
    return Result;
  }
@@ -4580,6 +4767,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
          return new ICmpInst(ICmpInst::ICMP_NE, Op0,Op1);
        if (isMinValuePlusOne(CI,false))              // A <u MIN+1 -> A == MIN
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI));
+      // (x <u 2147483648) -> (x >s -1)  -> true if sign bit clear
+      if (CI->isMinValue(true))
+        return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
+                            ConstantInt::getAllOnesValue(Op0->getType()));
+          
        break;
  
      case ICmpInst::ICMP_SLT:
@@ -4598,6 +4790,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
          return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
        if (isMaxValueMinusOne(CI, false))          // A >u MAX-1 -> A == MAX
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI));
+        
+      // (x >u 2147483647) -> (x <s 0)  -> true if sign bit set
+      if (CI->isMaxValue(true))
+        return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+                            ConstantInt::getNullValue(Op0->getType()));
        break;
  
      case ICmpInst::ICMP_SGT:
@@ -4675,7 +4872,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
      if ((KnownOne | KnownZero) != 0) {
        // Compute the Min, Max and RHS values based on the known bits. For the
        // EQ and NE we use unsigned values.
-      APInt Min(BitWidth, 0), Max(BitWidth, 0), RHSVal(CI->getValue());
+      APInt Min(BitWidth, 0), Max(BitWidth, 0);
+      const APInt& RHSVal = CI->getValue();
        if (ICmpInst::isSignedPredicate(I.getPredicate())) {
          ComputeSignedMinMaxValuesFromKnownBits(Ty, KnownZero, KnownOne, Min, 
                                                 Max);
@@ -4696,13 +4894,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
        case ICmpInst::ICMP_ULT:
          if (Max.ult(RHSVal))
            return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-        if (Min.ugt(RHSVal))
+        if (Min.uge(RHSVal))
            return ReplaceInstUsesWith(I, ConstantInt::getFalse());
          break;
        case ICmpInst::ICMP_UGT:
          if (Min.ugt(RHSVal))
            return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-        if (Max.ult(RHSVal))
+        if (Max.ule(RHSVal))
            return ReplaceInstUsesWith(I, ConstantInt::getFalse());
          break;
        case ICmpInst::ICMP_SLT:
@@ -4714,7 +4912,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
        case ICmpInst::ICMP_SGT: 
          if (Min.sgt(RHSVal))
            return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-        if (Max.slt(RHSVal))
+        if (Max.sle(RHSVal))
            return ReplaceInstUsesWith(I, ConstantInt::getFalse());
          break;
        }
@@ -4724,516 +4922,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
      // instruction, see if that instruction also has constants so that the 
      // instruction can be folded into the icmp 
      if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
-      switch (LHSI->getOpcode()) {
-      case Instruction::And:
-        if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
-            LHSI->getOperand(0)->hasOneUse()) {
-          ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
-
-          // If the LHS is an AND of a truncating cast, we can widen the
-          // and/compare to be the input width without changing the value
-          // produced, eliminating a cast.
-          if (CastInst *Cast = dyn_cast<CastInst>(LHSI->getOperand(0))) {
-            // We can do this transformation if either the AND constant does not
-            // have its sign bit set or if it is an equality comparison. 
-            // Extending a relational comparison when we're checking the sign
-            // bit would not work.
-            if (Cast->hasOneUse() && isa<TruncInst>(Cast) &&
-                (I.isEquality() || AndCST->getValue().isPositive() && 
-                 CI->getValue().isPositive())) {
-              ConstantInt *NewCST;
-              ConstantInt *NewCI;
-              APInt NewCSTVal(AndCST->getValue()), NewCIVal(CI->getValue());
-              uint32_t BitWidth = cast<IntegerType>(
-                Cast->getOperand(0)->getType())->getBitWidth();
-              NewCST = ConstantInt::get(NewCSTVal.zext(BitWidth));
-              NewCI = ConstantInt::get(NewCIVal.zext(BitWidth));
-              Instruction *NewAnd = 
-                BinaryOperator::createAnd(Cast->getOperand(0), NewCST, 
-                                          LHSI->getName());
-              InsertNewInstBefore(NewAnd, I);
-              return new ICmpInst(I.getPredicate(), NewAnd, NewCI);
-            }
-          }
-          
-          // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
-          // could exist), turn it into (X & (C2 << C1)) != (C3 << C1).  This
-          // happens a LOT in code produced by the C front-end, for bitfield
-          // access.
-          BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
-          if (Shift && !Shift->isShift())
-            Shift = 0;
-
-          ConstantInt *ShAmt;
-          ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
-          const Type *Ty = Shift ? Shift->getType() : 0;  // Type of the shift.
-          const Type *AndTy = AndCST->getType();          // Type of the and.
-
-          // We can fold this as long as we can't shift unknown bits
-          // into the mask.  This can only happen with signed shift
-          // rights, as they sign-extend.
-          if (ShAmt) {
-            bool CanFold = Shift->isLogicalShift();
-            if (!CanFold) {
-              // To test for the bad case of the signed shr, see if any
-              // of the bits shifted in could be tested after the mask.
-              uint32_t TyBits = Ty->getPrimitiveSizeInBits();
-              int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
-
-              uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
-              if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & 
-                   AndCST->getValue()) == 0)
-                CanFold = true;
-            }
-
-            if (CanFold) {
-              Constant *NewCst;
-              if (Shift->getOpcode() == Instruction::Shl)
-                NewCst = ConstantExpr::getLShr(CI, ShAmt);
-              else
-                NewCst = ConstantExpr::getShl(CI, ShAmt);
-
-              // Check to see if we are shifting out any of the bits being
-              // compared.
-              if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != CI){
-                // If we shifted bits out, the fold is not going to work out.
-                // As a special case, check to see if this means that the
-                // result is always true or false now.
-                if (I.getPredicate() == ICmpInst::ICMP_EQ)
-                  return ReplaceInstUsesWith(I, ConstantInt::getFalse());
-                if (I.getPredicate() == ICmpInst::ICMP_NE)
-                  return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-              } else {
-                I.setOperand(1, NewCst);
-                Constant *NewAndCST;
-                if (Shift->getOpcode() == Instruction::Shl)
-                  NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
-                else
-                  NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
-                LHSI->setOperand(1, NewAndCST);
-                LHSI->setOperand(0, Shift->getOperand(0));
-                AddToWorkList(Shift); // Shift is dead.
-                AddUsesToWorkList(I);
-                return &I;
-              }
-            }
-          }
-          
-          // Turn ((X >> Y) & C) == 0  into  (X & (C << Y)) == 0.  The later is
-          // preferable because it allows the C<<Y expression to be hoisted out
-          // of a loop if Y is invariant and X is not.
-          if (Shift && Shift->hasOneUse() && CI->isNullValue() &&
-              I.isEquality() && !Shift->isArithmeticShift() &&
-              isa<Instruction>(Shift->getOperand(0))) {
-            // Compute C << Y.
-            Value *NS;
-            if (Shift->getOpcode() == Instruction::LShr) {
-              NS = BinaryOperator::createShl(AndCST, 
-                                          Shift->getOperand(1), "tmp");
-            } else {
-              // Insert a logical shift.
-              NS = BinaryOperator::createLShr(AndCST,
-                                          Shift->getOperand(1), "tmp");
-            }
-            InsertNewInstBefore(cast<Instruction>(NS), I);
-
-            // Compute X & (C << Y).
-            Instruction *NewAnd = BinaryOperator::createAnd(
-                Shift->getOperand(0), NS, LHSI->getName());
-            InsertNewInstBefore(NewAnd, I);
-            
-            I.setOperand(0, NewAnd);
-            return &I;
-          }
-        }
-        break;
-
-      case Instruction::Shl:         // (icmp pred (shl X, ShAmt), CI)
-        if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-          if (I.isEquality()) {
-            unsigned TypeBits = CI->getType()->getPrimitiveSizeInBits();
-
-            // Check that the shift amount is in range.  If not, don't perform
-            // undefined shifts.  When the shift is visited it will be
-            // simplified.
-            if (ShAmt->uge(TypeBits))
-              break;
-
-            // If we are comparing against bits always shifted out, the
-            // comparison cannot succeed.
-            Constant *Comp =
-              ConstantExpr::getShl(ConstantExpr::getLShr(CI, ShAmt), ShAmt);
-            if (Comp != CI) {// Comparing against a bit that we know is zero.
-              bool IsICMP_NE = I.getPredicate() == ICmpInst::ICMP_NE;
-              Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
-              return ReplaceInstUsesWith(I, Cst);
-            }
-
-            if (LHSI->hasOneUse()) {
-              // Otherwise strength reduce the shift into an and.
-              uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-              uint64_t Val = (1ULL << (TypeBits-ShAmtVal))-1;
-              Constant *Mask = ConstantInt::get(CI->getType(), Val);
-
-              Instruction *AndI =
-                BinaryOperator::createAnd(LHSI->getOperand(0),
-                                          Mask, LHSI->getName()+".mask");
-              Value *And = InsertNewInstBefore(AndI, I);
-              return new ICmpInst(I.getPredicate(), And,
-                                     ConstantExpr::getLShr(CI, ShAmt));
-            }
-          }
-        }
-        break;
-
-      case Instruction::LShr:         // (icmp pred (shr X, ShAmt), CI)
-      case Instruction::AShr:
-        if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-          if (I.isEquality()) {
-            // Check that the shift amount is in range.  If not, don't perform
-            // undefined shifts.  When the shift is visited it will be
-            // simplified.
-            unsigned TypeBits = CI->getType()->getPrimitiveSizeInBits();
-            if (ShAmt->uge(TypeBits))
-              break;
-
-            // If we are comparing against bits always shifted out, the
-            // comparison cannot succeed.
-            Constant *Comp;
-            if (LHSI->getOpcode() == Instruction::LShr) 
-              Comp = ConstantExpr::getLShr(ConstantExpr::getShl(CI, ShAmt), 
-                                           ShAmt);
-            else
-              Comp = ConstantExpr::getAShr(ConstantExpr::getShl(CI, ShAmt), 
-                                           ShAmt);
-
-            if (Comp != CI) {// Comparing against a bit that we know is zero.
-              bool IsICMP_NE = I.getPredicate() == ICmpInst::ICMP_NE;
-              Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
-              return ReplaceInstUsesWith(I, Cst);
-            }
-
-            if (LHSI->hasOneUse() || CI->isNullValue()) {
-              uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
-
-              // Otherwise strength reduce the shift into an and.
-              APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-              Constant *Mask = ConstantInt::get(Val);
-
-              Instruction *AndI =
-                BinaryOperator::createAnd(LHSI->getOperand(0),
-                                          Mask, LHSI->getName()+".mask");
-              Value *And = InsertNewInstBefore(AndI, I);
-              return new ICmpInst(I.getPredicate(), And,
-                                     ConstantExpr::getShl(CI, ShAmt));
-            }
-          }
-        }
-        break;
-
-      case Instruction::SDiv:
-      case Instruction::UDiv:
-        // Fold: icmp pred ([us]div X, C1), C2 -> range test
-        // Fold this div into the comparison, producing a range check. 
-        // Determine, based on the divide type, what the range is being 
-        // checked.  If there is an overflow on the low or high side, remember 
-        // it, otherwise compute the range [low, hi) bounding the new value.
-        // See: InsertRangeTest above for the kinds of replacements possible.
-        if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
-          // FIXME: If the operand types don't match the type of the divide 
-          // then don't attempt this transform. The code below doesn't have the
-          // logic to deal with a signed divide and an unsigned compare (and
-          // vice versa). This is because (x /s C1) <s C2  produces different 
-          // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
-          // (x /u C1) <u C2.  Simply casting the operands and result won't 
-          // work. :(  The if statement below tests that condition and bails 
-          // if it finds it. 
-          bool DivIsSigned = LHSI->getOpcode() == Instruction::SDiv;
-          if (!I.isEquality() && DivIsSigned != I.isSignedPredicate())
-            break;
-          if (DivRHS->isZero())
-            break; // Don't hack on div by zero
-
-          // Initialize the variables that will indicate the nature of the
-          // range check.
-          bool LoOverflow = false, HiOverflow = false;
-          ConstantInt *LoBound = 0, *HiBound = 0;
-
-          // Compute Prod = CI * DivRHS. We are essentially solving an equation
-          // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and 
-          // C2 (CI). By solving for X we can turn this into a range check 
-          // instead of computing a divide. 
-          ConstantInt *Prod = 
-            cast<ConstantInt>(ConstantExpr::getMul(CI, DivRHS));
-
-          // Determine if the product overflows by seeing if the product is
-          // not equal to the divide. Make sure we do the same kind of divide
-          // as in the LHS instruction that we're folding. 
-          bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
-                                     ConstantExpr::getUDiv(Prod, DivRHS)) != CI;
-
-          // Get the ICmp opcode
-          ICmpInst::Predicate predicate = I.getPredicate();
-
-          if (!DivIsSigned) {  // udiv
-            LoBound = Prod;
-            LoOverflow = ProdOV;
-            HiOverflow = ProdOV || 
-                         AddWithOverflow(HiBound, LoBound, DivRHS, false);
-          } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0.
-            if (CI->isNullValue()) {       // (X / pos) op 0
-              // Can't overflow.
-              LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
-              HiBound = DivRHS;
-            } else if (CI->getValue().isPositive()) {   // (X / pos) op pos
-              LoBound = Prod;
-              LoOverflow = ProdOV;
-              HiOverflow = ProdOV || 
-                           AddWithOverflow(HiBound, Prod, DivRHS, true);
-            } else {                       // (X / pos) op neg
-              Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS));
-              LoOverflow = AddWithOverflow(LoBound, Prod,
-                                           cast<ConstantInt>(DivRHSH), true);
-              HiBound = AddOne(Prod);
-              HiOverflow = ProdOV;
-            }
-          } else {                         // Divisor is < 0.
-            if (CI->isNullValue()) {       // (X / neg) op 0
-              LoBound = AddOne(DivRHS);
-              HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
-              if (HiBound == DivRHS)
-                LoBound = 0;               // - INTMIN = INTMIN
-            } else if (CI->getValue().isPositive()) {   // (X / neg) op pos
-              HiOverflow = LoOverflow = ProdOV;
-              if (!LoOverflow)
-                LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS),
-                                             true);
-              HiBound = AddOne(Prod);
-            } else {                       // (X / neg) op neg
-              LoBound = Prod;
-              LoOverflow = HiOverflow = ProdOV;
-              HiBound = cast<ConstantInt>(ConstantExpr::getSub(Prod, DivRHS));
-            }
-
-            // Dividing by a negate swaps the condition.
-            predicate = ICmpInst::getSwappedPredicate(predicate);
-          }
-
-          if (LoBound) {
-            Value *X = LHSI->getOperand(0);
-            switch (predicate) {
-            default: assert(0 && "Unhandled icmp opcode!");
-            case ICmpInst::ICMP_EQ:
-              if (LoOverflow && HiOverflow)
-                return ReplaceInstUsesWith(I, ConstantInt::getFalse());
-              else if (HiOverflow)
-                return new ICmpInst(DivIsSigned ?  ICmpInst::ICMP_SGE : 
-                                    ICmpInst::ICMP_UGE, X, LoBound);
-              else if (LoOverflow)
-                return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 
-                                    ICmpInst::ICMP_ULT, X, HiBound);
-              else
-                return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, 
-                                       true, I);
-            case ICmpInst::ICMP_NE:
-              if (LoOverflow && HiOverflow)
-                return ReplaceInstUsesWith(I, ConstantInt::getTrue());
-              else if (HiOverflow)
-                return new ICmpInst(DivIsSigned ?  ICmpInst::ICMP_SLT : 
-                                    ICmpInst::ICMP_ULT, X, LoBound);
-              else if (LoOverflow)
-                return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 
-                                    ICmpInst::ICMP_UGE, X, HiBound);
-              else
-                return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, 
-                                       false, I);
-            case ICmpInst::ICMP_ULT:
-            case ICmpInst::ICMP_SLT:
-              if (LoOverflow)
-                return ReplaceInstUsesWith(I, ConstantInt::getFalse());
-              return new ICmpInst(predicate, X, LoBound);
-            case ICmpInst::ICMP_UGT:
-            case ICmpInst::ICMP_SGT:
-              if (HiOverflow)
-                return ReplaceInstUsesWith(I, ConstantInt::getFalse());
-              if (predicate == ICmpInst::ICMP_UGT)
-                return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
-              else
-                return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
-            }
-          }
-        }
-        break;
-      }
-
-    // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
-    if (I.isEquality()) {
-      bool isICMP_NE = I.getPredicate() == ICmpInst::ICMP_NE;
-
-      // If the first operand is (add|sub|and|or|xor|rem) with a constant, and 
-      // the second operand is a constant, simplify a bit.
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) {
-        switch (BO->getOpcode()) {
-        case Instruction::SRem:
-          // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
-          if (CI->isZero() && isa<ConstantInt>(BO->getOperand(1)) &&
-              BO->hasOneUse()) {
-            APInt V(cast<ConstantInt>(BO->getOperand(1))->getValue());
-            if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
-              Value *NewRem = InsertNewInstBefore(BinaryOperator::createURem(
-                  BO->getOperand(0), BO->getOperand(1), BO->getName()), I);
-              return new ICmpInst(I.getPredicate(), NewRem, 
-                                  Constant::getNullValue(BO->getType()));
-            }
-          }
-          break;
-        case Instruction::Add:
-          // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
-          if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
-            if (BO->hasOneUse())
-              return new ICmpInst(I.getPredicate(), BO->getOperand(0),
-                                  ConstantExpr::getSub(CI, BOp1C));
-          } else if (CI->isNullValue()) {
-            // Replace ((add A, B) != 0) with (A != -B) if A or B is
-            // efficiently invertible, or if the add has just this one use.
-            Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
-
-            if (Value *NegVal = dyn_castNegVal(BOp1))
-              return new ICmpInst(I.getPredicate(), BOp0, NegVal);
-            else if (Value *NegVal = dyn_castNegVal(BOp0))
-              return new ICmpInst(I.getPredicate(), NegVal, BOp1);
-            else if (BO->hasOneUse()) {
-              Instruction *Neg = BinaryOperator::createNeg(BOp1);
-              InsertNewInstBefore(Neg, I);
-              Neg->takeName(BO);
-              return new ICmpInst(I.getPredicate(), BOp0, Neg);
-            }
-          }
-          break;
-        case Instruction::Xor:
-          // For the xor case, we can xor two constants together, eliminating
-          // the explicit xor.
-          if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
-            return new ICmpInst(I.getPredicate(), BO->getOperand(0), 
-                                ConstantExpr::getXor(CI, BOC));
-
-          // FALLTHROUGH
-        case Instruction::Sub:
-          // Replace (([sub|xor] A, B) != 0) with (A != B)
-          if (CI->isZero())
-            return new ICmpInst(I.getPredicate(), BO->getOperand(0),
-                                BO->getOperand(1));
-          break;
-
-        case Instruction::Or:
-          // If bits are being or'd in that are not present in the constant we
-          // are comparing against, then the comparison could never succeed!
-          if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
-            Constant *NotCI = ConstantExpr::getNot(CI);
-            if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
-              return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 
-                                                             isICMP_NE));
-          }
-          break;
-
-        case Instruction::And:
-          if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
-            // If bits are being compared against that are and'd out, then the
-            // comparison can never succeed!
-            if (!ConstantExpr::getAnd(CI,
-                                      ConstantExpr::getNot(BOC))->isNullValue())
-              return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
-                                                             isICMP_NE));
-
-            // If we have ((X & C) == C), turn it into ((X & C) != 0).
-            if (CI == BOC && isOneBitSet(CI))
-              return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
-                                  ICmpInst::ICMP_NE, Op0,
-                                  Constant::getNullValue(CI->getType()));
-
-            // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
-            if (isSignBit(BOC)) {
-              Value *X = BO->getOperand(0);
-              Constant *Zero = Constant::getNullValue(X->getType());
-              ICmpInst::Predicate pred = isICMP_NE ? 
-                ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
-              return new ICmpInst(pred, X, Zero);
-            }
-
-            // ((X & ~7) == 0) --> X < 8
-            if (CI->isNullValue() && isHighOnes(BOC)) {
-              Value *X = BO->getOperand(0);
-              Constant *NegX = ConstantExpr::getNeg(BOC);
-              ICmpInst::Predicate pred = isICMP_NE ? 
-                ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
-              return new ICmpInst(pred, X, NegX);
-            }
-
-          }
-        default: break;
-        }
-      } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
-        // Handle set{eq|ne} <intrinsic>, intcst.
-        switch (II->getIntrinsicID()) {
-        default: break;
-        case Intrinsic::bswap_i16: 
-          // icmp eq (bswap(x)), c -> icmp eq (x,bswap(c))
-          AddToWorkList(II);  // Dead?
-          I.setOperand(0, II->getOperand(1));
-          I.setOperand(1, ConstantInt::get(Type::Int16Ty,
-                                           ByteSwap_16(CI->getZExtValue())));
-          return &I;
-        case Intrinsic::bswap_i32:   
-          // icmp eq (bswap(x)), c -> icmp eq (x,bswap(c))
-          AddToWorkList(II);  // Dead?
-          I.setOperand(0, II->getOperand(1));
-          I.setOperand(1, ConstantInt::get(Type::Int32Ty,
-                                           ByteSwap_32(CI->getZExtValue())));
-          return &I;
-        case Intrinsic::bswap_i64:   
-          // icmp eq (bswap(x)), c -> icmp eq (x,bswap(c))
-          AddToWorkList(II);  // Dead?
-          I.setOperand(0, II->getOperand(1));
-          I.setOperand(1, ConstantInt::get(Type::Int64Ty,
-                                           ByteSwap_64(CI->getZExtValue())));
-          return &I;
-        }
-      }
-    } else {  // Not a ICMP_EQ/ICMP_NE
-      // If the LHS is a cast from an integral value of the same size, then 
-      // since we know the RHS is a constant, try to simlify.
-      if (CastInst *Cast = dyn_cast<CastInst>(Op0)) {
-        Value *CastOp = Cast->getOperand(0);
-        const Type *SrcTy = CastOp->getType();
-        unsigned SrcTySize = SrcTy->getPrimitiveSizeInBits();
-        if (SrcTy->isInteger() && 
-            SrcTySize == Cast->getType()->getPrimitiveSizeInBits()) {
-          // If this is an unsigned comparison, try to make the comparison use
-          // smaller constant values.
-          switch (I.getPredicate()) {
-            default: break;
-            case ICmpInst::ICMP_ULT: { // X u< 128 => X s> -1
-              ConstantInt *CUI = cast<ConstantInt>(CI);
-              if (CUI->getValue() == APInt::getSignBit(SrcTySize))
-                return new ICmpInst(ICmpInst::ICMP_SGT, CastOp, 
-                  ConstantInt::get(APInt::getAllOnesValue(SrcTySize)));
-              break;
-            }
-            case ICmpInst::ICMP_UGT: { // X u> 127 => X s< 0
-              ConstantInt *CUI = cast<ConstantInt>(CI);
-              if (CUI->getValue() == APInt::getSignedMaxValue(SrcTySize))
-                return new ICmpInst(ICmpInst::ICMP_SLT, CastOp, 
-                                    Constant::getNullValue(SrcTy));
-              break;
-            }
-          }
-
-        }
-      }
-    }
+      if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
+        return Res;
    }
  
-  // Handle icmp with constant RHS
+  // Handle icmp with constant (but not simple integer constant) RHS
    if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
      if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
        switch (LHSI->getOpcode()) {
@@ -5257,7 +4950,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
          if (Instruction *NV = FoldOpIntoPhi(I))
            return NV;
          break;
-      case Instruction::Select:
+      case Instruction::Select: {
          // If either operand of the select is a constant, we can fold the
          // comparison into the select arms, which will cause one to be
          // constant folded and the select turned into a bitwise or.
@@ -5284,6 +4977,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
            return new SelectInst(LHSI->getOperand(0), Op1, Op2);
          break;
        }
+      case Instruction::Malloc:
+        // If we have (malloc != null), and if the malloc has a single use, we
+        // can assume it is successful and remove the malloc.
+        if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
+          AddToWorkList(LHSI);
+          return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
+                                                         !isTrueWhenEqual(I)));
+        }
+        break;
+      }
    }
  
    // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
@@ -5348,7 +5051,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
          if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
            if (ConstantInt *C2 = dyn_cast<ConstantInt>(D))
              if (Op1->hasOneUse()) {
-              Constant *NC = ConstantExpr::getXor(C1, C2);
+              Constant *NC = ConstantInt::get(C1->getValue() ^ C2->getValue());
                Instruction *Xor = BinaryOperator::createXor(C, NC, "tmp");
                return new ICmpInst(I.getPredicate(), A,
                                    InsertNewInstBefore(Xor, I));
@@ -5408,58 +5111,619 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
    return Changed ? &I : 0;
  }
  
-// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
-// We only handle extending casts so far.
-//
-Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
-  const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
-  Value *LHSCIOp        = LHSCI->getOperand(0);
-  const Type *SrcTy     = LHSCIOp->getType();
-  const Type *DestTy    = LHSCI->getType();
-  Value *RHSCIOp;
  
-  // We only handle extension cast instructions, so far. Enforce this.
-  if (LHSCI->getOpcode() != Instruction::ZExt &&
-      LHSCI->getOpcode() != Instruction::SExt)
+/// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS
+/// and CmpRHS are both known to be integer constants.
+Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
+                                          ConstantInt *DivRHS) {
+  ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
+  const APInt &CmpRHSV = CmpRHS->getValue();
+  
+  // FIXME: If the operand types don't match the type of the divide 
+  // then don't attempt this transform. The code below doesn't have the
+  // logic to deal with a signed divide and an unsigned compare (and
+  // vice versa). This is because (x /s C1) <s C2  produces different 
+  // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
+  // (x /u C1) <u C2.  Simply casting the operands and result won't 
+  // work. :(  The if statement below tests that condition and bails 
+  // if it finds it. 
+  bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
+  if (!ICI.isEquality() && DivIsSigned != ICI.isSignedPredicate())
      return 0;
-
-  bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
-  bool isSignedCmp = ICI.isSignedPredicate();
-
-  if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
-    // Not an extension from the same type?
-    RHSCIOp = CI->getOperand(0);
-    if (RHSCIOp->getType() != LHSCIOp->getType()) 
-      return 0;
-    
-    // If the signedness of the two compares doesn't agree (i.e. one is a sext
-    // and the other is a zext), then we can't handle this.
-    if (CI->getOpcode() != LHSCI->getOpcode())
-      return 0;
-
-    // Likewise, if the signedness of the [sz]exts and the compare don't match, 
-    // then we can't handle this.
-    if (isSignedExt != isSignedCmp && !ICI.isEquality())
-      return 0;
+  if (DivRHS->isZero())
+    return 0; // The ProdOV computation fails on divide by zero.
+
+  // Compute Prod = CI * DivRHS. We are essentially solving an equation
+  // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and 
+  // C2 (CI). By solving for X we can turn this into a range check 
+  // instead of computing a divide. 
+  ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+
+  // Determine if the product overflows by seeing if the product is
+  // not equal to the divide. Make sure we do the same kind of divide
+  // as in the LHS instruction that we're folding. 
+  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
+                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+
+  // Get the ICmp opcode
+  ICmpInst::Predicate Pred = ICI.getPredicate();
+
+  // Figure out the interval that is being checked.  For example, a comparison
+  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). 
+  // Compute this interval based on the constants involved and the signedness of
+  // the compare/divide.  This computes a half-open interval, keeping track of
+  // whether either value in the interval overflows.  After analysis each
+  // overflow variable is set to 0 if it's corresponding bound variable is valid
+  // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
+  int LoOverflow = 0, HiOverflow = 0;
+  ConstantInt *LoBound = 0, *HiBound = 0;
+  
+  
+  if (!DivIsSigned) {  // udiv
+    // e.g. X/5 op 3  --> [15, 20)
+    LoBound = Prod;
+    HiOverflow = LoOverflow = ProdOV;
+    if (!HiOverflow)
+      HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
+  } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0.
+    if (CmpRHSV == 0) {       // (X / pos) op 0
+      // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
+      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
+      HiBound = DivRHS;
+    } else if (CmpRHSV.isPositive()) {   // (X / pos) op pos
+      LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
+      HiOverflow = LoOverflow = ProdOV;
+      if (!HiOverflow)
+        HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, true);
+    } else {                       // (X / pos) op neg
+      // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14)
+      Constant *DivRHSH = ConstantExpr::getNeg(SubOne(DivRHS));
+      LoOverflow = AddWithOverflow(LoBound, Prod,
+                                   cast<ConstantInt>(DivRHSH), true) ? -1 : 0;
+      HiBound = AddOne(Prod);
+      HiOverflow = ProdOV ? -1 : 0;
+    }
+  } else {                         // Divisor is < 0.
+    if (CmpRHSV == 0) {       // (X / neg) op 0
+      // e.g. X/-5 op 0  --> [-4, 5)
+      LoBound = AddOne(DivRHS);
+      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));
+      if (HiBound == DivRHS) {     // -INTMIN = INTMIN
+        HiOverflow = 1;            // [INTMIN+1, overflow)
+        HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
+      }
+    } else if (CmpRHSV.isPositive()) {   // (X / neg) op pos
+      // e.g. X/-5 op 3  --> [-19, -14)
+      HiOverflow = LoOverflow = ProdOV ? -1 : 0;
+      if (!LoOverflow)
+        LoOverflow = AddWithOverflow(LoBound, Prod, AddOne(DivRHS), true) ?-1:0;
+      HiBound = AddOne(Prod);
+    } else {                       // (X / neg) op neg
+      // e.g. X/-5 op -3  --> [15, 20)
+      LoBound = Prod;
+      LoOverflow = HiOverflow = ProdOV ? 1 : 0;
+      HiBound = Subtract(Prod, DivRHS);
+    }
      
-    // Okay, just insert a compare of the reduced operands now!
-    return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+    // Dividing by a negative swaps the condition.  LT <-> GT
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  Value *X = DivI->getOperand(0);
+  switch (Pred) {
+  default: assert(0 && "Unhandled icmp opcode!");
+  case ICmpInst::ICMP_EQ:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    else if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 
+                          ICmpInst::ICMP_UGE, X, LoBound);
+    else if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 
+                          ICmpInst::ICMP_ULT, X, HiBound);
+    else
+      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);
+  case ICmpInst::ICMP_NE:
+    if (LoOverflow && HiOverflow)
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    else if (HiOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : 
+                          ICmpInst::ICMP_ULT, X, LoBound);
+    else if (LoOverflow)
+      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : 
+                          ICmpInst::ICMP_UGE, X, HiBound);
+    else
+      return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);
+  case ICmpInst::ICMP_ULT:
+  case ICmpInst::ICMP_SLT:
+    if (LoOverflow == +1)   // Low bound is greater than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    if (LoOverflow == -1)   // Low bound is less than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    return new ICmpInst(Pred, X, LoBound);
+  case ICmpInst::ICMP_UGT:
+  case ICmpInst::ICMP_SGT:
+    if (HiOverflow == +1)       // High bound greater than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+    else if (HiOverflow == -1)  // High bound less than input range.
+      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+    if (Pred == ICmpInst::ICMP_UGT)
+      return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+    else
+      return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
    }
+}
  
-  // If we aren't dealing with a constant on the RHS, exit early
-  ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
-  if (!CI)
-    return 0;
-
-  // Compute the constant that would happen if we truncated to SrcTy then
-  // reextended to DestTy.
-  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
-  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
  
-  // If the re-extended constant didn't change...
-  if (Res2 == CI) {
-    // Make sure that sign of the Cmp and the sign of the Cast are the same.
-    // For example, we might have:
+/// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)".
+///
+Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
+                                                          Instruction *LHSI,
+                                                          ConstantInt *RHS) {
+  const APInt &RHSV = RHS->getValue();
+  
+  switch (LHSI->getOpcode()) {
+  case Instruction::Xor:         // (icmp pred (xor X, XorCST), CI)
+    if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+      // If this is a comparison that tests the signbit (X < 0) or (x > -1),
+      // fold the xor.
+      if (ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0 ||
+          ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue()) {
+        Value *CompareVal = LHSI->getOperand(0);
+        
+        // If the sign bit of the XorCST is not set, there is no change to
+        // the operation, just stop using the Xor.
+        if (!XorCST->getValue().isNegative()) {
+          ICI.setOperand(0, CompareVal);
+          AddToWorkList(LHSI);
+          return &ICI;
+        }
+        
+        // Was the old condition true if the operand is positive?
+        bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+        
+        // If so, the new one isn't.
+        isTrueIfPositive ^= true;
+        
+        if (isTrueIfPositive)
+          return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, SubOne(RHS));
+        else
+          return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, AddOne(RHS));
+      }
+    }
+    break;
+  case Instruction::And:         // (icmp pred (and X, AndCST), RHS)
+    if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
+        LHSI->getOperand(0)->hasOneUse()) {
+      ConstantInt *AndCST = cast<ConstantInt>(LHSI->getOperand(1));
+      
+      // If the LHS is an AND of a truncating cast, we can widen the
+      // and/compare to be the input width without changing the value
+      // produced, eliminating a cast.
+      if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
+        // We can do this transformation if either the AND constant does not
+        // have its sign bit set or if it is an equality comparison. 
+        // Extending a relational comparison when we're checking the sign
+        // bit would not work.
+        if (Cast->hasOneUse() &&
+            (ICI.isEquality() || AndCST->getValue().isPositive() && 
+             RHSV.isPositive())) {
+          uint32_t BitWidth = 
+            cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
+          APInt NewCST = AndCST->getValue();
+          NewCST.zext(BitWidth);
+          APInt NewCI = RHSV;
+          NewCI.zext(BitWidth);
+          Instruction *NewAnd = 
+            BinaryOperator::createAnd(Cast->getOperand(0),
+                                      ConstantInt::get(NewCST),LHSI->getName());
+          InsertNewInstBefore(NewAnd, ICI);
+          return new ICmpInst(ICI.getPredicate(), NewAnd,
+                              ConstantInt::get(NewCI));
+        }
+      }
+      
+      // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
+      // could exist), turn it into (X & (C2 << C1)) != (C3 << C1).  This
+      // happens a LOT in code produced by the C front-end, for bitfield
+      // access.
+      BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
+      if (Shift && !Shift->isShift())
+        Shift = 0;
+      
+      ConstantInt *ShAmt;
+      ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+      const Type *Ty = Shift ? Shift->getType() : 0;  // Type of the shift.
+      const Type *AndTy = AndCST->getType();          // Type of the and.
+      
+      // We can fold this as long as we can't shift unknown bits
+      // into the mask.  This can only happen with signed shift
+      // rights, as they sign-extend.
+      if (ShAmt) {
+        bool CanFold = Shift->isLogicalShift();
+        if (!CanFold) {
+          // To test for the bad case of the signed shr, see if any
+          // of the bits shifted in could be tested after the mask.
+          uint32_t TyBits = Ty->getPrimitiveSizeInBits();
+          int ShAmtVal = TyBits - ShAmt->getLimitedValue(TyBits);
+          
+          uint32_t BitWidth = AndTy->getPrimitiveSizeInBits();
+          if ((APInt::getHighBitsSet(BitWidth, BitWidth-ShAmtVal) & 
+               AndCST->getValue()) == 0)
+            CanFold = true;
+        }
+        
+        if (CanFold) {
+          Constant *NewCst;
+          if (Shift->getOpcode() == Instruction::Shl)
+            NewCst = ConstantExpr::getLShr(RHS, ShAmt);
+          else
+            NewCst = ConstantExpr::getShl(RHS, ShAmt);
+          
+          // Check to see if we are shifting out any of the bits being
+          // compared.
+          if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != RHS) {
+            // If we shifted bits out, the fold is not going to work out.
+            // As a special case, check to see if this means that the
+            // result is always true or false now.
+            if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
+              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse());
+            if (ICI.getPredicate() == ICmpInst::ICMP_NE)
+              return ReplaceInstUsesWith(ICI, ConstantInt::getTrue());
+          } else {
+            ICI.setOperand(1, NewCst);
+            Constant *NewAndCST;
+            if (Shift->getOpcode() == Instruction::Shl)
+              NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);
+            else
+              NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);
+            LHSI->setOperand(1, NewAndCST);
+            LHSI->setOperand(0, Shift->getOperand(0));
+            AddToWorkList(Shift); // Shift is dead.
+            AddUsesToWorkList(ICI);
+            return &ICI;
+          }
+        }
+      }
+      
+      // Turn ((X >> Y) & C) == 0  into  (X & (C << Y)) == 0.  The later is
+      // preferable because it allows the C<<Y expression to be hoisted out
+      // of a loop if Y is invariant and X is not.
+      if (Shift && Shift->hasOneUse() && RHSV == 0 &&
+          ICI.isEquality() && !Shift->isArithmeticShift() &&
+          isa<Instruction>(Shift->getOperand(0))) {
+        // Compute C << Y.
+        Value *NS;
+        if (Shift->getOpcode() == Instruction::LShr) {
+          NS = BinaryOperator::createShl(AndCST, 
+                                         Shift->getOperand(1), "tmp");
+        } else {
+          // Insert a logical shift.
+          NS = BinaryOperator::createLShr(AndCST,
+                                          Shift->getOperand(1), "tmp");
+        }
+        InsertNewInstBefore(cast<Instruction>(NS), ICI);
+        
+        // Compute X & (C << Y).
+        Instruction *NewAnd = 
+          BinaryOperator::createAnd(Shift->getOperand(0), NS, LHSI->getName());
+        InsertNewInstBefore(NewAnd, ICI);
+        
+        ICI.setOperand(0, NewAnd);
+        return &ICI;
+      }
+    }
+    break;
+    
+  case Instruction::Shl:         // (icmp pred (shl X, ShAmt), CI)
+    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+      if (ICI.isEquality()) {
+        uint32_t TypeBits = RHSV.getBitWidth();
+        
+        // Check that the shift amount is in range.  If not, don't perform
+        // undefined shifts.  When the shift is visited it will be
+        // simplified.
+        if (ShAmt->uge(TypeBits))
+          break;
+        
+        // If we are comparing against bits always shifted out, the
+        // comparison cannot succeed.
+        Constant *Comp =
+          ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt);
+        if (Comp != RHS) {// Comparing against a bit that we know is zero.
+          bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+          Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+          return ReplaceInstUsesWith(ICI, Cst);
+        }
+        
+        if (LHSI->hasOneUse()) {
+          // Otherwise strength reduce the shift into an and.
+          uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+          Constant *Mask =
+            ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
+          
+          Instruction *AndI =
+            BinaryOperator::createAnd(LHSI->getOperand(0),
+                                      Mask, LHSI->getName()+".mask");
+          Value *And = InsertNewInstBefore(AndI, ICI);
+          return new ICmpInst(ICI.getPredicate(), And,
+                              ConstantInt::get(RHSV.lshr(ShAmtVal)));
+        }
+      }
+    }
+    break;
+    
+  case Instruction::LShr:         // (icmp pred (shr X, ShAmt), CI)
+  case Instruction::AShr:
+    if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
+      if (ICI.isEquality()) {
+        // Check that the shift amount is in range.  If not, don't perform
+        // undefined shifts.  When the shift is visited it will be
+        // simplified.
+        uint32_t TypeBits = RHSV.getBitWidth();
+        if (ShAmt->uge(TypeBits))
+          break;
+        uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+        
+        // If we are comparing against bits always shifted out, the
+        // comparison cannot succeed.
+        APInt Comp = RHSV << ShAmtVal;
+        if (LHSI->getOpcode() == Instruction::LShr)
+          Comp = Comp.lshr(ShAmtVal);
+        else
+          Comp = Comp.ashr(ShAmtVal);
+        
+        if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+          bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+          Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+          return ReplaceInstUsesWith(ICI, Cst);
+        }
+        
+        if (LHSI->hasOneUse() || RHSV == 0) {
+          // Otherwise strength reduce the shift into an and.
+          APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+          Constant *Mask = ConstantInt::get(Val);
+          
+          Instruction *AndI =
+            BinaryOperator::createAnd(LHSI->getOperand(0),
+                                      Mask, LHSI->getName()+".mask");
+          Value *And = InsertNewInstBefore(AndI, ICI);
+          return new ICmpInst(ICI.getPredicate(), And,
+                              ConstantExpr::getShl(RHS, ShAmt));
+        }
+      }
+    }
+    break;
+    
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+    // Fold: icmp pred ([us]div X, C1), C2 -> range test
+    // Fold this div into the comparison, producing a range check. 
+    // Determine, based on the divide type, what the range is being 
+    // checked.  If there is an overflow on the low or high side, remember 
+    // it, otherwise compute the range [low, hi) bounding the new value.
+    // See: InsertRangeTest above for the kinds of replacements possible.
+    if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
+      if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
+                                          DivRHS))
+        return R;
+    break;
+  }
+  
+  // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
+  if (ICI.isEquality()) {
+    bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+    
+    // If the first operand is (add|sub|and|or|xor|rem) with a constant, and 
+    // the second operand is a constant, simplify a bit.
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
+      switch (BO->getOpcode()) {
+      case Instruction::SRem:
+        // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+        if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
+          const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
+          if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
+            Instruction *NewRem =
+              BinaryOperator::createURem(BO->getOperand(0), BO->getOperand(1),
+                                         BO->getName());
+            InsertNewInstBefore(NewRem, ICI);
+            return new ICmpInst(ICI.getPredicate(), NewRem, 
+                                Constant::getNullValue(BO->getType()));
+          }
+        }
+        break;
+      case Instruction::Add:
+        // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
+        if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+          if (BO->hasOneUse())
+            return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                                Subtract(RHS, BOp1C));
+        } else if (RHSV == 0) {
+          // Replace ((add A, B) != 0) with (A != -B) if A or B is
+          // efficiently invertible, or if the add has just this one use.
+          Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+          
+          if (Value *NegVal = dyn_castNegVal(BOp1))
+            return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
+          else if (Value *NegVal = dyn_castNegVal(BOp0))
+            return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
+          else if (BO->hasOneUse()) {
+            Instruction *Neg = BinaryOperator::createNeg(BOp1);
+            InsertNewInstBefore(Neg, ICI);
+            Neg->takeName(BO);
+            return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
+          }
+        }
+        break;
+      case Instruction::Xor:
+        // For the xor case, we can xor two constants together, eliminating
+        // the explicit xor.
+        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), 
+                              ConstantExpr::getXor(RHS, BOC));
+        
+        // FALLTHROUGH
+      case Instruction::Sub:
+        // Replace (([sub|xor] A, B) != 0) with (A != B)
+        if (RHSV == 0)
+          return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+                              BO->getOperand(1));
+        break;
+        
+      case Instruction::Or:
+        // If bits are being or'd in that are not present in the constant we
+        // are comparing against, then the comparison could never succeed!
+        if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+          Constant *NotCI = ConstantExpr::getNot(RHS);
+          if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
+            return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty, 
+                                                             isICMP_NE));
+        }
+        break;
+        
+      case Instruction::And:
+        if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+          // If bits are being compared against that are and'd out, then the
+          // comparison can never succeed!
+          if ((RHSV & ~BOC->getValue()) != 0)
+            return ReplaceInstUsesWith(ICI, ConstantInt::get(Type::Int1Ty,
+                                                             isICMP_NE));
+          
+          // If we have ((X & C) == C), turn it into ((X & C) != 0).
+          if (RHS == BOC && RHSV.isPowerOf2())
+            return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
+                                ICmpInst::ICMP_NE, LHSI,
+                                Constant::getNullValue(RHS->getType()));
+          
+          // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
+          if (isSignBit(BOC)) {
+            Value *X = BO->getOperand(0);
+            Constant *Zero = Constant::getNullValue(X->getType());
+            ICmpInst::Predicate pred = isICMP_NE ? 
+              ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+            return new ICmpInst(pred, X, Zero);
+          }
+          
+          // ((X & ~7) == 0) --> X < 8
+          if (RHSV == 0 && isHighOnes(BOC)) {
+            Value *X = BO->getOperand(0);
+            Constant *NegX = ConstantExpr::getNeg(BOC);
+            ICmpInst::Predicate pred = isICMP_NE ? 
+              ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+            return new ICmpInst(pred, X, NegX);
+          }
+        }
+      default: break;
+      }
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
+      // Handle icmp {eq|ne} <intrinsic>, intcst.
+      if (II->getIntrinsicID() == Intrinsic::bswap) {
+        AddToWorkList(II);
+        ICI.setOperand(0, II->getOperand(1));
+        ICI.setOperand(1, ConstantInt::get(RHSV.byteSwap()));
+        return &ICI;
+      }
+    }
+  } else {  // Not a ICMP_EQ/ICMP_NE
+            // If the LHS is a cast from an integral value of the same size, 
+            // then since we know the RHS is a constant, try to simlify.
+    if (CastInst *Cast = dyn_cast<CastInst>(LHSI)) {
+      Value *CastOp = Cast->getOperand(0);
+      const Type *SrcTy = CastOp->getType();
+      uint32_t SrcTySize = SrcTy->getPrimitiveSizeInBits();
+      if (SrcTy->isInteger() && 
+          SrcTySize == Cast->getType()->getPrimitiveSizeInBits()) {
+        // If this is an unsigned comparison, try to make the comparison use
+        // smaller constant values.
+        if (ICI.getPredicate() == ICmpInst::ICMP_ULT && RHSV.isSignBit()) {
+          // X u< 128 => X s> -1
+          return new ICmpInst(ICmpInst::ICMP_SGT, CastOp, 
+                           ConstantInt::get(APInt::getAllOnesValue(SrcTySize)));
+        } else if (ICI.getPredicate() == ICmpInst::ICMP_UGT &&
+                   RHSV == APInt::getSignedMaxValue(SrcTySize)) {
+          // X u> 127 => X s< 0
+          return new ICmpInst(ICmpInst::ICMP_SLT, CastOp, 
+                              Constant::getNullValue(SrcTy));
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
+/// We only handle extending casts so far.
+///
+Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
+  const CastInst *LHSCI = cast<CastInst>(ICI.getOperand(0));
+  Value *LHSCIOp        = LHSCI->getOperand(0);
+  const Type *SrcTy     = LHSCIOp->getType();
+  const Type *DestTy    = LHSCI->getType();
+  Value *RHSCIOp;
+
+  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the 
+  // integer type is the same size as the pointer type.
+  if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+      getTargetData().getPointerSizeInBits() == 
+         cast<IntegerType>(DestTy)->getBitWidth()) {
+    Value *RHSOp = 0;
+    if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
+    } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
+      RHSOp = RHSC->getOperand(0);
+      // If the pointer types don't match, insert a bitcast.
+      if (LHSCIOp->getType() != RHSOp->getType())
+        RHSOp = InsertCastBefore(Instruction::BitCast, RHSOp,
+                                 LHSCIOp->getType(), ICI);
+    }
+
+    if (RHSOp)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
+  }
+  
+  // The code below only handles extension cast instructions, so far.
+  // Enforce this.
+  if (LHSCI->getOpcode() != Instruction::ZExt &&
+      LHSCI->getOpcode() != Instruction::SExt)
+    return 0;
+
+  bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
+  bool isSignedCmp = ICI.isSignedPredicate();
+
+  if (CastInst *CI = dyn_cast<CastInst>(ICI.getOperand(1))) {
+    // Not an extension from the same type?
+    RHSCIOp = CI->getOperand(0);
+    if (RHSCIOp->getType() != LHSCIOp->getType()) 
+      return 0;
+    
+    // If the signedness of the two compares doesn't agree (i.e. one is a sext
+    // and the other is a zext), then we can't handle this.
+    if (CI->getOpcode() != LHSCI->getOpcode())
+      return 0;
+
+    // Likewise, if the signedness of the [sz]exts and the compare don't match, 
+    // then we can't handle this.
+    if (isSignedExt != isSignedCmp && !ICI.isEquality())
+      return 0;
+    
+    // Okay, just insert a compare of the reduced operands now!
+    return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+  }
+
+  // If we aren't dealing with a constant on the RHS, exit early
+  ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
+  if (!CI)
+    return 0;
+
+  // Compute the constant that would happen if we truncated to SrcTy then
+  // reextended to DestTy.
+  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy);
+  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
+
+  // If the re-extended constant didn't change...
+  if (Res2 == CI) {
+    // Make sure that sign of the Cmp and the sign of the Cast are the same.
+    // For example, we might have:
      //    %A = sext short %X to uint
      //    %B = icmp ugt uint %A, 1330
      // It is incorrect to transform this into 
@@ -5772,7 +6036,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
      if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
      Value *X = ShiftOp->getOperand(0);
      
-    unsigned AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
+    uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
      if (AmtSum > TypeBits)
        AmtSum = TypeBits;
      
@@ -5807,7 +6071,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
        }
        // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
        if (I.getOpcode() == Instruction::LShr) {
-        APInt Mask(Ty->getMask().lshr(ShiftAmt1));
+        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
          return BinaryOperator::createAnd(X, ConstantInt::get(Mask));
        }
        // We can simplify ((X << C) >>s C) into a trunc + sext.
@@ -5833,7 +6097,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
        }
        // Otherwise, we can't handle it yet.
      } else if (ShiftAmt1 < ShiftAmt2) {
-      unsigned ShiftDiff = ShiftAmt2-ShiftAmt1;
+      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
        
        // (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
        if (I.getOpcode() == Instruction::Shl) {
@@ -5861,7 +6125,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
        // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
      } else {
        assert(ShiftAmt2 < ShiftAmt1);
-      unsigned ShiftDiff = ShiftAmt1-ShiftAmt2;
+      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
  
        // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
        if (I.getOpcode() == Instruction::Shl) {
@@ -5899,7 +6163,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
  /// X*Scale+Offset.
  ///
  static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
-                                        unsigned &Offset) {
+                                        int &Offset) {
    assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!");
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
      Offset = CI->getZExtValue();
@@ -5943,10 +6207,9 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
  
  /// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
  /// try to eliminate the cast by moving the type information into the alloc.
-Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI,
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
                                                     AllocationInst &AI) {
-  const PointerType *PTy = dyn_cast<PointerType>(CI.getType());
-  if (!PTy) return 0;   // Not casting the allocation to a pointer type.
+  const PointerType *PTy = cast<PointerType>(CI.getType());
    
    // Remove any uses of AI that are dead.
    assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
@@ -5983,7 +6246,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI,
  
    // See if we can satisfy the modulus by pulling a scale out of the array
    // size argument.
-  unsigned ArraySizeScale, ArrayOffset;
+  unsigned ArraySizeScale;
+  int ArrayOffset;
    Value *NumElements = // See if the array size is a decomposable linear expr.
      DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
   
@@ -6000,8 +6264,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI,
      // If the allocation size is constant, form a constant mul expression
      Amt = ConstantInt::get(Type::Int32Ty, Scale);
      if (isa<ConstantInt>(NumElements))
-      Amt = ConstantExpr::getMul(
-              cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
+      Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
      // otherwise multiply the amount and the number of elements
      else if (Scale != 1) {
        Instruction *Tmp = BinaryOperator::createMul(Amt, NumElements, "tmp");
@@ -6009,8 +6272,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI,
      }
    }
    
-  if (unsigned Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
-    Value *Off = ConstantInt::get(Type::Int32Ty, Offset);
+  if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+    Value *Off = ConstantInt::get(Type::Int32Ty, Offset, true);
      Instruction *Tmp = BinaryOperator::createAdd(Amt, Off, "tmp");
      Amt = InsertNewInstBefore(Tmp, AI);
    }
@@ -6090,7 +6353,7 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
            MaskedValueIsZero(I->getOperand(0),
              APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
            CI->getLimitedValue(BitWidth) < BitWidth) {
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved);
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved);
        }
      }
      break;
@@ -6173,7 +6436,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
    if (isa<UndefValue>(Src))   // cast undef -> undef
      return ReplaceInstUsesWith(CI, UndefValue::get(CI.getType()));
  
-  // Many cases of "cast of a cast" are eliminable. If its eliminable we just
+  // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
    // eliminate it now.
    if (CastInst *CSrc = dyn_cast<CastInst>(Src)) {   // A->B->C cast
      if (Instruction::CastOps opc = 
@@ -6184,32 +6447,6 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
      }
    }
  
-  // If casting the result of a getelementptr instruction with no offset, turn
-  // this into a cast of the original pointer!
-  //
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
-    bool AllZeroOperands = true;
-    for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i)
-      if (!isa<Constant>(GEP->getOperand(i)) ||
-          !cast<Constant>(GEP->getOperand(i))->isNullValue()) {
-        AllZeroOperands = false;
-        break;
-      }
-    if (AllZeroOperands) {
-      // Changing the cast operand is usually not a good idea but it is safe
-      // here because the pointer operand is being replaced with another 
-      // pointer operand so the opcode doesn't need to change.
-      CI.setOperand(0, GEP->getOperand(0));
-      return &CI;
-    }
-  }
-    
-  // If we are casting a malloc or alloca to a pointer to a type of the same
-  // size, rewrite the allocation instruction to allocate the "right" type.
-  if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
-    if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
-      return V;
-
    // If we are casting a select then fold the cast into the select
    if (SelectInst *SI = dyn_cast<SelectInst>(Src))
      if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
@@ -6223,6 +6460,113 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
    return 0;
  }
  
+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+  Value *Src = CI.getOperand(0);
+  
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+    // If casting the result of a getelementptr instruction with no offset, turn
+    // this into a cast of the original pointer!
+    if (GEP->hasAllZeroIndices()) {
+      // Changing the cast operand is usually not a good idea but it is safe
+      // here because the pointer operand is being replaced with another 
+      // pointer operand so the opcode doesn't need to change.
+      AddToWorkList(GEP);
+      CI.setOperand(0, GEP->getOperand(0));
+      return &CI;
+    }
+    
+    // If the GEP has a single use, and the base pointer is a bitcast, and the
+    // GEP computes a constant offset, see if we can convert these three
+    // instructions into fewer.  This typically happens with unions and other
+    // non-type-safe code.
+    if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
+      if (GEP->hasAllConstantIndices()) {
+        // We are guaranteed to get a constant from EmitGEPOffset.
+        ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, CI, *this));
+        int64_t Offset = OffsetV->getSExtValue();
+        
+        // Get the base pointer input of the bitcast, and the type it points to.
+        Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+        const Type *GEPIdxTy =
+          cast<PointerType>(OrigBase->getType())->getElementType();
+        if (GEPIdxTy->isSized()) {
+          SmallVector<Value*, 8> NewIndices;
+          
+          // Start with the index over the outer type.  Note that the type size
+          // might be zero (even if the offset isn't zero) if the indexed type
+          // is something like [0 x {int, int}]
+          const Type *IntPtrTy = TD->getIntPtrType();
+          int64_t FirstIdx = 0;
+          if (int64_t TySize = TD->getTypeSize(GEPIdxTy)) {
+            FirstIdx = Offset/TySize;
+            Offset %= TySize;
+          
+            // Handle silly modulus not returning values values [0..TySize).
+            if (Offset < 0) {
+              --FirstIdx;
+              Offset += TySize;
+              assert(Offset >= 0);
+            }
+            assert((uint64_t)Offset < (uint64_t)TySize &&"Out of range offset");
+          }
+          
+          NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+
+          // Index into the types.  If we fail, set OrigBase to null.
+          while (Offset) {
+            if (const StructType *STy = dyn_cast<StructType>(GEPIdxTy)) {
+              const StructLayout *SL = TD->getStructLayout(STy);
+              if (Offset < (int64_t)SL->getSizeInBytes()) {
+                unsigned Elt = SL->getElementContainingOffset(Offset);
+                NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
+              
+                Offset -= SL->getElementOffset(Elt);
+                GEPIdxTy = STy->getElementType(Elt);
+              } else {
+                // Otherwise, we can't index into this, bail out.
+                Offset = 0;
+                OrigBase = 0;
+              }
+            } else if (isa<ArrayType>(GEPIdxTy) || isa<VectorType>(GEPIdxTy)) {
+              const SequentialType *STy = cast<SequentialType>(GEPIdxTy);
+              if (uint64_t EltSize = TD->getTypeSize(STy->getElementType())) {
+                NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+                Offset %= EltSize;
+              } else {
+                NewIndices.push_back(ConstantInt::get(IntPtrTy, 0));
+              }
+              GEPIdxTy = STy->getElementType();
+            } else {
+              // Otherwise, we can't index into this, bail out.
+              Offset = 0;
+              OrigBase = 0;
+            }
+          }
+          if (OrigBase) {
+            // If we were able to index down into an element, create the GEP
+            // and bitcast the result.  This eliminates one bitcast, potentially
+            // two.
+            Instruction *NGEP = new GetElementPtrInst(OrigBase, &NewIndices[0],
+                                                      NewIndices.size(), "");
+            InsertNewInstBefore(NGEP, CI);
+            NGEP->takeName(GEP);
+            
+            if (isa<BitCastInst>(CI))
+              return new BitCastInst(NGEP, CI.getType());
+            assert(isa<PtrToIntInst>(CI));
+            return new PtrToIntInst(NGEP, CI.getType());
+          }
+        }
+      }      
+    }
+  }
+    
+  return commonCastTransforms(CI);
+}
+
+
+
  /// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as
  /// integer types. This function implements the common transforms for all those
  /// cases.
@@ -6234,8 +6578,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
    Value *Src = CI.getOperand(0);
    const Type *SrcTy = Src->getType();
    const Type *DestTy = CI.getType();
-  unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+  uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+  uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits();
  
    // See if we can simplify any instructions used by the LHS whose sole 
    // purpose is to compute bits we don't care about.
@@ -6296,7 +6640,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
        case Instruction::ZExt: {
          // We need to emit an AND to clear the high bits.
          assert(SrcBitSize < DestBitSize && "Not a zext?");
-        Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize, SrcBitSize));
+        Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize,
+                                                            SrcBitSize));
          return BinaryOperator::createAnd(Res, C);
        }
        case Instruction::SExt:
@@ -6317,8 +6662,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
-    // If we are discarding information, or just changing the sign, 
-    // rewrite.
+    // If we are discarding information, rewrite.
      if (DestBitSize <= SrcBitSize && DestBitSize != 1) {
        // Don't insert two casts if they cannot be eliminated.  We allow 
        // two casts to be inserted if the sizes are the same.  This could 
@@ -6392,81 +6736,18 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
        }
      }
      break;
-
-  case Instruction::ICmp:
-    // If we are just checking for a icmp eq of a single bit and casting it
-    // to an integer, then shift the bit to the appropriate place and then
-    // cast to integer to avoid the comparison.
-    if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
-      APInt Op1CV(Op1C->getValue());
-      // cast (X == 0) to int --> X^1      iff X has only the low bit set.
-      // cast (X == 0) to int --> (X>>1)^1 iff X has only the 2nd bit set.
-      // cast (X == 1) to int --> X        iff X has only the low bit set.
-      // cast (X == 2) to int --> X>>1     iff X has only the 2nd bit set.
-      // cast (X != 0) to int --> X        iff X has only the low bit set.
-      // cast (X != 0) to int --> X>>1     iff X has only the 2nd bit set.
-      // cast (X != 1) to int --> X^1      iff X has only the low bit set.
-      // cast (X != 2) to int --> (X>>1)^1 iff X has only the 2nd bit set.
-      if (Op1CV == 0 || Op1CV.isPowerOf2()) {
-        // If Op1C some other power of two, convert:
-        uint32_t BitWidth = Op1C->getType()->getBitWidth();
-        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-        APInt TypeMask(APInt::getAllOnesValue(BitWidth));
-        ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne);
-
-        // This only works for EQ and NE
-        ICmpInst::Predicate pred = cast<ICmpInst>(SrcI)->getPredicate();
-        if (pred != ICmpInst::ICMP_NE && pred != ICmpInst::ICMP_EQ)
-          break;
-        
-        APInt KnownZeroMask(KnownZero ^ TypeMask);
-        if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
-          bool isNE = pred == ICmpInst::ICMP_NE;
-          if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
-            // (X&4) == 2 --> false
-            // (X&4) != 2 --> true
-            Constant *Res = ConstantInt::get(Type::Int1Ty, isNE);
-            Res = ConstantExpr::getZExt(Res, CI.getType());
-            return ReplaceInstUsesWith(CI, Res);
-          }
-          
-          unsigned ShiftAmt = KnownZeroMask.logBase2();
-          Value *In = Op0;
-          if (ShiftAmt) {
-            // Perform a logical shr by shiftamt.
-            // Insert the shift to put the result in the low bit.
-            In = InsertNewInstBefore(
-              BinaryOperator::createLShr(In,
-                                     ConstantInt::get(In->getType(), ShiftAmt),
-                                     In->getName()+".lobit"), CI);
-          }
-          
-          if ((Op1CV != 0) == isNE) { // Toggle the low bit.
-            Constant *One = ConstantInt::get(In->getType(), 1);
-            In = BinaryOperator::createXor(In, One, "tmp");
-            InsertNewInstBefore(cast<Instruction>(In), CI);
-          }
-          
-          if (CI.getType() == In->getType())
-            return ReplaceInstUsesWith(CI, In);
-          else
-            return CastInst::createIntegerCast(In, CI.getType(), false/*ZExt*/);
-        }
-      }
-    }
-    break;
    }
    return 0;
  }
  
-Instruction *InstCombiner::visitTrunc(CastInst &CI) {
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
    if (Instruction *Result = commonIntCastTransforms(CI))
      return Result;
    
    Value *Src = CI.getOperand(0);
    const Type *Ty = CI.getType();
-  unsigned DestBitWidth = Ty->getPrimitiveSizeInBits();
-  unsigned SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
+  uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits();
+  uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
    
    if (Instruction *SrcI = dyn_cast<Instruction>(Src)) {
      switch (SrcI->getOpcode()) {
@@ -6516,7 +6797,7 @@ Instruction *InstCombiner::visitTrunc(CastInst &CI) {
    return 0;
  }
  
-Instruction *InstCombiner::visitZExt(CastInst &CI) {
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
    // If one of the common conversion will work ..
    if (Instruction *Result = commonIntCastTransforms(CI))
      return Result;
@@ -6531,9 +6812,9 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
      if (isa<TruncInst>(CSrc)) {
        // Get the sizes of the types involved
        Value *A = CSrc->getOperand(0);
-      unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
-      unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
-      unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+      uint32_t SrcSize = A->getType()->getPrimitiveSizeInBits();
+      uint32_t MidSize = CSrc->getType()->getPrimitiveSizeInBits();
+      uint32_t DstSize = CI.getType()->getPrimitiveSizeInBits();
        // If we're actually extending zero bits and the trunc is a no-op
        if (MidSize < DstSize && SrcSize == DstSize) {
          // Replace both of the casts with an And of the type mask.
@@ -6552,11 +6833,134 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
      }
    }
  
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) {
+    // If we are just checking for a icmp eq of a single bit and zext'ing it
+    // to an integer, then shift the bit to the appropriate place and then
+    // cast to integer to avoid the comparison.
+    if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+      const APInt &Op1CV = Op1C->getValue();
+      
+      // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
+      // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
+      if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+          (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())){
+        Value *In = ICI->getOperand(0);
+        Value *Sh = ConstantInt::get(In->getType(),
+                                    In->getType()->getPrimitiveSizeInBits()-1);
+        In = InsertNewInstBefore(BinaryOperator::createLShr(In, Sh,
+                                                        In->getName()+".lobit"),
+                                 CI);
+        if (In->getType() != CI.getType())
+          In = CastInst::createIntegerCast(In, CI.getType(),
+                                           false/*ZExt*/, "tmp", &CI);
+
+        if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+          Constant *One = ConstantInt::get(In->getType(), 1);
+          In = InsertNewInstBefore(BinaryOperator::createXor(In, One,
+                                                          In->getName()+".not"),
+                                   CI);
+        }
+
+        return ReplaceInstUsesWith(CI, In);
+      }
+      
+      
+      
+      // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
+      // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+      // zext (X == 1) to i32 --> X        iff X has only the low bit set.
+      // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
+      // zext (X != 0) to i32 --> X        iff X has only the low bit set.
+      // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
+      // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
+      // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+      if ((Op1CV == 0 || Op1CV.isPowerOf2()) && 
+          // This only works for EQ and NE
+          ICI->isEquality()) {
+        // If Op1C some other power of two, convert:
+        uint32_t BitWidth = Op1C->getType()->getBitWidth();
+        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+        APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+        ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+        
+        APInt KnownZeroMask(~KnownZero);
+        if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+          bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+          if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+            // (X&4) == 2 --> false
+            // (X&4) != 2 --> true
+            Constant *Res = ConstantInt::get(Type::Int1Ty, isNE);
+            Res = ConstantExpr::getZExt(Res, CI.getType());
+            return ReplaceInstUsesWith(CI, Res);
+          }
+          
+          uint32_t ShiftAmt = KnownZeroMask.logBase2();
+          Value *In = ICI->getOperand(0);
+          if (ShiftAmt) {
+            // Perform a logical shr by shiftamt.
+            // Insert the shift to put the result in the low bit.
+            In = InsertNewInstBefore(
+                   BinaryOperator::createLShr(In,
+                                     ConstantInt::get(In->getType(), ShiftAmt),
+                                              In->getName()+".lobit"), CI);
+          }
+          
+          if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+            Constant *One = ConstantInt::get(In->getType(), 1);
+            In = BinaryOperator::createXor(In, One, "tmp");
+            InsertNewInstBefore(cast<Instruction>(In), CI);
+          }
+          
+          if (CI.getType() == In->getType())
+            return ReplaceInstUsesWith(CI, In);
+          else
+            return CastInst::createIntegerCast(In, CI.getType(), false/*ZExt*/);
+        }
+      }
+    }
+  }    
    return 0;
  }
  
-Instruction *InstCombiner::visitSExt(CastInst &CI) {
-  return commonIntCastTransforms(CI);
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
+  if (Instruction *I = commonIntCastTransforms(CI))
+    return I;
+  
+  Value *Src = CI.getOperand(0);
+  
+  // sext (x <s 0) -> ashr x, 31   -> all ones if signed
+  // sext (x >s -1) -> ashr x, 31  -> all ones if not signed
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) {
+    // If we are just checking for a icmp eq of a single bit and zext'ing it
+    // to an integer, then shift the bit to the appropriate place and then
+    // cast to integer to avoid the comparison.
+    if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+      const APInt &Op1CV = Op1C->getValue();
+      
+      // sext (x <s  0) to i32 --> x>>s31      true if signbit set.
+      // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
+      if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+          (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())){
+        Value *In = ICI->getOperand(0);
+        Value *Sh = ConstantInt::get(In->getType(),
+                                     In->getType()->getPrimitiveSizeInBits()-1);
+        In = InsertNewInstBefore(BinaryOperator::createAShr(In, Sh,
+                                                        In->getName()+".lobit"),
+                                 CI);
+        if (In->getType() != CI.getType())
+          In = CastInst::createIntegerCast(In, CI.getType(),
+                                           true/*SExt*/, "tmp", &CI);
+        
+        if (ICI->getPredicate() == ICmpInst::ICMP_SGT)
+          In = InsertNewInstBefore(BinaryOperator::createNot(In,
+                                     In->getName()+".not"), CI);
+        
+        return ReplaceInstUsesWith(CI, In);
+      }
+    }
+  }
+      
+  return 0;
  }
  
  Instruction *InstCombiner::visitFPTrunc(CastInst &CI) {
@@ -6584,15 +6988,14 @@ Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
  }
  
  Instruction *InstCombiner::visitPtrToInt(CastInst &CI) {
-  return commonCastTransforms(CI);
+  return commonPointerCastTransforms(CI);
  }
  
  Instruction *InstCombiner::visitIntToPtr(CastInst &CI) {
    return commonCastTransforms(CI);
  }
  
-Instruction *InstCombiner::visitBitCast(CastInst &CI) {
-
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
    // If the operands are integer typed then apply the integer transforms,
    // otherwise just apply the common ones.
    Value *Src = CI.getOperand(0);
@@ -6602,6 +7005,9 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) {
    if (SrcTy->isInteger() && DestTy->isInteger()) {
      if (Instruction *Result = commonIntCastTransforms(CI))
        return Result;
+  } else if (isa<PointerType>(SrcTy)) {
+    if (Instruction *I = commonPointerCastTransforms(CI))
+      return I;
    } else {
      if (Instruction *Result = commonCastTransforms(CI))
        return Result;
@@ -6613,28 +7019,33 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) {
    if (DestTy == Src->getType())
      return ReplaceInstUsesWith(CI, Src);
  
-  // If the source and destination are pointers, and this cast is equivalent to
-  // a getelementptr X, 0, 0, 0...  turn it into the appropriate getelementptr.
-  // This can enhance SROA and other transforms that want type-safe pointers.
    if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
-    if (const PointerType *SrcPTy = dyn_cast<PointerType>(SrcTy)) {
-      const Type *DstElTy = DstPTy->getElementType();
-      const Type *SrcElTy = SrcPTy->getElementType();
-      
-      Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
-      unsigned NumZeros = 0;
-      while (SrcElTy != DstElTy && 
-             isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
-             SrcElTy->getNumContainedTypes() /* not "{}" */) {
-        SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
-        ++NumZeros;
-      }
+    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+    const Type *DstElTy = DstPTy->getElementType();
+    const Type *SrcElTy = SrcPTy->getElementType();
+    
+    // If we are casting a malloc or alloca to a pointer to a type of the same
+    // size, rewrite the allocation instruction to allocate the "right" type.
+    if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
+      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+        return V;
+    
+    // If the source and destination are pointers, and this cast is equivalent
+    // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.
+    // This can enhance SROA and other transforms that want type-safe pointers.
+    Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
+    unsigned NumZeros = 0;
+    while (SrcElTy != DstElTy && 
+           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+           SrcElTy->getNumContainedTypes() /* not "{}" */) {
+      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+      ++NumZeros;
+    }
  
-      // If we found a path from the src to dest, create the getelementptr now.
-      if (SrcElTy == DstElTy) {
-        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-        return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
-      }
+    // If we found a path from the src to dest, create the getelementptr now.
+    if (SrcElTy == DstElTy) {
+      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+      return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
      }
    }
  
@@ -6710,7 +7121,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) {
    case Instruction::AShr:
      return Constant::getNullValue(I->getType());
    case Instruction::And:
-    return ConstantInt::getAllOnesValue(I->getType());
+    return Constant::getAllOnesValue(I->getType());
    case Instruction::Mul:
      return ConstantInt::get(I->getType(), 1);
    }
@@ -6840,38 +7251,29 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    // Selecting between two integer constants?
    if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
      if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
-      // select C, 1, 0 -> cast C to int
+      // select C, 1, 0 -> zext C to int
        if (FalseValC->isZero() && TrueValC->getValue() == 1) {
          return CastInst::create(Instruction::ZExt, CondVal, SI.getType());
        } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
-        // select C, 0, 1 -> cast !C to int
+        // select C, 0, 1 -> zext !C to int
          Value *NotCond =
            InsertNewInstBefore(BinaryOperator::createNot(CondVal,
                                                 "not."+CondVal->getName()), SI);
          return CastInst::create(Instruction::ZExt, NotCond, SI.getType());
        }
+      
+      // FIXME: Turn select 0/-1 and -1/0 into sext from condition!
  
        if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) {
  
          // (x <s 0) ? -1 : 0 -> ashr x, 31
-        // (x >u 2147483647) ? -1 : 0 -> ashr x, 31
          if (TrueValC->isAllOnesValue() && FalseValC->isZero())
            if (ConstantInt *CmpCst = dyn_cast<ConstantInt>(IC->getOperand(1))) {
-            bool CanXForm = false;
-            if (IC->isSignedPredicate())
-              CanXForm = CmpCst->isZero() && 
-                         IC->getPredicate() == ICmpInst::ICMP_SLT;
-            else {
-              unsigned Bits = CmpCst->getType()->getPrimitiveSizeInBits();
-              CanXForm = CmpCst->getValue() == APInt::getSignedMaxValue(Bits) &&
-                         IC->getPredicate() == ICmpInst::ICMP_UGT;
-            }
-            
-            if (CanXForm) {
+            if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) {
                // The comparison constant and the result are not neccessarily the
                // same width. Make an all-ones value by inserting a AShr.
                Value *X = IC->getOperand(0);
-              unsigned Bits = X->getType()->getPrimitiveSizeInBits();
+              uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
                Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
                Instruction *SRA = BinaryOperator::create(Instruction::AShr, X,
                                                          ShAmt, "ones");
@@ -6880,8 +7282,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
                // Finally, convert to the type of the select RHS.  We figure out
                // if this requires a SExt, Trunc or BitCast based on the sizes.
                Instruction::CastOps opc = Instruction::BitCast;
-              unsigned SRASize = SRA->getType()->getPrimitiveSizeInBits();
-              unsigned SISize  = SI.getType()->getPrimitiveSizeInBits();
+              uint32_t SRASize = SRA->getType()->getPrimitiveSizeInBits();
+              uint32_t SISize  = SI.getType()->getPrimitiveSizeInBits();
                if (SRASize < SISize)
                  opc = Instruction::SExt;
                else if (SRASize > SISize)
@@ -6892,7 +7294,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
  
  
          // If one of the constants is zero (we know they can't both be) and we
-        // have a fcmp instruction with zero, and we have an 'and' with the
+        // have an icmp instruction with zero, and we have an 'and' with the
          // non-constant value, eliminate this whole mess.  This corresponds to
          // cases like this: ((X & 27) ? 27 : 0)
          if (TrueValC->isZero() || FalseValC->isZero())
@@ -7110,10 +7512,7 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
      if (isa<PointerType>(CI->getOperand(0)->getType()))
        return GetKnownAlignment(CI->getOperand(0), TD);
      return 0;
-  } else if (isa<GetElementPtrInst>(V) ||
-             (isa<ConstantExpr>(V) && 
-              cast<ConstantExpr>(V)->getOpcode()==Instruction::GetElementPtr)) {
-    User *GEPI = cast<User>(V);
+  } else if (User *GEPI = dyn_castGetElementPtr(V)) {
      unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD);
      if (BaseAlignment == 0) return 0;
      
@@ -7292,7 +7691,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
            for (unsigned i = 0; i != 16; ++i) {
              if (isa<UndefValue>(Mask->getOperand(i)))
                continue;
-            unsigned Idx =cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+            unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
              Idx &= 31;  // Match the hardware behavior.
              
              if (ExtractedElts[Idx] == 0) {
@@ -7437,6 +7836,14 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    const FunctionType *FT = Callee->getFunctionType();
    const Type *OldRetTy = Caller->getType();
  
+  const FunctionType *ActualFT =
+    cast<FunctionType>(cast<PointerType>(CE->getType())->getElementType());
+  
+  // If the parameter attributes don't match up, don't do the xform.  We don't
+  // want to lose an sret attribute or something.
+  if (FT->getParamAttrs() != ActualFT->getParamAttrs())
+    return false;
+  
    // Check to see if we are changing the return type...
    if (OldRetTy != FT->getReturnType()) {
      if (Callee->isDeclaration() && !Caller->use_empty() && 
@@ -7467,6 +7874,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
      const Type *ParamTy = FT->getParamType(i);
      const Type *ActTy = (*AI)->getType();
      ConstantInt *c = dyn_cast<ConstantInt>(*AI);
+    //Some conversions are safe even if we do not have a body.
      //Either we can cast directly, or we can upconvert the argument
      bool isConvertible = ActTy == ParamTy ||
        (isa<PointerType>(ParamTy) && isa<PointerType>(ActTy)) ||
@@ -7475,6 +7883,40 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
        (c && ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()
         && c->getValue().isStrictlyPositive());
      if (Callee->isDeclaration() && !isConvertible) return false;
+
+    // Most other conversions can be done if we have a body, even if these
+    // lose information, e.g. int->short.
+    // Some conversions cannot be done at all, e.g. float to pointer.
+    // Logic here parallels CastInst::getCastOpcode (the design there
+    // requires legality checks like this be done before calling it).
+    if (ParamTy->isInteger()) {
+      if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
+        if (VActTy->getBitWidth() != ParamTy->getPrimitiveSizeInBits())
+          return false;
+      }
+      if (!ActTy->isInteger() && !ActTy->isFloatingPoint() &&
+          !isa<PointerType>(ActTy))
+        return false;
+    } else if (ParamTy->isFloatingPoint()) {
+      if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
+        if (VActTy->getBitWidth() != ParamTy->getPrimitiveSizeInBits())
+          return false;
+      }
+      if (!ActTy->isInteger() && !ActTy->isFloatingPoint())
+        return false;
+    } else if (const VectorType *VParamTy = dyn_cast<VectorType>(ParamTy)) {
+      if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
+        if (VActTy->getBitWidth() != VParamTy->getBitWidth())
+          return false;
+      }
+      if (VParamTy->getBitWidth() != ActTy->getPrimitiveSizeInBits())      
+        return false;
+    } else if (isa<PointerType>(ParamTy)) {
+      if (!ActTy->isInteger() && !isa<PointerType>(ActTy))
+        return false;
+    } else {
+      return false;
+    }
    }
  
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
@@ -7876,7 +8318,7 @@ static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
  
  Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    Value *PtrOp = GEP.getOperand(0);
-  // Is it 'getelementptr %P, long 0'  or 'getelementptr %P'
+  // Is it 'getelementptr %P, i32 0'  or 'getelementptr %P'
    // If so, eliminate the noop.
    if (GEP.getNumOperands() == 1)
      return ReplaceInstUsesWith(GEP, PtrOp);
@@ -7891,22 +8333,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
      return ReplaceInstUsesWith(GEP, PtrOp);
  
-  // Keep track of whether all indices are zero constants integers.
-  bool AllZeroIndices = true;
-  
    // Eliminate unneeded casts for indices.
    bool MadeChange = false;
    
    gep_type_iterator GTI = gep_type_begin(GEP);
    for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) {
-    // Track whether this GEP has all zero indices, if so, it doesn't move the
-    // input pointer, it just changes its type.
-    if (AllZeroIndices) {
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(i)))
-        AllZeroIndices = CI->isNullValue();
-      else
-        AllZeroIndices = false;
-    }
      if (isa<SequentialType>(*GTI)) {
        if (CastInst *CI = dyn_cast<CastInst>(GEP.getOperand(i))) {
          if (CI->getOpcode() == Instruction::ZExt ||
@@ -7942,7 +8373,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    // If this GEP instruction doesn't move the pointer, and if the input operand
    // is a bitcast of another pointer, just replace the GEP with a bitcast of the
    // real input to the dest type.
-  if (AllZeroIndices && isa<BitCastInst>(GEP.getOperand(0)))
+  if (GEP.hasAllZeroIndices() && isa<BitCastInst>(GEP.getOperand(0)))
      return new BitCastInst(cast<BitCastInst>(GEP.getOperand(0))->getOperand(0),
                             GEP.getType());
      
@@ -8206,13 +8637,6 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
  Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
    Value *Op = FI.getOperand(0);
  
-  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
-  if (CastInst *CI = dyn_cast<CastInst>(Op))
-    if (isa<PointerType>(CI->getOperand(0)->getType())) {
-      FI.setOperand(0, CI->getOperand(0));
-      return &FI;
-    }
-
    // free undef -> unreachable.
    if (isa<UndefValue>(Op)) {
      // Insert a new store to null because we cannot modify the CFG here.
@@ -8220,11 +8644,33 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
                    UndefValue::get(PointerType::get(Type::Int1Ty)), &FI);
      return EraseInstFromFunction(FI);
    }
-
+  
    // If we have 'free null' delete the instruction.  This can happen in stl code
    // when lots of inlining happens.
    if (isa<ConstantPointerNull>(Op))
      return EraseInstFromFunction(FI);
+  
+  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
+  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op)) {
+    FI.setOperand(0, CI->getOperand(0));
+    return &FI;
+  }
+  
+  // Change free (gep X, 0,0,0,0) into free(X)
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+    if (GEPI->hasAllZeroIndices()) {
+      AddToWorkList(GEPI);
+      FI.setOperand(0, GEPI->getOperand(0));
+      return &FI;
+    }
+  }
+  
+  // Change free(malloc) into nothing, if the malloc has a single use.
+  if (MallocInst *MI = dyn_cast<MallocInst>(Op))
+    if (MI->hasOneUse()) {
+      EraseInstFromFunction(FI);
+      return EraseInstFromFunction(*MI);
+    }
  
    return 0;
  }
@@ -8327,8 +8773,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
    }
  
    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op))
-    if (isa<ConstantPointerNull>(GEPI->getOperand(0)) ||
-        isa<UndefValue>(GEPI->getOperand(0))) {
+    if (isa<ConstantPointerNull>(GEPI->getOperand(0))) {
        // Insert a new store to null instruction before the load to indicate
        // that this code is not reachable.  We do this instead of inserting
        // an unreachable instruction directly because we cannot modify the
@@ -8576,66 +9021,118 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
    // ends with an unconditional branch, try to move it to the successor block.
    BBI = &SI; ++BBI;
    if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
-    if (BI->isUnconditional()) {
-      // Check to see if the successor block has exactly two incoming edges.  If
-      // so, see if the other predecessor contains a store to the same location.
-      // if so, insert a PHI node (if needed) and move the stores down.
-      BasicBlock *Dest = BI->getSuccessor(0);
-
-      pred_iterator PI = pred_begin(Dest);
-      BasicBlock *Other = 0;
-      if (*PI != BI->getParent())
-        Other = *PI;
-      ++PI;
-      if (PI != pred_end(Dest)) {
-        if (*PI != BI->getParent())
-          if (Other)
-            Other = 0;
-          else
-            Other = *PI;
-        if (++PI != pred_end(Dest))
-          Other = 0;
-      }
-      if (Other) {  // If only one other pred...
-        BBI = Other->getTerminator();
-        // Make sure this other block ends in an unconditional branch and that
-        // there is an instruction before the branch.
-        if (isa<BranchInst>(BBI) && cast<BranchInst>(BBI)->isUnconditional() &&
-            BBI != Other->begin()) {
-          --BBI;
-          StoreInst *OtherStore = dyn_cast<StoreInst>(BBI);
-          
-          // If this instruction is a store to the same location.
-          if (OtherStore && OtherStore->getOperand(1) == SI.getOperand(1)) {
-            // Okay, we know we can perform this transformation.  Insert a PHI
-            // node now if we need it.
-            Value *MergedVal = OtherStore->getOperand(0);
-            if (MergedVal != SI.getOperand(0)) {
-              PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
-              PN->reserveOperandSpace(2);
-              PN->addIncoming(SI.getOperand(0), SI.getParent());
-              PN->addIncoming(OtherStore->getOperand(0), Other);
-              MergedVal = InsertNewInstBefore(PN, Dest->front());
-            }
-            
-            // Advance to a place where it is safe to insert the new store and
-            // insert it.
-            BBI = Dest->begin();
-            while (isa<PHINode>(BBI)) ++BBI;
-            InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
-                                              OtherStore->isVolatile()), *BBI);
-
-            // Nuke the old stores.
-            EraseInstFromFunction(SI);
-            EraseInstFromFunction(*OtherStore);
-            ++NumCombined;
-            return 0;
-          }
-        }
+    if (BI->isUnconditional())
+      if (SimplifyStoreAtEndOfBlock(SI))
+        return 0;  // xform done!
+  
+  return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+///   if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+///   *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+  BasicBlock *StoreBB = SI.getParent();
+  
+  // Check to see if the successor block has exactly two incoming edges.  If
+  // so, see if the other predecessor contains a store to the same location.
+  // if so, insert a PHI node (if needed) and move the stores down.
+  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+  
+  // Determine whether Dest has exactly two predecessors and, if so, compute
+  // the other predecessor.
+  pred_iterator PI = pred_begin(DestBB);
+  BasicBlock *OtherBB = 0;
+  if (*PI != StoreBB)
+    OtherBB = *PI;
+  ++PI;
+  if (PI == pred_end(DestBB))
+    return false;
+  
+  if (*PI != StoreBB) {
+    if (OtherBB)
+      return false;
+    OtherBB = *PI;
+  }
+  if (++PI != pred_end(DestBB))
+    return false;
+  
+  
+  // Verify that the other block ends in a branch and is not otherwise empty.
+  BasicBlock::iterator BBI = OtherBB->getTerminator();
+  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+  if (!OtherBr || BBI == OtherBB->begin())
+    return false;
+  
+  // If the other block ends in an unconditional branch, check for the 'if then
+  // else' case.  there is an instruction before the branch.
+  StoreInst *OtherStore = 0;
+  if (OtherBr->isUnconditional()) {
+    // If this isn't a store, or isn't a store to the same location, bail out.
+    --BBI;
+    OtherStore = dyn_cast<StoreInst>(BBI);
+    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1))
+      return false;
+  } else {
+    // Otherwise, the other block ended with a conditional branch. If one of the
+    // destinations is StoreBB, then we have the if/then case.
+    if (OtherBr->getSuccessor(0) != StoreBB && 
+        OtherBr->getSuccessor(1) != StoreBB)
+      return false;
+    
+    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+    // if/then triangle.  See if there is a store to the same ptr as SI that
+    // lives in OtherBB.
+    for (;; --BBI) {
+      // Check to see if we find the matching store.
+      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+        if (OtherStore->getOperand(1) != SI.getOperand(1))
+          return false;
+        break;
        }
+      // If we find something that may be using the stored value, or if we run
+      // out of instructions, we can't do the xform.
+      if (isa<LoadInst>(BBI) || BBI->mayWriteToMemory() ||
+          BBI == OtherBB->begin())
+        return false;
+    }
+    
+    // In order to eliminate the store in OtherBr, we have to
+    // make sure nothing reads the stored value in StoreBB.
+    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+      // FIXME: This should really be AA driven.
+      if (isa<LoadInst>(I) || I->mayWriteToMemory())
+        return false;
      }
+  }
    
-  return 0;
+  // Insert a PHI node now if we need it.
+  Value *MergedVal = OtherStore->getOperand(0);
+  if (MergedVal != SI.getOperand(0)) {
+    PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
+    PN->reserveOperandSpace(2);
+    PN->addIncoming(SI.getOperand(0), SI.getParent());
+    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+    MergedVal = InsertNewInstBefore(PN, DestBB->front());
+  }
+  
+  // Advance to a place where it is safe to insert the new store and
+  // insert it.
+  BBI = DestBB->begin();
+  while (isa<PHINode>(BBI)) ++BBI;
+  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
+                                    OtherStore->isVolatile()), *BBI);
+  
+  // Nuke the old stores.
+  EraseInstFromFunction(SI);
+  EraseInstFromFunction(*OtherStore);
+  ++NumCombined;
+  return true;
  }
  
  
@@ -8843,11 +9340,19 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
    // If extracting a specified index from the vector, see if we can recursively
    // find a previously computed scalar that was inserted into the vector.
    if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
+    unsigned IndexVal = IdxC->getZExtValue();
+    unsigned VectorWidth = 
+      cast<VectorType>(EI.getOperand(0)->getType())->getNumElements();
+      
+    // If this is extracting an invalid index, turn this into undef, to avoid
+    // crashing the code below.
+    if (IndexVal >= VectorWidth)
+      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+    
      // This instruction only demands the single element from the input vector.
      // If the input vector has a single use, simplify it based on this use
      // property.
-    uint64_t IndexVal = IdxC->getZExtValue();
-    if (EI.getOperand(0)->hasOneUse()) {
+    if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
        uint64_t UndefElts;
        if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
                                                  1 << IndexVal,
@@ -8859,6 +9364,17 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
      
      if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
        return ReplaceInstUsesWith(EI, Elt);
+    
+    // If the this extractelement is directly using a bitcast from a vector of
+    // the same number of elements, see if we can find the source element from
+    // it.  In this case, we will end up needing to bitcast the scalars.
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+      if (const VectorType *VT = 
+              dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+        if (VT->getNumElements() == VectorWidth)
+          if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+            return new BitCastInst(Elt, EI.getType());
+    }
    }
    
    if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -9060,13 +9576,18 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
    Value *ScalarOp = IE.getOperand(1);
    Value *IdxOp    = IE.getOperand(2);
    
+  // Inserting an undef or into an undefined place, remove this.
+  if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
+    ReplaceInstUsesWith(IE, VecOp);
+  
    // If the inserted element was extracted from some other vector, and if the 
    // indexes are constant, try to turn this into a shufflevector operation.
    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
          EI->getOperand(0)->getType() == IE.getType()) {
        unsigned NumVectorElts = IE.getType()->getNumElements();
-      unsigned ExtractedIdx=cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+      unsigned ExtractedIdx =
+        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
        
        if (ExtractedIdx >= NumVectorElts) // Out of range extract.