X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FInstructionCombining.cpp;h=9957bc19c69e0cc7001b1c43f133603868fa0eee;hb=ac151da20487e5954cdad3e8f7ffd48f768ab254;hp=85fd2c3bdb745b9fd848677639d4f9943c43ea1a;hpb=827cde1c8319e51463007078a7ce3660ebc93036;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 85fd2c3bdb7..72a27f500c9 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -2,14 +2,14 @@
 //
 //                     The LLVM Compiler Infrastructure
 //
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // InstructionCombining - Combine instructions to form fewer, simple
-// instructions.  This pass does not modify the CFG This pass is where algebraic
-// simplification happens.
+// instructions.  This pass does not modify the CFG.  This pass is where
+// algebraic simplification happens.
 //
 // This pass combines things like:
 //    %Y = add i32 %X, 1
@@ -39,12 +39,13 @@
 #include "llvm/Pass.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
-#include "llvm/ParameterAttributes.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/InstVisitor.h"
@@ -57,6 +58,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include <algorithm>
+#include <climits>
 #include <sstream>
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -120,8 +122,8 @@ namespace {
     /// the work lists because they might get more simplified now.
     ///
     void AddUsesToWorkList(Instruction &I) {
-      for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
-        if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i)))
+      for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+        if (Instruction *Op = dyn_cast<Instruction>(*i))
           AddToWorkList(Op);
     }
     
@@ -134,11 +136,11 @@ namespace {
     Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) {
       Value *R = I.getOperand(op);
       
-      for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
-        if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
+      for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+        if (Instruction *Op = dyn_cast<Instruction>(*i)) {
           AddToWorkList(Op);
           // Set the operand to undef to drop the use.
-          I.setOperand(i, UndefValue::get(Op->getType()));
+          *i = UndefValue::get(Op->getType());
         }
       
       return R;
@@ -184,6 +186,8 @@ namespace {
     Instruction *visitAShr(BinaryOperator &I);
     Instruction *visitLShr(BinaryOperator &I);
     Instruction *commonShiftTransforms(BinaryOperator &I);
+    Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+                                      Constant *RHSC);
     Instruction *visitFCmpInst(FCmpInst &I);
     Instruction *visitICmpInst(ICmpInst &I);
     Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
@@ -203,14 +207,14 @@ namespace {
     Instruction *visitTrunc(TruncInst &CI);
     Instruction *visitZExt(ZExtInst &CI);
     Instruction *visitSExt(SExtInst &CI);
-    Instruction *visitFPTrunc(CastInst &CI);
+    Instruction *visitFPTrunc(FPTruncInst &CI);
     Instruction *visitFPExt(CastInst &CI);
-    Instruction *visitFPToUI(CastInst &CI);
-    Instruction *visitFPToSI(CastInst &CI);
+    Instruction *visitFPToUI(FPToUIInst &FI);
+    Instruction *visitFPToSI(FPToSIInst &FI);
     Instruction *visitUIToFP(CastInst &CI);
     Instruction *visitSIToFP(CastInst &CI);
     Instruction *visitPtrToInt(CastInst &CI);
-    Instruction *visitIntToPtr(CastInst &CI);
+    Instruction *visitIntToPtr(IntToPtrInst &CI);
     Instruction *visitBitCast(BitCastInst &CI);
     Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                                 Instruction *FI);
@@ -228,6 +232,7 @@ namespace {
     Instruction *visitInsertElementInst(InsertElementInst &IE);
     Instruction *visitExtractElementInst(ExtractElementInst &EI);
     Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+    Instruction *visitExtractValueInst(ExtractValueInst &EV);
 
     // visitInstruction - Specify what to return for unhandled instructions...
     Instruction *visitInstruction(Instruction &I) { return 0; }
@@ -236,6 +241,9 @@ namespace {
     Instruction *visitCallSite(CallSite CS);
     bool transformConstExprCastCall(CallSite CS);
     Instruction *transformCallThroughTrampoline(CallSite CS);
+    Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+                                   bool DoXform = true);
+    bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
 
   public:
     // InsertNewInstBefore - insert an instruction New before instruction Old
@@ -260,10 +268,15 @@ namespace {
       if (Constant *CV = dyn_cast<Constant>(V))
         return ConstantExpr::getCast(opc, CV, Ty);
       
-      Instruction *C = CastInst::create(opc, V, Ty, V->getName(), &Pos);
+      Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
       AddToWorkList(C);
       return C;
     }
+        
+    Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
+      return InsertCastBefore(Instruction::BitCast, V, Ty, Pos);
+    }
+
 
     // ReplaceInstUsesWith - This method is to be used when an instruction is
     // found to be dead, replacable with another preexisting expression.  Here
@@ -312,6 +325,19 @@ namespace {
       I.eraseFromParent();
       return 0;  // Don't do anything with FI
     }
+        
+    void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+                           APInt &KnownOne, unsigned Depth = 0) const {
+      return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+    }
+    
+    bool MaskedValueIsZero(Value *V, const APInt &Mask, 
+                           unsigned Depth = 0) const {
+      return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+    }
+    unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+      return llvm::ComputeNumSignBits(Op, TD, Depth);
+    }
 
   private:
     /// InsertOperandCastBefore - This inserts a cast of V to DestTy before the
@@ -361,14 +387,25 @@ namespace {
     Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
     Instruction *MatchBSwap(BinaryOperator &I);
     bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+    Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+    Instruction *SimplifyMemSet(MemSetInst *MI);
+
 
     Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
-  };
 
-  char InstCombiner::ID = 0;
-  RegisterPass<InstCombiner> X("instcombine", "Combine redundant instructions");
+    bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+                                    unsigned CastOpc,
+                                    int &NumCastsRemoved);
+    unsigned GetOrEnforceKnownAlignment(Value *V,
+                                        unsigned PrefAlign = 0);
+
+  };
 }
 
+char InstCombiner::ID = 0;
+static RegisterPass<InstCombiner>
+X("instcombine", "Combine redundant instructions");
+
 // getComplexity:  Assign a complexity or rank value to LLVM Values...
 //   0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
 static unsigned getComplexity(Value *V) {
@@ -492,7 +529,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
 
           // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
           Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
-          Instruction *New = BinaryOperator::create(Opcode, Op->getOperand(0),
+          Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
                                                     Op1->getOperand(0),
                                                     Op1->getName(), &I);
           AddToWorkList(New);
@@ -525,6 +562,11 @@ static inline Value *dyn_castNegVal(Value *V) {
   // Constants can be considered to be negated values if they can be folded.
   if (ConstantInt *C = dyn_cast<ConstantInt>(V))
     return ConstantExpr::getNeg(C);
+
+  if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+    if (C->getType()->getElementType()->isInteger())
+      return ConstantExpr::getNeg(C);
+
   return 0;
 }
 
@@ -571,6 +613,17 @@ static User *dyn_castGetElementPtr(Value *V) {
   return false;
 }
 
+/// getOpcode - If this is an Instruction or a ConstantExpr, return the
+/// opcode value. Otherwise return UserOp1.
+static unsigned getOpcode(const Value *V) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getOpcode();
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    return CE->getOpcode();
+  // Use UserOp1 to mean there's no opcode.
+  return Instruction::UserOp1;
+}
+
 /// AddOne - Add one to a ConstantInt
 static ConstantInt *AddOne(ConstantInt *C) {
   APInt Val(C->getValue());
@@ -597,225 +650,29 @@ static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) {
 static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) {
   return ConstantInt::get(C1->getValue() * C2->getValue());
 }
-
-/// ComputeMaskedBits - Determine which of the bits specified in Mask are
-/// known to be either zero or one and return them in the KnownZero/KnownOne
-/// bit sets.  This code only analyzes bits in Mask, in order to short-circuit
-/// processing.
-/// NOTE: we cannot consider 'undef' to be "IsZero" here.  The problem is that
-/// we cannot optimize based on the assumption that it is zero without changing
-/// it to be an explicit zero.  If we don't change it to zero, other code could
-/// optimized based on the contradictory assumption that it is non-zero.
-/// Because instcombine aggressively folds operations with undef args anyway,
-/// this won't lose us code quality.
-static void ComputeMaskedBits(Value *V, const APInt &Mask, APInt& KnownZero, 
-                              APInt& KnownOne, unsigned Depth = 0) {
-  assert(V && "No Value?");
-  assert(Depth <= 6 && "Limit Search Depth");
-  uint32_t BitWidth = Mask.getBitWidth();
-  assert(cast<IntegerType>(V->getType())->getBitWidth() == BitWidth &&
-         KnownZero.getBitWidth() == BitWidth && 
-         KnownOne.getBitWidth() == BitWidth &&
-         "V, Mask, KnownOne and KnownZero should have same BitWidth");
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    // We know all of the bits for a constant!
-    KnownOne = CI->getValue() & Mask;
-    KnownZero = ~KnownOne & Mask;
-    return;
-  }
-
-  if (Depth == 6 || Mask == 0)
-    return;  // Limit search depth.
-
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return;
-
-  KnownZero.clear(); KnownOne.clear();   // Don't know anything.
-  APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
-  
-  switch (I->getOpcode()) {
-  case Instruction::And: {
-    // If either the LHS or the RHS are Zero, the result is zero.
-    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
-    APInt Mask2(Mask & ~KnownZero);
-    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
-    // Output known-1 bits are only known if set in both the LHS & RHS.
-    KnownOne &= KnownOne2;
-    // Output known-0 are known to be clear if zero in either the LHS | RHS.
-    KnownZero |= KnownZero2;
-    return;
-  }
-  case Instruction::Or: {
-    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
-    APInt Mask2(Mask & ~KnownOne);
-    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
-    // Output known-0 bits are only known if clear in both the LHS & RHS.
-    KnownZero &= KnownZero2;
-    // Output known-1 are known to be set if set in either the LHS | RHS.
-    KnownOne |= KnownOne2;
-    return;
-  }
-  case Instruction::Xor: {
-    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    
-    // Output known-0 bits are known if clear or set in both the LHS & RHS.
-    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
-    // Output known-1 are known to be set if set in only one of the LHS, RHS.
-    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
-    KnownZero = KnownZeroOut;
-    return;
+/// MultiplyOverflows - True if the multiply can not be expressed in an int
+/// this size.
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
+  uint32_t W = C1->getBitWidth();
+  APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
+  if (sign) {
+    LHSExt.sext(W * 2);
+    RHSExt.sext(W * 2);
+  } else {
+    LHSExt.zext(W * 2);
+    RHSExt.zext(W * 2);
   }
-  case Instruction::Select:
-    ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
 
-    // Only known if known in both the LHS and RHS.
-    KnownOne &= KnownOne2;
-    KnownZero &= KnownZero2;
-    return;
-  case Instruction::FPTrunc:
-  case Instruction::FPExt:
-  case Instruction::FPToUI:
-  case Instruction::FPToSI:
-  case Instruction::SIToFP:
-  case Instruction::PtrToInt:
-  case Instruction::UIToFP:
-  case Instruction::IntToPtr:
-    return; // Can't work with floating point or pointers
-  case Instruction::Trunc: {
-    // All these have integer operands
-    uint32_t SrcBitWidth = 
-      cast<IntegerType>(I->getOperand(0)->getType())->getBitWidth();
-    APInt MaskIn(Mask);
-    MaskIn.zext(SrcBitWidth);
-    KnownZero.zext(SrcBitWidth);
-    KnownOne.zext(SrcBitWidth);
-    ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
-    KnownZero.trunc(BitWidth);
-    KnownOne.trunc(BitWidth);
-    return;
-  }
-  case Instruction::BitCast: {
-    const Type *SrcTy = I->getOperand(0)->getType();
-    if (SrcTy->isInteger()) {
-      ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
-      return;
-    }
-    break;
-  }
-  case Instruction::ZExt:  {
-    // Compute the bits in the result that are not present in the input.
-    const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
-    uint32_t SrcBitWidth = SrcTy->getBitWidth();
-      
-    APInt MaskIn(Mask);
-    MaskIn.trunc(SrcBitWidth);
-    KnownZero.trunc(SrcBitWidth);
-    KnownOne.trunc(SrcBitWidth);
-    ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    // The top bits are known to be zero.
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
-    KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
-    return;
-  }
-  case Instruction::SExt: {
-    // Compute the bits in the result that are not present in the input.
-    const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
-    uint32_t SrcBitWidth = SrcTy->getBitWidth();
-      
-    APInt MaskIn(Mask); 
-    MaskIn.trunc(SrcBitWidth);
-    KnownZero.trunc(SrcBitWidth);
-    KnownOne.trunc(SrcBitWidth);
-    ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-    KnownZero.zext(BitWidth);
-    KnownOne.zext(BitWidth);
+  APInt MulExt = LHSExt * RHSExt;
 
-    // If the sign bit of the input is known set or clear, then we know the
-    // top bits of the result.
-    if (KnownZero[SrcBitWidth-1])             // Input sign bit known zero
-      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
-    else if (KnownOne[SrcBitWidth-1])           // Input sign bit known set
-      KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
-    return;
-  }
-  case Instruction::Shl:
-    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
-    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-      APInt Mask2(Mask.lshr(ShiftAmt));
-      ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-      KnownZero <<= ShiftAmt;
-      KnownOne  <<= ShiftAmt;
-      KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
-      return;
-    }
-    break;
-  case Instruction::LShr:
-    // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
-    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      // Compute the new bits that are at the top now.
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-      
-      // Unsigned shift right.
-      APInt Mask2(Mask.shl(ShiftAmt));
-      ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1);
-      assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); 
-      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
-      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
-      // high bits known zero.
-      KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
-      return;
-    }
-    break;
-  case Instruction::AShr:
-    // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
-    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      // Compute the new bits that are at the top now.
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-      
-      // Signed shift right.
-      APInt Mask2(Mask.shl(ShiftAmt));
-      ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1);
-      assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); 
-      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
-      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
-        
-      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
-      if (KnownZero[BitWidth-ShiftAmt-1])    // New bits are known zero.
-        KnownZero |= HighBits;
-      else if (KnownOne[BitWidth-ShiftAmt-1])  // New bits are known one.
-        KnownOne |= HighBits;
-      return;
-    }
-    break;
-  }
+  if (sign) {
+    APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+    APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+    return MulExt.slt(Min) || MulExt.sgt(Max);
+  } else 
+    return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
 }
 
-/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
-/// this predicate to simplify operations downstream.  Mask is known to be zero
-/// for bits that V cannot have.
-static bool MaskedValueIsZero(Value *V, const APInt& Mask, unsigned Depth = 0) {
-  APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
-  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, Depth);
-  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
-  return (KnownZero & Mask) == Mask;
-}
 
 /// ShrinkDemandedConstant - Check to see if the specified operand of the 
 /// specified instruction is a constant integer.  If so, check to see if there
@@ -947,7 +804,9 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
   APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
   switch (I->getOpcode()) {
-  default: break;
+  default:
+    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+    break;
   case Instruction::And:
     // If either the LHS or the RHS are Zero, the result is zero.
     if (SimplifyDemandedBits(I->getOperand(1), DemandedMask,
@@ -1059,7 +918,7 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
     if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
       Instruction *Or =
-        BinaryOperator::createOr(I->getOperand(0), I->getOperand(1),
+        BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                  I->getName());
       InsertNewInstBefore(Or, *I);
       return UpdateValueUsesWith(I, Or);
@@ -1074,7 +933,7 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
       if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
         Constant *AndC = ConstantInt::get(~RHSKnownOne & DemandedMask);
         Instruction *And = 
-          BinaryOperator::createAnd(I->getOperand(0), AndC, "tmp");
+          BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
         InsertNewInstBefore(And, *I);
         return UpdateValueUsesWith(I, And);
       }
@@ -1233,7 +1092,7 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
       // Turn it into OR if input bits are zero.
       if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
         Instruction *Or =
-          BinaryOperator::createOr(I->getOperand(0), I->getOperand(1),
+          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
                                    I->getName());
         InsertNewInstBefore(Or, *I);
         return UpdateValueUsesWith(I, Or);
@@ -1293,6 +1152,9 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
                                LHSKnownZero, LHSKnownOne, Depth+1))
         return true;
     }
+    // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+    // the known zeros and ones.
+    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
     break;
   case Instruction::Shl:
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -1338,7 +1200,7 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
     // the shift amount is >= the size of the datatype, which is undefined.
     if (DemandedMask == 1) {
       // Perform the logical shift right.
-      Value *NewVal = BinaryOperator::createLShr(
+      Value *NewVal = BinaryOperator::CreateLShr(
                         I->getOperand(0), I->getOperand(1), I->getName());
       InsertNewInstBefore(cast<Instruction>(NewVal), *I);
       return UpdateValueUsesWith(I, NewVal);
@@ -1376,10 +1238,10 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
         
       // If the input sign bit is known to be zero, or if none of the top bits
       // are demanded, turn this into an unsigned shift right.
-      if (RHSKnownZero[BitWidth-ShiftAmt-1] || 
+      if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] || 
           (HighBits & ~DemandedMask) == HighBits) {
         // Perform the logical shift right.
-        Value *NewVal = BinaryOperator::createLShr(
+        Value *NewVal = BinaryOperator::CreateLShr(
                           I->getOperand(0), SA, I->getName());
         InsertNewInstBefore(cast<Instruction>(NewVal), *I);
         return UpdateValueUsesWith(I, NewVal);
@@ -1388,6 +1250,101 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask,
       }
     }
     break;
+  case Instruction::SRem:
+    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      APInt RA = Rem->getValue();
+      if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+        APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+        if (SimplifyDemandedBits(I->getOperand(0), Mask2,
+                                 LHSKnownZero, LHSKnownOne, Depth+1))
+          return true;
+
+        if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+          LHSKnownZero |= ~LowBits;
+        else if (LHSKnownOne[BitWidth-1])
+          LHSKnownOne |= ~LowBits;
+
+        KnownZero |= LHSKnownZero & DemandedMask;
+        KnownOne |= LHSKnownOne & DemandedMask;
+
+        assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); 
+      }
+    }
+    break;
+  case Instruction::URem: {
+    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      APInt RA = Rem->getValue();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = (RA - 1);
+        APInt Mask2 = LowBits & DemandedMask;
+        KnownZero |= ~LowBits & DemandedMask;
+        if (SimplifyDemandedBits(I->getOperand(0), Mask2,
+                                 KnownZero, KnownOne, Depth+1))
+          return true;
+
+        assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); 
+        break;
+      }
+    }
+
+    APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    if (SimplifyDemandedBits(I->getOperand(0), AllOnes,
+                             KnownZero2, KnownOne2, Depth+1))
+      return true;
+
+    uint32_t Leaders = KnownZero2.countLeadingOnes();
+    if (SimplifyDemandedBits(I->getOperand(1), AllOnes,
+                             KnownZero2, KnownOne2, Depth+1))
+      return true;
+
+    Leaders = std::max(Leaders,
+                       KnownZero2.countLeadingOnes());
+    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+    break;
+  }
+  case Instruction::Call:
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::bswap: {
+        // If the only bits demanded come from one byte of the bswap result,
+        // just shift the input byte into position to eliminate the bswap.
+        unsigned NLZ = DemandedMask.countLeadingZeros();
+        unsigned NTZ = DemandedMask.countTrailingZeros();
+          
+        // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
+        // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
+        // have 14 leading zeros, round to 8.
+        NLZ &= ~7;
+        NTZ &= ~7;
+        // If we need exactly one byte, we can do this transformation.
+        if (BitWidth-NLZ-NTZ == 8) {
+          unsigned ResultBit = NTZ;
+          unsigned InputBit = BitWidth-NTZ-8;
+          
+          // Replace this with either a left or right shift to get the byte into
+          // the right place.
+          Instruction *NewVal;
+          if (InputBit > ResultBit)
+            NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+                    ConstantInt::get(I->getType(), InputBit-ResultBit));
+          else
+            NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+                    ConstantInt::get(I->getType(), ResultBit-InputBit));
+          NewVal->takeName(I);
+          InsertNewInstBefore(NewVal, *I);
+          return UpdateValueUsesWith(I, NewVal);
+        }
+          
+        // TODO: Could compute known zero/one bits based on the input.
+        break;
+      }
+      }
+    }
+    ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+    break;
   }
   
   // If the client is only demanding bits that we know, return the known
@@ -1635,19 +1592,19 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
           default: assert(0 && "Case stmts out of sync!");
           case Intrinsic::x86_sse_sub_ss:
           case Intrinsic::x86_sse2_sub_sd:
-            TmpV = InsertNewInstBefore(BinaryOperator::createSub(LHS, RHS,
+            TmpV = InsertNewInstBefore(BinaryOperator::CreateSub(LHS, RHS,
                                                         II->getName()), *II);
             break;
           case Intrinsic::x86_sse_mul_ss:
           case Intrinsic::x86_sse2_mul_sd:
-            TmpV = InsertNewInstBefore(BinaryOperator::createMul(LHS, RHS,
+            TmpV = InsertNewInstBefore(BinaryOperator::CreateMul(LHS, RHS,
                                                          II->getName()), *II);
             break;
           }
           
           Instruction *New =
-            new InsertElementInst(UndefValue::get(II->getType()), TmpV, 0U,
-                                  II->getName());
+            InsertElementInst::Create(UndefValue::get(II->getType()), TmpV, 0U,
+                                      II->getName());
           InsertNewInstBefore(New, *II);
           AddSoonDeadInstToWorklist(*II, 0);
           return New;
@@ -1665,21 +1622,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
   return MadeChange ? I : 0;
 }
 
-/// @returns true if the specified compare predicate is
-/// true when both operands are equal...
-/// @brief Determine if the icmp Predicate is true when both operands are equal
-static bool isTrueWhenEqual(ICmpInst::Predicate pred) {
-  return pred == ICmpInst::ICMP_EQ  || pred == ICmpInst::ICMP_UGE ||
-         pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_ULE ||
-         pred == ICmpInst::ICMP_SLE;
-}
-
-/// @returns true if the specified compare instruction is
-/// true when both operands are equal...
-/// @brief Determine if the ICmpInst returns true when both operands are equal
-static bool isTrueWhenEqual(ICmpInst &ICI) {
-  return isTrueWhenEqual(ICI.getPredicate());
-}
 
 /// AssociativeOpt - Perform an optimization on an associative operator.  This
 /// function is designed to check a chain of associative operators for a
@@ -1690,7 +1632,7 @@ static bool isTrueWhenEqual(ICmpInst &ICI) {
 /// 'shouldApply' and 'apply' methods.
 ///
 template<typename Functor>
-Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
+static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
   unsigned Opcode = Root.getOpcode();
   Value *LHS = Root.getOperand(0);
 
@@ -1713,8 +1655,6 @@ Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
     // If the functor wants to apply the optimization to the RHS of LHSI,
     // reassociate the expression from ((? op A) op B) to (? op (A op B))
     if (ShouldApply) {
-      BasicBlock *BB = Root.getParent();
-
       // Now all of the instructions are in the current basic block, go ahead
       // and perform the reassociation.
       Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));
@@ -1730,9 +1670,8 @@ Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
       }
       Root.replaceAllUsesWith(TmpLHSI);          // Users now use TmpLHSI
       TmpLHSI->setOperand(1, &Root);             // TmpLHSI now uses the root
-      TmpLHSI->getParent()->getInstList().remove(TmpLHSI);
       BasicBlock::iterator ARI = &Root; ++ARI;
-      BB->getInstList().insert(ARI, TmpLHSI);    // Move TmpLHSI to after Root
+      TmpLHSI->moveBefore(ARI);                  // Move TmpLHSI to after Root
       ARI = Root;
 
       // Now propagate the ExtraOperand down the chain of instructions until we
@@ -1741,8 +1680,7 @@ Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
         Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
         // Move the instruction to immediately before the chain we are
         // constructing to avoid breaking dominance properties.
-        NextLHSI->getParent()->getInstList().remove(NextLHSI);
-        BB->getInstList().insert(ARI, NextLHSI);
+        NextLHSI->moveBefore(ARI);
         ARI = NextLHSI;
 
         Value *NextOp = NextLHSI->getOperand(1);
@@ -1761,6 +1699,7 @@ Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
   return 0;
 }
 
+namespace {
 
 // AddRHS - Implements: X + X --> X << 1
 struct AddRHS {
@@ -1768,8 +1707,8 @@ struct AddRHS {
   AddRHS(Value *rhs) : RHS(rhs) {}
   bool shouldApply(Value *LHS) const { return LHS == RHS; }
   Instruction *apply(BinaryOperator &Add) const {
-    return BinaryOperator::createShl(Add.getOperand(0),
-                                  ConstantInt::get(Add.getType(), 1));
+    return BinaryOperator::CreateShl(Add.getOperand(0),
+                                     ConstantInt::get(Add.getType(), 1));
   }
 };
 
@@ -1784,17 +1723,19 @@ struct AddMaskingAnd {
            ConstantExpr::getAnd(C1, C2)->isNullValue();
   }
   Instruction *apply(BinaryOperator &Add) const {
-    return BinaryOperator::createOr(Add.getOperand(0), Add.getOperand(1));
+    return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
   }
 };
 
+}
+
 static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                              InstCombiner *IC) {
   if (CastInst *CI = dyn_cast<CastInst>(&I)) {
     if (Constant *SOC = dyn_cast<Constant>(SO))
       return ConstantExpr::getCast(CI->getOpcode(), SOC, I.getType());
 
-    return IC->InsertNewInstBefore(CastInst::create(
+    return IC->InsertNewInstBefore(CastInst::Create(
           CI->getOpcode(), SO, I.getType(), SO->getName() + ".cast"), I);
   }
 
@@ -1813,9 +1754,9 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
     std::swap(Op0, Op1);
   Instruction *New;
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
-    New = BinaryOperator::create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
+    New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
   else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
-    New = CmpInst::create(CI->getOpcode(), CI->getPredicate(), Op0, Op1, 
+    New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1, 
                           SO->getName()+".cmp");
   else {
     assert(0 && "Unknown binary instruction type!");
@@ -1842,8 +1783,8 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
     Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);
     Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC);
 
-    return new SelectInst(SI->getCondition(), SelectTrueVal,
-                          SelectFalseVal);
+    return SelectInst::Create(SI->getCondition(), SelectTrueVal,
+                              SelectFalseVal);
   }
   return 0;
 }
@@ -1883,7 +1824,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   }
 
   // Okay, we can do the transformation: create the new PHI node.
-  PHINode *NewPN = new PHINode(I.getType(), "");
+  PHINode *NewPN = PHINode::Create(I.getType(), "");
   NewPN->reserveOperandSpace(PN->getNumOperands()/2);
   InsertNewInstBefore(NewPN, *PN);
   NewPN->takeName(PN);
@@ -1901,11 +1842,11 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
       } else {
         assert(PN->getIncomingBlock(i) == NonConstBB);
         if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) 
-          InV = BinaryOperator::create(BO->getOpcode(),
+          InV = BinaryOperator::Create(BO->getOpcode(),
                                        PN->getIncomingValue(i), C, "phitmp",
                                        NonConstBB->getTerminator());
         else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
-          InV = CmpInst::create(CI->getOpcode(), 
+          InV = CmpInst::Create(CI->getOpcode(), 
                                 CI->getPredicate(),
                                 PN->getIncomingValue(i), C, "phitmp",
                                 NonConstBB->getTerminator());
@@ -1925,7 +1866,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
         InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
       } else {
         assert(PN->getIncomingBlock(i) == NonConstBB);
-        InV = CastInst::create(CI->getOpcode(), PN->getIncomingValue(i), 
+        InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), 
                                I.getType(), "phitmp", 
                                NonConstBB->getTerminator());
         AddToWorkList(cast<Instruction>(InV));
@@ -1936,6 +1877,34 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   return ReplaceInstUsesWith(I, NewPN);
 }
 
+
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+///    (sext (add LHS, RHS))  === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+  // There are different heuristics we can use for this.  Here are some simple
+  // ones.
+  
+  // Add has the property that adding any two 2's complement numbers can only 
+  // have one carry bit which can change a sign.  As such, if LHS and RHS each
+  // have at least two sign bits, we know that the addition of the two values will
+  // sign extend fine.
+  if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+    return true;
+  
+  
+  // If one of the operands only has one non-zero bit, and if the other operand
+  // has a known-zero bit in a more significant place than it (not including the
+  // sign bit) the ripple may go up to and fill the zero, but won't change the
+  // sign.  For example, (X & ~4) + 1.
+  
+  // TODO: Implement.
+  
+  return false;
+}
+
+
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   bool Changed = SimplifyCommutative(I);
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
@@ -1960,7 +1929,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       const APInt& Val = CI->getValue();
       uint32_t BitWidth = Val.getBitWidth();
       if (Val == APInt::getSignBit(BitWidth))
-        return BinaryOperator::createXor(LHS, RHS);
+        return BinaryOperator::CreateXor(LHS, RHS);
       
       // See if SimplifyDemandedBits can simplify this.  This handles stuff like
       // (X & 254)+1 -> (X&254)|1
@@ -2005,9 +1974,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       } while (Size >= 1);
       
       // FIXME: This shouldn't be necessary. When the backends can handle types
-      // with funny bit widths then this whole cascade of if statements should
-      // be removed. It is just here to get the size of the "middle" type back
-      // up to something that the back ends can handle.
+      // with funny bit widths then this switch statement should be removed. It
+      // is just here to get the size of the "middle" type back up to something
+      // that the back ends can handle.
       const Type *MiddleType = 0;
       switch (Size) {
         default: break;
@@ -2023,8 +1992,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
     }
   }
 
+  if (I.getType() == Type::Int1Ty)
+    return BinaryOperator::CreateXor(LHS, RHS);
+
   // X + X --> X << 1
-  if (I.getType()->isInteger() && I.getType() != Type::Int1Ty) {
+  if (I.getType()->isInteger()) {
     if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result;
 
     if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
@@ -2040,29 +2012,39 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   }
 
   // -A + B  -->  B - A
-  if (Value *V = dyn_castNegVal(LHS))
-    return BinaryOperator::createSub(RHS, V);
+  // -A + -B  -->  -(A + B)
+  if (Value *LHSV = dyn_castNegVal(LHS)) {
+    if (LHS->getType()->isIntOrIntVector()) {
+      if (Value *RHSV = dyn_castNegVal(RHS)) {
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum");
+        InsertNewInstBefore(NewAdd, I);
+        return BinaryOperator::CreateNeg(NewAdd);
+      }
+    }
+    
+    return BinaryOperator::CreateSub(RHS, LHSV);
+  }
 
   // A + -B  -->  A - B
   if (!isa<Constant>(RHS))
     if (Value *V = dyn_castNegVal(RHS))
-      return BinaryOperator::createSub(LHS, V);
+      return BinaryOperator::CreateSub(LHS, V);
 
 
   ConstantInt *C2;
   if (Value *X = dyn_castFoldableMul(LHS, C2)) {
     if (X == RHS)   // X*C + X --> X * (C+1)
-      return BinaryOperator::createMul(RHS, AddOne(C2));
+      return BinaryOperator::CreateMul(RHS, AddOne(C2));
 
     // X*C1 + X*C2 --> X * (C1+C2)
     ConstantInt *C1;
     if (X == dyn_castFoldableMul(RHS, C1))
-      return BinaryOperator::createMul(X, Add(C1, C2));
+      return BinaryOperator::CreateMul(X, Add(C1, C2));
   }
 
   // X + X*C --> X * (C+1)
   if (dyn_castFoldableMul(RHS, C2) == LHS)
-    return BinaryOperator::createMul(LHS, AddOne(C2));
+    return BinaryOperator::CreateMul(LHS, AddOne(C2));
 
   // X + ~X --> -1   since   ~X = -X-1
   if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS)
@@ -2073,11 +2055,52 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
     if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
       return R;
+  
+  // A+B --> A|B iff A and B have no bits set in common.
+  if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
+    APInt LHSKnownOne(IT->getBitWidth(), 0);
+    APInt LHSKnownZero(IT->getBitWidth(), 0);
+    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+    if (LHSKnownZero != 0) {
+      APInt RHSKnownOne(IT->getBitWidth(), 0);
+      APInt RHSKnownZero(IT->getBitWidth(), 0);
+      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+      
+      // No bits in common -> bitwise or.
+      if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+        return BinaryOperator::CreateOr(LHS, RHS);
+    }
+  }
+
+  // W*X + Y*Z --> W * (X+Z)  iff W == Y
+  if (I.getType()->isIntOrIntVector()) {
+    Value *W, *X, *Y, *Z;
+    if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
+        match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
+      if (W != Y) {
+        if (W == Z) {
+          std::swap(Y, Z);
+        } else if (Y == X) {
+          std::swap(W, X);
+        } else if (X == Z) {
+          std::swap(Y, Z);
+          std::swap(W, X);
+        }
+      }
+
+      if (W == Y) {
+        Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z,
+                                                            LHS->getName()), I);
+        return BinaryOperator::CreateMul(W, NewAdd);
+      }
+    }
+  }
 
   if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
     Value *X = 0;
     if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X
-      return BinaryOperator::createSub(SubOne(CRHS), X);
+      return BinaryOperator::CreateSub(SubOne(CRHS), X);
 
     // (X & FF00) + xx00  -> (X+xx00) & FF00
     if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
@@ -2095,9 +2118,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
         if (AddRHSHighBits == AddRHSHighBitsAnd) {
           // Okay, the xform is safe.  Insert the new add pronto.
-          Value *NewAdd = InsertNewInstBefore(BinaryOperator::createAdd(X, CRHS,
+          Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS,
                                                             LHS->getName()), I);
-          return BinaryOperator::createAnd(NewAdd, C2);
+          return BinaryOperator::CreateAnd(NewAdd, C2);
         }
       }
     }
@@ -2109,8 +2132,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   }
 
   // add (cast *A to intptrtype) B -> 
-  //   cast (GEP (cast *A to sbyte*) B) -> 
-  //     intptrtype
+  //   cast (GEP (cast *A to sbyte*) B)  -->  intptrtype
   {
     CastInst *CI = dyn_cast<CastInst>(LHS);
     Value *Other = RHS;
@@ -2122,23 +2144,125 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         (CI->getType()->getPrimitiveSizeInBits() == 
          TD->getIntPtrType()->getPrimitiveSizeInBits()) 
         && isa<PointerType>(CI->getOperand(0)->getType())) {
-      Value *I2 = InsertCastBefore(Instruction::BitCast, CI->getOperand(0),
-                                   PointerType::get(Type::Int8Ty), I);
-      I2 = InsertNewInstBefore(new GetElementPtrInst(I2, Other, "ctg2"), I);
+      unsigned AS =
+        cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace();
+      Value *I2 = InsertBitCastBefore(CI->getOperand(0),
+                                      PointerType::get(Type::Int8Ty, AS), I);
+      I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I);
       return new PtrToIntInst(I2, CI->getType());
     }
   }
-
+  
+  // add (select X 0 (sub n A)) A  -->  select X A n
+  {
+    SelectInst *SI = dyn_cast<SelectInst>(LHS);
+    Value *Other = RHS;
+    if (!SI) {
+      SI = dyn_cast<SelectInst>(RHS);
+      Other = LHS;
+    }
+    if (SI && SI->hasOneUse()) {
+      Value *TV = SI->getTrueValue();
+      Value *FV = SI->getFalseValue();
+      Value *A, *N;
+
+      // Can we fold the add into the argument of the select?
+      // We check both true and false select arguments for a matching subtract.
+      if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Value(A))) &&
+          A == Other)  // Fold the add into the true select value.
+        return SelectInst::Create(SI->getCondition(), N, A);
+      if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Value(A))) && 
+          A == Other)  // Fold the add into the false select value.
+        return SelectInst::Create(SI->getCondition(), A, N);
+    }
+  }
+  
+  // Check for X+0.0.  Simplify it to X if we know X is not -0.0.
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+    if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+      return ReplaceInstUsesWith(I, LHS);
+
+  // Check for (add (sext x), y), see if we can merge this into an
+  // integer add followed by a sext.
+  if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+    // (add (sext x), cst) --> (sext (add x, cst'))
+    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+      Constant *CI = 
+        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+      if (LHSConv->hasOneUse() &&
+          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+        // Insert the new, smaller add.
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), 
+                                                        CI, "addconv");
+        InsertNewInstBefore(NewAdd, I);
+        return new SExtInst(NewAdd, I.getType());
+      }
+    }
+    
+    // (add (sext x), (sext y)) --> (sext (add int x, y))
+    if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+      // Only do this if x/y have the same type, if at last one of them has a
+      // single use (so we don't increase the number of sexts), and if the
+      // integer add will not overflow.
+      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+                                   RHSConv->getOperand(0))) {
+        // Insert the new integer add.
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), 
+                                                        RHSConv->getOperand(0),
+                                                        "addconv");
+        InsertNewInstBefore(NewAdd, I);
+        return new SExtInst(NewAdd, I.getType());
+      }
+    }
+  }
+  
+  // Check for (add double (sitofp x), y), see if we can merge this into an
+  // integer add followed by a promotion.
+  if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+    // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+    // ... if the constant fits in the integer value.  This is useful for things
+    // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+    // requires a constant pool load, and generally allows the add to be better
+    // instcombined.
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+      Constant *CI = 
+      ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+      if (LHSConv->hasOneUse() &&
+          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+        // Insert the new integer add.
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), 
+                                                        CI, "addconv");
+        InsertNewInstBefore(NewAdd, I);
+        return new SIToFPInst(NewAdd, I.getType());
+      }
+    }
+    
+    // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+    if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+      // Only do this if x/y have the same type, if at last one of them has a
+      // single use (so we don't increase the number of int->fp conversions),
+      // and if the integer add will not overflow.
+      if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+          (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+          WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+                                   RHSConv->getOperand(0))) {
+        // Insert the new integer add.
+        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), 
+                                                        RHSConv->getOperand(0),
+                                                        "addconv");
+        InsertNewInstBefore(NewAdd, I);
+        return new SIToFPInst(NewAdd, I.getType());
+      }
+    }
+  }
+  
   return Changed ? &I : 0;
 }
 
-// isSignBit - Return true if the value represented by the constant only has the
-// highest order bit set.
-static bool isSignBit(ConstantInt *CI) {
-  uint32_t NumBits = CI->getType()->getPrimitiveSizeInBits();
-  return CI->getValue() == APInt::getSignBit(NumBits);
-}
-
 Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
@@ -2147,7 +2271,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
 
   // If this is a 'B = x-(-A)', change to B = x+A...
   if (Value *V = dyn_castNegVal(Op1))
-    return BinaryOperator::createAdd(Op0, V);
+    return BinaryOperator::CreateAdd(Op0, V);
 
   if (isa<UndefValue>(Op0))
     return ReplaceInstUsesWith(I, Op0);    // undef - X -> undef
@@ -2157,24 +2281,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
   if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
     // Replace (-1 - A) with (~A)...
     if (C->isAllOnesValue())
-      return BinaryOperator::createNot(Op1);
+      return BinaryOperator::CreateNot(Op1);
 
     // C - ~X == X + (1+C)
     Value *X = 0;
     if (match(Op1, m_Not(m_Value(X))))
-      return BinaryOperator::createAdd(X, AddOne(C));
+      return BinaryOperator::CreateAdd(X, AddOne(C));
 
     // -(X >>u 31) -> (X >>s 31)
     // -(X >>s 31) -> (X >>u 31)
     if (C->isZero()) {
-      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1))
+      if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
         if (SI->getOpcode() == Instruction::LShr) {
           if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
             // Check to see if we are shifting out everything but the sign bit.
             if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                 SI->getType()->getPrimitiveSizeInBits()-1) {
               // Ok, the transformation is safe.  Insert AShr.
-              return BinaryOperator::create(Instruction::AShr, 
+              return BinaryOperator::Create(Instruction::AShr, 
                                           SI->getOperand(0), CU, SI->getName());
             }
           }
@@ -2185,11 +2309,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
             if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
                 SI->getType()->getPrimitiveSizeInBits()-1) {
               // Ok, the transformation is safe.  Insert LShr. 
-              return BinaryOperator::createLShr(
+              return BinaryOperator::CreateLShr(
                                           SI->getOperand(0), CU, SI->getName());
             }
           }
-        } 
+        }
+      }
     }
 
     // Try to fold constant sub into select arguments.
@@ -2202,17 +2327,20 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
         return NV;
   }
 
+  if (I.getType() == Type::Int1Ty)
+    return BinaryOperator::CreateXor(Op0, Op1);
+
   if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
     if (Op1I->getOpcode() == Instruction::Add &&
         !Op0->getType()->isFPOrFPVector()) {
       if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y
-        return BinaryOperator::createNeg(Op1I->getOperand(1), I.getName());
+        return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName());
       else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y
-        return BinaryOperator::createNeg(Op1I->getOperand(0), I.getName());
+        return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName());
       else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
         if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
           // C1-(X+C2) --> (C1-C2)-X
-          return BinaryOperator::createSub(Subtract(CI1, CI2), 
+          return BinaryOperator::CreateSub(Subtract(CI1, CI2), 
                                            Op1I->getOperand(0));
       }
     }
@@ -2229,7 +2357,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
         Op1I->setOperand(1, IIOp0);
 
         // Create the new top level add instruction...
-        return BinaryOperator::createAdd(Op0, Op1);
+        return BinaryOperator::CreateAdd(Op0, Op1);
       }
 
       // Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
@@ -2239,8 +2367,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
         Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
 
         Value *NewNot =
-          InsertNewInstBefore(BinaryOperator::createNot(OtherOp, "B.not"), I);
-        return BinaryOperator::createAnd(Op0, NewNot);
+          InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I);
+        return BinaryOperator::CreateAnd(Op0, NewNot);
       }
 
       // 0 - (X sdiv C)  -> (X sdiv -C)
@@ -2248,14 +2376,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
         if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
           if (CSI->isZero())
             if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
-              return BinaryOperator::createSDiv(Op1I->getOperand(0),
+              return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
                                                ConstantExpr::getNeg(DivRHS));
 
       // X - X*C --> X * (1-C)
       ConstantInt *C2 = 0;
       if (dyn_castFoldableMul(Op1I, C2) == Op0) {
         Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
-        return BinaryOperator::createMul(Op0, CP1);
+        return BinaryOperator::CreateMul(Op0, CP1);
       }
 
       // X - ((X / Y) * Y) --> X % Y
@@ -2264,15 +2392,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
           if (Op0 == I->getOperand(0) &&
               Op1I->getOperand(1) == I->getOperand(1)) {
             if (I->getOpcode() == Instruction::SDiv)
-              return BinaryOperator::createSRem(Op0, Op1I->getOperand(1));
+              return BinaryOperator::CreateSRem(Op0, Op1I->getOperand(1));
             if (I->getOpcode() == Instruction::UDiv)
-              return BinaryOperator::createURem(Op0, Op1I->getOperand(1));
+              return BinaryOperator::CreateURem(Op0, Op1I->getOperand(1));
           }
     }
   }
 
   if (!Op0->getType()->isFPOrFPVector())
-    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
+    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
       if (Op0I->getOpcode() == Instruction::Add) {
         if (Op0I->getOperand(0) == Op1)             // (Y+X)-Y == X
           return ReplaceInstUsesWith(I, Op0I->getOperand(1));
@@ -2280,17 +2408,18 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
           return ReplaceInstUsesWith(I, Op0I->getOperand(0));
       } else if (Op0I->getOpcode() == Instruction::Sub) {
         if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y
-          return BinaryOperator::createNeg(Op0I->getOperand(1), I.getName());
+          return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
       }
+    }
 
   ConstantInt *C1;
   if (Value *X = dyn_castFoldableMul(Op0, C1)) {
     if (X == Op1)  // X*C - X --> X * (C-1)
-      return BinaryOperator::createMul(Op1, SubOne(C1));
+      return BinaryOperator::CreateMul(Op1, SubOne(C1));
 
     ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
     if (X == dyn_castFoldableMul(Op1, C2))
-      return BinaryOperator::createMul(Op1, Subtract(C1, C2));
+      return BinaryOperator::CreateMul(X, Subtract(C1, C2));
   }
   return 0;
 }
@@ -2319,8 +2448,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
   case ICmpInst::ICMP_UGE: 
     // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
     TrueIfSigned = true;
-    return RHS->getValue() == 
-      APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits());
+    return RHS->getValue().isSignBit();
   default:
     return false;
   }
@@ -2341,7 +2469,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
         if (SI->getOpcode() == Instruction::Shl)
           if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
-            return BinaryOperator::createMul(SI->getOperand(0),
+            return BinaryOperator::CreateMul(SI->getOperand(0),
                                              ConstantExpr::getShl(CI, ShOp));
 
       if (CI->isZero())
@@ -2349,11 +2477,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       if (CI->equalsInt(1))                  // X * 1  == X
         return ReplaceInstUsesWith(I, Op0);
       if (CI->isAllOnesValue())              // X * -1 == 0 - X
-        return BinaryOperator::createNeg(Op0, I.getName());
+        return BinaryOperator::CreateNeg(Op0, I.getName());
 
       const APInt& Val = cast<ConstantInt>(CI)->getValue();
       if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
-        return BinaryOperator::createShl(Op0,
+        return BinaryOperator::CreateShl(Op0,
                  ConstantInt::get(Op0->getType(), Val.logBase2()));
       }
     } else if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
@@ -2370,14 +2498,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
       if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
-          isa<ConstantInt>(Op0I->getOperand(1))) {
+          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) {
         // Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
-        Instruction *Add = BinaryOperator::createMul(Op0I->getOperand(0),
+        Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0),
                                                      Op1, "tmp");
         InsertNewInstBefore(Add, I);
         Value *C1C2 = ConstantExpr::getMul(Op1, 
                                            cast<Constant>(Op0I->getOperand(1)));
-        return BinaryOperator::createAdd(Add, C1C2);
+        return BinaryOperator::CreateAdd(Add, C1C2);
         
       }
 
@@ -2393,14 +2521,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
 
   if (Value *Op0v = dyn_castNegVal(Op0))     // -X * -Y = X*Y
     if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
-      return BinaryOperator::createMul(Op0v, Op1v);
+      return BinaryOperator::CreateMul(Op0v, Op1v);
+
+  if (I.getType() == Type::Int1Ty)
+    return BinaryOperator::CreateAnd(Op0, I.getOperand(1));
 
   // If one of the operands of the multiply is a cast from a boolean value, then
   // we know the bool is either zero or one, so this is a 'masking' multiply.
   // See if we can simplify things based on how the boolean was originally
   // formed.
   CastInst *BoolCast = 0;
-  if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(0)))
+  if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0))
     if (CI->getOperand(0)->getType() == Type::Int1Ty)
       BoolCast = CI;
   if (!BoolCast)
@@ -2423,7 +2554,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
                                           SCOpTy->getPrimitiveSizeInBits()-1);
         Value *V =
           InsertNewInstBefore(
-            BinaryOperator::create(Instruction::AShr, SCIOp0, Amt,
+            BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt,
                                             BoolCast->getOperand(0)->getName()+
                                             ".mask"), I);
 
@@ -2439,7 +2570,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
         }
 
         Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
-        return BinaryOperator::createAnd(V, OtherOp);
+        return BinaryOperator::CreateAnd(V, OtherOp);
       }
     }
   }
@@ -2454,22 +2585,27 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
 Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  // undef / X -> 0
-  if (isa<UndefValue>(Op0))
+  // undef / X -> 0        for integer.
+  // undef / X -> undef    for FP (the undef could be a snan).
+  if (isa<UndefValue>(Op0)) {
+    if (Op0->getType()->isFPOrFPVector())
+      return ReplaceInstUsesWith(I, Op0);
     return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  }
 
   // X / undef -> undef
   if (isa<UndefValue>(Op1))
     return ReplaceInstUsesWith(I, Op1);
 
-  // Handle cases involving: div X, (select Cond, Y, Z)
+  // Handle cases involving: [su]div X, (select Cond, Y, Z)
+  // This does not apply for fdiv.
   if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
-    // div X, (Cond ? 0 : Y) -> div X, Y.  If the div and the select are in the
-    // same basic block, then we replace the select with Y, and the condition 
-    // of the select with false (if the cond value is in the same BB).  If the
-    // select has uses other than the div, this allows them to be simplified
-    // also. Note that div X, Y is just as good as div X, 0 (undef)
-    if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+    // [su]div X, (Cond ? 0 : Y) -> div X, Y.  If the div and the select are in
+    // the same basic block, then we replace the select with Y, and the
+    // condition of the select with false (if the cond value is in the same BB).
+    // If the select has uses other than the div, this allows them to be
+    // simplified also. Note that div X, Y is just as good as div X, 0 (undef)
+    if (ConstantInt *ST = dyn_cast<ConstantInt>(SI->getOperand(1)))
       if (ST->isNullValue()) {
         Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
         if (CondI && CondI->getParent() == I.getParent())
@@ -2481,8 +2617,8 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
         return &I;
       }
 
-    // Likewise for: div X, (Cond ? Y : 0) -> div X, Y
-    if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+    // Likewise for: [su]div X, (Cond ? Y : 0) -> div X, Y
+    if (ConstantInt *ST = dyn_cast<ConstantInt>(SI->getOperand(2)))
       if (ST->isNullValue()) {
         Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
         if (CondI && CondI->getParent() == I.getParent())
@@ -2505,6 +2641,18 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
 Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
+  // (sdiv X, X) --> 1     (udiv X, X) --> 1
+  if (Op0 == Op1) {
+    if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
+      ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+      std::vector<Constant*> Elts(Ty->getNumElements(), CI);
+      return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+    }
+
+    ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+    return ReplaceInstUsesWith(I, CI);
+  }
+  
   if (Instruction *Common = commonDivTransforms(I))
     return Common;
 
@@ -2517,8 +2665,11 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     if (Instruction *LHS = dyn_cast<Instruction>(Op0))
       if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
         if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
-          return BinaryOperator::create(I.getOpcode(), LHS->getOperand(0),
-                                        Multiply(RHS, LHSRHS));
+          if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv))
+            return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+          else 
+            return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+                                          Multiply(RHS, LHSRHS));
         }
 
     if (!RHS->isZero()) { // avoid X udiv 0
@@ -2536,6 +2687,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     if (LHS->equalsInt(0))
       return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
+  // It can't be division by zero, hence it must be division by one.
+  if (I.getType() == Type::Int1Ty)
+    return ReplaceInstUsesWith(I, Op0);
+
   return 0;
 }
 
@@ -2551,7 +2706,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   // if so, convert to a right shift.
   if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
     if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2
-      return BinaryOperator::createLShr(Op0, 
+      return BinaryOperator::CreateLShr(Op0, 
                ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
   }
 
@@ -2565,9 +2720,9 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
         const Type *NTy = N->getType();
         if (uint32_t C2 = C1.logBase2()) {
           Constant *C2V = ConstantInt::get(NTy, C2);
-          N = InsertNewInstBefore(BinaryOperator::createAdd(N, C2V, "tmp"), I);
+          N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I);
         }
-        return BinaryOperator::createLShr(Op0, N);
+        return BinaryOperator::CreateLShr(Op0, N);
       }
     }
   }
@@ -2583,18 +2738,18 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
           uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
           // Construct the "on true" case of the select
           Constant *TC = ConstantInt::get(Op0->getType(), TSA);
-          Instruction *TSI = BinaryOperator::createLShr(
+          Instruction *TSI = BinaryOperator::CreateLShr(
                                                  Op0, TC, SI->getName()+".t");
           TSI = InsertNewInstBefore(TSI, I);
   
           // Construct the "on false" case of the select
           Constant *FC = ConstantInt::get(Op0->getType(), FSA); 
-          Instruction *FSI = BinaryOperator::createLShr(
+          Instruction *FSI = BinaryOperator::CreateLShr(
                                                  Op0, FC, SI->getName()+".f");
           FSI = InsertNewInstBefore(FSI, I);
 
           // construct the select instruction and return it.
-          return new SelectInst(SI->getOperand(0), TSI, FSI, SI->getName());
+          return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
         }
       }
   return 0;
@@ -2610,11 +2765,11 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
     // sdiv X, -1 == -X
     if (RHS->isAllOnesValue())
-      return BinaryOperator::createNeg(Op0);
+      return BinaryOperator::CreateNeg(Op0);
 
     // -X/C -> X/-C
     if (Value *LHSNeg = dyn_castNegVal(Op0))
-      return BinaryOperator::createSDiv(LHSNeg, ConstantExpr::getNeg(RHS));
+      return BinaryOperator::CreateSDiv(LHSNeg, ConstantExpr::getNeg(RHS));
   }
 
   // If the sign bits of both operands are zero (i.e. we can prove they are
@@ -2623,7 +2778,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
     APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
     if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
       // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
-      return BinaryOperator::createUDiv(Op0, Op1, I.getName());
+      return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
     }
   }      
   
@@ -2634,46 +2789,6 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
   return commonDivTransforms(I);
 }
 
-/// GetFactor - If we can prove that the specified value is at least a multiple
-/// of some factor, return that factor.
-static Constant *GetFactor(Value *V) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
-    return CI;
-  
-  // Unless we can be tricky, we know this is a multiple of 1.
-  Constant *Result = ConstantInt::get(V->getType(), 1);
-  
-  Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return Result;
-  
-  if (I->getOpcode() == Instruction::Mul) {
-    // Handle multiplies by a constant, etc.
-    return ConstantExpr::getMul(GetFactor(I->getOperand(0)),
-                                GetFactor(I->getOperand(1)));
-  } else if (I->getOpcode() == Instruction::Shl) {
-    // (X<<C) -> X * (1 << C)
-    if (Constant *ShRHS = dyn_cast<Constant>(I->getOperand(1))) {
-      ShRHS = ConstantExpr::getShl(Result, ShRHS);
-      return ConstantExpr::getMul(GetFactor(I->getOperand(0)), ShRHS);
-    }
-  } else if (I->getOpcode() == Instruction::And) {
-    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      // X & 0xFFF0 is known to be a multiple of 16.
-      uint32_t Zeros = RHS->getValue().countTrailingZeros();
-      if (Zeros != V->getType()->getPrimitiveSizeInBits())// don't shift by "32"
-        return ConstantExpr::getShl(Result, 
-                                    ConstantInt::get(Result->getType(), Zeros));
-    }
-  } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
-    // Only handle int->int casts.
-    if (!CI->isIntegerCast())
-      return Result;
-    Value *Op = CI->getOperand(0);
-    return ConstantExpr::getCast(CI->getOpcode(), GetFactor(Op), V->getType());
-  }    
-  return Result;
-}
-
 /// This function implements the transforms on rem instructions that work
 /// regardless of the kind of rem instruction it is (urem, srem, or frem). It 
 /// is used by the visitors to those instructions.
@@ -2681,13 +2796,16 @@ static Constant *GetFactor(Value *V) {
 Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  // 0 % X == 0, we don't need to preserve faults!
+  // 0 % X == 0 for integer, we don't need to preserve faults!
   if (Constant *LHS = dyn_cast<Constant>(Op0))
     if (LHS->isNullValue())
       return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
 
-  if (isa<UndefValue>(Op0))              // undef % X -> 0
+  if (isa<UndefValue>(Op0)) {             // undef % X -> 0
+    if (I.getType()->isFPOrFPVector())
+      return ReplaceInstUsesWith(I, Op0);  // X % undef -> undef (could be SNaN)
     return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+  }
   if (isa<UndefValue>(Op1))
     return ReplaceInstUsesWith(I, Op1);  // X % undef -> undef
 
@@ -2752,9 +2870,13 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
         if (Instruction *NV = FoldOpIntoPhi(I))
           return NV;
       }
-      // (X * C1) % C2 --> 0  iff  C1 % C2 == 0
-      if (ConstantExpr::getSRem(GetFactor(Op0I), RHS)->isNullValue())
-        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+      // See if we can fold away this rem instruction.
+      uint32_t BitWidth = cast<IntegerType>(I.getType())->getBitWidth();
+      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth),
+                               KnownZero, KnownOne))
+        return &I;
     }
   }
 
@@ -2773,7 +2895,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
     // if so, convert to a bitwise and.
     if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
       if (C->getValue().isPowerOf2())
-        return BinaryOperator::createAnd(Op0, SubOne(C));
+        return BinaryOperator::CreateAnd(Op0, SubOne(C));
   }
 
   if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
@@ -2782,9 +2904,9 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
         isa<ConstantInt>(RHSI->getOperand(0))) {
       if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
         Constant *N1 = ConstantInt::getAllOnesValue(I.getType());
-        Value *Add = InsertNewInstBefore(BinaryOperator::createAdd(RHSI, N1,
+        Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1,
                                                                    "tmp"), I);
-        return BinaryOperator::createAnd(Op0, Add);
+        return BinaryOperator::CreateAnd(Op0, Add);
       }
     }
   }
@@ -2798,10 +2920,10 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
         if ((STO->getValue().isPowerOf2()) && 
             (SFO->getValue().isPowerOf2())) {
           Value *TrueAnd = InsertNewInstBefore(
-            BinaryOperator::createAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
+            BinaryOperator::CreateAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
           Value *FalseAnd = InsertNewInstBefore(
-            BinaryOperator::createAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
-          return new SelectInst(SI->getOperand(0), TrueAnd, FalseAnd);
+            BinaryOperator::CreateAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
+          return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
         }
       }
   }
@@ -2831,7 +2953,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
     APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
     if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
       // X srem Y -> X urem Y, iff X and Y don't have sign bit set
-      return BinaryOperator::createURem(Op0, Op1, I.getName());
+      return BinaryOperator::CreateURem(Op0, Op1, I.getName());
     }
   }
 
@@ -2969,8 +3091,8 @@ struct FoldICmpLogical {
   bool shouldApply(Value *V) const {
     if (ICmpInst *ICI = dyn_cast<ICmpInst>(V))
       if (PredicatesFoldable(pred, ICI->getPredicate()))
-        return (ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS ||
-                ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS);
+        return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) ||
+                (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS));
     return false;
   }
   Instruction *apply(Instruction &Log) const {
@@ -3019,10 +3141,10 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
   case Instruction::Xor:
     if (Op->hasOneUse()) {
       // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
-      Instruction *And = BinaryOperator::createAnd(X, AndRHS);
+      Instruction *And = BinaryOperator::CreateAnd(X, AndRHS);
       InsertNewInstBefore(And, TheAnd);
       And->takeName(Op);
-      return BinaryOperator::createXor(And, Together);
+      return BinaryOperator::CreateXor(And, Together);
     }
     break;
   case Instruction::Or:
@@ -3031,10 +3153,10 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
 
     if (Op->hasOneUse() && Together != OpRHS) {
       // (X | C1) & C2 --> (X | (C1&C2)) & C2
-      Instruction *Or = BinaryOperator::createOr(X, Together);
+      Instruction *Or = BinaryOperator::CreateOr(X, Together);
       InsertNewInstBefore(Or, TheAnd);
       Or->takeName(Op);
-      return BinaryOperator::createAnd(Or, AndRHS);
+      return BinaryOperator::CreateAnd(Or, AndRHS);
     }
     break;
   case Instruction::Add:
@@ -3062,10 +3184,10 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
             return &TheAnd;
           } else {
             // Pull the XOR out of the AND.
-            Instruction *NewAnd = BinaryOperator::createAnd(X, AndRHS);
+            Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS);
             InsertNewInstBefore(NewAnd, TheAnd);
             NewAnd->takeName(Op);
-            return BinaryOperator::createXor(NewAnd, AndRHS);
+            return BinaryOperator::CreateXor(NewAnd, AndRHS);
           }
         }
       }
@@ -3124,9 +3246,9 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
         // Make the argument unsigned.
         Value *ShVal = Op->getOperand(0);
         ShVal = InsertNewInstBefore(
-            BinaryOperator::createLShr(ShVal, OpRHS, 
+            BinaryOperator::CreateLShr(ShVal, OpRHS, 
                                    Op->getName()), TheAnd);
-        return BinaryOperator::createAnd(ShVal, AndRHS, TheAnd.getName());
+        return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
       }
     }
     break;
@@ -3160,7 +3282,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
 
     // Emit V-Lo <u Hi-Lo
     Constant *NegLo = ConstantExpr::getNeg(Lo);
-    Instruction *Add = BinaryOperator::createAdd(V, NegLo, V->getName()+".off");
+    Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
     InsertNewInstBefore(Add, IB);
     Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
     return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
@@ -3180,7 +3302,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
   // Emit V-Lo >u Hi-1-Lo
   // Note that Hi has already had one subtracted from it, above.
   ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
-  Instruction *Add = BinaryOperator::createAdd(V, NegLo, V->getName()+".off");
+  Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
   InsertNewInstBefore(Add, IB);
   Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
   return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
@@ -3255,9 +3377,9 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
   
   Instruction *New;
   if (isSub)
-    New = BinaryOperator::createSub(LHSI->getOperand(0), RHS, "fold");
+    New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold");
   else
-    New = BinaryOperator::createAdd(LHSI->getOperand(0), RHS, "fold");
+    New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold");
   return InsertNewInstBefore(New, I);
 }
 
@@ -3305,19 +3427,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         if (Op0I->hasOneUse()) {
           if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
             // Not masking anything out for the LHS, move to RHS.
-            Instruction *NewRHS = BinaryOperator::createAnd(Op0RHS, AndRHS,
+            Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS,
                                                    Op0RHS->getName()+".masked");
             InsertNewInstBefore(NewRHS, I);
-            return BinaryOperator::create(
+            return BinaryOperator::Create(
                        cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS);
           }
           if (!isa<Constant>(Op0RHS) &&
               MaskedValueIsZero(Op0RHS, NotAndRHS)) {
             // Not masking anything out for the RHS, move to LHS.
-            Instruction *NewLHS = BinaryOperator::createAnd(Op0LHS, AndRHS,
+            Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS,
                                                    Op0LHS->getName()+".masked");
             InsertNewInstBefore(NewLHS, I);
-            return BinaryOperator::create(
+            return BinaryOperator::Create(
                        cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS);
           }
         }
@@ -3328,9 +3450,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
         // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
         if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
-          return BinaryOperator::createAnd(V, AndRHS);
+          return BinaryOperator::CreateAnd(V, AndRHS);
         if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
-          return BinaryOperator::createAnd(V, AndRHS);  // Add commutes
+          return BinaryOperator::CreateAnd(V, AndRHS);  // Add commutes
         break;
 
       case Instruction::Sub:
@@ -3338,7 +3460,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
         // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
         if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
-          return BinaryOperator::createAnd(V, AndRHS);
+          return BinaryOperator::CreateAnd(V, AndRHS);
         break;
       }
 
@@ -3352,20 +3474,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
         if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
             CastOp->getNumOperands() == 2)
-          if (ConstantInt *AndCI = dyn_cast<ConstantInt>(CastOp->getOperand(1)))
+          if (ConstantInt *AndCI = dyn_cast<ConstantInt>(CastOp->getOperand(1))) {
             if (CastOp->getOpcode() == Instruction::And) {
               // Change: and (cast (and X, C1) to T), C2
               // into  : and (cast X to T), trunc_or_bitcast(C1)&C2
               // This will fold the two constants together, which may allow 
               // other simplifications.
-              Instruction *NewCast = CastInst::createTruncOrBitCast(
+              Instruction *NewCast = CastInst::CreateTruncOrBitCast(
                 CastOp->getOperand(0), I.getType(), 
                 CastOp->getName()+".shrunk");
               NewCast = InsertNewInstBefore(NewCast, I);
               // trunc_or_bitcast(C1)&C2
               Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
               C3 = ConstantExpr::getAnd(C3, AndRHS);
-              return BinaryOperator::createAnd(NewCast, C3);
+              return BinaryOperator::CreateAnd(NewCast, C3);
             } else if (CastOp->getOpcode() == Instruction::Or) {
               // Change: and (cast (or X, C1) to T), C2
               // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2
@@ -3373,6 +3495,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
               if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)   // trunc(C1)&C2
                 return ReplaceInstUsesWith(I, AndRHS);
             }
+          }
       }
     }
 
@@ -3393,10 +3516,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
   // (~A & ~B) == (~(A | B)) - De Morgan's Law
   if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
-    Instruction *Or = BinaryOperator::createOr(Op0NotVal, Op1NotVal,
+    Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal,
                                                I.getName()+".demorgan");
     InsertNewInstBefore(Or, I);
-    return BinaryOperator::createNot(Or);
+    return BinaryOperator::CreateNot(Or);
   }
   
   {
@@ -3408,7 +3531,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       // (A|B) & ~(A&B) -> A^B
       if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
         if ((A == C && B == D) || (A == D && B == C))
-          return BinaryOperator::createXor(A, B);
+          return BinaryOperator::CreateXor(A, B);
       }
     }
     
@@ -3419,7 +3542,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       // ~(A&B) & (A|B) -> A^B
       if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
         if ((A == C && B == D) || (A == D && B == C))
-          return BinaryOperator::createXor(A, B);
+          return BinaryOperator::CreateXor(A, B);
       }
     }
     
@@ -3441,9 +3564,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         std::swap(A, B);
       }
       if (A == Op0) {                                // A&(A^B) -> A & ~B
-        Instruction *NotB = BinaryOperator::createNot(B, "tmp");
+        Instruction *NotB = BinaryOperator::CreateNot(B, "tmp");
         InsertNewInstBefore(NotB, I);
-        return BinaryOperator::createAnd(A, NotB);
+        return BinaryOperator::CreateAnd(A, NotB);
       }
     }
   }
@@ -3470,8 +3593,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
              ICmpInst::isSignedPredicate(LHSCC) == 
                  ICmpInst::isSignedPredicate(RHSCC))) {
           // Ensure that the larger constant is on the RHS.
-          ICmpInst::Predicate GT = ICmpInst::isSignedPredicate(LHSCC) ? 
-            ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+          ICmpInst::Predicate GT;
+          if (ICmpInst::isSignedPredicate(LHSCC) ||
+              (ICmpInst::isEquality(LHSCC) && 
+               ICmpInst::isSignedPredicate(RHSCC)))
+            GT = ICmpInst::ICMP_SGT;
+          else
+            GT = ICmpInst::ICMP_UGT;
+          
           Constant *Cmp = ConstantExpr::getICmp(GT, LHSCst, RHSCst);
           ICmpInst *LHS = cast<ICmpInst>(Op0);
           if (cast<ConstantInt>(Cmp)->getZExtValue()) {
@@ -3520,7 +3649,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
             case ICmpInst::ICMP_NE:
               if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
                 Constant *AddCST = ConstantExpr::getNeg(LHSCst);
-                Instruction *Add = BinaryOperator::createAdd(LHSVal, AddCST,
+                Instruction *Add = BinaryOperator::CreateAdd(LHSVal, AddCST,
                                                       LHSVal->getName()+".off");
                 InsertNewInstBefore(Add, I);
                 return new ICmpInst(ICmpInst::ICMP_UGT, Add,
@@ -3613,11 +3742,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
                               I.getType(), TD) &&
             ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 
                               I.getType(), TD)) {
-          Instruction *NewOp = BinaryOperator::createAnd(Op0C->getOperand(0),
+          Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0),
                                                          Op1C->getOperand(0),
                                                          I.getName());
           InsertNewInstBefore(NewOp, I);
-          return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
       }
     
@@ -3628,10 +3757,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
           SI0->getOperand(1) == SI1->getOperand(1) &&
           (SI0->hasOneUse() || SI1->hasOneUse())) {
         Instruction *NewOp =
-          InsertNewInstBefore(BinaryOperator::createAnd(SI0->getOperand(0),
+          InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0),
                                                         SI1->getOperand(0),
                                                         SI0->getName()), I);
-        return BinaryOperator::create(SI1->getOpcode(), NewOp, 
+        return BinaryOperator::Create(SI1->getOpcode(), NewOp, 
                                       SI1->getOperand(1));
       }
   }
@@ -3766,7 +3895,7 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
   const Type *Tys[] = { ITy };
   Module *M = I.getParent()->getParent()->getParent();
   Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
-  return new CallInst(F, V);
+  return CallInst::Create(F, V);
 }
 
 
@@ -3803,19 +3932,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
     ConstantInt *C1 = 0; Value *X = 0;
     // (X & C1) | C2 --> (X | C2) & (C1|C2)
     if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
-      Instruction *Or = BinaryOperator::createOr(X, RHS);
+      Instruction *Or = BinaryOperator::CreateOr(X, RHS);
       InsertNewInstBefore(Or, I);
       Or->takeName(Op0);
-      return BinaryOperator::createAnd(Or, 
+      return BinaryOperator::CreateAnd(Or, 
                ConstantInt::get(RHS->getValue() | C1->getValue()));
     }
 
     // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
     if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
-      Instruction *Or = BinaryOperator::createOr(X, RHS);
+      Instruction *Or = BinaryOperator::CreateOr(X, RHS);
       InsertNewInstBefore(Or, I);
       Or->takeName(Op0);
-      return BinaryOperator::createXor(Or,
+      return BinaryOperator::CreateXor(Or,
                  ConstantInt::get(C1->getValue() & ~RHS->getValue()));
     }
 
@@ -3851,19 +3980,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   // (X^C)|Y -> (X|Y)^C iff Y&C == 0
   if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
       MaskedValueIsZero(Op1, C1->getValue())) {
-    Instruction *NOr = BinaryOperator::createOr(A, Op1);
+    Instruction *NOr = BinaryOperator::CreateOr(A, Op1);
     InsertNewInstBefore(NOr, I);
     NOr->takeName(Op0);
-    return BinaryOperator::createXor(NOr, C1);
+    return BinaryOperator::CreateXor(NOr, C1);
   }
 
   // Y|(X^C) -> (X|Y)^C iff Y&C == 0
   if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
       MaskedValueIsZero(Op0, C1->getValue())) {
-    Instruction *NOr = BinaryOperator::createOr(A, Op0);
+    Instruction *NOr = BinaryOperator::CreateOr(A, Op0);
     InsertNewInstBefore(NOr, I);
     NOr->takeName(Op0);
-    return BinaryOperator::createXor(NOr, C1);
+    return BinaryOperator::CreateXor(NOr, C1);
   }
 
   // (A & C)|(B & D)
@@ -3913,8 +4042,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       
       if (V1) {
         Value *Or =
-          InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I);
-        return BinaryOperator::createAnd(V1, Or);
+          InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I);
+        return BinaryOperator::CreateAnd(V1, Or);
       }
     }
   }
@@ -3926,10 +4055,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
           SI0->getOperand(1) == SI1->getOperand(1) &&
           (SI0->hasOneUse() || SI1->hasOneUse())) {
         Instruction *NewOp =
-        InsertNewInstBefore(BinaryOperator::createOr(SI0->getOperand(0),
+        InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0),
                                                      SI1->getOperand(0),
                                                      SI0->getName()), I);
-        return BinaryOperator::create(SI1->getOpcode(), NewOp, 
+        return BinaryOperator::Create(SI1->getOpcode(), NewOp, 
                                       SI1->getOperand(1));
       }
   }
@@ -3947,9 +4076,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
 
     // (~A | ~B) == (~(A & B)) - De Morgan's Law
     if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
-      Value *And = InsertNewInstBefore(BinaryOperator::createAnd(A, B,
+      Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B,
                                               I.getName()+".demorgan"), I);
-      return BinaryOperator::createNot(And);
+      return BinaryOperator::CreateNot(And);
     }
   }
 
@@ -4001,7 +4130,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
             case ICmpInst::ICMP_EQ:
               if (LHSCst == SubOne(RHSCst)) {// (X == 13 | X == 14) -> X-13 <u 2
                 Constant *AddCST = ConstantExpr::getNeg(LHSCst);
-                Instruction *Add = BinaryOperator::createAdd(LHSVal, AddCST,
+                Instruction *Add = BinaryOperator::CreateAdd(LHSVal, AddCST,
                                                       LHSVal->getName()+".off");
                 InsertNewInstBefore(Add, I);
                 AddCST = Subtract(AddOne(RHSCst), LHSCst);
@@ -4110,18 +4239,22 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
     if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
       if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
-        const Type *SrcTy = Op0C->getOperand(0)->getType();
-        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
-            // Only do this if the casts both really cause code to be generated.
-            ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 
-                              I.getType(), TD) &&
-            ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 
-                              I.getType(), TD)) {
-          Instruction *NewOp = BinaryOperator::createOr(Op0C->getOperand(0),
-                                                        Op1C->getOperand(0),
-                                                        I.getName());
-          InsertNewInstBefore(NewOp, I);
-          return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+        if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
+            !isa<ICmpInst>(Op1C->getOperand(0))) {
+          const Type *SrcTy = Op0C->getOperand(0)->getType();
+          if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+              // Only do this if the casts both really cause code to be
+              // generated.
+              ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), 
+                                I.getType(), TD) &&
+              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 
+                                I.getType(), TD)) {
+            Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0),
+                                                          Op1C->getOperand(0),
+                                                          I.getName());
+            InsertNewInstBefore(NewOp, I);
+            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+          }
         }
       }
   }
@@ -4131,7 +4264,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
     if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
       if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
-          RHS->getPredicate() == FCmpInst::FCMP_UNO)
+          RHS->getPredicate() == FCmpInst::FCMP_UNO && 
+          LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType())
         if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
           if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
             // If either of the constants are nans, then the whole thing returns
@@ -4150,6 +4284,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   return Changed ? &I : 0;
 }
 
+namespace {
+
 // XorSelf - Implements: X ^ X --> 0
 struct XorSelf {
   Value *RHS;
@@ -4160,13 +4296,19 @@ struct XorSelf {
   }
 };
 
+}
 
 Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   bool Changed = SimplifyCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
-  if (isa<UndefValue>(Op1))
+  if (isa<UndefValue>(Op1)) {
+    if (isa<UndefValue>(Op0))
+      // Handle undef ^ undef -> 0 special case. This is a common
+      // idiom (misuse).
+      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
     return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef
+  }
 
   // xor X, X = 0, even if X is nested in a sequence of Xor's.
   if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
@@ -4196,13 +4338,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
         if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
           Instruction *NotY =
-            BinaryOperator::createNot(Op0I->getOperand(1),
+            BinaryOperator::CreateNot(Op0I->getOperand(1),
                                       Op0I->getOperand(1)->getName()+".not");
           InsertNewInstBefore(NotY, I);
           if (Op0I->getOpcode() == Instruction::And)
-            return BinaryOperator::createOr(Op0NotVal, NotY);
+            return BinaryOperator::CreateOr(Op0NotVal, NotY);
           else
-            return BinaryOperator::createAnd(Op0NotVal, NotY);
+            return BinaryOperator::CreateAnd(Op0NotVal, NotY);
         }
       }
     }
@@ -4221,6 +4363,25 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
                             FCI->getOperand(0), FCI->getOperand(1));
     }
 
+    // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+    if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+      if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+        if (CI->hasOneUse() && Op0C->hasOneUse()) {
+          Instruction::CastOps Opcode = Op0C->getOpcode();
+          if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+            if (RHS == ConstantExpr::getCast(Opcode, ConstantInt::getTrue(),
+                                             Op0C->getDestTy())) {
+              Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
+                                     CI->getOpcode(), CI->getInversePredicate(),
+                                     CI->getOperand(0), CI->getOperand(1)), I);
+              NewCI->takeName(CI);
+              return CastInst::Create(Opcode, NewCI, Op0C->getType());
+            }
+          }
+        }
+      }
+    }
+
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
       // ~(c-X) == X-c-1 == X+(-c-1)
       if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
@@ -4228,22 +4389,22 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
           Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
           Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
                                               ConstantInt::get(I.getType(), 1));
-          return BinaryOperator::createAdd(Op0I->getOperand(1), ConstantRHS);
+          return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
         }
           
-      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+      if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
         if (Op0I->getOpcode() == Instruction::Add) {
           // ~(X-c) --> (-c-1)-X
           if (RHS->isAllOnesValue()) {
             Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
-            return BinaryOperator::createSub(
+            return BinaryOperator::CreateSub(
                            ConstantExpr::getSub(NegOp0CI,
                                              ConstantInt::get(I.getType(), 1)),
                                           Op0I->getOperand(0));
           } else if (RHS->getValue().isSignBit()) {
             // (X + C) ^ signbit -> (X + C + signbit)
             Constant *C = ConstantInt::get(RHS->getValue() + Op0CI->getValue());
-            return BinaryOperator::createAdd(Op0I->getOperand(0), C);
+            return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
 
           }
         } else if (Op0I->getOpcode() == Instruction::Or) {
@@ -4261,6 +4422,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
             return &I;
           }
         }
+      }
     }
 
     // Try to fold constant and into select arguments.
@@ -4318,8 +4480,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
         std::swap(A, B);
       if (B == Op1) {                                // (A|B)^B == A & ~B
         Instruction *NotB =
-          InsertNewInstBefore(BinaryOperator::createNot(Op1, "tmp"), I);
-        return BinaryOperator::createAnd(A, NotB);
+          InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I);
+        return BinaryOperator::CreateAnd(A, NotB);
       }
     } else if (match(Op0I, m_Xor(m_Value(A), m_Value(B)))) {
       if (Op1 == A)                                          // (A^B)^A == B
@@ -4332,8 +4494,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       if (B == Op1 &&                                      // (B&A)^A == ~B & A
           !isa<ConstantInt>(Op1)) {  // Canonical form is (B&C)^C
         Instruction *N =
-          InsertNewInstBefore(BinaryOperator::createNot(A, "tmp"), I);
-        return BinaryOperator::createAnd(N, Op1);
+          InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I);
+        return BinaryOperator::CreateAnd(N, Op1);
       }
     }
   }
@@ -4344,10 +4506,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       Op0I->getOperand(1) == Op1I->getOperand(1) &&
       (Op1I->hasOneUse() || Op1I->hasOneUse())) {
     Instruction *NewOp =
-      InsertNewInstBefore(BinaryOperator::createXor(Op0I->getOperand(0),
+      InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0),
                                                     Op1I->getOperand(0),
                                                     Op0I->getName()), I);
-    return BinaryOperator::create(Op1I->getOpcode(), NewOp, 
+    return BinaryOperator::Create(Op1I->getOpcode(), NewOp, 
                                   Op1I->getOperand(1));
   }
     
@@ -4357,13 +4519,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
     if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
         match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
       if ((A == C && B == D) || (A == D && B == C)) 
-        return BinaryOperator::createXor(A, B);
+        return BinaryOperator::CreateXor(A, B);
     }
     // (A | B)^(A & B) -> A ^ B
     if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
         match(Op1I, m_And(m_Value(C), m_Value(D)))) {
       if ((A == C && B == D) || (A == D && B == C)) 
-        return BinaryOperator::createXor(A, B);
+        return BinaryOperator::CreateXor(A, B);
     }
     
     // (A & B)^(C & D)
@@ -4383,8 +4545,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
       
       if (X) {
         Instruction *NewOp =
-        InsertNewInstBefore(BinaryOperator::createXor(Y, Z, Op0->getName()), I);
-        return BinaryOperator::createAnd(NewOp, X);
+        InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I);
+        return BinaryOperator::CreateAnd(NewOp, X);
       }
     }
   }
@@ -4405,14 +4567,15 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
                               I.getType(), TD) &&
             ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), 
                               I.getType(), TD)) {
-          Instruction *NewOp = BinaryOperator::createXor(Op0C->getOperand(0),
+          Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0),
                                                          Op1C->getOperand(0),
                                                          I.getName());
           InsertNewInstBefore(NewOp, I);
-          return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+          return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
         }
       }
   }
+
   return Changed ? &I : 0;
 }
 
@@ -4441,11 +4604,12 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
   Value *Result = Constant::getNullValue(IntPtrTy);
 
   // Build a mask for high order bits.
-  unsigned IntPtrWidth = TD.getPointerSize()*8;
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
   uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
 
-  for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
-    Value *Op = GEP->getOperand(i);
+  for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+       ++i, ++GTI) {
+    Value *Op = *i;
     uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()) & PtrSizeMask;
     if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
       if (OpC->isZero()) continue;
@@ -4458,7 +4622,7 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
           Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
         else
           Result = IC.InsertNewInstBefore(
-                   BinaryOperator::createAdd(Result,
+                   BinaryOperator::CreateAdd(Result,
                                              ConstantInt::get(IntPtrTy, Size),
                                              GEP->getName()+".offs"), I);
         continue;
@@ -4472,7 +4636,7 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
       else {
         // Emit an add instruction.
         Result = IC.InsertNewInstBefore(
-           BinaryOperator::createAdd(Result, Scale,
+           BinaryOperator::CreateAdd(Result, Scale,
                                      GEP->getName()+".offs"), I);
       }
       continue;
@@ -4490,7 +4654,7 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
       if (Constant *OpC = dyn_cast<Constant>(Op))
         Op = ConstantExpr::getMul(OpC, Scale);
       else    // We'll let instcombine(mul) convert this to a shl if possible.
-        Op = IC.InsertNewInstBefore(BinaryOperator::createMul(Op, Scale,
+        Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale,
                                                   GEP->getName()+".idx"), I);
     }
 
@@ -4499,79 +4663,142 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
       Result = ConstantExpr::getAdd(cast<Constant>(Op),
                                     cast<Constant>(Result));
     else
-      Result = IC.InsertNewInstBefore(BinaryOperator::createAdd(Op, Result,
+      Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result,
                                                   GEP->getName()+".offs"), I);
   }
   return Result;
 }
 
-/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
-/// else.  At this point we know that the GEP is on the LHS of the comparison.
-Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
-                                       ICmpInst::Predicate Cond,
-                                       Instruction &I) {
-  assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
 
-  if (CastInst *CI = dyn_cast<CastInst>(RHS))
-    if (isa<PointerType>(CI->getOperand(0)->getType()))
-      RHS = CI->getOperand(0);
+/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of
+/// the *offset* implied by GEP to zero.  For example, if we have &A[i], we want
+/// to return 'i' for "icmp ne i, 0".  Note that, in general, indices can be
+/// complex, and scales are involved.  The above expression would also be legal
+/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).  This
+/// later form is less amenable to optimization though, and we are allowed to
+/// generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+/// 
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+                                          InstCombiner &IC) {
+  TargetData &TD = IC.getTargetData();
+  gep_type_iterator GTI = gep_type_begin(GEP);
 
-  Value *PtrBase = GEPLHS->getOperand(0);
-  if (PtrBase == RHS) {
-    // As an optimization, we don't actually have to compute the actual value of
-    // OFFSET if this is a icmp_eq or icmp_ne comparison, just return whether 
-    // each index is zero or not.
-    if (Cond == ICmpInst::ICMP_EQ || Cond == ICmpInst::ICMP_NE) {
-      Instruction *InVal = 0;
-      gep_type_iterator GTI = gep_type_begin(GEPLHS);
-      for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i, ++GTI) {
-        bool EmitIt = true;
-        if (Constant *C = dyn_cast<Constant>(GEPLHS->getOperand(i))) {
-          if (isa<UndefValue>(C))  // undef index -> undef.
-            return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-          if (C->isNullValue())
-            EmitIt = false;
-          else if (TD->getABITypeSize(GTI.getIndexedType()) == 0) {
-            EmitIt = false;  // This is indexing into a zero sized array?
-          } else if (isa<ConstantInt>(C))
-            return ReplaceInstUsesWith(I, // No comparison is needed here.
-                                 ConstantInt::get(Type::Int1Ty, 
-                                                  Cond == ICmpInst::ICMP_NE));
-        }
+  // Check to see if this gep only has a single variable index.  If so, and if
+  // any constant indices are a multiple of its scale, then we can compute this
+  // in terms of the scale of the variable index.  For example, if the GEP
+  // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+  // because the expression will cross zero at the same point.
+  unsigned i, e = GEP->getNumOperands();
+  int64_t Offset = 0;
+  for (i = 1; i != e; ++i, ++GTI) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+      // Compute the aggregate offset of constant indices.
+      if (CI->isZero()) continue;
 
-        if (EmitIt) {
-          Instruction *Comp =
-            new ICmpInst(Cond, GEPLHS->getOperand(i),
-                    Constant::getNullValue(GEPLHS->getOperand(i)->getType()));
-          if (InVal == 0)
-            InVal = Comp;
-          else {
-            InVal = InsertNewInstBefore(InVal, I);
-            InsertNewInstBefore(Comp, I);
-            if (Cond == ICmpInst::ICMP_NE)   // True if any are unequal
-              InVal = BinaryOperator::createOr(InVal, Comp);
-            else                              // True if all are equal
-              InVal = BinaryOperator::createAnd(InVal, Comp);
-          }
-        }
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+      } else {
+        uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+        Offset += Size*CI->getSExtValue();
       }
-
-      if (InVal)
-        return InVal;
-      else
-        // No comparison is needed here, all indexes = 0
-        ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 
-                                                Cond == ICmpInst::ICMP_EQ));
+    } else {
+      // Found our variable index.
+      break;
     }
-
-    // Only lower this if the icmp is the only user of the GEP or if we expect
-    // the result to fold to a constant!
-    if (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) {
-      // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
-      Value *Offset = EmitGEPOffset(GEPLHS, I, *this);
-      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
-                          Constant::getNullValue(Offset->getType()));
+  }
+  
+  // If there are no variable indices, we must have a constant offset, just
+  // evaluate it the general way.
+  if (i == e) return 0;
+  
+  Value *VariableIdx = GEP->getOperand(i);
+  // Determine the scale factor of the variable element.  For example, this is
+  // 4 if the variable index is into an array of i32.
+  uint64_t VariableScale = TD.getABITypeSize(GTI.getIndexedType());
+  
+  // Verify that there are no other variable indices.  If so, emit the hard way.
+  for (++i, ++GTI; i != e; ++i, ++GTI) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+    if (!CI) return 0;
+   
+    // Compute the aggregate offset of constant indices.
+    if (CI->isZero()) continue;
+    
+    // Handle a struct index, which adds its field offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+    } else {
+      uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+      Offset += Size*CI->getSExtValue();
     }
+  }
+  
+  // Okay, we know we have a single variable index, which must be a
+  // pointer/array/vector index.  If there is no offset, life is simple, return
+  // the index.
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  if (Offset == 0) {
+    // Cast to intptrty in case a truncation occurs.  If an extension is needed,
+    // we don't need to bother extending: the extension won't affect where the
+    // computation crosses zero.
+    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+      VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
+                                  VariableIdx->getNameStart(), &I);
+    return VariableIdx;
+  }
+  
+  // Otherwise, there is an index.  The computation we will do will be modulo
+  // the pointer size, so get it.
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+  
+  Offset &= PtrSizeMask;
+  VariableScale &= PtrSizeMask;
+
+  // To do this transformation, any constant index must be a multiple of the
+  // variable scale factor.  For example, we can evaluate "12 + 4*i" as "3 + i",
+  // but we can't evaluate "10 + 3*i" in terms of i.  Check that the offset is a
+  // multiple of the variable scale.
+  int64_t NewOffs = Offset / (int64_t)VariableScale;
+  if (Offset != NewOffs*(int64_t)VariableScale)
+    return 0;
+
+  // Okay, we can do this evaluation.  Start by converting the index to intptr.
+  const Type *IntPtrTy = TD.getIntPtrType();
+  if (VariableIdx->getType() != IntPtrTy)
+    VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+                                              true /*SExt*/, 
+                                              VariableIdx->getNameStart(), &I);
+  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+  return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else.  At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
+                                       ICmpInst::Predicate Cond,
+                                       Instruction &I) {
+  assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
+
+  // Look through bitcasts.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+    RHS = BCI->getOperand(0);
+
+  Value *PtrBase = GEPLHS->getOperand(0);
+  if (PtrBase == RHS) {
+    // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
+    // This transformation (ignoring the base and scales) is valid because we
+    // know pointers can't overflow.  See if we can output an optimized form.
+    Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+    
+    // If not, synthesize the offset the hard way.
+    if (Offset == 0)
+      Offset = EmitGEPOffset(GEPLHS, I, *this);
+    return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+                        Constant::getNullValue(Offset->getType()));
   } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
     // If the base pointers are different, but the indices are the same, just
     // compare the base pointer.
@@ -4639,7 +4866,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
       if (NumDifferences == 0)   // SAME GEP?
         return ReplaceInstUsesWith(I, // No comparison is needed here.
                                    ConstantInt::get(Type::Int1Ty,
-                                                    isTrueWhenEqual(Cond)));
+                                             ICmpInst::isTrueWhenEqual(Cond)));
 
       else if (NumDifferences == 1) {
         Value *LHSV = GEPLHS->getOperand(DiffOperand);
@@ -4662,6 +4889,137 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
   return 0;
 }
 
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+                                                Instruction *LHSI,
+                                                Constant *RHSC) {
+  if (!isa<ConstantFP>(RHSC)) return 0;
+  const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+  
+  // Get the width of the mantissa.  We don't want to hack on conversions that
+  // might lose information from the integer, e.g. "i64 -> float"
+  int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+  if (MantissaWidth == -1) return 0;  // Unknown.
+  
+  // Check to see that the input is converted from an integer type that is small
+  // enough that preserves all bits.  TODO: check here for "known" sign bits.
+  // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
+  unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+  
+  // If this is a uitofp instruction, we need an extra bit to hold the sign.
+  if (isa<UIToFPInst>(LHSI))
+    ++InputSize;
+  
+  // If the conversion would lose info, don't hack on this.
+  if ((int)InputSize > MantissaWidth)
+    return 0;
+  
+  // Otherwise, we can potentially simplify the comparison.  We know that it
+  // will always come through as an integer value and we know the constant is
+  // not a NAN (it would have been previously simplified).
+  assert(!RHS.isNaN() && "NaN comparison not already folded!");
+  
+  ICmpInst::Predicate Pred;
+  switch (I.getPredicate()) {
+  default: assert(0 && "Unexpected predicate!");
+  case FCmpInst::FCMP_UEQ:
+  case FCmpInst::FCMP_OEQ: Pred = ICmpInst::ICMP_EQ; break;
+  case FCmpInst::FCMP_UGT:
+  case FCmpInst::FCMP_OGT: Pred = ICmpInst::ICMP_SGT; break;
+  case FCmpInst::FCMP_UGE:
+  case FCmpInst::FCMP_OGE: Pred = ICmpInst::ICMP_SGE; break;
+  case FCmpInst::FCMP_ULT:
+  case FCmpInst::FCMP_OLT: Pred = ICmpInst::ICMP_SLT; break;
+  case FCmpInst::FCMP_ULE:
+  case FCmpInst::FCMP_OLE: Pred = ICmpInst::ICMP_SLE; break;
+  case FCmpInst::FCMP_UNE:
+  case FCmpInst::FCMP_ONE: Pred = ICmpInst::ICMP_NE; break;
+  case FCmpInst::FCMP_ORD:
+    return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+  case FCmpInst::FCMP_UNO:
+    return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+  }
+  
+  const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+  
+  // Now we know that the APFloat is a normal number, zero or inf.
+  
+  // See if the FP constant is too large for the integer.  For example,
+  // comparing an i8 to 300.0.
+  unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+  
+  // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
+  // and large values. 
+  APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+  SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+                        APFloat::rmNearestTiesToEven);
+  if (SMax.compare(RHS) == APFloat::cmpLessThan) {  // smax < 13123.0
+    if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+        Pred == ICmpInst::ICMP_SLE)
+      return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+    return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+  }
+  
+  // See if the RHS value is < SignedMin.
+  APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+  SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+                        APFloat::rmNearestTiesToEven);
+  if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+    if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+        Pred == ICmpInst::ICMP_SGE)
+      return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+    return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+  }
+
+  // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] but
+  // it may still be fractional.  See if it is fractional by casting the FP
+  // value to the integer value and back, checking for equality.  Don't do this
+  // for zero, because -0.0 is not fractional.
+  Constant *RHSInt = ConstantExpr::getFPToSI(RHSC, IntTy);
+  if (!RHS.isZero() &&
+      ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) != RHSC) {
+    // If we had a comparison against a fractional value, we have to adjust
+    // the compare predicate and sometimes the value.  RHSC is rounded towards
+    // zero at this point.
+    switch (Pred) {
+    default: assert(0 && "Unexpected integer comparison!");
+    case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true
+      return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+    case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false
+      return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+    case ICmpInst::ICMP_SLE:
+      // (float)int <= 4.4   --> int <= 4
+      // (float)int <= -4.4  --> int < -4
+      if (RHS.isNegative())
+        Pred = ICmpInst::ICMP_SLT;
+      break;
+    case ICmpInst::ICMP_SLT:
+      // (float)int < -4.4   --> int < -4
+      // (float)int < 4.4    --> int <= 4
+      if (!RHS.isNegative())
+        Pred = ICmpInst::ICMP_SLE;
+      break;
+    case ICmpInst::ICMP_SGT:
+      // (float)int > 4.4    --> int > 4
+      // (float)int > -4.4   --> int >= -4
+      if (RHS.isNegative())
+        Pred = ICmpInst::ICMP_SGE;
+      break;
+    case ICmpInst::ICMP_SGE:
+      // (float)int >= -4.4   --> int >= -4
+      // (float)int >= 4.4    --> int > 4
+      if (!RHS.isNegative())
+        Pred = ICmpInst::ICMP_SGT;
+      break;
+    }
+  }
+
+  // Lower this FP comparison into an appropriate integer version of the
+  // comparison.
+  return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
 Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   bool Changed = SimplifyCompare(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -4710,10 +5068,31 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
 
   // Handle fcmp with constant RHS
   if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+    // If the constant is a nan, see if we can fold the comparison based on it.
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+      if (CFP->getValueAPF().isNaN()) {
+        if (FCmpInst::isOrdered(I.getPredicate()))   // True if ordered and...
+          return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+        assert(FCmpInst::isUnordered(I.getPredicate()) &&
+               "Comparison must be either ordered or unordered!");
+        // True if unordered.
+        return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+      }
+    }
+    
     if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
       switch (LHSI->getOpcode()) {
       case Instruction::PHI:
-        if (Instruction *NV = FoldOpIntoPhi(I))
+        // Only fold fcmp into the PHI if the phi and fcmp are in the same
+        // block.  If in the same block, we're encouraging jump threading.  If
+        // not, we are just pessimizing the code by making an i1 phi.
+        if (LHSI->getParent() == I.getParent())
+          if (Instruction *NV = FoldOpIntoPhi(I))
+            return NV;
+        break;
+      case Instruction::SIToFP:
+      case Instruction::UIToFP:
+        if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
           return NV;
         break;
       case Instruction::Select:
@@ -4740,7 +5119,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
         }
 
         if (Op1)
-          return new SelectInst(LHSI->getOperand(0), Op1, Op2);
+          return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
         break;
       }
   }
@@ -4756,11 +5135,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   // icmp X, X
   if (Op0 == Op1)
     return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 
-                                                   isTrueWhenEqual(I)));
+                                                   I.isTrueWhenEqual()));
 
   if (isa<UndefValue>(Op1))                  // X icmp undef -> undef
     return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
-
+  
   // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
   // addresses never equal each other!  We already know that Op0 != Op1.
   if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
@@ -4768,19 +5147,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
        isa<ConstantPointerNull>(Op1)))
     return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 
-                                                   !isTrueWhenEqual(I)));
+                                                   !I.isTrueWhenEqual()));
 
   // icmp's with boolean values can always be turned into bitwise operations
   if (Ty == Type::Int1Ty) {
     switch (I.getPredicate()) {
     default: assert(0 && "Invalid icmp instruction!");
     case ICmpInst::ICMP_EQ: {               // icmp eq bool %A, %B -> ~(A^B)
-      Instruction *Xor = BinaryOperator::createXor(Op0, Op1, I.getName()+"tmp");
+      Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp");
       InsertNewInstBefore(Xor, I);
-      return BinaryOperator::createNot(Xor);
+      return BinaryOperator::CreateNot(Xor);
     }
     case ICmpInst::ICMP_NE:                  // icmp eq bool %A, %B -> A^B
-      return BinaryOperator::createXor(Op0, Op1);
+      return BinaryOperator::CreateXor(Op0, Op1);
 
     case ICmpInst::ICMP_UGT:
     case ICmpInst::ICMP_SGT:
@@ -4788,9 +5167,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       // FALL THROUGH
     case ICmpInst::ICMP_ULT:
     case ICmpInst::ICMP_SLT: {               // icmp lt bool A, B -> ~X & Y
-      Instruction *Not = BinaryOperator::createNot(Op0, I.getName()+"tmp");
+      Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
       InsertNewInstBefore(Not, I);
-      return BinaryOperator::createAnd(Not, Op1);
+      return BinaryOperator::CreateAnd(Not, Op1);
     }
     case ICmpInst::ICMP_UGE:
     case ICmpInst::ICMP_SGE:
@@ -4798,9 +5177,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       // FALL THROUGH
     case ICmpInst::ICMP_ULE:
     case ICmpInst::ICMP_SLE: {               //  icmp le bool %A, %B -> ~A | B
-      Instruction *Not = BinaryOperator::createNot(Op0, I.getName()+"tmp");
+      Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
       InsertNewInstBefore(Not, I);
-      return BinaryOperator::createOr(Not, Op1);
+      return BinaryOperator::CreateOr(Not, Op1);
     }
     }
   }
@@ -4808,6 +5187,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   // See if we are doing a comparison between a constant and an instruction that
   // can be folded into the comparison.
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+      Value *A, *B;
+    
+    // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+    if (I.isEquality() && CI->isNullValue() &&
+        match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+      // (icmp cond A B) if cond is equality
+      return new ICmpInst(I.getPredicate(), A, B);
+    }
+    
     switch (I.getPredicate()) {
     default: break;
     case ICmpInst::ICMP_ULT:                        // A <u MIN -> FALSE
@@ -5004,8 +5392,12 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         break;
 
       case Instruction::PHI:
-        if (Instruction *NV = FoldOpIntoPhi(I))
-          return NV;
+        // Only fold icmp into the PHI if the phi and fcmp are in the same
+        // block.  If in the same block, we're encouraging jump threading.  If
+        // not, we are just pessimizing the code by making an i1 phi.
+        if (LHSI->getParent() == I.getParent())
+          if (Instruction *NV = FoldOpIntoPhi(I))
+            return NV;
         break;
       case Instruction::Select: {
         // If either operand of the select is a constant, we can fold the
@@ -5031,7 +5423,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         }
 
         if (Op1)
-          return new SelectInst(LHSI->getOperand(0), Op1, Op2);
+          return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
         break;
       }
       case Instruction::Malloc:
@@ -5040,7 +5432,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
           AddToWorkList(LHSI);
           return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
-                                                         !isTrueWhenEqual(I)));
+                                                         !I.isTrueWhenEqual()));
         }
         break;
       }
@@ -5071,13 +5463,14 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         Op1 = CI2->getOperand(0);
 
       // If Op1 is a constant, we can fold the cast into the constant.
-      if (Op0->getType() != Op1->getType())
+      if (Op0->getType() != Op1->getType()) {
         if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
           Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
         } else {
           // Otherwise, cast the RHS right before the icmp
-          Op1 = InsertCastBefore(Instruction::BitCast, Op1, Op0->getType(), I);
+          Op1 = InsertBitCastBefore(Op1, Op0->getType(), I);
         }
+      }
       return new ICmpInst(I.getPredicate(), Op0, Op1);
     }
   }
@@ -5094,8 +5487,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         return R;
   }
   
+  // ~x < ~y --> y < x
+  { Value *A, *B;
+    if (match(Op0, m_Not(m_Value(A))) &&
+        match(Op1, m_Not(m_Value(B))))
+      return new ICmpInst(I.getPredicate(), B, A);
+  }
+  
   if (I.isEquality()) {
     Value *A, *B, *C, *D;
+    
+    // -x == -y --> x == y
+    if (match(Op0, m_Neg(m_Value(A))) &&
+        match(Op1, m_Neg(m_Value(B))))
+      return new ICmpInst(I.getPredicate(), A, B);
+    
     if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
       if (A == Op1 || B == Op1) {    // (A^B) == A  ->  B == 0
         Value *OtherVal = A == Op1 ? B : A;
@@ -5109,7 +5515,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
           if (ConstantInt *C2 = dyn_cast<ConstantInt>(D))
             if (Op1->hasOneUse()) {
               Constant *NC = ConstantInt::get(C1->getValue() ^ C2->getValue());
-              Instruction *Xor = BinaryOperator::createXor(C, NC, "tmp");
+              Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp");
               return new ICmpInst(I.getPredicate(), A,
                                   InsertNewInstBefore(Xor, I));
             }
@@ -5157,8 +5563,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       }
       
       if (X) {   // Build (X^Y) & Z
-        Op1 = InsertNewInstBefore(BinaryOperator::createXor(X, Y, "tmp"), I);
-        Op1 = InsertNewInstBefore(BinaryOperator::createAnd(Op1, Z, "tmp"), I);
+        Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I);
+        Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I);
         I.setOperand(0, Op1);
         I.setOperand(1, Constant::getNullValue(Op1->getType()));
         return &I;
@@ -5222,12 +5628,12 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
     HiOverflow = LoOverflow = ProdOV;
     if (!HiOverflow)
       HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
-  } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0.
+  } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
     if (CmpRHSV == 0) {       // (X / pos) op 0
       // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2)
       LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
       HiBound = DivRHS;
-    } else if (CmpRHSV.isPositive()) {   // (X / pos) op pos
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos
       LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20)
       HiOverflow = LoOverflow = ProdOV;
       if (!HiOverflow)
@@ -5240,7 +5646,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
       HiBound = AddOne(Prod);
       HiOverflow = ProdOV ? -1 : 0;
     }
-  } else {                         // Divisor is < 0.
+  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
     if (CmpRHSV == 0) {       // (X / neg) op 0
       // e.g. X/-5 op 0  --> [-4, 5)
       LoBound = AddOne(DivRHS);
@@ -5249,7 +5655,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
         HiOverflow = 1;            // [INTMIN+1, overflow)
         HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN
       }
-    } else if (CmpRHSV.isPositive()) {   // (X / neg) op pos
+    } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos
       // e.g. X/-5 op 3  --> [-19, -14)
       HiOverflow = LoOverflow = ProdOV ? -1 : 0;
       if (!LoOverflow)
@@ -5324,8 +5730,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
       // If this is a comparison that tests the signbit (X < 0) or (x > -1),
       // fold the xor.
-      if (ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0 ||
-          ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue()) {
+      if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+          (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
         Value *CompareVal = LHSI->getOperand(0);
         
         // If the sign bit of the XorCST is not set, there is no change to
@@ -5363,8 +5769,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // Extending a relational comparison when we're checking the sign
         // bit would not work.
         if (Cast->hasOneUse() &&
-            (ICI.isEquality() || AndCST->getValue().isPositive() && 
-             RHSV.isPositive())) {
+            (ICI.isEquality() ||
+             (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
           uint32_t BitWidth = 
             cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
           APInt NewCST = AndCST->getValue();
@@ -5372,7 +5778,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           APInt NewCI = RHSV;
           NewCI.zext(BitWidth);
           Instruction *NewAnd = 
-            BinaryOperator::createAnd(Cast->getOperand(0),
+            BinaryOperator::CreateAnd(Cast->getOperand(0),
                                       ConstantInt::get(NewCST),LHSI->getName());
           InsertNewInstBefore(NewAnd, ICI);
           return new ICmpInst(ICI.getPredicate(), NewAnd,
@@ -5452,18 +5858,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // Compute C << Y.
         Value *NS;
         if (Shift->getOpcode() == Instruction::LShr) {
-          NS = BinaryOperator::createShl(AndCST, 
+          NS = BinaryOperator::CreateShl(AndCST, 
                                          Shift->getOperand(1), "tmp");
         } else {
           // Insert a logical shift.
-          NS = BinaryOperator::createLShr(AndCST,
+          NS = BinaryOperator::CreateLShr(AndCST,
                                           Shift->getOperand(1), "tmp");
         }
         InsertNewInstBefore(cast<Instruction>(NS), ICI);
         
         // Compute X & (C << Y).
         Instruction *NewAnd = 
-          BinaryOperator::createAnd(Shift->getOperand(0), NS, LHSI->getName());
+          BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
         InsertNewInstBefore(NewAnd, ICI);
         
         ICI.setOperand(0, NewAnd);
@@ -5502,7 +5908,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
         
         Instruction *AndI =
-          BinaryOperator::createAnd(LHSI->getOperand(0),
+          BinaryOperator::CreateAnd(LHSI->getOperand(0),
                                     Mask, LHSI->getName()+".mask");
         Value *And = InsertNewInstBefore(AndI, ICI);
         return new ICmpInst(ICI.getPredicate(), And,
@@ -5518,7 +5924,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) <<
                                            (TypeBits-ShAmt->getZExtValue()-1));
       Instruction *AndI =
-        BinaryOperator::createAnd(LHSI->getOperand(0),
+        BinaryOperator::CreateAnd(LHSI->getOperand(0),
                                   Mask, LHSI->getName()+".mask");
       Value *And = InsertNewInstBefore(AndI, ICI);
       
@@ -5530,44 +5936,54 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     
   case Instruction::LShr:         // (icmp pred (shr X, ShAmt), CI)
   case Instruction::AShr: {
+    // Only handle equality comparisons of shift-by-constant.
     ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
-    if (!ShAmt) break;
+    if (!ShAmt || !ICI.isEquality()) break;
 
-    if (ICI.isEquality()) {
-      // Check that the shift amount is in range.  If not, don't perform
-      // undefined shifts.  When the shift is visited it will be
-      // simplified.
-      uint32_t TypeBits = RHSV.getBitWidth();
-      if (ShAmt->uge(TypeBits))
-        break;
-      uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+    // Check that the shift amount is in range.  If not, don't perform
+    // undefined shifts.  When the shift is visited it will be
+    // simplified.
+    uint32_t TypeBits = RHSV.getBitWidth();
+    if (ShAmt->uge(TypeBits))
+      break;
+    
+    uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
       
-      // If we are comparing against bits always shifted out, the
-      // comparison cannot succeed.
-      APInt Comp = RHSV << ShAmtVal;
-      if (LHSI->getOpcode() == Instruction::LShr)
-        Comp = Comp.lshr(ShAmtVal);
-      else
-        Comp = Comp.ashr(ShAmtVal);
+    // If we are comparing against bits always shifted out, the
+    // comparison cannot succeed.
+    APInt Comp = RHSV << ShAmtVal;
+    if (LHSI->getOpcode() == Instruction::LShr)
+      Comp = Comp.lshr(ShAmtVal);
+    else
+      Comp = Comp.ashr(ShAmtVal);
+    
+    if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+      bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+      Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+      return ReplaceInstUsesWith(ICI, Cst);
+    }
+    
+    // Otherwise, check to see if the bits shifted out are known to be zero.
+    // If so, we can compare against the unshifted value:
+    //  (X & 4) >> 1 == 2  --> (X & 4) == 4.
+    if (LHSI->hasOneUse() &&
+        MaskedValueIsZero(LHSI->getOperand(0), 
+                          APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
+      return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                          ConstantExpr::getShl(RHS, ShAmt));
+    }
       
-      if (Comp != RHSV) { // Comparing against a bit that we know is zero.
-        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-        Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
-        return ReplaceInstUsesWith(ICI, Cst);
-      }
+    if (LHSI->hasOneUse()) {
+      // Otherwise strength reduce the shift into an and.
+      APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+      Constant *Mask = ConstantInt::get(Val);
       
-      if (LHSI->hasOneUse() || RHSV == 0) {
-        // Otherwise strength reduce the shift into an and.
-        APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
-        Constant *Mask = ConstantInt::get(Val);
-        
-        Instruction *AndI =
-          BinaryOperator::createAnd(LHSI->getOperand(0),
-                                    Mask, LHSI->getName()+".mask");
-        Value *And = InsertNewInstBefore(AndI, ICI);
-        return new ICmpInst(ICI.getPredicate(), And,
-                            ConstantExpr::getShl(RHS, ShAmt));
-      }
+      Instruction *AndI =
+        BinaryOperator::CreateAnd(LHSI->getOperand(0),
+                                  Mask, LHSI->getName()+".mask");
+      Value *And = InsertNewInstBefore(AndI, ICI);
+      return new ICmpInst(ICI.getPredicate(), And,
+                          ConstantExpr::getShl(RHS, ShAmt));
     }
     break;
   }
@@ -5585,6 +6001,37 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
                                           DivRHS))
         return R;
     break;
+
+  case Instruction::Add:
+    // Fold: icmp pred (add, X, C1), C2
+
+    if (!ICI.isEquality()) {
+      ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+      if (!LHSC) break;
+      const APInt &LHSV = LHSC->getValue();
+
+      ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+                            .subtract(LHSV);
+
+      if (ICI.isSignedPredicate()) {
+        if (CR.getLower().isSignBit()) {
+          return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+                              ConstantInt::get(CR.getUpper()));
+        } else if (CR.getUpper().isSignBit()) {
+          return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+                              ConstantInt::get(CR.getLower()));
+        }
+      } else {
+        if (CR.getLower().isMinValue()) {
+          return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+                              ConstantInt::get(CR.getUpper()));
+        } else if (CR.getUpper().isMinValue()) {
+          return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+                              ConstantInt::get(CR.getLower()));
+        }
+      }
+    }
+    break;
   }
   
   // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
@@ -5601,7 +6048,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
           if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
             Instruction *NewRem =
-              BinaryOperator::createURem(BO->getOperand(0), BO->getOperand(1),
+              BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1),
                                          BO->getName());
             InsertNewInstBefore(NewRem, ICI);
             return new ICmpInst(ICI.getPredicate(), NewRem, 
@@ -5625,7 +6072,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
           else if (Value *NegVal = dyn_castNegVal(BOp0))
             return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
           else if (BO->hasOneUse()) {
-            Instruction *Neg = BinaryOperator::createNeg(BOp1);
+            Instruction *Neg = BinaryOperator::CreateNeg(BOp1);
             InsertNewInstBefore(Neg, ICI);
             Neg->takeName(BO);
             return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
@@ -5673,7 +6120,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
                                 Constant::getNullValue(RHS->getType()));
           
           // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
-          if (isSignBit(BOC)) {
+          if (BOC->getValue().isSignBit()) {
             Value *X = BO->getOperand(0);
             Constant *Zero = Constant::getNullValue(X->getType());
             ICmpInst::Predicate pred = isICMP_NE ? 
@@ -5750,8 +6197,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
       RHSOp = RHSC->getOperand(0);
       // If the pointer types don't match, insert a bitcast.
       if (LHSCIOp->getType() != RHSOp->getType())
-        RHSOp = InsertCastBefore(Instruction::BitCast, RHSOp,
-                                 LHSCIOp->getType(), ICI);
+        RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI);
     }
 
     if (RHSOp)
@@ -5773,18 +6219,22 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
     if (RHSCIOp->getType() != LHSCIOp->getType()) 
       return 0;
     
-    // If the signedness of the two compares doesn't agree (i.e. one is a sext
+    // If the signedness of the two casts doesn't agree (i.e. one is a sext
     // and the other is a zext), then we can't handle this.
     if (CI->getOpcode() != LHSCI->getOpcode())
       return 0;
 
-    // Likewise, if the signedness of the [sz]exts and the compare don't match, 
-    // then we can't handle this.
-    if (isSignedExt != isSignedCmp && !ICI.isEquality())
-      return 0;
-    
-    // Okay, just insert a compare of the reduced operands now!
-    return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+    // Deal with equality cases early.
+    if (ICI.isEquality())
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+    // A signed comparison of sign extended values simplifies into a
+    // signed comparison.
+    if (isSignedCmp && isSignedExt)
+      return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+    // The other three cases all fold into an unsigned comparison.
+    return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
   }
 
   // If we aren't dealing with a constant on the RHS, exit early
@@ -5859,7 +6309,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
     if (Constant *CI = dyn_cast<Constant>(Result))
       return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
     else
-      return BinaryOperator::createNot(Result);
+      return BinaryOperator::CreateNot(Result);
   }
 }
 
@@ -5872,7 +6322,22 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
 }
 
 Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
-  return commonShiftTransforms(I);
+  if (Instruction *R = commonShiftTransforms(I))
+    return R;
+  
+  Value *Op0 = I.getOperand(0);
+  
+  // ashr int -1, X = -1   (for any arithmetic shift rights of ~0)
+  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
+    if (CSI->isAllOnesValue())
+      return ReplaceInstUsesWith(I, CSI);
+  
+  // See if we can turn a signed shr into an unsigned shr.
+  if (MaskedValueIsZero(Op0, 
+                      APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
+    return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
+  
+  return 0;
 }
 
 Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
@@ -5898,26 +6363,12 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
       return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
   }
 
-  // ashr int -1, X = -1   (for any arithmetic shift rights of ~0)
-  if (I.getOpcode() == Instruction::AShr)
-    if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-      if (CSI->isAllOnesValue())
-        return ReplaceInstUsesWith(I, CSI);
-
   // Try to fold constant and into select arguments.
   if (isa<Constant>(Op0))
     if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
       if (Instruction *R = FoldOpIntoSelect(I, SI, this))
         return R;
 
-  // See if we can turn a signed shr into an unsigned shr.
-  if (I.isArithmeticShift()) {
-    if (MaskedValueIsZero(Op0, 
-          APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()))) {
-      return BinaryOperator::createLShr(Op0, Op1, I.getName());
-    }
-  }
-
   if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
     if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
       return Res;
@@ -5952,7 +6403,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
     if (BO->getOpcode() == Instruction::Mul && isLeftShift)
       if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
-        return BinaryOperator::createMul(BO->getOperand(0),
+        return BinaryOperator::CreateMul(BO->getOperand(0),
                                          ConstantExpr::getShl(BOOp, Op1));
   
   // Try to fold constant and into select arguments.
@@ -5963,6 +6414,50 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     if (Instruction *NV = FoldOpIntoPhi(I))
       return NV;
   
+  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+    // place.  Don't try to do this transformation in this case.  Also, we
+    // require that the input operand is a shift-by-constant so that we have
+    // confidence that the shifts will get folded together.  We could do this
+    // xform in more cases, but it is unlikely to be profitable.
+    if (TrOp && I.isLogicalShift() && TrOp->isShift() && 
+        isa<ConstantInt>(TrOp->getOperand(1))) {
+      // Okay, we'll do this xform.  Make the shift of shift.
+      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+      Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt,
+                                                I.getName());
+      InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2)
+
+      // For logical shifts, the truncation has the effect of making the high
+      // part of the register be zeros.  Emulate this by inserting an AND to
+      // clear the top bits as needed.  This 'and' will usually be zapped by
+      // other xforms later if dead.
+      unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
+      unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+      
+      // The mask we constructed says what the trunc would do if occurring
+      // between the shifts.  We want to know the effect *after* the second
+      // shift.  We know that it is a logical shift by a constant, so adjust the
+      // mask as appropriate.
+      if (I.getOpcode() == Instruction::Shl)
+        MaskV <<= Op1->getZExtValue();
+      else {
+        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+        MaskV = MaskV.lshr(Op1->getZExtValue());
+      }
+
+      Instruction *And = BinaryOperator::CreateAnd(NSh, ConstantInt::get(MaskV),
+                                                   TI->getName());
+      InsertNewInstBefore(And, I); // shift1 & 0x00FF
+
+      // Return the value truncated to the interesting size.
+      return new TruncInst(And, I.getType());
+    }
+  }
+  
   if (Op0->hasOneUse()) {
     if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
       // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
@@ -5979,16 +6474,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
               match(Op0BO->getOperand(1),
                     m_Shr(m_Value(V1), m_ConstantInt(CC))) && CC == Op1) {
-            Instruction *YS = BinaryOperator::createShl(
+            Instruction *YS = BinaryOperator::CreateShl(
                                             Op0BO->getOperand(0), Op1,
                                             Op0BO->getName());
             InsertNewInstBefore(YS, I); // (Y << C)
             Instruction *X = 
-              BinaryOperator::create(Op0BO->getOpcode(), YS, V1,
+              BinaryOperator::Create(Op0BO->getOpcode(), YS, V1,
                                      Op0BO->getOperand(1)->getName());
             InsertNewInstBefore(X, I);  // (X + (Y << C))
             uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
-            return BinaryOperator::createAnd(X, ConstantInt::get(
+            return BinaryOperator::CreateAnd(X, ConstantInt::get(
                        APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
           }
           
@@ -5999,16 +6494,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                     m_And(m_Shr(m_Value(V1), m_Value(V2)),m_ConstantInt(CC))) &&
               cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse() &&
               V2 == Op1) {
-            Instruction *YS = BinaryOperator::createShl(
+            Instruction *YS = BinaryOperator::CreateShl(
                                                      Op0BO->getOperand(0), Op1,
                                                      Op0BO->getName());
             InsertNewInstBefore(YS, I); // (Y << C)
             Instruction *XM =
-              BinaryOperator::createAnd(V1, ConstantExpr::getShl(CC, Op1),
+              BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");
             InsertNewInstBefore(XM, I); // X & (CC << C)
             
-            return BinaryOperator::create(Op0BO->getOpcode(), YS, XM);
+            return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
           }
         }
           
@@ -6018,16 +6513,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
               match(Op0BO->getOperand(0),
                     m_Shr(m_Value(V1), m_ConstantInt(CC))) && CC == Op1) {
-            Instruction *YS = BinaryOperator::createShl(
+            Instruction *YS = BinaryOperator::CreateShl(
                                                      Op0BO->getOperand(1), Op1,
                                                      Op0BO->getName());
             InsertNewInstBefore(YS, I); // (Y << C)
             Instruction *X =
-              BinaryOperator::create(Op0BO->getOpcode(), V1, YS,
+              BinaryOperator::Create(Op0BO->getOpcode(), V1, YS,
                                      Op0BO->getOperand(0)->getName());
             InsertNewInstBefore(X, I);  // (X + (Y << C))
             uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
-            return BinaryOperator::createAnd(X, ConstantInt::get(
+            return BinaryOperator::CreateAnd(X, ConstantInt::get(
                        APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
           }
           
@@ -6038,16 +6533,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                           m_ConstantInt(CC))) && V2 == Op1 &&
               cast<BinaryOperator>(Op0BO->getOperand(0))
                   ->getOperand(0)->hasOneUse()) {
-            Instruction *YS = BinaryOperator::createShl(
+            Instruction *YS = BinaryOperator::CreateShl(
                                                      Op0BO->getOperand(1), Op1,
                                                      Op0BO->getName());
             InsertNewInstBefore(YS, I); // (Y << C)
             Instruction *XM =
-              BinaryOperator::createAnd(V1, ConstantExpr::getShl(CC, Op1),
+              BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");
             InsertNewInstBefore(XM, I); // X & (CC << C)
             
-            return BinaryOperator::create(Op0BO->getOpcode(), XM, YS);
+            return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
           }
           
           break;
@@ -6081,19 +6576,18 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
         // the constant which would cause it to be modified for this
         // operation.
         //
-        if (isValid && !isLeftShift && I.getOpcode() == Instruction::AShr) {
+        if (isValid && I.getOpcode() == Instruction::AShr)
           isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
-        }
         
         if (isValid) {
           Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
           
           Instruction *NewShift =
-            BinaryOperator::create(I.getOpcode(), Op0BO->getOperand(0), Op1);
+            BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1);
           InsertNewInstBefore(NewShift, I);
           NewShift->takeName(Op0BO);
           
-          return BinaryOperator::create(Op0BO->getOpcode(), NewShift,
+          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
                                         NewRHS);
         }
       }
@@ -6121,21 +6615,21 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     
     // Check for (X << c1) << c2  and  (X >> c1) >> c2
     if (I.getOpcode() == ShiftOp->getOpcode()) {
-      return BinaryOperator::create(I.getOpcode(), X,
+      return BinaryOperator::Create(I.getOpcode(), X,
                                     ConstantInt::get(Ty, AmtSum));
     } else if (ShiftOp->getOpcode() == Instruction::LShr &&
                I.getOpcode() == Instruction::AShr) {
       // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0.
-      return BinaryOperator::createLShr(X, ConstantInt::get(Ty, AmtSum));
+      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
     } else if (ShiftOp->getOpcode() == Instruction::AShr &&
                I.getOpcode() == Instruction::LShr) {
       // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
       Instruction *Shift =
-        BinaryOperator::createAShr(X, ConstantInt::get(Ty, AmtSum));
+        BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
       InsertNewInstBefore(Shift, I);
 
       APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-      return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
     }
     
     // Okay, if we get here, one shift must be left, and the other shift must be
@@ -6144,12 +6638,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       // If we have ((X >>? C) << C), turn this into X & (-1 << C).
       if (I.getOpcode() == Instruction::Shl) {
         APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
-        return BinaryOperator::createAnd(X, ConstantInt::get(Mask));
+        return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
       }
       // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
       if (I.getOpcode() == Instruction::LShr) {
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
-        return BinaryOperator::createAnd(X, ConstantInt::get(Mask));
+        return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
       }
       // We can simplify ((X << C) >>s C) into a trunc + sext.
       // NOTE: we could do this for any C, but that would make 'unusual' integer
@@ -6181,22 +6675,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
         assert(ShiftOp->getOpcode() == Instruction::LShr ||
                ShiftOp->getOpcode() == Instruction::AShr);
         Instruction *Shift =
-          BinaryOperator::createShl(X, ConstantInt::get(Ty, ShiftDiff));
+          BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
         InsertNewInstBefore(Shift, I);
         
         APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+        return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
       }
       
       // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
       if (I.getOpcode() == Instruction::LShr) {
         assert(ShiftOp->getOpcode() == Instruction::Shl);
         Instruction *Shift =
-          BinaryOperator::createLShr(X, ConstantInt::get(Ty, ShiftDiff));
+          BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
         InsertNewInstBefore(Shift, I);
         
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+        return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
       }
       
       // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
@@ -6209,23 +6703,23 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
         assert(ShiftOp->getOpcode() == Instruction::LShr ||
                ShiftOp->getOpcode() == Instruction::AShr);
         Instruction *Shift =
-          BinaryOperator::create(ShiftOp->getOpcode(), X,
+          BinaryOperator::Create(ShiftOp->getOpcode(), X,
                                  ConstantInt::get(Ty, ShiftDiff));
         InsertNewInstBefore(Shift, I);
         
         APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+        return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
       }
       
       // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
       if (I.getOpcode() == Instruction::LShr) {
         assert(ShiftOp->getOpcode() == Instruction::Shl);
         Instruction *Shift =
-          BinaryOperator::createShl(X, ConstantInt::get(Ty, ShiftDiff));
+          BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
         InsertNewInstBefore(Shift, I);
         
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
-        return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+        return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
       }
       
       // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
@@ -6340,14 +6834,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
       Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
     // otherwise multiply the amount and the number of elements
     else if (Scale != 1) {
-      Instruction *Tmp = BinaryOperator::createMul(Amt, NumElements, "tmp");
+      Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
       Amt = InsertNewInstBefore(Tmp, AI);
     }
   }
   
   if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
     Value *Off = ConstantInt::get(Type::Int32Ty, Offset, true);
-    Instruction *Tmp = BinaryOperator::createAdd(Amt, Off, "tmp");
+    Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp");
     Amt = InsertNewInstBefore(Tmp, AI);
   }
   
@@ -6381,8 +6875,19 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
 ///
 /// This is a truncation operation if Ty is smaller than V->getType(), or an
 /// extension operation if Ty is larger.
-static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
-                                       unsigned CastOpc, int &NumCastsRemoved) {
+///
+/// If CastOpc is a truncation, then Ty will be a type smaller than V.  We
+/// should return true if trunc(V) can be computed by computing V in the smaller
+/// type.  If V is an instruction, then trunc(inst(x,y)) can be computed as
+/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
+/// efficiently truncated.
+///
+/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
+/// bit computed in a larger type, which is then and'd or sext_in_reg'd to get
+/// the final result.
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+                                              unsigned CastOpc,
+                                              int &NumCastsRemoved) {
   // We can always evaluate constants in another type.
   if (isa<ConstantInt>(V))
     return true;
@@ -6400,7 +6905,7 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
       // If the first operand is itself a cast, and is eliminable, do not count
       // this as an eliminable cast.  We would prefer to eliminate those two
       // casts first.
-      if (!isa<CastInst>(I->getOperand(0)))
+      if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
         ++NumCastsRemoved;
       return true;
     }
@@ -6422,6 +6927,14 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
            CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
                                       NumCastsRemoved);
 
+  case Instruction::Mul:
+    // A multiply can be truncated by truncating its operands.
+    return Ty->getBitWidth() < OrigTy->getBitWidth() && 
+           CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                      NumCastsRemoved) &&
+           CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+                                      NumCastsRemoved);
+
   case Instruction::Shl:
     // If we are truncating the result of this SHL, and if it's a shift of a
     // constant amount, we can always perform a SHL in a smaller type.
@@ -6457,8 +6970,17 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
     // of casts in the input.
     if (I->getOpcode() == CastOpc)
       return true;
-    
     break;
+      
+  case Instruction::PHI: {
+    // We can change a phi if we can change all operands.
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
+                                      NumCastsRemoved))
+        return false;
+    return true;
+  }
   default:
     // TODO: Can handle more cases here.
     break;
@@ -6481,6 +7003,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
   switch (I->getOpcode()) {
   case Instruction::Add:
   case Instruction::Sub:
+  case Instruction::Mul:
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
@@ -6489,8 +7012,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
   case Instruction::Shl: {
     Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
     Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
-    Res = BinaryOperator::create((Instruction::BinaryOps)I->getOpcode(),
-                                 LHS, RHS, I->getName());
+    Res = BinaryOperator::Create((Instruction::BinaryOps)I->getOpcode(),
+                                 LHS, RHS);
     break;
   }    
   case Instruction::Trunc:
@@ -6502,16 +7025,27 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
     if (I->getOperand(0)->getType() == Ty)
       return I->getOperand(0);
     
-    // Otherwise, must be the same type of case, so just reinsert a new one.
-    Res = CastInst::create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
-                           Ty, I->getName());
+    // Otherwise, must be the same type of cast, so just reinsert a new one.
+    Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
+                           Ty);
+    break;
+  case Instruction::PHI: {
+    PHINode *OPN = cast<PHINode>(I);
+    PHINode *NPN = PHINode::Create(Ty);
+    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+      Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+      NPN->addIncoming(V, OPN->getIncomingBlock(i));
+    }
+    Res = NPN;
     break;
+  }
   default: 
     // TODO: Can handle more cases here.
     assert(0 && "Unreachable!");
     break;
   }
   
+  Res->takeName(I);
   return InsertNewInstBefore(Res, *I);
 }
 
@@ -6526,7 +7060,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
         isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
       // The first cast (CSrc) is eliminable so we need to fix up or replace
       // the second cast (CI). CSrc will then have a good chance of being dead.
-      return CastInst::create(opc, CSrc->getOperand(0), CI.getType());
+      return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
     }
   }
 
@@ -6630,9 +7164,9 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
             // If we were able to index down into an element, create the GEP
             // and bitcast the result.  This eliminates one bitcast, potentially
             // two.
-            Instruction *NGEP = new GetElementPtrInst(OrigBase, 
-                                                      NewIndices.begin(),
-                                                      NewIndices.end(), "");
+            Instruction *NGEP = GetElementPtrInst::Create(OrigBase, 
+                                                          NewIndices.begin(),
+                                                          NewIndices.end(), "");
             InsertNewInstBefore(NGEP, CI);
             NGEP->takeName(GEP);
             
@@ -6721,11 +7255,11 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
         assert(SrcBitSize < DestBitSize && "Not a zext?");
         Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize,
                                                             SrcBitSize));
-        return BinaryOperator::createAnd(Res, C);
+        return BinaryOperator::CreateAnd(Res, C);
       }
       case Instruction::SExt:
         // We need to emit a cast to truncate, then a cast to sext.
-        return CastInst::create(Instruction::SExt,
+        return CastInst::Create(Instruction::SExt,
             InsertCastBefore(Instruction::Trunc, Res, Src->getType(), 
                              CI), DestTy);
       }
@@ -6752,7 +7286,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
         Instruction::CastOps opcode = CI.getOpcode();
         Value *Op0c = InsertOperandCastBefore(opcode, Op0, DestTy, SrcI);
         Value *Op1c = InsertOperandCastBefore(opcode, Op1, DestTy, SrcI);
-        return BinaryOperator::create(
+        return BinaryOperator::Create(
             cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
       }
     }
@@ -6763,7 +7297,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
         Op1 == ConstantInt::getTrue() &&
         (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
       Value *New = InsertOperandCastBefore(Instruction::ZExt, Op0, DestTy, &CI);
-      return BinaryOperator::createXor(New, ConstantInt::get(CI.getType(), 1));
+      return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
     }
     break;
   case Instruction::SDiv:
@@ -6781,7 +7315,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
                                               Op0, DestTy, SrcI);
         Value *Op1c = InsertOperandCastBefore(Instruction::BitCast, 
                                               Op1, DestTy, SrcI);
-        return BinaryOperator::create(
+        return BinaryOperator::Create(
           cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
       }
     }
@@ -6799,7 +7333,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
           Instruction::BitCast : Instruction::Trunc);
       Value *Op0c = InsertOperandCastBefore(opcode, Op0, DestTy, SrcI);
       Value *Op1c = InsertOperandCastBefore(opcode, Op1, DestTy, SrcI);
-      return BinaryOperator::createShl(Op0c, Op1c);
+      return BinaryOperator::CreateShl(Op0c, Op1c);
     }
     break;
   case Instruction::AShr:
@@ -6811,7 +7345,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
       uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize);
       if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) {
         // Insert the new logical shift right.
-        return BinaryOperator::createLShr(Op0, Op1);
+        return BinaryOperator::CreateLShr(Op0, Op1);
       }
     }
     break;
@@ -6849,7 +7383,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
           Value *V1 = InsertCastBefore(Instruction::Trunc, SrcIOp0, Ty, CI);
           Value *V2 = InsertCastBefore(Instruction::Trunc, SrcI->getOperand(1),
                                        Ty, CI);
-          return BinaryOperator::createLShr(V1, V2);
+          return BinaryOperator::CreateLShr(V1, V2);
         }
       } else {     // This is a variable shr.
         
@@ -6860,9 +7394,9 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
           Value *One = ConstantInt::get(SrcI->getType(), 1);
 
           Value *V = InsertNewInstBefore(
-              BinaryOperator::createShl(One, SrcI->getOperand(1),
+              BinaryOperator::CreateShl(One, SrcI->getOperand(1),
                                      "tmp"), CI);
-          V = InsertNewInstBefore(BinaryOperator::createAnd(V,
+          V = InsertNewInstBefore(BinaryOperator::CreateAnd(V,
                                                             SrcI->getOperand(0),
                                                             "tmp"), CI);
           Value *Zero = Constant::getNullValue(V->getType());
@@ -6876,6 +7410,101 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   return 0;
 }
 
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+                                             bool DoXform) {
+  // If we are just checking for a icmp eq of a single bit and zext'ing it
+  // to an integer, then shift the bit to the appropriate place and then
+  // cast to integer to avoid the comparison.
+  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+    const APInt &Op1CV = Op1C->getValue();
+      
+    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
+    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
+    if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+        (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+      if (!DoXform) return ICI;
+
+      Value *In = ICI->getOperand(0);
+      Value *Sh = ConstantInt::get(In->getType(),
+                                   In->getType()->getPrimitiveSizeInBits()-1);
+      In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
+                                                        In->getName()+".lobit"),
+                               CI);
+      if (In->getType() != CI.getType())
+        In = CastInst::CreateIntegerCast(In, CI.getType(),
+                                         false/*ZExt*/, "tmp", &CI);
+
+      if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+        Constant *One = ConstantInt::get(In->getType(), 1);
+        In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One,
+                                                         In->getName()+".not"),
+                                 CI);
+      }
+
+      return ReplaceInstUsesWith(CI, In);
+    }
+      
+      
+      
+    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
+    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
+    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
+    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
+    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
+    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
+    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+    if ((Op1CV == 0 || Op1CV.isPowerOf2()) && 
+        // This only works for EQ and NE
+        ICI->isEquality()) {
+      // If Op1C some other power of two, convert:
+      uint32_t BitWidth = Op1C->getType()->getBitWidth();
+      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+      ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+        
+      APInt KnownZeroMask(~KnownZero);
+      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+        if (!DoXform) return ICI;
+
+        bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+        if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+          // (X&4) == 2 --> false
+          // (X&4) != 2 --> true
+          Constant *Res = ConstantInt::get(Type::Int1Ty, isNE);
+          Res = ConstantExpr::getZExt(Res, CI.getType());
+          return ReplaceInstUsesWith(CI, Res);
+        }
+          
+        uint32_t ShiftAmt = KnownZeroMask.logBase2();
+        Value *In = ICI->getOperand(0);
+        if (ShiftAmt) {
+          // Perform a logical shr by shiftamt.
+          // Insert the shift to put the result in the low bit.
+          In = InsertNewInstBefore(BinaryOperator::CreateLShr(In,
+                                  ConstantInt::get(In->getType(), ShiftAmt),
+                                                   In->getName()+".lobit"), CI);
+        }
+          
+        if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+          Constant *One = ConstantInt::get(In->getType(), 1);
+          In = BinaryOperator::CreateXor(In, One, "tmp");
+          InsertNewInstBefore(cast<Instruction>(In), CI);
+        }
+          
+        if (CI.getType() == In->getType())
+          return ReplaceInstUsesWith(CI, In);
+        else
+          return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+      }
+    }
+  }
+
+  return 0;
+}
+
 Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
   // If one of the common conversion will work ..
   if (Instruction *Result = commonIntCastTransforms(CI))
@@ -6900,104 +7529,36 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
         APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
         Constant *AndConst = ConstantInt::get(AndValue);
         Instruction *And = 
-          BinaryOperator::createAnd(CSrc->getOperand(0), AndConst);
+          BinaryOperator::CreateAnd(CSrc->getOperand(0), AndConst);
         // Unfortunately, if the type changed, we need to cast it back.
         if (And->getType() != CI.getType()) {
           And->setName(CSrc->getName()+".mask");
           InsertNewInstBefore(And, CI);
-          And = CastInst::createIntegerCast(And, CI.getType(), false/*ZExt*/);
+          And = CastInst::CreateIntegerCast(And, CI.getType(), false/*ZExt*/);
         }
         return And;
       }
     }
   }
 
-  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) {
-    // If we are just checking for a icmp eq of a single bit and zext'ing it
-    // to an integer, then shift the bit to the appropriate place and then
-    // cast to integer to avoid the comparison.
-    if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
-      const APInt &Op1CV = Op1C->getValue();
-      
-      // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
-      // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
-      if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
-          (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())){
-        Value *In = ICI->getOperand(0);
-        Value *Sh = ConstantInt::get(In->getType(),
-                                    In->getType()->getPrimitiveSizeInBits()-1);
-        In = InsertNewInstBefore(BinaryOperator::createLShr(In, Sh,
-                                                        In->getName()+".lobit"),
-                                 CI);
-        if (In->getType() != CI.getType())
-          In = CastInst::createIntegerCast(In, CI.getType(),
-                                           false/*ZExt*/, "tmp", &CI);
-
-        if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
-          Constant *One = ConstantInt::get(In->getType(), 1);
-          In = InsertNewInstBefore(BinaryOperator::createXor(In, One,
-                                                          In->getName()+".not"),
-                                   CI);
-        }
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+    return transformZExtICmp(ICI, CI);
 
-        return ReplaceInstUsesWith(CI, In);
-      }
-      
-      
-      
-      // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
-      // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-      // zext (X == 1) to i32 --> X        iff X has only the low bit set.
-      // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
-      // zext (X != 0) to i32 --> X        iff X has only the low bit set.
-      // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
-      // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
-      // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
-      if ((Op1CV == 0 || Op1CV.isPowerOf2()) && 
-          // This only works for EQ and NE
-          ICI->isEquality()) {
-        // If Op1C some other power of two, convert:
-        uint32_t BitWidth = Op1C->getType()->getBitWidth();
-        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-        APInt TypeMask(APInt::getAllOnesValue(BitWidth));
-        ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
-        
-        APInt KnownZeroMask(~KnownZero);
-        if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
-          bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
-          if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
-            // (X&4) == 2 --> false
-            // (X&4) != 2 --> true
-            Constant *Res = ConstantInt::get(Type::Int1Ty, isNE);
-            Res = ConstantExpr::getZExt(Res, CI.getType());
-            return ReplaceInstUsesWith(CI, Res);
-          }
-          
-          uint32_t ShiftAmt = KnownZeroMask.logBase2();
-          Value *In = ICI->getOperand(0);
-          if (ShiftAmt) {
-            // Perform a logical shr by shiftamt.
-            // Insert the shift to put the result in the low bit.
-            In = InsertNewInstBefore(
-                   BinaryOperator::createLShr(In,
-                                     ConstantInt::get(In->getType(), ShiftAmt),
-                                              In->getName()+".lobit"), CI);
-          }
-          
-          if ((Op1CV != 0) == isNE) { // Toggle the low bit.
-            Constant *One = ConstantInt::get(In->getType(), 1);
-            In = BinaryOperator::createXor(In, One, "tmp");
-            InsertNewInstBefore(cast<Instruction>(In), CI);
-          }
-          
-          if (CI.getType() == In->getType())
-            return ReplaceInstUsesWith(CI, In);
-          else
-            return CastInst::createIntegerCast(In, CI.getType(), false/*ZExt*/);
-        }
-      }
+  BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+  if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+    // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+    // of the (zext icmp) will be transformed.
+    ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+    ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+    if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+        (transformZExtICmp(LHS, CI, false) ||
+         transformZExtICmp(RHS, CI, false))) {
+      Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI);
+      Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI);
+      return BinaryOperator::Create(Instruction::Or, LCast, RCast);
     }
-  }    
+  }
+
   return 0;
 }
 
@@ -7023,39 +7584,155 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
         Value *In = ICI->getOperand(0);
         Value *Sh = ConstantInt::get(In->getType(),
                                      In->getType()->getPrimitiveSizeInBits()-1);
-        In = InsertNewInstBefore(BinaryOperator::createAShr(In, Sh,
+        In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
                                                         In->getName()+".lobit"),
                                  CI);
         if (In->getType() != CI.getType())
-          In = CastInst::createIntegerCast(In, CI.getType(),
+          In = CastInst::CreateIntegerCast(In, CI.getType(),
                                            true/*SExt*/, "tmp", &CI);
         
         if (ICI->getPredicate() == ICmpInst::ICMP_SGT)
-          In = InsertNewInstBefore(BinaryOperator::createNot(In,
+          In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
                                      In->getName()+".not"), CI);
         
         return ReplaceInstUsesWith(CI, In);
       }
     }
   }
+
+  // See if the value being truncated is already sign extended.  If so, just
+  // eliminate the trunc/sext pair.
+  if (getOpcode(Src) == Instruction::Trunc) {
+    Value *Op = cast<User>(Src)->getOperand(0);
+    unsigned OpBits   = cast<IntegerType>(Op->getType())->getBitWidth();
+    unsigned MidBits  = cast<IntegerType>(Src->getType())->getBitWidth();
+    unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+    unsigned NumSignBits = ComputeNumSignBits(Op);
+
+    if (OpBits == DestBits) {
+      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
+      // bits, it is already ready.
+      if (NumSignBits > DestBits-MidBits)
+        return ReplaceInstUsesWith(CI, Op);
+    } else if (OpBits < DestBits) {
+      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
+      // bits, just sext from i32.
+      if (NumSignBits > OpBits-MidBits)
+        return new SExtInst(Op, CI.getType(), "tmp");
+    } else {
+      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
+      // bits, just truncate to i32.
+      if (NumSignBits > OpBits-MidBits)
+        return new TruncInst(Op, CI.getType(), "tmp");
+    }
+  }
       
   return 0;
 }
 
-Instruction *InstCombiner::visitFPTrunc(CastInst &CI) {
-  return commonCastTransforms(CI);
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+  APFloat F = CFP->getValueAPF();
+  if (F.convert(Sem, APFloat::rmNearestTiesToEven) == APFloat::opOK)
+    return ConstantFP::get(F);
+  return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    if (I->getOpcode() == Instruction::FPExt)
+      return LookThroughFPExtensions(I->getOperand(0));
+  
+  // If this value is a constant, return the constant in the smallest FP type
+  // that can accurately represent it.  This allows us to turn
+  // (float)((double)X+2.0) into x+2.0f.
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    if (CFP->getType() == Type::PPC_FP128Ty)
+      return V;  // No constant folding of this.
+    // See if the value can be truncated to float and then reextended.
+    if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+      return V;
+    if (CFP->getType() == Type::DoubleTy)
+      return V;  // Won't shrink.
+    if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+      return V;
+    // Don't try to shrink to various long double types.
+  }
+  
+  return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+  if (Instruction *I = commonCastTransforms(CI))
+    return I;
+  
+  // If we have fptrunc(add (fpextend x), (fpextend y)), where x and y are
+  // smaller than the destination type, we can eliminate the truncate by doing
+  // the add as the smaller type.  This applies to add/sub/mul/div as well as
+  // many builtins (sqrt, etc).
+  BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+  if (OpI && OpI->hasOneUse()) {
+    switch (OpI->getOpcode()) {
+    default: break;
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+    case Instruction::FDiv:
+    case Instruction::FRem:
+      const Type *SrcTy = OpI->getType();
+      Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+      Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+      if (LHSTrunc->getType() != SrcTy && 
+          RHSTrunc->getType() != SrcTy) {
+        unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+        // If the source types were both smaller than the destination type of
+        // the cast, do this xform.
+        if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize &&
+            RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) {
+          LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
+                                      CI.getType(), CI);
+          RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
+                                      CI.getType(), CI);
+          return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+        }
+      }
+      break;  
+    }
+  }
+  return 0;
 }
 
 Instruction *InstCombiner::visitFPExt(CastInst &CI) {
   return commonCastTransforms(CI);
 }
 
-Instruction *InstCombiner::visitFPToUI(CastInst &CI) {
-  return commonCastTransforms(CI);
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+  // fptoui(uitofp(X)) --> X  if the intermediate type has enough bits in its
+  // mantissa to accurately represent all values of X.  For example, do not
+  // do this with i64->float->i64.
+  if (UIToFPInst *SrcI = dyn_cast<UIToFPInst>(FI.getOperand(0)))
+    if (SrcI->getOperand(0)->getType() == FI.getType() &&
+        (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+                    SrcI->getType()->getFPMantissaWidth())
+      return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+
+  return commonCastTransforms(FI);
 }
 
-Instruction *InstCombiner::visitFPToSI(CastInst &CI) {
-  return commonCastTransforms(CI);
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+  // fptosi(sitofp(X)) --> X  if the intermediate type has enough bits in its
+  // mantissa to accurately represent all values of X.  For example, do not
+  // do this with i64->float->i64.
+  if (SIToFPInst *SrcI = dyn_cast<SIToFPInst>(FI.getOperand(0)))
+    if (SrcI->getOperand(0)->getType() == FI.getType() &&
+        (int)FI.getType()->getPrimitiveSizeInBits() <= 
+                    SrcI->getType()->getFPMantissaWidth())
+      return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+  
+  return commonCastTransforms(FI);
 }
 
 Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
@@ -7070,8 +7747,58 @@ Instruction *InstCombiner::visitPtrToInt(CastInst &CI) {
   return commonPointerCastTransforms(CI);
 }
 
-Instruction *InstCombiner::visitIntToPtr(CastInst &CI) {
-  return commonCastTransforms(CI);
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+  if (Instruction *I = commonCastTransforms(CI))
+    return I;
+  
+  const Type *DestPointee = cast<PointerType>(CI.getType())->getElementType();
+  if (!DestPointee->isSized()) return 0;
+
+  // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP.
+  ConstantInt *Cst;
+  Value *X;
+  if (match(CI.getOperand(0), m_Add(m_Cast<PtrToIntInst>(m_Value(X)),
+                                    m_ConstantInt(Cst)))) {
+    // If the source and destination operands have the same type, see if this
+    // is a single-index GEP.
+    if (X->getType() == CI.getType()) {
+      // Get the size of the pointee type.
+      uint64_t Size = TD->getABITypeSize(DestPointee);
+
+      // Convert the constant to intptr type.
+      APInt Offset = Cst->getValue();
+      Offset.sextOrTrunc(TD->getPointerSizeInBits());
+
+      // If Offset is evenly divisible by Size, we can do this xform.
+      if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
+        Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
+        return GetElementPtrInst::Create(X, ConstantInt::get(Offset));
+      }
+    }
+    // TODO: Could handle other cases, e.g. where add is indexing into field of
+    // struct etc.
+  } else if (CI.getOperand(0)->hasOneUse() &&
+             match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) {
+    // Otherwise, if this is inttoptr(add x, cst), try to turn this into an
+    // "inttoptr+GEP" instead of "add+intptr".
+    
+    // Get the size of the pointee type.
+    uint64_t Size = TD->getABITypeSize(DestPointee);
+    
+    // Convert the constant to intptr type.
+    APInt Offset = Cst->getValue();
+    Offset.sextOrTrunc(TD->getPointerSizeInBits());
+    
+    // If Offset is evenly divisible by Size, we can do this xform.
+    if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
+      Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
+      
+      Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(),
+                                                            "tmp"), CI);
+      return GetElementPtrInst::Create(P, ConstantInt::get(Offset), "tmp");
+    }
+  }
+  return 0;
 }
 
 Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
@@ -7103,6 +7830,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     const Type *DstElTy = DstPTy->getElementType();
     const Type *SrcElTy = SrcPTy->getElementType();
     
+    // If the address spaces don't match, don't eliminate the bitcast, which is
+    // required for changing types.
+    if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+      return 0;
+    
     // If we are casting a malloc or alloca to a pointer to a type of the same
     // size, rewrite the allocation instruction to allocate the "right" type.
     if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
@@ -7124,8 +7856,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     // If we found a path from the src to dest, create the getelementptr now.
     if (SrcElTy == DstElTy) {
       SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-      return new GetElementPtrInst(Src, Idxs.begin(), Idxs.end(), "", 
-                                   ((Instruction*) NULL));
+      return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "", 
+                                       ((Instruction*) NULL));
     }
   }
 
@@ -7222,10 +7954,10 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
     }
 
     // Fold this by inserting a select from the input values.
-    SelectInst *NewSI = new SelectInst(SI.getCondition(), TI->getOperand(0),
-                                       FI->getOperand(0), SI.getName()+".v");
+    SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
+                                           FI->getOperand(0), SI.getName()+".v");
     InsertNewInstBefore(NewSI, SI);
-    return CastInst::create(Instruction::CastOps(TI->getOpcode()), NewSI, 
+    return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, 
                             TI->getType());
   }
 
@@ -7263,15 +7995,15 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
   }
 
   // If we reach here, they do have operations in common.
-  SelectInst *NewSI = new SelectInst(SI.getCondition(), OtherOpT,
-                                     OtherOpF, SI.getName()+".v");
+  SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
+                                         OtherOpF, SI.getName()+".v");
   InsertNewInstBefore(NewSI, SI);
 
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
     if (MatchIsOpZero)
-      return BinaryOperator::create(BO->getOpcode(), MatchOp, NewSI);
+      return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
     else
-      return BinaryOperator::create(BO->getOpcode(), NewSI, MatchOp);
+      return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
   }
   assert(0 && "Shouldn't get here");
   return 0;
@@ -7306,26 +8038,33 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
       if (C->getZExtValue()) {
         // Change: A = select B, true, C --> A = or B, C
-        return BinaryOperator::createOr(CondVal, FalseVal);
+        return BinaryOperator::CreateOr(CondVal, FalseVal);
       } else {
         // Change: A = select B, false, C --> A = and !B, C
         Value *NotCond =
-          InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
                                              "not."+CondVal->getName()), SI);
-        return BinaryOperator::createAnd(NotCond, FalseVal);
+        return BinaryOperator::CreateAnd(NotCond, FalseVal);
       }
     } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
       if (C->getZExtValue() == false) {
         // Change: A = select B, C, false --> A = and B, C
-        return BinaryOperator::createAnd(CondVal, TrueVal);
+        return BinaryOperator::CreateAnd(CondVal, TrueVal);
       } else {
         // Change: A = select B, C, true --> A = or !B, C
         Value *NotCond =
-          InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
                                              "not."+CondVal->getName()), SI);
-        return BinaryOperator::createOr(NotCond, TrueVal);
+        return BinaryOperator::CreateOr(NotCond, TrueVal);
       }
     }
+    
+    // select a, b, a  -> a&b
+    // select a, a, b  -> a|b
+    if (CondVal == TrueVal)
+      return BinaryOperator::CreateOr(CondVal, FalseVal);
+    else if (CondVal == FalseVal)
+      return BinaryOperator::CreateAnd(CondVal, TrueVal);
   }
 
   // Selecting between two integer constants?
@@ -7333,13 +8072,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
       // select C, 1, 0 -> zext C to int
       if (FalseValC->isZero() && TrueValC->getValue() == 1) {
-        return CastInst::create(Instruction::ZExt, CondVal, SI.getType());
+        return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
       } else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
         // select C, 0, 1 -> zext !C to int
         Value *NotCond =
-          InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+          InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
                                                "not."+CondVal->getName()), SI);
-        return CastInst::create(Instruction::ZExt, NotCond, SI.getType());
+        return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
       }
       
       // FIXME: Turn select 0/-1 and -1/0 into sext from condition!
@@ -7355,7 +8094,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
               Value *X = IC->getOperand(0);
               uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
               Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
-              Instruction *SRA = BinaryOperator::create(Instruction::AShr, X,
+              Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
                                                         ShAmt, "ones");
               InsertNewInstBefore(SRA, SI);
               
@@ -7368,7 +8107,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
                 opc = Instruction::SExt;
               else if (SRASize > SISize)
                 opc = Instruction::Trunc;
-              return CastInst::create(opc, SRA, SI.getType());
+              return CastInst::Create(opc, SRA, SI.getType());
             }
           }
 
@@ -7393,7 +8132,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
                 ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
                 Value *V = ICA;
                 if (ShouldNotVal)
-                  V = InsertNewInstBefore(BinaryOperator::create(
+                  V = InsertNewInstBefore(BinaryOperator::Create(
                                   Instruction::Xor, V, ICA->getOperand(1)), SI);
                 return ReplaceInstUsesWith(SI, V);
               }
@@ -7498,7 +8237,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
               NegVal = ConstantExpr::getNeg(C);
             } else {
               NegVal = InsertNewInstBefore(
-                    BinaryOperator::createNeg(SubOp->getOperand(1), "tmp"), SI);
+                    BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI);
             }
 
             Value *NewTrueOp = OtherAddOp;
@@ -7506,10 +8245,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
             if (AddOp != TI)
               std::swap(NewTrueOp, NewFalseOp);
             Instruction *NewSel =
-              new SelectInst(CondVal, NewTrueOp,NewFalseOp,SI.getName()+".p");
+              SelectInst::Create(CondVal, NewTrueOp,
+                                 NewFalseOp, SI.getName() + ".p");
 
             NewSel = InsertNewInstBefore(NewSel, SI);
-            return BinaryOperator::createAdd(SubOp->getOperand(0), NewSel);
+            return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
           }
         }
       }
@@ -7532,11 +8272,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
           if (OpToFold) {
             Constant *C = GetSelectFoldableConstant(TVI);
             Instruction *NewSel =
-              new SelectInst(SI.getCondition(), TVI->getOperand(2-OpToFold), C);
+              SelectInst::Create(SI.getCondition(),
+                                 TVI->getOperand(2-OpToFold), C);
             InsertNewInstBefore(NewSel, SI);
             NewSel->takeName(TVI);
             if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
-              return BinaryOperator::create(BO->getOpcode(), FalseVal, NewSel);
+              return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
             else {
               assert(0 && "Unknown instruction!!");
             }
@@ -7557,11 +8298,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
           if (OpToFold) {
             Constant *C = GetSelectFoldableConstant(FVI);
             Instruction *NewSel =
-              new SelectInst(SI.getCondition(), C, FVI->getOperand(2-OpToFold));
+              SelectInst::Create(SI.getCondition(), C,
+                                 FVI->getOperand(2-OpToFold));
             InsertNewInstBefore(NewSel, SI);
             NewSel->takeName(FVI);
             if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
-              return BinaryOperator::create(BO->getOpcode(), TrueVal, NewSel);
+              return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
             else
               assert(0 && "Unknown instruction!!");
           }
@@ -7578,88 +8320,195 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   return 0;
 }
 
-/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
-/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
-/// and it is more than the alignment of the ultimate object, see if we can
-/// increase the alignment of the ultimate object, making this check succeed.
-static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD,
-                                           unsigned PrefAlign = 0) {
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
-    unsigned Align = GV->getAlignment();
-    if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) 
-      Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+/// EnforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned EnforceKnownAlignment(Value *V,
+                                      unsigned Align, unsigned PrefAlign) {
+
+  User *U = dyn_cast<User>(V);
+  if (!U) return Align;
+
+  switch (getOpcode(U)) {
+  default: break;
+  case Instruction::BitCast:
+    return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+  case Instruction::GetElementPtr: {
+    // If all indexes are zero, it is just the alignment of the base pointer.
+    bool AllZeroOperands = true;
+    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+      if (!isa<Constant>(*i) ||
+          !cast<Constant>(*i)->isNullValue()) {
+        AllZeroOperands = false;
+        break;
+      }
 
+    if (AllZeroOperands) {
+      // Treat this like a bitcast.
+      return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+    }
+    break;
+  }
+  }
+
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
     // If there is a large requested alignment and we can, bump up the alignment
     // of the global.
-    if (PrefAlign > Align && GV->hasInitializer()) {
+    if (!GV->isDeclaration()) {
       GV->setAlignment(PrefAlign);
       Align = PrefAlign;
     }
-    return Align;
   } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
-    unsigned Align = AI->getAlignment();
-    if (Align == 0 && TD) {
-      if (isa<AllocaInst>(AI))
-        Align = TD->getPrefTypeAlignment(AI->getType()->getElementType());
-      else if (isa<MallocInst>(AI)) {
-        // Malloc returns maximally aligned memory.
-        Align = TD->getABITypeAlignment(AI->getType()->getElementType());
-        Align =
-          std::max(Align,
-                   (unsigned)TD->getABITypeAlignment(Type::DoubleTy));
-        Align =
-          std::max(Align,
-                   (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
-      }
-    }
-    
     // If there is a requested alignment and if this is an alloca, round up.  We
     // don't do this for malloc, because some systems can't respect the request.
-    if (PrefAlign > Align && isa<AllocaInst>(AI)) {
+    if (isa<AllocaInst>(AI)) {
       AI->setAlignment(PrefAlign);
       Align = PrefAlign;
     }
-    return Align;
-  } else if (isa<BitCastInst>(V) ||
-             (isa<ConstantExpr>(V) && 
-              cast<ConstantExpr>(V)->getOpcode() == Instruction::BitCast)) {
-    return GetOrEnforceKnownAlignment(cast<User>(V)->getOperand(0),
-                                      TD, PrefAlign);
-  } else if (User *GEPI = dyn_castGetElementPtr(V)) {
-    // If all indexes are zero, it is just the alignment of the base pointer.
-    bool AllZeroOperands = true;
-    for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
-      if (!isa<Constant>(GEPI->getOperand(i)) ||
-          !cast<Constant>(GEPI->getOperand(i))->isNullValue()) {
-        AllZeroOperands = false;
-        break;
-      }
+  }
 
-    if (AllZeroOperands) {
-      // Treat this like a bitcast.
-      return GetOrEnforceKnownAlignment(GEPI->getOperand(0), TD, PrefAlign);
-    }
+  return Align;
+}
 
-    unsigned BaseAlignment = GetOrEnforceKnownAlignment(GEPI->getOperand(0),TD);
-    if (BaseAlignment == 0) return 0;
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
+                                                  unsigned PrefAlign) {
+  unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
+                      sizeof(PrefAlign) * CHAR_BIT;
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
+  unsigned TrailZ = KnownZero.countTrailingOnes();
+  unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+  if (PrefAlign > Align)
+    Align = EnforceKnownAlignment(V, Align, PrefAlign);
+  
+    // We don't need to make any adjustment.
+  return Align;
+}
+
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+  unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
+  unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+  unsigned MinAlign = std::min(DstAlign, SrcAlign);
+  unsigned CopyAlign = MI->getAlignment()->getZExtValue();
+
+  if (CopyAlign < MinAlign) {
+    MI->setAlignment(ConstantInt::get(Type::Int32Ty, MinAlign));
+    return MI;
+  }
+  
+  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+  // load/store.
+  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
+  if (MemOpLength == 0) return 0;
+  
+  // Source and destination pointer types are always "i8*" for intrinsic.  See
+  // if the size is something we can handle with a single primitive load/store.
+  // A single load+store correctly handles overlapping memory in the memmove
+  // case.
+  unsigned Size = MemOpLength->getZExtValue();
+  if (Size == 0) return MI;  // Delete this mem transfer.
+  
+  if (Size > 8 || (Size&(Size-1)))
+    return 0;  // If not 1/2/4/8 bytes, exit.
+  
+  // Use an integer load+store unless we can find something better.
+  Type *NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3));
+  
+  // Memcpy forces the use of i8* for the source and destination.  That means
+  // that if you're using memcpy to move one double around, you'll get a cast
+  // from double* to i8*.  We'd much rather use a double load+store rather than
+  // an i64 load+store, here because this improves the odds that the source or
+  // dest address will be promotable.  See if we can find a better type than the
+  // integer datatype.
+  if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
+    const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
+    if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
+      // down through these levels if so.
+      while (!SrcETy->isSingleValueType()) {
+        if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+          if (STy->getNumElements() == 1)
+            SrcETy = STy->getElementType(0);
+          else
+            break;
+        } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+          if (ATy->getNumElements() == 1)
+            SrcETy = ATy->getElementType();
+          else
+            break;
+        } else
+          break;
+      }
+      
+      if (SrcETy->isSingleValueType())
+        NewPtrTy = PointerType::getUnqual(SrcETy);
+    }
+  }
+  
+  
+  // If the memcpy/memmove provides better alignment info than we can
+  // infer, use it.
+  SrcAlign = std::max(SrcAlign, CopyAlign);
+  DstAlign = std::max(DstAlign, CopyAlign);
+  
+  Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI);
+  Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI);
+  Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
+  InsertNewInstBefore(L, *MI);
+  InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
 
-    // Otherwise, if the base alignment is >= the alignment we expect for the
-    // base pointer type, then we know that the resultant pointer is aligned at
-    // least as much as its type requires.
-    if (!TD) return 0;
+  // Set the size of the copy to 0, it will be deleted on the next iteration.
+  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+  return MI;
+}
 
-    const Type *BasePtrTy = GEPI->getOperand(0)->getType();
-    const PointerType *PtrTy = cast<PointerType>(BasePtrTy);
-    unsigned Align = TD->getABITypeAlignment(PtrTy->getElementType());
-    if (Align <= BaseAlignment) {
-      const Type *GEPTy = GEPI->getType();
-      const PointerType *GEPPtrTy = cast<PointerType>(GEPTy);
-      Align = std::min(Align, (unsigned)
-                       TD->getABITypeAlignment(GEPPtrTy->getElementType()));
-      return Align;
-    }
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+  unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+  if (MI->getAlignment()->getZExtValue() < Alignment) {
+    MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
+    return MI;
+  }
+  
+  // Extract the length and alignment and fill if they are constant.
+  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+  if (!LenC || !FillC || FillC->getType() != Type::Int8Ty)
     return 0;
+  uint64_t Len = LenC->getZExtValue();
+  Alignment = MI->getAlignment()->getZExtValue();
+  
+  // If the length is zero, this is a no-op
+  if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+  
+  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+    const Type *ITy = IntegerType::get(Len*8);  // n=1 -> i8.
+    
+    Value *Dest = MI->getDest();
+    Dest = InsertBitCastBefore(Dest, PointerType::getUnqual(ITy), *MI);
+
+    // Alignment 0 is identity for alignment 1 for memset, but not store.
+    if (Alignment == 0) Alignment = 1;
+    
+    // Extract the fill value and store.
+    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false,
+                                      Alignment), *MI);
+    
+    // Set the size of the copy to 0, it will be deleted on the next iteration.
+    MI->setLength(Constant::getNullValue(LenC->getType()));
+    return MI;
   }
+
   return 0;
 }
 
@@ -7692,200 +8541,179 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // If we have a memmove and the source operation is a constant global,
     // then the source and dest pointers can't alias, so we can change this
     // into a call to memcpy.
-    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(II)) {
+    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
       if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
         if (GVSrc->isConstant()) {
           Module *M = CI.getParent()->getParent()->getParent();
-          const char *Name;
-          if (CI.getCalledFunction()->getFunctionType()->getParamType(2) == 
-              Type::Int32Ty)
-            Name = "llvm.memcpy.i32";
+          Intrinsic::ID MemCpyID;
+          if (CI.getOperand(3)->getType() == Type::Int32Ty)
+            MemCpyID = Intrinsic::memcpy_i32;
           else
-            Name = "llvm.memcpy.i64";
-          Constant *MemCpy = M->getOrInsertFunction(Name,
-                                     CI.getCalledFunction()->getFunctionType());
-          CI.setOperand(0, MemCpy);
+            MemCpyID = Intrinsic::memcpy_i64;
+          CI.setOperand(0, Intrinsic::getDeclaration(M, MemCpyID));
           Changed = true;
         }
+
+      // memmove(x,x,size) -> noop.
+      if (MMI->getSource() == MMI->getDest())
+        return EraseInstFromFunction(CI);
     }
 
     // If we can determine a pointer alignment that is bigger than currently
     // set, update the alignment.
     if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
-      unsigned Alignment1 = GetOrEnforceKnownAlignment(MI->getOperand(1), TD);
-      unsigned Alignment2 = GetOrEnforceKnownAlignment(MI->getOperand(2), TD);
-      unsigned Align = std::min(Alignment1, Alignment2);
-      if (MI->getAlignment()->getZExtValue() < Align) {
-        MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align));
-        Changed = true;
-      }
-
-      // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
-      // load/store.
-      ConstantInt *MemOpLength = dyn_cast<ConstantInt>(CI.getOperand(3));
-      if (MemOpLength) {
-        unsigned Size = MemOpLength->getZExtValue();
-        unsigned Align = cast<ConstantInt>(CI.getOperand(4))->getZExtValue();
-        PointerType *NewPtrTy = NULL;
-        // Destination pointer type is always i8 *
-        // If Size is 8 then use Int64Ty
-        // If Size is 4 then use Int32Ty
-        // If Size is 2 then use Int16Ty
-        // If Size is 1 then use Int8Ty
-        if (Size && Size <=8 && !(Size&(Size-1)))
-          NewPtrTy = PointerType::get(IntegerType::get(Size<<3));
-
-        if (NewPtrTy) {
-          Value *Src = InsertCastBefore(Instruction::BitCast, CI.getOperand(2),
-                                        NewPtrTy, CI);
-          Value *Dest = InsertCastBefore(Instruction::BitCast, CI.getOperand(1),
-                                         NewPtrTy, CI);
-          Value *L = new LoadInst(Src, "tmp", false, Align, &CI);
-          Value *NS = new StoreInst(L, Dest, false, Align, &CI);
-          CI.replaceAllUsesWith(NS);
-          Changed = true;
-          return EraseInstFromFunction(CI);
-        }
-      }
-    } else if (isa<MemSetInst>(MI)) {
-      unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD);
-      if (MI->getAlignment()->getZExtValue() < Alignment) {
-        MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
-        Changed = true;
-      }
+      if (Instruction *I = SimplifyMemTransfer(MI))
+        return I;
+    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+      if (Instruction *I = SimplifyMemSet(MSI))
+        return I;
     }
           
     if (Changed) return II;
-  } else {
-    switch (II->getIntrinsicID()) {
-    default: break;
-    case Intrinsic::ppc_altivec_lvx:
-    case Intrinsic::ppc_altivec_lvxl:
-    case Intrinsic::x86_sse_loadu_ps:
-    case Intrinsic::x86_sse2_loadu_pd:
-    case Intrinsic::x86_sse2_loadu_dq:
-      // Turn PPC lvx     -> load if the pointer is known aligned.
-      // Turn X86 loadups -> load if the pointer is known aligned.
-      if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
-        Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
-                                      PointerType::get(II->getType()), CI);
-        return new LoadInst(Ptr);
-      }
-      break;
-    case Intrinsic::ppc_altivec_stvx:
-    case Intrinsic::ppc_altivec_stvxl:
-      // Turn stvx -> store if the pointer is known aligned.
-      if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) {
-        const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType());
-        Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2),
-                                      OpPtrTy, CI);
-        return new StoreInst(II->getOperand(1), Ptr);
-      }
-      break;
-    case Intrinsic::x86_sse_storeu_ps:
-    case Intrinsic::x86_sse2_storeu_pd:
-    case Intrinsic::x86_sse2_storeu_dq:
-    case Intrinsic::x86_sse2_storel_dq:
-      // Turn X86 storeu -> store if the pointer is known aligned.
-      if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
-        const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
-        Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
-                                      OpPtrTy, CI);
-        return new StoreInst(II->getOperand(2), Ptr);
-      }
-      break;
+  }
+  
+  switch (II->getIntrinsicID()) {
+  default: break;
+  case Intrinsic::bswap:
+    // bswap(bswap(x)) -> x
+    if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+      if (Operand->getIntrinsicID() == Intrinsic::bswap)
+        return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+    break;
+  case Intrinsic::ppc_altivec_lvx:
+  case Intrinsic::ppc_altivec_lvxl:
+  case Intrinsic::x86_sse_loadu_ps:
+  case Intrinsic::x86_sse2_loadu_pd:
+  case Intrinsic::x86_sse2_loadu_dq:
+    // Turn PPC lvx     -> load if the pointer is known aligned.
+    // Turn X86 loadups -> load if the pointer is known aligned.
+    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+      Value *Ptr = InsertBitCastBefore(II->getOperand(1),
+                                       PointerType::getUnqual(II->getType()),
+                                       CI);
+      return new LoadInst(Ptr);
+    }
+    break;
+  case Intrinsic::ppc_altivec_stvx:
+  case Intrinsic::ppc_altivec_stvxl:
+    // Turn stvx -> store if the pointer is known aligned.
+    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+      const Type *OpPtrTy = 
+        PointerType::getUnqual(II->getOperand(1)->getType());
+      Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+      return new StoreInst(II->getOperand(1), Ptr);
+    }
+    break;
+  case Intrinsic::x86_sse_storeu_ps:
+  case Intrinsic::x86_sse2_storeu_pd:
+  case Intrinsic::x86_sse2_storeu_dq:
+  case Intrinsic::x86_sse2_storel_dq:
+    // Turn X86 storeu -> store if the pointer is known aligned.
+    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+      const Type *OpPtrTy = 
+        PointerType::getUnqual(II->getOperand(2)->getType());
+      Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+      return new StoreInst(II->getOperand(2), Ptr);
+    }
+    break;
+    
+  case Intrinsic::x86_sse_cvttss2si: {
+    // These intrinsics only demands the 0th element of its input vector.  If
+    // we can simplify the input based on that, do so now.
+    uint64_t UndefElts;
+    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1, 
+                                              UndefElts)) {
+      II->setOperand(1, V);
+      return II;
+    }
+    break;
+  }
+    
+  case Intrinsic::ppc_altivec_vperm:
+    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+      assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
       
-    case Intrinsic::x86_sse_cvttss2si: {
-      // These intrinsics only demands the 0th element of its input vector.  If
-      // we can simplify the input based on that, do so now.
-      uint64_t UndefElts;
-      if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1, 
-                                                UndefElts)) {
-        II->setOperand(1, V);
-        return II;
+      // Check that all of the elements are integer constants or undefs.
+      bool AllEltsOk = true;
+      for (unsigned i = 0; i != 16; ++i) {
+        if (!isa<ConstantInt>(Mask->getOperand(i)) && 
+            !isa<UndefValue>(Mask->getOperand(i))) {
+          AllEltsOk = false;
+          break;
+        }
       }
-      break;
-    }
       
-    case Intrinsic::ppc_altivec_vperm:
-      // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
-      if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
-        assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+      if (AllEltsOk) {
+        // Cast the input vectors to byte vectors.
+        Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
+        Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
+        Value *Result = UndefValue::get(Op0->getType());
         
-        // Check that all of the elements are integer constants or undefs.
-        bool AllEltsOk = true;
-        for (unsigned i = 0; i != 16; ++i) {
-          if (!isa<ConstantInt>(Mask->getOperand(i)) && 
-              !isa<UndefValue>(Mask->getOperand(i))) {
-            AllEltsOk = false;
-            break;
-          }
-        }
+        // Only extract each element once.
+        Value *ExtractedElts[32];
+        memset(ExtractedElts, 0, sizeof(ExtractedElts));
         
-        if (AllEltsOk) {
-          // Cast the input vectors to byte vectors.
-          Value *Op0 = InsertCastBefore(Instruction::BitCast, 
-                                        II->getOperand(1), Mask->getType(), CI);
-          Value *Op1 = InsertCastBefore(Instruction::BitCast,
-                                        II->getOperand(2), Mask->getType(), CI);
-          Value *Result = UndefValue::get(Op0->getType());
-          
-          // Only extract each element once.
-          Value *ExtractedElts[32];
-          memset(ExtractedElts, 0, sizeof(ExtractedElts));
-          
-          for (unsigned i = 0; i != 16; ++i) {
-            if (isa<UndefValue>(Mask->getOperand(i)))
-              continue;
-            unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
-            Idx &= 31;  // Match the hardware behavior.
-            
-            if (ExtractedElts[Idx] == 0) {
-              Instruction *Elt = 
-                new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
-              InsertNewInstBefore(Elt, CI);
-              ExtractedElts[Idx] = Elt;
-            }
+        for (unsigned i = 0; i != 16; ++i) {
+          if (isa<UndefValue>(Mask->getOperand(i)))
+            continue;
+          unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+          Idx &= 31;  // Match the hardware behavior.
           
-            // Insert this value into the result vector.
-            Result = new InsertElementInst(Result, ExtractedElts[Idx], i,"tmp");
-            InsertNewInstBefore(cast<Instruction>(Result), CI);
+          if (ExtractedElts[Idx] == 0) {
+            Instruction *Elt = 
+              new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
+            InsertNewInstBefore(Elt, CI);
+            ExtractedElts[Idx] = Elt;
           }
-          return CastInst::create(Instruction::BitCast, Result, CI.getType());
+        
+          // Insert this value into the result vector.
+          Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
+                                             i, "tmp");
+          InsertNewInstBefore(cast<Instruction>(Result), CI);
         }
+        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
       }
-      break;
+    }
+    break;
 
-    case Intrinsic::stackrestore: {
-      // If the save is right next to the restore, remove the restore.  This can
-      // happen when variable allocas are DCE'd.
-      if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
-        if (SS->getIntrinsicID() == Intrinsic::stacksave) {
-          BasicBlock::iterator BI = SS;
-          if (&*++BI == II)
-            return EraseInstFromFunction(CI);
-        }
-      }
-      
-      // If the stack restore is in a return/unwind block and if there are no
-      // allocas or calls between the restore and the return, nuke the restore.
-      TerminatorInst *TI = II->getParent()->getTerminator();
-      if (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)) {
-        BasicBlock::iterator BI = II;
-        bool CannotRemove = false;
-        for (++BI; &*BI != TI; ++BI) {
-          if (isa<AllocaInst>(BI) ||
-              (isa<CallInst>(BI) && !isa<IntrinsicInst>(BI))) {
-            CannotRemove = true;
-            break;
-          }
-        }
-        if (!CannotRemove)
+  case Intrinsic::stackrestore: {
+    // If the save is right next to the restore, remove the restore.  This can
+    // happen when variable allocas are DCE'd.
+    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+        BasicBlock::iterator BI = SS;
+        if (&*++BI == II)
           return EraseInstFromFunction(CI);
       }
-      break;
     }
+    
+    // Scan down this block to see if there is another stack restore in the
+    // same block without an intervening call/alloca.
+    BasicBlock::iterator BI = II;
+    TerminatorInst *TI = II->getParent()->getTerminator();
+    bool CannotRemove = false;
+    for (++BI; &*BI != TI; ++BI) {
+      if (isa<AllocaInst>(BI)) {
+        CannotRemove = true;
+        break;
+      }
+      if (isa<CallInst>(BI)) {
+        if (!isa<IntrinsicInst>(BI)) {
+          CannotRemove = true;
+          break;
+        }
+        // If there is a stackrestore below this one, remove this one.
+        return EraseInstFromFunction(CI);
+      }
     }
+    
+    // If the stack restore is in a return/unwind block and if there are no
+    // allocas or calls between the restore and the return, nuke the restore.
+    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+      return EraseInstFromFunction(CI);
+    break;
+  }
   }
 
   return visitCallSite(II);
@@ -7897,6 +8725,31 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
   return visitCallSite(&II);
 }
 
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value 
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+                                         const CastInst * const CI,
+                                         const TargetData * const TD,
+                                         const int ix) {
+  if (!CI->isLosslessCast())
+    return false;
+
+  // The size of ByVal arguments is derived from the type, so we
+  // can't change to a type with a different size.  If the size were
+  // passed explicitly we could avoid this check.
+  if (!CS.paramHasAttr(ix, ParamAttr::ByVal))
+    return true;
+
+  const Type* SrcTy = 
+            cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+  const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+  if (!SrcTy->isSized() || !DstTy->isSized())
+    return false;
+  if (TD->getABITypeSize(SrcTy) != TD->getABITypeSize(DstTy))
+    return false;
+  return true;
+}
+
 // visitCallSite - Improvements for call and invoke instructions.
 //
 Instruction *InstCombiner::visitCallSite(CallSite CS) {
@@ -7914,7 +8767,8 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
       // If the call and callee calling conventions don't match, this call must
       // be unreachable, as the call is undefined.
       new StoreInst(ConstantInt::getTrue(),
-                    UndefValue::get(PointerType::get(Type::Int1Ty)), OldCall);
+                    UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), 
+                                    OldCall);
       if (!OldCall->use_empty())
         OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
       if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
@@ -7927,7 +8781,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
     // undef so that we know that this code is not reachable, despite the fact
     // that we can't modify the CFG here.
     new StoreInst(ConstantInt::getTrue(),
-                  UndefValue::get(PointerType::get(Type::Int1Ty)),
+                  UndefValue::get(PointerType::getUnqual(Type::Int1Ty)),
                   CS.getInstruction());
 
     if (!CS.getInstruction()->use_empty())
@@ -7936,8 +8790,8 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
 
     if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
       // Don't break the CFG, insert a dummy cond branch.
-      new BranchInst(II->getNormalDest(), II->getUnwindDest(),
-                     ConstantInt::getTrue(), II);
+      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
+                         ConstantInt::getTrue(), II);
     }
     return EraseInstFromFunction(*CS.getInstruction());
   }
@@ -7950,19 +8804,23 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
   const PointerType *PTy = cast<PointerType>(Callee->getType());
   const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
   if (FTy->isVarArg()) {
+    int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
     // See if we can optimize any arguments passed through the varargs area of
     // the call.
     for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
-           E = CS.arg_end(); I != E; ++I)
-      if (CastInst *CI = dyn_cast<CastInst>(*I)) {
-        // If this cast does not effect the value passed through the varargs
-        // area, we can eliminate the use of the cast.
-        Value *Op = CI->getOperand(0);
-        if (CI->isLosslessCast()) {
-          *I = Op;
-          Changed = true;
-        }
+           E = CS.arg_end(); I != E; ++I, ++ix) {
+      CastInst *CI = dyn_cast<CastInst>(*I);
+      if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+        *I = CI->getOperand(0);
+        Changed = true;
       }
+    }
+  }
+
+  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
+    // Inline asm calls cannot throw - mark them 'nounwind'.
+    CS.setDoesNotThrow();
+    Changed = true;
   }
 
   return Changed ? CS.getInstruction() : 0;
@@ -7979,6 +8837,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     return false;
   Function *Callee = cast<Function>(CE->getOperand(0));
   Instruction *Caller = CS.getInstruction();
+  const PAListPtr &CallerPAL = CS.getParamAttrs();
 
   // Okay, this is a cast from a function to a different type.  Unless doing so
   // would cause a type conversion of one of our arguments, change this call to
@@ -7986,24 +8845,31 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   //
   const FunctionType *FT = Callee->getFunctionType();
   const Type *OldRetTy = Caller->getType();
+  const Type *NewRetTy = FT->getReturnType();
 
-  const FunctionType *ActualFT =
-    cast<FunctionType>(cast<PointerType>(CE->getType())->getElementType());
-  
-  // If the parameter attributes are not compatible, don't do the xform.  We
-  // don't want to lose an sret attribute or something.
-  if (!ParamAttrsList::areCompatible(FT->getParamAttrs(),
-                                     ActualFT->getParamAttrs()))
-    return false;
+  if (isa<StructType>(NewRetTy))
+    return false; // TODO: Handle multiple return values.
 
   // Check to see if we are changing the return type...
-  if (OldRetTy != FT->getReturnType()) {
-    if (Callee->isDeclaration() && !Caller->use_empty() && 
-        // Conversion is ok if changing from pointer to int of same size.
-        !(isa<PointerType>(FT->getReturnType()) &&
-          TD->getIntPtrType() == OldRetTy))
+  if (OldRetTy != NewRetTy) {
+    if (Callee->isDeclaration() &&
+        // Conversion is ok if changing from one pointer type to another or from
+        // a pointer to an integer of the same size.
+        !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) &&
+          (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType())))
+      return false;   // Cannot transform this return value.
+
+    if (!Caller->use_empty() &&
+        // void -> non-void is handled specially
+        NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy))
       return false;   // Cannot transform this return value.
 
+    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+      ParameterAttributes RAttrs = CallerPAL.getParamAttrs(0);
+      if (RAttrs & ParamAttr::typeIncompatible(NewRetTy))
+        return false;   // Attribute not compatible with transformed value.
+    }
+
     // If the callsite is an invoke instruction, and the return value is used by
     // a PHI node in a successor, we cannot change the return type of the call
     // because there is no place to put the cast instruction (without breaking
@@ -8025,60 +8891,55 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
     const Type *ParamTy = FT->getParamType(i);
     const Type *ActTy = (*AI)->getType();
-    ConstantInt *c = dyn_cast<ConstantInt>(*AI);
-    //Some conversions are safe even if we do not have a body.
-    //Either we can cast directly, or we can upconvert the argument
+
+    if (!CastInst::isCastable(ActTy, ParamTy))
+      return false;   // Cannot transform this parameter value.
+
+    if (CallerPAL.getParamAttrs(i + 1) & ParamAttr::typeIncompatible(ParamTy))
+      return false;   // Attribute not compatible with transformed value.
+
+    // Converting from one pointer type to another or between a pointer and an
+    // integer of the same size is safe even if we do not have a body.
     bool isConvertible = ActTy == ParamTy ||
-      (isa<PointerType>(ParamTy) && isa<PointerType>(ActTy)) ||
-      (ParamTy->isInteger() && ActTy->isInteger() &&
-       ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()) ||
-      (c && ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()
-       && c->getValue().isStrictlyPositive());
+      ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) &&
+       (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType()));
     if (Callee->isDeclaration() && !isConvertible) return false;
-
-    // Most other conversions can be done if we have a body, even if these
-    // lose information, e.g. int->short.
-    // Some conversions cannot be done at all, e.g. float to pointer.
-    // Logic here parallels CastInst::getCastOpcode (the design there
-    // requires legality checks like this be done before calling it).
-    if (ParamTy->isInteger()) {
-      if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
-        if (VActTy->getBitWidth() != ParamTy->getPrimitiveSizeInBits())
-          return false;
-      }
-      if (!ActTy->isInteger() && !ActTy->isFloatingPoint() &&
-          !isa<PointerType>(ActTy))
-        return false;
-    } else if (ParamTy->isFloatingPoint()) {
-      if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
-        if (VActTy->getBitWidth() != ParamTy->getPrimitiveSizeInBits())
-          return false;
-      }
-      if (!ActTy->isInteger() && !ActTy->isFloatingPoint())
-        return false;
-    } else if (const VectorType *VParamTy = dyn_cast<VectorType>(ParamTy)) {
-      if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
-        if (VActTy->getBitWidth() != VParamTy->getBitWidth())
-          return false;
-      }
-      if (VParamTy->getBitWidth() != ActTy->getPrimitiveSizeInBits())      
-        return false;
-    } else if (isa<PointerType>(ParamTy)) {
-      if (!ActTy->isInteger() && !isa<PointerType>(ActTy))
-        return false;
-    } else {
-      return false;
-    }
   }
 
   if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
       Callee->isDeclaration())
-    return false;   // Do not delete arguments unless we have a function body...
+    return false;   // Do not delete arguments unless we have a function body.
+
+  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+      !CallerPAL.isEmpty())
+    // In this case we have more arguments than the new function type, but we
+    // won't be dropping them.  Check that these extra arguments have attributes
+    // that are compatible with being a vararg call argument.
+    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+        break;
+      ParameterAttributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+      if (PAttrs & ParamAttr::VarArgsIncompatible)
+        return false;
+    }
 
   // Okay, we decided that this is a safe thing to do: go ahead and start
   // inserting cast instructions as necessary...
   std::vector<Value*> Args;
   Args.reserve(NumActualArgs);
+  SmallVector<ParamAttrsWithIndex, 8> attrVec;
+  attrVec.reserve(NumCommonArgs);
+
+  // Get any return attributes.
+  ParameterAttributes RAttrs = CallerPAL.getParamAttrs(0);
+
+  // If the return value is not being used, the type may not be compatible
+  // with the existing attributes.  Wipe out any problematic attributes.
+  RAttrs &= ~ParamAttr::typeIncompatible(NewRetTy);
+
+  // Add the new return attributes.
+  if (RAttrs)
+    attrVec.push_back(ParamAttrsWithIndex::get(0, RAttrs));
 
   AI = CS.arg_begin();
   for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -8088,9 +8949,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     } else {
       Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
           false, ParamTy, false);
-      CastInst *NewCast = CastInst::create(opcode, *AI, ParamTy, "tmp");
+      CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp");
       Args.push_back(InsertNewInstBefore(NewCast, *Caller));
     }
+
+    // Add any parameter attributes.
+    if (ParameterAttributes PAttrs = CallerPAL.getParamAttrs(i + 1))
+      attrVec.push_back(ParamAttrsWithIndex::get(i + 1, PAttrs));
   }
 
   // If the function takes more arguments than the call was taking, add them
@@ -8099,7 +8964,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     Args.push_back(Constant::getNullValue(FT->getParamType(i)));
 
   // If we are removing arguments to the function, emit an obnoxious warning...
-  if (FT->getNumParams() < NumActualArgs)
+  if (FT->getNumParams() < NumActualArgs) {
     if (!FT->isVarArg()) {
       cerr << "WARNING: While resolving call to function '"
            << Callee->getName() << "' arguments were dropped!\n";
@@ -8111,45 +8976,54 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
           // Must promote to pass through va_arg area!
           Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, 
                                                                 PTy, false);
-          Instruction *Cast = CastInst::create(opcode, *AI, PTy, "tmp");
+          Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp");
           InsertNewInstBefore(Cast, *Caller);
           Args.push_back(Cast);
         } else {
           Args.push_back(*AI);
         }
+
+        // Add any parameter attributes.
+        if (ParameterAttributes PAttrs = CallerPAL.getParamAttrs(i + 1))
+          attrVec.push_back(ParamAttrsWithIndex::get(i + 1, PAttrs));
       }
     }
+  }
 
-  if (FT->getReturnType() == Type::VoidTy)
+  if (NewRetTy == Type::VoidTy)
     Caller->setName("");   // Void type should not have a name.
 
+  const PAListPtr &NewCallerPAL = PAListPtr::get(attrVec.begin(),attrVec.end());
+
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
-    NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(),
-                        Args.begin(), Args.end(), Caller->getName(), Caller);
+    NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
+                            Args.begin(), Args.end(),
+                            Caller->getName(), Caller);
     cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+    cast<InvokeInst>(NC)->setParamAttrs(NewCallerPAL);
   } else {
-    NC = new CallInst(Callee, Args.begin(), Args.end(),
-                      Caller->getName(), Caller);
-    if (cast<CallInst>(Caller)->isTailCall())
+    NC = CallInst::Create(Callee, Args.begin(), Args.end(),
+                          Caller->getName(), Caller);
+    CallInst *CI = cast<CallInst>(Caller);
+    if (CI->isTailCall())
       cast<CallInst>(NC)->setTailCall();
-   cast<CallInst>(NC)->setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+    cast<CallInst>(NC)->setParamAttrs(NewCallerPAL);
   }
 
   // Insert a cast of the return type as necessary.
   Value *NV = NC;
-  if (Caller->getType() != NV->getType() && !Caller->use_empty()) {
+  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
     if (NV->getType() != Type::VoidTy) {
-      const Type *CallerTy = Caller->getType();
       Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, 
-                                                            CallerTy, false);
-      NV = NC = CastInst::create(opcode, NC, CallerTy, "tmp");
+                                                            OldRetTy, false);
+      NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
 
       // If this is an invoke instruction, we should insert it after the first
       // non-phi, instruction in the normal successor block.
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
-        BasicBlock::iterator I = II->getNormalDest()->begin();
-        while (isa<PHINode>(I)) ++I;
+        BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
         InsertNewInstBefore(NC, *I);
       } else {
         // Otherwise, it's a call, just insert cast right after the call instr
@@ -8175,27 +9049,33 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
   Value *Callee = CS.getCalledValue();
   const PointerType *PTy = cast<PointerType>(Callee->getType());
   const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+  const PAListPtr &Attrs = CS.getParamAttrs();
+
+  // If the call already has the 'nest' attribute somewhere then give up -
+  // otherwise 'nest' would occur twice after splicing in the chain.
+  if (Attrs.hasAttrSomewhere(ParamAttr::Nest))
+    return 0;
 
   IntrinsicInst *Tramp =
     cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
 
-  Function *NestF =
-    cast<Function>(IntrinsicInst::StripPointerCasts(Tramp->getOperand(2)));
+  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
   const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
   const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
 
-  if (const ParamAttrsList *NestAttrs = NestFTy->getParamAttrs()) {
+  const PAListPtr &NestAttrs = NestF->getParamAttrs();
+  if (!NestAttrs.isEmpty()) {
     unsigned NestIdx = 1;
     const Type *NestTy = 0;
-    uint16_t NestAttr = 0;
+    ParameterAttributes NestAttr = ParamAttr::None;
 
     // Look for a parameter marked with the 'nest' attribute.
     for (FunctionType::param_iterator I = NestFTy->param_begin(),
          E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
-      if (NestAttrs->paramHasAttr(NestIdx, ParamAttr::Nest)) {
+      if (NestAttrs.paramHasAttr(NestIdx, ParamAttr::Nest)) {
         // Record the parameter type and any other attributes.
         NestTy = *I;
-        NestAttr = NestAttrs->getParamAttrs(NestIdx);
+        NestAttr = NestAttrs.getParamAttrs(NestIdx);
         break;
       }
 
@@ -8204,25 +9084,37 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       std::vector<Value*> NewArgs;
       NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
 
+      SmallVector<ParamAttrsWithIndex, 8> NewAttrs;
+      NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
       // Insert the nest argument into the call argument list, which may
-      // mean appending it.
+      // mean appending it.  Likewise for attributes.
+
+      // Add any function result attributes.
+      if (ParameterAttributes Attr = Attrs.getParamAttrs(0))
+        NewAttrs.push_back(ParamAttrsWithIndex::get(0, Attr));
+
       {
         unsigned Idx = 1;
         CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
         do {
           if (Idx == NestIdx) {
-            // Add the chain argument.
+            // Add the chain argument and attributes.
             Value *NestVal = Tramp->getOperand(3);
             if (NestVal->getType() != NestTy)
               NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
             NewArgs.push_back(NestVal);
+            NewAttrs.push_back(ParamAttrsWithIndex::get(NestIdx, NestAttr));
           }
 
           if (I == E)
             break;
 
-          // Add the original argument.
+          // Add the original argument and attributes.
           NewArgs.push_back(*I);
+          if (ParameterAttributes Attr = Attrs.getParamAttrs(Idx))
+            NewAttrs.push_back
+              (ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr));
 
           ++Idx, ++I;
         } while (1);
@@ -8230,41 +9122,28 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
 
       // The trampoline may have been bitcast to a bogus type (FTy).
       // Handle this by synthesizing a new function type, equal to FTy
-      // with the chain parameter inserted.  Likewise for attributes.
+      // with the chain parameter inserted.
 
-      const ParamAttrsList *Attrs = FTy->getParamAttrs();
       std::vector<const Type*> NewTypes;
-      ParamAttrsVector NewAttrs;
       NewTypes.reserve(FTy->getNumParams()+1);
 
-      // Add any function result attributes.
-      uint16_t Attr = Attrs ? Attrs->getParamAttrs(0) : 0;
-      if (Attr)
-        NewAttrs.push_back (ParamAttrsWithIndex::get(0, Attr));
-
       // Insert the chain's type into the list of parameter types, which may
-      // mean appending it.  Likewise for the chain's attributes.
+      // mean appending it.
       {
         unsigned Idx = 1;
         FunctionType::param_iterator I = FTy->param_begin(),
           E = FTy->param_end();
 
         do {
-          if (Idx == NestIdx) {
-            // Add the chain's type and attributes.
+          if (Idx == NestIdx)
+            // Add the chain's type.
             NewTypes.push_back(NestTy);
-            NewAttrs.push_back(ParamAttrsWithIndex::get(NestIdx, NestAttr));
-          }
 
           if (I == E)
             break;
 
-          // Add the original type and attributes.
+          // Add the original type.
           NewTypes.push_back(*I);
-          Attr = Attrs ? Attrs->getParamAttrs(Idx) : 0;
-          if (Attr)
-            NewAttrs.push_back
-              (ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr));
 
           ++Idx, ++I;
         } while (1);
@@ -8273,25 +9152,27 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       // Replace the trampoline call with a direct call.  Let the generic
       // code sort out any function type mismatches.
       FunctionType *NewFTy =
-        FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg(),
-                          ParamAttrsList::get(NewAttrs));
-      Constant *NewCallee = NestF->getType() == PointerType::get(NewFTy) ?
-        NestF : ConstantExpr::getBitCast(NestF, PointerType::get(NewFTy));
+        FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
+      Constant *NewCallee = NestF->getType() == PointerType::getUnqual(NewFTy) ?
+        NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy));
+      const PAListPtr &NewPAL = PAListPtr::get(NewAttrs.begin(),NewAttrs.end());
 
       Instruction *NewCaller;
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
-        NewCaller = new InvokeInst(NewCallee,
-                                   II->getNormalDest(), II->getUnwindDest(),
-                                   NewArgs.begin(), NewArgs.end(),
-                                   Caller->getName(), Caller);
+        NewCaller = InvokeInst::Create(NewCallee,
+                                       II->getNormalDest(), II->getUnwindDest(),
+                                       NewArgs.begin(), NewArgs.end(),
+                                       Caller->getName(), Caller);
         cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+        cast<InvokeInst>(NewCaller)->setParamAttrs(NewPAL);
       } else {
-        NewCaller = new CallInst(NewCallee, NewArgs.begin(), NewArgs.end(),
-                                 Caller->getName(), Caller);
+        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
+                                     Caller->getName(), Caller);
         if (cast<CallInst>(Caller)->isTailCall())
           cast<CallInst>(NewCaller)->setTailCall();
         cast<CallInst>(NewCaller)->
           setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+        cast<CallInst>(NewCaller)->setParamAttrs(NewPAL);
       }
       if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
         Caller->replaceAllUsesWith(NewCaller);
@@ -8358,7 +9239,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   Value *InRHS = FirstInst->getOperand(1);
   PHINode *NewLHS = 0, *NewRHS = 0;
   if (LHSVal == 0) {
-    NewLHS = new PHINode(LHSType, FirstInst->getOperand(0)->getName()+".pn");
+    NewLHS = PHINode::Create(LHSType,
+                             FirstInst->getOperand(0)->getName() + ".pn");
     NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
     NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
     InsertNewInstBefore(NewLHS, PN);
@@ -8366,7 +9248,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   }
   
   if (RHSVal == 0) {
-    NewRHS = new PHINode(RHSType, FirstInst->getOperand(1)->getName()+".pn");
+    NewRHS = PHINode::Create(RHSType,
+                             FirstInst->getOperand(1)->getName() + ".pn");
     NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
     NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
     InsertNewInstBefore(NewRHS, PN);
@@ -8386,13 +9269,13 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   }
     
   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
-    return BinaryOperator::create(BinOp->getOpcode(), LHSVal, RHSVal);
+    return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
   else if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
-    return CmpInst::create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, 
+    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, 
                            RHSVal);
   else {
     assert(isa<GetElementPtrInst>(FirstInst));
-    return new GetElementPtrInst(LHSVal, RHSVal);
+    return GetElementPtrInst::Create(LHSVal, RHSVal);
   }
 }
 
@@ -8487,6 +9370,15 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
           LI->getParent() != PN.getIncomingBlock(i) ||
           !isSafeToSinkLoad(LI))
         return 0;
+      
+      // If the PHI is volatile and its block has multiple successors, sinking
+      // it would remove a load of the volatile value from the path through the
+      // other successor.
+      if (isVolatile &&
+          LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+        return 0;
+
+      
     } else if (I->getOperand(1) != ConstantOp) {
       return 0;
     }
@@ -8494,8 +9386,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
 
   // Okay, they are all the same operation.  Create a new PHI node of the
   // correct type, and PHI together all of the LHS's of the instructions.
-  PHINode *NewPN = new PHINode(FirstInst->getOperand(0)->getType(),
-                               PN.getName()+".in");
+  PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+                                   PN.getName()+".in");
   NewPN->reserveOperandSpace(PN.getNumOperands()/2);
 
   Value *InVal = FirstInst->getOperand(0);
@@ -8522,17 +9414,22 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
 
   // Insert and return the new operation.
   if (CastInst* FirstCI = dyn_cast<CastInst>(FirstInst))
-    return CastInst::create(FirstCI->getOpcode(), PhiVal, PN.getType());
-  else if (isa<LoadInst>(FirstInst))
-    return new LoadInst(PhiVal, "", isVolatile);
-  else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
-    return BinaryOperator::create(BinOp->getOpcode(), PhiVal, ConstantOp);
-  else if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
-    return CmpInst::create(CIOp->getOpcode(), CIOp->getPredicate(), 
+    return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+    return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+  if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
+    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), 
                            PhiVal, ConstantOp);
-  else
-    assert(0 && "Unknown operation");
-  return 0;
+  assert(isa<LoadInst>(FirstInst) && "Unknown operation");
+  
+  // If this was a volatile load that we are merging, make sure to loop through
+  // and mark all the input loads as non-volatile.  If we don't do this, we will
+  // insert a new volatile load and the old ones will not be deletable.
+  if (isVolatile)
+    for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+      cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+  
+  return new LoadInst(PhiVal, "", isVolatile);
 }
 
 /// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
@@ -8698,9 +9595,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   bool MadeChange = false;
   
   gep_type_iterator GTI = gep_type_begin(GEP);
-  for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) {
+  for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end();
+       i != e; ++i, ++GTI) {
     if (isa<SequentialType>(*GTI)) {
-      if (CastInst *CI = dyn_cast<CastInst>(GEP.getOperand(i))) {
+      if (CastInst *CI = dyn_cast<CastInst>(*i)) {
         if (CI->getOpcode() == Instruction::ZExt ||
             CI->getOpcode() == Instruction::SExt) {
           const Type *SrcTy = CI->getOperand(0)->getType();
@@ -8708,7 +9606,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           // is a 32-bit pointer target.
           if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
             MadeChange = true;
-            GEP.setOperand(i, CI->getOperand(0));
+            *i = CI->getOperand(0);
           }
         }
       }
@@ -8716,17 +9614,18 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // to what we need.  If the incoming value needs a cast instruction,
       // insert it.  This explicit cast can make subsequent optimizations more
       // obvious.
-      Value *Op = GEP.getOperand(i);
-      if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits())
+      Value *Op = *i;
+      if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) {
         if (Constant *C = dyn_cast<Constant>(Op)) {
-          GEP.setOperand(i, ConstantExpr::getTrunc(C, TD->getIntPtrType()));
+          *i = ConstantExpr::getTrunc(C, TD->getIntPtrType());
           MadeChange = true;
         } else {
           Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(),
                                 GEP);
-          GEP.setOperand(i, Op);
+          *i = Op;
           MadeChange = true;
         }
+      }
     }
   }
   if (MadeChange) return &GEP;
@@ -8815,7 +9714,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         if (isa<Constant>(SO1) && isa<Constant>(GO1))
           Sum = ConstantExpr::getAdd(cast<Constant>(SO1), cast<Constant>(GO1));
         else {
-          Sum = BinaryOperator::createAdd(SO1, GO1, PtrOp->getName()+".sum");
+          Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
           InsertNewInstBefore(cast<Instruction>(Sum), GEP);
         }
       }
@@ -8841,8 +9740,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     }
 
     if (!Indices.empty())
-      return new GetElementPtrInst(SrcGEPOperands[0], Indices.begin(),
-                                   Indices.end(), GEP.getName());
+      return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(),
+                                       Indices.end(), GEP.getName());
 
   } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) {
     // GEP of global variable.  If all of the indices for this GEP are
@@ -8865,8 +9764,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     if (!isa<PointerType>(X->getType())) {
       // Not interesting.  Source pointer must be a cast from pointer.
     } else if (HasZeroPointerIndex) {
-      // transform: GEP (cast [10 x ubyte]* X to [0 x ubyte]*), long 0, ...
-      // into     : GEP [10 x ubyte]* X, long 0, ...
+      // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+      // into     : GEP [10 x i8]* X, i32 0, ...
       //
       // This occurs when the program declares an array extern like "int X[];"
       //
@@ -8886,8 +9785,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           }
     } else if (GEP.getNumOperands() == 2) {
       // Transform things like:
-      // %t = getelementptr ubyte* cast ([2 x int]* %str to uint*), uint %V
-      // into:  %t1 = getelementptr [2 x int*]* %str, int 0, uint %V; cast
+      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
       const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
       const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
       if (isa<ArrayType>(SrcElTy) &&
@@ -8897,18 +9796,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         Idx[0] = Constant::getNullValue(Type::Int32Ty);
         Idx[1] = GEP.getOperand(1);
         Value *V = InsertNewInstBefore(
-               new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName()), GEP);
+               GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP);
         // V and GEP are both pointer types --> BitCast
         return new BitCastInst(V, GEP.getType());
       }
       
       // Transform things like:
-      // getelementptr sbyte* cast ([100 x double]* X to sbyte*), int %tmp
+      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
       //   (where tmp = 8*tmp2) into:
-      // getelementptr [100 x double]* %arr, int 0, int %tmp.2
+      // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
       
-      if (isa<ArrayType>(SrcElTy) &&
-          (ResElTy == Type::Int8Ty || ResElTy == Type::Int8Ty)) {
+      if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) {
         uint64_t ArrayEltSize =
             TD->getABITypeSize(cast<ArrayType>(SrcElTy)->getElementType());
         
@@ -8935,17 +9833,19 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
             NewIdx = Inst->getOperand(0);
           }
         }
-
+        
         // If the index will be to exactly the right offset with the scale taken
-        // out, perform the transformation.
-        if (Scale && Scale->getZExtValue() % ArrayEltSize == 0) {
-          if (isa<ConstantInt>(Scale))
-            Scale = ConstantInt::get(Scale->getType(),
-                                      Scale->getZExtValue() / ArrayEltSize);
+        // out, perform the transformation. Note, we don't know whether Scale is
+        // signed or not. We'll use unsigned version of division/modulo
+        // operation after making sure Scale doesn't have the sign bit set.
+        if (Scale && Scale->getSExtValue() >= 0LL &&
+            Scale->getZExtValue() % ArrayEltSize == 0) {
+          Scale = ConstantInt::get(Scale->getType(),
+                                   Scale->getZExtValue() / ArrayEltSize);
           if (Scale->getZExtValue() != 1) {
             Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
-                                                       true /*SExt*/);
-            Instruction *Sc = BinaryOperator::createMul(NewIdx, C, "idxscale");
+                                                       false /*ZExt*/);
+            Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale");
             NewIdx = InsertNewInstBefore(Sc, GEP);
           }
 
@@ -8954,7 +9854,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           Idx[0] = Constant::getNullValue(Type::Int32Ty);
           Idx[1] = NewIdx;
           Instruction *NewGEP =
-            new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName());
+            GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName());
           NewGEP = InsertNewInstBefore(NewGEP, GEP);
           // The NewGEP must be pointer typed, so must the old one -> BitCast
           return new BitCastInst(NewGEP, GEP.getType());
@@ -8968,7 +9868,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
 Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
   // Convert: malloc Ty, C - where C is a constant != 1 into: malloc [C x Ty], 1
-  if (AI.isArrayAllocation())    // Check C != 1
+  if (AI.isArrayAllocation()) {  // Check C != 1
     if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
       const Type *NewTy = 
         ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
@@ -8997,8 +9897,8 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
       Value *Idx[2];
       Idx[0] = NullIdx;
       Idx[1] = NullIdx;
-      Value *V = new GetElementPtrInst(New, Idx, Idx + 2,
-                                       New->getName()+".sub", It);
+      Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2,
+                                           New->getName()+".sub", It);
 
       // Now make everything use the getelementptr instead of the original
       // allocation.
@@ -9006,6 +9906,7 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
     } else if (isa<UndefValue>(AI.getArraySize())) {
       return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
     }
+  }
 
   // If alloca'ing a zero byte object, replace the alloca with a null pointer.
   // Note that we only do this for alloca's, because malloc should allocate and
@@ -9024,7 +9925,7 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
   if (isa<UndefValue>(Op)) {
     // Insert a new store to null because we cannot modify the CFG here.
     new StoreInst(ConstantInt::getTrue(),
-                  UndefValue::get(PointerType::get(Type::Int1Ty)), &FI);
+                  UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), &FI);
     return EraseInstFromFunction(FI);
   }
   
@@ -9061,7 +9962,7 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
 
 /// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
 static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
-					const TargetData *TD) {
+                                        const TargetData *TD) {
   User *CI = cast<User>(LI.getOperand(0));
   Value *CastOp = CI->getOperand(0);
 
@@ -9075,24 +9976,24 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
       unsigned numBits = Ty->getPrimitiveSizeInBits();
       // Replace LI with immediate integer store.
       if ((numBits >> 3) == len + 1) {
-	APInt StrVal(numBits, 0);
-	APInt SingleChar(numBits, 0);
-	if (TD->isLittleEndian()) {
-	  for (signed i = len-1; i >= 0; i--) {
-	    SingleChar = (uint64_t) Str[i];
-	    StrVal = (StrVal << 8) | SingleChar;
-	  }
-	} else {
-	  for (unsigned i = 0; i < len; i++) {
-	    SingleChar = (uint64_t) Str[i];
-		StrVal = (StrVal << 8) | SingleChar;
-	  }
-	  // Append NULL at the end.
-	  SingleChar = 0;
-	  StrVal = (StrVal << 8) | SingleChar;
-	}
-	Value *NL = ConstantInt::get(StrVal);
-	return IC.ReplaceInstUsesWith(LI, NL);
+        APInt StrVal(numBits, 0);
+        APInt SingleChar(numBits, 0);
+        if (TD->isLittleEndian()) {
+          for (signed i = len-1; i >= 0; i--) {
+            SingleChar = (uint64_t) Str[i];
+            StrVal = (StrVal << 8) | SingleChar;
+          }
+        } else {
+          for (unsigned i = 0; i < len; i++) {
+            SingleChar = (uint64_t) Str[i];
+            StrVal = (StrVal << 8) | SingleChar;
+          }
+          // Append NULL at the end.
+          SingleChar = 0;
+          StrVal = (StrVal << 8) | SingleChar;
+        }
+        Value *NL = ConstantInt::get(StrVal);
+        return IC.ReplaceInstUsesWith(LI, NL);
       }
     }
   }
@@ -9196,8 +10097,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD);
-  if (KnownAlign > LI.getAlignment())
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Op);
+  if (KnownAlign >
+      (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
+                                LI.getAlignment()))
     LI.setAlignment(KnownAlign);
 
   // load (cast X) --> cast (load X) iff safe
@@ -9220,8 +10123,11 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
         return ReplaceInstUsesWith(LI, LIB);
   }
 
-  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op))
-    if (isa<ConstantPointerNull>(GEPI->getOperand(0))) {
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+    const Value *GEPI0 = GEPI->getOperand(0);
+    // TODO: Consider a target hook for valid address spaces for this xform.
+    if (isa<ConstantPointerNull>(GEPI0) &&
+        cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) {
       // Insert a new store to null instruction before the load to indicate
       // that this code is not reachable.  We do this instead of inserting
       // an unreachable instruction directly because we cannot modify the
@@ -9230,10 +10136,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
                     Constant::getNullValue(Op->getType()), &LI);
       return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
     }
+  } 
 
   if (Constant *C = dyn_cast<Constant>(Op)) {
     // load null/undef -> undef
-    if ((C->isNullValue() || isa<UndefValue>(C))) {
+    // TODO: Consider a target hook for valid address spaces for this xform.
+    if (isa<UndefValue>(C) || (C->isNullValue() && 
+        cast<PointerType>(Op->getType())->getAddressSpace() == 0)) {
       // Insert a new store to null instruction before the load to indicate that
       // this code is not reachable.  We do this instead of inserting an
       // unreachable instruction directly because we cannot modify the CFG.
@@ -9248,7 +10157,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
         return ReplaceInstUsesWith(LI, GV->getInitializer());
 
     // Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded.
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) {
       if (CE->getOpcode() == Instruction::GetElementPtr) {
         if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
           if (GV->isConstant() && !GV->isDeclaration())
@@ -9269,6 +10178,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
         if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
           return Res;
       }
+    }
   }
     
   // If this load comes from anywhere in a constant global, and if the global
@@ -9301,7 +10211,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
                                      SI->getOperand(1)->getName()+".val"), LI);
         Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2),
                                      SI->getOperand(2)->getName()+".val"), LI);
-        return new SelectInst(SI->getCondition(), V1, V2);
+        return SelectInst::Create(SI->getCondition(), V1, V2);
       }
 
       // load (select (cond, null, P)) -> load P
@@ -9369,7 +10279,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
           NewCast = ConstantExpr::getCast(opcode, C, CastDstTy);
         else
           NewCast = IC.InsertNewInstBefore(
-            CastInst::create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"), 
+            CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"), 
             SI);
         return new StoreInst(NewCast, CastOp);
       }
@@ -9390,7 +10300,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   
   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
-  if (Ptr->hasOneUse()) {
+  if (Ptr->hasOneUse() && !SI.isVolatile()) {
     if (isa<AllocaInst>(Ptr)) {
       EraseInstFromFunction(SI);
       ++NumCombined;
@@ -9407,8 +10317,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   }
 
   // Attempt to improve the alignment.
-  unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD);
-  if (KnownAlign > SI.getAlignment())
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr);
+  if (KnownAlign >
+      (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
+                                SI.getAlignment()))
     SI.setAlignment(KnownAlign);
 
   // Do really simple DSE, to catch cases where there are several consequtive
@@ -9445,7 +10357,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     }
     
     // Don't skip over loads or things that can modify memory.
-    if (BBI->mayWriteToMemory())
+    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
       break;
   }
   
@@ -9525,8 +10437,12 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   }
   if (++PI != pred_end(DestBB))
     return false;
-  
-  
+
+  // Bail out if all the relevant blocks aren't distinct (this can happen,
+  // for example, if SI is in an infinite loop)
+  if (StoreBB == DestBB || OtherBB == DestBB)
+    return false;
+
   // Verify that the other block ends in a branch and is not otherwise empty.
   BasicBlock::iterator BBI = OtherBB->getTerminator();
   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
@@ -9559,18 +10475,19 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
           return false;
         break;
       }
-      // If we find something that may be using the stored value, or if we run
-      // out of instructions, we can't do the xform.
-      if (isa<LoadInst>(BBI) || BBI->mayWriteToMemory() ||
+      // If we find something that may be using or overwriting the stored
+      // value, or if we run out of instructions, we can't do the xform.
+      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
           BBI == OtherBB->begin())
         return false;
     }
     
     // In order to eliminate the store in OtherBr, we have to
-    // make sure nothing reads the stored value in StoreBB.
+    // make sure nothing reads or overwrites the stored value in
+    // StoreBB.
     for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
       // FIXME: This should really be AA driven.
-      if (isa<LoadInst>(I) || I->mayWriteToMemory())
+      if (I->mayReadFromMemory() || I->mayWriteToMemory())
         return false;
     }
   }
@@ -9578,7 +10495,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   // Insert a PHI node now if we need it.
   Value *MergedVal = OtherStore->getOperand(0);
   if (MergedVal != SI.getOperand(0)) {
-    PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
+    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
     PN->reserveOperandSpace(2);
     PN->addIncoming(SI.getOperand(0), SI.getParent());
     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
@@ -9587,8 +10504,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   
   // Advance to a place where it is safe to insert the new store and
   // insert it.
-  BBI = DestBB->begin();
-  while (isa<PHINode>(BBI)) ++BBI;
+  BBI = DestBB->getFirstNonPHI();
   InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
                                     OtherStore->isVolatile()), *BBI);
   
@@ -9675,6 +10591,16 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
   return 0;
 }
 
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+  // See if we are trying to extract a known value. If so, use that instead.
+  if (Value *Elt = FindInsertedValue(EV.getOperand(0), EV.idx_begin(),
+                                     EV.idx_end(), &EV))
+    return ReplaceInstUsesWith(EV, Elt);
+
+  // No changes
+  return 0;
+}
+
 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
 /// is to leave as a vector operation.
 static bool CheapToScalarize(Value *V, bool isConstant) {
@@ -9726,11 +10652,11 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
 
   std::vector<unsigned> Result;
   const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
-  for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
-    if (isa<UndefValue>(CP->getOperand(i)))
+  for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+    if (isa<UndefValue>(*i))
       Result.push_back(NElts*2);  // undef -> 8
     else
-      Result.push_back(cast<ConstantInt>(CP->getOperand(i))->getZExtValue());
+      Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
   return Result;
 }
 
@@ -9779,7 +10705,6 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
 }
 
 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
-
   // If vector val is undef, replace extract with scalar undef.
   if (isa<UndefValue>(EI.getOperand(0)))
     return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
@@ -9789,8 +10714,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
     return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
   
   if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
-    // If vector val is constant with uniform operands, replace EI
-    // with that operand
+    // If vector val is constant with all elements the same, replace EI with
+    // that element. When the elements are not identical, we cannot replace yet
+    // (we do that below, but only when the index is constant).
     Constant *op0 = C->getOperand(0);
     for (unsigned i = 1; i < C->getNumOperands(); ++i)
       if (C->getOperand(i) != op0) {
@@ -9856,13 +10782,15 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
                                    EI.getName()+".rhs");
           InsertNewInstBefore(newEI0, EI);
           InsertNewInstBefore(newEI1, EI);
-          return BinaryOperator::create(BO->getOpcode(), newEI0, newEI1);
+          return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
         }
       } else if (isa<LoadInst>(I)) {
-        Value *Ptr = InsertCastBefore(Instruction::BitCast, I->getOperand(0),
-                                      PointerType::get(EI.getType()), EI);
-        GetElementPtrInst *GEP = 
-          new GetElementPtrInst(Ptr, EI.getOperand(1), I->getName() + ".gep");
+        unsigned AS = 
+          cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace();
+        Value *Ptr = InsertBitCastBefore(I->getOperand(0),
+                                         PointerType::get(EI.getType(), AS),EI);
+        GetElementPtrInst *GEP =
+          GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep");
         InsertNewInstBefore(GEP, EI);
         return new LoadInst(GEP);
       }
@@ -10237,7 +11165,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
   assert(I->hasOneUse() && "Invariants didn't hold!");
 
   // Cannot move control-flow-involving, volatile loads, vaarg, etc.
-  if (isa<PHINode>(I) || I->mayWriteToMemory()) return false;
+  if (isa<PHINode>(I) || I->mayWriteToMemory() || isa<TerminatorInst>(I))
+    return false;
 
   // Do not sink alloca instructions out of the entry block.
   if (isa<AllocaInst>(I) && I->getParent() ==
@@ -10246,15 +11175,14 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
 
   // We can only sink load instructions if there is nothing between the load and
   // the end of block that could change the value.
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    for (BasicBlock::iterator Scan = LI, E = LI->getParent()->end();
+  if (I->mayReadFromMemory()) {
+    for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
          Scan != E; ++Scan)
       if (Scan->mayWriteToMemory())
         return false;
   }
 
-  BasicBlock::iterator InsertPos = DestBlock->begin();
-  while (isa<PHINode>(InsertPos)) ++InsertPos;
+  BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
 
   I->moveBefore(InsertPos);
   ++NumSunkInst;
@@ -10314,7 +11242,8 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
       if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
         bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
-        Worklist.push_back(BI->getSuccessor(!CondVal));
+        BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+        Worklist.push_back(ReachableBB);
         continue;
       }
     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
@@ -10322,7 +11251,8 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,
         // See if this is an explicit destination.
         for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
           if (SI->getCaseValue(i) == Cond) {
-            Worklist.push_back(SI->getSuccessor(i));
+            BasicBlock *ReachableBB = SI->getSuccessor(i);
+            Worklist.push_back(ReachableBB);
             continue;
           }
         
@@ -10402,8 +11332,20 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
       continue;
     }
 
+    if (TD && I->getType()->getTypeID() == Type::VoidTyID) {
+      // See if we can constant fold its operands.
+      for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
+        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i)) {
+          if (Constant *NewC = ConstantFoldConstantExpression(CE, TD))
+            i->set(NewC);
+        }
+      }
+    }
+
     // See if we can trivially sink this instruction to a successor basic block.
-    if (I->hasOneUse()) {
+    // FIXME: Remove GetResultInst test when first class support for aggregates
+    // is implemented.
+    if (I->hasOneUse() && !isa<GetResultInst>(I)) {
       BasicBlock *BB = I->getParent();
       BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
       if (UserParent != BB) {
@@ -10508,7 +11450,7 @@ bool InstCombiner::runOnFunction(Function &F) {
 
   // Iterate while there is work to do.
   unsigned Iteration = 0;
-  while (DoOneIteration(F, Iteration++)) 
+  while (DoOneIteration(F, Iteration++))
     EverMadeChange = true;
   return EverMadeChange;
 }