//
// The LLVM Compiler Infrastructure
//
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
-// instructions. This pass does not modify the CFG This pass is where algebraic
-// simplification happens.
+// instructions. This pass does not modify the CFG. This pass is where
+// algebraic simplification happens.
//
// This pass combines things like:
// %Y = add i32 %X, 1
#include "llvm/Pass.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
-#include "llvm/ParameterAttributes.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
+#include <climits>
#include <sstream>
using namespace llvm;
using namespace llvm::PatternMatch;
/// the work lists because they might get more simplified now.
///
void AddUsesToWorkList(Instruction &I) {
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i)))
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i))
AddToWorkList(Op);
}
Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) {
Value *R = I.getOperand(op);
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
+ for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(*i)) {
AddToWorkList(Op);
// Set the operand to undef to drop the use.
- I.setOperand(i, UndefValue::get(Op->getType()));
+ *i = UndefValue::get(Op->getType());
}
return R;
Instruction *visitAShr(BinaryOperator &I);
Instruction *visitLShr(BinaryOperator &I);
Instruction *commonShiftTransforms(BinaryOperator &I);
+ Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
Instruction *visitFCmpInst(FCmpInst &I);
Instruction *visitICmpInst(ICmpInst &I);
Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
Instruction *visitTrunc(TruncInst &CI);
Instruction *visitZExt(ZExtInst &CI);
Instruction *visitSExt(SExtInst &CI);
- Instruction *visitFPTrunc(CastInst &CI);
+ Instruction *visitFPTrunc(FPTruncInst &CI);
Instruction *visitFPExt(CastInst &CI);
- Instruction *visitFPToUI(CastInst &CI);
- Instruction *visitFPToSI(CastInst &CI);
+ Instruction *visitFPToUI(FPToUIInst &FI);
+ Instruction *visitFPToSI(FPToSIInst &FI);
Instruction *visitUIToFP(CastInst &CI);
Instruction *visitSIToFP(CastInst &CI);
Instruction *visitPtrToInt(CastInst &CI);
- Instruction *visitIntToPtr(CastInst &CI);
+ Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI);
Instruction *visitInsertElementInst(InsertElementInst &IE);
Instruction *visitExtractElementInst(ExtractElementInst &EI);
Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
+ Instruction *visitExtractValueInst(ExtractValueInst &EV);
// visitInstruction - Specify what to return for unhandled instructions...
Instruction *visitInstruction(Instruction &I) { return 0; }
Instruction *visitCallSite(CallSite CS);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS);
+ Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform = true);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
public:
// InsertNewInstBefore - insert an instruction New before instruction Old
if (Constant *CV = dyn_cast<Constant>(V))
return ConstantExpr::getCast(opc, CV, Ty);
- Instruction *C = CastInst::create(opc, V, Ty, V->getName(), &Pos);
+ Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
AddToWorkList(C);
return C;
}
+
+ Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
+ return InsertCastBefore(Instruction::BitCast, V, Ty, Pos);
+ }
+
// ReplaceInstUsesWith - This method is to be used when an instruction is
  // found to be dead, replaceable with another preexisting expression. Here
I.eraseFromParent();
return 0; // Don't do anything with FI
}
+
+ void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0) const {
+ return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ }
+
+ bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ unsigned Depth = 0) const {
+ return llvm::MaskedValueIsZero(V, Mask, TD, Depth);
+ }
+ unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0) const {
+ return llvm::ComputeNumSignBits(Op, TD, Depth);
+ }
private:
/// InsertOperandCastBefore - This inserts a cast of V to DestTy before the
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+ Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
+ Instruction *SimplifyMemSet(MemSetInst *MI);
+
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
- };
- char InstCombiner::ID = 0;
- RegisterPass<InstCombiner> X("instcombine", "Combine redundant instructions");
+ bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+ unsigned CastOpc,
+ int &NumCastsRemoved);
+ unsigned GetOrEnforceKnownAlignment(Value *V,
+ unsigned PrefAlign = 0);
+
+ };
}
+char InstCombiner::ID = 0;
+static RegisterPass<InstCombiner>
+X("instcombine", "Combine redundant instructions");
+
// getComplexity: Assign a complexity or rank value to LLVM Values...
// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
static unsigned getComplexity(Value *V) {
// Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2))
Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);
- Instruction *New = BinaryOperator::create(Opcode, Op->getOperand(0),
+ Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),
Op1->getOperand(0),
Op1->getName(), &I);
AddToWorkList(New);
// Constants can be considered to be negated values if they can be folded.
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantExpr::getNeg(C);
+
+ if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (C->getType()->getElementType()->isInteger())
+ return ConstantExpr::getNeg(C);
+
return 0;
}
return false;
}
+/// getOpcode - If this is an Instruction or a ConstantExpr, return the
+/// opcode value. Otherwise return UserOp1.
+static unsigned getOpcode(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
+ return I->getOpcode();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ return CE->getOpcode();
+ // Use UserOp1 to mean there's no opcode.
+ return Instruction::UserOp1;
+}
+
/// AddOne - Add one to a ConstantInt
static ConstantInt *AddOne(ConstantInt *C) {
APInt Val(C->getValue());
static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) {
return ConstantInt::get(C1->getValue() * C2->getValue());
}
-
-/// ComputeMaskedBits - Determine which of the bits specified in Mask are
-/// known to be either zero or one and return them in the KnownZero/KnownOne
-/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
-/// processing.
-/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
-/// we cannot optimize based on the assumption that it is zero without changing
-/// it to be an explicit zero. If we don't change it to zero, other code could
-/// optimized based on the contradictory assumption that it is non-zero.
-/// Because instcombine aggressively folds operations with undef args anyway,
-/// this won't lose us code quality.
-static void ComputeMaskedBits(Value *V, const APInt &Mask, APInt& KnownZero,
- APInt& KnownOne, unsigned Depth = 0) {
- assert(V && "No Value?");
- assert(Depth <= 6 && "Limit Search Depth");
- uint32_t BitWidth = Mask.getBitWidth();
- assert(cast<IntegerType>(V->getType())->getBitWidth() == BitWidth &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "V, Mask, KnownOne and KnownZero should have same BitWidth");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue() & Mask;
- KnownZero = ~KnownOne & Mask;
- return;
- }
-
- if (Depth == 6 || Mask == 0)
- return; // Limit search depth.
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return;
-
- KnownZero.clear(); KnownOne.clear(); // Don't know anything.
- APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
-
- switch (I->getOpcode()) {
- case Instruction::And: {
- // If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- APInt Mask2(Mask & ~KnownZero);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-1 bits are only known if set in both the LHS & RHS.
- KnownOne &= KnownOne2;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- KnownZero |= KnownZero2;
- return;
- }
- case Instruction::Or: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- APInt Mask2(Mask & ~KnownOne);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- KnownZero &= KnownZero2;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- KnownOne |= KnownOne2;
- return;
- }
- case Instruction::Xor: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
- // Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
- KnownZero = KnownZeroOut;
- return;
+/// MultiplyOverflows - True if the multiply cannot be represented in an
+/// integer of this size.
+static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
+ uint32_t W = C1->getBitWidth();
+ APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();
+ if (sign) {
+ LHSExt.sext(W * 2);
+ RHSExt.sext(W * 2);
+ } else {
+ LHSExt.zext(W * 2);
+ RHSExt.zext(W * 2);
}
- case Instruction::Select:
- ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
- // Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
- return;
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::SIToFP:
- case Instruction::PtrToInt:
- case Instruction::UIToFP:
- case Instruction::IntToPtr:
- return; // Can't work with floating point or pointers
- case Instruction::Trunc: {
- // All these have integer operands
- uint32_t SrcBitWidth =
- cast<IntegerType>(I->getOperand(0)->getType())->getBitWidth();
- APInt MaskIn(Mask);
- MaskIn.zext(SrcBitWidth);
- KnownZero.zext(SrcBitWidth);
- KnownOne.zext(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
- return;
- }
- case Instruction::BitCast: {
- const Type *SrcTy = I->getOperand(0)->getType();
- if (SrcTy->isInteger()) {
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
- return;
- }
- break;
- }
- case Instruction::ZExt: {
- // Compute the bits in the result that are not present in the input.
- const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
- uint32_t SrcBitWidth = SrcTy->getBitWidth();
-
- APInt MaskIn(Mask);
- MaskIn.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- // The top bits are known to be zero.
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- return;
- }
- case Instruction::SExt: {
- // Compute the bits in the result that are not present in the input.
- const IntegerType *SrcTy = cast<IntegerType>(I->getOperand(0)->getType());
- uint32_t SrcBitWidth = SrcTy->getBitWidth();
-
- APInt MaskIn(Mask);
- MaskIn.trunc(SrcBitWidth);
- KnownZero.trunc(SrcBitWidth);
- KnownOne.trunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ APInt MulExt = LHSExt * RHSExt;
- // If the sign bit of the input is known set or clear, then we know the
- // top bits of the result.
- if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
- KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- return;
- }
- case Instruction::Shl:
- // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- APInt Mask2(Mask.lshr(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero <<= ShiftAmt;
- KnownOne <<= ShiftAmt;
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
- return;
- }
- break;
- case Instruction::LShr:
- // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Unsigned shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
- // high bits known zero.
- KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- return;
- }
- break;
- case Instruction::AShr:
- // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // Compute the new bits that are at the top now.
- uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-
- // Signed shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
-
- APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero.
- KnownZero |= HighBits;
- else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
- KnownOne |= HighBits;
- return;
- }
- break;
- }
+ if (sign) {
+ APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
+ APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
+ return MulExt.slt(Min) || MulExt.sgt(Max);
+ } else
+ return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
}
-/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
-/// this predicate to simplify operations downstream. Mask is known to be zero
-/// for bits that V cannot have.
-static bool MaskedValueIsZero(Value *V, const APInt& Mask, unsigned Depth = 0) {
- APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, Depth);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- return (KnownZero & Mask) == Mask;
-}
/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne;
switch (I->getOpcode()) {
- default: break;
+ default:
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
if (SimplifyDemandedBits(I->getOperand(1), DemandedMask,
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) {
Instruction *Or =
- BinaryOperator::createOr(I->getOperand(0), I->getOperand(1),
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
InsertNewInstBefore(Or, *I);
return UpdateValueUsesWith(I, Or);
if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) {
Constant *AndC = ConstantInt::get(~RHSKnownOne & DemandedMask);
Instruction *And =
- BinaryOperator::createAnd(I->getOperand(0), AndC, "tmp");
+ BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");
InsertNewInstBefore(And, *I);
return UpdateValueUsesWith(I, And);
}
// Turn it into OR if input bits are zero.
if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
Instruction *Or =
- BinaryOperator::createOr(I->getOperand(0), I->getOperand(1),
+ BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
InsertNewInstBefore(Or, *I);
return UpdateValueUsesWith(I, Or);
LHSKnownZero, LHSKnownOne, Depth+1))
return true;
}
+ // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // the known zeros and ones.
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
break;
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
// the shift amount is >= the size of the datatype, which is undefined.
if (DemandedMask == 1) {
// Perform the logical shift right.
- Value *NewVal = BinaryOperator::createLShr(
+ Value *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), I->getOperand(1), I->getName());
InsertNewInstBefore(cast<Instruction>(NewVal), *I);
return UpdateValueUsesWith(I, NewVal);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (RHSKnownZero[BitWidth-ShiftAmt-1] ||
+ if (BitWidth <= ShiftAmt || RHSKnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
// Perform the logical shift right.
- Value *NewVal = BinaryOperator::createLShr(
+ Value *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), SA, I->getName());
InsertNewInstBefore(cast<Instruction>(NewVal), *I);
return UpdateValueUsesWith(I, NewVal);
}
}
break;
+ case Instruction::SRem:
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2() || (-RA).isPowerOf2()) {
+ APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA;
+ APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+ if (SimplifyDemandedBits(I->getOperand(0), Mask2,
+ LHSKnownZero, LHSKnownOne, Depth+1))
+ return true;
+
+ if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ LHSKnownZero |= ~LowBits;
+ else if (LHSKnownOne[BitWidth-1])
+ LHSKnownOne |= ~LowBits;
+
+ KnownZero |= LHSKnownZero & DemandedMask;
+ KnownOne |= LHSKnownOne & DemandedMask;
+
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ }
+ }
+ break;
+ case Instruction::URem: {
+ if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ APInt RA = Rem->getValue();
+ if (RA.isPowerOf2()) {
+ APInt LowBits = (RA - 1);
+ APInt Mask2 = LowBits & DemandedMask;
+ KnownZero |= ~LowBits & DemandedMask;
+ if (SimplifyDemandedBits(I->getOperand(0), Mask2,
+ KnownZero, KnownOne, Depth+1))
+ return true;
+
+ assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ break;
+ }
+ }
+
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ if (SimplifyDemandedBits(I->getOperand(0), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return true;
+
+ uint32_t Leaders = KnownZero2.countLeadingOnes();
+ if (SimplifyDemandedBits(I->getOperand(1), AllOnes,
+ KnownZero2, KnownOne2, Depth+1))
+ return true;
+
+ Leaders = std::max(Leaders,
+ KnownZero2.countLeadingOnes());
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ break;
+ }
+ case Instruction::Call:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap: {
+ // If the only bits demanded come from one byte of the bswap result,
+ // just shift the input byte into position to eliminate the bswap.
+ unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NTZ = DemandedMask.countTrailingZeros();
+
+ // Round NTZ down to the next byte. If we have 11 trailing zeros, then
+ // we need all the bits down to bit 8. Likewise, round NLZ. If we
+ // have 14 leading zeros, round to 8.
+ NLZ &= ~7;
+ NTZ &= ~7;
+ // If we need exactly one byte, we can do this transformation.
+ if (BitWidth-NLZ-NTZ == 8) {
+ unsigned ResultBit = NTZ;
+ unsigned InputBit = BitWidth-NTZ-8;
+
+ // Replace this with either a left or right shift to get the byte into
+ // the right place.
+ Instruction *NewVal;
+ if (InputBit > ResultBit)
+ NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+ ConstantInt::get(I->getType(), InputBit-ResultBit));
+ else
+ NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+ ConstantInt::get(I->getType(), ResultBit-InputBit));
+ NewVal->takeName(I);
+ InsertNewInstBefore(NewVal, *I);
+ return UpdateValueUsesWith(I, NewVal);
+ }
+
+ // TODO: Could compute known zero/one bits based on the input.
+ break;
+ }
+ }
+ }
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth);
+ break;
}
// If the client is only demanding bits that we know, return the known
default: assert(0 && "Case stmts out of sync!");
case Intrinsic::x86_sse_sub_ss:
case Intrinsic::x86_sse2_sub_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::createSub(LHS, RHS,
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateSub(LHS, RHS,
II->getName()), *II);
break;
case Intrinsic::x86_sse_mul_ss:
case Intrinsic::x86_sse2_mul_sd:
- TmpV = InsertNewInstBefore(BinaryOperator::createMul(LHS, RHS,
+ TmpV = InsertNewInstBefore(BinaryOperator::CreateMul(LHS, RHS,
II->getName()), *II);
break;
}
Instruction *New =
- new InsertElementInst(UndefValue::get(II->getType()), TmpV, 0U,
- II->getName());
+ InsertElementInst::Create(UndefValue::get(II->getType()), TmpV, 0U,
+ II->getName());
InsertNewInstBefore(New, *II);
AddSoonDeadInstToWorklist(*II, 0);
return New;
return MadeChange ? I : 0;
}
-/// @returns true if the specified compare predicate is
-/// true when both operands are equal...
-/// @brief Determine if the icmp Predicate is true when both operands are equal
-static bool isTrueWhenEqual(ICmpInst::Predicate pred) {
- return pred == ICmpInst::ICMP_EQ || pred == ICmpInst::ICMP_UGE ||
- pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_ULE ||
- pred == ICmpInst::ICMP_SLE;
-}
-
-/// @returns true if the specified compare instruction is
-/// true when both operands are equal...
-/// @brief Determine if the ICmpInst returns true when both operands are equal
-static bool isTrueWhenEqual(ICmpInst &ICI) {
- return isTrueWhenEqual(ICI.getPredicate());
-}
/// AssociativeOpt - Perform an optimization on an associative operator. This
/// function is designed to check a chain of associative operators for a
/// 'shouldApply' and 'apply' methods.
///
template<typename Functor>
-Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
+static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {
unsigned Opcode = Root.getOpcode();
Value *LHS = Root.getOperand(0);
// If the functor wants to apply the optimization to the RHS of LHSI,
// reassociate the expression from ((? op A) op B) to (? op (A op B))
if (ShouldApply) {
- BasicBlock *BB = Root.getParent();
-
// Now all of the instructions are in the current basic block, go ahead
// and perform the reassociation.
Instruction *TmpLHSI = cast<Instruction>(Root.getOperand(0));
}
Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI
TmpLHSI->setOperand(1, &Root); // TmpLHSI now uses the root
- TmpLHSI->getParent()->getInstList().remove(TmpLHSI);
BasicBlock::iterator ARI = &Root; ++ARI;
- BB->getInstList().insert(ARI, TmpLHSI); // Move TmpLHSI to after Root
+ TmpLHSI->moveBefore(ARI); // Move TmpLHSI to after Root
ARI = Root;
// Now propagate the ExtraOperand down the chain of instructions until we
Instruction *NextLHSI = cast<Instruction>(TmpLHSI->getOperand(0));
// Move the instruction to immediately before the chain we are
// constructing to avoid breaking dominance properties.
- NextLHSI->getParent()->getInstList().remove(NextLHSI);
- BB->getInstList().insert(ARI, NextLHSI);
+ NextLHSI->moveBefore(ARI);
ARI = NextLHSI;
Value *NextOp = NextLHSI->getOperand(1);
return 0;
}
+namespace {
// AddRHS - Implements: X + X --> X << 1
struct AddRHS {
AddRHS(Value *rhs) : RHS(rhs) {}
bool shouldApply(Value *LHS) const { return LHS == RHS; }
Instruction *apply(BinaryOperator &Add) const {
- return BinaryOperator::createShl(Add.getOperand(0),
- ConstantInt::get(Add.getType(), 1));
+ return BinaryOperator::CreateShl(Add.getOperand(0),
+ ConstantInt::get(Add.getType(), 1));
}
};
ConstantExpr::getAnd(C1, C2)->isNullValue();
}
Instruction *apply(BinaryOperator &Add) const {
- return BinaryOperator::createOr(Add.getOperand(0), Add.getOperand(1));
+ return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1));
}
};
+}
+
static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner *IC) {
if (CastInst *CI = dyn_cast<CastInst>(&I)) {
if (Constant *SOC = dyn_cast<Constant>(SO))
return ConstantExpr::getCast(CI->getOpcode(), SOC, I.getType());
- return IC->InsertNewInstBefore(CastInst::create(
+ return IC->InsertNewInstBefore(CastInst::Create(
CI->getOpcode(), SO, I.getType(), SO->getName() + ".cast"), I);
}
std::swap(Op0, Op1);
Instruction *New;
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- New = BinaryOperator::create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
+ New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op");
else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- New = CmpInst::create(CI->getOpcode(), CI->getPredicate(), Op0, Op1,
+ New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1,
SO->getName()+".cmp");
else {
assert(0 && "Unknown binary instruction type!");
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC);
- return new SelectInst(SI->getCondition(), SelectTrueVal,
- SelectFalseVal);
+ return SelectInst::Create(SI->getCondition(), SelectTrueVal,
+ SelectFalseVal);
}
return 0;
}
}
// Okay, we can do the transformation: create the new PHI node.
- PHINode *NewPN = new PHINode(I.getType(), "");
+ PHINode *NewPN = PHINode::Create(I.getType(), "");
NewPN->reserveOperandSpace(PN->getNumOperands()/2);
InsertNewInstBefore(NewPN, *PN);
NewPN->takeName(PN);
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))
- InV = BinaryOperator::create(BO->getOpcode(),
+ InV = BinaryOperator::Create(BO->getOpcode(),
PN->getIncomingValue(i), C, "phitmp",
NonConstBB->getTerminator());
else if (CmpInst *CI = dyn_cast<CmpInst>(&I))
- InV = CmpInst::create(CI->getOpcode(),
+ InV = CmpInst::Create(CI->getOpcode(),
CI->getPredicate(),
PN->getIncomingValue(i), C, "phitmp",
NonConstBB->getTerminator());
InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);
} else {
assert(PN->getIncomingBlock(i) == NonConstBB);
- InV = CastInst::create(CI->getOpcode(), PN->getIncomingValue(i),
+ InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),
I.getType(), "phitmp",
NonConstBB->getTerminator());
AddToWorkList(cast<Instruction>(InV));
return ReplaceInstUsesWith(I, NewPN);
}
+
+/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
+/// This basically requires proving that the add in the original type would not
+/// overflow to change the sign bit or have a carry out.
+bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here are some simple
+ // ones.
+
+ // Add has the property that adding any two 2's complement numbers can only
+ // have one carry bit which can change a sign. As such, if LHS and RHS each
+  // have at least two sign bits, we know that the addition of the two values
+  // will sign extend fine.
+ if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
+ return true;
+
+
+ // If one of the operands only has one non-zero bit, and if the other operand
+ // has a known-zero bit in a more significant place than it (not including the
+ // sign bit) the ripple may go up to and fill the zero, but won't change the
+ // sign. For example, (X & ~4) + 1.
+
+ // TODO: Implement.
+
+ return false;
+}
+
+
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
const APInt& Val = CI->getValue();
uint32_t BitWidth = Val.getBitWidth();
if (Val == APInt::getSignBit(BitWidth))
- return BinaryOperator::createXor(LHS, RHS);
+ return BinaryOperator::CreateXor(LHS, RHS);
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
} while (Size >= 1);
// FIXME: This shouldn't be necessary. When the backends can handle types
- // with funny bit widths then this whole cascade of if statements should
- // be removed. It is just here to get the size of the "middle" type back
- // up to something that the back ends can handle.
+ // with funny bit widths then this switch statement should be removed. It
+ // is just here to get the size of the "middle" type back up to something
+ // that the back ends can handle.
const Type *MiddleType = 0;
switch (Size) {
default: break;
}
}
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateXor(LHS, RHS);
+
// X + X --> X << 1
- if (I.getType()->isInteger() && I.getType() != Type::Int1Ty) {
+ if (I.getType()->isInteger()) {
if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result;
if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) {
}
// -A + B --> B - A
- if (Value *V = dyn_castNegVal(LHS))
- return BinaryOperator::createSub(RHS, V);
+ // -A + -B --> -(A + B)
+ if (Value *LHSV = dyn_castNegVal(LHS)) {
+ if (LHS->getType()->isIntOrIntVector()) {
+ if (Value *RHSV = dyn_castNegVal(RHS)) {
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum");
+ InsertNewInstBefore(NewAdd, I);
+ return BinaryOperator::CreateNeg(NewAdd);
+ }
+ }
+
+ return BinaryOperator::CreateSub(RHS, LHSV);
+ }
// A + -B --> A - B
if (!isa<Constant>(RHS))
if (Value *V = dyn_castNegVal(RHS))
- return BinaryOperator::createSub(LHS, V);
+ return BinaryOperator::CreateSub(LHS, V);
ConstantInt *C2;
if (Value *X = dyn_castFoldableMul(LHS, C2)) {
if (X == RHS) // X*C + X --> X * (C+1)
- return BinaryOperator::createMul(RHS, AddOne(C2));
+ return BinaryOperator::CreateMul(RHS, AddOne(C2));
// X*C1 + X*C2 --> X * (C1+C2)
ConstantInt *C1;
if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::createMul(X, Add(C1, C2));
+ return BinaryOperator::CreateMul(X, Add(C1, C2));
}
// X + X*C --> X * (C+1)
if (dyn_castFoldableMul(RHS, C2) == LHS)
- return BinaryOperator::createMul(LHS, AddOne(C2));
+ return BinaryOperator::CreateMul(LHS, AddOne(C2));
// X + ~X --> -1 since ~X = -X-1
if (dyn_castNotVal(LHS) == RHS || dyn_castNotVal(RHS) == LHS)
if (match(RHS, m_And(m_Value(), m_ConstantInt(C2))))
if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))
return R;
+
+ // A+B --> A|B iff A and B have no bits set in common.
+ if (const IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+ APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ if (LHSKnownZero != 0) {
+ APInt RHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+ // No bits in common -> bitwise or.
+ if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateOr(LHS, RHS);
+ }
+ }
+
+ // W*X + Y*Z --> W * (X+Z) iff W == Y
+ if (I.getType()->isIntOrIntVector()) {
+ Value *W, *X, *Y, *Z;
+ if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
+ match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
+ if (W != Y) {
+ if (W == Z) {
+ std::swap(Y, Z);
+ } else if (Y == X) {
+ std::swap(W, X);
+ } else if (X == Z) {
+ std::swap(Y, Z);
+ std::swap(W, X);
+ }
+ }
+
+ if (W == Y) {
+ Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z,
+ LHS->getName()), I);
+ return BinaryOperator::CreateMul(W, NewAdd);
+ }
+ }
+ }
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {
Value *X = 0;
if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
- return BinaryOperator::createSub(SubOne(CRHS), X);
+ return BinaryOperator::CreateSub(SubOne(CRHS), X);
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
if (AddRHSHighBits == AddRHSHighBitsAnd) {
// Okay, the xform is safe. Insert the new add pronto.
- Value *NewAdd = InsertNewInstBefore(BinaryOperator::createAdd(X, CRHS,
+ Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS,
LHS->getName()), I);
- return BinaryOperator::createAnd(NewAdd, C2);
+ return BinaryOperator::CreateAnd(NewAdd, C2);
}
}
}
}
// add (cast *A to intptrtype) B ->
- // cast (GEP (cast *A to sbyte*) B) ->
- // intptrtype
+ // cast (GEP (cast *A to sbyte*) B) --> intptrtype
{
CastInst *CI = dyn_cast<CastInst>(LHS);
Value *Other = RHS;
&& isa<PointerType>(CI->getOperand(0)->getType())) {
unsigned AS =
cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace();
- Value *I2 = InsertCastBefore(Instruction::BitCast, CI->getOperand(0),
- PointerType::get(Type::Int8Ty, AS), I);
- I2 = InsertNewInstBefore(new GetElementPtrInst(I2, Other, "ctg2"), I);
+ Value *I2 = InsertBitCastBefore(CI->getOperand(0),
+ PointerType::get(Type::Int8Ty, AS), I);
+ I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I);
return new PtrToIntInst(I2, CI->getType());
}
}
- // add (select (icmp 0 (sub m A)) X Y) A ->
- // add (select (icmp A m) X Y) A
- //
- // add (select X 0 (sub n A)) A ->
- // select X A n
+ // add (select X 0 (sub n A)) A --> select X A n
{
SelectInst *SI = dyn_cast<SelectInst>(LHS);
Value *Other = RHS;
SI = dyn_cast<SelectInst>(RHS);
Other = LHS;
}
- if (SI) {
+ if (SI && SI->hasOneUse()) {
Value *TV = SI->getTrueValue();
Value *FV = SI->getFalseValue();
- Value *A;
-
- // Can we fold the add into the argument of the compare?
- Value *Cond = SI->getCondition();
- if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
- Value *ICOp0 = IC->getOperand(0);
- Value *ICOp1 = IC->getOperand(1);
- ConstantInt *C3, *C4;
-
- // Check both arguments of the compare for a matching subtract.
- if (match(ICOp0, m_ConstantInt(C3)) && C3->getValue() == 0 &&
- match(ICOp1, m_Sub(m_ConstantInt(C4), m_Value(A))) &&
- A == Other) {
- // We managed to fold the add into the RHS of the select condition.
- Cond = new ICmpInst(IC->getPredicate(), A, C4, "asis", SI);
- } else if (match(ICOp1, m_ConstantInt(C3)) && C3->getValue() == 0 &&
- match(ICOp0, m_Sub(m_ConstantInt(C4), m_Value(A))) &&
- A == Other) {
- // We managed to fold the add into the LHS of the select condition.
- Cond = new ICmpInst(IC->getPredicate(), C4, A, "asis", SI);
- }
- }
+ Value *A, *N;
// Can we fold the add into the argument of the select?
// We check both true and false select arguments for a matching subtract.
- ConstantInt *C1, *C2;
- if (match(FV, m_ConstantInt(C1)) && C1->getValue() == 0 &&
- match(TV, m_Sub(m_ConstantInt(C2), m_Value(A))) &&
- A == Other) {
- // We managed to fold the add into the true select value,
- // picking up a simplified condition, if available.
- return new SelectInst(Cond, C2, A, "adselsub");
- } else if (match(TV, m_ConstantInt(C1)) && C1->getValue() == 0 &&
- match(FV, m_Sub(m_ConstantInt(C2), m_Value(A))) &&
- A == Other) {
- // We managed to fold the add into the false select value,
- // picking up a simplified condition, if available.
- return new SelectInst(Cond, A, C2, "adselsub");
- } else if (Cond != SI->getCondition()) {
- // We only managed to fold the add into the select condition.
- SI->setOperand(0, Cond);
- Changed = true;
+ if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Value(A))) &&
+ A == Other) // Fold the add into the true select value.
+ return SelectInst::Create(SI->getCondition(), N, A);
+ if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Value(A))) &&
+ A == Other) // Fold the add into the false select value.
+ return SelectInst::Create(SI->getCondition(), A, N);
+ }
+ }
+
+ // Check for X+0.0. Simplify it to X if we know X is not -0.0.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
+ return ReplaceInstUsesWith(I, LHS);
+
+ // Check for (add (sext x), y), see if we can merge this into an
+ // integer add followed by a sext.
+ if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
+ // (add (sext x), cst) --> (sext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new, smaller add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add (sext x), (sext y)) --> (sext (add int x, y))
+ if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),
+ "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ // Check for (add double (sitofp x), y), see if we can merge this into an
+ // integer add followed by a promotion.
+ if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // ... if the constant fits in the integer value. This is useful for things
+ // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
+ // requires a constant pool load, and generally allows the add to be better
+ // instcombined.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ CI, "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SIToFPInst(NewAdd, I.getType());
+ }
+ }
+
+ // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0))) {
+ // Insert the new integer add.
+ Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),
+ "addconv");
+ InsertNewInstBefore(NewAdd, I);
+ return new SIToFPInst(NewAdd, I.getType());
}
}
}
-
+
return Changed ? &I : 0;
}
-// isSignBit - Return true if the value represented by the constant only has the
-// highest order bit set.
-static bool isSignBit(ConstantInt *CI) {
- uint32_t NumBits = CI->getType()->getPrimitiveSizeInBits();
- return CI->getValue() == APInt::getSignBit(NumBits);
-}
-
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// If this is a 'B = x-(-A)', change to B = x+A...
if (Value *V = dyn_castNegVal(Op1))
- return BinaryOperator::createAdd(Op0, V);
+ return BinaryOperator::CreateAdd(Op0, V);
if (isa<UndefValue>(Op0))
return ReplaceInstUsesWith(I, Op0); // undef - X -> undef
if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
// Replace (-1 - A) with (~A)...
if (C->isAllOnesValue())
- return BinaryOperator::createNot(Op1);
+ return BinaryOperator::CreateNot(Op1);
// C - ~X == X + (1+C)
Value *X = 0;
if (match(Op1, m_Not(m_Value(X))))
- return BinaryOperator::createAdd(X, AddOne(C));
+ return BinaryOperator::CreateAdd(X, AddOne(C));
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
if (C->isZero()) {
- if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1))
+ if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op1)) {
if (SI->getOpcode() == Instruction::LShr) {
if (ConstantInt *CU = dyn_cast<ConstantInt>(SI->getOperand(1))) {
// Check to see if we are shifting out everything but the sign bit.
if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
SI->getType()->getPrimitiveSizeInBits()-1) {
// Ok, the transformation is safe. Insert AShr.
- return BinaryOperator::create(Instruction::AShr,
+ return BinaryOperator::Create(Instruction::AShr,
SI->getOperand(0), CU, SI->getName());
}
}
if (CU->getLimitedValue(SI->getType()->getPrimitiveSizeInBits()) ==
SI->getType()->getPrimitiveSizeInBits()-1) {
// Ok, the transformation is safe. Insert LShr.
- return BinaryOperator::createLShr(
+ return BinaryOperator::CreateLShr(
SI->getOperand(0), CU, SI->getName());
}
}
- }
+ }
+ }
}
// Try to fold constant sub into select arguments.
return NV;
}
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateXor(Op0, Op1);
+
if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {
if (Op1I->getOpcode() == Instruction::Add &&
!Op0->getType()->isFPOrFPVector()) {
if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y
- return BinaryOperator::createNeg(Op1I->getOperand(1), I.getName());
+ return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName());
else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y
- return BinaryOperator::createNeg(Op1I->getOperand(0), I.getName());
+ return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName());
else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
// C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::createSub(Subtract(CI1, CI2),
+ return BinaryOperator::CreateSub(Subtract(CI1, CI2),
Op1I->getOperand(0));
}
}
Op1I->setOperand(1, IIOp0);
// Create the new top level add instruction...
- return BinaryOperator::createAdd(Op0, Op1);
+ return BinaryOperator::CreateAdd(Op0, Op1);
}
// Replace (A - (A & B)) with (A & ~B) if this is the only use of (A&B)...
Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0);
Value *NewNot =
- InsertNewInstBefore(BinaryOperator::createNot(OtherOp, "B.not"), I);
- return BinaryOperator::createAnd(Op0, NewNot);
+ InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I);
+ return BinaryOperator::CreateAnd(Op0, NewNot);
}
// 0 - (X sdiv C) -> (X sdiv -C)
if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
if (CSI->isZero())
if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
- return BinaryOperator::createSDiv(Op1I->getOperand(0),
+ return BinaryOperator::CreateSDiv(Op1I->getOperand(0),
ConstantExpr::getNeg(DivRHS));
// X - X*C --> X * (1-C)
ConstantInt *C2 = 0;
if (dyn_castFoldableMul(Op1I, C2) == Op0) {
Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
- return BinaryOperator::createMul(Op0, CP1);
+ return BinaryOperator::CreateMul(Op0, CP1);
}
// X - ((X / Y) * Y) --> X % Y
if (Op0 == I->getOperand(0) &&
Op1I->getOperand(1) == I->getOperand(1)) {
if (I->getOpcode() == Instruction::SDiv)
- return BinaryOperator::createSRem(Op0, Op1I->getOperand(1));
+ return BinaryOperator::CreateSRem(Op0, Op1I->getOperand(1));
if (I->getOpcode() == Instruction::UDiv)
- return BinaryOperator::createURem(Op0, Op1I->getOperand(1));
+ return BinaryOperator::CreateURem(Op0, Op1I->getOperand(1));
}
}
}
if (!Op0->getType()->isFPOrFPVector())
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
+ if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
if (Op0I->getOpcode() == Instruction::Add) {
if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X
return ReplaceInstUsesWith(I, Op0I->getOperand(1));
return ReplaceInstUsesWith(I, Op0I->getOperand(0));
} else if (Op0I->getOpcode() == Instruction::Sub) {
if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y
- return BinaryOperator::createNeg(Op0I->getOperand(1), I.getName());
+ return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName());
}
+ }
ConstantInt *C1;
if (Value *X = dyn_castFoldableMul(Op0, C1)) {
if (X == Op1) // X*C - X --> X * (C-1)
- return BinaryOperator::createMul(Op1, SubOne(C1));
+ return BinaryOperator::CreateMul(Op1, SubOne(C1));
ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
if (X == dyn_castFoldableMul(Op1, C2))
- return BinaryOperator::createMul(Op1, Subtract(C1, C2));
+ return BinaryOperator::CreateMul(X, Subtract(C1, C2));
}
return 0;
}
case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
- return RHS->getValue() ==
- APInt::getSignBit(RHS->getType()->getPrimitiveSizeInBits());
+ return RHS->getValue().isSignBit();
default:
return false;
}
if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
if (SI->getOpcode() == Instruction::Shl)
if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
- return BinaryOperator::createMul(SI->getOperand(0),
+ return BinaryOperator::CreateMul(SI->getOperand(0),
ConstantExpr::getShl(CI, ShOp));
if (CI->isZero())
if (CI->equalsInt(1)) // X * 1 == X
return ReplaceInstUsesWith(I, Op0);
if (CI->isAllOnesValue()) // X * -1 == 0 - X
- return BinaryOperator::createNeg(Op0, I.getName());
+ return BinaryOperator::CreateNeg(Op0, I.getName());
const APInt& Val = cast<ConstantInt>(CI)->getValue();
if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
- return BinaryOperator::createShl(Op0,
+ return BinaryOperator::CreateShl(Op0,
ConstantInt::get(Op0->getType(), Val.logBase2()));
}
} else if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) {
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() &&
- isa<ConstantInt>(Op0I->getOperand(1))) {
+ isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) {
// Canonicalize (X+C1)*C2 -> X*C2+C1*C2.
- Instruction *Add = BinaryOperator::createMul(Op0I->getOperand(0),
+ Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0),
Op1, "tmp");
InsertNewInstBefore(Add, I);
Value *C1C2 = ConstantExpr::getMul(Op1,
cast<Constant>(Op0I->getOperand(1)));
- return BinaryOperator::createAdd(Add, C1C2);
+ return BinaryOperator::CreateAdd(Add, C1C2);
}
if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y
if (Value *Op1v = dyn_castNegVal(I.getOperand(1)))
- return BinaryOperator::createMul(Op0v, Op1v);
+ return BinaryOperator::CreateMul(Op0v, Op1v);
+
+ if (I.getType() == Type::Int1Ty)
+ return BinaryOperator::CreateAnd(Op0, I.getOperand(1));
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// See if we can simplify things based on how the boolean was originally
// formed.
CastInst *BoolCast = 0;
- if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(0)))
+ if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0))
if (CI->getOperand(0)->getType() == Type::Int1Ty)
BoolCast = CI;
if (!BoolCast)
SCOpTy->getPrimitiveSizeInBits()-1);
Value *V =
InsertNewInstBefore(
- BinaryOperator::create(Instruction::AShr, SCIOp0, Amt,
+ BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt,
BoolCast->getOperand(0)->getName()+
".mask"), I);
}
Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0;
- return BinaryOperator::createAnd(V, OtherOp);
+ return BinaryOperator::CreateAnd(V, OtherOp);
}
}
}
Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // undef / X -> 0
- if (isa<UndefValue>(Op0))
+ // undef / X -> 0 for integer.
+ // undef / X -> undef for FP (the undef could be a snan).
+ if (isa<UndefValue>(Op0)) {
+ if (Op0->getType()->isFPOrFPVector())
+ return ReplaceInstUsesWith(I, Op0);
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
// X / undef -> undef
if (isa<UndefValue>(Op1))
return ReplaceInstUsesWith(I, Op1);
- // Handle cases involving: div X, (select Cond, Y, Z)
+ // Handle cases involving: [su]div X, (select Cond, Y, Z)
+ // This does not apply for fdiv.
if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) {
- // div X, (Cond ? 0 : Y) -> div X, Y. If the div and the select are in the
- // same basic block, then we replace the select with Y, and the condition
- // of the select with false (if the cond value is in the same BB). If the
- // select has uses other than the div, this allows them to be simplified
- // also. Note that div X, Y is just as good as div X, 0 (undef)
- if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
+ // [su]div X, (Cond ? 0 : Y) -> div X, Y. If the div and the select are in
+ // the same basic block, then we replace the select with Y, and the
+ // condition of the select with false (if the cond value is in the same BB).
+ // If the select has uses other than the div, this allows them to be
+ // simplified also. Note that div X, Y is just as good as div X, 0 (undef)
+ if (ConstantInt *ST = dyn_cast<ConstantInt>(SI->getOperand(1)))
if (ST->isNullValue()) {
Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
if (CondI && CondI->getParent() == I.getParent())
return &I;
}
- // Likewise for: div X, (Cond ? Y : 0) -> div X, Y
- if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
+ // Likewise for: [su]div X, (Cond ? Y : 0) -> div X, Y
+ if (ConstantInt *ST = dyn_cast<ConstantInt>(SI->getOperand(2)))
if (ST->isNullValue()) {
Instruction *CondI = dyn_cast<Instruction>(SI->getOperand(0));
if (CondI && CondI->getParent() == I.getParent())
Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ // (sdiv X, X) --> 1 (udiv X, X) --> 1
+ if (Op0 == Op1) {
+ if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
+ ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+ std::vector<Constant*> Elts(Ty->getNumElements(), CI);
+ return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
+ }
+
+ ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+ return ReplaceInstUsesWith(I, CI);
+ }
+
if (Instruction *Common = commonDivTransforms(I))
return Common;
if (Instruction *LHS = dyn_cast<Instruction>(Op0))
if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
- return BinaryOperator::create(I.getOpcode(), LHS->getOperand(0),
- Multiply(RHS, LHSRHS));
+ if (MultiplyOverflows(RHS, LHSRHS, I.getOpcode()==Instruction::SDiv))
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ else
+ return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
+ Multiply(RHS, LHSRHS));
}
if (!RHS->isZero()) { // avoid X udiv 0
if (LHS->equalsInt(0))
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ // It can't be division by zero, hence it must be division by one.
+ if (I.getType() == Type::Int1Ty)
+ return ReplaceInstUsesWith(I, Op0);
+
return 0;
}
// if so, convert to a right shift.
if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2
- return BinaryOperator::createLShr(Op0,
+ return BinaryOperator::CreateLShr(Op0,
ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
}
const Type *NTy = N->getType();
if (uint32_t C2 = C1.logBase2()) {
Constant *C2V = ConstantInt::get(NTy, C2);
- N = InsertNewInstBefore(BinaryOperator::createAdd(N, C2V, "tmp"), I);
+ N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I);
}
- return BinaryOperator::createLShr(Op0, N);
+ return BinaryOperator::CreateLShr(Op0, N);
}
}
}
uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();
// Construct the "on true" case of the select
Constant *TC = ConstantInt::get(Op0->getType(), TSA);
- Instruction *TSI = BinaryOperator::createLShr(
+ Instruction *TSI = BinaryOperator::CreateLShr(
Op0, TC, SI->getName()+".t");
TSI = InsertNewInstBefore(TSI, I);
// Construct the "on false" case of the select
Constant *FC = ConstantInt::get(Op0->getType(), FSA);
- Instruction *FSI = BinaryOperator::createLShr(
+ Instruction *FSI = BinaryOperator::CreateLShr(
Op0, FC, SI->getName()+".f");
FSI = InsertNewInstBefore(FSI, I);
// construct the select instruction and return it.
- return new SelectInst(SI->getOperand(0), TSI, FSI, SI->getName());
+ return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName());
}
}
return 0;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// sdiv X, -1 == -X
if (RHS->isAllOnesValue())
- return BinaryOperator::createNeg(Op0);
+ return BinaryOperator::CreateNeg(Op0);
// -X/C -> X/-C
if (Value *LHSNeg = dyn_castNegVal(Op0))
- return BinaryOperator::createSDiv(LHSNeg, ConstantExpr::getNeg(RHS));
+ return BinaryOperator::CreateSDiv(LHSNeg, ConstantExpr::getNeg(RHS));
}
// If the sign bits of both operands are zero (i.e. we can prove they are
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
- return BinaryOperator::createUDiv(Op0, Op1, I.getName());
+ return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
}
return commonDivTransforms(I);
}
-/// GetFactor - If we can prove that the specified value is at least a multiple
-/// of some factor, return that factor.
-static Constant *GetFactor(Value *V) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return CI;
-
- // Unless we can be tricky, we know this is a multiple of 1.
- Constant *Result = ConstantInt::get(V->getType(), 1);
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return Result;
-
- if (I->getOpcode() == Instruction::Mul) {
- // Handle multiplies by a constant, etc.
- return ConstantExpr::getMul(GetFactor(I->getOperand(0)),
- GetFactor(I->getOperand(1)));
- } else if (I->getOpcode() == Instruction::Shl) {
- // (X<<C) -> X * (1 << C)
- if (Constant *ShRHS = dyn_cast<Constant>(I->getOperand(1))) {
- ShRHS = ConstantExpr::getShl(Result, ShRHS);
- return ConstantExpr::getMul(GetFactor(I->getOperand(0)), ShRHS);
- }
- } else if (I->getOpcode() == Instruction::And) {
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // X & 0xFFF0 is known to be a multiple of 16.
- uint32_t Zeros = RHS->getValue().countTrailingZeros();
- if (Zeros != V->getType()->getPrimitiveSizeInBits())// don't shift by "32"
- return ConstantExpr::getShl(Result,
- ConstantInt::get(Result->getType(), Zeros));
- }
- } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
- // Only handle int->int casts.
- if (!CI->isIntegerCast())
- return Result;
- Value *Op = CI->getOperand(0);
- return ConstantExpr::getCast(CI->getOpcode(), GetFactor(Op), V->getType());
- }
- return Result;
-}
-
/// This function implements the transforms on rem instructions that work
/// regardless of the kind of rem instruction it is (urem, srem, or frem). It
/// is used by the visitors to those instructions.
Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // 0 % X == 0, we don't need to preserve faults!
+ // 0 % X == 0 for integer, we don't need to preserve faults!
if (Constant *LHS = dyn_cast<Constant>(Op0))
if (LHS->isNullValue())
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- if (isa<UndefValue>(Op0)) // undef % X -> 0
+ if (isa<UndefValue>(Op0)) { // undef % X -> 0
+ if (I.getType()->isFPOrFPVector())
+ return ReplaceInstUsesWith(I, Op0); // undef % X -> undef (could be SNaN)
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+ }
if (isa<UndefValue>(Op1))
return ReplaceInstUsesWith(I, Op1); // X % undef -> undef
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
}
- // (X * C1) % C2 --> 0 iff C1 % C2 == 0
- if (ConstantExpr::getSRem(GetFactor(Op0I), RHS)->isNullValue())
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
+ // See if we can fold away this rem instruction.
+ uint32_t BitWidth = cast<IntegerType>(I.getType())->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ if (SimplifyDemandedBits(&I, APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne))
+ return &I;
}
}
// if so, convert to a bitwise and.
if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))
if (C->getValue().isPowerOf2())
- return BinaryOperator::createAnd(Op0, SubOne(C));
+ return BinaryOperator::CreateAnd(Op0, SubOne(C));
}
if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) {
isa<ConstantInt>(RHSI->getOperand(0))) {
if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) {
Constant *N1 = ConstantInt::getAllOnesValue(I.getType());
- Value *Add = InsertNewInstBefore(BinaryOperator::createAdd(RHSI, N1,
+ Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1,
"tmp"), I);
- return BinaryOperator::createAnd(Op0, Add);
+ return BinaryOperator::CreateAnd(Op0, Add);
}
}
}
if ((STO->getValue().isPowerOf2()) &&
(SFO->getValue().isPowerOf2())) {
Value *TrueAnd = InsertNewInstBefore(
- BinaryOperator::createAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
+ BinaryOperator::CreateAnd(Op0, SubOne(STO), SI->getName()+".t"), I);
Value *FalseAnd = InsertNewInstBefore(
- BinaryOperator::createAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
- return new SelectInst(SI->getOperand(0), TrueAnd, FalseAnd);
+ BinaryOperator::CreateAnd(Op0, SubOne(SFO), SI->getName()+".f"), I);
+ return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);
}
}
}
APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
// X srem Y -> X urem Y, iff X and Y don't have sign bit set
- return BinaryOperator::createURem(Op0, Op1, I.getName());
+ return BinaryOperator::CreateURem(Op0, Op1, I.getName());
}
}
bool shouldApply(Value *V) const {
if (ICmpInst *ICI = dyn_cast<ICmpInst>(V))
if (PredicatesFoldable(pred, ICI->getPredicate()))
- return (ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS ||
- ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS);
+ return ((ICI->getOperand(0) == LHS && ICI->getOperand(1) == RHS) ||
+ (ICI->getOperand(0) == RHS && ICI->getOperand(1) == LHS));
return false;
}
Instruction *apply(Instruction &Log) const {
case Instruction::Xor:
if (Op->hasOneUse()) {
// (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
- Instruction *And = BinaryOperator::createAnd(X, AndRHS);
+ Instruction *And = BinaryOperator::CreateAnd(X, AndRHS);
InsertNewInstBefore(And, TheAnd);
And->takeName(Op);
- return BinaryOperator::createXor(And, Together);
+ return BinaryOperator::CreateXor(And, Together);
}
break;
case Instruction::Or:
if (Op->hasOneUse() && Together != OpRHS) {
// (X | C1) & C2 --> (X | (C1&C2)) & C2
- Instruction *Or = BinaryOperator::createOr(X, Together);
+ Instruction *Or = BinaryOperator::CreateOr(X, Together);
InsertNewInstBefore(Or, TheAnd);
Or->takeName(Op);
- return BinaryOperator::createAnd(Or, AndRHS);
+ return BinaryOperator::CreateAnd(Or, AndRHS);
}
break;
case Instruction::Add:
return &TheAnd;
} else {
// Pull the XOR out of the AND.
- Instruction *NewAnd = BinaryOperator::createAnd(X, AndRHS);
+ Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS);
InsertNewInstBefore(NewAnd, TheAnd);
NewAnd->takeName(Op);
- return BinaryOperator::createXor(NewAnd, AndRHS);
+ return BinaryOperator::CreateXor(NewAnd, AndRHS);
}
}
}
// Make the argument unsigned.
Value *ShVal = Op->getOperand(0);
ShVal = InsertNewInstBefore(
- BinaryOperator::createLShr(ShVal, OpRHS,
+ BinaryOperator::CreateLShr(ShVal, OpRHS,
Op->getName()), TheAnd);
- return BinaryOperator::createAnd(ShVal, AndRHS, TheAnd.getName());
+ return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
}
}
break;
// Emit V-Lo <u Hi-Lo
Constant *NegLo = ConstantExpr::getNeg(Lo);
- Instruction *Add = BinaryOperator::createAdd(V, NegLo, V->getName()+".off");
+ Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
InsertNewInstBefore(Add, IB);
Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);
// Emit V-Lo >u Hi-1-Lo
// Note that Hi has already had one subtracted from it, above.
ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
- Instruction *Add = BinaryOperator::createAdd(V, NegLo, V->getName()+".off");
+ Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off");
InsertNewInstBefore(Add, IB);
Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);
Instruction *New;
if (isSub)
- New = BinaryOperator::createSub(LHSI->getOperand(0), RHS, "fold");
+ New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold");
else
- New = BinaryOperator::createAdd(LHSI->getOperand(0), RHS, "fold");
+ New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold");
return InsertNewInstBefore(New, I);
}
if (Op0I->hasOneUse()) {
if (MaskedValueIsZero(Op0LHS, NotAndRHS)) {
// Not masking anything out for the LHS, move to RHS.
- Instruction *NewRHS = BinaryOperator::createAnd(Op0RHS, AndRHS,
+ Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS,
Op0RHS->getName()+".masked");
InsertNewInstBefore(NewRHS, I);
- return BinaryOperator::create(
+ return BinaryOperator::Create(
cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS);
}
if (!isa<Constant>(Op0RHS) &&
MaskedValueIsZero(Op0RHS, NotAndRHS)) {
// Not masking anything out for the RHS, move to LHS.
- Instruction *NewLHS = BinaryOperator::createAnd(Op0LHS, AndRHS,
+ Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS,
Op0LHS->getName()+".masked");
InsertNewInstBefore(NewLHS, I);
- return BinaryOperator::create(
+ return BinaryOperator::Create(
cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS);
}
}
// ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
// ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
- return BinaryOperator::createAnd(V, AndRHS);
+ return BinaryOperator::CreateAnd(V, AndRHS);
if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
- return BinaryOperator::createAnd(V, AndRHS); // Add commutes
+ return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
break;
case Instruction::Sub:
// ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
// ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
- return BinaryOperator::createAnd(V, AndRHS);
+ return BinaryOperator::CreateAnd(V, AndRHS);
break;
}
if (Instruction *CastOp = dyn_cast<Instruction>(CI->getOperand(0))) {
if ((isa<TruncInst>(CI) || isa<BitCastInst>(CI)) &&
CastOp->getNumOperands() == 2)
- if (ConstantInt *AndCI = dyn_cast<ConstantInt>(CastOp->getOperand(1)))
+ if (ConstantInt *AndCI = dyn_cast<ConstantInt>(CastOp->getOperand(1))) {
if (CastOp->getOpcode() == Instruction::And) {
// Change: and (cast (and X, C1) to T), C2
// into : and (cast X to T), trunc_or_bitcast(C1)&C2
// This will fold the two constants together, which may allow
// other simplifications.
- Instruction *NewCast = CastInst::createTruncOrBitCast(
+ Instruction *NewCast = CastInst::CreateTruncOrBitCast(
CastOp->getOperand(0), I.getType(),
CastOp->getName()+".shrunk");
NewCast = InsertNewInstBefore(NewCast, I);
// trunc_or_bitcast(C1)&C2
Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType());
C3 = ConstantExpr::getAnd(C3, AndRHS);
- return BinaryOperator::createAnd(NewCast, C3);
+ return BinaryOperator::CreateAnd(NewCast, C3);
} else if (CastOp->getOpcode() == Instruction::Or) {
// Change: and (cast (or X, C1) to T), C2
// into : trunc(C1)&C2 iff trunc(C1)&C2 == C2
if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) // trunc(C1)&C2
return ReplaceInstUsesWith(I, AndRHS);
}
+ }
}
}
// (~A & ~B) == (~(A | B)) - De Morgan's Law
if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Instruction *Or = BinaryOperator::createOr(Op0NotVal, Op1NotVal,
+ Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal,
I.getName()+".demorgan");
InsertNewInstBefore(Or, I);
- return BinaryOperator::createNot(Or);
+ return BinaryOperator::CreateNot(Or);
}
{
// (A|B) & ~(A&B) -> A^B
if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::createXor(A, B);
+ return BinaryOperator::CreateXor(A, B);
}
}
// ~(A&B) & (A|B) -> A^B
if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::createXor(A, B);
+ return BinaryOperator::CreateXor(A, B);
}
}
std::swap(A, B);
}
if (A == Op0) { // A&(A^B) -> A & ~B
- Instruction *NotB = BinaryOperator::createNot(B, "tmp");
+ Instruction *NotB = BinaryOperator::CreateNot(B, "tmp");
InsertNewInstBefore(NotB, I);
- return BinaryOperator::createAnd(A, NotB);
+ return BinaryOperator::CreateAnd(A, NotB);
}
}
}
ICmpInst::isSignedPredicate(LHSCC) ==
ICmpInst::isSignedPredicate(RHSCC))) {
// Ensure that the larger constant is on the RHS.
- ICmpInst::Predicate GT = ICmpInst::isSignedPredicate(LHSCC) ?
- ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ ICmpInst::Predicate GT;
+ if (ICmpInst::isSignedPredicate(LHSCC) ||
+ (ICmpInst::isEquality(LHSCC) &&
+ ICmpInst::isSignedPredicate(RHSCC)))
+ GT = ICmpInst::ICMP_SGT;
+ else
+ GT = ICmpInst::ICMP_UGT;
+
Constant *Cmp = ConstantExpr::getICmp(GT, LHSCst, RHSCst);
ICmpInst *LHS = cast<ICmpInst>(Op0);
if (cast<ConstantInt>(Cmp)->getZExtValue()) {
case ICmpInst::ICMP_NE:
if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Instruction *Add = BinaryOperator::createAdd(LHSVal, AddCST,
+ Instruction *Add = BinaryOperator::CreateAdd(LHSVal, AddCST,
LHSVal->getName()+".off");
InsertNewInstBefore(Add, I);
return new ICmpInst(ICmpInst::ICMP_UGT, Add,
I.getType(), TD) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::createAnd(Op0C->getOperand(0),
+ Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0),
Op1C->getOperand(0),
I.getName());
InsertNewInstBefore(NewOp, I);
- return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::createAnd(SI0->getOperand(0),
+ InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0),
SI1->getOperand(0),
SI0->getName()), I);
- return BinaryOperator::create(SI1->getOpcode(), NewOp,
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
}
const Type *Tys[] = { ITy };
Module *M = I.getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
- return new CallInst(F, V);
+ return CallInst::Create(F, V);
}
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
- Instruction *Or = BinaryOperator::createOr(X, RHS);
+ Instruction *Or = BinaryOperator::CreateOr(X, RHS);
InsertNewInstBefore(Or, I);
Or->takeName(Op0);
- return BinaryOperator::createAnd(Or,
+ return BinaryOperator::CreateAnd(Or,
ConstantInt::get(RHS->getValue() | C1->getValue()));
}
// (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) {
- Instruction *Or = BinaryOperator::createOr(X, RHS);
+ Instruction *Or = BinaryOperator::CreateOr(X, RHS);
InsertNewInstBefore(Or, I);
Or->takeName(Op0);
- return BinaryOperator::createXor(Or,
+ return BinaryOperator::CreateXor(Or,
ConstantInt::get(C1->getValue() & ~RHS->getValue()));
}
// (X^C)|Y -> (X|Y)^C iff Y&C == 0
if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
MaskedValueIsZero(Op1, C1->getValue())) {
- Instruction *NOr = BinaryOperator::createOr(A, Op1);
+ Instruction *NOr = BinaryOperator::CreateOr(A, Op1);
InsertNewInstBefore(NOr, I);
NOr->takeName(Op0);
- return BinaryOperator::createXor(NOr, C1);
+ return BinaryOperator::CreateXor(NOr, C1);
}
// Y|(X^C) -> (X|Y)^C iff Y&C == 0
if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
MaskedValueIsZero(Op0, C1->getValue())) {
- Instruction *NOr = BinaryOperator::createOr(A, Op0);
+ Instruction *NOr = BinaryOperator::CreateOr(A, Op0);
InsertNewInstBefore(NOr, I);
NOr->takeName(Op0);
- return BinaryOperator::createXor(NOr, C1);
+ return BinaryOperator::CreateXor(NOr, C1);
}
// (A & C)|(B & D)
if (V1) {
Value *Or =
- InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I);
- return BinaryOperator::createAnd(V1, Or);
+ InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I);
+ return BinaryOperator::CreateAnd(V1, Or);
}
}
}
SI0->getOperand(1) == SI1->getOperand(1) &&
(SI0->hasOneUse() || SI1->hasOneUse())) {
Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::createOr(SI0->getOperand(0),
+ InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0),
SI1->getOperand(0),
SI0->getName()), I);
- return BinaryOperator::create(SI1->getOpcode(), NewOp,
+ return BinaryOperator::Create(SI1->getOpcode(), NewOp,
SI1->getOperand(1));
}
}
// (~A | ~B) == (~(A & B)) - De Morgan's Law
if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *And = InsertNewInstBefore(BinaryOperator::createAnd(A, B,
+ Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B,
I.getName()+".demorgan"), I);
- return BinaryOperator::createNot(And);
+ return BinaryOperator::CreateNot(And);
}
}
case ICmpInst::ICMP_EQ:
if (LHSCst == SubOne(RHSCst)) {// (X == 13 | X == 14) -> X-13 <u 2
Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Instruction *Add = BinaryOperator::createAdd(LHSVal, AddCST,
+ Instruction *Add = BinaryOperator::CreateAdd(LHSVal, AddCST,
LHSVal->getName()+".off");
InsertNewInstBefore(Add, I);
AddCST = Subtract(AddOne(RHSCst), LHSCst);
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
- const Type *SrcTy = Op0C->getOperand(0)->getType();
- if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
- // Only do this if the casts both really cause code to be generated.
- ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
- I.getType(), TD) &&
- ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
- I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::createOr(Op0C->getOperand(0),
- Op1C->getOperand(0),
- I.getName());
- InsertNewInstBefore(NewOp, I);
- return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+ if (!isa<ICmpInst>(Op0C->getOperand(0)) ||
+ !isa<ICmpInst>(Op1C->getOperand(0))) {
+ const Type *SrcTy = Op0C->getOperand(0)->getType();
+ if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() &&
+ // Only do this if the casts both really cause code to be
+ // generated.
+ ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),
+ I.getType(), TD) &&
+ ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
+ I.getType(), TD)) {
+ Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0),
+ Op1C->getOperand(0),
+ I.getName());
+ InsertNewInstBefore(NewOp, I);
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
+ }
}
}
}
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
- RHS->getPredicate() == FCmpInst::FCMP_UNO)
+ RHS->getPredicate() == FCmpInst::FCMP_UNO &&
+ LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType())
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
// If either of the constants are nans, then the whole thing returns
return Changed ? &I : 0;
}
+namespace {
+
// XorSelf - Implements: X ^ X --> 0
struct XorSelf {
Value *RHS;
}
};
+}
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1))
+ if (isa<UndefValue>(Op1)) {
+ if (isa<UndefValue>(Op0))
+ // Handle undef ^ undef -> 0 special case. This is a common
+ // idiom (misuse).
+ return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef
+ }
// xor X, X = 0, even if X is nested in a sequence of Xor's.
if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands();
if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
Instruction *NotY =
- BinaryOperator::createNot(Op0I->getOperand(1),
+ BinaryOperator::CreateNot(Op0I->getOperand(1),
Op0I->getOperand(1)->getName()+".not");
InsertNewInstBefore(NotY, I);
if (Op0I->getOpcode() == Instruction::And)
- return BinaryOperator::createOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
else
- return BinaryOperator::createAnd(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
}
}
}
FCI->getOperand(0), FCI->getOperand(1));
}
+ // fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
+ if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
+ if (CI->hasOneUse() && Op0C->hasOneUse()) {
+ Instruction::CastOps Opcode = Op0C->getOpcode();
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+ if (RHS == ConstantExpr::getCast(Opcode, ConstantInt::getTrue(),
+ Op0C->getDestTy())) {
+ Instruction *NewCI = InsertNewInstBefore(CmpInst::Create(
+ CI->getOpcode(), CI->getInversePredicate(),
+ CI->getOperand(0), CI->getOperand(1)), I);
+ NewCI->takeName(CI);
+ return CastInst::Create(Opcode, NewCI, Op0C->getType());
+ }
+ }
+ }
+ }
+ }
+
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ~(c-X) == X-c-1 == X+(-c-1)
if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
ConstantInt::get(I.getType(), 1));
- return BinaryOperator::createAdd(Op0I->getOperand(1), ConstantRHS);
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
}
- if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
+ if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
if (Op0I->getOpcode() == Instruction::Add) {
// ~(X-c) --> (-c-1)-X
if (RHS->isAllOnesValue()) {
Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
- return BinaryOperator::createSub(
+ return BinaryOperator::CreateSub(
ConstantExpr::getSub(NegOp0CI,
ConstantInt::get(I.getType(), 1)),
Op0I->getOperand(0));
} else if (RHS->getValue().isSignBit()) {
// (X + C) ^ signbit -> (X + C + signbit)
Constant *C = ConstantInt::get(RHS->getValue() + Op0CI->getValue());
- return BinaryOperator::createAdd(Op0I->getOperand(0), C);
+ return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
}
} else if (Op0I->getOpcode() == Instruction::Or) {
return &I;
}
}
+ }
}
// Try to fold constant and into select arguments.
std::swap(A, B);
if (B == Op1) { // (A|B)^B == A & ~B
Instruction *NotB =
- InsertNewInstBefore(BinaryOperator::createNot(Op1, "tmp"), I);
- return BinaryOperator::createAnd(A, NotB);
+ InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I);
+ return BinaryOperator::CreateAnd(A, NotB);
}
} else if (match(Op0I, m_Xor(m_Value(A), m_Value(B)))) {
if (Op1 == A) // (A^B)^A == B
if (B == Op1 && // (B&A)^A == ~B & A
!isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
Instruction *N =
- InsertNewInstBefore(BinaryOperator::createNot(A, "tmp"), I);
- return BinaryOperator::createAnd(N, Op1);
+ InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I);
+ return BinaryOperator::CreateAnd(N, Op1);
}
}
}
Op0I->getOperand(1) == Op1I->getOperand(1) &&
(Op1I->hasOneUse() || Op1I->hasOneUse())) {
Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::createXor(Op0I->getOperand(0),
+ InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0),
Op1I->getOperand(0),
Op0I->getName()), I);
- return BinaryOperator::create(Op1I->getOpcode(), NewOp,
+ return BinaryOperator::Create(Op1I->getOpcode(), NewOp,
Op1I->getOperand(1));
}
if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::createXor(A, B);
+ return BinaryOperator::CreateXor(A, B);
}
// (A | B)^(A & B) -> A ^ B
if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
match(Op1I, m_And(m_Value(C), m_Value(D)))) {
if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::createXor(A, B);
+ return BinaryOperator::CreateXor(A, B);
}
// (A & B)^(C & D)
if (X) {
Instruction *NewOp =
- InsertNewInstBefore(BinaryOperator::createXor(Y, Z, Op0->getName()), I);
- return BinaryOperator::createAnd(NewOp, X);
+ InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I);
+ return BinaryOperator::CreateAnd(NewOp, X);
}
}
}
I.getType(), TD) &&
ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),
I.getType(), TD)) {
- Instruction *NewOp = BinaryOperator::createXor(Op0C->getOperand(0),
+ Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0),
Op1C->getOperand(0),
I.getName());
InsertNewInstBefore(NewOp, I);
- return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
+ return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());
}
}
}
+
return Changed ? &I : 0;
}
Value *Result = Constant::getNullValue(IntPtrTy);
// Build a mask for high order bits.
- unsigned IntPtrWidth = TD.getPointerSize()*8;
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
- for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
- Value *Op = GEP->getOperand(i);
+ for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
+ ++i, ++GTI) {
+ Value *Op = *i;
uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()) & PtrSizeMask;
if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
if (OpC->isZero()) continue;
Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
else
Result = IC.InsertNewInstBefore(
- BinaryOperator::createAdd(Result,
+ BinaryOperator::CreateAdd(Result,
ConstantInt::get(IntPtrTy, Size),
GEP->getName()+".offs"), I);
continue;
else {
// Emit an add instruction.
Result = IC.InsertNewInstBefore(
- BinaryOperator::createAdd(Result, Scale,
+ BinaryOperator::CreateAdd(Result, Scale,
GEP->getName()+".offs"), I);
}
continue;
if (Constant *OpC = dyn_cast<Constant>(Op))
Op = ConstantExpr::getMul(OpC, Scale);
else // We'll let instcombine(mul) convert this to a shl if possible.
- Op = IC.InsertNewInstBefore(BinaryOperator::createMul(Op, Scale,
+ Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale,
GEP->getName()+".idx"), I);
}
Result = ConstantExpr::getAdd(cast<Constant>(Op),
cast<Constant>(Result));
else
- Result = IC.InsertNewInstBefore(BinaryOperator::createAdd(Op, Result,
+ Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result,
GEP->getName()+".offs"), I);
}
return Result;
}
-/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
-/// else. At this point we know that the GEP is on the LHS of the comparison.
-Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
- ICmpInst::Predicate Cond,
- Instruction &I) {
- assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
- if (CastInst *CI = dyn_cast<CastInst>(RHS))
- if (isa<PointerType>(CI->getOperand(0)->getType()))
- RHS = CI->getOperand(0);
+/// EvaluateGEPOffsetExpression - Return a value that can be used to compare
+/// the *offset* implied by GEP to zero. For example, if we have &A[i], we want
+/// to return 'i' for "icmp ne i, 0". Note that, in general, indices can be
+/// complex, and scales are involved. The above expression would also be legal
+/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). This
+/// later form is less amenable to optimization though, and we are allowed to
+/// generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+ InstCombiner &IC) {
+ TargetData &TD = IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
- Value *PtrBase = GEPLHS->getOperand(0);
- if (PtrBase == RHS) {
- // As an optimization, we don't actually have to compute the actual value of
- // OFFSET if this is a icmp_eq or icmp_ne comparison, just return whether
- // each index is zero or not.
- if (Cond == ICmpInst::ICMP_EQ || Cond == ICmpInst::ICMP_NE) {
- Instruction *InVal = 0;
- gep_type_iterator GTI = gep_type_begin(GEPLHS);
- for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i, ++GTI) {
- bool EmitIt = true;
- if (Constant *C = dyn_cast<Constant>(GEPLHS->getOperand(i))) {
- if (isa<UndefValue>(C)) // undef index -> undef.
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
- if (C->isNullValue())
- EmitIt = false;
- else if (TD->getABITypeSize(GTI.getIndexedType()) == 0) {
- EmitIt = false; // This is indexing into a zero sized array?
- } else if (isa<ConstantInt>(C))
- return ReplaceInstUsesWith(I, // No comparison is needed here.
- ConstantInt::get(Type::Int1Ty,
- Cond == ICmpInst::ICMP_NE));
- }
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
- if (EmitIt) {
- Instruction *Comp =
- new ICmpInst(Cond, GEPLHS->getOperand(i),
- Constant::getNullValue(GEPLHS->getOperand(i)->getType()));
- if (InVal == 0)
- InVal = Comp;
- else {
- InVal = InsertNewInstBefore(InVal, I);
- InsertNewInstBefore(Comp, I);
- if (Cond == ICmpInst::ICMP_NE) // True if any are unequal
- InVal = BinaryOperator::createOr(InVal, Comp);
- else // True if all are equal
- InVal = BinaryOperator::createAnd(InVal, Comp);
- }
- }
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
}
-
- if (InVal)
- return InVal;
- else
- // No comparison is needed here, all indexes = 0
- ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
- Cond == ICmpInst::ICMP_EQ));
- }
-
- // Only lower this if the icmp is the only user of the GEP or if we expect
- // the result to fold to a constant!
- if (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) {
- // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
- Value *Offset = EmitGEPOffset(GEPLHS, I, *this);
- return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
- Constant::getNullValue(Offset->getType()));
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getABITypeSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
}
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+ // Cast to IntPtrTy in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+ VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
+ VariableIdx->getNameStart(), &I);
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType();
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,
+ true /*SExt*/,
+ VariableIdx->getNameStart(), &I);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+
+/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
+/// else. At this point we know that the GEP is on the LHS of the comparison.
+Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond,
+ Instruction &I) {
+ assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
+
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
+
+ Value *PtrBase = GEPLHS->getOperand(0);
+ if (PtrBase == RHS) {
+ // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow. See if we can output an optimized form.
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS, I, *this);
+ return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
+ Constant::getNullValue(Offset->getType()));
} else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
// If the base pointers are different, but the indices are the same, just
// compare the base pointer.
if (NumDifferences == 0) // SAME GEP?
return ReplaceInstUsesWith(I, // No comparison is needed here.
ConstantInt::get(Type::Int1Ty,
- isTrueWhenEqual(Cond)));
+ ICmpInst::isTrueWhenEqual(Cond)));
else if (NumDifferences == 1) {
Value *LHSV = GEPLHS->getOperand(DiffOperand);
return 0;
}
+/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
+///
+Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
+ Instruction *LHSI,
+ Constant *RHSC) {
+ if (!isa<ConstantFP>(RHSC)) return 0;
+ const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
+
+ // Get the width of the mantissa. We don't want to hack on conversions that
+ // might lose information from the integer, e.g. "i64 -> float"
+ int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
+ if (MantissaWidth == -1) return 0; // Unknown.
+
+ // Check to see that the input is converted from an integer type that is small
+ // enough that the conversion preserves all bits. TODO: check here for
+ // "known" sign bits.
+ // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+
+ // If this is a uitofp instruction, we need an extra bit to hold the sign.
+ if (isa<UIToFPInst>(LHSI))
+ ++InputSize;
+
+ // If the conversion would lose info, don't hack on this.
+ if ((int)InputSize > MantissaWidth)
+ return 0;
+
+ // Otherwise, we can potentially simplify the comparison. We know that it
+ // will always come through as an integer value and we know the constant is
+ // not a NAN (it would have been previously simplified).
+ assert(!RHS.isNaN() && "NaN comparison not already folded!");
+
+ ICmpInst::Predicate Pred;
+ switch (I.getPredicate()) {
+ default: assert(0 && "Unexpected predicate!");
+ case FCmpInst::FCMP_UEQ:
+ case FCmpInst::FCMP_OEQ: Pred = ICmpInst::ICMP_EQ; break;
+ case FCmpInst::FCMP_UGT:
+ case FCmpInst::FCMP_OGT: Pred = ICmpInst::ICMP_SGT; break;
+ case FCmpInst::FCMP_UGE:
+ case FCmpInst::FCMP_OGE: Pred = ICmpInst::ICMP_SGE; break;
+ case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_OLT: Pred = ICmpInst::ICMP_SLT; break;
+ case FCmpInst::FCMP_ULE:
+ case FCmpInst::FCMP_OLE: Pred = ICmpInst::ICMP_SLE; break;
+ case FCmpInst::FCMP_UNE:
+ case FCmpInst::FCMP_ONE: Pred = ICmpInst::ICMP_NE; break;
+ case FCmpInst::FCMP_ORD:
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ case FCmpInst::FCMP_UNO:
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ }
+
+ const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
+
+ // Now we know that the APFloat is a normal number, zero or inf.
+
+ // See if the FP constant is too large for the integer. For example,
+ // comparing an i8 to 300.0.
+ unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+
+ // If the RHS value is > SignedMax, fold the comparison. This handles +INF
+ // and large values.
+ APFloat SMax(RHS.getSemantics(), APFloat::fcZero, false);
+ SMax.convertFromAPInt(APInt::getSignedMaxValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT ||
+ Pred == ICmpInst::ICMP_SLE)
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ }
+
+ // See if the RHS value is < SignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getSignedMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||
+ Pred == ICmpInst::ICMP_SGE)
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ }
+
+ // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] but
+ // it may still be fractional. See if it is fractional by casting the FP
+ // value to the integer value and back, checking for equality. Don't do this
+ // for zero, because -0.0 is not fractional.
+ Constant *RHSInt = ConstantExpr::getFPToSI(RHSC, IntTy);
+ if (!RHS.isZero() &&
+ ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) != RHSC) {
+ // If we had a comparison against a fractional value, we have to adjust
+ // the compare predicate and sometimes the value. RHSC is rounded towards
+ // zero at this point.
+ switch (Pred) {
+ default: assert(0 && "Unexpected integer comparison!");
+ case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ case ICmpInst::ICMP_SLE:
+ // (float)int <= 4.4 --> int <= 4
+ // (float)int <= -4.4 --> int < -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLT;
+ break;
+ case ICmpInst::ICMP_SLT:
+ // (float)int < -4.4 --> int < -4
+ // (float)int < 4.4 --> int <= 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SLE;
+ break;
+ case ICmpInst::ICMP_SGT:
+ // (float)int > 4.4 --> int > 4
+ // (float)int > -4.4 --> int >= -4
+ if (RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGE;
+ break;
+ case ICmpInst::ICMP_SGE:
+ // (float)int >= -4.4 --> int >= -4
+ // (float)int >= 4.4 --> int > 4
+ if (!RHS.isNegative())
+ Pred = ICmpInst::ICMP_SGT;
+ break;
+ }
+ }
+
+ // Lower this FP comparison into an appropriate integer version of the
+ // comparison.
+ return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
+}
+
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
bool Changed = SimplifyCompare(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
+ // If the constant is a nan, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and...
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 0));
+ assert(FCmpInst::isUnordered(I.getPredicate()) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty, 1));
+ }
+ }
+
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
switch (LHSI->getOpcode()) {
case Instruction::PHI:
- if (Instruction *NV = FoldOpIntoPhi(I))
+ // Only fold fcmp into the PHI if the phi and fcmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
return NV;
break;
case Instruction::Select:
}
if (Op1)
- return new SelectInst(LHSI->getOperand(0), Op1, Op2);
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
break;
}
}
// icmp X, X
if (Op0 == Op1)
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
- isTrueWhenEqual(I)));
+ I.isTrueWhenEqual()));
if (isa<UndefValue>(Op1)) // X icmp undef -> undef
return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
-
+
// icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
// addresses never equal each other! We already know that Op0 != Op1.
if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
(isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
isa<ConstantPointerNull>(Op1)))
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
- !isTrueWhenEqual(I)));
+ !I.isTrueWhenEqual()));
// icmp's with boolean values can always be turned into bitwise operations
if (Ty == Type::Int1Ty) {
switch (I.getPredicate()) {
default: assert(0 && "Invalid icmp instruction!");
case ICmpInst::ICMP_EQ: { // icmp eq bool %A, %B -> ~(A^B)
- Instruction *Xor = BinaryOperator::createXor(Op0, Op1, I.getName()+"tmp");
+ Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp");
InsertNewInstBefore(Xor, I);
- return BinaryOperator::createNot(Xor);
+ return BinaryOperator::CreateNot(Xor);
}
case ICmpInst::ICMP_NE: // icmp eq bool %A, %B -> A^B
- return BinaryOperator::createXor(Op0, Op1);
+ return BinaryOperator::CreateXor(Op0, Op1);
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_SGT:
// FALL THROUGH
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT: { // icmp lt bool A, B -> ~X & Y
- Instruction *Not = BinaryOperator::createNot(Op0, I.getName()+"tmp");
+ Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
InsertNewInstBefore(Not, I);
- return BinaryOperator::createAnd(Not, Op1);
+ return BinaryOperator::CreateAnd(Not, Op1);
}
case ICmpInst::ICMP_UGE:
case ICmpInst::ICMP_SGE:
// FALL THROUGH
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_SLE: { // icmp le bool %A, %B -> ~A | B
- Instruction *Not = BinaryOperator::createNot(Op0, I.getName()+"tmp");
+ Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp");
InsertNewInstBefore(Not, I);
- return BinaryOperator::createOr(Not, Op1);
+ return BinaryOperator::CreateOr(Not, Op1);
}
}
}
// See if we are doing a comparison between a constant and an instruction that
// can be folded into the comparison.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ Value *A, *B;
+
+ // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
+ if (I.isEquality() && CI->isNullValue() &&
+ match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
+ // (icmp cond A B) if cond is equality
+ return new ICmpInst(I.getPredicate(), A, B);
+ }
+
switch (I.getPredicate()) {
default: break;
case ICmpInst::ICMP_ULT: // A <u MIN -> FALSE
break;
case Instruction::PHI:
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
+        // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
break;
case Instruction::Select: {
// If either operand of the select is a constant, we can fold the
}
if (Op1)
- return new SelectInst(LHSI->getOperand(0), Op1, Op2);
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
break;
}
case Instruction::Malloc:
if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) {
AddToWorkList(LHSI);
return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
- !isTrueWhenEqual(I)));
+ !I.isTrueWhenEqual()));
}
break;
}
Op1 = CI2->getOperand(0);
// If Op1 is a constant, we can fold the cast into the constant.
- if (Op0->getType() != Op1->getType())
+ if (Op0->getType() != Op1->getType()) {
if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
} else {
// Otherwise, cast the RHS right before the icmp
- Op1 = InsertCastBefore(Instruction::BitCast, Op1, Op0->getType(), I);
+ Op1 = InsertBitCastBefore(Op1, Op0->getType(), I);
}
+ }
return new ICmpInst(I.getPredicate(), Op0, Op1);
}
}
return R;
}
+ // ~x < ~y --> y < x
+ { Value *A, *B;
+ if (match(Op0, m_Not(m_Value(A))) &&
+ match(Op1, m_Not(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), B, A);
+ }
+
if (I.isEquality()) {
Value *A, *B, *C, *D;
+
+ // -x == -y --> x == y
+ if (match(Op0, m_Neg(m_Value(A))) &&
+ match(Op1, m_Neg(m_Value(B))))
+ return new ICmpInst(I.getPredicate(), A, B);
+
if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
Value *OtherVal = A == Op1 ? B : A;
if (ConstantInt *C2 = dyn_cast<ConstantInt>(D))
if (Op1->hasOneUse()) {
Constant *NC = ConstantInt::get(C1->getValue() ^ C2->getValue());
- Instruction *Xor = BinaryOperator::createXor(C, NC, "tmp");
+ Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp");
return new ICmpInst(I.getPredicate(), A,
InsertNewInstBefore(Xor, I));
}
}
if (X) { // Build (X^Y) & Z
- Op1 = InsertNewInstBefore(BinaryOperator::createXor(X, Y, "tmp"), I);
- Op1 = InsertNewInstBefore(BinaryOperator::createAnd(Op1, Z, "tmp"), I);
+ Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I);
+ Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I);
I.setOperand(0, Op1);
I.setOperand(1, Constant::getNullValue(Op1->getType()));
return &I;
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
HiOverflow = AddWithOverflow(HiBound, LoBound, DivRHS, false);
- } else if (DivRHS->getValue().isPositive()) { // Divisor is > 0.
+ } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
if (CmpRHSV == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));
HiBound = DivRHS;
- } else if (CmpRHSV.isPositive()) { // (X / pos) op pos
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
HiBound = AddOne(Prod);
HiOverflow = ProdOV ? -1 : 0;
}
- } else { // Divisor is < 0.
+ } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
if (CmpRHSV == 0) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5)
LoBound = AddOne(DivRHS);
HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
}
- } else if (CmpRHSV.isPositive()) { // (X / neg) op pos
+ } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
// If this is a comparison that tests the signbit (X < 0) or (x > -1),
// fold the xor.
- if (ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0 ||
- ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue()) {
+ if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
+ (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
Value *CompareVal = LHSI->getOperand(0);
// If the sign bit of the XorCST is not set, there is no change to
// Extending a relational comparison when we're checking the sign
// bit would not work.
if (Cast->hasOneUse() &&
- (ICI.isEquality() || AndCST->getValue().isPositive() &&
- RHSV.isPositive())) {
+ (ICI.isEquality() ||
+ (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
uint32_t BitWidth =
cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
APInt NewCST = AndCST->getValue();
APInt NewCI = RHSV;
NewCI.zext(BitWidth);
Instruction *NewAnd =
- BinaryOperator::createAnd(Cast->getOperand(0),
+ BinaryOperator::CreateAnd(Cast->getOperand(0),
ConstantInt::get(NewCST),LHSI->getName());
InsertNewInstBefore(NewAnd, ICI);
return new ICmpInst(ICI.getPredicate(), NewAnd,
// Compute C << Y.
Value *NS;
if (Shift->getOpcode() == Instruction::LShr) {
- NS = BinaryOperator::createShl(AndCST,
+ NS = BinaryOperator::CreateShl(AndCST,
Shift->getOperand(1), "tmp");
} else {
// Insert a logical shift.
- NS = BinaryOperator::createLShr(AndCST,
+ NS = BinaryOperator::CreateLShr(AndCST,
Shift->getOperand(1), "tmp");
}
InsertNewInstBefore(cast<Instruction>(NS), ICI);
// Compute X & (C << Y).
Instruction *NewAnd =
- BinaryOperator::createAnd(Shift->getOperand(0), NS, LHSI->getName());
+ BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
InsertNewInstBefore(NewAnd, ICI);
ICI.setOperand(0, NewAnd);
ConstantInt::get(APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal));
Instruction *AndI =
- BinaryOperator::createAnd(LHSI->getOperand(0),
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
Mask, LHSI->getName()+".mask");
Value *And = InsertNewInstBefore(AndI, ICI);
return new ICmpInst(ICI.getPredicate(), And,
Constant *Mask = ConstantInt::get(APInt(TypeBits, 1) <<
(TypeBits-ShAmt->getZExtValue()-1));
Instruction *AndI =
- BinaryOperator::createAnd(LHSI->getOperand(0),
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
Mask, LHSI->getName()+".mask");
Value *And = InsertNewInstBefore(AndI, ICI);
case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
case Instruction::AShr: {
+ // Only handle equality comparisons of shift-by-constant.
ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!ShAmt) break;
+ if (!ShAmt || !ICI.isEquality()) break;
- if (ICI.isEquality()) {
- // Check that the shift amount is in range. If not, don't perform
- // undefined shifts. When the shift is visited it will be
- // simplified.
- uint32_t TypeBits = RHSV.getBitWidth();
- if (ShAmt->uge(TypeBits))
- break;
- uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
+ // Check that the shift amount is in range. If not, don't perform
+ // undefined shifts. When the shift is visited it will be
+ // simplified.
+ uint32_t TypeBits = RHSV.getBitWidth();
+ if (ShAmt->uge(TypeBits))
+ break;
+
+ uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
- // If we are comparing against bits always shifted out, the
- // comparison cannot succeed.
- APInt Comp = RHSV << ShAmtVal;
- if (LHSI->getOpcode() == Instruction::LShr)
- Comp = Comp.lshr(ShAmtVal);
- else
- Comp = Comp.ashr(ShAmtVal);
+ // If we are comparing against bits always shifted out, the
+ // comparison cannot succeed.
+ APInt Comp = RHSV << ShAmtVal;
+ if (LHSI->getOpcode() == Instruction::LShr)
+ Comp = Comp.lshr(ShAmtVal);
+ else
+ Comp = Comp.ashr(ShAmtVal);
+
+ if (Comp != RHSV) { // Comparing against a bit that we know is zero.
+ bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
+ Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
+ return ReplaceInstUsesWith(ICI, Cst);
+ }
+
+ // Otherwise, check to see if the bits shifted out are known to be zero.
+ // If so, we can compare against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ if (LHSI->hasOneUse() &&
+ MaskedValueIsZero(LHSI->getOperand(0),
+ APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {
+ return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+ ConstantExpr::getShl(RHS, ShAmt));
+ }
- if (Comp != RHSV) { // Comparing against a bit that we know is zero.
- bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = ConstantInt::get(Type::Int1Ty, IsICMP_NE);
- return ReplaceInstUsesWith(ICI, Cst);
- }
+ if (LHSI->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(Val);
- if (LHSI->hasOneUse() || RHSV == 0) {
- // Otherwise strength reduce the shift into an and.
- APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = ConstantInt::get(Val);
-
- Instruction *AndI =
- BinaryOperator::createAnd(LHSI->getOperand(0),
- Mask, LHSI->getName()+".mask");
- Value *And = InsertNewInstBefore(AndI, ICI);
- return new ICmpInst(ICI.getPredicate(), And,
- ConstantExpr::getShl(RHS, ShAmt));
- }
+ Instruction *AndI =
+ BinaryOperator::CreateAnd(LHSI->getOperand(0),
+ Mask, LHSI->getName()+".mask");
+ Value *And = InsertNewInstBefore(AndI, ICI);
+ return new ICmpInst(ICI.getPredicate(), And,
+ ConstantExpr::getShl(RHS, ShAmt));
}
break;
}
DivRHS))
return R;
break;
+
+ case Instruction::Add:
+    // Fold: icmp pred (add X, C1), C2
+
+ if (!ICI.isEquality()) {
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
+ if (!LHSC) break;
+ const APInt &LHSV = LHSC->getValue();
+
+ ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
+ .subtract(LHSV);
+
+ if (ICI.isSignedPredicate()) {
+ if (CR.getLower().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
+ ConstantInt::get(CR.getUpper()));
+ } else if (CR.getUpper().isSignBit()) {
+ return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
+ ConstantInt::get(CR.getLower()));
+ }
+ } else {
+ if (CR.getLower().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
+ ConstantInt::get(CR.getUpper()));
+ } else if (CR.getUpper().isMinValue()) {
+ return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
+ ConstantInt::get(CR.getLower()));
+ }
+ }
+ }
+ break;
}
// Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) {
Instruction *NewRem =
- BinaryOperator::createURem(BO->getOperand(0), BO->getOperand(1),
+ BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1),
BO->getName());
InsertNewInstBefore(NewRem, ICI);
return new ICmpInst(ICI.getPredicate(), NewRem,
else if (Value *NegVal = dyn_castNegVal(BOp0))
return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
else if (BO->hasOneUse()) {
- Instruction *Neg = BinaryOperator::createNeg(BOp1);
+ Instruction *Neg = BinaryOperator::CreateNeg(BOp1);
InsertNewInstBefore(Neg, ICI);
Neg->takeName(BO);
return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
Constant::getNullValue(RHS->getType()));
// Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (isSignBit(BOC)) {
+ if (BOC->getValue().isSignBit()) {
Value *X = BO->getOperand(0);
Constant *Zero = Constant::getNullValue(X->getType());
ICmpInst::Predicate pred = isICMP_NE ?
RHSOp = RHSC->getOperand(0);
// If the pointer types don't match, insert a bitcast.
if (LHSCIOp->getType() != RHSOp->getType())
- RHSOp = InsertCastBefore(Instruction::BitCast, RHSOp,
- LHSCIOp->getType(), ICI);
+ RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI);
}
if (RHSOp)
if (RHSCIOp->getType() != LHSCIOp->getType())
return 0;
- // If the signedness of the two compares doesn't agree (i.e. one is a sext
+ // If the signedness of the two casts doesn't agree (i.e. one is a sext
// and the other is a zext), then we can't handle this.
if (CI->getOpcode() != LHSCI->getOpcode())
return 0;
- // Likewise, if the signedness of the [sz]exts and the compare don't match,
- // then we can't handle this.
- if (isSignedExt != isSignedCmp && !ICI.isEquality())
- return 0;
-
- // Okay, just insert a compare of the reduced operands now!
- return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+ // Deal with equality cases early.
+ if (ICI.isEquality())
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // A signed comparison of sign extended values simplifies into a
+ // signed comparison.
+ if (isSignedCmp && isSignedExt)
+ return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSCIOp);
+
+ // The other three cases all fold into an unsigned comparison.
+ return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
}
// If we aren't dealing with a constant on the RHS, exit early
if (Constant *CI = dyn_cast<Constant>(Result))
return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));
else
- return BinaryOperator::createNot(Result);
+ return BinaryOperator::CreateNot(Result);
}
}
// See if we can turn a signed shr into an unsigned shr.
if (MaskedValueIsZero(Op0,
APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
- return BinaryOperator::createLShr(Op0, I.getOperand(1));
+ return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
return 0;
}
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
if (BO->getOpcode() == Instruction::Mul && isLeftShift)
if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
- return BinaryOperator::createMul(BO->getOperand(0),
+ return BinaryOperator::CreateMul(BO->getOperand(0),
ConstantExpr::getShl(BOOp, Op1));
// Try to fold constant and into select arguments.
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
+ // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
+ if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
+ Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
+ // If 'shift2' is an ashr, we would have to get the sign bit into a funny
+ // place. Don't try to do this transformation in this case. Also, we
+ // require that the input operand is a shift-by-constant so that we have
+ // confidence that the shifts will get folded together. We could do this
+ // xform in more cases, but it is unlikely to be profitable.
+ if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
+ isa<ConstantInt>(TrOp->getOperand(1))) {
+ // Okay, we'll do this xform. Make the shift of shift.
+ Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt,
+ I.getName());
+ InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2)
+
+ // For logical shifts, the truncation has the effect of making the high
+ // part of the register be zeros. Emulate this by inserting an AND to
+ // clear the top bits as needed. This 'and' will usually be zapped by
+ // other xforms later if dead.
+ unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+ APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
+
+ // The mask we constructed says what the trunc would do if occurring
+ // between the shifts. We want to know the effect *after* the second
+ // shift. We know that it is a logical shift by a constant, so adjust the
+ // mask as appropriate.
+ if (I.getOpcode() == Instruction::Shl)
+ MaskV <<= Op1->getZExtValue();
+ else {
+ assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
+ MaskV = MaskV.lshr(Op1->getZExtValue());
+ }
+
+ Instruction *And = BinaryOperator::CreateAnd(NSh, ConstantInt::get(MaskV),
+ TI->getName());
+ InsertNewInstBefore(And, I); // shift1 & 0x00FF
+
+ // Return the value truncated to the interesting size.
+ return new TruncInst(And, I.getType());
+ }
+ }
+
if (Op0->hasOneUse()) {
if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
// Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
match(Op0BO->getOperand(1),
m_Shr(m_Value(V1), m_ConstantInt(CC))) && CC == Op1) {
- Instruction *YS = BinaryOperator::createShl(
+ Instruction *YS = BinaryOperator::CreateShl(
Op0BO->getOperand(0), Op1,
Op0BO->getName());
InsertNewInstBefore(YS, I); // (Y << C)
Instruction *X =
- BinaryOperator::create(Op0BO->getOpcode(), YS, V1,
+ BinaryOperator::Create(Op0BO->getOpcode(), YS, V1,
Op0BO->getOperand(1)->getName());
InsertNewInstBefore(X, I); // (X + (Y << C))
uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::createAnd(X, ConstantInt::get(
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
m_And(m_Shr(m_Value(V1), m_Value(V2)),m_ConstantInt(CC))) &&
cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse() &&
V2 == Op1) {
- Instruction *YS = BinaryOperator::createShl(
+ Instruction *YS = BinaryOperator::CreateShl(
Op0BO->getOperand(0), Op1,
Op0BO->getName());
InsertNewInstBefore(YS, I); // (Y << C)
Instruction *XM =
- BinaryOperator::createAnd(V1, ConstantExpr::getShl(CC, Op1),
+ BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
V1->getName()+".mask");
InsertNewInstBefore(XM, I); // X & (CC << C)
- return BinaryOperator::create(Op0BO->getOpcode(), YS, XM);
+ return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
}
}
if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
match(Op0BO->getOperand(0),
m_Shr(m_Value(V1), m_ConstantInt(CC))) && CC == Op1) {
- Instruction *YS = BinaryOperator::createShl(
+ Instruction *YS = BinaryOperator::CreateShl(
Op0BO->getOperand(1), Op1,
Op0BO->getName());
InsertNewInstBefore(YS, I); // (Y << C)
Instruction *X =
- BinaryOperator::create(Op0BO->getOpcode(), V1, YS,
+ BinaryOperator::Create(Op0BO->getOpcode(), V1, YS,
Op0BO->getOperand(0)->getName());
InsertNewInstBefore(X, I); // (X + (Y << C))
uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::createAnd(X, ConstantInt::get(
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(
APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
}
m_ConstantInt(CC))) && V2 == Op1 &&
cast<BinaryOperator>(Op0BO->getOperand(0))
->getOperand(0)->hasOneUse()) {
- Instruction *YS = BinaryOperator::createShl(
+ Instruction *YS = BinaryOperator::CreateShl(
Op0BO->getOperand(1), Op1,
Op0BO->getName());
InsertNewInstBefore(YS, I); // (Y << C)
Instruction *XM =
- BinaryOperator::createAnd(V1, ConstantExpr::getShl(CC, Op1),
+ BinaryOperator::CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
V1->getName()+".mask");
InsertNewInstBefore(XM, I); // X & (CC << C)
- return BinaryOperator::create(Op0BO->getOpcode(), XM, YS);
+ return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
}
break;
Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
Instruction *NewShift =
- BinaryOperator::create(I.getOpcode(), Op0BO->getOperand(0), Op1);
+ BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1);
InsertNewInstBefore(NewShift, I);
NewShift->takeName(Op0BO);
- return BinaryOperator::create(Op0BO->getOpcode(), NewShift,
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
NewRHS);
}
}
// Check for (X << c1) << c2 and (X >> c1) >> c2
if (I.getOpcode() == ShiftOp->getOpcode()) {
- return BinaryOperator::create(I.getOpcode(), X,
+ return BinaryOperator::Create(I.getOpcode(), X,
ConstantInt::get(Ty, AmtSum));
} else if (ShiftOp->getOpcode() == Instruction::LShr &&
I.getOpcode() == Instruction::AShr) {
// ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
- return BinaryOperator::createLShr(X, ConstantInt::get(Ty, AmtSum));
+ return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
} else if (ShiftOp->getOpcode() == Instruction::AShr &&
I.getOpcode() == Instruction::LShr) {
// ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
Instruction *Shift =
- BinaryOperator::createAShr(X, ConstantInt::get(Ty, AmtSum));
+ BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// Okay, if we get here, one shift must be left, and the other shift must be
// If we have ((X >>? C) << C), turn this into X & (-1 << C).
if (I.getOpcode() == Instruction::Shl) {
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::createAnd(X, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
}
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
if (I.getOpcode() == Instruction::LShr) {
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::createAnd(X, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Mask));
}
// We can simplify ((X << C) >>s C) into a trunc + sext.
// NOTE: we could do this for any C, but that would make 'unusual' integer
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Instruction *Shift =
- BinaryOperator::createShl(X, ConstantInt::get(Ty, ShiftDiff));
+ BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Instruction *Shift =
- BinaryOperator::createLShr(X, ConstantInt::get(Ty, ShiftDiff));
+ BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Instruction *Shift =
- BinaryOperator::create(ShiftOp->getOpcode(), X,
+ BinaryOperator::Create(ShiftOp->getOpcode(), X,
ConstantInt::get(Ty, ShiftDiff));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Instruction *Shift =
- BinaryOperator::createShl(X, ConstantInt::get(Ty, ShiftDiff));
+ BinaryOperator::CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
InsertNewInstBefore(Shift, I);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::createAnd(Shift, ConstantInt::get(Mask));
+ return BinaryOperator::CreateAnd(Shift, ConstantInt::get(Mask));
}
// We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
// otherwise multiply the amount and the number of elements
else if (Scale != 1) {
- Instruction *Tmp = BinaryOperator::createMul(Amt, NumElements, "tmp");
+ Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
Amt = InsertNewInstBefore(Tmp, AI);
}
}
if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
Value *Off = ConstantInt::get(Type::Int32Ty, Offset, true);
- Instruction *Tmp = BinaryOperator::createAdd(Amt, Off, "tmp");
+ Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp");
Amt = InsertNewInstBefore(Tmp, AI);
}
///
/// This is a truncation operation if Ty is smaller than V->getType(), or an
/// extension operation if Ty is larger.
-static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
- unsigned CastOpc, int &NumCastsRemoved) {
+///
+/// If CastOpc is a truncation, then Ty will be a type smaller than V. We
+/// should return true if trunc(V) can be computed by computing V in the smaller
+/// type. If V is an instruction, then trunc(inst(x,y)) can be computed as
+/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
+/// efficiently truncated.
+///
+/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
+/// be computed in a larger type, which is then and'd or sext_in_reg'd to get
+/// the final result.
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+ unsigned CastOpc,
+ int &NumCastsRemoved) {
// We can always evaluate constants in another type.
if (isa<ConstantInt>(V))
return true;
// If the first operand is itself a cast, and is eliminable, do not count
// this as an eliminable cast. We would prefer to eliminate those two
// casts first.
- if (!isa<CastInst>(I->getOperand(0)))
+ if (!isa<CastInst>(I->getOperand(0)) && I->hasOneUse())
++NumCastsRemoved;
return true;
}
CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
NumCastsRemoved);
+ case Instruction::Mul:
+ // A multiply can be truncated by truncating its operands.
+ return Ty->getBitWidth() < OrigTy->getBitWidth() &&
+ CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+ NumCastsRemoved) &&
+ CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+ NumCastsRemoved);
+
case Instruction::Shl:
// If we are truncating the result of this SHL, and if it's a shift of a
// constant amount, we can always perform a SHL in a smaller type.
// of casts in the input.
if (I->getOpcode() == CastOpc)
return true;
-
break;
+
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateInDifferentType(PN->getIncomingValue(i), Ty, CastOpc,
+ NumCastsRemoved))
+ return false;
+ return true;
+ }
default:
// TODO: Can handle more cases here.
break;
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
+ case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::Shl: {
Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
- Res = BinaryOperator::create((Instruction::BinaryOps)I->getOpcode(),
- LHS, RHS, I->getName());
+ Res = BinaryOperator::Create((Instruction::BinaryOps)I->getOpcode(),
+ LHS, RHS);
break;
}
case Instruction::Trunc:
if (I->getOperand(0)->getType() == Ty)
return I->getOperand(0);
- // Otherwise, must be the same type of case, so just reinsert a new one.
- Res = CastInst::create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
- Ty, I->getName());
+ // Otherwise, must be the same type of cast, so just reinsert a new one.
+ Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
+ Ty);
break;
+ case Instruction::PHI: {
+ PHINode *OPN = cast<PHINode>(I);
+ PHINode *NPN = PHINode::Create(Ty);
+ for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
+ Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ NPN->addIncoming(V, OPN->getIncomingBlock(i));
+ }
+ Res = NPN;
+ break;
+ }
default:
// TODO: Can handle more cases here.
assert(0 && "Unreachable!");
break;
}
+ Res->takeName(I);
return InsertNewInstBefore(Res, *I);
}
isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
- return CastInst::create(opc, CSrc->getOperand(0), CI.getType());
+ return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
}
}
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
- Instruction *NGEP = new GetElementPtrInst(OrigBase,
- NewIndices.begin(),
- NewIndices.end(), "");
+ Instruction *NGEP = GetElementPtrInst::Create(OrigBase,
+ NewIndices.begin(),
+ NewIndices.end(), "");
InsertNewInstBefore(NGEP, CI);
NGEP->takeName(GEP);
assert(SrcBitSize < DestBitSize && "Not a zext?");
Constant *C = ConstantInt::get(APInt::getLowBitsSet(DestBitSize,
SrcBitSize));
- return BinaryOperator::createAnd(Res, C);
+ return BinaryOperator::CreateAnd(Res, C);
}
case Instruction::SExt:
// We need to emit a cast to truncate, then a cast to sext.
- return CastInst::create(Instruction::SExt,
+ return CastInst::Create(Instruction::SExt,
InsertCastBefore(Instruction::Trunc, Res, Src->getType(),
CI), DestTy);
}
Instruction::CastOps opcode = CI.getOpcode();
Value *Op0c = InsertOperandCastBefore(opcode, Op0, DestTy, SrcI);
Value *Op1c = InsertOperandCastBefore(opcode, Op1, DestTy, SrcI);
- return BinaryOperator::create(
+ return BinaryOperator::Create(
cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
}
}
Op1 == ConstantInt::getTrue() &&
(!Op0->hasOneUse() || !isa<CmpInst>(Op0))) {
Value *New = InsertOperandCastBefore(Instruction::ZExt, Op0, DestTy, &CI);
- return BinaryOperator::createXor(New, ConstantInt::get(CI.getType(), 1));
+ return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
break;
case Instruction::SDiv:
Op0, DestTy, SrcI);
Value *Op1c = InsertOperandCastBefore(Instruction::BitCast,
Op1, DestTy, SrcI);
- return BinaryOperator::create(
+ return BinaryOperator::Create(
cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);
}
}
Instruction::BitCast : Instruction::Trunc);
Value *Op0c = InsertOperandCastBefore(opcode, Op0, DestTy, SrcI);
Value *Op1c = InsertOperandCastBefore(opcode, Op1, DestTy, SrcI);
- return BinaryOperator::createShl(Op0c, Op1c);
+ return BinaryOperator::CreateShl(Op0c, Op1c);
}
break;
case Instruction::AShr:
uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize);
if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) {
// Insert the new logical shift right.
- return BinaryOperator::createLShr(Op0, Op1);
+ return BinaryOperator::CreateLShr(Op0, Op1);
}
}
break;
Value *V1 = InsertCastBefore(Instruction::Trunc, SrcIOp0, Ty, CI);
Value *V2 = InsertCastBefore(Instruction::Trunc, SrcI->getOperand(1),
Ty, CI);
- return BinaryOperator::createLShr(V1, V2);
+ return BinaryOperator::CreateLShr(V1, V2);
}
} else { // This is a variable shr.
Value *One = ConstantInt::get(SrcI->getType(), 1);
Value *V = InsertNewInstBefore(
- BinaryOperator::createShl(One, SrcI->getOperand(1),
+ BinaryOperator::CreateShl(One, SrcI->getOperand(1),
"tmp"), CI);
- V = InsertNewInstBefore(BinaryOperator::createAnd(V,
+ V = InsertNewInstBefore(BinaryOperator::CreateAnd(V,
SrcI->getOperand(0),
"tmp"), CI);
Value *Zero = Constant::getNullValue(V->getType());
return 0;
}
+/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
+ bool DoXform) {
+ // If we are just checking for an icmp eq of a single bit and zext'ing it
+ // to an integer, then shift the bit to the appropriate place and then
+ // cast to integer to avoid the comparison.
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ const APInt &Op1CV = Op1C->getValue();
+
+ // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
+ // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
+ if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+ (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())) {
+ if (!DoXform) return ICI;
+
+ Value *In = ICI->getOperand(0);
+ Value *Sh = ConstantInt::get(In->getType(),
+ In->getType()->getPrimitiveSizeInBits()-1);
+ In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
+ In->getName()+".lobit"),
+ CI);
+ if (In->getType() != CI.getType())
+ In = CastInst::CreateIntegerCast(In, CI.getType(),
+ false/*ZExt*/, "tmp", &CI);
+
+ if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One,
+ In->getName()+".not"),
+ CI);
+ }
+
+ return ReplaceInstUsesWith(CI, In);
+ }
+
+
+
+ // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ // zext (X == 1) to i32 --> X iff X has only the low bit set.
+ // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 0) to i32 --> X iff X has only the low bit set.
+ // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
+ // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
+ // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
+ if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
+ // This only works for EQ and NE
+ ICI->isEquality()) {
+ // If Op1C is some other power of two, convert:
+ uint32_t BitWidth = Op1C->getType()->getBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+
+ APInt KnownZeroMask(~KnownZero);
+ if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
+ if (!DoXform) return ICI;
+
+ bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
+ if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
+ // (X&4) == 2 --> false
+ // (X&4) != 2 --> true
+ Constant *Res = ConstantInt::get(Type::Int1Ty, isNE);
+ Res = ConstantExpr::getZExt(Res, CI.getType());
+ return ReplaceInstUsesWith(CI, Res);
+ }
+
+ uint32_t ShiftAmt = KnownZeroMask.logBase2();
+ Value *In = ICI->getOperand(0);
+ if (ShiftAmt) {
+ // Perform a logical shr by shiftamt.
+ // Insert the shift to put the result in the low bit.
+ In = InsertNewInstBefore(BinaryOperator::CreateLShr(In,
+ ConstantInt::get(In->getType(), ShiftAmt),
+ In->getName()+".lobit"), CI);
+ }
+
+ if ((Op1CV != 0) == isNE) { // Toggle the low bit.
+ Constant *One = ConstantInt::get(In->getType(), 1);
+ In = BinaryOperator::CreateXor(In, One, "tmp");
+ InsertNewInstBefore(cast<Instruction>(In), CI);
+ }
+
+ if (CI.getType() == In->getType())
+ return ReplaceInstUsesWith(CI, In);
+ else
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ }
+ }
+ }
+
+ return 0;
+}
+
Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// If one of the common conversion will work ..
if (Instruction *Result = commonIntCastTransforms(CI))
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
Constant *AndConst = ConstantInt::get(AndValue);
Instruction *And =
- BinaryOperator::createAnd(CSrc->getOperand(0), AndConst);
+ BinaryOperator::CreateAnd(CSrc->getOperand(0), AndConst);
// Unfortunately, if the type changed, we need to cast it back.
if (And->getType() != CI.getType()) {
And->setName(CSrc->getName()+".mask");
InsertNewInstBefore(And, CI);
- And = CastInst::createIntegerCast(And, CI.getType(), false/*ZExt*/);
+ And = CastInst::CreateIntegerCast(And, CI.getType(), false/*ZExt*/);
}
return And;
}
}
}
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) {
- // If we are just checking for a icmp eq of a single bit and zext'ing it
- // to an integer, then shift the bit to the appropriate place and then
- // cast to integer to avoid the comparison.
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
- const APInt &Op1CV = Op1C->getValue();
-
- // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
- // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
- if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
- (ICI->getPredicate() == ICmpInst::ICMP_SGT &&Op1CV.isAllOnesValue())){
- Value *In = ICI->getOperand(0);
- Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
- In = InsertNewInstBefore(BinaryOperator::createLShr(In, Sh,
- In->getName()+".lobit"),
- CI);
- if (In->getType() != CI.getType())
- In = CastInst::createIntegerCast(In, CI.getType(),
- false/*ZExt*/, "tmp", &CI);
-
- if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
- Constant *One = ConstantInt::get(In->getType(), 1);
- In = InsertNewInstBefore(BinaryOperator::createXor(In, One,
- In->getName()+".not"),
- CI);
- }
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+ return transformZExtICmp(ICI, CI);
- return ReplaceInstUsesWith(CI, In);
- }
-
-
-
- // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
- // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- // zext (X == 1) to i32 --> X iff X has only the low bit set.
- // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
- // zext (X != 0) to i32 --> X iff X has only the low bit set.
- // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
- // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
- // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV == 0 || Op1CV.isPowerOf2()) &&
- // This only works for EQ and NE
- ICI->isEquality()) {
- // If Op1C some other power of two, convert:
- uint32_t BitWidth = Op1C->getType()->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
-
- APInt KnownZeroMask(~KnownZero);
- if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
- bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
- if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
- // (X&4) == 2 --> false
- // (X&4) != 2 --> true
- Constant *Res = ConstantInt::get(Type::Int1Ty, isNE);
- Res = ConstantExpr::getZExt(Res, CI.getType());
- return ReplaceInstUsesWith(CI, Res);
- }
-
- uint32_t ShiftAmt = KnownZeroMask.logBase2();
- Value *In = ICI->getOperand(0);
- if (ShiftAmt) {
- // Perform a logical shr by shiftamt.
- // Insert the shift to put the result in the low bit.
- In = InsertNewInstBefore(
- BinaryOperator::createLShr(In,
- ConstantInt::get(In->getType(), ShiftAmt),
- In->getName()+".lobit"), CI);
- }
-
- if ((Op1CV != 0) == isNE) { // Toggle the low bit.
- Constant *One = ConstantInt::get(In->getType(), 1);
- In = BinaryOperator::createXor(In, One, "tmp");
- InsertNewInstBefore(cast<Instruction>(In), CI);
- }
-
- if (CI.getType() == In->getType())
- return ReplaceInstUsesWith(CI, In);
- else
- return CastInst::createIntegerCast(In, CI.getType(), false/*ZExt*/);
- }
- }
+ BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
+ if (SrcI && SrcI->getOpcode() == Instruction::Or) {
+ // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp)s can be transformed.
+ ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
+ ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
+ if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
+ (transformZExtICmp(LHS, CI, false) ||
+ transformZExtICmp(RHS, CI, false))) {
+ Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI);
+ Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI);
+ return BinaryOperator::Create(Instruction::Or, LCast, RCast);
}
- }
+ }
+
return 0;
}
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getPrimitiveSizeInBits()-1);
- In = InsertNewInstBefore(BinaryOperator::createAShr(In, Sh,
+ In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
In->getName()+".lobit"),
CI);
if (In->getType() != CI.getType())
- In = CastInst::createIntegerCast(In, CI.getType(),
+ In = CastInst::CreateIntegerCast(In, CI.getType(),
true/*SExt*/, "tmp", &CI);
if (ICI->getPredicate() == ICmpInst::ICMP_SGT)
- In = InsertNewInstBefore(BinaryOperator::createNot(In,
+ In = InsertNewInstBefore(BinaryOperator::CreateNot(In,
In->getName()+".not"), CI);
return ReplaceInstUsesWith(CI, In);
}
}
}
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ if (getOpcode(Src) == Instruction::Trunc) {
+ Value *Op = cast<User>(Src)->getOperand(0);
+ unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth();
+ unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth();
+ unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+ unsigned NumSignBits = ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, the value is already correctly sign extended; reuse it directly.
+ if (NumSignBits > DestBits-MidBits)
+ return ReplaceInstUsesWith(CI, Op);
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return new SExtInst(Op, CI.getType(), "tmp");
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return new TruncInst(Op, CI.getType(), "tmp");
+ }
+ }
return 0;
}
-Instruction *InstCombiner::visitFPTrunc(CastInst &CI) {
- return commonCastTransforms(CI);
+/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// in the specified FP type without changing its value.
+static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+ APFloat F = CFP->getValueAPF();
+ if (F.convert(Sem, APFloat::rmNearestTiesToEven) == APFloat::opOK)
+ return ConstantFP::get(F);
+ return 0;
+}
+
+/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// through it until we get the source value.
+static Value *LookThroughFPExtensions(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (I->getOpcode() == Instruction::FPExt)
+ return LookThroughFPExtensions(I->getOperand(0));
+
+ // If this value is a constant, return the constant in the smallest FP type
+ // that can accurately represent it. This allows us to turn
+ // (float)((double)X+2.0) into x+2.0f.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType() == Type::PPC_FP128Ty)
+ return V; // No constant folding of this.
+ // See if the value can be truncated to float and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
+ return V;
+ if (CFP->getType() == Type::DoubleTy)
+ return V; // Won't shrink.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble))
+ return V;
+ // Don't try to shrink to various long double types.
+ }
+
+ return V;
+}
+
+Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ // If we have fptrunc(add (fpextend x), (fpextend y)), where x and y are
+ // smaller than the destination type, we can eliminate the truncate by doing
+ // the add as the smaller type. This applies to add/sub/mul/div as well as
+ // many builtins (sqrt, etc).
+ BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0));
+ if (OpI && OpI->hasOneUse()) {
+ switch (OpI->getOpcode()) {
+ default: break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ const Type *SrcTy = OpI->getType();
+ Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
+ Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
+ if (LHSTrunc->getType() != SrcTy &&
+ RHSTrunc->getType() != SrcTy) {
+ unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ // If the source types were both smaller than the destination type of
+ // the cast, do this xform.
+ if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) {
+ LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
+ CI.getType(), CI);
+ RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
+ CI.getType(), CI);
+ return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);
+ }
+ }
+ break;
+ }
+ }
+ return 0;
}
Instruction *InstCombiner::visitFPExt(CastInst &CI) {
return commonCastTransforms(CI);
}
-Instruction *InstCombiner::visitFPToUI(CastInst &CI) {
- return commonCastTransforms(CI);
+Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
+ // fptoui(uitofp(X)) --> X if the intermediate type has enough bits in its
+ // mantissa to accurately represent all values of X. For example, do not
+ // do this with i64->float->i64.
+ if (UIToFPInst *SrcI = dyn_cast<UIToFPInst>(FI.getOperand(0)))
+ if (SrcI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+ SrcI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+
+ return commonCastTransforms(FI);
}
-Instruction *InstCombiner::visitFPToSI(CastInst &CI) {
- return commonCastTransforms(CI);
+Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
+ // fptosi(sitofp(X)) --> X if the intermediate type has enough bits in its
+ // mantissa to accurately represent all values of X. For example, do not
+ // do this with i64->float->i64.
+ if (SIToFPInst *SrcI = dyn_cast<SIToFPInst>(FI.getOperand(0)))
+ if (SrcI->getOperand(0)->getType() == FI.getType() &&
+ (int)FI.getType()->getPrimitiveSizeInBits() <=
+ SrcI->getType()->getFPMantissaWidth())
+ return ReplaceInstUsesWith(FI, SrcI->getOperand(0));
+
+ return commonCastTransforms(FI);
}
Instruction *InstCombiner::visitUIToFP(CastInst &CI) {
return commonPointerCastTransforms(CI);
}
-Instruction *InstCombiner::visitIntToPtr(CastInst &CI) {
- return commonCastTransforms(CI);
+Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
+ if (Instruction *I = commonCastTransforms(CI))
+ return I;
+
+ const Type *DestPointee = cast<PointerType>(CI.getType())->getElementType();
+ if (!DestPointee->isSized()) return 0;
+
+ // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP.
+ ConstantInt *Cst;
+ Value *X;
+ if (match(CI.getOperand(0), m_Add(m_Cast<PtrToIntInst>(m_Value(X)),
+ m_ConstantInt(Cst)))) {
+ // If the source and destination operands have the same type, see if this
+ // is a single-index GEP.
+ if (X->getType() == CI.getType()) {
+ // Get the size of the pointee type.
+ uint64_t Size = TD->getABITypeSize(DestPointee);
+
+ // Convert the constant to intptr type.
+ APInt Offset = Cst->getValue();
+ Offset.sextOrTrunc(TD->getPointerSizeInBits());
+
+ // If Offset is evenly divisible by Size, we can do this xform.
+ if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
+ Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
+ return GetElementPtrInst::Create(X, ConstantInt::get(Offset));
+ }
+ }
+ // TODO: Could handle other cases, e.g. where add is indexing into field of
+ // struct etc.
+ } else if (CI.getOperand(0)->hasOneUse() &&
+ match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) {
+ // Otherwise, if this is inttoptr(add x, cst), try to turn this into an
+ // "inttoptr+GEP" instead of "add+intptr".
+
+ // Get the size of the pointee type.
+ uint64_t Size = TD->getABITypeSize(DestPointee);
+
+ // Convert the constant to intptr type.
+ APInt Offset = Cst->getValue();
+ Offset.sextOrTrunc(TD->getPointerSizeInBits());
+
+ // If Offset is evenly divisible by Size, we can do this xform.
+ if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){
+ Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size));
+
+ Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(),
+ "tmp"), CI);
+ return GetElementPtrInst::Create(P, ConstantInt::get(Offset), "tmp");
+ }
+ }
+ return 0;
}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
const Type *DstElTy = DstPTy->getElementType();
const Type *SrcElTy = SrcPTy->getElementType();
+ // If the address spaces don't match, don't eliminate the bitcast, which is
+ // required for changing types.
+ if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())
+ return 0;
+
// If we are casting a malloc or alloca to a pointer to a type of the same
// size, rewrite the allocation instruction to allocate the "right" type.
if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
- return new GetElementPtrInst(Src, Idxs.begin(), Idxs.end(), "",
- ((Instruction*) NULL));
+ return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "",
+ ((Instruction*) NULL));
}
}
}
// Fold this by inserting a select from the input values.
- SelectInst *NewSI = new SelectInst(SI.getCondition(), TI->getOperand(0),
- FI->getOperand(0), SI.getName()+".v");
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName()+".v");
InsertNewInstBefore(NewSI, SI);
- return CastInst::create(Instruction::CastOps(TI->getOpcode()), NewSI,
+ return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
}
}
// If we reach here, they do have operations in common.
- SelectInst *NewSI = new SelectInst(SI.getCondition(), OtherOpT,
- OtherOpF, SI.getName()+".v");
+ SelectInst *NewSI = SelectInst::Create(SI.getCondition(), OtherOpT,
+ OtherOpF, SI.getName()+".v");
InsertNewInstBefore(NewSI, SI);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
if (MatchIsOpZero)
- return BinaryOperator::create(BO->getOpcode(), MatchOp, NewSI);
+ return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
else
- return BinaryOperator::create(BO->getOpcode(), NewSI, MatchOp);
+ return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
}
assert(0 && "Shouldn't get here");
return 0;
if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {
if (C->getZExtValue()) {
// Change: A = select B, true, C --> A = or B, C
- return BinaryOperator::createOr(CondVal, FalseVal);
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
} else {
// Change: A = select B, false, C --> A = and !B, C
Value *NotCond =
- InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
"not."+CondVal->getName()), SI);
- return BinaryOperator::createAnd(NotCond, FalseVal);
+ return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
} else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
if (C->getZExtValue() == false) {
// Change: A = select B, C, false --> A = and B, C
- return BinaryOperator::createAnd(CondVal, TrueVal);
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
} else {
// Change: A = select B, C, true --> A = or !B, C
Value *NotCond =
- InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
"not."+CondVal->getName()), SI);
- return BinaryOperator::createOr(NotCond, TrueVal);
+ return BinaryOperator::CreateOr(NotCond, TrueVal);
}
}
// select a, b, a -> a&b
// select a, a, b -> a|b
if (CondVal == TrueVal)
- return BinaryOperator::createOr(CondVal, FalseVal);
+ return BinaryOperator::CreateOr(CondVal, FalseVal);
else if (CondVal == FalseVal)
- return BinaryOperator::createAnd(CondVal, TrueVal);
+ return BinaryOperator::CreateAnd(CondVal, TrueVal);
}
// Selecting between two integer constants?
if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) {
// select C, 1, 0 -> zext C to int
if (FalseValC->isZero() && TrueValC->getValue() == 1) {
- return CastInst::create(Instruction::ZExt, CondVal, SI.getType());
+ return CastInst::Create(Instruction::ZExt, CondVal, SI.getType());
} else if (TrueValC->isZero() && FalseValC->getValue() == 1) {
// select C, 0, 1 -> zext !C to int
Value *NotCond =
- InsertNewInstBefore(BinaryOperator::createNot(CondVal,
+ InsertNewInstBefore(BinaryOperator::CreateNot(CondVal,
"not."+CondVal->getName()), SI);
- return CastInst::create(Instruction::ZExt, NotCond, SI.getType());
+ return CastInst::Create(Instruction::ZExt, NotCond, SI.getType());
}
// FIXME: Turn select 0/-1 and -1/0 into sext from condition!
Value *X = IC->getOperand(0);
uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
- Instruction *SRA = BinaryOperator::create(Instruction::AShr, X,
+ Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
ShAmt, "ones");
InsertNewInstBefore(SRA, SI);
opc = Instruction::SExt;
else if (SRASize > SISize)
opc = Instruction::Trunc;
- return CastInst::create(opc, SRA, SI.getType());
+ return CastInst::Create(opc, SRA, SI.getType());
}
}
ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE;
Value *V = ICA;
if (ShouldNotVal)
- V = InsertNewInstBefore(BinaryOperator::create(
+ V = InsertNewInstBefore(BinaryOperator::Create(
Instruction::Xor, V, ICA->getOperand(1)), SI);
return ReplaceInstUsesWith(SI, V);
}
NegVal = ConstantExpr::getNeg(C);
} else {
NegVal = InsertNewInstBefore(
- BinaryOperator::createNeg(SubOp->getOperand(1), "tmp"), SI);
+ BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI);
}
Value *NewTrueOp = OtherAddOp;
if (AddOp != TI)
std::swap(NewTrueOp, NewFalseOp);
Instruction *NewSel =
- new SelectInst(CondVal, NewTrueOp,NewFalseOp,SI.getName()+".p");
+ SelectInst::Create(CondVal, NewTrueOp,
+ NewFalseOp, SI.getName() + ".p");
NewSel = InsertNewInstBefore(NewSel, SI);
- return BinaryOperator::createAdd(SubOp->getOperand(0), NewSel);
+ return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
}
}
}
if (OpToFold) {
Constant *C = GetSelectFoldableConstant(TVI);
Instruction *NewSel =
- new SelectInst(SI.getCondition(), TVI->getOperand(2-OpToFold), C);
+ SelectInst::Create(SI.getCondition(),
+ TVI->getOperand(2-OpToFold), C);
InsertNewInstBefore(NewSel, SI);
NewSel->takeName(TVI);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))
- return BinaryOperator::create(BO->getOpcode(), FalseVal, NewSel);
+ return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel);
else {
assert(0 && "Unknown instruction!!");
}
if (OpToFold) {
Constant *C = GetSelectFoldableConstant(FVI);
Instruction *NewSel =
- new SelectInst(SI.getCondition(), C, FVI->getOperand(2-OpToFold));
+ SelectInst::Create(SI.getCondition(), C,
+ FVI->getOperand(2-OpToFold));
InsertNewInstBefore(NewSel, SI);
NewSel->takeName(FVI);
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))
- return BinaryOperator::create(BO->getOpcode(), TrueVal, NewSel);
+ return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel);
else
assert(0 && "Unknown instruction!!");
}
return 0;
}
-/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
-/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
-/// and it is more than the alignment of the ultimate object, see if we can
-/// increase the alignment of the ultimate object, making this check succeed.
-static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD,
- unsigned PrefAlign = 0) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
- unsigned Align = GV->getAlignment();
- if (Align == 0 && TD && GV->getType()->getElementType()->isSized())
- Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+/// EnforceKnownAlignment - If the specified pointer points to an object that
+/// we control, modify the object's alignment to PrefAlign. This isn't
+/// often possible though. If alignment is important, a more reliable approach
+/// is to simply align all global variables and allocation instructions to
+/// their preferred alignment from the beginning.
+///
+static unsigned EnforceKnownAlignment(Value *V,
+ unsigned Align, unsigned PrefAlign) {
+
+ User *U = dyn_cast<User>(V);
+ if (!U) return Align;
+
+ switch (getOpcode(U)) {
+ default: break;
+ case Instruction::BitCast:
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ case Instruction::GetElementPtr: {
+ // If all indexes are zero, it is just the alignment of the base pointer.
+ bool AllZeroOperands = true;
+ for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
+ if (!isa<Constant>(*i) ||
+ !cast<Constant>(*i)->isNullValue()) {
+ AllZeroOperands = false;
+ break;
+ }
+
+ if (AllZeroOperands) {
+ // Treat this like a bitcast.
+ return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
+ }
+ break;
+ }
+ }
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
- if (PrefAlign > Align && GV->hasInitializer()) {
+ if (!GV->isDeclaration()) {
GV->setAlignment(PrefAlign);
Align = PrefAlign;
}
- return Align;
} else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
- unsigned Align = AI->getAlignment();
- if (Align == 0 && TD) {
- if (isa<AllocaInst>(AI))
- Align = TD->getPrefTypeAlignment(AI->getType()->getElementType());
- else if (isa<MallocInst>(AI)) {
- // Malloc returns maximally aligned memory.
- Align = TD->getABITypeAlignment(AI->getType()->getElementType());
- Align =
- std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::DoubleTy));
- Align =
- std::max(Align,
- (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
- }
- }
-
// If there is a requested alignment and if this is an alloca, round up. We
// don't do this for malloc, because some systems can't respect the request.
- if (PrefAlign > Align && isa<AllocaInst>(AI)) {
+ if (isa<AllocaInst>(AI)) {
AI->setAlignment(PrefAlign);
Align = PrefAlign;
}
- return Align;
- } else if (isa<BitCastInst>(V) ||
- (isa<ConstantExpr>(V) &&
- cast<ConstantExpr>(V)->getOpcode() == Instruction::BitCast)) {
- return GetOrEnforceKnownAlignment(cast<User>(V)->getOperand(0),
- TD, PrefAlign);
- } else if (User *GEPI = dyn_castGetElementPtr(V)) {
- // If all indexes are zero, it is just the alignment of the base pointer.
- bool AllZeroOperands = true;
- for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(GEPI->getOperand(i)) ||
- !cast<Constant>(GEPI->getOperand(i))->isNullValue()) {
- AllZeroOperands = false;
- break;
- }
+ }
- if (AllZeroOperands) {
- // Treat this like a bitcast.
- return GetOrEnforceKnownAlignment(GEPI->getOperand(0), TD, PrefAlign);
- }
+ return Align;
+}
- unsigned BaseAlignment = GetOrEnforceKnownAlignment(GEPI->getOperand(0),TD);
- if (BaseAlignment == 0) return 0;
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
+ unsigned PrefAlign) {
+ unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
+ sizeof(PrefAlign) * CHAR_BIT;
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
+ unsigned TrailZ = KnownZero.countTrailingOnes();
+ unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+
+ if (PrefAlign > Align)
+ Align = EnforceKnownAlignment(V, Align, PrefAlign);
+
+ // Return the alignment computed above (possibly raised by EnforceKnownAlignment).
+ return Align;
+}
+
+Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
+ unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
+ unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+ unsigned MinAlign = std::min(DstAlign, SrcAlign);
+ unsigned CopyAlign = MI->getAlignment()->getZExtValue();
+
+ if (CopyAlign < MinAlign) {
+ MI->setAlignment(ConstantInt::get(Type::Int32Ty, MinAlign));
+ return MI;
+ }
+
+ // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+ // load/store.
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
+ if (MemOpLength == 0) return 0;
+
+ // Source and destination pointer types are always "i8*" for the intrinsic. See
+ // if the size is something we can handle with a single primitive load/store.
+ // A single load+store correctly handles overlapping memory in the memmove
+ // case.
+ unsigned Size = MemOpLength->getZExtValue();
+ if (Size == 0) return MI; // Delete this mem transfer.
+
+ if (Size > 8 || (Size&(Size-1)))
+ return 0; // If not 1/2/4/8 bytes, exit.
+
+ // Use an integer load+store unless we can find something better.
+ Type *NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3));
+
+ // Memcpy forces the use of i8* for the source and destination. That means
+ // that if you're using memcpy to move one double around, you'll get a cast
+ // from double* to i8*. We'd much rather use a double load+store rather than
+ // an i64 load+store, here because this improves the odds that the source or
+ // dest address will be promotable. See if we can find a better type than the
+ // integer datatype.
+ if (Value *Op = getBitCastOperand(MI->getOperand(1))) {
+ const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType();
+ if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
+ // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
+ // down through these levels if so.
+ while (!SrcETy->isSingleValueType()) {
+ if (const StructType *STy = dyn_cast<StructType>(SrcETy)) {
+ if (STy->getNumElements() == 1)
+ SrcETy = STy->getElementType(0);
+ else
+ break;
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
+ if (ATy->getNumElements() == 1)
+ SrcETy = ATy->getElementType();
+ else
+ break;
+ } else
+ break;
+ }
+
+ if (SrcETy->isSingleValueType())
+ NewPtrTy = PointerType::getUnqual(SrcETy);
+ }
+ }
+
+
+ // If the memcpy/memmove provides better alignment info than we can
+ // infer, use it.
+ SrcAlign = std::max(SrcAlign, CopyAlign);
+ DstAlign = std::max(DstAlign, CopyAlign);
+
+ Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI);
+ Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI);
+ Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);
+ InsertNewInstBefore(L, *MI);
+ InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);
- // Otherwise, if the base alignment is >= the alignment we expect for the
- // base pointer type, then we know that the resultant pointer is aligned at
- // least as much as its type requires.
- if (!TD) return 0;
+ // Set the size of the copy to 0; it will be deleted on the next iteration.
+ MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+ return MI;
+}
- const Type *BasePtrTy = GEPI->getOperand(0)->getType();
- const PointerType *PtrTy = cast<PointerType>(BasePtrTy);
- unsigned Align = TD->getABITypeAlignment(PtrTy->getElementType());
- if (Align <= BaseAlignment) {
- const Type *GEPTy = GEPI->getType();
- const PointerType *GEPPtrTy = cast<PointerType>(GEPTy);
- Align = std::min(Align, (unsigned)
- TD->getABITypeAlignment(GEPPtrTy->getElementType()));
- return Align;
- }
+Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
+ unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());
+ if (MI->getAlignment()->getZExtValue() < Alignment) {
+ MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
+ return MI;
+ }
+
+ // Extract the length and alignment and fill if they are constant.
+ ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
+ ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
+ if (!LenC || !FillC || FillC->getType() != Type::Int8Ty)
return 0;
+ uint64_t Len = LenC->getZExtValue();
+ Alignment = MI->getAlignment()->getZExtValue();
+
+ // If the length is zero, this is a no-op
+ if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+
+ // memset(s,c,n) -> store s, c (for n=1,2,4,8)
+ if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
+ const Type *ITy = IntegerType::get(Len*8); // n=1 -> i8.
+
+ Value *Dest = MI->getDest();
+ Dest = InsertBitCastBefore(Dest, PointerType::getUnqual(ITy), *MI);
+
+ // An alignment of 0 means 1 for memset, but not for store; use 1 explicitly.
+ if (Alignment == 0) Alignment = 1;
+
+ // Extract the fill value and store.
+ uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
+ InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false,
+ Alignment), *MI);
+
+ // Set the length of the memset to 0; it will be deleted on the next iteration.
+ MI->setLength(Constant::getNullValue(LenC->getType()));
+ return MI;
}
+
return 0;
}
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
// into a call to memcpy.
- if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(II)) {
+ if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
if (GVSrc->isConstant()) {
Module *M = CI.getParent()->getParent()->getParent();
- const char *Name;
- if (CI.getCalledFunction()->getFunctionType()->getParamType(2) ==
- Type::Int32Ty)
- Name = "llvm.memcpy.i32";
+ Intrinsic::ID MemCpyID;
+ if (CI.getOperand(3)->getType() == Type::Int32Ty)
+ MemCpyID = Intrinsic::memcpy_i32;
else
- Name = "llvm.memcpy.i64";
- Constant *MemCpy = M->getOrInsertFunction(Name,
- CI.getCalledFunction()->getFunctionType());
- CI.setOperand(0, MemCpy);
+ MemCpyID = Intrinsic::memcpy_i64;
+ CI.setOperand(0, Intrinsic::getDeclaration(M, MemCpyID));
Changed = true;
}
+
+ // memmove(x,x,size) -> noop.
+ if (MMI->getSource() == MMI->getDest())
+ return EraseInstFromFunction(CI);
}
// If we can determine a pointer alignment that is bigger than currently
// set, update the alignment.
if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
- unsigned Alignment1 = GetOrEnforceKnownAlignment(MI->getOperand(1), TD);
- unsigned Alignment2 = GetOrEnforceKnownAlignment(MI->getOperand(2), TD);
- unsigned Align = std::min(Alignment1, Alignment2);
- if (MI->getAlignment()->getZExtValue() < Align) {
- MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align));
- Changed = true;
- }
-
- // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
- // load/store.
- ConstantInt *MemOpLength = dyn_cast<ConstantInt>(CI.getOperand(3));
- if (MemOpLength) {
- unsigned Size = MemOpLength->getZExtValue();
- unsigned Align = cast<ConstantInt>(CI.getOperand(4))->getZExtValue();
- PointerType *NewPtrTy = NULL;
- // Destination pointer type is always i8 *
- // If Size is 8 then use Int64Ty
- // If Size is 4 then use Int32Ty
- // If Size is 2 then use Int16Ty
- // If Size is 1 then use Int8Ty
- if (Size && Size <=8 && !(Size&(Size-1)))
- NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3));
-
- if (NewPtrTy) {
- Value *Src = InsertCastBefore(Instruction::BitCast, CI.getOperand(2),
- NewPtrTy, CI);
- Value *Dest = InsertCastBefore(Instruction::BitCast, CI.getOperand(1),
- NewPtrTy, CI);
- Value *L = new LoadInst(Src, "tmp", false, Align, &CI);
- Value *NS = new StoreInst(L, Dest, false, Align, &CI);
- CI.replaceAllUsesWith(NS);
- Changed = true;
- return EraseInstFromFunction(CI);
- }
- }
- } else if (isa<MemSetInst>(MI)) {
- unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD);
- if (MI->getAlignment()->getZExtValue() < Alignment) {
- MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
- Changed = true;
- }
+ if (Instruction *I = SimplifyMemTransfer(MI))
+ return I;
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
+ if (Instruction *I = SimplifyMemSet(MSI))
+ return I;
}
if (Changed) return II;
- } else {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::ppc_altivec_lvx:
- case Intrinsic::ppc_altivec_lvxl:
- case Intrinsic::x86_sse_loadu_ps:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse2_loadu_dq:
- // Turn PPC lvx -> load if the pointer is known aligned.
- // Turn X86 loadups -> load if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
- Value *Ptr =
- InsertCastBefore(Instruction::BitCast, II->getOperand(1),
- PointerType::getUnqual(II->getType()), CI);
- return new LoadInst(Ptr);
- }
- break;
- case Intrinsic::ppc_altivec_stvx:
- case Intrinsic::ppc_altivec_stvxl:
- // Turn stvx -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) {
- const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(1)->getType());
- Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2),
- OpPtrTy, CI);
- return new StoreInst(II->getOperand(1), Ptr);
- }
- break;
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- // Turn X86 storeu -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
- const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(2)->getType());
- Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
- OpPtrTy, CI);
- return new StoreInst(II->getOperand(2), Ptr);
- }
- break;
+ }
+
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::bswap:
+ // bswap(bswap(x)) -> x
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+ if (Operand->getIntrinsicID() == Intrinsic::bswap)
+ return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+ break;
+ case Intrinsic::ppc_altivec_lvx:
+ case Intrinsic::ppc_altivec_lvxl:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse2_loadu_dq:
+ // Turn PPC lvx -> load if the pointer is known aligned.
+ // Turn X86 loadups -> load if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ Value *Ptr = InsertBitCastBefore(II->getOperand(1),
+ PointerType::getUnqual(II->getType()),
+ CI);
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_altivec_stvx:
+ case Intrinsic::ppc_altivec_stvxl:
+ // Turn stvx -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(1)->getType());
+ Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(1), Ptr);
+ }
+ break;
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ // Turn X86 storeu -> store if the pointer is known aligned.
+ if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ const Type *OpPtrTy =
+ PointerType::getUnqual(II->getOperand(2)->getType());
+ Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI);
+ return new StoreInst(II->getOperand(2), Ptr);
+ }
+ break;
+
+ case Intrinsic::x86_sse_cvttss2si: {
+ // These intrinsics only demand the 0th element of the input vector. If
+ // we can simplify the input based on that, do so now.
+ uint64_t UndefElts;
+ if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1,
+ UndefElts)) {
+ II->setOperand(1, V);
+ return II;
+ }
+ break;
+ }
+
+ case Intrinsic::ppc_altivec_vperm:
+ // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+ assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
- case Intrinsic::x86_sse_cvttss2si: {
- // These intrinsics only demands the 0th element of its input vector. If
- // we can simplify the input based on that, do so now.
- uint64_t UndefElts;
- if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), 1,
- UndefElts)) {
- II->setOperand(1, V);
- return II;
+ // Check that all of the elements are integer constants or undefs.
+ bool AllEltsOk = true;
+ for (unsigned i = 0; i != 16; ++i) {
+ if (!isa<ConstantInt>(Mask->getOperand(i)) &&
+ !isa<UndefValue>(Mask->getOperand(i))) {
+ AllEltsOk = false;
+ break;
+ }
}
- break;
- }
- case Intrinsic::ppc_altivec_vperm:
- // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
- assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
+ if (AllEltsOk) {
+ // Cast the input vectors to byte vectors.
+ Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI);
+ Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI);
+ Value *Result = UndefValue::get(Op0->getType());
- // Check that all of the elements are integer constants or undefs.
- bool AllEltsOk = true;
- for (unsigned i = 0; i != 16; ++i) {
- if (!isa<ConstantInt>(Mask->getOperand(i)) &&
- !isa<UndefValue>(Mask->getOperand(i))) {
- AllEltsOk = false;
- break;
- }
- }
+ // Only extract each element once.
+ Value *ExtractedElts[32];
+ memset(ExtractedElts, 0, sizeof(ExtractedElts));
- if (AllEltsOk) {
- // Cast the input vectors to byte vectors.
- Value *Op0 = InsertCastBefore(Instruction::BitCast,
- II->getOperand(1), Mask->getType(), CI);
- Value *Op1 = InsertCastBefore(Instruction::BitCast,
- II->getOperand(2), Mask->getType(), CI);
- Value *Result = UndefValue::get(Op0->getType());
-
- // Only extract each element once.
- Value *ExtractedElts[32];
- memset(ExtractedElts, 0, sizeof(ExtractedElts));
-
- for (unsigned i = 0; i != 16; ++i) {
- if (isa<UndefValue>(Mask->getOperand(i)))
- continue;
- unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
- Idx &= 31; // Match the hardware behavior.
-
- if (ExtractedElts[Idx] == 0) {
- Instruction *Elt =
- new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
- InsertNewInstBefore(Elt, CI);
- ExtractedElts[Idx] = Elt;
- }
+ for (unsigned i = 0; i != 16; ++i) {
+ if (isa<UndefValue>(Mask->getOperand(i)))
+ continue;
+ unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ Idx &= 31; // Match the hardware behavior.
- // Insert this value into the result vector.
- Result = new InsertElementInst(Result, ExtractedElts[Idx], i,"tmp");
- InsertNewInstBefore(cast<Instruction>(Result), CI);
+ if (ExtractedElts[Idx] == 0) {
+ Instruction *Elt =
+ new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp");
+ InsertNewInstBefore(Elt, CI);
+ ExtractedElts[Idx] = Elt;
}
- return CastInst::create(Instruction::BitCast, Result, CI.getType());
+
+ // Insert this value into the result vector.
+ Result = InsertElementInst::Create(Result, ExtractedElts[Idx],
+ i, "tmp");
+ InsertNewInstBefore(cast<Instruction>(Result), CI);
}
+ return CastInst::Create(Instruction::BitCast, Result, CI.getType());
}
- break;
+ }
+ break;
- case Intrinsic::stackrestore: {
- // If the save is right next to the restore, remove the restore. This can
- // happen when variable allocas are DCE'd.
- if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
- if (SS->getIntrinsicID() == Intrinsic::stacksave) {
- BasicBlock::iterator BI = SS;
- if (&*++BI == II)
- return EraseInstFromFunction(CI);
- }
- }
-
- // If the stack restore is in a return/unwind block and if there are no
- // allocas or calls between the restore and the return, nuke the restore.
- TerminatorInst *TI = II->getParent()->getTerminator();
- if (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)) {
- BasicBlock::iterator BI = II;
- bool CannotRemove = false;
- for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI) ||
- (isa<CallInst>(BI) && !isa<IntrinsicInst>(BI))) {
- CannotRemove = true;
- break;
- }
- }
- if (!CannotRemove)
+ case Intrinsic::stackrestore: {
+ // If the save is right next to the restore, remove the restore. This can
+ // happen when variable allocas are DCE'd.
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+ if (SS->getIntrinsicID() == Intrinsic::stacksave) {
+ BasicBlock::iterator BI = SS;
+ if (&*++BI == II)
return EraseInstFromFunction(CI);
}
- break;
}
+
+ // Scan down this block to see if there is another stack restore in the
+ // same block without an intervening call/alloca.
+ BasicBlock::iterator BI = II;
+ TerminatorInst *TI = II->getParent()->getTerminator();
+ bool CannotRemove = false;
+ for (++BI; &*BI != TI; ++BI) {
+ if (isa<AllocaInst>(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ if (isa<CallInst>(BI)) {
+ if (!isa<IntrinsicInst>(BI)) {
+ CannotRemove = true;
+ break;
+ }
+ // If there is a stackrestore below this one, remove this one.
+ return EraseInstFromFunction(CI);
+ }
}
+
+ // If the stack restore is in a return/unwind block and if there are no
+ // allocas or calls between the restore and the return, nuke the restore.
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<UnwindInst>(TI)))
+ return EraseInstFromFunction(CI);
+ break;
+ }
}
return visitCallSite(II);
return visitCallSite(&II);
}
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// passed through the varargs area, we can eliminate the use of the cast.
+static bool isSafeToEliminateVarargsCast(const CallSite CS,
+ const CastInst * const CI,
+ const TargetData * const TD,
+ const int ix) {
+ if (!CI->isLosslessCast())
+ return false;
+
+ // The size of ByVal arguments is derived from the type, so we
+ // can't change to a type with a different size. If the size were
+ // passed explicitly we could avoid this check.
+ if (!CS.paramHasAttr(ix, ParamAttr::ByVal))
+ return true;
+
+ const Type* SrcTy =
+ cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
+ const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
+ if (!SrcTy->isSized() || !DstTy->isSized())
+ return false;
+ if (TD->getABITypeSize(SrcTy) != TD->getABITypeSize(DstTy))
+ return false;
+ return true;
+}
+
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
// Don't break the CFG, insert a dummy cond branch.
- new BranchInst(II->getNormalDest(), II->getUnwindDest(),
- ConstantInt::getTrue(), II);
+ BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
+ ConstantInt::getTrue(), II);
}
return EraseInstFromFunction(*CS.getInstruction());
}
const PointerType *PTy = cast<PointerType>(Callee->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
+ int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
- E = CS.arg_end(); I != E; ++I)
- if (CastInst *CI = dyn_cast<CastInst>(*I)) {
- // If this cast does not effect the value passed through the varargs
- // area, we can eliminate the use of the cast.
- Value *Op = CI->getOperand(0);
- if (CI->isLosslessCast()) {
- *I = Op;
- Changed = true;
- }
+ E = CS.arg_end(); I != E; ++I, ++ix) {
+ CastInst *CI = dyn_cast<CastInst>(*I);
+ if (CI && isSafeToEliminateVarargsCast(CS, CI, TD, ix)) {
+ *I = CI->getOperand(0);
+ Changed = true;
}
+ }
}
- if (isa<InlineAsm>(Callee) && !CS.isNoUnwind()) {
+ if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
// Inline asm calls cannot throw - mark them 'nounwind'.
- const ParamAttrsList *PAL = CS.getParamAttrs();
- uint16_t RAttributes = PAL ? PAL->getParamAttrs(0) : 0;
- RAttributes |= ParamAttr::NoUnwind;
-
- ParamAttrsVector modVec;
- modVec.push_back(ParamAttrsWithIndex::get(0, RAttributes));
- PAL = ParamAttrsList::getModified(PAL, modVec);
- CS.setParamAttrs(PAL);
+ CS.setDoesNotThrow();
Changed = true;
}
return false;
Function *Callee = cast<Function>(CE->getOperand(0));
Instruction *Caller = CS.getInstruction();
+ const PAListPtr &CallerPAL = CS.getParamAttrs();
// Okay, this is a cast from a function to a different type. Unless doing so
// would cause a type conversion of one of our arguments, change this call to
//
const FunctionType *FT = Callee->getFunctionType();
const Type *OldRetTy = Caller->getType();
+ const Type *NewRetTy = FT->getReturnType();
- const ParamAttrsList* CallerPAL = 0;
- if (CallInst *CallerCI = dyn_cast<CallInst>(Caller))
- CallerPAL = CallerCI->getParamAttrs();
- else if (InvokeInst *CallerII = dyn_cast<InvokeInst>(Caller))
- CallerPAL = CallerII->getParamAttrs();
-
- // If the parameter attributes are not compatible, don't do the xform. We
- // don't want to lose an sret attribute or something.
- if (!ParamAttrsList::areCompatible(CallerPAL, Callee->getParamAttrs()))
- return false;
+ if (isa<StructType>(NewRetTy))
+ return false; // TODO: Handle multiple return values.
// Check to see if we are changing the return type...
- if (OldRetTy != FT->getReturnType()) {
- if (Callee->isDeclaration() && !Caller->use_empty() &&
- // Conversion is ok if changing from pointer to int of same size.
- !(isa<PointerType>(FT->getReturnType()) &&
- TD->getIntPtrType() == OldRetTy))
+ if (OldRetTy != NewRetTy) {
+ if (Callee->isDeclaration() &&
+ // Conversion is ok if changing from one pointer type to another or from
+ // a pointer to an integer of the same size.
+ !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) &&
+ (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType())))
+ return false; // Cannot transform this return value.
+
+ if (!Caller->use_empty() &&
+ // void -> non-void is handled specially
+ NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy))
return false; // Cannot transform this return value.
+ if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
+ ParameterAttributes RAttrs = CallerPAL.getParamAttrs(0);
+ if (RAttrs & ParamAttr::typeIncompatible(NewRetTy))
+ return false; // Attribute not compatible with transformed value.
+ }
+
// If the callsite is an invoke instruction, and the return value is used by
// a PHI node in a successor, we cannot change the return type of the call
// because there is no place to put the cast instruction (without breaking
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
const Type *ParamTy = FT->getParamType(i);
const Type *ActTy = (*AI)->getType();
- ConstantInt *c = dyn_cast<ConstantInt>(*AI);
- //Some conversions are safe even if we do not have a body.
- //Either we can cast directly, or we can upconvert the argument
+
+ if (!CastInst::isCastable(ActTy, ParamTy))
+ return false; // Cannot transform this parameter value.
+
+ if (CallerPAL.getParamAttrs(i + 1) & ParamAttr::typeIncompatible(ParamTy))
+ return false; // Attribute not compatible with transformed value.
+
+ // Converting from one pointer type to another or between a pointer and an
+ // integer of the same size is safe even if we do not have a body.
bool isConvertible = ActTy == ParamTy ||
- (isa<PointerType>(ParamTy) && isa<PointerType>(ActTy)) ||
- (ParamTy->isInteger() && ActTy->isInteger() &&
- ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()) ||
- (c && ParamTy->getPrimitiveSizeInBits() >= ActTy->getPrimitiveSizeInBits()
- && c->getValue().isStrictlyPositive());
+ ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) &&
+ (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType()));
if (Callee->isDeclaration() && !isConvertible) return false;
-
- // Most other conversions can be done if we have a body, even if these
- // lose information, e.g. int->short.
- // Some conversions cannot be done at all, e.g. float to pointer.
- // Logic here parallels CastInst::getCastOpcode (the design there
- // requires legality checks like this be done before calling it).
- if (ParamTy->isInteger()) {
- if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
- if (VActTy->getBitWidth() != ParamTy->getPrimitiveSizeInBits())
- return false;
- }
- if (!ActTy->isInteger() && !ActTy->isFloatingPoint() &&
- !isa<PointerType>(ActTy))
- return false;
- } else if (ParamTy->isFloatingPoint()) {
- if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
- if (VActTy->getBitWidth() != ParamTy->getPrimitiveSizeInBits())
- return false;
- }
- if (!ActTy->isInteger() && !ActTy->isFloatingPoint())
- return false;
- } else if (const VectorType *VParamTy = dyn_cast<VectorType>(ParamTy)) {
- if (const VectorType *VActTy = dyn_cast<VectorType>(ActTy)) {
- if (VActTy->getBitWidth() != VParamTy->getBitWidth())
- return false;
- }
- if (VParamTy->getBitWidth() != ActTy->getPrimitiveSizeInBits())
- return false;
- } else if (isa<PointerType>(ParamTy)) {
- if (!ActTy->isInteger() && !isa<PointerType>(ActTy))
- return false;
- } else {
- return false;
- }
}
if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() &&
Callee->isDeclaration())
- return false; // Do not delete arguments unless we have a function body...
+ return false; // Do not delete arguments unless we have a function body.
+
+ if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+ !CallerPAL.isEmpty())
+ // In this case we have more arguments than the new function type, but we
+ // won't be dropping them. Check that these extra arguments have attributes
+ // that are compatible with being a vararg call argument.
+ for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+ if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+ break;
+ ParameterAttributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+ if (PAttrs & ParamAttr::VarArgsIncompatible)
+ return false;
+ }
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary...
std::vector<Value*> Args;
Args.reserve(NumActualArgs);
+ SmallVector<ParamAttrsWithIndex, 8> attrVec;
+ attrVec.reserve(NumCommonArgs);
+
+ // Get any return attributes.
+ ParameterAttributes RAttrs = CallerPAL.getParamAttrs(0);
+
+ // If the return value is not being used, the type may not be compatible
+ // with the existing attributes. Wipe out any problematic attributes.
+ RAttrs &= ~ParamAttr::typeIncompatible(NewRetTy);
+
+ // Add the new return attributes.
+ if (RAttrs)
+ attrVec.push_back(ParamAttrsWithIndex::get(0, RAttrs));
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
} else {
Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,
false, ParamTy, false);
- CastInst *NewCast = CastInst::create(opcode, *AI, ParamTy, "tmp");
+ CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp");
Args.push_back(InsertNewInstBefore(NewCast, *Caller));
}
+
+ // Add any parameter attributes.
+ if (ParameterAttributes PAttrs = CallerPAL.getParamAttrs(i + 1))
+ attrVec.push_back(ParamAttrsWithIndex::get(i + 1, PAttrs));
}
// If the function takes more arguments than the call was taking, add them
Args.push_back(Constant::getNullValue(FT->getParamType(i)));
// If we are removing arguments to the function, emit an obnoxious warning...
- if (FT->getNumParams() < NumActualArgs)
+ if (FT->getNumParams() < NumActualArgs) {
if (!FT->isVarArg()) {
cerr << "WARNING: While resolving call to function '"
<< Callee->getName() << "' arguments were dropped!\n";
// Must promote to pass through va_arg area!
Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false,
PTy, false);
- Instruction *Cast = CastInst::create(opcode, *AI, PTy, "tmp");
+ Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp");
InsertNewInstBefore(Cast, *Caller);
Args.push_back(Cast);
} else {
Args.push_back(*AI);
}
+
+ // Add any parameter attributes.
+ if (ParameterAttributes PAttrs = CallerPAL.getParamAttrs(i + 1))
+ attrVec.push_back(ParamAttrsWithIndex::get(i + 1, PAttrs));
}
}
+ }
- if (FT->getReturnType() == Type::VoidTy)
+ if (NewRetTy == Type::VoidTy)
Caller->setName(""); // Void type should not have a name.
+ const PAListPtr &NewCallerPAL = PAListPtr::get(attrVec.begin(),attrVec.end());
+
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(),
- Args.begin(), Args.end(), Caller->getName(), Caller);
+ NC = InvokeInst::Create(Callee, II->getNormalDest(), II->getUnwindDest(),
+ Args.begin(), Args.end(),
+ Caller->getName(), Caller);
cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
- cast<InvokeInst>(NC)->setParamAttrs(CallerPAL);
+ cast<InvokeInst>(NC)->setParamAttrs(NewCallerPAL);
} else {
- NC = new CallInst(Callee, Args.begin(), Args.end(),
- Caller->getName(), Caller);
+ NC = CallInst::Create(Callee, Args.begin(), Args.end(),
+ Caller->getName(), Caller);
CallInst *CI = cast<CallInst>(Caller);
if (CI->isTailCall())
cast<CallInst>(NC)->setTailCall();
cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
- cast<CallInst>(NC)->setParamAttrs(CallerPAL);
+ cast<CallInst>(NC)->setParamAttrs(NewCallerPAL);
}
// Insert a cast of the return type as necessary.
Value *NV = NC;
- if (Caller->getType() != NV->getType() && !Caller->use_empty()) {
+ if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (NV->getType() != Type::VoidTy) {
- const Type *CallerTy = Caller->getType();
Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,
- CallerTy, false);
- NV = NC = CastInst::create(opcode, NC, CallerTy, "tmp");
+ OldRetTy, false);
+ NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp");
// If this is an invoke instruction, we should insert it after the first
// non-phi, instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- BasicBlock::iterator I = II->getNormalDest()->begin();
- while (isa<PHINode>(I)) ++I;
+ BasicBlock::iterator I = II->getNormalDest()->getFirstNonPHI();
InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call instr
Value *Callee = CS.getCalledValue();
const PointerType *PTy = cast<PointerType>(Callee->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ const PAListPtr &Attrs = CS.getParamAttrs();
+
+ // If the call already has the 'nest' attribute somewhere then give up -
+ // otherwise 'nest' would occur twice after splicing in the chain.
+ if (Attrs.hasAttrSomewhere(ParamAttr::Nest))
+ return 0;
IntrinsicInst *Tramp =
cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
- Function *NestF =
- cast<Function>(IntrinsicInst::StripPointerCasts(Tramp->getOperand(2)));
+ Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
- if (const ParamAttrsList *NestAttrs = NestF->getParamAttrs()) {
+ const PAListPtr &NestAttrs = NestF->getParamAttrs();
+ if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
const Type *NestTy = 0;
- uint16_t NestAttr = 0;
+ ParameterAttributes NestAttr = ParamAttr::None;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
- if (NestAttrs->paramHasAttr(NestIdx, ParamAttr::Nest)) {
+ if (NestAttrs.paramHasAttr(NestIdx, ParamAttr::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
- NestAttr = NestAttrs->getParamAttrs(NestIdx);
+ NestAttr = NestAttrs.getParamAttrs(NestIdx);
break;
}
std::vector<Value*> NewArgs;
NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+ SmallVector<ParamAttrsWithIndex, 8> NewAttrs;
+ NewAttrs.reserve(Attrs.getNumSlots() + 1);
+
// Insert the nest argument into the call argument list, which may
- // mean appending it.
+ // mean appending it. Likewise for attributes.
+
+ // Add any function result attributes.
+ if (ParameterAttributes Attr = Attrs.getParamAttrs(0))
+ NewAttrs.push_back(ParamAttrsWithIndex::get(0, Attr));
+
{
unsigned Idx = 1;
CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
do {
if (Idx == NestIdx) {
- // Add the chain argument.
+ // Add the chain argument and attributes.
Value *NestVal = Tramp->getOperand(3);
if (NestVal->getType() != NestTy)
NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
NewArgs.push_back(NestVal);
+ NewAttrs.push_back(ParamAttrsWithIndex::get(NestIdx, NestAttr));
}
if (I == E)
break;
- // Add the original argument.
+ // Add the original argument and attributes.
NewArgs.push_back(*I);
+ if (ParameterAttributes Attr = Attrs.getParamAttrs(Idx))
+ NewAttrs.push_back
+ (ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr));
++Idx, ++I;
} while (1);
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
- // with the chain parameter inserted. Likewise for attributes.
+ // with the chain parameter inserted.
- const ParamAttrsList *Attrs = CS.getParamAttrs();
std::vector<const Type*> NewTypes;
- ParamAttrsVector NewAttrs;
NewTypes.reserve(FTy->getNumParams()+1);
- // Add any function result attributes.
- uint16_t Attr = Attrs ? Attrs->getParamAttrs(0) : 0;
- if (Attr)
- NewAttrs.push_back (ParamAttrsWithIndex::get(0, Attr));
-
// Insert the chain's type into the list of parameter types, which may
- // mean appending it. Likewise for the chain's attributes.
+ // mean appending it.
{
unsigned Idx = 1;
FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end();
do {
- if (Idx == NestIdx) {
- // Add the chain's type and attributes.
+ if (Idx == NestIdx)
+ // Add the chain's type.
NewTypes.push_back(NestTy);
- NewAttrs.push_back(ParamAttrsWithIndex::get(NestIdx, NestAttr));
- }
if (I == E)
break;
- // Add the original type and attributes.
+ // Add the original type.
NewTypes.push_back(*I);
- Attr = Attrs ? Attrs->getParamAttrs(Idx) : 0;
- if (Attr)
- NewAttrs.push_back
- (ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr));
++Idx, ++I;
} while (1);
FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
Constant *NewCallee = NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy));
- const ParamAttrsList *NewPAL = ParamAttrsList::get(NewAttrs);
+ const PAListPtr &NewPAL = PAListPtr::get(NewAttrs.begin(),NewAttrs.end());
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NewCaller = new InvokeInst(NewCallee,
- II->getNormalDest(), II->getUnwindDest(),
- NewArgs.begin(), NewArgs.end(),
- Caller->getName(), Caller);
+ NewCaller = InvokeInst::Create(NewCallee,
+ II->getNormalDest(), II->getUnwindDest(),
+ NewArgs.begin(), NewArgs.end(),
+ Caller->getName(), Caller);
cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NewCaller)->setParamAttrs(NewPAL);
} else {
- NewCaller = new CallInst(NewCallee, NewArgs.begin(), NewArgs.end(),
- Caller->getName(), Caller);
+ NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end(),
+ Caller->getName(), Caller);
if (cast<CallInst>(Caller)->isTailCall())
cast<CallInst>(NewCaller)->setTailCall();
cast<CallInst>(NewCaller)->
Value *InRHS = FirstInst->getOperand(1);
PHINode *NewLHS = 0, *NewRHS = 0;
if (LHSVal == 0) {
- NewLHS = new PHINode(LHSType, FirstInst->getOperand(0)->getName()+".pn");
+ NewLHS = PHINode::Create(LHSType,
+ FirstInst->getOperand(0)->getName() + ".pn");
NewLHS->reserveOperandSpace(PN.getNumOperands()/2);
NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
InsertNewInstBefore(NewLHS, PN);
}
if (RHSVal == 0) {
- NewRHS = new PHINode(RHSType, FirstInst->getOperand(1)->getName()+".pn");
+ NewRHS = PHINode::Create(RHSType,
+ FirstInst->getOperand(1)->getName() + ".pn");
NewRHS->reserveOperandSpace(PN.getNumOperands()/2);
NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
InsertNewInstBefore(NewRHS, PN);
}
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::create(BinOp->getOpcode(), LHSVal, RHSVal);
+ return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);
else if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
- return CmpInst::create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal,
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal,
RHSVal);
else {
assert(isa<GetElementPtrInst>(FirstInst));
- return new GetElementPtrInst(LHSVal, RHSVal);
+ return GetElementPtrInst::Create(LHSVal, RHSVal);
}
}
LI->getParent() != PN.getIncomingBlock(i) ||
!isSafeToSinkLoad(LI))
return 0;
+
+ // If the PHI is volatile and its block has multiple successors, sinking
+ // it would remove a load of the volatile value from the path through the
+ // other successor.
+ if (isVolatile &&
+ LI->getParent()->getTerminator()->getNumSuccessors() != 1)
+ return 0;
+
+
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
// Okay, they are all the same operation. Create a new PHI node of the
// correct type, and PHI together all of the LHS's of the instructions.
- PHINode *NewPN = new PHINode(FirstInst->getOperand(0)->getType(),
- PN.getName()+".in");
+ PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(),
+ PN.getName()+".in");
NewPN->reserveOperandSpace(PN.getNumOperands()/2);
Value *InVal = FirstInst->getOperand(0);
// Insert and return the new operation.
if (CastInst* FirstCI = dyn_cast<CastInst>(FirstInst))
- return CastInst::create(FirstCI->getOpcode(), PhiVal, PN.getType());
- else if (isa<LoadInst>(FirstInst))
- return new LoadInst(PhiVal, "", isVolatile);
- else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
- return BinaryOperator::create(BinOp->getOpcode(), PhiVal, ConstantOp);
- else if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
- return CmpInst::create(CIOp->getOpcode(), CIOp->getPredicate(),
+ return CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType());
+ if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))
+ return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
+ if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst))
+ return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
PhiVal, ConstantOp);
- else
- assert(0 && "Unknown operation");
- return 0;
+ assert(isa<LoadInst>(FirstInst) && "Unknown operation");
+
+ // If this was a volatile load that we are merging, make sure to loop through
+ // and mark all the input loads as non-volatile. If we don't do this, we will
+ // insert a new volatile load and the old ones will not be deletable.
+ if (isVolatile)
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+
+ return new LoadInst(PhiVal, "", isVolatile);
}
/// DeadPHICycle - Return true if this PHI node is only used by a PHI node cycle
bool MadeChange = false;
gep_type_iterator GTI = gep_type_begin(GEP);
- for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) {
+ for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end();
+ i != e; ++i, ++GTI) {
if (isa<SequentialType>(*GTI)) {
- if (CastInst *CI = dyn_cast<CastInst>(GEP.getOperand(i))) {
+ if (CastInst *CI = dyn_cast<CastInst>(*i)) {
if (CI->getOpcode() == Instruction::ZExt ||
CI->getOpcode() == Instruction::SExt) {
const Type *SrcTy = CI->getOperand(0)->getType();
// is a 32-bit pointer target.
if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
MadeChange = true;
- GEP.setOperand(i, CI->getOperand(0));
+ *i = CI->getOperand(0);
}
}
}
// to what we need. If the incoming value needs a cast instruction,
// insert it. This explicit cast can make subsequent optimizations more
// obvious.
- Value *Op = GEP.getOperand(i);
- if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits())
+ Value *Op = *i;
+ if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) {
if (Constant *C = dyn_cast<Constant>(Op)) {
- GEP.setOperand(i, ConstantExpr::getTrunc(C, TD->getIntPtrType()));
+ *i = ConstantExpr::getTrunc(C, TD->getIntPtrType());
MadeChange = true;
} else {
Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(),
GEP);
- GEP.setOperand(i, Op);
+ *i = Op;
MadeChange = true;
}
+ }
}
}
if (MadeChange) return &GEP;
if (isa<Constant>(SO1) && isa<Constant>(GO1))
Sum = ConstantExpr::getAdd(cast<Constant>(SO1), cast<Constant>(GO1));
else {
- Sum = BinaryOperator::createAdd(SO1, GO1, PtrOp->getName()+".sum");
+ Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
InsertNewInstBefore(cast<Instruction>(Sum), GEP);
}
}
}
if (!Indices.empty())
- return new GetElementPtrInst(SrcGEPOperands[0], Indices.begin(),
- Indices.end(), GEP.getName());
+ return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(),
+ Indices.end(), GEP.getName());
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) {
// GEP of global variable. If all of the indices for this GEP are
Idx[0] = Constant::getNullValue(Type::Int32Ty);
Idx[1] = GEP.getOperand(1);
Value *V = InsertNewInstBefore(
- new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName()), GEP);
+ GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP);
// V and GEP are both pointer types --> BitCast
return new BitCastInst(V, GEP.getType());
}
if (Scale->getZExtValue() != 1) {
Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
false /*ZExt*/);
- Instruction *Sc = BinaryOperator::createMul(NewIdx, C, "idxscale");
+ Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale");
NewIdx = InsertNewInstBefore(Sc, GEP);
}
Idx[0] = Constant::getNullValue(Type::Int32Ty);
Idx[1] = NewIdx;
Instruction *NewGEP =
- new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName());
+ GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName());
NewGEP = InsertNewInstBefore(NewGEP, GEP);
// The NewGEP must be pointer typed, so must the old one -> BitCast
return new BitCastInst(NewGEP, GEP.getType());
Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
// Convert: malloc Ty, C - where C is a constant != 1 into: malloc [C x Ty], 1
- if (AI.isArrayAllocation()) // Check C != 1
+ if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
const Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
Value *Idx[2];
Idx[0] = NullIdx;
Idx[1] = NullIdx;
- Value *V = new GetElementPtrInst(New, Idx, Idx + 2,
- New->getName()+".sub", It);
+ Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2,
+ New->getName()+".sub", It);
// Now make everything use the getelementptr instead of the original
// allocation.
} else if (isa<UndefValue>(AI.getArraySize())) {
return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
}
+ }
// If alloca'ing a zero byte object, replace the alloca with a null pointer.
// Note that we only do this for alloca's, because malloc should allocate and
/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
- const TargetData *TD) {
+ const TargetData *TD) {
User *CI = cast<User>(LI.getOperand(0));
Value *CastOp = CI->getOperand(0);
unsigned numBits = Ty->getPrimitiveSizeInBits();
// Replace LI with immediate integer store.
if ((numBits >> 3) == len + 1) {
- APInt StrVal(numBits, 0);
- APInt SingleChar(numBits, 0);
- if (TD->isLittleEndian()) {
- for (signed i = len-1; i >= 0; i--) {
- SingleChar = (uint64_t) Str[i];
- StrVal = (StrVal << 8) | SingleChar;
- }
- } else {
- for (unsigned i = 0; i < len; i++) {
- SingleChar = (uint64_t) Str[i];
- StrVal = (StrVal << 8) | SingleChar;
- }
- // Append NULL at the end.
- SingleChar = 0;
- StrVal = (StrVal << 8) | SingleChar;
- }
- Value *NL = ConstantInt::get(StrVal);
- return IC.ReplaceInstUsesWith(LI, NL);
+ APInt StrVal(numBits, 0);
+ APInt SingleChar(numBits, 0);
+ if (TD->isLittleEndian()) {
+ for (signed i = len-1; i >= 0; i--) {
+ SingleChar = (uint64_t) Str[i];
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ } else {
+ for (unsigned i = 0; i < len; i++) {
+ SingleChar = (uint64_t) Str[i];
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ // Append NULL at the end.
+ SingleChar = 0;
+ StrVal = (StrVal << 8) | SingleChar;
+ }
+ Value *NL = ConstantInt::get(StrVal);
+ return IC.ReplaceInstUsesWith(LI, NL);
}
}
}
Value *Op = LI.getOperand(0);
// Attempt to improve the alignment.
- unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD);
- if (KnownAlign > LI.getAlignment())
+ unsigned KnownAlign = GetOrEnforceKnownAlignment(Op);
+ if (KnownAlign >
+ (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
+ LI.getAlignment()))
LI.setAlignment(KnownAlign);
// load (cast X) --> cast (load X) iff safe
return ReplaceInstUsesWith(LI, LIB);
}
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op))
- if (isa<ConstantPointerNull>(GEPI->getOperand(0))) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+ const Value *GEPI0 = GEPI->getOperand(0);
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<ConstantPointerNull>(GEPI0) &&
+ cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) {
// Insert a new store to null instruction before the load to indicate
// that this code is not reachable. We do this instead of inserting
// an unreachable instruction directly because we cannot modify the
Constant::getNullValue(Op->getType()), &LI);
return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
+ }
if (Constant *C = dyn_cast<Constant>(Op)) {
// load null/undef -> undef
- if ((C->isNullValue() || isa<UndefValue>(C))) {
+ // TODO: Consider a target hook for valid address spaces for this xform.
+ if (isa<UndefValue>(C) || (C->isNullValue() &&
+ cast<PointerType>(Op->getType())->getAddressSpace() == 0)) {
// Insert a new store to null instruction before the load to indicate that
// this code is not reachable. We do this instead of inserting an
// unreachable instruction directly because we cannot modify the CFG.
return ReplaceInstUsesWith(LI, GV->getInitializer());
// Instcombine load (constantexpr_GEP global, 0, ...) into the value loaded.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) {
if (CE->getOpcode() == Instruction::GetElementPtr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
if (GV->isConstant() && !GV->isDeclaration())
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
}
+ }
}
// If this load comes from anywhere in a constant global, and if the global
SI->getOperand(1)->getName()+".val"), LI);
Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2),
SI->getOperand(2)->getName()+".val"), LI);
- return new SelectInst(SI->getCondition(), V1, V2);
+ return SelectInst::Create(SI->getCondition(), V1, V2);
}
// load (select (cond, null, P)) -> load P
NewCast = ConstantExpr::getCast(opcode, C, CastDstTy);
else
NewCast = IC.InsertNewInstBefore(
- CastInst::create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"),
+ CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"),
SI);
return new StoreInst(NewCast, CastOp);
}
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
- if (Ptr->hasOneUse()) {
+ if (Ptr->hasOneUse() && !SI.isVolatile()) {
if (isa<AllocaInst>(Ptr)) {
EraseInstFromFunction(SI);
++NumCombined;
}
// Attempt to improve the alignment.
- unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD);
- if (KnownAlign > SI.getAlignment())
+ unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr);
+ if (KnownAlign >
+ (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
+ SI.getAlignment()))
SI.setAlignment(KnownAlign);
// Do really simple DSE, to catch cases where there are several consequtive
}
// Don't skip over loads or things that can modify memory.
- if (BBI->mayWriteToMemory())
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
}
}
if (++PI != pred_end(DestBB))
return false;
-
-
+
+ // Bail out if all the relevant blocks aren't distinct (this can happen,
+ // for example, if SI is in an infinite loop)
+ if (StoreBB == DestBB || OtherBB == DestBB)
+ return false;
+
// Verify that the other block ends in a branch and is not otherwise empty.
BasicBlock::iterator BBI = OtherBB->getTerminator();
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
return false;
break;
}
- // If we find something that may be using the stored value, or if we run
- // out of instructions, we can't do the xform.
- if (isa<LoadInst>(BBI) || BBI->mayWriteToMemory() ||
+ // If we find something that may be using or overwriting the stored
+ // value, or if we run out of instructions, we can't do the xform.
+ if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
BBI == OtherBB->begin())
return false;
}
// In order to eliminate the store in OtherBr, we have to
- // make sure nothing reads the stored value in StoreBB.
+ // make sure nothing reads or overwrites the stored value in
+ // StoreBB.
for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
// FIXME: This should really be AA driven.
- if (isa<LoadInst>(I) || I->mayWriteToMemory())
+ if (I->mayReadFromMemory() || I->mayWriteToMemory())
return false;
}
}
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
if (MergedVal != SI.getOperand(0)) {
- PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
+ PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
PN->reserveOperandSpace(2);
PN->addIncoming(SI.getOperand(0), SI.getParent());
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
// Advance to a place where it is safe to insert the new store and
// insert it.
- BBI = DestBB->begin();
- while (isa<PHINode>(BBI)) ++BBI;
+ BBI = DestBB->getFirstNonPHI();
InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
OtherStore->isVolatile()), *BBI);
return 0;
}
+Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
+ // See if we are trying to extract a known value. If so, use that instead.
+ if (Value *Elt = FindInsertedValue(EV.getOperand(0), EV.idx_begin(),
+ EV.idx_end(), &EV))
+ return ReplaceInstUsesWith(EV, Elt);
+
+ // No changes
+ return 0;
+}
+
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation.
static bool CheapToScalarize(Value *V, bool isConstant) {
std::vector<unsigned> Result;
const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- if (isa<UndefValue>(CP->getOperand(i)))
+ for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
+ if (isa<UndefValue>(*i))
Result.push_back(NElts*2); // undef -> 8
else
- Result.push_back(cast<ConstantInt>(CP->getOperand(i))->getZExtValue());
+ Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
return Result;
}
}
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
-
// If vector val is undef, replace extract with scalar undef.
if (isa<UndefValue>(EI.getOperand(0)))
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
- // If vector val is constant with uniform operands, replace EI
- // with that operand
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. When the elements are not identical, we cannot replace yet
+ // (we do that below, but only when the index is constant).
Constant *op0 = C->getOperand(0);
for (unsigned i = 1; i < C->getNumOperands(); ++i)
if (C->getOperand(i) != op0) {
EI.getName()+".rhs");
InsertNewInstBefore(newEI0, EI);
InsertNewInstBefore(newEI1, EI);
- return BinaryOperator::create(BO->getOpcode(), newEI0, newEI1);
+ return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
}
} else if (isa<LoadInst>(I)) {
unsigned AS =
cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace();
- Value *Ptr = InsertCastBefore(Instruction::BitCast, I->getOperand(0),
- PointerType::get(EI.getType(), AS), EI);
- GetElementPtrInst *GEP =
- new GetElementPtrInst(Ptr, EI.getOperand(1), I->getName() + ".gep");
+ Value *Ptr = InsertBitCastBefore(I->getOperand(0),
+ PointerType::get(EI.getType(), AS),EI);
+ GetElementPtrInst *GEP =
+ GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep");
InsertNewInstBefore(GEP, EI);
return new LoadInst(GEP);
}
assert(I->hasOneUse() && "Invariants didn't hold!");
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I) || I->mayWriteToMemory()) return false;
+ if (isa<PHINode>(I) || I->mayWriteToMemory() || isa<TerminatorInst>(I))
+ return false;
// Do not sink alloca instructions out of the entry block.
if (isa<AllocaInst>(I) && I->getParent() ==
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- for (BasicBlock::iterator Scan = LI, E = LI->getParent()->end();
+ if (I->mayReadFromMemory()) {
+ for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
Scan != E; ++Scan)
if (Scan->mayWriteToMemory())
return false;
}
- BasicBlock::iterator InsertPos = DestBlock->begin();
- while (isa<PHINode>(InsertPos)) ++InsertPos;
+ BasicBlock::iterator InsertPos = DestBlock->getFirstNonPHI();
I->moveBefore(InsertPos);
++NumSunkInst;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
- Worklist.push_back(BI->getSuccessor(!CondVal));
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
continue;
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
// See if this is an explicit destination.
for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
if (SI->getCaseValue(i) == Cond) {
- Worklist.push_back(SI->getSuccessor(i));
+ BasicBlock *ReachableBB = SI->getSuccessor(i);
+ Worklist.push_back(ReachableBB);
continue;
}
continue;
}
+ if (TD && I->getType()->getTypeID() == Type::VoidTyID) {
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i)) {
+ if (Constant *NewC = ConstantFoldConstantExpression(CE, TD))
+ i->set(NewC);
+ }
+ }
+ }
+
// See if we can trivially sink this instruction to a successor basic block.
- if (I->hasOneUse()) {
+ // FIXME: Remove GetResultInst test when first class support for aggregates
+ // is implemented.
+ if (I->hasOneUse() && !isa<GetResultInst>(I)) {
BasicBlock *BB = I->getParent();
BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent();
if (UserParent != BB) {
// Iterate while there is work to do.
unsigned Iteration = 0;
- while (DoOneIteration(F, Iteration++))
+ while (DoOneIteration(F, Iteration++))
EverMadeChange = true;
return EverMadeChange;
}