Add in the first iteration of support for llvm/clang/lldb to allow variable per addre...

[oota-llvm.git] / lib / Transforms / InstCombine / InstCombineCasts.cpp
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp

index 1ffcc2b12204dc31d3d75f8f5ef0f30364f2814e..f3f3f8f585d1a5e33b1861da5fc59758812e530c 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -12,7 +12,9 @@
  //===----------------------------------------------------------------------===//
  
  #include "InstCombine.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Support/PatternMatch.h"
  using namespace llvm;
  using namespace PatternMatch;
@@ -30,6 +32,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
    }
    
    if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+    // Cannot look past anything that might overflow.
+    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+    if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
+      Scale = 1;
+      Offset = 0;
+      return Val;
+    }
+
      if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
        if (I->getOpcode() == Instruction::Shl) {
          // This is a value scaled by '1 << the shift amt'.
@@ -68,17 +78,17 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
  /// try to eliminate the cast by moving the type information into the alloc.
  Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
                                                     AllocaInst &AI) {
-  // This requires TargetData to get the alloca alignment and size information.
+  // This requires DataLayout to get the alloca alignment and size information.
    if (!TD) return 0;
  
-  const PointerType *PTy = cast<PointerType>(CI.getType());
+  PointerType *PTy = cast<PointerType>(CI.getType());
    
    BuilderTy AllocaBuilder(*Builder);
    AllocaBuilder.SetInsertPoint(AI.getParent(), &AI);
  
    // Get the type really allocated and the type casted to.
-  const Type *AllocElTy = AI.getAllocatedType();
-  const Type *CastElTy = PTy->getElementType();
+  Type *AllocElTy = AI.getAllocatedType();
+  Type *CastElTy = PTy->getElementType();
    if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
  
    unsigned AllocElTyAlign = TD->getABITypeAlignment(AllocElTy);
@@ -87,10 +97,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
  
    // If the allocation has multiple uses, only promote it if we are strictly
    // increasing the alignment of the resultant allocation.  If we keep it the
-  // same, we open the door to infinite loops of various kinds.  (A reference
-  // from a dbg.declare doesn't count as a use for this purpose.)
-  if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) &&
-      CastElTyAlign == AllocElTyAlign) return 0;
+  // same, we open the door to infinite loops of various kinds.
+  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
  
    uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy);
    uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy);
@@ -115,48 +123,41 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
    } else {
      Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
      // Insert before the alloca, not before the cast.
-    Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
+    Amt = AllocaBuilder.CreateMul(Amt, NumElements);
    }
    
    if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
      Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                    Offset, true);
-    Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
+    Amt = AllocaBuilder.CreateAdd(Amt, Off);
    }
    
    AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
    New->setAlignment(AI.getAlignment());
    New->takeName(&AI);
    
-  // If the allocation has one real use plus a dbg.declare, just remove the
-  // declare.
-  if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) {
-    EraseInstFromFunction(*(Instruction*)DI);
-  }
    // If the allocation has multiple real uses, insert a cast and change all
    // things that used it to use the new cast.  This will also hack on CI, but it
    // will die soon.
-  else if (!AI.hasOneUse()) {
+  if (!AI.hasOneUse()) {
      // New is the allocation instruction, pointer typed. AI is the original
      // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
      Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");
-    AI.replaceAllUsesWith(NewCast);
+    ReplaceInstUsesWith(AI, NewCast);
    }
    return ReplaceInstUsesWith(CI, New);
  }
  
-
-
  /// EvaluateInDifferentType - Given an expression that 
  /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
  /// insert the code to evaluate the expression.
-Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, 
+Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, 
                                               bool isSigned) {
    if (Constant *C = dyn_cast<Constant>(V)) {
      C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
      // If we got a constantexpr back, try to simplify it with TD info.
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-      C = ConstantFoldConstantExpression(CE, TD);
+      C = ConstantFoldConstantExpression(CE, TD, TLI);
      return C;
    }
  
@@ -203,7 +204,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
    }
    case Instruction::PHI: {
      PHINode *OPN = cast<PHINode>(I);
-    PHINode *NPN = PHINode::Create(Ty);
+    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
      for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
        Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
        NPN->addIncoming(V, OPN->getIncomingBlock(i));
@@ -214,11 +215,10 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
    default: 
      // TODO: Can handle more cases here.
      llvm_unreachable("Unreachable!");
-    break;
    }
    
    Res->takeName(I);
-  return InsertNewInstBefore(Res, *I);
+  return InsertNewInstWith(Res, *I);
  }
  
  
@@ -228,12 +228,12 @@ static Instruction::CastOps
  isEliminableCastPair(
    const CastInst *CI, ///< The first cast instruction
    unsigned opcode,       ///< The opcode of the second cast instruction
-  const Type *DstTy,     ///< The target type for the second cast instruction
-  TargetData *TD         ///< The target data for pointer size
+  Type *DstTy,     ///< The target type for the second cast instruction
+  DataLayout *TD         ///< The target data for pointer size
  ) {
  
-  const Type *SrcTy = CI->getOperand(0)->getType();   // A from above
-  const Type *MidTy = CI->getType();                  // B from above
+  Type *SrcTy = CI->getOperand(0)->getType();   // A from above
+  Type *MidTy = CI->getType();                  // B from above
  
    // Get the opcodes of the two Cast instructions
    Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
@@ -259,7 +259,7 @@ isEliminableCastPair(
  /// the cast can be eliminated by some other simple transformation, we prefer
  /// to do the simplification first.
  bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
-                                      const Type *Ty) {
+                                      Type *Ty) {
    // Noop casts and casts of constants should be eliminated trivially.
    if (V->getType() == Ty || isa<Constant>(V)) return false;
    
@@ -323,7 +323,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
  ///
  /// This function works on both vectors and scalars.
  ///
-static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
+static bool CanEvaluateTruncated(Value *V, Type *Ty) {
    // We can always evaluate constants in another type.
    if (isa<Constant>(V))
      return true;
@@ -331,7 +331,7 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
    Instruction *I = dyn_cast<Instruction>(V);
    if (!I) return false;
    
-  const Type *OrigTy = V->getType();
+  Type *OrigTy = V->getType();
    
    // If this is an extension from the dest type, we can eliminate it, even if it
    // has multiple uses.
@@ -434,7 +434,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
      return &CI;
    
    Value *Src = CI.getOperand(0);
-  const Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+  Type *DestTy = CI.getType(), *SrcTy = Src->getType();
    
    // Attempt to truncate the entire input expression tree to the destination
    // type.   Only do this if the dest type is a simple type, don't convert the
@@ -455,7 +455,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
    // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
    if (DestTy->getScalarSizeInBits() == 1) {
      Constant *One = ConstantInt::get(Src->getType(), 1);
-    Src = Builder->CreateAnd(Src, One, "tmp");
+    Src = Builder->CreateAnd(Src, One);
      Value *Zero = Constant::getNullValue(Src->getType());
      return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
    }
@@ -517,7 +517,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
                                     In->getType()->getScalarSizeInBits()-1);
        In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");
        if (In->getType() != CI.getType())
-        In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");
+        In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/);
  
        if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
          Constant *One = ConstantInt::get(In->getType(), 1);
@@ -526,9 +526,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
  
        return ReplaceInstUsesWith(CI, In);
      }
-      
-      
-      
+
      // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
      // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
      // zext (X == 1) to i32 --> X        iff X has only the low bit set.
@@ -543,8 +541,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
        // If Op1C some other power of two, convert:
        uint32_t BitWidth = Op1C->getType()->getBitWidth();
        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
-      ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+      ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
          
        APInt KnownZeroMask(~KnownZero);
        if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
@@ -571,7 +568,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
            
          if ((Op1CV != 0) == isNE) { // Toggle the low bit.
            Constant *One = ConstantInt::get(In->getType(), 1);
-          In = Builder->CreateXor(In, One, "tmp");
+          In = Builder->CreateXor(In, One);
          }
            
          if (CI.getType() == In->getType())
@@ -585,16 +582,15 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
    // It is also profitable to transform icmp eq into not(xor(A, B)) because that
    // may lead to additional simplifications.
    if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
-    if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
+    if (IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
        uint32_t BitWidth = ITy->getBitWidth();
        Value *LHS = ICI->getOperand(0);
        Value *RHS = ICI->getOperand(1);
  
        APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
        APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
-      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
-      ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
-      ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+      ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS);
+      ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS);
  
        if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
          APInt KnownBits = KnownZeroLHS | KnownOneLHS;
@@ -643,7 +639,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
  /// clear the top bits anyway, doing this has no extra cost.
  ///
  /// This function works on both vectors and scalars.
-static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
+static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) {
    BitsToClear = 0;
    if (isa<Constant>(V))
      return true;
@@ -652,10 +648,8 @@ static bool CanEvaluateZExtd(Value *V, const Type *Ty, unsigned &BitsToClear) {
    if (!I) return false;
    
    // If the input is a truncate from the destination type, we can trivially
-  // eliminate it, even if it has multiple uses.
-  // FIXME: This is currently disabled until codegen can handle this without
-  // pessimizing code, PR5997.
-  if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+  // eliminate it.
+  if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
      return true;
    
    // We can't extend or shrink something that has multiple uses: doing so would
@@ -757,7 +751,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
      return &CI;
    
    Value *Src = CI.getOperand(0);
-  const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
    
    // Attempt to extend the entire input expression tree to the destination
    // type.   Only do this if the dest type is a simple type, don't convert the
@@ -819,7 +813,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
                                                             AndValue));
      }
      if (SrcSize > DstSize) {
-      Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");
+      Value *Trunc = Builder->CreateTrunc(A, CI.getType());
        APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
        return BinaryOperator::CreateAnd(Trunc, 
                                         ConstantInt::get(Trunc->getType(),
@@ -866,7 +860,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
              Value *TI0 = TI->getOperand(0);
              if (TI0->getType() == CI.getType()) {
                Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
-              Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");
+              Value *NewAnd = Builder->CreateAnd(TI0, ZC);
                return BinaryOperator::CreateXor(NewAnd, ZC);
              }
            }
@@ -883,6 +877,101 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
    return 0;
  }
  
+/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
+/// in order to eliminate the icmp.
+Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
+  Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
+  ICmpInst::Predicate Pred = ICI->getPredicate();
+
+  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+    // (x <s  0) ? -1 : 0 -> ashr x, 31        -> all ones if negative
+    // (x >s -1) ? -1 : 0 -> not (ashr x, 31)  -> all ones if positive
+    if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) ||
+        (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) {
+
+      Value *Sh = ConstantInt::get(Op0->getType(),
+                                   Op0->getType()->getScalarSizeInBits()-1);
+      Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit");
+      if (In->getType() != CI.getType())
+        In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/);
+
+      if (Pred == ICmpInst::ICMP_SGT)
+        In = Builder->CreateNot(In, In->getName()+".not");
+      return ReplaceInstUsesWith(CI, In);
+    }
+
+    // If we know that only one bit of the LHS of the icmp can be set and we
+    // have an equality comparison with zero or a power of 2, we can transform
+    // the icmp and sext into bitwise/integer operations.
+    if (ICI->hasOneUse() &&
+        ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
+      unsigned BitWidth = Op1C->getType()->getBitWidth();
+      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      ComputeMaskedBits(Op0, KnownZero, KnownOne);
+
+      APInt KnownZeroMask(~KnownZero);
+      if (KnownZeroMask.isPowerOf2()) {
+        Value *In = ICI->getOperand(0);
+
+        // If the icmp tests for a known zero bit we can constant fold it.
+        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
+          Value *V = Pred == ICmpInst::ICMP_NE ?
+                       ConstantInt::getAllOnesValue(CI.getType()) :
+                       ConstantInt::getNullValue(CI.getType());
+          return ReplaceInstUsesWith(CI, V);
+        }
+
+        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
+          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
+          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
+          unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+          // Perform a right shift to place the desired bit in the LSB.
+          if (ShiftAmt)
+            In = Builder->CreateLShr(In,
+                                     ConstantInt::get(In->getType(), ShiftAmt));
+
+          // At this point "In" is either 1 or 0. Subtract 1 to turn
+          // {1, 0} -> {0, -1}.
+          In = Builder->CreateAdd(In,
+                                  ConstantInt::getAllOnesValue(In->getType()),
+                                  "sext");
+        } else {
+          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
+          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
+          unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+          // Perform a left shift to place the desired bit in the MSB.
+          if (ShiftAmt)
+            In = Builder->CreateShl(In,
+                                    ConstantInt::get(In->getType(), ShiftAmt));
+
+          // Distribute the bit over the whole bit width.
+          In = Builder->CreateAShr(In, ConstantInt::get(In->getType(),
+                                                        BitWidth - 1), "sext");
+        }
+
+        if (CI.getType() == In->getType())
+          return ReplaceInstUsesWith(CI, In);
+        return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
+      }
+    }
+  }
+
+  // vector (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed.
+  if (VectorType *VTy = dyn_cast<VectorType>(CI.getType())) {
+    if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) &&
+        Op0->getType() == CI.getType()) {
+      Type *EltTy = VTy->getElementType();
+
+      // splat the shift constant to a constant vector.
+      Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1);
+      Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit");
+      return ReplaceInstUsesWith(CI, In);
+    }
+  }
+
+  return 0;
+}
+
  /// CanEvaluateSExtd - Return true if we can take the specified value
  /// and return it as type Ty without inserting any new casts and without
  /// changing the value of the common low bits.  This is used by code that tries
@@ -891,7 +980,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
  ///
  /// This function works on both vectors and scalars.
  ///
-static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
+static bool CanEvaluateSExtd(Value *V, Type *Ty) {
    assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
           "Can't sign extend type to a smaller type");
    // If this is a constant, it can be trivially promoted.
@@ -901,11 +990,8 @@ static bool CanEvaluateSExtd(Value *V, const Type *Ty) {
    Instruction *I = dyn_cast<Instruction>(V);
    if (!I) return false;
    
-  // If this is a truncate from the dest type, we can trivially eliminate it,
-  // even if it has multiple uses.
-  // FIXME: This is currently disabled until codegen can handle this without
-  // pessimizing code, PR5997.
-  if (0 && isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
+  // If this is a truncate from the dest type, we can trivially eliminate it.
+  if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty)
      return true;
    
    // We can't extend or shrink something that has multiple uses: doing so would
@@ -966,7 +1052,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
      return &CI;
    
    Value *Src = CI.getOperand(0);
-  const Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
  
    // Attempt to extend the entire input expression tree to the destination
    // type.   Only do this if the dest type is a simple type, don't convert the
@@ -1006,47 +1092,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
        Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
        return BinaryOperator::CreateAShr(Res, ShAmt);
      }
-  
-  
-  // (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed
-  // (x >s -1) ? -1 : 0 -> ashr x, 31  -> all ones if not signed
-  {
-  ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS;
-  if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) {
-    // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
-    // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
-    if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) ||
-        (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) {
-      Value *Sh = ConstantInt::get(CmpLHS->getType(),
-                                   CmpLHS->getType()->getScalarSizeInBits()-1);
-      Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit");
-      if (In->getType() != CI.getType())
-        In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp");
-      
-      if (Pred == ICmpInst::ICMP_SGT)
-        In = Builder->CreateNot(In, In->getName()+".not");
-      return ReplaceInstUsesWith(CI, In);
-    }
-  }
-  }
  
-  // vector (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed.
-  if (const VectorType *VTy = dyn_cast<VectorType>(DestTy)) {
-    ICmpInst::Predicate Pred; Value *CmpLHS;
-    if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) {
-      if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) {
-        const Type *EltTy = VTy->getElementType();
-
-        // splat the shift constant to a cosntant vector
-        Constant *Sh = ConstantInt::get(EltTy, EltTy->getScalarSizeInBits()-1);
-        std::vector<Constant *> Elts(VTy->getNumElements(), Sh);
-        Constant *VSh = ConstantVector::get(Elts);
-
-        Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit");
-        return ReplaceInstUsesWith(CI, In);
-      }
-    }
-  }
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
+    return transformSExtICmp(ICI, CI);
  
    // If the input is a shl/ashr pair of a same constant, then this is a sign
    // extension from a smaller value.  If we could trust arbitrary bitwidth
@@ -1103,6 +1151,9 @@ static Value *LookThroughFPExtensions(Value *V) {
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
      if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
        return V;  // No constant folding of this.
+    // See if the value can be truncated to half and then reextended.
+    if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf))
+      return V;
      // See if the value can be truncated to float and then reextended.
      if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
        return V;
@@ -1133,7 +1184,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
      case Instruction::FMul:
      case Instruction::FDiv:
      case Instruction::FRem:
-      const Type *SrcTy = OpI->getType();
+      Type *SrcTy = OpI->getType();
        Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0));
        Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
        if (LHSTrunc->getType() != SrcTy && 
@@ -1153,11 +1204,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
    }
    
    // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
-  // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
    CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
-  if (Call && Call->getCalledFunction() &&
-      Call->getCalledFunction()->getName() == "sqrt" &&
-      Call->getNumArgOperands() == 1) {
+  if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
+      Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
+      Call->getNumArgOperands() == 1 &&
+      Call->hasOneUse()) {
      CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
      if (Arg && Arg->getOpcode() == Instruction::FPExt &&
          CI.getType()->isFloatTy() &&
@@ -1177,7 +1228,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
        
        
        // Remove the old Call.  With -fmath-errno, it won't get marked readnone.
-      Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+      ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType()));
        EraseInstFromFunction(*Call);
        return ret;
      }
@@ -1242,17 +1293,18 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
    // If the source integer type is not the intptr_t type for this target, do a
    // trunc or zext to the intptr_t type, then inttoptr of it.  This allows the
    // cast to be exposed to other transforms.
+  unsigned AS = CI.getAddressSpace();
    if (TD) {
      if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
-        TD->getPointerSizeInBits()) {
+        TD->getPointerSizeInBits(AS)) {
        Value *P = Builder->CreateTrunc(CI.getOperand(0),
-                                      TD->getIntPtrType(CI.getContext()), "tmp");
+                                      TD->getIntPtrType(CI.getContext()));
        return new IntToPtrInst(P, CI.getType());
      }
      if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
-        TD->getPointerSizeInBits()) {
+        TD->getPointerSizeInBits(AS)) {
        Value *P = Builder->CreateZExt(CI.getOperand(0),
-                                     TD->getIntPtrType(CI.getContext()), "tmp");
+                                     TD->getIntPtrType(CI.getContext()));
        return new IntToPtrInst(P, CI.getType());
      }
    }
@@ -1285,13 +1337,12 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
      // non-type-safe code.
      if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) &&
          GEP->hasAllConstantIndices()) {
-      // We are guaranteed to get a constant from EmitGEPOffset.
-      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP));
-      int64_t Offset = OffsetV->getSExtValue();
-      
+      SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+      int64_t Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
+
        // Get the base pointer input of the bitcast, and the type it points to.
        Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
-      const Type *GEPIdxTy =
+      Type *GEPIdxTy =
        cast<PointerType>(OrigBase->getType())->getElementType();
        SmallVector<Value*, 8> NewIndices;
        if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) {
@@ -1299,9 +1350,8 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
          // and bitcast the result.  This eliminates one bitcast, potentially
          // two.
          Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
-        Builder->CreateInBoundsGEP(OrigBase,
-                                   NewIndices.begin(), NewIndices.end()) :
-        Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());
+        Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
+        Builder->CreateGEP(OrigBase, NewIndices);
          NGEP->takeName(GEP);
          
          if (isa<BitCastInst>(CI))
@@ -1319,17 +1369,16 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
    // If the destination integer type is not the intptr_t type for this target,
    // do a ptrtoint to intptr_t then do a trunc or zext.  This allows the cast
    // to be exposed to other transforms.
+  unsigned AS = CI.getPointerAddressSpace();
    if (TD) {
-    if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+    if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) {
        Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
-                                         TD->getIntPtrType(CI.getContext()),
-                                         "tmp");
+                                         TD->getIntPtrType(CI.getContext()));
        return new TruncInst(P, CI.getType());
      }
-    if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+    if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) {
        Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
-                                         TD->getIntPtrType(CI.getContext()),
-                                         "tmp");
+                                         TD->getIntPtrType(CI.getContext()));
        return new ZExtInst(P, CI.getType());
      }
    }
@@ -1342,12 +1391,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
  /// replace it with a shuffle (and vector/vector bitcast) if possible.
  ///
  /// The source and destination vector types may have different element types.
-static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
                                           InstCombiner &IC) {
    // We can only do this optimization if the output is a multiple of the input
    // element size, or the input is a multiple of the output element size.
    // Convert the input type to have the same element type as the output.
-  const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+  VectorType *SrcTy = cast<VectorType>(InVal->getType());
    
    if (SrcTy->getElementType() != DestTy->getElementType()) {
      // The input types don't need to be identical, but for now they must be the
@@ -1365,16 +1414,15 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
    // Now that the element types match, get the shuffle mask and RHS of the
    // shuffle to use, which depends on whether we're increasing or decreasing the
    // size of the input.
-  SmallVector<Constant*, 16> ShuffleMask;
+  SmallVector<uint32_t, 16> ShuffleMask;
    Value *V2;
-  const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
    
    if (SrcTy->getNumElements() > DestTy->getNumElements()) {
      // If we're shrinking the number of elements, just shuffle in the low
      // elements from the input and use undef as the second shuffle input.
      V2 = UndefValue::get(SrcTy);
      for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
-      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+      ShuffleMask.push_back(i);
      
    } else {
      // If we're increasing the number of elements, shuffle in all of the
@@ -1383,22 +1431,23 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
      V2 = Constant::getNullValue(SrcTy);
      unsigned SrcElts = SrcTy->getNumElements();
      for (unsigned i = 0, e = SrcElts; i != e; ++i)
-      ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+      ShuffleMask.push_back(i);
  
      // The excess elements reference the first element of the zero input.
-    ShuffleMask.append(DestTy->getNumElements()-SrcElts,
-                       ConstantInt::get(Int32Ty, SrcElts));
+    for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
+      ShuffleMask.push_back(SrcElts);
    }
    
-  Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
-  return new ShuffleVectorInst(InVal, V2, Mask);
+  return new ShuffleVectorInst(InVal, V2,
+                               ConstantDataVector::get(V2->getContext(),
+                                                       ShuffleMask));
  }
  
-static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
    return Value % Ty->getPrimitiveSizeInBits() == 0;
  }
  
-static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
    return Value / Ty->getPrimitiveSizeInBits();
  }
  
@@ -1412,7 +1461,7 @@ static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
  /// filling in Elements with the elements found here.
  static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
                                       SmallVectorImpl<Value*> &Elements,
-                                     const Type *VecEltTy) {
+                                     Type *VecEltTy) {
    // Undef values never contribute useful bits to the result.
    if (isa<UndefValue>(V)) return true;
    
@@ -1449,7 +1498,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
        C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
                                         C->getType()->getPrimitiveSizeInBits()));
      unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
-    const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+    Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
      
      for (unsigned i = 0; i != NumElts; ++i) {
        Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
@@ -1513,7 +1562,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
  /// Into two insertelements that do "buildvector{%inc, %inc5}".
  static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
                                                  InstCombiner &IC) {
-  const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+  VectorType *DestVecTy = cast<VectorType>(CI.getType());
    Value *IntInput = CI.getOperand(0);
  
    SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
@@ -1540,7 +1589,7 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
  /// bitcast.  The various long double bitcasts can't get in here.
  static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
    Value *Src = CI.getOperand(0);
-  const Type *DestTy = CI.getType();
+  Type *DestTy = CI.getType();
  
    // If this is a bitcast from int to float, check to see if the int is an
    // extraction from a vector.
@@ -1548,7 +1597,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
    // bitcast(trunc(bitcast(somevector)))
    if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
        isa<VectorType>(VecInput->getType())) {
-    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    VectorType *VecTy = cast<VectorType>(VecInput->getType());
      unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  
      if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
@@ -1569,7 +1618,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
    if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
                                  m_ConstantInt(ShAmt)))) &&
        isa<VectorType>(VecInput->getType())) {
-    const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+    VectorType *VecTy = cast<VectorType>(VecInput->getType());
      unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
      if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
          ShAmt->getZExtValue() % DestWidth == 0) {
@@ -1592,18 +1641,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
    // If the operands are integer typed then apply the integer transforms,
    // otherwise just apply the common ones.
    Value *Src = CI.getOperand(0);
-  const Type *SrcTy = Src->getType();
-  const Type *DestTy = CI.getType();
+  Type *SrcTy = Src->getType();
+  Type *DestTy = CI.getType();
  
    // Get rid of casts from one type to the same type. These are useless and can
    // be replaced by the operand.
    if (DestTy == Src->getType())
      return ReplaceInstUsesWith(CI, Src);
  
-  if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
-    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
-    const Type *DstElTy = DstPTy->getElementType();
-    const Type *SrcElTy = SrcPTy->getElementType();
+  if (PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
+    PointerType *SrcPTy = cast<PointerType>(SrcTy);
+    Type *DstElTy = DstPTy->getElementType();
+    Type *SrcElTy = SrcPTy->getElementType();
      
      // If the address spaces don't match, don't eliminate the bitcast, which is
      // required for changing types.
@@ -1634,8 +1683,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      // If we found a path from the src to dest, create the getelementptr now.
      if (SrcElTy == DstElTy) {
        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(),"",
-                                               ((Instruction*)NULL));
+      return GetElementPtrInst::CreateInBounds(Src, Idxs);
      }
    }
    
@@ -1644,7 +1692,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
        return I;
  
-  if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
+  if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
      if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
        return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
@@ -1673,7 +1721,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      }
    }
  
-  if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
+  if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
      if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
        Value *Elem = 
          Builder->CreateExtractElement(Src,