Change the PointerType api for creating pointer types. The old functionality of Point...

[oota-llvm.git] / lib / Transforms / Scalar / InstructionCombining.cpp
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index 416e1f012a64eaca2cfaf8ce18f44f4c485c5d2f..7989ebf1979639d4bedd8ddc793d04bf7fb60bd4 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -39,6 +39,7 @@
  #include "llvm/Pass.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/GlobalVariable.h"
+#include "llvm/ParameterAttributes.h"
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/Target/TargetData.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -234,6 +235,7 @@ namespace {
    private:
      Instruction *visitCallSite(CallSite CS);
      bool transformConstExprCastCall(CallSite CS);
+    Instruction *transformCallThroughTrampoline(CallSite CS);
  
    public:
      // InsertNewInstBefore - insert an instruction New before instruction Old
@@ -1663,16 +1665,22 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
    return MadeChange ? I : 0;
  }
  
-/// @returns true if the specified compare instruction is
+/// @returns true if the specified compare predicate is
  /// true when both operands are equal...
-/// @brief Determine if the ICmpInst returns true if both operands are equal
-static bool isTrueWhenEqual(ICmpInst &ICI) {
-  ICmpInst::Predicate pred = ICI.getPredicate();
+/// @brief Determine if the icmp Predicate is true when both operands are equal
+static bool isTrueWhenEqual(ICmpInst::Predicate pred) {
    return pred == ICmpInst::ICMP_EQ  || pred == ICmpInst::ICMP_UGE ||
           pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_ULE ||
           pred == ICmpInst::ICMP_SLE;
  }
  
+/// @returns true if the specified compare instruction is
+/// true when both operands are equal...
+/// @brief Determine if the ICmpInst returns true when both operands are equal
+static bool isTrueWhenEqual(ICmpInst &ICI) {
+  return isTrueWhenEqual(ICI.getPredicate());
+}
+
  /// AssociativeOpt - Perform an optimization on an associative operator.  This
  /// function is designed to check a chain of associative operators for a
  /// potential to apply a certain optimization.  Since the optimization may be
@@ -1942,7 +1950,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
        if (RHSC->isNullValue())
          return ReplaceInstUsesWith(I, LHS);
      } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
-      if (CFP->isExactlyValue(-0.0))
+      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+                              (I.getType())->getValueAPF()))
          return ReplaceInstUsesWith(I, LHS);
      }
  
@@ -2113,8 +2122,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
          (CI->getType()->getPrimitiveSizeInBits() == 
           TD->getIntPtrType()->getPrimitiveSizeInBits()) 
          && isa<PointerType>(CI->getOperand(0)->getType())) {
+      unsigned AS =
+        cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace();
        Value *I2 = InsertCastBefore(Instruction::BitCast, CI->getOperand(0),
-                                   PointerType::get(Type::Int8Ty), I);
+                                   PointerType::get(Type::Int8Ty, AS), I);
        I2 = InsertNewInstBefore(new GetElementPtrInst(I2, Other, "ctg2"), I);
        return new PtrToIntInst(I2, CI->getType());
      }
@@ -2248,6 +2259,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
          Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
          return BinaryOperator::createMul(Op0, CP1);
        }
+
+      // X - ((X / Y) * Y) --> X % Y
+      if (Op1I->getOpcode() == Instruction::Mul)
+        if (Instruction *I = dyn_cast<Instruction>(Op1I->getOperand(0)))
+          if (Op0 == I->getOperand(0) &&
+              Op1I->getOperand(1) == I->getOperand(1)) {
+            if (I->getOpcode() == Instruction::SDiv)
+              return BinaryOperator::createSRem(Op0, Op1I->getOperand(1));
+            if (I->getOpcode() == Instruction::UDiv)
+              return BinaryOperator::createURem(Op0, Op1I->getOperand(1));
+          }
      }
    }
  
@@ -2342,8 +2364,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
  
        // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
        // ANSI says we can drop signals, so we can do this anyway." (from GCC)
-      if (Op1F->getValue() == 1.0)
-        return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
+      // We need a better interface for long double here.
+      if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy)
+        if (Op1F->isExactlyValue(1.0))
+          return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
      }
      
      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
@@ -2600,6 +2624,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
    if (I.getType()->isInteger()) {
      APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
      if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+      // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
        return BinaryOperator::createUDiv(Op0, Op1, I.getName());
      }
    }      
@@ -2637,7 +2662,7 @@ static Constant *GetFactor(Value *V) {
      if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
        // X & 0xFFF0 is known to be a multiple of 16.
        uint32_t Zeros = RHS->getValue().countTrailingZeros();
-      if (Zeros != V->getType()->getPrimitiveSizeInBits())
+      if (Zeros != V->getType()->getPrimitiveSizeInBits())// don't shift by "32"
          return ConstantExpr::getShl(Result, 
                                      ConstantInt::get(Result->getType(), Zeros));
      }
@@ -2789,6 +2814,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
  Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  
+  // Handle the integer rem common cases
    if (Instruction *common = commonIRemTransforms(I))
      return common;
    
@@ -2801,12 +2827,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
        return &I;
      }
   
-  // If the top bits of both operands are zero (i.e. we can prove they are
+  // If the sign bits of both operands are zero (i.e. we can prove they are
    // unsigned inputs), turn this into a urem.
-  APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
-  if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
-    // X srem Y -> X urem Y, iff X and Y don't have sign bit set
-    return BinaryOperator::createURem(Op0, Op1, I.getName());
+  if (I.getType()->isInteger()) {
+    APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()));
+    if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) {
+      // X srem Y -> X urem Y, iff X and Y don't have sign bit set
+      return BinaryOperator::createURem(Op0, Op1, I.getName());
+    }
    }
  
    return 0;
@@ -2891,7 +2919,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) {
  
  /// getICmpValue - This is the complement of getICmpCode, which turns an
  /// opcode and two operands into either a constant true or false, or a brand 
-/// new /// ICmp instruction. The sign is passed in to determine which kind
+/// new ICmp instruction. The sign is passed in to determine which kind
  /// of predicate to use in new icmp instructions.
  static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
    switch (code) {
@@ -3437,7 +3465,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
              LHSCC != ICmpInst::ICMP_UGE && LHSCC != ICmpInst::ICMP_ULE &&
              RHSCC != ICmpInst::ICMP_UGE && RHSCC != ICmpInst::ICMP_ULE &&
              LHSCC != ICmpInst::ICMP_SGE && LHSCC != ICmpInst::ICMP_SLE &&
-            RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE) {
+            RHSCC != ICmpInst::ICMP_SGE && RHSCC != ICmpInst::ICMP_SLE &&
+            
+            // Don't try to fold ICMP_SLT + ICMP_ULT.
+            (ICmpInst::isEquality(LHSCC) || ICmpInst::isEquality(RHSCC) ||
+             ICmpInst::isSignedPredicate(LHSCC) == 
+                 ICmpInst::isSignedPredicate(RHSCC))) {
            // Ensure that the larger constant is on the RHS.
            ICmpInst::Predicate GT = ICmpInst::isSignedPredicate(LHSCC) ? 
              ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
@@ -3551,8 +3584,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
            case ICmpInst::ICMP_SGT:
              switch (RHSCC) {
              default: assert(0 && "Unknown integer condition code!");
-            case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X s> 13
-              return ReplaceInstUsesWith(I, LHS);
+            case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15
              case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15
                return ReplaceInstUsesWith(I, RHS);
              case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change
@@ -3606,6 +3638,23 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
        }
    }
  
+  // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
+      if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
+          RHS->getPredicate() == FCmpInst::FCMP_ORD)
+        if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+          if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+            // If either of the constants are nans, then the whole thing returns
+            // false.
+            if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+              return ReplaceInstUsesWith(I, ConstantInt::getFalse());
+            return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0),
+                                RHS->getOperand(0));
+          }
+    }
+  }
+      
    return Changed ? &I : 0;
  }
  
@@ -3989,6 +4038,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
              case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change
                break;
              case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) ->(X-13) u> 2
+              // If RHSCst is [us]MAXINT, it is always false.  Not handling
+              // this can cause overflow.
+              if (RHSCst->isMaxValue(false))
+                return ReplaceInstUsesWith(I, LHS);
                return InsertRangeTest(LHSVal, LHSCst, AddOne(RHSCst), false, 
                                       false, I);
              case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change
@@ -4006,6 +4059,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
              case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change
                break;
              case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) ->(X-13) s> 2
+              // If RHSCst is [us]MAXINT, it is always false.  Not handling
+              // this can cause overflow.
+              if (RHSCst->isMaxValue(true))
+                return ReplaceInstUsesWith(I, LHS);
                return InsertRangeTest(LHSVal, LHSCst, AddOne(RHSCst), true, 
                                       false, I);
              case ICmpInst::ICMP_UGT:        // (X s< 13 | X u> 15) -> no change
@@ -4052,7 +4109,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
    }
      
    // fold (or (cast A), (cast B)) -> (cast (or A, B))
-  if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
      if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
        if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ?
          const Type *SrcTy = Op0C->getOperand(0)->getType();
@@ -4069,7 +4126,28 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
            return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
          }
        }
-      
+  }
+  
+    
+  // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)
+  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) {
+    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) {
+      if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
+          RHS->getPredicate() == FCmpInst::FCMP_UNO)
+        if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
+          if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
+            // If either of the constants are nans, then the whole thing returns
+            // true.
+            if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN())
+              return ReplaceInstUsesWith(I, ConstantInt::getTrue());
+            
+            // Otherwise, no need to compare the two constants, compare the
+            // rest.
+            return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0),
+                                RHS->getOperand(0));
+          }
+    }
+  }
  
    return Changed ? &I : 0;
  }
@@ -4319,7 +4397,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
        return R;
  
    // fold (xor (cast A), (cast B)) -> (cast (xor A, B))
-  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) 
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
      if (CastInst *Op1C = dyn_cast<CastInst>(Op1))
        if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind?
          const Type *SrcTy = Op0C->getOperand(0)->getType();
@@ -4336,7 +4414,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
            return CastInst::create(Op0C->getOpcode(), NewOp, I.getType());
          }
        }
-
+  }
    return Changed ? &I : 0;
  }
  
@@ -4370,7 +4448,7 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
  
    for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
      Value *Op = GEP->getOperand(i);
-    uint64_t Size = TD.getTypeSize(GTI.getIndexedType()) & PtrSizeMask;
+    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()) & PtrSizeMask;
      if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
        if (OpC->isZero()) continue;
        
@@ -4455,7 +4533,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
              return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
            if (C->isNullValue())
              EmitIt = false;
-          else if (TD->getTypeSize(GTI.getIndexedType()) == 0) {
+          else if (TD->getABITypeSize(GTI.getIndexedType()) == 0) {
              EmitIt = false;  // This is indexing into a zero sized array?
            } else if (isa<ConstantInt>(C))
              return ReplaceInstUsesWith(I, // No comparison is needed here.
@@ -4562,8 +4640,9 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
  
        if (NumDifferences == 0)   // SAME GEP?
          return ReplaceInstUsesWith(I, // No comparison is needed here.
-                                   ConstantInt::get(Type::Int1Ty, 
-                                                    Cond == ICmpInst::ICMP_EQ));
+                                   ConstantInt::get(Type::Int1Ty,
+                                                    isTrueWhenEqual(Cond)));
+
        else if (NumDifferences == 1) {
          Value *LHSV = GEPLHS->getOperand(DiffOperand);
          Value *RHSV = GEPRHS->getOperand(DiffOperand);
@@ -4684,14 +4763,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
    if (isa<UndefValue>(Op1))                  // X icmp undef -> undef
      return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
  
-  // icmp of GlobalValues can never equal each other as long as they aren't
-  // external weak linkage type.
-  if (GlobalValue *GV0 = dyn_cast<GlobalValue>(Op0))
-    if (GlobalValue *GV1 = dyn_cast<GlobalValue>(Op1))
-      if (!GV0->hasExternalWeakLinkage() || !GV1->hasExternalWeakLinkage())
-        return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
-                                                       !isTrueWhenEqual(I)));
-
    // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
    // addresses never equal each other!  We already know that Op0 != Op1.
    if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
@@ -5803,7 +5874,22 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
  }
  
  Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
-  return commonShiftTransforms(I);
+  if (Instruction *R = commonShiftTransforms(I))
+    return R;
+  
+  Value *Op0 = I.getOperand(0);
+  
+  // ashr int -1, X = -1   (for any arithmetic shift rights of ~0)
+  if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
+    if (CSI->isAllOnesValue())
+      return ReplaceInstUsesWith(I, CSI);
+  
+  // See if we can turn a signed shr into an unsigned shr.
+  if (MaskedValueIsZero(Op0, 
+                      APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
+    return BinaryOperator::createLShr(Op0, I.getOperand(1));
+  
+  return 0;
  }
  
  Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
@@ -5829,26 +5915,12 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
    }
  
-  // ashr int -1, X = -1   (for any arithmetic shift rights of ~0)
-  if (I.getOpcode() == Instruction::AShr)
-    if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-      if (CSI->isAllOnesValue())
-        return ReplaceInstUsesWith(I, CSI);
-
    // Try to fold constant and into select arguments.
    if (isa<Constant>(Op0))
      if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
        if (Instruction *R = FoldOpIntoSelect(I, SI, this))
          return R;
  
-  // See if we can turn a signed shr into an unsigned shr.
-  if (I.isArithmeticShift()) {
-    if (MaskedValueIsZero(Op0, 
-          APInt::getSignBit(I.getType()->getPrimitiveSizeInBits()))) {
-      return BinaryOperator::createLShr(Op0, Op1, I.getName());
-    }
-  }
-
    if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
      if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
        return Res;
@@ -6012,9 +6084,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
          // the constant which would cause it to be modified for this
          // operation.
          //
-        if (isValid && !isLeftShift && I.getOpcode() == Instruction::AShr) {
+        if (isValid && I.getOpcode() == Instruction::AShr)
            isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
-        }
          
          if (isValid) {
            Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
@@ -6175,33 +6246,29 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
    assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!");
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
      Offset = CI->getZExtValue();
-    Scale  = 1;
+    Scale  = 0;
      return ConstantInt::get(Type::Int32Ty, 0);
-  } else if (Instruction *I = dyn_cast<Instruction>(Val)) {
-    if (I->getNumOperands() == 2) {
-      if (ConstantInt *CUI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-        if (I->getOpcode() == Instruction::Shl) {
-          // This is a value scaled by '1 << the shift amt'.
-          Scale = 1U << CUI->getZExtValue();
-          Offset = 0;
-          return I->getOperand(0);
-        } else if (I->getOpcode() == Instruction::Mul) {
-          // This value is scaled by 'CUI'.
-          Scale = CUI->getZExtValue();
-          Offset = 0;
-          return I->getOperand(0);
-        } else if (I->getOpcode() == Instruction::Add) {
-          // We have X+C.  Check to see if we really have (X*C2)+C1, 
-          // where C1 is divisible by C2.
-          unsigned SubScale;
-          Value *SubVal = 
-            DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
-          Offset += CUI->getZExtValue();
-          if (SubScale > 1 && (Offset % SubScale == 0)) {
-            Scale = SubScale;
-            return SubVal;
-          }
-        }
+  } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      if (I->getOpcode() == Instruction::Shl) {
+        // This is a value scaled by '1 << the shift amt'.
+        Scale = 1U << RHS->getZExtValue();
+        Offset = 0;
+        return I->getOperand(0);
+      } else if (I->getOpcode() == Instruction::Mul) {
+        // This value is scaled by 'RHS'.
+        Scale = RHS->getZExtValue();
+        Offset = 0;
+        return I->getOperand(0);
+      } else if (I->getOpcode() == Instruction::Add) {
+        // We have X+C.  Check to see if we really have (X*C2)+C1, 
+        // where C1 is divisible by C2.
+        unsigned SubScale;
+        Value *SubVal = 
+          DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+        Offset += RHS->getZExtValue();
+        Scale = SubScale;
+        return SubVal;
        }
      }
    }
@@ -6248,8 +6315,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
    // same, we open the door to infinite loops of various kinds.
    if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
  
-  uint64_t AllocElTySize = TD->getTypeSize(AllocElTy);
-  uint64_t CastElTySize = TD->getTypeSize(CastElTy);
+  uint64_t AllocElTySize = TD->getABITypeSize(AllocElTy);
+  uint64_t CastElTySize = TD->getABITypeSize(CastElTy);
    if (CastElTySize == 0 || AllocElTySize == 0) return 0;
  
    // See if we can satisfy the modulus by pulling a scale out of the array
@@ -6392,6 +6459,7 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
      // of casts in the input.
      if (I->getOpcode() == CastOpc)
        return true;
+    
      break;
    default:
      // TODO: Can handle more cases here.
@@ -6515,7 +6583,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
            // is something like [0 x {int, int}]
            const Type *IntPtrTy = TD->getIntPtrType();
            int64_t FirstIdx = 0;
-          if (int64_t TySize = TD->getTypeSize(GEPIdxTy)) {
+          if (int64_t TySize = TD->getABITypeSize(GEPIdxTy)) {
              FirstIdx = Offset/TySize;
              Offset %= TySize;
            
@@ -6547,7 +6615,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
                }
              } else if (isa<ArrayType>(GEPIdxTy) || isa<VectorType>(GEPIdxTy)) {
                const SequentialType *STy = cast<SequentialType>(GEPIdxTy);
-              if (uint64_t EltSize = TD->getTypeSize(STy->getElementType())) {
+              if (uint64_t EltSize = TD->getABITypeSize(STy->getElementType())){
                  NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
                  Offset %= EltSize;
                } else {
@@ -6564,8 +6632,9 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
              // If we were able to index down into an element, create the GEP
              // and bitcast the result.  This eliminates one bitcast, potentially
              // two.
-            Instruction *NGEP = new GetElementPtrInst(OrigBase, &NewIndices[0],
-                                                      NewIndices.size(), "");
+            Instruction *NGEP = new GetElementPtrInst(OrigBase, 
+                                                      NewIndices.begin(),
+                                                      NewIndices.end(), "");
              InsertNewInstBefore(NGEP, CI);
              NGEP->takeName(GEP);
              
@@ -7057,7 +7126,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      // If we found a path from the src to dest, create the getelementptr now.
      if (SrcElTy == DstElTy) {
        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-      return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
+      return new GetElementPtrInst(Src, Idxs.begin(), Idxs.end(), "", 
+                                   ((Instruction*) NULL));
      }
    }
  
@@ -7258,6 +7328,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
          return BinaryOperator::createOr(NotCond, TrueVal);
        }
      }
+    
+    // select a, b, a  -> a&b
+    // select a, a, b  -> a|b
+    if (CondVal == TrueVal)
+      return BinaryOperator::createOr(CondVal, FalseVal);
+    else if (CondVal == FalseVal)
+      return BinaryOperator::createAnd(CondVal, TrueVal);
    }
  
    // Selecting between two integer constants?
@@ -7336,8 +7413,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
      if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
        // Transform (X == Y) ? X : Y  -> Y
-      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ)
+      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+        // This is not safe in general for floating point:  
+        // consider X== -0, Y== +0.
+        // It becomes safe if either operand is a nonzero constant.
+        ConstantFP *CFPt, *CFPf;
+        if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+              !CFPt->getValueAPF().isZero()) ||
+            ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+             !CFPf->getValueAPF().isZero()))
          return ReplaceInstUsesWith(SI, FalseVal);
+      }
        // Transform (X != Y) ? X : Y  -> X
        if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
          return ReplaceInstUsesWith(SI, TrueVal);
@@ -7345,8 +7431,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
  
      } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
        // Transform (X == Y) ? Y : X  -> X
-      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ)
-        return ReplaceInstUsesWith(SI, FalseVal);
+      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+        // This is not safe in general for floating point:  
+        // consider X== -0, Y== +0.
+        // It becomes safe if either operand is a nonzero constant.
+        ConstantFP *CFPt, *CFPf;
+        if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+              !CFPt->getValueAPF().isZero()) ||
+            ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+             !CFPf->getValueAPF().isZero()))
+          return ReplaceInstUsesWith(SI, FalseVal);
+      }
        // Transform (X != Y) ? Y : X  -> Y
        if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
          return ReplaceInstUsesWith(SI, TrueVal);
@@ -7500,7 +7595,7 @@ static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD,
                                             unsigned PrefAlign = 0) {
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
      unsigned Align = GV->getAlignment();
-    if (Align == 0 && TD) 
+    if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) 
        Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
  
      // If there is a large requested alignment and we can, bump up the alignment
@@ -7633,6 +7728,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
          MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align));
          Changed = true;
        }
+
+      // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+      // load/store.
+      ConstantInt *MemOpLength = dyn_cast<ConstantInt>(CI.getOperand(3));
+      if (MemOpLength) {
+        unsigned Size = MemOpLength->getZExtValue();
+        unsigned Align = cast<ConstantInt>(CI.getOperand(4))->getZExtValue();
+        PointerType *NewPtrTy = NULL;
+        // Destination pointer type is always i8 *
+        // If Size is 8 then use Int64Ty
+        // If Size is 4 then use Int32Ty
+        // If Size is 2 then use Int16Ty
+        // If Size is 1 then use Int8Ty
+        if (Size && Size <=8 && !(Size&(Size-1)))
+          NewPtrTy = PointerType::getUnqual(IntegerType::get(Size<<3));
+
+        if (NewPtrTy) {
+          Value *Src = InsertCastBefore(Instruction::BitCast, CI.getOperand(2),
+                                        NewPtrTy, CI);
+          Value *Dest = InsertCastBefore(Instruction::BitCast, CI.getOperand(1),
+                                         NewPtrTy, CI);
+          Value *L = new LoadInst(Src, "tmp", false, Align, &CI);
+          Value *NS = new StoreInst(L, Dest, false, Align, &CI);
+          CI.replaceAllUsesWith(NS);
+          Changed = true;
+          return EraseInstFromFunction(CI);
+        }
+      }
      } else if (isa<MemSetInst>(MI)) {
        unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD);
        if (MI->getAlignment()->getZExtValue() < Alignment) {
@@ -7653,8 +7776,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
        // Turn PPC lvx     -> load if the pointer is known aligned.
        // Turn X86 loadups -> load if the pointer is known aligned.
        if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
-        Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
-                                      PointerType::get(II->getType()), CI);
+        Value *Ptr = 
+          InsertCastBefore(Instruction::BitCast, II->getOperand(1),
+                           PointerType::getUnqual(II->getType()), CI);
          return new LoadInst(Ptr);
        }
        break;
@@ -7662,7 +7786,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::ppc_altivec_stvxl:
        // Turn stvx -> store if the pointer is known aligned.
        if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) {
-        const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType());
+        const Type *OpPtrTy = 
+          PointerType::getUnqual(II->getOperand(1)->getType());
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2),
                                        OpPtrTy, CI);
          return new StoreInst(II->getOperand(1), Ptr);
@@ -7674,7 +7799,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::x86_sse2_storel_dq:
        // Turn X86 storeu -> store if the pointer is known aligned.
        if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
-        const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
+        const Type *OpPtrTy = 
+          PointerType::getUnqual(II->getOperand(2)->getType());
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                        OpPtrTy, CI);
          return new StoreInst(II->getOperand(2), Ptr);
@@ -7800,7 +7926,8 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
        // If the call and callee calling conventions don't match, this call must
        // be unreachable, as the call is undefined.
        new StoreInst(ConstantInt::getTrue(),
-                    UndefValue::get(PointerType::get(Type::Int1Ty)), OldCall);
+                    UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), 
+                                    OldCall);
        if (!OldCall->use_empty())
          OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));
        if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.
@@ -7813,7 +7940,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
      // undef so that we know that this code is not reachable, despite the fact
      // that we can't modify the CFG here.
      new StoreInst(ConstantInt::getTrue(),
-                  UndefValue::get(PointerType::get(Type::Int1Ty)),
+                  UndefValue::get(PointerType::getUnqual(Type::Int1Ty)),
                    CS.getInstruction());
  
      if (!CS.getInstruction()->use_empty())
@@ -7828,6 +7955,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
      return EraseInstFromFunction(*CS.getInstruction());
    }
  
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
+    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
+      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
+        return transformCallThroughTrampoline(CS);
+
    const PointerType *PTy = cast<PointerType>(Callee->getType());
    const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
    if (FTy->isVarArg()) {
@@ -7846,6 +7978,19 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
        }
    }
  
+  if (isa<InlineAsm>(Callee) && !CS.isNoUnwind()) {
+    // Inline asm calls cannot throw - mark them 'nounwind'.
+    const ParamAttrsList *PAL = CS.getParamAttrs();
+    uint16_t RAttributes = PAL ? PAL->getParamAttrs(0) : 0;
+    RAttributes |= ParamAttr::NoUnwind;
+
+    ParamAttrsVector modVec;
+    modVec.push_back(ParamAttrsWithIndex::get(0, RAttributes));
+    PAL = ParamAttrsList::getModified(PAL, modVec);
+    CS.setParamAttrs(PAL);
+    Changed = true;
+  }
+
    return Changed ? CS.getInstruction() : 0;
  }
  
@@ -7868,14 +8013,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    const FunctionType *FT = Callee->getFunctionType();
    const Type *OldRetTy = Caller->getType();
  
-  const FunctionType *ActualFT =
-    cast<FunctionType>(cast<PointerType>(CE->getType())->getElementType());
-  
-  // If the parameter attributes don't match up, don't do the xform.  We don't
-  // want to lose an sret attribute or something.
-  if (FT->getParamAttrs() != ActualFT->getParamAttrs())
+  const ParamAttrsList* CallerPAL = 0;
+  if (CallInst *CallerCI = dyn_cast<CallInst>(Caller))
+    CallerPAL = CallerCI->getParamAttrs();
+  else if (InvokeInst *CallerII = dyn_cast<InvokeInst>(Caller))
+    CallerPAL = CallerII->getParamAttrs();
+
+  // If the parameter attributes are not compatible, don't do the xform.  We
+  // don't want to lose an sret attribute or something.
+  if (!ParamAttrsList::areCompatible(CallerPAL, Callee->getParamAttrs()))
      return false;
-  
+
    // Check to see if we are changing the return type...
    if (OldRetTy != FT->getReturnType()) {
      if (Callee->isDeclaration() && !Caller->use_empty() && 
@@ -8008,12 +8156,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
      NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(),
                          Args.begin(), Args.end(), Caller->getName(), Caller);
      cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
+    cast<InvokeInst>(NC)->setParamAttrs(CallerPAL);
    } else {
      NC = new CallInst(Callee, Args.begin(), Args.end(),
                        Caller->getName(), Caller);
-    if (cast<CallInst>(Caller)->isTailCall())
+    CallInst *CI = cast<CallInst>(Caller);
+    if (CI->isTailCall())
        cast<CallInst>(NC)->setTailCall();
-   cast<CallInst>(NC)->setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
+    cast<CallInst>(NC)->setParamAttrs(CallerPAL);
    }
  
    // Insert a cast of the return type as necessary.
@@ -8048,6 +8199,150 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    return true;
  }
  
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
+//
+Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+  Value *Callee = CS.getCalledValue();
+  const PointerType *PTy = cast<PointerType>(Callee->getType());
+  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+  IntrinsicInst *Tramp =
+    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+
+  Function *NestF =
+    cast<Function>(IntrinsicInst::StripPointerCasts(Tramp->getOperand(2)));
+  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+  if (const ParamAttrsList *NestAttrs = NestF->getParamAttrs()) {
+    unsigned NestIdx = 1;
+    const Type *NestTy = 0;
+    uint16_t NestAttr = 0;
+
+    // Look for a parameter marked with the 'nest' attribute.
+    for (FunctionType::param_iterator I = NestFTy->param_begin(),
+         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+      if (NestAttrs->paramHasAttr(NestIdx, ParamAttr::Nest)) {
+        // Record the parameter type and any other attributes.
+        NestTy = *I;
+        NestAttr = NestAttrs->getParamAttrs(NestIdx);
+        break;
+      }
+
+    if (NestTy) {
+      Instruction *Caller = CS.getInstruction();
+      std::vector<Value*> NewArgs;
+      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+      // Insert the nest argument into the call argument list, which may
+      // mean appending it.
+      {
+        unsigned Idx = 1;
+        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+        do {
+          if (Idx == NestIdx) {
+            // Add the chain argument.
+            Value *NestVal = Tramp->getOperand(3);
+            if (NestVal->getType() != NestTy)
+              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
+            NewArgs.push_back(NestVal);
+          }
+
+          if (I == E)
+            break;
+
+          // Add the original argument.
+          NewArgs.push_back(*I);
+
+          ++Idx, ++I;
+        } while (1);
+      }
+
+      // The trampoline may have been bitcast to a bogus type (FTy).
+      // Handle this by synthesizing a new function type, equal to FTy
+      // with the chain parameter inserted.  Likewise for attributes.
+
+      const ParamAttrsList *Attrs = CS.getParamAttrs();
+      std::vector<const Type*> NewTypes;
+      ParamAttrsVector NewAttrs;
+      NewTypes.reserve(FTy->getNumParams()+1);
+
+      // Add any function result attributes.
+      uint16_t Attr = Attrs ? Attrs->getParamAttrs(0) : 0;
+      if (Attr)
+        NewAttrs.push_back (ParamAttrsWithIndex::get(0, Attr));
+
+      // Insert the chain's type into the list of parameter types, which may
+      // mean appending it.  Likewise for the chain's attributes.
+      {
+        unsigned Idx = 1;
+        FunctionType::param_iterator I = FTy->param_begin(),
+          E = FTy->param_end();
+
+        do {
+          if (Idx == NestIdx) {
+            // Add the chain's type and attributes.
+            NewTypes.push_back(NestTy);
+            NewAttrs.push_back(ParamAttrsWithIndex::get(NestIdx, NestAttr));
+          }
+
+          if (I == E)
+            break;
+
+          // Add the original type and attributes.
+          NewTypes.push_back(*I);
+          Attr = Attrs ? Attrs->getParamAttrs(Idx) : 0;
+          if (Attr)
+            NewAttrs.push_back
+              (ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+
+          ++Idx, ++I;
+        } while (1);
+      }
+
+      // Replace the trampoline call with a direct call.  Let the generic
+      // code sort out any function type mismatches.
+      FunctionType *NewFTy =
+        FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
+      Constant *NewCallee = NestF->getType() == PointerType::getUnqual(NewFTy) ?
+        NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy));
+      const ParamAttrsList *NewPAL = ParamAttrsList::get(NewAttrs);
+
+      Instruction *NewCaller;
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+        NewCaller = new InvokeInst(NewCallee,
+                                   II->getNormalDest(), II->getUnwindDest(),
+                                   NewArgs.begin(), NewArgs.end(),
+                                   Caller->getName(), Caller);
+        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+        cast<InvokeInst>(NewCaller)->setParamAttrs(NewPAL);
+      } else {
+        NewCaller = new CallInst(NewCallee, NewArgs.begin(), NewArgs.end(),
+                                 Caller->getName(), Caller);
+        if (cast<CallInst>(Caller)->isTailCall())
+          cast<CallInst>(NewCaller)->setTailCall();
+        cast<CallInst>(NewCaller)->
+          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+        cast<CallInst>(NewCaller)->setParamAttrs(NewPAL);
+      }
+      if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+        Caller->replaceAllUsesWith(NewCaller);
+      Caller->eraseFromParent();
+      RemoveFromWorkList(Caller);
+      return 0;
+    }
+  }
+
+  // Replace the trampoline call with a direct call.  Since there is no 'nest'
+  // parameter, there is no need to adjust the argument list.  Let the generic
+  // code sort out any function type mismatches.
+  Constant *NewCallee =
+    NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy);
+  CS.setCalledFunction(NewCallee);
+  return CS.getInstruction();
+}
+
  /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)]
  /// and if a/b/c/d and the add's all have a single use, turn this into two phi's
  /// and a single binop.
@@ -8283,6 +8578,10 @@ static bool DeadPHICycle(PHINode *PN,
    // Remember this node, and if we find the cycle, return.
    if (!PotentiallyDeadPHIs.insert(PN))
      return true;
+  
+  // Don't scan crazily complex things.
+  if (PotentiallyDeadPHIs.size() == 16)
+    return false;
  
    if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
      return DeadPHICycle(PU, PotentiallyDeadPHIs);
@@ -8290,6 +8589,34 @@ static bool DeadPHICycle(PHINode *PN,
    return false;
  }
  
+/// PHIsEqualValue - Return true if this phi node is always equal to
+/// NonPhiInVal.  This happens with mutually cyclic phi nodes like:
+///   z = some value; x = phi (y, z); y = phi (x, z)
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, 
+                           SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
+  // See if we already saw this PHI node.
+  if (!ValueEqualPHIs.insert(PN))
+    return true;
+  
+  // Don't scan crazily complex things.
+  if (ValueEqualPHIs.size() == 16)
+    return false;
+ 
+  // Scan the operands to see if they are either phi nodes or are equal to
+  // the value.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *Op = PN->getIncomingValue(i);
+    if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
+      if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
+        return false;
+    } else if (Op != NonPhiInVal)
+      return false;
+  }
+  
+  return true;
+}
+
+
  // PHINode simplification
  //
  Instruction *InstCombiner::visitPHINode(PHINode &PN) {
@@ -8331,6 +8658,40 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
      }
    }
  
+  // We sometimes end up with phi cycles that non-obviously end up being the
+  // same value, for example:
+  //   z = some value; x = phi (y, z); y = phi (x, z)
+  // where the phi nodes don't necessarily need to be in the same block.  Do a
+  // quick check to see if the PHI node only contains a single non-phi value, if
+  // so, scan to see if the phi cycle is actually equal to that value.
+  {
+    unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues();
+    // Scan for the first non-phi operand.
+    while (InValNo != NumOperandVals && 
+           isa<PHINode>(PN.getIncomingValue(InValNo)))
+      ++InValNo;
+
+    if (InValNo != NumOperandVals) {
+      Value *NonPhiInVal = PN.getOperand(InValNo);
+      
+      // Scan the rest of the operands to see if there are any conflicts, if so
+      // there is no need to recursively scan other phis.
+      for (++InValNo; InValNo != NumOperandVals; ++InValNo) {
+        Value *OpVal = PN.getIncomingValue(InValNo);
+        if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
+          break;
+      }
+      
+      // If we scanned over all operands, then we have one unique value plus
+      // phi values.  Scan PHI nodes to see if they all merge in each other or
+      // the value.
+      if (InValNo == NumOperandVals) {
+        SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
+        if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
+          return ReplaceInstUsesWith(PN, NonPhiInVal);
+      }
+    }
+  }
    return 0;
  }
  
@@ -8389,7 +8750,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        // insert it.  This explicit cast can make subsequent optimizations more
        // obvious.
        Value *Op = GEP.getOperand(i);
-      if (TD->getTypeSize(Op->getType()) > TD->getPointerSize())
+      if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits())
          if (Constant *C = dyn_cast<Constant>(Op)) {
            GEP.setOperand(i, ConstantExpr::getTrunc(C, TD->getIntPtrType()));
            MadeChange = true;
@@ -8406,10 +8767,25 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    // If this GEP instruction doesn't move the pointer, and if the input operand
    // is a bitcast of another pointer, just replace the GEP with a bitcast of the
    // real input to the dest type.
-  if (GEP.hasAllZeroIndices() && isa<BitCastInst>(GEP.getOperand(0)))
-    return new BitCastInst(cast<BitCastInst>(GEP.getOperand(0))->getOperand(0),
-                           GEP.getType());
-    
+  if (GEP.hasAllZeroIndices()) {
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(GEP.getOperand(0))) {
+      // If the bitcast is of an allocation, and the allocation will be
+      // converted to match the type of the cast, don't touch this.
+      if (isa<AllocationInst>(BCI->getOperand(0))) {
+        // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+        if (Instruction *I = visitBitCast(*BCI)) {
+          if (I != BCI) {
+            I->takeName(BCI);
+            BCI->getParent()->getInstList().insert(BCI, I);
+            ReplaceInstUsesWith(*BCI, I);
+          }
+          return &GEP;
+        }
+      }
+      return new BitCastInst(BCI->getOperand(0), GEP.getType());
+    }
+  }
+  
    // Combine Indices - If the source pointer to this getelementptr instruction
    // is a getelementptr instruction, combine the indices of the two
    // getelementptr instructions into a single instruction.
@@ -8454,12 +8830,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
            } else if (Constant *GO1C = dyn_cast<Constant>(GO1)) {
              GO1 = ConstantExpr::getIntegerCast(GO1C, SO1->getType(), true);
            } else {
-            unsigned PS = TD->getPointerSize();
-            if (TD->getTypeSize(SO1->getType()) == PS) {
+            unsigned PS = TD->getPointerSizeInBits();
+            if (TD->getTypeSizeInBits(SO1->getType()) == PS) {
                // Convert GO1 to SO1's type.
                GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this);
  
-            } else if (TD->getTypeSize(GO1->getType()) == PS) {
+            } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) {
                // Convert SO1 to GO1's type.
                SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this);
              } else {
@@ -8498,8 +8874,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
      }
  
      if (!Indices.empty())
-      return new GetElementPtrInst(SrcGEPOperands[0], &Indices[0],
-                                   Indices.size(), GEP.getName());
+      return new GetElementPtrInst(SrcGEPOperands[0], Indices.begin(),
+                                   Indices.end(), GEP.getName());
  
    } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) {
      // GEP of global variable.  If all of the indices for this GEP are
@@ -8522,8 +8898,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
      if (!isa<PointerType>(X->getType())) {
        // Not interesting.  Source pointer must be a cast from pointer.
      } else if (HasZeroPointerIndex) {
-      // transform: GEP (cast [10 x ubyte]* X to [0 x ubyte]*), long 0, ...
-      // into     : GEP [10 x ubyte]* X, long 0, ...
+      // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
+      // into     : GEP [10 x i8]* X, i32 0, ...
        //
        // This occurs when the program declares an array extern like "int X[];"
        //
@@ -8543,29 +8919,30 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
            }
      } else if (GEP.getNumOperands() == 2) {
        // Transform things like:
-      // %t = getelementptr ubyte* cast ([2 x int]* %str to uint*), uint %V
-      // into:  %t1 = getelementptr [2 x int*]* %str, int 0, uint %V; cast
+      // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
+      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
        const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
        const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
        if (isa<ArrayType>(SrcElTy) &&
-          TD->getTypeSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
-          TD->getTypeSize(ResElTy)) {
+          TD->getABITypeSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+          TD->getABITypeSize(ResElTy)) {
+        Value *Idx[2];
+        Idx[0] = Constant::getNullValue(Type::Int32Ty);
+        Idx[1] = GEP.getOperand(1);
          Value *V = InsertNewInstBefore(
-               new GetElementPtrInst(X, Constant::getNullValue(Type::Int32Ty),
-                                     GEP.getOperand(1), GEP.getName()), GEP);
+               new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName()), GEP);
          // V and GEP are both pointer types --> BitCast
          return new BitCastInst(V, GEP.getType());
        }
        
        // Transform things like:
-      // getelementptr sbyte* cast ([100 x double]* X to sbyte*), int %tmp
+      // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
        //   (where tmp = 8*tmp2) into:
-      // getelementptr [100 x double]* %arr, int 0, int %tmp.2
+      // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
        
-      if (isa<ArrayType>(SrcElTy) &&
-          (ResElTy == Type::Int8Ty || ResElTy == Type::Int8Ty)) {
+      if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) {
          uint64_t ArrayEltSize =
-            TD->getTypeSize(cast<ArrayType>(SrcElTy)->getElementType());
+            TD->getABITypeSize(cast<ArrayType>(SrcElTy)->getElementType());
          
          // Check to see if "tmp" is a scale by a multiple of ArrayEltSize.  We
          // allow either a mul, shift, or constant here.
@@ -8590,24 +8967,28 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
              NewIdx = Inst->getOperand(0);
            }
          }
-
+        
          // If the index will be to exactly the right offset with the scale taken
-        // out, perform the transformation.
-        if (Scale && Scale->getZExtValue() % ArrayEltSize == 0) {
-          if (isa<ConstantInt>(Scale))
-            Scale = ConstantInt::get(Scale->getType(),
-                                      Scale->getZExtValue() / ArrayEltSize);
+        // out, perform the transformation. Note, we don't know whether Scale is
+        // signed or not. We'll use unsigned version of division/modulo
+        // operation after making sure Scale doesn't have the sign bit set.
+        if (Scale && Scale->getSExtValue() >= 0LL &&
+            Scale->getZExtValue() % ArrayEltSize == 0) {
+          Scale = ConstantInt::get(Scale->getType(),
+                                   Scale->getZExtValue() / ArrayEltSize);
            if (Scale->getZExtValue() != 1) {
              Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),
-                                                       true /*SExt*/);
+                                                       false /*ZExt*/);
              Instruction *Sc = BinaryOperator::createMul(NewIdx, C, "idxscale");
              NewIdx = InsertNewInstBefore(Sc, GEP);
            }
  
            // Insert the new GEP instruction.
+          Value *Idx[2];
+          Idx[0] = Constant::getNullValue(Type::Int32Ty);
+          Idx[1] = NewIdx;
            Instruction *NewGEP =
-            new GetElementPtrInst(X, Constant::getNullValue(Type::Int32Ty),
-                                  NewIdx, GEP.getName());
+            new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName());
            NewGEP = InsertNewInstBefore(NewGEP, GEP);
            // The NewGEP must be pointer typed, so must the old one -> BitCast
            return new BitCastInst(NewGEP, GEP.getType());
@@ -8647,7 +9028,10 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
        // insert our getelementptr instruction...
        //
        Value *NullIdx = Constant::getNullValue(Type::Int32Ty);
-      Value *V = new GetElementPtrInst(New, NullIdx, NullIdx,
+      Value *Idx[2];
+      Idx[0] = NullIdx;
+      Idx[1] = NullIdx;
+      Value *V = new GetElementPtrInst(New, Idx, Idx + 2,
                                         New->getName()+".sub", It);
  
        // Now make everything use the getelementptr instead of the original
@@ -8661,7 +9045,7 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
    // Note that we only do this for alloca's, because malloc should allocate and
    // return a unique pointer, even for a zero byte allocation.
    if (isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized() &&
-      TD->getTypeSize(AI.getAllocatedType()) == 0)
+      TD->getABITypeSize(AI.getAllocatedType()) == 0)
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
  
    return 0;
@@ -8674,7 +9058,7 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
    if (isa<UndefValue>(Op)) {
      // Insert a new store to null because we cannot modify the CFG here.
      new StoreInst(ConstantInt::getTrue(),
-                  UndefValue::get(PointerType::get(Type::Int1Ty)), &FI);
+                  UndefValue::get(PointerType::getUnqual(Type::Int1Ty)), &FI);
      return EraseInstFromFunction(FI);
    }
    
@@ -8710,10 +9094,43 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
  
  
  /// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
-static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI) {
+static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
+                                       const TargetData *TD) {
    User *CI = cast<User>(LI.getOperand(0));
    Value *CastOp = CI->getOperand(0);
  
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) {
+    // Instead of loading from a constant C string, use the corresponding
+    // integer value directly if the string is short enough.
+    const std::string &Str = CE->getOperand(0)->getStringValue();
+    if (!Str.empty()) {
+      unsigned len = Str.length();
+      const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+      unsigned numBits = Ty->getPrimitiveSizeInBits();
+      // Replace LI with the equivalent immediate integer constant.
+      if ((numBits >> 3) == len + 1) {
+       APInt StrVal(numBits, 0);
+       APInt SingleChar(numBits, 0);
+       if (TD->isLittleEndian()) {
+         for (signed i = len-1; i >= 0; i--) {
+           SingleChar = (uint64_t) Str[i];
+           StrVal = (StrVal << 8) | SingleChar;
+         }
+       } else {
+         for (unsigned i = 0; i < len; i++) {
+           SingleChar = (uint64_t) Str[i];
+               StrVal = (StrVal << 8) | SingleChar;
+         }
+         // Append NULL at the end.
+         SingleChar = 0;
+         StrVal = (StrVal << 8) | SingleChar;
+       }
+       Value *NL = ConstantInt::get(StrVal);
+       return IC.ReplaceInstUsesWith(LI, NL);
+      }
+    }
+  }
+
    const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
    if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {
      const Type *SrcPTy = SrcTy->getElementType();
@@ -8760,8 +9177,13 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI) {
  /// specified pointer, we do a quick local scan of the basic block containing
  /// ScanFrom, to determine if the address is already accessed.
  static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
-  // If it is an alloca or global variable, it is always safe to load from.
-  if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true;
+  // If it is an alloca it is always safe to load from.
+  if (isa<AllocaInst>(V)) return true;
+
+  // If it is a global variable it is mostly safe to load from.
+  if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
+    // Don't try to evaluate aliases.  External weak GV can be null.
+    return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
  
    // Otherwise, be a little bit agressive by scanning the local block where we
    // want to check to see if the pointer is already being loaded or stored
@@ -8814,7 +9236,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  
    // load (cast X) --> cast (load X) iff safe
    if (isa<CastInst>(Op))
-    if (Instruction *Res = InstCombineLoadCast(*this, LI))
+    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;
  
    // None of the following transforms are legal for volatile loads.
@@ -8878,7 +9300,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
          }
  
        } else if (CE->isCast()) {
-        if (Instruction *Res = InstCombineLoadCast(*this, LI))
+        if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
            return Res;
        }
    }
@@ -9046,7 +9468,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
      // the pointer we're loading and is producing the pointer we're storing,
      // then *this* store is dead (X = load P; store X -> P).
      if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (LI == Val && LI->getOperand(0) == Ptr) {
+      if (LI == Val && LI->getOperand(0) == Ptr && !SI.isVolatile()) {
          EraseInstFromFunction(SI);
          ++NumCombined;
          return 0;
@@ -9471,8 +9893,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
            return BinaryOperator::create(BO->getOpcode(), newEI0, newEI1);
          }
        } else if (isa<LoadInst>(I)) {
+        unsigned AS = 
+          cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace();
          Value *Ptr = InsertCastBefore(Instruction::BitCast, I->getOperand(0),
-                                      PointerType::get(EI.getType()), EI);
+                                      PointerType::get(EI.getType(), AS), EI);
          GetElementPtrInst *GEP = 
            new GetElementPtrInst(Ptr, EI.getOperand(1), I->getName() + ".gep");
          InsertNewInstBefore(GEP, EI);