Apply "Instead of loading small c string constant, use integer constant directly...

[oota-llvm.git] / lib / Transforms / Scalar / InstructionCombining.cpp
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index 792d615363f4dadf6781fb43dbe1d10d89483c0e..649dd46c81feadfcc093a64a12e5a1f92b92db90 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -39,6 +39,7 @@
  #include "llvm/Pass.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/GlobalVariable.h"
+#include "llvm/ParameterAttributes.h"
  #include "llvm/Analysis/ConstantFolding.h"
  #include "llvm/Target/TargetData.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -234,6 +235,7 @@ namespace {
    private:
      Instruction *visitCallSite(CallSite CS);
      bool transformConstExprCastCall(CallSite CS);
+    Instruction *transformCallThroughTrampoline(CallSite CS);
  
    public:
      // InsertNewInstBefore - insert an instruction New before instruction Old
@@ -870,11 +872,10 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const Type *Ty,
  // could have the specified known zero and known one bits, returning them in
  // min/max.
  static void ComputeUnsignedMinMaxValuesFromKnownBits(const Type *Ty,
-                                                     const APInt& KnownZero,
-                                                     const APInt& KnownOne,
-                                                     APInt& Min,
-                                                     APInt& Max) {
-  uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+                                                     const APInt &KnownZero,
+                                                     const APInt &KnownOne,
+                                                     APInt &Min, APInt &Max) {
+  uint32_t BitWidth = cast<IntegerType>(Ty)->getBitWidth(); BitWidth = BitWidth;
    assert(KnownZero.getBitWidth() == BitWidth && 
           KnownOne.getBitWidth() == BitWidth &&
           Min.getBitWidth() == BitWidth && Max.getBitWidth() &&
@@ -1664,16 +1665,22 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
    return MadeChange ? I : 0;
  }
  
-/// @returns true if the specified compare instruction is
+/// @returns true if the specified compare predicate is
  /// true when both operands are equal...
-/// @brief Determine if the ICmpInst returns true if both operands are equal
-static bool isTrueWhenEqual(ICmpInst &ICI) {
-  ICmpInst::Predicate pred = ICI.getPredicate();
+/// @brief Determine if the icmp Predicate is true when both operands are equal
+static bool isTrueWhenEqual(ICmpInst::Predicate pred) {
    return pred == ICmpInst::ICMP_EQ  || pred == ICmpInst::ICMP_UGE ||
           pred == ICmpInst::ICMP_SGE || pred == ICmpInst::ICMP_ULE ||
           pred == ICmpInst::ICMP_SLE;
  }
  
+/// @returns true if the specified compare instruction is
+/// true when both operands are equal...
+/// @brief Determine if the ICmpInst returns true when both operands are equal
+static bool isTrueWhenEqual(ICmpInst &ICI) {
+  return isTrueWhenEqual(ICI.getPredicate());
+}
+
  /// AssociativeOpt - Perform an optimization on an associative operator.  This
  /// function is designed to check a chain of associative operators for a
  /// potential to apply a certain optimization.  Since the optimization may be
@@ -1885,7 +1892,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
    if (I.getNumOperands() == 2) {
      Constant *C = cast<Constant>(I.getOperand(1));
      for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV;
+      Value *InV = 0;
        if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {
          if (CmpInst *CI = dyn_cast<CmpInst>(&I))
            InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
@@ -1943,7 +1950,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
        if (RHSC->isNullValue())
          return ReplaceInstUsesWith(I, LHS);
      } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
-      if (CFP->isExactlyValue(-0.0))
+      if (CFP->isExactlyValue(ConstantFP::getNegativeZero
+                              (I.getType())->getValueAPF()))
          return ReplaceInstUsesWith(I, LHS);
      }
  
@@ -2249,6 +2257,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
          Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
          return BinaryOperator::createMul(Op0, CP1);
        }
+
+      // X - ((X / Y) * Y) --> X % Y
+      if (Op1I->getOpcode() == Instruction::Mul)
+        if (Instruction *I = dyn_cast<Instruction>(Op1I->getOperand(0)))
+          if (Op0 == I->getOperand(0) &&
+              Op1I->getOperand(1) == I->getOperand(1)) {
+            if (I->getOpcode() == Instruction::SDiv)
+              return BinaryOperator::createSRem(Op0, Op1I->getOperand(1));
+            if (I->getOpcode() == Instruction::UDiv)
+              return BinaryOperator::createURem(Op0, Op1I->getOperand(1));
+          }
      }
    }
  
@@ -2343,8 +2362,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
  
        // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,
        // ANSI says we can drop signals, so we can do this anyway." (from GCC)
-      if (Op1F->getValue() == 1.0)
-        return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
+      // We need a better interface for long double here.
+      if (Op1->getType() == Type::FloatTy || Op1->getType() == Type::DoubleTy)
+        if (Op1F->isExactlyValue(1.0))
+          return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0'
      }
      
      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))
@@ -2892,7 +2913,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) {
  
  /// getICmpValue - This is the complement of getICmpCode, which turns an
  /// opcode and two operands into either a constant true or false, or a brand 
-/// new /// ICmp instruction. The sign is passed in to determine which kind
+/// new ICmp instruction. The sign is passed in to determine which kind
  /// of predicate to use in new icmp instructions.
  static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS) {
    switch (code) {
@@ -3717,9 +3738,9 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
    for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
      if (ByteValues[i] != V)
        return 0;
-  const Type *Tys[] = { ITy, ITy };
+  const Type *Tys[] = { ITy };
    Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 2);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
    return new CallInst(F, V);
  }
  
@@ -3870,40 +3891,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
            InsertNewInstBefore(BinaryOperator::createOr(V2, V3, "tmp"), I);
          return BinaryOperator::createAnd(V1, Or);
        }
-      
-      // (V1 & V3)|(V2 & ~V3) -> ((V1 ^ V2) & V3) ^ V2
-      if (isOnlyUse(Op0) && isOnlyUse(Op1)) {
-        // Try all combination of terms to find V3 and ~V3.
-        if (A->hasOneUse() && match(A, m_Not(m_Value(V3)))) {
-          if (V3 == B)
-            V1 = D, V2 = C;
-          else if (V3 == D)
-            V1 = B, V2 = C;
-        }
-        if (B->hasOneUse() && match(B, m_Not(m_Value(V3)))) {
-          if (V3 == A)
-            V1 = C, V2 = D;
-          else if (V3 == C)
-            V1 = A, V2 = D;
-        }
-        if (C->hasOneUse() && match(C, m_Not(m_Value(V3)))) {
-          if (V3 == B)
-            V1 = D, V2 = A;
-          else if (V3 == D)
-            V1 = B, V2 = A;
-        }
-        if (D->hasOneUse() && match(D, m_Not(m_Value(V3)))) {
-          if (V3 == A)
-            V1 = C, V2 = B;
-          else if (V3 == C)
-            V1 = A, V2 = B;
-        }
-        if (V1) {
-          A = InsertNewInstBefore(BinaryOperator::createXor(V1, V2, "tmp"), I);
-          A = InsertNewInstBefore(BinaryOperator::createAnd(A, V3, "tmp"), I);
-          return BinaryOperator::createXor(A, V2);
-        }
-      }
      }
    }
    
@@ -4129,7 +4116,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
  
    // xor X, X = 0, even if X is nested in a sequence of Xor's.
    if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {
-    assert(Result == &I && "AssociativeOpt didn't work?");
+    assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result;
      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
    }
    
@@ -4169,12 +4156,17 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
    
    
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // xor (icmp A, B), true = not (icmp A, B) = !icmp A, B
-    if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
-      if (RHS == ConstantInt::getTrue() && ICI->hasOneUse())
+    // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+    if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) {
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
          return new ICmpInst(ICI->getInversePredicate(),
                              ICI->getOperand(0), ICI->getOperand(1));
  
+      if (FCmpInst *FCI = dyn_cast<FCmpInst>(Op0))
+        return new FCmpInst(FCI->getInversePredicate(),
+                            FCI->getOperand(0), FCI->getOperand(1));
+    }
+
      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
        // ~(c-X) == X-c-1 == X+(-c-1)
        if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
@@ -4592,8 +4584,9 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
  
        if (NumDifferences == 0)   // SAME GEP?
          return ReplaceInstUsesWith(I, // No comparison is needed here.
-                                   ConstantInt::get(Type::Int1Ty, 
-                                                    Cond == ICmpInst::ICMP_EQ));
+                                   ConstantInt::get(Type::Int1Ty,
+                                                    isTrueWhenEqual(Cond)));
+
        else if (NumDifferences == 1) {
          Value *LHSV = GEPLHS->getOperand(DiffOperand);
          Value *RHSV = GEPRHS->getOperand(DiffOperand);
@@ -4714,14 +4707,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
    if (isa<UndefValue>(Op1))                  // X icmp undef -> undef
      return ReplaceInstUsesWith(I, UndefValue::get(Type::Int1Ty));
  
-  // icmp of GlobalValues can never equal each other as long as they aren't
-  // external weak linkage type.
-  if (GlobalValue *GV0 = dyn_cast<GlobalValue>(Op0))
-    if (GlobalValue *GV1 = dyn_cast<GlobalValue>(Op1))
-      if (!GV0->hasExternalWeakLinkage() || !GV1->hasExternalWeakLinkage())
-        return ReplaceInstUsesWith(I, ConstantInt::get(Type::Int1Ty,
-                                                       !isTrueWhenEqual(I)));
-
    // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
    // addresses never equal each other!  We already know that Op0 != Op1.
    if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
@@ -6205,33 +6190,29 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
    assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!");
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
      Offset = CI->getZExtValue();
-    Scale  = 1;
+    Scale  = 0;
      return ConstantInt::get(Type::Int32Ty, 0);
-  } else if (Instruction *I = dyn_cast<Instruction>(Val)) {
-    if (I->getNumOperands() == 2) {
-      if (ConstantInt *CUI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-        if (I->getOpcode() == Instruction::Shl) {
-          // This is a value scaled by '1 << the shift amt'.
-          Scale = 1U << CUI->getZExtValue();
-          Offset = 0;
-          return I->getOperand(0);
-        } else if (I->getOpcode() == Instruction::Mul) {
-          // This value is scaled by 'CUI'.
-          Scale = CUI->getZExtValue();
-          Offset = 0;
-          return I->getOperand(0);
-        } else if (I->getOpcode() == Instruction::Add) {
-          // We have X+C.  Check to see if we really have (X*C2)+C1, 
-          // where C1 is divisible by C2.
-          unsigned SubScale;
-          Value *SubVal = 
-            DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
-          Offset += CUI->getZExtValue();
-          if (SubScale > 1 && (Offset % SubScale == 0)) {
-            Scale = SubScale;
-            return SubVal;
-          }
-        }
+  } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      if (I->getOpcode() == Instruction::Shl) {
+        // This is a value scaled by '1 << the shift amt'.
+        Scale = 1U << RHS->getZExtValue();
+        Offset = 0;
+        return I->getOperand(0);
+      } else if (I->getOpcode() == Instruction::Mul) {
+        // This value is scaled by 'RHS'.
+        Scale = RHS->getZExtValue();
+        Offset = 0;
+        return I->getOperand(0);
+      } else if (I->getOpcode() == Instruction::Add) {
+        // We have X+C.  Check to see if we really have (X*C2)+C1, 
+        // where C1 is divisible by C2.
+        unsigned SubScale;
+        Value *SubVal = 
+          DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+        Offset += RHS->getZExtValue();
+        Scale = SubScale;
+        return SubVal;
        }
      }
    }
@@ -6347,7 +6328,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
  /// This is a truncation operation if Ty is smaller than V->getType(), or an
  /// extension operation if Ty is larger.
  static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
-                                       int &NumCastsRemoved) {
+                                       unsigned CastOpc, int &NumCastsRemoved) {
    // We can always evaluate constants in another type.
    if (isa<ConstantInt>(V))
      return true;
@@ -6357,30 +6338,48 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
    
    const IntegerType *OrigTy = cast<IntegerType>(V->getType());
    
+  // If this is an extension or truncate, we can often eliminate it.
+  if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+    // If this is a cast from the destination type, we can trivially eliminate
+    // it, and this will remove a cast overall.
+    if (I->getOperand(0)->getType() == Ty) {
+      // If the first operand is itself a cast, and is eliminable, do not count
+      // this as an eliminable cast.  We would prefer to eliminate those two
+      // casts first.
+      if (!isa<CastInst>(I->getOperand(0)))
+        ++NumCastsRemoved;
+      return true;
+    }
+  }
+
+  // We can't extend or shrink something that has multiple uses: doing so would
+  // require duplicating the instruction in general, which isn't profitable.
+  if (!I->hasOneUse()) return false;
+
    switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::Sub:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
-    if (!I->hasOneUse()) return false;
      // These operators can all arbitrarily be extended or truncated.
-    return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved) &&
-           CanEvaluateInDifferentType(I->getOperand(1), Ty, NumCastsRemoved);
+    return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                      NumCastsRemoved) &&
+           CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,
+                                      NumCastsRemoved);
  
    case Instruction::Shl:
-    if (!I->hasOneUse()) return false;
      // If we are truncating the result of this SHL, and if it's a shift of a
      // constant amount, we can always perform a SHL in a smaller type.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
        uint32_t BitWidth = Ty->getBitWidth();
        if (BitWidth < OrigTy->getBitWidth() && 
            CI->getLimitedValue(BitWidth) < BitWidth)
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved);
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved);
      }
      break;
    case Instruction::LShr:
-    if (!I->hasOneUse()) return false;
      // If this is a truncate of a logical shr, we can truncate it to a smaller
      // lshr iff we know that the bits we would otherwise be shifting in are
      // already zeros.
@@ -6391,25 +6390,20 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
            MaskedValueIsZero(I->getOperand(0),
              APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
            CI->getLimitedValue(BitWidth) < BitWidth) {
-        return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved);
+        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
+                                          NumCastsRemoved);
        }
      }
      break;
-  case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
-    // If this is a cast from the destination type, we can trivially eliminate
-    // it, and this will remove a cast overall.
-    if (I->getOperand(0)->getType() == Ty) {
-      // If the first operand is itself a cast, and is eliminable, do not count
-      // this as an eliminable cast.  We would prefer to eliminate those two
-      // casts first.
-      if (isa<CastInst>(I->getOperand(0)))
-        return true;
-      
-      ++NumCastsRemoved;
+  case Instruction::Trunc:
+    // If this is the same kind of case as our original (e.g. zext+zext), we
+    // can safely replace it.  Note that replacing it does not reduce the number
+    // of casts in the input.
+    if (I->getOpcode() == CastOpc)
        return true;
-    }
+    
      break;
    default:
      // TODO: Can handle more cases here.
@@ -6448,14 +6442,16 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
-  case Instruction::BitCast:
      // If the source type of the cast is the type we're trying for then we can
-    // just return the source. There's no need to insert it because its not new.
+    // just return the source.  There's no need to insert it because it is not
+    // new.
      if (I->getOperand(0)->getType() == Ty)
        return I->getOperand(0);
      
-    // Some other kind of cast, which shouldn't happen, so just ..
-    // FALL THROUGH
+    // Otherwise, must be the same type of case, so just reinsert a new one.
+    Res = CastInst::create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
+                           Ty, I->getName());
+    break;
    default: 
      // TODO: Can handle more cases here.
      assert(0 && "Unreachable!");
@@ -6580,8 +6576,9 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
              // If we were able to index down into an element, create the GEP
              // and bitcast the result.  This eliminates one bitcast, potentially
              // two.
-            Instruction *NGEP = new GetElementPtrInst(OrigBase, &NewIndices[0],
-                                                      NewIndices.size(), "");
+            Instruction *NGEP = new GetElementPtrInst(OrigBase, 
+                                                      NewIndices.begin(),
+                                                      NewIndices.end(), "");
              InsertNewInstBefore(NGEP, CI);
              NGEP->takeName(GEP);
              
@@ -6631,14 +6628,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
    int NumCastsRemoved = 0;
    if (!isa<BitCastInst>(CI) &&
        CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
-                                 NumCastsRemoved)) {
+                                 CI.getOpcode(), NumCastsRemoved)) {
      // If this cast is a truncate, evaluting in a different type always
-    // eliminates the cast, so it is always a win.  If this is a noop-cast
-    // this just removes a noop cast which isn't pointful, but simplifies
-    // the code.  If this is a zero-extension, we need to do an AND to
-    // maintain the clear top-part of the computation, so we require that
-    // the input have eliminated at least one cast.  If this is a sign
-    // extension, we insert two new casts (to do the extension) so we
+    // eliminates the cast, so it is always a win.  If this is a zero-extension,
+    // we need to do an AND to maintain the clear top-part of the computation,
+    // so we require that the input have eliminated at least one cast.  If this
+    // is a sign extension, we insert two new casts (to do the extension) so we
      // require that two casts have been eliminated.
      bool DoXForm;
      switch (CI.getOpcode()) {
@@ -6655,9 +6650,6 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
      case Instruction::SExt:
        DoXForm = NumCastsRemoved >= 2;
        break;
-    case Instruction::BitCast:
-      DoXForm = false;
-      break;
      }
      
      if (DoXForm) {
@@ -7078,7 +7070,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
      // If we found a path from the src to dest, create the getelementptr now.
      if (SrcElTy == DstElTy) {
        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-      return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
+      return new GetElementPtrInst(Src, Idxs.begin(), Idxs.end(), "", 
+                                   ((Instruction*) NULL));
      }
    }
  
@@ -7357,8 +7350,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    if (FCmpInst *FCI = dyn_cast<FCmpInst>(CondVal)) {
      if (FCI->getOperand(0) == TrueVal && FCI->getOperand(1) == FalseVal) {
        // Transform (X == Y) ? X : Y  -> Y
-      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ)
+      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+        // This is not safe in general for floating point:  
+        // consider X== -0, Y== +0.
+        // It becomes safe if either operand is a nonzero constant.
+        ConstantFP *CFPt, *CFPf;
+        if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+              !CFPt->getValueAPF().isZero()) ||
+            ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+             !CFPf->getValueAPF().isZero()))
          return ReplaceInstUsesWith(SI, FalseVal);
+      }
        // Transform (X != Y) ? X : Y  -> X
        if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
          return ReplaceInstUsesWith(SI, TrueVal);
@@ -7366,8 +7368,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
  
      } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
        // Transform (X == Y) ? Y : X  -> X
-      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ)
-        return ReplaceInstUsesWith(SI, FalseVal);
+      if (FCI->getPredicate() == FCmpInst::FCMP_OEQ) {
+        // This is not safe in general for floating point:  
+        // consider X== -0, Y== +0.
+        // It becomes safe if either operand is a nonzero constant.
+        ConstantFP *CFPt, *CFPf;
+        if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+              !CFPt->getValueAPF().isZero()) ||
+            ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+             !CFPf->getValueAPF().isZero()))
+          return ReplaceInstUsesWith(SI, FalseVal);
+      }
        // Transform (X != Y) ? Y : X  -> Y
        if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
          return ReplaceInstUsesWith(SI, TrueVal);
@@ -7513,13 +7524,23 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    return 0;
  }
  
-/// GetKnownAlignment - If the specified pointer has an alignment that we can
-/// determine, return it, otherwise return 0.
-static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
+/// GetOrEnforceKnownAlignment - If the specified pointer has an alignment that
+/// we can determine, return it, otherwise return 0.  If PrefAlign is specified,
+/// and it is more than the alignment of the ultimate object, see if we can
+/// increase the alignment of the ultimate object, making this check succeed.
+static unsigned GetOrEnforceKnownAlignment(Value *V, TargetData *TD,
+                                           unsigned PrefAlign = 0) {
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
      unsigned Align = GV->getAlignment();
      if (Align == 0 && TD) 
        Align = TD->getPrefTypeAlignment(GV->getType()->getElementType());
+
+    // If there is a large requested alignment and we can, bump up the alignment
+    // of the global.
+    if (PrefAlign > Align && GV->hasInitializer()) {
+      GV->setAlignment(PrefAlign);
+      Align = PrefAlign;
+    }
      return Align;
    } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
      unsigned Align = AI->getAlignment();
@@ -7537,18 +7558,20 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
                     (unsigned)TD->getABITypeAlignment(Type::Int64Ty));
        }
      }
+    
+    // If there is a requested alignment and if this is an alloca, round up.  We
+    // don't do this for malloc, because some systems can't respect the request.
+    if (PrefAlign > Align && isa<AllocaInst>(AI)) {
+      AI->setAlignment(PrefAlign);
+      Align = PrefAlign;
+    }
      return Align;
    } else if (isa<BitCastInst>(V) ||
               (isa<ConstantExpr>(V) && 
                cast<ConstantExpr>(V)->getOpcode() == Instruction::BitCast)) {
-    User *CI = cast<User>(V);
-    if (isa<PointerType>(CI->getOperand(0)->getType()))
-      return GetKnownAlignment(CI->getOperand(0), TD);
-    return 0;
+    return GetOrEnforceKnownAlignment(cast<User>(V)->getOperand(0),
+                                      TD, PrefAlign);
    } else if (User *GEPI = dyn_castGetElementPtr(V)) {
-    unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD);
-    if (BaseAlignment == 0) return 0;
-    
      // If all indexes are zero, it is just the alignment of the base pointer.
      bool AllZeroOperands = true;
      for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
@@ -7557,9 +7580,15 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
          AllZeroOperands = false;
          break;
        }
-    if (AllZeroOperands)
-      return BaseAlignment;
-    
+
+    if (AllZeroOperands) {
+      // Treat this like a bitcast.
+      return GetOrEnforceKnownAlignment(GEPI->getOperand(0), TD, PrefAlign);
+    }
+
+    unsigned BaseAlignment = GetOrEnforceKnownAlignment(GEPI->getOperand(0),TD);
+    if (BaseAlignment == 0) return 0;
+
      // Otherwise, if the base alignment is >= the alignment we expect for the
      // base pointer type, then we know that the resultant pointer is aligned at
      // least as much as its type requires.
@@ -7567,11 +7596,13 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
  
      const Type *BasePtrTy = GEPI->getOperand(0)->getType();
      const PointerType *PtrTy = cast<PointerType>(BasePtrTy);
-    if (TD->getABITypeAlignment(PtrTy->getElementType())
-        <= BaseAlignment) {
+    unsigned Align = TD->getABITypeAlignment(PtrTy->getElementType());
+    if (Align <= BaseAlignment) {
        const Type *GEPTy = GEPI->getType();
        const PointerType *GEPPtrTy = cast<PointerType>(GEPTy);
-      return TD->getABITypeAlignment(GEPPtrTy->getElementType());
+      Align = std::min(Align, (unsigned)
+                       TD->getABITypeAlignment(GEPPtrTy->getElementType()));
+      return Align;
      }
      return 0;
    }
@@ -7627,15 +7658,43 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      // If we can determine a pointer alignment that is bigger than currently
      // set, update the alignment.
      if (isa<MemCpyInst>(MI) || isa<MemMoveInst>(MI)) {
-      unsigned Alignment1 = GetKnownAlignment(MI->getOperand(1), TD);
-      unsigned Alignment2 = GetKnownAlignment(MI->getOperand(2), TD);
+      unsigned Alignment1 = GetOrEnforceKnownAlignment(MI->getOperand(1), TD);
+      unsigned Alignment2 = GetOrEnforceKnownAlignment(MI->getOperand(2), TD);
        unsigned Align = std::min(Alignment1, Alignment2);
        if (MI->getAlignment()->getZExtValue() < Align) {
          MI->setAlignment(ConstantInt::get(Type::Int32Ty, Align));
          Changed = true;
        }
+
+      // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
+      // load/store.
+      ConstantInt *MemOpLength = dyn_cast<ConstantInt>(CI.getOperand(3));
+      if (MemOpLength) {
+        unsigned Size = MemOpLength->getZExtValue();
+        unsigned Align = cast<ConstantInt>(CI.getOperand(4))->getZExtValue();
+        PointerType *NewPtrTy = NULL;
+        // Destination pointer type is always i8 *
+        // If Size is 8 then use Int64Ty
+        // If Size is 4 then use Int32Ty
+        // If Size is 2 then use Int16Ty
+        // If Size is 1 then use Int8Ty
+        if (Size && Size <=8 && !(Size&(Size-1)))
+          NewPtrTy = PointerType::get(IntegerType::get(Size<<3));
+
+        if (NewPtrTy) {
+          Value *Src = InsertCastBefore(Instruction::BitCast, CI.getOperand(2), NewPtrTy, CI);
+          Value *Dest = InsertCastBefore(Instruction::BitCast, CI.getOperand(1), NewPtrTy, CI);
+          Value *L = new LoadInst(Src, "tmp", false, Align, &CI);
+          Value *NS = new StoreInst(L, Dest, false, Align, &CI);
+          AddToWorkList(cast<Instruction>(L));
+          AddToWorkList(cast<Instruction>(NS));
+          CI.replaceAllUsesWith(NS);
+          Changed = true;
+          return EraseInstFromFunction(CI);
+        }
+      }
      } else if (isa<MemSetInst>(MI)) {
-      unsigned Alignment = GetKnownAlignment(MI->getDest(), TD);
+      unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest(), TD);
        if (MI->getAlignment()->getZExtValue() < Alignment) {
          MI->setAlignment(ConstantInt::get(Type::Int32Ty, Alignment));
          Changed = true;
@@ -7653,7 +7712,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::x86_sse2_loadu_dq:
        // Turn PPC lvx     -> load if the pointer is known aligned.
        // Turn X86 loadups -> load if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                        PointerType::get(II->getType()), CI);
          return new LoadInst(Ptr);
@@ -7662,7 +7721,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::ppc_altivec_stvx:
      case Intrinsic::ppc_altivec_stvxl:
        // Turn stvx -> store if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(2), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(2), TD, 16) >= 16) {
          const Type *OpPtrTy = PointerType::get(II->getOperand(1)->getType());
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(2),
                                        OpPtrTy, CI);
@@ -7674,7 +7733,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
      case Intrinsic::x86_sse2_storeu_dq:
      case Intrinsic::x86_sse2_storel_dq:
        // Turn X86 storeu -> store if the pointer is known aligned.
-      if (GetKnownAlignment(II->getOperand(1), TD) >= 16) {
+      if (GetOrEnforceKnownAlignment(II->getOperand(1), TD, 16) >= 16) {
          const Type *OpPtrTy = PointerType::get(II->getOperand(2)->getType());
          Value *Ptr = InsertCastBefore(Instruction::BitCast, II->getOperand(1),
                                        OpPtrTy, CI);
@@ -7829,6 +7888,11 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
      return EraseInstFromFunction(*CS.getInstruction());
    }
  
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(Callee))
+    if (IntrinsicInst *In = dyn_cast<IntrinsicInst>(BC->getOperand(0)))
+      if (In->getIntrinsicID() == Intrinsic::init_trampoline)
+        return transformCallThroughTrampoline(CS);
+
    const PointerType *PTy = cast<PointerType>(Callee->getType());
    const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
    if (FTy->isVarArg()) {
@@ -8007,10 +8071,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    Instruction *NC;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
      NC = new InvokeInst(Callee, II->getNormalDest(), II->getUnwindDest(),
-                        &Args[0], Args.size(), Caller->getName(), Caller);
-    cast<InvokeInst>(II)->setCallingConv(II->getCallingConv());
+                        Args.begin(), Args.end(), Caller->getName(), Caller);
+    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    } else {
-    NC = new CallInst(Callee, &Args[0], Args.size(), Caller->getName(), Caller);
+    NC = new CallInst(Callee, Args.begin(), Args.end(),
+                      Caller->getName(), Caller);
      if (cast<CallInst>(Caller)->isTailCall())
        cast<CallInst>(NC)->setTailCall();
     cast<CallInst>(NC)->setCallingConv(cast<CallInst>(Caller)->getCallingConv());
@@ -8048,6 +8113,148 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
    return true;
  }
  
+// transformCallThroughTrampoline - Turn a call to a function created by the
+// init_trampoline intrinsic into a direct call to the underlying function.
+//
+Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
+  Value *Callee = CS.getCalledValue();
+  const PointerType *PTy = cast<PointerType>(Callee->getType());
+  const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+  IntrinsicInst *Tramp =
+    cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
+
+  Function *NestF =
+    cast<Function>(IntrinsicInst::StripPointerCasts(Tramp->getOperand(2)));
+  const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
+  const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
+
+  if (const ParamAttrsList *NestAttrs = NestFTy->getParamAttrs()) {
+    unsigned NestIdx = 1;
+    const Type *NestTy = 0;
+    uint16_t NestAttr = 0;
+
+    // Look for a parameter marked with the 'nest' attribute.
+    for (FunctionType::param_iterator I = NestFTy->param_begin(),
+         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
+      if (NestAttrs->paramHasAttr(NestIdx, ParamAttr::Nest)) {
+        // Record the parameter type and any other attributes.
+        NestTy = *I;
+        NestAttr = NestAttrs->getParamAttrs(NestIdx);
+        break;
+      }
+
+    if (NestTy) {
+      Instruction *Caller = CS.getInstruction();
+      std::vector<Value*> NewArgs;
+      NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1);
+
+      // Insert the nest argument into the call argument list, which may
+      // mean appending it.
+      {
+        unsigned Idx = 1;
+        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+        do {
+          if (Idx == NestIdx) {
+            // Add the chain argument.
+            Value *NestVal = Tramp->getOperand(3);
+            if (NestVal->getType() != NestTy)
+              NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
+            NewArgs.push_back(NestVal);
+          }
+
+          if (I == E)
+            break;
+
+          // Add the original argument.
+          NewArgs.push_back(*I);
+
+          ++Idx, ++I;
+        } while (1);
+      }
+
+      // The trampoline may have been bitcast to a bogus type (FTy).
+      // Handle this by synthesizing a new function type, equal to FTy
+      // with the chain parameter inserted.  Likewise for attributes.
+
+      const ParamAttrsList *Attrs = FTy->getParamAttrs();
+      std::vector<const Type*> NewTypes;
+      ParamAttrsVector NewAttrs;
+      NewTypes.reserve(FTy->getNumParams()+1);
+
+      // Add any function result attributes.
+      uint16_t Attr = Attrs ? Attrs->getParamAttrs(0) : 0;
+      if (Attr)
+        NewAttrs.push_back (ParamAttrsWithIndex::get(0, Attr));
+
+      // Insert the chain's type into the list of parameter types, which may
+      // mean appending it.  Likewise for the chain's attributes.
+      {
+        unsigned Idx = 1;
+        FunctionType::param_iterator I = FTy->param_begin(),
+          E = FTy->param_end();
+
+        do {
+          if (Idx == NestIdx) {
+            // Add the chain's type and attributes.
+            NewTypes.push_back(NestTy);
+            NewAttrs.push_back(ParamAttrsWithIndex::get(NestIdx, NestAttr));
+          }
+
+          if (I == E)
+            break;
+
+          // Add the original type and attributes.
+          NewTypes.push_back(*I);
+          Attr = Attrs ? Attrs->getParamAttrs(Idx) : 0;
+          if (Attr)
+            NewAttrs.push_back
+              (ParamAttrsWithIndex::get(Idx + (Idx >= NestIdx), Attr));
+
+          ++Idx, ++I;
+        } while (1);
+      }
+
+      // Replace the trampoline call with a direct call.  Let the generic
+      // code sort out any function type mismatches.
+      FunctionType *NewFTy =
+        FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg(),
+                          ParamAttrsList::get(NewAttrs));
+      Constant *NewCallee = NestF->getType() == PointerType::get(NewFTy) ?
+        NestF : ConstantExpr::getBitCast(NestF, PointerType::get(NewFTy));
+
+      Instruction *NewCaller;
+      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
+        NewCaller = new InvokeInst(NewCallee,
+                                   II->getNormalDest(), II->getUnwindDest(),
+                                   NewArgs.begin(), NewArgs.end(),
+                                   Caller->getName(), Caller);
+        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
+      } else {
+        NewCaller = new CallInst(NewCallee, NewArgs.begin(), NewArgs.end(),
+                                 Caller->getName(), Caller);
+        if (cast<CallInst>(Caller)->isTailCall())
+          cast<CallInst>(NewCaller)->setTailCall();
+        cast<CallInst>(NewCaller)->
+          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+      }
+      if (Caller->getType() != Type::VoidTy && !Caller->use_empty())
+        Caller->replaceAllUsesWith(NewCaller);
+      Caller->eraseFromParent();
+      RemoveFromWorkList(Caller);
+      return 0;
+    }
+  }
+
+  // Replace the trampoline call with a direct call.  Since there is no 'nest'
+  // parameter, there is no need to adjust the argument list.  Let the generic
+  // code sort out any function type mismatches.
+  Constant *NewCallee =
+    NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy);
+  CS.setCalledFunction(NewCallee);
+  return CS.getInstruction();
+}
+
  /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)]
  /// and if a/b/c/d and the add's all have a single use, turn this into two phi's
  /// and a single binop.
@@ -8283,6 +8490,10 @@ static bool DeadPHICycle(PHINode *PN,
    // Remember this node, and if we find the cycle, return.
    if (!PotentiallyDeadPHIs.insert(PN))
      return true;
+  
+  // Don't scan crazily complex things.
+  if (PotentiallyDeadPHIs.size() == 16)
+    return false;
  
    if (PHINode *PU = dyn_cast<PHINode>(PN->use_back()))
      return DeadPHICycle(PU, PotentiallyDeadPHIs);
@@ -8406,10 +8617,25 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    // If this GEP instruction doesn't move the pointer, and if the input operand
    // is a bitcast of another pointer, just replace the GEP with a bitcast of the
    // real input to the dest type.
-  if (GEP.hasAllZeroIndices() && isa<BitCastInst>(GEP.getOperand(0)))
-    return new BitCastInst(cast<BitCastInst>(GEP.getOperand(0))->getOperand(0),
-                           GEP.getType());
-    
+  if (GEP.hasAllZeroIndices()) {
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(GEP.getOperand(0))) {
+      // If the bitcast is of an allocation, and the allocation will be
+      // converted to match the type of the cast, don't touch this.
+      if (isa<AllocationInst>(BCI->getOperand(0))) {
+        // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+        if (Instruction *I = visitBitCast(*BCI)) {
+          if (I != BCI) {
+            I->takeName(BCI);
+            BCI->getParent()->getInstList().insert(BCI, I);
+            ReplaceInstUsesWith(*BCI, I);
+          }
+          return &GEP;
+        }
+      }
+      return new BitCastInst(BCI->getOperand(0), GEP.getType());
+    }
+  }
+  
    // Combine Indices - If the source pointer to this getelementptr instruction
    // is a getelementptr instruction, combine the indices of the two
    // getelementptr instructions into a single instruction.
@@ -8498,8 +8724,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
      }
  
      if (!Indices.empty())
-      return new GetElementPtrInst(SrcGEPOperands[0], &Indices[0],
-                                   Indices.size(), GEP.getName());
+      return new GetElementPtrInst(SrcGEPOperands[0], Indices.begin(),
+                                   Indices.end(), GEP.getName());
  
    } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) {
      // GEP of global variable.  If all of the indices for this GEP are
@@ -8550,9 +8776,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        if (isa<ArrayType>(SrcElTy) &&
            TD->getTypeSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
            TD->getTypeSize(ResElTy)) {
+        Value *Idx[2];
+        Idx[0] = Constant::getNullValue(Type::Int32Ty);
+        Idx[1] = GEP.getOperand(1);
          Value *V = InsertNewInstBefore(
-               new GetElementPtrInst(X, Constant::getNullValue(Type::Int32Ty),
-                                     GEP.getOperand(1), GEP.getName()), GEP);
+               new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName()), GEP);
          // V and GEP are both pointer types --> BitCast
          return new BitCastInst(V, GEP.getType());
        }
@@ -8605,9 +8833,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
            }
  
            // Insert the new GEP instruction.
+          Value *Idx[2];
+          Idx[0] = Constant::getNullValue(Type::Int32Ty);
+          Idx[1] = NewIdx;
            Instruction *NewGEP =
-            new GetElementPtrInst(X, Constant::getNullValue(Type::Int32Ty),
-                                  NewIdx, GEP.getName());
+            new GetElementPtrInst(X, Idx, Idx + 2, GEP.getName());
            NewGEP = InsertNewInstBefore(NewGEP, GEP);
            // The NewGEP must be pointer typed, so must the old one -> BitCast
            return new BitCastInst(NewGEP, GEP.getType());
@@ -8647,7 +8877,10 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
        // insert our getelementptr instruction...
        //
        Value *NullIdx = Constant::getNullValue(Type::Int32Ty);
-      Value *V = new GetElementPtrInst(New, NullIdx, NullIdx,
+      Value *Idx[2];
+      Idx[0] = NullIdx;
+      Idx[1] = NullIdx;
+      Value *V = new GetElementPtrInst(New, Idx, Idx + 2,
                                         New->getName()+".sub", It);
  
        // Now make everything use the getelementptr instead of the original
@@ -8760,8 +8993,13 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI) {
  /// specified pointer, we do a quick local scan of the basic block containing
  /// ScanFrom, to determine if the address is already accessed.
  static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
-  // If it is an alloca or global variable, it is always safe to load from.
-  if (isa<AllocaInst>(V) || isa<GlobalVariable>(V)) return true;
+  // If it is an alloca it is always safe to load from.
+  if (isa<AllocaInst>(V)) return true;
+
+  // If it is a global variable it is mostly safe to load from.
+  if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
+    // Don't try to evaluate aliases.  External weak GV can be null.
+    return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
  
    // Otherwise, be a little bit agressive by scanning the local block where we
    // want to check to see if the pointer is already being loaded or stored
@@ -8782,11 +9020,33 @@ static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
    return false;
  }
  
+/// GetUnderlyingObject - Trace through a series of getelementptrs and bitcasts
+/// until we find the underlying object a pointer is referring to or something
+/// we don't understand.  Note that the returned pointer may be offset from the
+/// input, because we ignore GEP indices.
+static Value *GetUnderlyingObject(Value *Ptr) {
+  while (1) {
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+      if (CE->getOpcode() == Instruction::BitCast ||
+          CE->getOpcode() == Instruction::GetElementPtr)
+        Ptr = CE->getOperand(0);
+      else
+        return Ptr;
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr)) {
+      Ptr = BCI->getOperand(0);
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+      Ptr = GEP->getOperand(0);
+    } else {
+      return Ptr;
+    }
+  }
+}
+
  Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
    Value *Op = LI.getOperand(0);
  
    // Attempt to improve the alignment.
-  unsigned KnownAlign = GetKnownAlignment(Op, TD);
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Op, TD);
    if (KnownAlign > LI.getAlignment())
      LI.setAlignment(KnownAlign);
  
@@ -8856,10 +9116,44 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
          }
  
        } else if (CE->isCast()) {
+        // Instead of loading constant c string, use corresponding integer value
+        // directly if string length is small enough.
+        const std::string &Str = CE->getOperand(0)->getStringValue();
+        if (!Str.empty()) {
+          unsigned len = Str.length();
+          const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+          unsigned numBits = Ty->getPrimitiveSizeInBits();
+          if ((numBits >> 3) == len + 1) {
+            // Replace LI with immediate integer store.
+            APInt StrVal(numBits, 0);
+            APInt SingleChar(numBits, 0);
+            for (unsigned i = 0; i < len; i++) {
+              SingleChar = (uint64_t) Str[i];
+              StrVal = (StrVal << 8) | SingleChar;
+            }
+            // Append NULL at the end.
+            SingleChar = 0;
+            StrVal = (StrVal << 8) | SingleChar;
+            Value *NL = ConstantInt::get(StrVal);
+            return ReplaceInstUsesWith(LI, NL);
+          }
+        }
+
          if (Instruction *Res = InstCombineLoadCast(*this, LI))
            return Res;
        }
    }
+    
+  // If this load comes from anywhere in a constant global, and if the global
+  // is all undef or zero, we know what it loads.
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Op))) {
+    if (GV->isConstant() && GV->hasInitializer()) {
+      if (GV->getInitializer()->isNullValue())
+        return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));
+      else if (isa<UndefValue>(GV->getInitializer()))
+        return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
+    }
+  }
  
    if (Op->hasOneUse()) {
      // Change select and PHI nodes to select values instead of addresses: this
@@ -8986,7 +9280,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
    }
  
    // Attempt to improve the alignment.
-  unsigned KnownAlign = GetKnownAlignment(Ptr, TD);
+  unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD);
    if (KnownAlign > SI.getAlignment())
      SI.setAlignment(KnownAlign);
  
@@ -9013,7 +9307,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
      // the pointer we're loading and is producing the pointer we're storing,
      // then *this* store is dead (X = load P; store X -> P).
      if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (LI == Val && LI->getOperand(0) == Ptr) {
+      if (LI == Val && LI->getOperand(0) == Ptr && !SI.isVolatile()) {
          EraseInstFromFunction(SI);
          ++NumCombined;
          return 0;
@@ -10073,6 +10367,9 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
    }
  
    assert(WorklistMap.empty() && "Worklist empty, but map not?");
+    
+  // Do an explicit clear, this shrinks the map if needed.
+  WorklistMap.clear();
    return Changed;
  }