X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FInstructionCombining.cpp;h=b80d4d630afc2d8144174494251d42a19a1a4779;hb=1997473cf72957d0e70322e2fe6fe2ab141c58a6;hp=78fc0de2cdf8667ac1e538b07a6868e9c586b675;hpb=ba41783dbb1b8da52fac9859bd9d4770de7a82bd;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 78fc0de2cdf..b80d4d630af 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -76,6 +76,9 @@ namespace {
     TargetData *TD;
     bool MustPreserveLCSSA;
   public:
+    static char ID; // Pass identification, replacement for typeid
+    InstCombiner() : FunctionPass((intptr_t)&ID) {}
+
     /// AddToWorkList - Add the specified instruction to the worklist if it
     /// isn't already in it.
     void AddToWorkList(Instruction *I) {
@@ -193,9 +196,10 @@ namespace {
                                       BinaryOperator &I);
     Instruction *commonCastTransforms(CastInst &CI);
     Instruction *commonIntCastTransforms(CastInst &CI);
-    Instruction *visitTrunc(CastInst &CI);
-    Instruction *visitZExt(CastInst &CI);
-    Instruction *visitSExt(CastInst &CI);
+    Instruction *commonPointerCastTransforms(CastInst &CI);
+    Instruction *visitTrunc(TruncInst &CI);
+    Instruction *visitZExt(ZExtInst &CI);
+    Instruction *visitSExt(SExtInst &CI);
     Instruction *visitFPTrunc(CastInst &CI);
     Instruction *visitFPExt(CastInst &CI);
     Instruction *visitFPToUI(CastInst &CI);
@@ -204,7 +208,7 @@ namespace {
     Instruction *visitSIToFP(CastInst &CI);
     Instruction *visitPtrToInt(CastInst &CI);
     Instruction *visitIntToPtr(CastInst &CI);
-    Instruction *visitBitCast(CastInst &CI);
+    Instruction *visitBitCast(BitCastInst &CI);
     Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                                 Instruction *FI);
     Instruction *visitSelectInst(SelectInst &CI);
@@ -350,12 +354,14 @@ namespace {
                               bool isSub, Instruction &I);
     Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
                                  bool isSigned, bool Inside, Instruction &IB);
-    Instruction *PromoteCastOfAllocation(CastInst &CI, AllocationInst &AI);
+    Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI);
     Instruction *MatchBSwap(BinaryOperator &I);
+    bool SimplifyStoreAtEndOfBlock(StoreInst &SI);

     Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
   };

+  char InstCombiner::ID = 0;
   RegisterPass<InstCombiner> X("instcombine", "Combine redundant instructions");
 }
@@ -1489,7 +1495,73 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, uint64_t DemandedElts,
       UndefElts |= 1ULL << IdxNo;
       break;
     }
+  case Instruction::BitCast: {
+    // Packed->packed casts only.
+    const VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
+    if (!VTy) break;
+    unsigned InVWidth = VTy->getNumElements();
+    uint64_t InputDemandedElts = 0;
+    unsigned Ratio;
+
+    if (VWidth == InVWidth) {
+      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
+      // elements as are demanded of us.
+      Ratio = 1;
+      InputDemandedElts = DemandedElts;
+    } else if (VWidth > InVWidth) {
+      // Untested so far.
+      break;
+
+      // If there are more elements in the result than there are in the
+      // source, then an input element is live if any of the corresponding
+      // output elements are live.
+      Ratio = VWidth/InVWidth;
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
+        if (DemandedElts & (1ULL << OutIdx))
+          InputDemandedElts |= 1ULL << (OutIdx/Ratio);
+      }
+    } else {
+      // Untested so far.
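+      // Bail out for now: the narrowing logic below is deliberately
+      // unreachable until it gets test coverage.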
+      break;
+
+      // If there are more elements in the source than there are in the
+      // result, then an input element is live if the corresponding output
+      // element is live.
+      Ratio = InVWidth/VWidth;
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if (DemandedElts & (1ULL << InIdx/Ratio))
+          InputDemandedElts |= 1ULL << InIdx;
+    }
+
+    // Simplify the source operand given which of its elements are demanded.
+    TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
+                                      UndefElts2, Depth+1);
+    if (TmpV) {
+      I->setOperand(0, TmpV);
+      MadeChange = true;
+    }
+    UndefElts = UndefElts2;
+    if (VWidth > InVWidth) {
+      assert(0 && "Unimp");
+      // If there are more elements in the result than there are in the
+      // source, then an output element is undef if the corresponding input
+      // element is undef.
+      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
+        if (UndefElts2 & (1ULL << (OutIdx/Ratio)))
+          UndefElts |= 1ULL << OutIdx;
+    } else if (VWidth < InVWidth) {
+      assert(0 && "Unimp");
+      // If there are more elements in the source than there are in the
+      // result, then a result element is undef if all of the corresponding
+      // input elements are undef.
+      UndefElts = ~0ULL >> (64-VWidth);  // Start out all undef.
+      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
+        if ((UndefElts2 & (1ULL << InIdx)) == 0)    // Not undef?
+          UndefElts &= ~(1ULL << (InIdx/Ratio));    // Clear undef bit.
+    }
+    break;
+  }
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
@@ -2157,7 +2229,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       // 0 - (X sdiv C)  -> (X sdiv -C)
       if (Op1I->getOpcode() == Instruction::SDiv)
         if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
-          if (CSI->isNullValue())
+          if (CSI->isZero())
            if (Constant *DivRHS = dyn_cast<Constant>(Op1I->getOperand(1)))
              return BinaryOperator::createSDiv(Op1I->getOperand(0),
                                                ConstantExpr::getNeg(DivRHS));
@@ -2201,7 +2273,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS) {
   switch (pred) {
     case ICmpInst::ICMP_SLT:
       // True if LHS s< RHS and RHS == 0
-      return RHS->isNullValue();
+      return RHS->isZero();
     case ICmpInst::ICMP_SLE:
       // True if LHS s<= RHS and RHS == -1
       return RHS->isAllOnesValue();
@@ -2236,7 +2308,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
           return BinaryOperator::createMul(SI->getOperand(0),
                                            ConstantExpr::getShl(CI, ShOp));

-      if (CI->isNullValue())
+      if (CI->isZero())
         return ReplaceInstUsesWith(I, Op1);  // X * 0  == 0
       if (CI->equalsInt(1))                  // X * 1  == X
         return ReplaceInstUsesWith(I, Op0);
@@ -4269,38 +4341,66 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
   Value *Result = Constant::getNullValue(IntPtrTy);

   // Build a mask for high order bits.
-  uint64_t PtrSizeMask = ~0ULL >> (64-TD.getPointerSize()*8);
+  unsigned IntPtrWidth = TD.getPointerSize()*8;
+  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

   for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
     Value *Op = GEP->getOperand(i);
     uint64_t Size = TD.getTypeSize(GTI.getIndexedType()) & PtrSizeMask;
-    Constant *Scale = ConstantInt::get(IntPtrTy, Size);
-    if (Constant *OpC = dyn_cast<Constant>(Op)) {
-      if (!OpC->isNullValue()) {
-        OpC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
-        Scale = ConstantExpr::getMul(OpC, Scale);
-        if (Constant *RC = dyn_cast<Constant>(Result))
-          Result = ConstantExpr::getAdd(RC, Scale);
-        else {
-          // Emit an add instruction.
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
+      if (OpC->isZero()) continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
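+      // (The offset comes from the target's struct layout rather than from
+      // scaling the index by an element size.)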
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+
+        if (ConstantInt *RC = dyn_cast<ConstantInt>(Result))
+          Result = ConstantInt::get(RC->getValue() + APInt(IntPtrWidth, Size));
+        else
           Result = IC.InsertNewInstBefore(
-                       BinaryOperator::createAdd(Result, Scale,
-                                                 GEP->getName()+".offs"), I);
-        }
+                   BinaryOperator::createAdd(Result,
+                                             ConstantInt::get(IntPtrTy, Size),
+                                             GEP->getName()+".offs"), I);
+        continue;
       }
-    } else {
-      // Convert to correct type.
-      Op = IC.InsertNewInstBefore(CastInst::createSExtOrBitCast(Op, IntPtrTy,
-                                                Op->getName()+".c"), I);
-      if (Size != 1)
-        // We'll let instcombine(mul) convert this to a shl if possible.
+
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
+      Scale = ConstantExpr::getMul(OC, Scale);
+      if (Constant *RC = dyn_cast<Constant>(Result))
+        Result = ConstantExpr::getAdd(RC, Scale);
+      else {
+        // Emit an add instruction.
+        Result = IC.InsertNewInstBefore(
+                    BinaryOperator::createAdd(Result, Scale,
+                                              GEP->getName()+".offs"), I);
+      }
+      continue;
+    }
+    // Convert to correct type.
+    if (Op->getType() != IntPtrTy) {
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getSExt(OpC, IntPtrTy);
+      else
+        Op = IC.InsertNewInstBefore(new SExtInst(Op, IntPtrTy,
+                                                 Op->getName()+".c"), I);
+    }
+    if (Size != 1) {
+      Constant *Scale = ConstantInt::get(IntPtrTy, Size);
+      if (Constant *OpC = dyn_cast<Constant>(Op))
+        Op = ConstantExpr::getMul(OpC, Scale);
+      else    // We'll let instcombine(mul) convert this to a shl if possible.
        Op = IC.InsertNewInstBefore(BinaryOperator::createMul(Op, Scale,
-                                                      GEP->getName()+".idx"), I);
+                                                  GEP->getName()+".idx"), I);
+    }

-      // Emit an add instruction.
+    // Emit an add instruction.
+    if (isa<Constant>(Op) && isa<Constant>(Result))
+      Result = ConstantExpr::getAdd(cast<Constant>(Op),
+                                    cast<Constant>(Result));
+    else
       Result = IC.InsertNewInstBefore(BinaryOperator::createAdd(Op, Result,
-                                                      GEP->getName()+".offs"), I);
-    }
+                                                  GEP->getName()+".offs"), I);
   }
   return Result;
 }
@@ -5457,8 +5557,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       }
     }
   } else {  // Not a ICMP_EQ/ICMP_NE
-    // If the LHS is a cast from an integral value of the same size, then
-    // since we know the RHS is a constant, try to simlify.
+    // If the LHS is a cast from an integral value of the same size,
+    // then since we know the RHS is a constant, try to simplify.
     if (CastInst *Cast = dyn_cast<CastInst>(LHSI)) {
       Value *CastOp = Cast->getOperand(0);
       const Type *SrcTy = CastOp->getType();
@@ -6018,10 +6118,9 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,

 /// PromoteCastOfAllocation - If we find a cast of an allocation instruction,
 /// try to eliminate the cast by moving the type information into the alloc.
-Instruction *InstCombiner::PromoteCastOfAllocation(CastInst &CI,
+Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
                                                    AllocationInst &AI) {
-  const PointerType *PTy = dyn_cast<PointerType>(CI.getType());
-  if (!PTy) return 0;   // Not casting the allocation to a pointer type.
+  const PointerType *PTy = cast<PointerType>(CI.getType());

   // Remove any uses of AI that are dead.
   assert(!CI.use_empty() && "Dead instructions should be removed earlier!");
@@ -6165,7 +6264,7 @@ static bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
         MaskedValueIsZero(I->getOperand(0),
           APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
         CI->getLimitedValue(BitWidth) < BitWidth) {
-      return CanEvaluateInDifferentType(I->getOperand(0), Ty, NumCastsRemoved);
+      return CanEvaluateInDifferentType(I->getOperand(0), Ty,NumCastsRemoved);
     }
     break;
@@ -6259,32 +6358,6 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
     }
   }

-  // If casting the result of a getelementptr instruction with no offset, turn
-  // this into a cast of the original pointer!
-  //
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
-    bool AllZeroOperands = true;
-    for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i)
-      if (!isa<Constant>(GEP->getOperand(i)) ||
-          !cast<Constant>(GEP->getOperand(i))->isNullValue()) {
-        AllZeroOperands = false;
-        break;
-      }
-    if (AllZeroOperands) {
-      // Changing the cast operand is usually not a good idea but it is safe
-      // here because the pointer operand is being replaced with another
-      // pointer operand so the opcode doesn't need to change.
-      CI.setOperand(0, GEP->getOperand(0));
-      return &CI;
-    }
-  }
-
-  // If we are casting a malloc or alloca to a pointer to a type of the same
-  // size, rewrite the allocation instruction to allocate the "right" type.
-  if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
-    if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
-      return V;
-
   // If we are casting a select then fold the cast into the select
   if (SelectInst *SI = dyn_cast<SelectInst>(Src))
     if (Instruction *NV = FoldOpIntoSelect(CI, SI, this))
@@ -6298,6 +6371,100 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
   return 0;
 }

+/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
+Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
+  Value *Src = CI.getOperand(0);
+
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
+    // If casting the result of a getelementptr instruction with no offset,
+    // turn this into a cast of the original pointer!
+    if (GEP->hasAllZeroIndices()) {
+      // Changing the cast operand is usually not a good idea but it is safe
+      // here because the pointer operand is being replaced with another
+      // pointer operand so the opcode doesn't need to change.
+      AddToWorkList(GEP);
+      CI.setOperand(0, GEP->getOperand(0));
+      return &CI;
+    }
+
+    // If the GEP has a single use, and the base pointer is a bitcast, and the
+    // GEP computes a constant offset, see if we can convert these three
+    // instructions into fewer.  This typically happens with unions and other
+    // non-type-safe code.
+    if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {
+      if (GEP->hasAllConstantIndices()) {
+        // We are guaranteed to get a constant from EmitGEPOffset.
+        ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(GEP, CI, *this));
+        int64_t Offset = OffsetV->getSExtValue();
+
+        // Get the base pointer input of the bitcast, and the type it points to.
+        Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
+        const Type *GEPIdxTy =
+          cast<PointerType>(OrigBase->getType())->getElementType();
+        if (GEPIdxTy->isSized()) {
+          SmallVector<Value*, 8> NewIndices;
+
+          // Start with the index over the outer type.
+          const Type *IntPtrTy = TD->getIntPtrType();
+          int64_t TySize = TD->getTypeSize(GEPIdxTy);
+          int64_t FirstIdx = Offset/TySize;
+          Offset %= TySize;
+
+          // Handle silly modulus not returning values in the range [0..TySize).
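+          // C++ '%' can yield a negative remainder when Offset is negative,
+          // so normalize into [0, TySize) by borrowing from FirstIdx.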
+          if (Offset < 0) {
+            --FirstIdx;
+            Offset += TySize;
+            assert(Offset >= 0);
+          }
+
+          NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+          assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+
+          // Index into the types.  If we fail, set OrigBase to null.
+          while (Offset) {
+            if (const StructType *STy = dyn_cast<StructType>(GEPIdxTy)) {
+              const StructLayout *SL = TD->getStructLayout(STy);
+              unsigned Elt = SL->getElementContainingOffset(Offset);
+              NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
+
+              Offset -= SL->getElementOffset(Elt);
+              GEPIdxTy = STy->getElementType(Elt);
+            } else if (isa<ArrayType>(GEPIdxTy) || isa<VectorType>(GEPIdxTy)) {
+              const SequentialType *STy = cast<SequentialType>(GEPIdxTy);
+              uint64_t EltSize = TD->getTypeSize(STy->getElementType());
+              NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset/EltSize));
+              Offset %= EltSize;
+              GEPIdxTy = STy->getElementType();
+            } else {
+              // Otherwise, we can't index into this, bail out.
+              Offset = 0;
+              OrigBase = 0;
+            }
+          }
+          if (OrigBase) {
+            // If we were able to index down into an element, create the GEP
+            // and bitcast the result.  This eliminates one bitcast,
+            // potentially two.
+            Instruction *NGEP = new GetElementPtrInst(OrigBase, &NewIndices[0],
+                                                      NewIndices.size(), "");
+            InsertNewInstBefore(NGEP, CI);
+            NGEP->takeName(GEP);
+
+            if (isa<BitCastInst>(CI))
+              return new BitCastInst(NGEP, CI.getType());
+            assert(isa<PtrToIntInst>(CI));
+            return new PtrToIntInst(NGEP, CI.getType());
+          }
+        }
+      }
+    }
+  }
+
+  return commonCastTransforms(CI);
+}
+
+
+
 /// Only the TRUNC, ZEXT, SEXT, and BITCAST can have both operand and result
 /// as integer types.  This function implements the common transforms for all
 /// those cases.
@@ -6471,7 +6638,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
   return 0;
 }

-Instruction *InstCombiner::visitTrunc(CastInst &CI) {
+Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonIntCastTransforms(CI))
     return Result;
@@ -6528,7 +6695,7 @@ Instruction *InstCombiner::visitTrunc(CastInst &CI) {
   return 0;
 }

-Instruction *InstCombiner::visitZExt(CastInst &CI) {
+Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
   // If one of the common conversions will work ..
   if (Instruction *Result = commonIntCastTransforms(CI))
     return Result;
@@ -6570,6 +6737,33 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
     // cast to integer to avoid the comparison.
     if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
       const APInt &Op1CV = Op1C->getValue();
+
+      // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
+      // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
+      if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+          (ICI->getPredicate() == ICmpInst::ICMP_SGT &&
+           Op1CV.isAllOnesValue())) {
+        Value *In = ICI->getOperand(0);
+        Value *Sh = ConstantInt::get(In->getType(),
+                                     In->getType()->getPrimitiveSizeInBits()-1);
+        In = InsertNewInstBefore(BinaryOperator::createLShr(In, Sh,
+                                                  In->getName()+".lobit"),
+                                 CI);
+        if (In->getType() != CI.getType())
+          In = CastInst::createIntegerCast(In, CI.getType(),
+                                           false/*ZExt*/, "tmp", &CI);
+
+        if (ICI->getPredicate() == ICmpInst::ICMP_SGT) {
+          Constant *One = ConstantInt::get(In->getType(), 1);
+          In = InsertNewInstBefore(BinaryOperator::createXor(In, One,
+                                                   In->getName()+".not"),
+                                   CI);
+        }
+
+        return ReplaceInstUsesWith(CI, In);
+      }
+
+
+
       // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
       // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
       // zext (X == 1) to i32 --> X        iff X has only the low bit set.
@@ -6626,13 +6820,44 @@ Instruction *InstCombiner::visitZExt(CastInst &CI) {
   return 0;
 }

-Instruction *InstCombiner::visitSExt(CastInst &CI) {
+Instruction *InstCombiner::visitSExt(SExtInst &CI) {
   if (Instruction *I = commonIntCastTransforms(CI))
     return I;

-  // (x <s 0) ? -1 : 0 -> ashr x, 31
-  // (x >u 2147483647) ? -1 : 0 -> ashr x, 31
-
+  Value *Src = CI.getOperand(0);
+
+  // sext (x <s 0) -> ashr x, 31   -> all ones if signed
+  // sext (x >s -1) -> ashr x, 31  -> all ones if not signed
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) {
+    // If we are just checking for a icmp eq of a single bit and sext'ing it
+    // to an integer, then shift the bit to the appropriate place and then
+    // cast to integer to avoid the comparison.
+    if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+      const APInt &Op1CV = Op1C->getValue();
+
+      // sext (x <s  0) to i32 --> x>>s31       true if signbit set.
+      // sext (x >s -1) to i32 --> (x>>s31)^-1  true if signbit clear.
+      if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
+          (ICI->getPredicate() == ICmpInst::ICMP_SGT &&
+           Op1CV.isAllOnesValue())) {
+        Value *In = ICI->getOperand(0);
+        Value *Sh = ConstantInt::get(In->getType(),
+                                     In->getType()->getPrimitiveSizeInBits()-1);
+        In = InsertNewInstBefore(BinaryOperator::createAShr(In, Sh,
                                                  In->getName()+".lobit"),
+                                 CI);
+        if (In->getType() != CI.getType())
+          In = CastInst::createIntegerCast(In, CI.getType(),
+                                           true/*SExt*/, "tmp", &CI);
+
+        if (ICI->getPredicate() == ICmpInst::ICMP_SGT)
+          In = InsertNewInstBefore(BinaryOperator::createNot(In,
+                                                 In->getName()+".not"), CI);
+
+        return ReplaceInstUsesWith(CI, In);
+      }
+    }
+  }
+
   return 0;
 }

@@ -6661,15 +6886,14 @@ Instruction *InstCombiner::visitSIToFP(CastInst &CI) {
 }

 Instruction *InstCombiner::visitPtrToInt(CastInst &CI) {
-  return commonCastTransforms(CI);
+  return commonPointerCastTransforms(CI);
 }

 Instruction *InstCombiner::visitIntToPtr(CastInst &CI) {
   return commonCastTransforms(CI);
 }

-Instruction *InstCombiner::visitBitCast(CastInst &CI) {
-
+Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
   // If the operands are integer typed then apply the integer transforms,
   // otherwise just apply the common ones.
   Value *Src = CI.getOperand(0);
@@ -6679,6 +6903,9 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) {
   if (SrcTy->isInteger() && DestTy->isInteger()) {
     if (Instruction *Result = commonIntCastTransforms(CI))
       return Result;
+  } else if (isa<PointerType>(SrcTy)) {
+    if (Instruction *I = commonPointerCastTransforms(CI))
+      return I;
   } else {
     if (Instruction *Result = commonCastTransforms(CI))
       return Result;
@@ -6690,28 +6917,33 @@ Instruction *InstCombiner::visitBitCast(CastInst &CI) {
   if (DestTy == Src->getType())
     return ReplaceInstUsesWith(CI, Src);

-  // If the source and destination are pointers, and this cast is equivalent to
-  // a getelementptr X, 0, 0, 0...  turn it into the appropriate getelementptr.
-  // This can enhance SROA and other transforms that want type-safe pointers.
   if (const PointerType *DstPTy = dyn_cast<PointerType>(DestTy)) {
-    if (const PointerType *SrcPTy = dyn_cast<PointerType>(SrcTy)) {
-      const Type *DstElTy = DstPTy->getElementType();
-      const Type *SrcElTy = SrcPTy->getElementType();
-
-      Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
-      unsigned NumZeros = 0;
-      while (SrcElTy != DstElTy &&
-             isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
-             SrcElTy->getNumContainedTypes() /* not "{}" */) {
-        SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
-        ++NumZeros;
-      }
+    const PointerType *SrcPTy = cast<PointerType>(SrcTy);
+    const Type *DstElTy = DstPTy->getElementType();
+    const Type *SrcElTy = SrcPTy->getElementType();
+
+    // If we are casting a malloc or alloca to a pointer to a type of the same
+    // size, rewrite the allocation instruction to allocate the "right" type.
+    if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))
+      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+        return V;
+
+    // If the source and destination are pointers, and this cast is equivalent
+    // to a getelementptr X, 0, 0, 0... turn it into the appropriate
+    // getelementptr.  This can enhance SROA and other transforms that want
+    // type-safe pointers.
+    Constant *ZeroUInt = Constant::getNullValue(Type::Int32Ty);
+    unsigned NumZeros = 0;
+    while (SrcElTy != DstElTy &&
+           isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+           SrcElTy->getNumContainedTypes() /* not "{}" */) {
+      SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+      ++NumZeros;
+    }

-      // If we found a path from the src to dest, create the getelementptr now.
-      if (SrcElTy == DstElTy) {
-        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
-        return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
-      }
+    // If we found a path from the src to dest, create the getelementptr now.
+    if (SrcElTy == DstElTy) {
+      SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+      return new GetElementPtrInst(Src, &Idxs[0], Idxs.size());
     }
   }

@@ -7178,10 +7410,7 @@ static unsigned GetKnownAlignment(Value *V, TargetData *TD) {
     if (isa<PointerType>(CI->getOperand(0)->getType()))
       return GetKnownAlignment(CI->getOperand(0), TD);
     return 0;
-  } else if (isa<GetElementPtrInst>(V) ||
-             (isa<ConstantExpr>(V) &&
-              cast<ConstantExpr>(V)->getOpcode()==Instruction::GetElementPtr)) {
-    User *GEPI = cast<User>(V);
+  } else if (User *GEPI = dyn_castGetElementPtr(V)) {
     unsigned BaseAlignment = GetKnownAlignment(GEPI->getOperand(0), TD);
     if (BaseAlignment == 0) return 0;

@@ -7360,7 +7589,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         for (unsigned i = 0; i != 16; ++i) {
           if (isa<UndefValue>(Mask->getOperand(i)))
             continue;
-          unsigned Idx =cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+          unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
           Idx &= 31;  // Match the hardware behavior.

           if (ExtractedElts[Idx] == 0) {
@@ -7979,7 +8208,7 @@ static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);

-  // Is it 'getelementptr %P, long 0'  or 'getelementptr %P'
+  // Is it 'getelementptr %P, i32 0'  or 'getelementptr %P'
   // If so, eliminate the noop.
   if (GEP.getNumOperands() == 1)
     return ReplaceInstUsesWith(GEP, PtrOp);
@@ -7994,22 +8223,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (GEP.getNumOperands() == 2 && HasZeroPointerIndex)
     return ReplaceInstUsesWith(GEP, PtrOp);

-  // Keep track of whether all indices are zero constants integers.
-  bool AllZeroIndices = true;
-
   // Eliminate unneeded casts for indices.
   bool MadeChange = false;
   gep_type_iterator GTI = gep_type_begin(GEP);
   for (unsigned i = 1, e = GEP.getNumOperands(); i != e; ++i, ++GTI) {
-    // Track whether this GEP has all zero indices, if so, it doesn't move the
-    // input pointer, it just changes its type.
-    if (AllZeroIndices) {
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(i)))
-        AllZeroIndices = CI->isNullValue();
-      else
-        AllZeroIndices = false;
-    }
     if (isa<SequentialType>(*GTI)) {
       if (CastInst *CI = dyn_cast<CastInst>(GEP.getOperand(i))) {
         if (CI->getOpcode() == Instruction::ZExt ||
@@ -8045,7 +8263,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // If this GEP instruction doesn't move the pointer, and if the input operand
   // is a bitcast of another pointer, just replace the GEP with a bitcast of the
   // real input to the dest type.
-  if (AllZeroIndices && isa<BitCastInst>(GEP.getOperand(0)))
+  if (GEP.hasAllZeroIndices() && isa<BitCastInst>(GEP.getOperand(0)))
     return new BitCastInst(cast<BitCastInst>(GEP.getOperand(0))->getOperand(0),
                            GEP.getType());

@@ -8309,13 +8527,6 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
 Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {
   Value *Op = FI.getOperand(0);

-  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
-  if (CastInst *CI = dyn_cast<CastInst>(Op))
-    if (isa<PointerType>(CI->getOperand(0)->getType())) {
-      FI.setOperand(0, CI->getOperand(0));
-      return &FI;
-    }
-
   // free undef -> unreachable.
   if (isa<UndefValue>(Op)) {
     // Insert a new store to null because we cannot modify the CFG here.
     new StoreInst(ConstantInt::getTrue(),
                   UndefValue::get(PointerType::get(Type::Int1Ty)), &FI);
     return EraseInstFromFunction(FI);
   }
-
+
   // If we have 'free null' delete the instruction.  This can happen in stl code
   // when lots of inlining happens.
   if (isa<ConstantPointerNull>(Op))
     return EraseInstFromFunction(FI);
+
+  // Change free <ty>* (cast <ty2>* X to <ty>*) into free <ty2>* X
+  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op)) {
+    FI.setOperand(0, CI->getOperand(0));
+    return &FI;
+  }
+
+  // Change free (gep X, 0,0,0,0) into free(X)
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
+    if (GEPI->hasAllZeroIndices()) {
+      AddToWorkList(GEPI);
+      FI.setOperand(0, GEPI->getOperand(0));
+      return &FI;
+    }
+  }
+
+  // Change free(malloc) into nothing, if the malloc has a single use.
+  if (MallocInst *MI = dyn_cast<MallocInst>(Op))
+    if (MI->hasOneUse()) {
+      EraseInstFromFunction(FI);
+      return EraseInstFromFunction(*MI);
+    }

   return 0;
 }
@@ -8430,8 +8663,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   }

   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op))
-    if (isa<ConstantPointerNull>(GEPI->getOperand(0)) ||
-        isa<UndefValue>(GEPI->getOperand(0))) {
+    if (isa<ConstantPointerNull>(GEPI->getOperand(0))) {
       // Insert a new store to null instruction before the load to indicate
       // that this code is not reachable.  We do this instead of inserting
       // an unreachable instruction directly because we cannot modify the
@@ -8679,66 +8911,118 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   // ends with an unconditional branch, try to move it to the successor block.
   BBI = &SI; ++BBI;
   if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
-    if (BI->isUnconditional()) {
-      // Check to see if the successor block has exactly two incoming edges.  If
-      // so, see if the other predecessor contains a store to the same location.
-      // If so, insert a PHI node (if needed) and move the stores down.
-      BasicBlock *Dest = BI->getSuccessor(0);
-
-      pred_iterator PI = pred_begin(Dest);
-      BasicBlock *Other = 0;
-      if (*PI != BI->getParent())
-        Other = *PI;
-      ++PI;
-      if (PI != pred_end(Dest)) {
-        if (*PI != BI->getParent())
-          if (Other)
-            Other = 0;
-          else
-            Other = *PI;
-        if (++PI != pred_end(Dest))
-          Other = 0;
-      }
-      if (Other) {  // If only one other pred...
-        BBI = Other->getTerminator();
-        // Make sure this other block ends in an unconditional branch and that
-        // there is an instruction before the branch.
-        if (isa<BranchInst>(BBI) && cast<BranchInst>(BBI)->isUnconditional() &&
-            BBI != Other->begin()) {
-          --BBI;
-          StoreInst *OtherStore = dyn_cast<StoreInst>(BBI);
-
-          // If this instruction is a store to the same location.
-          if (OtherStore && OtherStore->getOperand(1) == SI.getOperand(1)) {
-            // Okay, we know we can perform this transformation.  Insert a PHI
-            // node now if we need it.
-            Value *MergedVal = OtherStore->getOperand(0);
-            if (MergedVal != SI.getOperand(0)) {
-              PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
-              PN->reserveOperandSpace(2);
-              PN->addIncoming(SI.getOperand(0), SI.getParent());
-              PN->addIncoming(OtherStore->getOperand(0), Other);
-              MergedVal = InsertNewInstBefore(PN, Dest->front());
-            }
-
-            // Advance to a place where it is safe to insert the new store and
-            // insert it.
-            BBI = Dest->begin();
-            while (isa<PHINode>(BBI)) ++BBI;
-            InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
-                                              OtherStore->isVolatile()), *BBI);
-
-            // Nuke the old stores.
-            EraseInstFromFunction(SI);
-            EraseInstFromFunction(*OtherStore);
-            ++NumCombined;
-            return 0;
-          }
-        }
-      }
-    }
+    if (BI->isUnconditional())
+      if (SimplifyStoreAtEndOfBlock(SI))
+        return 0;  // xform done!
+
+  return 0;
+}
+
+/// SimplifyStoreAtEndOfBlock - Turn things like:
+///   if () { *P = v1; } else { *P = v2 }
+/// into a phi node with a store in the successor.
+///
+/// Simplify things like:
+///   *P = v1; if () { *P = v2; }
+/// into a phi node with a store in the successor.
+///
+bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
+  BasicBlock *StoreBB = SI.getParent();
+
+  // Check to see if the successor block has exactly two incoming edges.  If
+  // so, see if the other predecessor contains a store to the same location.
+  // If so, insert a PHI node (if needed) and move the stores down.
+  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
+
+  // Determine whether Dest has exactly two predecessors and, if so, compute
+  // the other predecessor.
+  pred_iterator PI = pred_begin(DestBB);
+  BasicBlock *OtherBB = 0;
+  if (*PI != StoreBB)
+    OtherBB = *PI;
+  ++PI;
+  if (PI == pred_end(DestBB))
+    return false;
+
+  if (*PI != StoreBB) {
+    if (OtherBB)
+      return false;
+    OtherBB = *PI;
+  }
+  if (++PI != pred_end(DestBB))
+    return false;
+
+
+  // Verify that the other block ends in a branch and is not otherwise empty.
+  BasicBlock::iterator BBI = OtherBB->getTerminator();
+  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
+  if (!OtherBr || BBI == OtherBB->begin())
+    return false;
+
+  // If the other block ends in an unconditional branch, check for the 'if then
+  // else' case.  Make sure there is an instruction before the branch.
+  StoreInst *OtherStore = 0;
+  if (OtherBr->isUnconditional()) {
+    // If this isn't a store, or isn't a store to the same location, bail out.
+    --BBI;
+    OtherStore = dyn_cast<StoreInst>(BBI);
+    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1))
+      return false;
+  } else {
+    // Otherwise, the other block ended with a conditional branch. If one of
+    // the destinations is StoreBB, then we have the if/then case.
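+    // (OtherBB either jumps into StoreBB or around it into DestBB.)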
+    if (OtherBr->getSuccessor(0) != StoreBB &&
+        OtherBr->getSuccessor(1) != StoreBB)
+      return false;
+
+    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
+    // if/then triangle.  See if there is a store to the same ptr as SI that
+    // lives in OtherBB.
+    for (;; --BBI) {
+      // Check to see if we find the matching store.
+      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
+        if (OtherStore->getOperand(1) != SI.getOperand(1))
+          return false;
+        break;
+      }
+      // If we find something that may be using the stored value, or if we run
+      // out of instructions, we can't do the xform.
+      if (isa<LoadInst>(BBI) || BBI->mayWriteToMemory() ||
+          BBI == OtherBB->begin())
+        return false;
+    }
+
+    // In order to eliminate the store in OtherBB, we have to
+    // make sure nothing reads the stored value in StoreBB.
+    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
+      // FIXME: This should really be AA driven.
+      if (isa<LoadInst>(I) || I->mayWriteToMemory())
+        return false;
+    }
+  }
+
+  // Insert a PHI node now if we need it.
+  Value *MergedVal = OtherStore->getOperand(0);
+  if (MergedVal != SI.getOperand(0)) {
+    PHINode *PN = new PHINode(MergedVal->getType(), "storemerge");
+    PN->reserveOperandSpace(2);
+    PN->addIncoming(SI.getOperand(0), SI.getParent());
+    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+    MergedVal = InsertNewInstBefore(PN, DestBB->front());
+  }
+
+  // Advance to a place where it is safe to insert the new store and
+  // insert it.
+  BBI = DestBB->begin();
+  while (isa<PHINode>(BBI)) ++BBI;
+  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
+                                    OtherStore->isVolatile()), *BBI);
+
+  // Nuke the old stores.
+  EraseInstFromFunction(SI);
+  EraseInstFromFunction(*OtherStore);
+  ++NumCombined;
+  return true;
 }

@@ -8970,6 +9254,17 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {

     if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
       return ReplaceInstUsesWith(EI, Elt);
+
+    // If this extractelement is directly using a bitcast from a vector of
+    // the same number of elements, see if we can find the source element from
+    // it.  In this case, we will end up needing to bitcast the scalars.
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
+      if (const VectorType *VT =
+              dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+        if (VT->getNumElements() == VectorWidth)
+          if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
+            return new BitCastInst(Elt, EI.getType());
+    }
   }

   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -9181,7 +9476,8 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
     if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
         EI->getOperand(0)->getType() == IE.getType()) {
       unsigned NumVectorElts = IE.getType()->getNumElements();
-      unsigned ExtractedIdx=cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
+      unsigned ExtractedIdx =
+        cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
       unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

       if (ExtractedIdx >= NumVectorElts) // Out of range extract.