Rename getABITypeSize to getTypePaddedSize, as

[oota-llvm.git] / lib / Transforms / Scalar / InstructionCombining.cpp
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp

index 9ee375c382550f1b59d695b226b9be0f9cf0c822..ee3596bde433d83a9805117befff6c17acbc6264 100644 (file)
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -2929,19 +2929,6 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
      // sdiv X, -1 == -X
      if (RHS->isAllOnesValue())
        return BinaryOperator::CreateNeg(Op0);
-
-    // -X/C -> X/-C, if and only if negation doesn't overflow.
-    if (Value *LHSNeg = dyn_castNegVal(Op0)) {
-      if (ConstantInt *CI = dyn_cast<ConstantInt>(LHSNeg)) {
-        Constant *RHSNeg = ConstantExpr::getNeg(RHS);
-        if (RHS != RHSNeg) {    // Check that there is no overflow.
-          Constant *CINeg = ConstantExpr::getNeg(CI);
-          if (CI != CINeg)      // Check that there is no overflow.
-            return BinaryOperator::CreateSDiv(LHSNeg,
-                                              ConstantExpr::getNeg(RHS));
-        }
-      }
-    }
    }
  
    // If the sign bits of both operands are zero (i.e. we can prove they are
@@ -3102,6 +3089,36 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
      }
    }
  
+  // If it's a constant vector, flip any negative values positive.
+  if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
+    unsigned VWidth = RHSV->getNumOperands();
+
+    bool hasNegative = false;
+    for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
+      if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+        if (RHS->getValue().isNegative())
+          hasNegative = true;
+
+    if (hasNegative) {
+      std::vector<Constant *> Elts(VWidth);
+      for (unsigned i = 0; i != VWidth; ++i) {
+        if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+          if (RHS->getValue().isNegative())
+            Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
+          else
+            Elts[i] = RHS;
+        }
+      }
+
+      Constant *NewRHSV = ConstantVector::get(Elts);
+      if (NewRHSV != RHSV) {
+        AddUsesToWorkList(I);
+        I.setOperand(1, NewRHSV);
+        return &I;
+      }
+    }
+  }
+
    return 0;
  }
  
@@ -4259,18 +4276,18 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
                                           Value *C, Value *D) {
    // If A is not a select of -1/0, this cannot match.
    Value *Cond = 0;
-  if (!match(A, m_SelectCst(m_Value(Cond), -1, 0)))
+  if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond))))
      return 0;
  
    // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
-  if (match(D, m_SelectCst(m_Specific(Cond), 0, -1)))
+  if (match(D, m_SelectCst<0, -1>(m_Specific(Cond))))
      return SelectInst::Create(Cond, C, B);
-  if (match(D, m_Not(m_SelectCst(m_Specific(Cond), -1, 0))))
+  if (match(D, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
      return SelectInst::Create(Cond, C, B);
    // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D.
-  if (match(B, m_SelectCst(m_Specific(Cond), 0, -1)))
+  if (match(B, m_SelectCst<0, -1>(m_Specific(Cond))))
      return SelectInst::Create(Cond, C, D);
-  if (match(B, m_Not(m_SelectCst(m_Specific(Cond), -1, 0))))
+  if (match(B, m_Not(m_SelectCst<-1, 0>(m_Specific(Cond)))))
      return SelectInst::Create(Cond, C, D);
    return 0;
  }
@@ -4438,42 +4455,29 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
  
  /// FoldOrWithConstants - This helper function folds:
  ///
-///     ((A | B) & 1) | (B & -2)
+///     ((A | B) & C1) | (B & C2)
  ///
  /// into:
  /// 
-///     (A & 1) | B
+///     (A & C1) | B
  ///
-/// The constants aren't important. Only that they don't overlap. (I.e., the XOR
-/// of the two constants is "all ones".)
+/// when the XOR of the two constants is "all ones" (-1).
  Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
                                                 Value *A, Value *B, Value *C) {
-  if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C)) {
-    Value *V1 = 0, *C2 = 0;
-    if (match(Op, m_And(m_Value(V1), m_Value(C2)))) {
-      ConstantInt *CI2 = dyn_cast<ConstantInt>(C2);
-
-      if (!CI2) {
-        std::swap(V1, C2);
-        CI2 = dyn_cast<ConstantInt>(C2);
-      }
-
-      if (CI2) {
-        APInt Xor = CI1->getValue() ^ CI2->getValue();
-        if (Xor.isAllOnesValue()) {
-            if (V1 == B) {
-              Instruction *NewOp =
-                InsertNewInstBefore(BinaryOperator::CreateAnd(A, CI1), I);
-              return BinaryOperator::CreateOr(NewOp, B);
-            }
-            if (V1 == A) {
-              Instruction *NewOp =
-                InsertNewInstBefore(BinaryOperator::CreateAnd(B, CI1), I);
-              return BinaryOperator::CreateOr(NewOp, A);
-            }
-        }
-      }
-    }
+  ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
+  if (!CI1) return 0;
+
+  Value *V1 = 0;
+  ConstantInt *CI2 = 0;
+  if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+
+  APInt Xor = CI1->getValue() ^ CI2->getValue();
+  if (!Xor.isAllOnesValue()) return 0;
+
+  if (V1 == A || V1 == B) {
+    Instruction *NewOp =
+      InsertNewInstBefore(BinaryOperator::CreateAnd((V1 == A) ? B : A, CI1), I);
+    return BinaryOperator::CreateOr(NewOp, V1);
    }
  
    return 0;
@@ -4867,8 +4871,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
    
    
    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
      if (RHS == ConstantInt::getTrue() && Op0->hasOneUse()) {
+      // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
        if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))
          return new ICmpInst(ICI->getInversePredicate(),
                              ICI->getOperand(0), ICI->getOperand(1));
@@ -5138,7 +5142,7 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {
    for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
         ++i, ++GTI) {
      Value *Op = *i;
-    uint64_t Size = TD.getABITypeSize(GTI.getIndexedType()) & PtrSizeMask;
+    uint64_t Size = TD.getTypePaddedSize(GTI.getIndexedType()) & PtrSizeMask;
      if (ConstantInt *OpC = dyn_cast<ConstantInt>(Op)) {
        if (OpC->isZero()) continue;
        
@@ -5229,7 +5233,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
        if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
          Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
        } else {
-        uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+        uint64_t Size = TD.getTypePaddedSize(GTI.getIndexedType());
          Offset += Size*CI->getSExtValue();
        }
      } else {
@@ -5245,7 +5249,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
    Value *VariableIdx = GEP->getOperand(i);
    // Determine the scale factor of the variable element.  For example, this is
    // 4 if the variable index is into an array of i32.
-  uint64_t VariableScale = TD.getABITypeSize(GTI.getIndexedType());
+  uint64_t VariableScale = TD.getTypePaddedSize(GTI.getIndexedType());
    
    // Verify that there are no other variable indices.  If so, emit the hard way.
    for (++i, ++GTI; i != e; ++i, ++GTI) {
@@ -5259,7 +5263,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
      } else {
-      uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+      uint64_t Size = TD.getTypePaddedSize(GTI.getIndexedType());
        Offset += Size*CI->getSExtValue();
      }
    }
@@ -6339,6 +6343,28 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
    const APInt &RHSV = RHS->getValue();
    
    switch (LHSI->getOpcode()) {
+  case Instruction::Trunc:
+    if (ICI.isEquality() && LHSI->hasOneUse()) {
+      // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+      // of the high bits truncated out of x are known.
+      unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
+             SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
+      APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+      ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+      
+      // If all the high bits are known, we can do this xform.
+      if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
+        // Pull in the high bits from known-ones set.
+        APInt NewRHS(RHS->getValue());
+        NewRHS.zext(SrcBits);
+        NewRHS |= KnownOne;
+        return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
+                            ConstantInt::get(NewRHS));
+      }
+    }
+    break;
+      
    case Instruction::Xor:         // (icmp pred (xor X, XorCST), CI)
      if (ConstantInt *XorCST = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
        // If this is a comparison that tests the signbit (X < 0) or (x > -1),
@@ -6761,29 +6787,6 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
          return &ICI;
        }
      }
-  } else {  // Not a ICMP_EQ/ICMP_NE
-            // If the LHS is a cast from an integral value of the same size, 
-            // then since we know the RHS is a constant, try to simlify.
-    if (CastInst *Cast = dyn_cast<CastInst>(LHSI)) {
-      Value *CastOp = Cast->getOperand(0);
-      const Type *SrcTy = CastOp->getType();
-      uint32_t SrcTySize = SrcTy->getPrimitiveSizeInBits();
-      if (SrcTy->isInteger() && 
-          SrcTySize == Cast->getType()->getPrimitiveSizeInBits()) {
-        // If this is an unsigned comparison, try to make the comparison use
-        // smaller constant values.
-        if (ICI.getPredicate() == ICmpInst::ICMP_ULT && RHSV.isSignBit()) {
-          // X u< 128 => X s> -1
-          return new ICmpInst(ICmpInst::ICMP_SGT, CastOp, 
-                           ConstantInt::get(APInt::getAllOnesValue(SrcTySize)));
-        } else if (ICI.getPredicate() == ICmpInst::ICMP_UGT &&
-                   RHSV == APInt::getSignedMaxValue(SrcTySize)) {
-          // X u> 127 => X s< 0
-          return new ICmpInst(ICmpInst::ICMP_SLT, CastOp, 
-                              Constant::getNullValue(SrcTy));
-        }
-      }
-    }
    }
    return 0;
  }
@@ -7416,8 +7419,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
    // same, we open the door to infinite loops of various kinds.
    if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
  
-  uint64_t AllocElTySize = TD->getABITypeSize(AllocElTy);
-  uint64_t CastElTySize = TD->getABITypeSize(CastElTy);
+  uint64_t AllocElTySize = TD->getTypePaddedSize(AllocElTy);
+  uint64_t CastElTySize = TD->getTypePaddedSize(CastElTy);
    if (CastElTySize == 0 || AllocElTySize == 0) return 0;
  
    // See if we can satisfy the modulus by pulling a scale out of the array
@@ -7691,6 +7694,66 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
    return 0;
  }
  
+/// FindElementAtOffset - Given a type and a constant offset, determine whether
+/// or not there is a sequence of GEP indices into the type that will land us at
+/// the specified offset.  If so, fill them into NewIndices and return true,
+/// otherwise return false.
+static bool FindElementAtOffset(const Type *Ty, int64_t Offset, 
+                                SmallVectorImpl<Value*> &NewIndices,
+                                const TargetData *TD) {
+  if (!Ty->isSized()) return false;
+  
+  // Start with the index over the outer type.  Note that the type size
+  // might be zero (even if the offset isn't zero) if the indexed type
+  // is something like [0 x {int, int}]
+  const Type *IntPtrTy = TD->getIntPtrType();
+  int64_t FirstIdx = 0;
+  if (int64_t TySize = TD->getTypePaddedSize(Ty)) {
+    FirstIdx = Offset/TySize;
+    Offset -= FirstIdx*TySize;
+    
+    // Handle hosts where % returns negative instead of values [0..TySize).
+    if (Offset < 0) {
+      --FirstIdx;
+      Offset += TySize;
+      assert(Offset >= 0);
+    }
+    assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
+  }
+  
+  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+    
+  // Index into the types.  If we fail, set OrigBase to null.
+  while (Offset) {
+    // Indexing into tail padding between struct/array elements.
+    if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty))
+      return false;
+    
+    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+      const StructLayout *SL = TD->getStructLayout(STy);
+      assert(Offset < (int64_t)SL->getSizeInBytes() &&
+             "Offset must stay within the indexed type");
+      
+      unsigned Elt = SL->getElementContainingOffset(Offset);
+      NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
+      
+      Offset -= SL->getElementOffset(Elt);
+      Ty = STy->getElementType(Elt);
+    } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+      uint64_t EltSize = TD->getTypePaddedSize(AT->getElementType());
+      assert(EltSize && "Cannot index into a zero-sized array");
+      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+      Offset %= EltSize;
+      Ty = AT->getElementType();
+    } else {
+      // Otherwise, we can't index into the middle of this atomic type, bail.
+      return false;
+    }
+  }
+  
+  return true;
+}
+
  /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
  Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
    Value *Src = CI.getOperand(0);
@@ -7721,74 +7784,21 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
          Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0);
          const Type *GEPIdxTy =
            cast<PointerType>(OrigBase->getType())->getElementType();
-        if (GEPIdxTy->isSized()) {
-          SmallVector<Value*, 8> NewIndices;
-          
-          // Start with the index over the outer type.  Note that the type size
-          // might be zero (even if the offset isn't zero) if the indexed type
-          // is something like [0 x {int, int}]
-          const Type *IntPtrTy = TD->getIntPtrType();
-          int64_t FirstIdx = 0;
-          if (int64_t TySize = TD->getABITypeSize(GEPIdxTy)) {
-            FirstIdx = Offset/TySize;
-            Offset %= TySize;
+        SmallVector<Value*, 8> NewIndices;
+        if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices, TD)) {
+          // If we were able to index down into an element, create the GEP
+          // and bitcast the result.  This eliminates one bitcast, potentially
+          // two.
+          Instruction *NGEP = GetElementPtrInst::Create(OrigBase, 
+                                                        NewIndices.begin(),
+                                                        NewIndices.end(), "");
+          InsertNewInstBefore(NGEP, CI);
+          NGEP->takeName(GEP);
            
-            // Handle silly modulus not returning values values [0..TySize).
-            if (Offset < 0) {
-              --FirstIdx;
-              Offset += TySize;
-              assert(Offset >= 0);
-            }
-            assert((uint64_t)Offset < (uint64_t)TySize &&"Out of range offset");
-          }
-          
-          NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
-
-          // Index into the types.  If we fail, set OrigBase to null.
-          while (Offset) {
-            if (const StructType *STy = dyn_cast<StructType>(GEPIdxTy)) {
-              const StructLayout *SL = TD->getStructLayout(STy);
-              if (Offset < (int64_t)SL->getSizeInBytes()) {
-                unsigned Elt = SL->getElementContainingOffset(Offset);
-                NewIndices.push_back(ConstantInt::get(Type::Int32Ty, Elt));
-              
-                Offset -= SL->getElementOffset(Elt);
-                GEPIdxTy = STy->getElementType(Elt);
-              } else {
-                // Otherwise, we can't index into this, bail out.
-                Offset = 0;
-                OrigBase = 0;
-              }
-            } else if (isa<ArrayType>(GEPIdxTy) || isa<VectorType>(GEPIdxTy)) {
-              const SequentialType *STy = cast<SequentialType>(GEPIdxTy);
-              if (uint64_t EltSize = TD->getABITypeSize(STy->getElementType())){
-                NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
-                Offset %= EltSize;
-              } else {
-                NewIndices.push_back(ConstantInt::get(IntPtrTy, 0));
-              }
-              GEPIdxTy = STy->getElementType();
-            } else {
-              // Otherwise, we can't index into this, bail out.
-              Offset = 0;
-              OrigBase = 0;
-            }
-          }
-          if (OrigBase) {
-            // If we were able to index down into an element, create the GEP
-            // and bitcast the result.  This eliminates one bitcast, potentially
-            // two.
-            Instruction *NGEP = GetElementPtrInst::Create(OrigBase, 
-                                                          NewIndices.begin(),
-                                                          NewIndices.end(), "");
-            InsertNewInstBefore(NGEP, CI);
-            NGEP->takeName(GEP);
-            
-            if (isa<BitCastInst>(CI))
-              return new BitCastInst(NGEP, CI.getType());
-            assert(isa<PtrToIntInst>(CI));
-            return new PtrToIntInst(NGEP, CI.getType());
-          }
+          if (isa<BitCastInst>(CI))
+            return new BitCastInst(NGEP, CI.getType());
+          assert(isa<PtrToIntInst>(CI));
+          return new PtrToIntInst(NGEP, CI.getType());
          }
        }      
      }
@@ -8397,7 +8407,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
      // is a single-index GEP.
      if (X->getType() == CI.getType()) {
        // Get the size of the pointee type.
-      uint64_t Size = TD->getABITypeSize(DestPointee);
+      uint64_t Size = TD->getTypePaddedSize(DestPointee);
  
        // Convert the constant to intptr type.
        APInt Offset = Cst->getValue();
@@ -8417,7 +8427,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
      // "inttoptr+GEP" instead of "add+intptr".
      
      // Get the size of the pointee type.
-    uint64_t Size = TD->getABITypeSize(DestPointee);
+    uint64_t Size = TD->getTypePaddedSize(DestPointee);
      
      // Convert the constant to intptr type.
      APInt Offset = Cst->getValue();
@@ -8709,11 +8719,11 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
        // (x <s 0) ? -1 : 0 -> ashr x, 31   -> all ones if signed
        // (x >s -1) ? -1 : 0 -> ashr x, 31  -> all ones if not signed
        CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
-      if (match(TrueVal, m_ConstantInt(-1)) &&
-          match(FalseVal, m_ConstantInt(0)))
+      if (match(TrueVal, m_ConstantInt<-1>()) &&
+          match(FalseVal, m_ConstantInt<0>()))
          Pred = ICI->getPredicate();
-      else if (match(TrueVal, m_ConstantInt(0)) &&
-               match(FalseVal, m_ConstantInt(-1)))
+      else if (match(TrueVal, m_ConstantInt<0>()) &&
+               match(FalseVal, m_ConstantInt<-1>()))
          Pred = CmpInst::getInversePredicate(ICI->getPredicate());
        
        if (Pred != CmpInst::BAD_ICMP_PREDICATE) {
@@ -9482,7 +9492,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
    const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
    if (!SrcTy->isSized() || !DstTy->isSized())
      return false;
-  if (TD->getABITypeSize(SrcTy) != TD->getABITypeSize(DstTy))
+  if (TD->getTypePaddedSize(SrcTy) != TD->getTypePaddedSize(DstTy))
      return false;
    return true;
  }
@@ -10460,28 +10470,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    }
    if (MadeChange) return &GEP;
  
-  // If this GEP instruction doesn't move the pointer, and if the input operand
-  // is a bitcast of another pointer, just replace the GEP with a bitcast of the
-  // real input to the dest type.
-  if (GEP.hasAllZeroIndices()) {
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(GEP.getOperand(0))) {
-      // If the bitcast is of an allocation, and the allocation will be
-      // converted to match the type of the cast, don't touch this.
-      if (isa<AllocationInst>(BCI->getOperand(0))) {
-        // See if the bitcast simplifies, if so, don't nuke this GEP yet.
-        if (Instruction *I = visitBitCast(*BCI)) {
-          if (I != BCI) {
-            I->takeName(BCI);
-            BCI->getParent()->getInstList().insert(BCI, I);
-            ReplaceInstUsesWith(*BCI, I);
-          }
-          return &GEP;
-        }
-      }
-      return new BitCastInst(BCI->getOperand(0), GEP.getType());
-    }
-  }
-  
    // Combine Indices - If the source pointer to this getelementptr instruction
    // is a getelementptr instruction, combine the indices of the two
    // getelementptr instructions into a single instruction.
@@ -10620,8 +10608,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();
        const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
        if (isa<ArrayType>(SrcElTy) &&
-          TD->getABITypeSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
-          TD->getABITypeSize(ResElTy)) {
+          TD->getTypePaddedSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
+          TD->getTypePaddedSize(ResElTy)) {
          Value *Idx[2];
          Idx[0] = Constant::getNullValue(Type::Int32Ty);
          Idx[1] = GEP.getOperand(1);
@@ -10638,7 +10626,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        
        if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) {
          uint64_t ArrayEltSize =
-            TD->getABITypeSize(cast<ArrayType>(SrcElTy)->getElementType());
+            TD->getTypePaddedSize(cast<ArrayType>(SrcElTy)->getElementType());
          
          // Check to see if "tmp" is a scale by a multiple of ArrayEltSize.  We
          // allow either a mul, shift, or constant here.
@@ -10692,7 +10680,56 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        }
      }
    }
-
+  
+  /// See if we can simplify:
+  ///   X = bitcast A to B*
+  ///   Y = gep X, <...constant indices...>
+  /// into a gep of the original struct.  This is important for SROA and alias
+  /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
+    if (!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {
+      // Determine how much the GEP moves the pointer.  We are guaranteed to get
+      // a constant back from EmitGEPOffset.
+      ConstantInt *OffsetV = cast<ConstantInt>(EmitGEPOffset(&GEP, GEP, *this));
+      int64_t Offset = OffsetV->getSExtValue();
+      
+      // If this GEP instruction doesn't move the pointer, just replace the GEP
+      // with a bitcast of the real input to the dest type.
+      if (Offset == 0) {
+        // If the bitcast is of an allocation, and the allocation will be
+        // converted to match the type of the cast, don't touch this.
+        if (isa<AllocationInst>(BCI->getOperand(0))) {
+          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+          if (Instruction *I = visitBitCast(*BCI)) {
+            if (I != BCI) {
+              I->takeName(BCI);
+              BCI->getParent()->getInstList().insert(BCI, I);
+              ReplaceInstUsesWith(*BCI, I);
+            }
+            return &GEP;
+          }
+        }
+        return new BitCastInst(BCI->getOperand(0), GEP.getType());
+      }
+      
+      // Otherwise, if the offset is non-zero, we need to find out if there is a
+      // field at Offset in 'A's type.  If so, we can pull the cast through the
+      // GEP.
+      SmallVector<Value*, 8> NewIndices;
+      const Type *InTy =
+        cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();
+      if (FindElementAtOffset(InTy, Offset, NewIndices, TD)) {
+        Instruction *NGEP =
+           GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(),
+                                     NewIndices.end());
+        if (NGEP->getType() == GEP.getType()) return NGEP;
+        InsertNewInstBefore(NGEP, GEP);
+        NGEP->takeName(&GEP);
+        return new BitCastInst(NGEP, GEP.getType());
+      }
+    }
+  }    
+    
    return 0;
  }
  
@@ -10742,7 +10779,7 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {
    // Note that we only do this for alloca's, because malloc should allocate and
    // return a unique pointer, even for a zero byte allocation.
    if (isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized() &&
-      TD->getABITypeSize(AI.getAllocatedType()) == 0)
+      TD->getTypePaddedSize(AI.getAllocatedType()) == 0)
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
  
    return 0;