//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;
using namespace PatternMatch;
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
if (CopyAlign < MinAlign) {
- MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
- MinAlign, false));
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false));
return MI;
}
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
- if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
+ if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
SrcETy = reduceToSingleValueType(SrcETy);
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
return MI;
}
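+/// Fold the x86 vector shift intrinsics (psll/psrl/psra and their immediate
+/// forms) with a constant shift count into generic IR shifts.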
+static Value *SimplifyX86immshift(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ bool LogicalShift = false;
+ bool ShiftLeft = false;
+
+ switch (II.getIntrinsicID()) {
+ default:
+ return nullptr;
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ LogicalShift = false; ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ LogicalShift = true; ShiftLeft = false;
+ break;
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ LogicalShift = true; ShiftLeft = true;
+ break;
+ }
+ assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
+
+ // Simplify if count is constant.
+ auto Arg1 = II.getArgOperand(1);
+ auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
+ auto CDV = dyn_cast<ConstantDataVector>(Arg1);
+ auto CInt = dyn_cast<ConstantInt>(Arg1);
+ if (!CAZ && !CDV && !CInt)
+ return nullptr;
+
+ APInt Count(64, 0);
+ if (CDV) {
+ // SSE2/AVX2 uses all of the first 64 bits of the 128-bit vector
+ // operand to compute the shift amount.
+ auto VT = cast<VectorType>(CDV->getType());
+ unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
+ assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
+ unsigned NumSubElts = 64 / BitWidth;
+
+ // Concatenate the sub-elements to create the 64-bit value.
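+ // E.g. a <4 x i32> count of <3, 0, 0, 0> yields Count == 3.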
+ for (unsigned i = 0; i != NumSubElts; ++i) {
+ unsigned SubEltIdx = (NumSubElts - 1) - i;
+ auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
+ Count = Count.shl(BitWidth);
+ Count |= SubElt->getValue().zextOrTrunc(64);
+ }
+ } else if (CInt)
+ Count = CInt->getValue();
+
+ auto Vec = II.getArgOperand(0);
+ auto VT = cast<VectorType>(Vec->getType());
+ auto SVT = VT->getElementType();
+ unsigned VWidth = VT->getNumElements();
+ unsigned BitWidth = SVT->getPrimitiveSizeInBits();
+
+ // If shift-by-zero then just return the original value.
+ if (Count == 0)
+ return Vec;
+
+ // Handle cases when Shift >= BitWidth.
+ if (Count.uge(BitWidth)) {
+ // If LogicalShift - just return zero.
+ if (LogicalShift)
+ return ConstantAggregateZero::get(VT);
+
+ // If ArithmeticShift - clamp Shift to (BitWidth - 1).
+ Count = APInt(64, BitWidth - 1);
+ }
+
+ // Get a constant vector of the same type as the first operand.
+ auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
+ auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
+
+ if (ShiftLeft)
+ return Builder.CreateShl(Vec, ShiftVec);
+
+ if (LogicalShift)
+ return Builder.CreateLShr(Vec, ShiftVec);
+
+ return Builder.CreateAShr(Vec, ShiftVec);
+}
+
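+/// Fold an x86 pmovsx/pmovzx intrinsic by extracting the low NumDstElts lanes
+/// of the source and sign- or zero-extending them to the result type.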
+static Value *SimplifyX86extend(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder,
+ bool SignExtend) {
+ VectorType *SrcTy = cast<VectorType>(II.getArgOperand(0)->getType());
+ VectorType *DstTy = cast<VectorType>(II.getType());
+ unsigned NumDstElts = DstTy->getNumElements();
+
+ // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
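+ // E.g. pmovsxbw takes a <16 x i8> source and produces an <8 x i16> result,
+ // so lanes 0..7 are extracted and each is sign extended to i16.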
+ SmallVector<int, 8> ShuffleMask;
+ for (int i = 0; i != (int)NumDstElts; ++i)
+ ShuffleMask.push_back(i);
+
+ Value *SV = Builder.CreateShuffleVector(II.getArgOperand(0),
+ UndefValue::get(SrcTy), ShuffleMask);
+ return SignExtend ? Builder.CreateSExt(SV, DstTy)
+ : Builder.CreateZExt(SV, DstTy);
+}
+
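+/// Fold an x86 insertps intrinsic with a constant control byte into a
+/// shufflevector, possibly against a zero vector.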
+static Value *SimplifyX86insertps(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
+
+ // The immediate permute control byte looks like this:
+ // [3:0] - zero mask for each 32-bit lane
+ // [5:4] - select one 32-bit destination lane
+ // [7:6] - select one 32-bit source lane
+
+ uint8_t Imm = CInt->getZExtValue();
+ uint8_t ZMask = Imm & 0xf;
+ uint8_t DestLane = (Imm >> 4) & 0x3;
+ uint8_t SourceLane = (Imm >> 6) & 0x3;
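+ // E.g. an immediate of 0x4A gives ZMask = 0b1010, DestLane = 0 and
+ // SourceLane = 1.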
+
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // If all zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (ZMask == 0xf)
+ return ZeroVector;
+
+ // Initialize by passing all of the first source bits through.
+ int ShuffleMask[4] = { 0, 1, 2, 3 };
+
+ // We may replace the second operand with the zero vector.
+ Value *V1 = II.getArgOperand(1);
+
+ if (ZMask) {
+ // If the zero mask is being used with a single input or the zero mask
+ // overrides the destination lane, this is a shuffle with the zero vector.
+ if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
+ (ZMask & (1 << DestLane))) {
+ V1 = ZeroVector;
+ // We may still move 32-bits of the first source vector from one lane
+ // to another.
+ ShuffleMask[DestLane] = SourceLane;
+ // The zero mask may override the previous insert operation.
+ for (unsigned i = 0; i < 4; ++i)
+ if ((ZMask >> i) & 0x1)
+ ShuffleMask[i] = i + 4;
+ } else {
+ // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
+ return nullptr;
+ }
+ } else {
+ // Replace the selected destination lane with the selected source lane.
+ ShuffleMask[DestLane] = SourceLane + 4;
+ }
+
+ return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *SimplifyX86vperm2(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
+
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
+
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<int, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
+
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
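+ // E.g. an immediate of 0x21 selects the high half of operand 0 for the
+ // low half of the result and the low half of operand 1 for the high half.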
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
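+ // First, see if the call can be simplified away entirely (e.g. it constant
+ // folds) before looking at individual intrinsics.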
+ auto Args = CI.arg_operands();
+ if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
+ TLI, DT, AC))
+ return ReplaceInstUsesWith(CI, V);
+
if (isFreeCall(&CI, TLI))
return visitFree(CI);
}
break;
- case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
- uint32_t BitWidth = IT->getBitWidth();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, II);
- bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
- bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
-
- if (LHSKnownNegative || LHSKnownPositive) {
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, II);
- bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
- bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
- if (LHSKnownNegative && RHSKnownNegative) {
- // The sign bit is set in both cases: this MUST overflow.
- // Create a simple add instruction, and insert it into the struct.
- return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true,
- /*ReUseName*/true);
- }
- if (LHSKnownPositive && RHSKnownPositive) {
- // The sign bit is clear in both cases: this CANNOT overflow.
- // Create a simple add instruction, and insert it into the struct.
- return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false);
- }
- }
- }
- // FALL THROUGH uadd into sadd
+ case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // Canonicalize constants into the RHS.
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
if (isa<Constant>(II->getArgOperand(0)) &&
!isa<Constant>(II->getArgOperand(1))) {
+ // Canonicalize constants into the RHS.
Value *LHS = II->getArgOperand(0);
II->setArgOperand(0, II->getArgOperand(1));
II->setArgOperand(1, LHS);
return II;
}
+ // fall through
- // X + undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X + 0 -> {X, false}
- if (RHS->isZero()) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
-
- // We can strength reduce reduce this signed add into a regular add if we
- // can prove that it will never overflow.
- if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
- }
- }
-
- break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- // undef - X -> undef
- // X - undef -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
- // X - 0 -> {X, false}
- if (ConstRHS->isZero()) {
- return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
- if (WillNotOverflowSignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
- }
- } else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
- }
- }
- break;
- }
- case Intrinsic::umul_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows) {
- return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false);
- } else if (OR == OverflowResult::AlwaysOverflows) {
- return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true);
- }
- } // FALL THROUGH
- case Intrinsic::smul_with_overflow:
- // Canonicalize constants into the RHS.
- if (isa<Constant>(II->getArgOperand(0)) &&
- !isa<Constant>(II->getArgOperand(1))) {
- Value *LHS = II->getArgOperand(0);
- II->setArgOperand(0, II->getArgOperand(1));
- II->setArgOperand(1, LHS);
- return II;
- }
-
- // X * undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+ OverflowCheckFlavor OCF =
+ IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+ assert(OCF != OCF_INVALID && "unexpected!");
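+ // The folds that previously lived inline here (e.g. add of two values with
+ // known-clear sign bits -> 'add nuw' plus a false overflow bit) are now
+ // expected to be handled inside OptimizeOverflowCheck.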
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X*0 -> {0, false}
- if (RHSI->isZero())
- return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+ *II, OperationResult, OverflowResult))
+ return CreateOverflowTuple(II, OperationResult, OverflowResult);
- // X * 1 -> {X, false}
- if (RHSI->equalsInt(1)) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedMul(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
- }
- }
break;
+ }
+
case Intrinsic::minnum:
case Intrinsic::maxnum: {
Value *Arg0 = II->getArgOperand(0);
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
}
+ case Intrinsic::ppc_qpx_qvlfs:
+ // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
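+ // qvlfs loads single-precision elements into a double-precision QPX
+ // register, so the load is modeled as a float-vector load followed by an
+ // fpext to the intrinsic's return type.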
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getType()->getVectorNumElements());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(VTy));
+ Value *Load = Builder->CreateLoad(Ptr);
+ return new FPExtInst(Load, II->getType());
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
+ 32) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getArgOperand(0)->getType()->getVectorNumElements());
+ Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
+ Type *OpPtrTy = PointerType::getUnqual(VTy);
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(TOp, Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
+ 32) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
break;
}
- // Constant fold <A x Bi> << Ci.
- // FIXME: We don't handle _dq because it's a shift of an i128, but is
- // represented in the IR as <2 x i64>. A per element shift is wrong.
- case Intrinsic::x86_sse2_psll_d:
- case Intrinsic::x86_sse2_psll_q:
- case Intrinsic::x86_sse2_psll_w:
+ // Constant fold ashr( <A x Bi>, Ci ).
+ // Constant fold lshr( <A x Bi>, Ci ).
+ // Constant fold shl( <A x Bi>, Ci ).
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
- case Intrinsic::x86_avx2_psll_d:
- case Intrinsic::x86_avx2_psll_q:
- case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
+ if (Value *V = SimplifyX86immshift(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w: {
- // Simplify if count is constant. To 0 if >= BitWidth,
- // otherwise to shl/lshr.
- auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
- auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
- if (!CDV && !CInt)
- break;
- ConstantInt *Count;
- if (CDV)
- Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
- else
- Count = CInt;
-
- auto Vec = II->getArgOperand(0);
- auto VT = cast<VectorType>(Vec->getType());
- if (Count->getZExtValue() >
- VT->getElementType()->getPrimitiveSizeInBits() - 1)
- return ReplaceInstUsesWith(
- CI, ConstantAggregateZero::get(Vec->getType()));
-
- bool isPackedShiftLeft = true;
- switch (II->getIntrinsicID()) {
- default : break;
- case Intrinsic::x86_sse2_psrl_d:
- case Intrinsic::x86_sse2_psrl_q:
- case Intrinsic::x86_sse2_psrl_w:
- case Intrinsic::x86_sse2_psrli_d:
- case Intrinsic::x86_sse2_psrli_q:
- case Intrinsic::x86_sse2_psrli_w:
- case Intrinsic::x86_avx2_psrl_d:
- case Intrinsic::x86_avx2_psrl_q:
- case Intrinsic::x86_avx2_psrl_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w: {
+ if (Value *V = SimplifyX86immshift(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+
+ // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
+ // operand to compute the shift amount.
+ auto ShiftAmt = II->getArgOperand(1);
+ auto ShiftType = cast<VectorType>(ShiftAmt->getType());
+ assert(ShiftType->getPrimitiveSizeInBits() == 128 &&
+ "Unexpected packed shift size");
+ unsigned VWidth = ShiftType->getNumElements();
+
+ APInt DemandedElts = APInt::getLowBitsSet(VWidth, VWidth / 2);
+ APInt UndefElts(VWidth, 0);
+ if (Value *V =
+ SimplifyDemandedVectorElts(ShiftAmt, DemandedElts, UndefElts)) {
+ II->setArgOperand(1, V);
+ return II;
}
-
- unsigned VWidth = VT->getNumElements();
- // Get a constant vector of the same type as the first operand.
- auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
- if (isPackedShiftLeft)
- return BinaryOperator::CreateShl(Vec,
- Builder->CreateVectorSplat(VWidth, VTCI));
-
- return BinaryOperator::CreateLShr(Vec,
- Builder->CreateVectorSplat(VWidth, VTCI));
+ break;
}
+ case Intrinsic::x86_sse41_pmovsxbd:
+ case Intrinsic::x86_sse41_pmovsxbq:
case Intrinsic::x86_sse41_pmovsxbw:
- case Intrinsic::x86_sse41_pmovsxwd:
case Intrinsic::x86_sse41_pmovsxdq:
+ case Intrinsic::x86_sse41_pmovsxwd:
+ case Intrinsic::x86_sse41_pmovsxwq:
+ case Intrinsic::x86_avx2_pmovsxbd:
+ case Intrinsic::x86_avx2_pmovsxbq:
+ case Intrinsic::x86_avx2_pmovsxbw:
+ case Intrinsic::x86_avx2_pmovsxdq:
+ case Intrinsic::x86_avx2_pmovsxwd:
+ case Intrinsic::x86_avx2_pmovsxwq:
+ if (Value *V = SimplifyX86extend(*II, *Builder, true))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse41_pmovzxbd:
+ case Intrinsic::x86_sse41_pmovzxbq:
case Intrinsic::x86_sse41_pmovzxbw:
+ case Intrinsic::x86_sse41_pmovzxdq:
case Intrinsic::x86_sse41_pmovzxwd:
- case Intrinsic::x86_sse41_pmovzxdq: {
- // pmov{s|z}x ignores the upper half of their input vectors.
- unsigned VWidth =
- cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
- unsigned LowHalfElts = VWidth / 2;
- APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
- APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
- InputDemandedElts,
- UndefElts)) {
- II->setArgOperand(0, TmpV);
- return II;
- }
+ case Intrinsic::x86_sse41_pmovzxwq:
+ case Intrinsic::x86_avx2_pmovzxbd:
+ case Intrinsic::x86_avx2_pmovzxbq:
+ case Intrinsic::x86_avx2_pmovzxbw:
+ case Intrinsic::x86_avx2_pmovzxdq:
+ case Intrinsic::x86_avx2_pmovzxwd:
+ case Intrinsic::x86_avx2_pmovzxwq:
+ if (Value *V = SimplifyX86extend(*II, *Builder, false))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = SimplifyX86insertps(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
break;
- }
case Intrinsic::x86_sse4a_insertqi: {
// insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
// ones undef
// TODO: eventually we should lower this intrinsic to IR
- if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
- unsigned Index = CIStart->getZExtValue();
+ if (auto CILength = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ if (auto CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
+ unsigned Index = CIIndex->getZExtValue();
// From AMD documentation: "a value of zero in the field length is
// defined as length of 64".
- unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue();
+ unsigned Length = CILength->equalsInt(0) ? 64 : CILength->getZExtValue();
// From AMD documentation: "If the sum of the bit index + length field
// is greater than 64, the results are undefined".
+ unsigned End = Index + Length;
// Note that both field index and field length are 8-bit quantities.
// Since variables 'Index' and 'Length' are unsigned values
// obtained from zero-extending field index and field length
// respectively, their sum should never wrap around.
- if ((Index + Length) > 64)
+ if (End > 64)
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
if (Length == 64 && Index == 0) {
Builder->CreateShuffleVector(
Vec, Undef, ConstantDataVector::get(
II->getContext(), makeArrayRef(Mask))));
-
} else if (auto Source =
dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
if (Source->hasOneUse() &&
// If the source of the insert has only one use and it's another
// insert (and they're both inserting from the same vector), try to
// bundle both together.
- auto CISourceWidth =
+ auto CISourceLength =
dyn_cast<ConstantInt>(Source->getArgOperand(2));
- auto CISourceStart =
+ auto CISourceIndex =
dyn_cast<ConstantInt>(Source->getArgOperand(3));
- if (CISourceStart && CISourceWidth) {
- unsigned Start = CIStart->getZExtValue();
- unsigned Width = CIWidth->getZExtValue();
- unsigned End = Start + Width;
- unsigned SourceStart = CISourceStart->getZExtValue();
- unsigned SourceWidth = CISourceWidth->getZExtValue();
- unsigned SourceEnd = SourceStart + SourceWidth;
- unsigned NewStart, NewWidth;
+ if (CISourceIndex && CISourceLength) {
+ unsigned SourceIndex = CISourceIndex->getZExtValue();
+ unsigned SourceLength = CISourceLength->getZExtValue();
+ unsigned SourceEnd = SourceIndex + SourceLength;
+ unsigned NewIndex, NewLength;
bool ShouldReplace = false;
- if (Start <= SourceStart && SourceStart <= End) {
- NewStart = Start;
- NewWidth = std::max(End, SourceEnd) - NewStart;
+ if (Index <= SourceIndex && SourceIndex <= End) {
+ NewIndex = Index;
+ NewLength = std::max(End, SourceEnd) - NewIndex;
ShouldReplace = true;
- } else if (SourceStart <= Start && Start <= SourceEnd) {
- NewStart = SourceStart;
- NewWidth = std::max(SourceEnd, End) - NewStart;
+ } else if (SourceIndex <= Index && Index <= SourceEnd) {
+ NewIndex = SourceIndex;
+ NewLength = std::max(SourceEnd, End) - NewIndex;
ShouldReplace = true;
}
if (ShouldReplace) {
- Constant *ConstantWidth = ConstantInt::get(
- II->getArgOperand(2)->getType(), NewWidth, false);
- Constant *ConstantStart = ConstantInt::get(
- II->getArgOperand(3)->getType(), NewStart, false);
+ Constant *ConstantLength = ConstantInt::get(
+ II->getArgOperand(2)->getType(), NewLength, false);
+ Constant *ConstantIndex = ConstantInt::get(
+ II->getArgOperand(3)->getType(), NewIndex, false);
Value *Args[4] = { Source->getArgOperand(0),
- II->getArgOperand(1), ConstantWidth,
- ConstantStart };
+ II->getArgOperand(1), ConstantLength,
+ ConstantIndex };
Module *M = CI.getParent()->getParent()->getParent();
Value *F =
Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
// This optimization is convoluted because the intrinsic is defined as
// getting a vector of floats or doubles for the ps and pd versions.
// FIXME: That should be changed.
+
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
Value *Mask = II->getArgOperand(2);
+
+ // fold (blend A, A, Mask) -> A
+ if (Op0 == Op1)
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Zero Mask - select 1st argument.
+ if (isa<ConstantAggregateZero>(Mask))
+ return ReplaceInstUsesWith(CI, Op0);
+
+ // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
auto Tyi1 = Builder->getInt1Ty();
auto SelectorType = cast<VectorType>(Mask->getType());
Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
}
auto NewSelector = ConstantVector::get(Selectors);
- return SelectInst::Create(NewSelector, II->getArgOperand(1),
- II->getArgOperand(0), "blendv");
- } else {
- break;
+ return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
}
+ break;
}
case Intrinsic::x86_avx_vpermilvar_ps:
return ReplaceInstUsesWith(CI, Shuffle);
}
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ if (Value *V = SimplifyX86vperm2(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (DL && DL->isLittleEndian())
+ if (DL.isLittleEndian())
Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, DL, DT)) {
+ if (isValidAssumeForContext(II, LI, DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return EraseInstFromFunction(*II);
// facts about the relocate value, while being careful to
// preserve relocation semantics.
GCRelocateOperands Operands(II);
- Value *DerivedPtr = Operands.derivedPtr();
+ Value *DerivedPtr = Operands.getDerivedPtr();
+ auto *GCRelocateType = cast<PointerType>(II->getType());
// Remove the relocation if unused, note that this check is required
// to prevent the cases below from looping forever.
// TODO: provide a hook for this in GCStrategy. This is clearly legal for
// most practical collectors, but there was discussion in the review thread
// about whether it was legal for all possible collectors.
- if (isa<UndefValue>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<UndefValue>(DerivedPtr)) {
+ // The gc_relocate's type may not match the derived pointer's type, so
+ // replace it with an undef of the gc_relocate's own type.
+ return ReplaceInstUsesWith(*II, UndefValue::get(GCRelocateType));
+ }
// The relocation of null will be null for most any collector.
// TODO: provide a hook for this in GCStrategy. There might be some weird
// collector this property does not hold for.
- if (isa<ConstantPointerNull>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<ConstantPointerNull>(DerivedPtr)) {
+ // The gc_relocate's type may not match the derived pointer's type, so
+ // replace it with a null pointer of the gc_relocate's own type.
+ return ReplaceInstUsesWith(*II, ConstantPointerNull::get(GCRelocateType));
+ }
// isKnownNonNull -> nonnull attribute
- if (isKnownNonNull(DerivedPtr))
+ if (isKnownNonNullAt(DerivedPtr, II, DT, TLI))
II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
- // TODO: dereferenceable -> deref attribute
+ // isDereferenceablePointer -> deref attribute
+ if (isDereferenceablePointer(DerivedPtr, DL)) {
+ if (Argument *A = dyn_cast<Argument>(DerivedPtr)) {
+ uint64_t Bytes = A->getDereferenceableBytes();
+ II->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
+ }
+ }
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
// Canonicalize on the type from the uses to the defs
-
+
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
}
}
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
- const CastInst * const CI,
- const DataLayout * const DL,
+ const DataLayout &DL,
+ const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
+ if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
return false;
return true;
}
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
- if (Value *With = Simplifier->optimizeCall(CI)) {
+ auto InstCombineRAUW = [this](Instruction *From, Value *With) {
+ ReplaceInstUsesWith(*From, With);
+ };
+ LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
+ if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
}
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
+
if (isAllocLikeFn(CS.getInstruction(), TLI))
return visitAllocSite(*CS.getInstruction());
bool Changed = false;
+ // Mark any parameters that are known to be non-null with the nonnull
+ // attribute. This is helpful for inlining calls to functions with null
+ // checks on their arguments.
+ unsigned ArgNo = 0;
+ for (Value *V : CS.args()) {
+ if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) &&
+ isKnownNonNull(V)) {
+ AttributeSet AS = CS.getAttributes();
+ AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1,
+ Attribute::NonNull);
+ CS.setAttributes(AS);
+ Changed = true;
+ }
+ ArgNo++;
+ }
+ assert(ArgNo == CS.arg_size() && "sanity check");
+
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
- Instruction *I = tryOptimizeCall(CI, DL);
+ Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return EraseInstFromFunction(*I);
dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
if (!Callee)
return false;
+ // A thunk's prototype is a lie; don't try to call such a function directly.
+ if (Callee->hasFnAttribute("thunk"))
+ return false;
Instruction *Caller = CS.getInstruction();
const AttributeSet &CallerPAL = CS.getAttributes();
if (NewRetTy->isStructTy())
return false; // TODO: Handle multiple return values.
- if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
+ if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
if (Callee->isDeclaration())
return false; // Cannot transform this return value.
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
- if (RAttrs.
- hasAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex))
+ if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
unsigned NumActualArgs = CS.arg_size();
unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
+ // Prevent us turning:
+ // declare void @takes_i32_inalloca(i32* inalloca)
+ // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
+ //
+ // into:
+ // call void @takes_i32_inalloca(i32* null)
+ //
+ // Similarly, avoid folding away bitcasts of byval calls.
+ if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
+ return false;
+
CallSite::arg_iterator AI = CS.arg_begin();
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
- if (!CastInst::isBitCastable(ActTy, ParamTy))
+ if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
return false; // Cannot transform this parameter value.
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
- hasAttributes(AttributeFuncs::
- typeIncompatible(ParamTy, i + 1), i + 1))
+ overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
if (CS.isInAllocaArgument(i))
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
Type *CurElTy = ActTy->getPointerElementType();
- if (DL->getTypeAllocSize(CurElTy) !=
- DL->getTypeAllocSize(ParamPTy->getElementType()))
+ if (DL.getTypeAllocSize(CurElTy) !=
+ DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
}
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs.
- removeAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
// Add the new return attributes.
if (RAttrs.hasAttributes())
if ((*AI)->getType() == ParamTy) {
Args.push_back(*AI);
} else {
- Args.push_back(Builder->CreateBitCast(*AI, ParamTy));
+ Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
}
// Add any parameter attributes.
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
- NV = NC = CastInst::Create(CastInst::BitCast, NC, OldRetTy);
+ NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
// If this is an invoke instruction, we should insert it after the first