X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FInstCombine%2FInstCombineCalls.cpp;h=d2f9fc921bd905b29904d75144b58911db001d5a;hb=3574eca1b02600bac4e625297f4ecf745f4c4f32;hp=c3c13ae362fc1b6f4449220ad77a5d12edc00b3c;hpb=d0e277d272d517ca1cda368267d199f0da7cad95;p=oota-llvm.git

diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c3c13ae362f..d2f9fc921bd 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -13,7 +13,7 @@
 #include "InstCombine.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -29,6 +29,26 @@ static Type *getPromotedType(Type *Ty) {
   return Ty;
 }
 
+/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
+/// single scalar element, like {{{type}}} or [1 x type], return type.
+static Type *reduceToSingleValueType(Type *T) {
+  while (!T->isSingleValueType()) {
+    if (StructType *STy = dyn_cast<StructType>(T)) {
+      if (STy->getNumElements() == 1)
+        T = STy->getElementType(0);
+      else
+        break;
+    } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
+      if (ATy->getNumElements() == 1)
+        T = ATy->getElementType();
+      else
+        break;
+    } else
+      break;
+  }
+
+  return T;
+}
 
 Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), TD);
@@ -37,26 +57,26 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   unsigned CopyAlign = MI->getAlignment();
 
   if (CopyAlign < MinAlign) {
-    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), 
+    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                              MinAlign, false));
     return MI;
   }
-  
+
   // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
   // load/store.
   ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
   if (MemOpLength == 0) return 0;
-  
+
   // Source and destination pointer types are always "i8*" for intrinsic.  See
   // if the size is something we can handle with a single primitive load/store.
   // A single load+store correctly handles overlapping memory in the memmove
   // case.
-  unsigned Size = MemOpLength->getZExtValue();
-  if (Size == 0) return MI;  // Delete this mem transfer.
-  
+  uint64_t Size = MemOpLength->getLimitedValue();
+  assert(Size && "0-sized memory transfering should be removed already.");
+
   if (Size > 8 || (Size&(Size-1))) return 0;  // If not 1/2/4/8 bytes, exit.
-  
+
   // Use an integer load+store unless we can find something better.
   unsigned SrcAddrSp =
     cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
@@ -66,7 +86,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
   Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
   Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
-  
+
   // Memcpy forces the use of i8* for the source and destination.  That means
   // that if you're using memcpy to move one double around, you'll get a cast
   // from double* to i8*.  We'd much rather use a double load+store rather than
@@ -74,46 +94,49 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   // dest address will be promotable.  See if we can find a better type than the
   // integer datatype.
   Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+  MDNode *CopyMD = 0;
   if (StrippedDest != MI->getArgOperand(0)) {
     Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                                     ->getElementType();
     if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
       // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
       // down through these levels if so.
-      while (!SrcETy->isSingleValueType()) {
-        if (StructType *STy = dyn_cast<StructType>(SrcETy)) {
-          if (STy->getNumElements() == 1)
-            SrcETy = STy->getElementType(0);
-          else
-            break;
-        } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcETy)) {
-          if (ATy->getNumElements() == 1)
-            SrcETy = ATy->getElementType();
-          else
-            break;
-        } else
-          break;
-      }
-      
+      SrcETy = reduceToSingleValueType(SrcETy);
+
       if (SrcETy->isSingleValueType()) {
         NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
         NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
+
+        // If the memcpy has metadata describing the members, see if we can
+        // get the TBAA tag describing our copy.
+        if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
+          if (M->getNumOperands() == 3 &&
+              isa<ConstantInt>(M->getOperand(0)) &&
+              cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
+              isa<ConstantInt>(M->getOperand(1)) &&
+              cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
+              isa<MDNode>(M->getOperand(2)))
+            CopyMD = cast<MDNode>(M->getOperand(2));
+        }
       }
     }
   }
-  
-  
+
   // If the memcpy/memmove provides better alignment info than we can
   // infer, use it.
   SrcAlign = std::max(SrcAlign, CopyAlign);
   DstAlign = std::max(DstAlign, CopyAlign);
-  
+
   Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
   Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
   LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
   L->setAlignment(SrcAlign);
+  if (CopyMD)
+    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
   StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
   S->setAlignment(DstAlign);
+  if (CopyMD)
+    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
 
   // Set the size of the copy to 0, it will be deleted on the next iteration.
   MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
@@ -127,22 +150,20 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
                                              Alignment, false));
     return MI;
   }
-  
+
   // Extract the length and alignment and fill if they are constant.
   ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
   ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
   if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
     return 0;
-  uint64_t Len = LenC->getZExtValue();
+  uint64_t Len = LenC->getLimitedValue();
   Alignment = MI->getAlignment();
-  
-  // If the length is zero, this is a no-op
-  if (Len == 0) return MI; // memset(d,c,0,a) -> noop
-  
+  assert(Len && "0-sized memory setting should be removed already.");
+
   // memset(s,c,n) -> store s, c (for n=1,2,4,8)
   if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
     Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
-    
+
     Value *Dest = MI->getDest();
     unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
     Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
@@ -150,13 +171,13 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
 
     // Alignment 0 is identity for alignment 1 for memset, but not store.
     if (Alignment == 0) Alignment = 1;
-    
+
     // Extract the fill value and store.
     uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
     StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                         MI->isVolatile());
     S->setAlignment(Alignment);
-    
+
     // Set the size of the copy to 0, it will be deleted on the next iteration.
     MI->setLength(Constant::getNullValue(LenC->getType()));
     return MI;
@@ -165,15 +186,13 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
   return 0;
 }
 
-/// visitCallInst - CallInst simplification.  This mostly only handles folding 
+/// visitCallInst - CallInst simplification.  This mostly only handles folding
 /// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
 /// the heavy lifting.
 ///
 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
-  if (isFreeCall(&CI))
+  if (isFreeCall(&CI, TLI))
     return visitFree(CI);
-  if (isMalloc(&CI))
-    return visitMalloc(CI);
 
   // If the caller function is nounwind, mark the call as nounwind, even if the
   // callee isn't.
@@ -182,7 +201,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     CI.setDoesNotThrow();
     return &CI;
   }
-  
+
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
   if (!II) return visitCallSite(&CI);
@@ -203,7 +222,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         // alignment is sufficient.
       }
     }
-    
+
     // No other transformations apply to volatile transfers.
     if (MI->isVolatile())
       return 0;
@@ -242,89 +261,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     if (Changed) return II;
   }
-  
+
   switch (II->getIntrinsicID()) {
   default: break;
   case Intrinsic::objectsize: {
-    // We need target data for just about everything so depend on it.
-    if (!TD) break;
-
-    Type *ReturnTy = CI.getType();
-    uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
-
-    // Get to the real allocated thing and offset as fast as possible.
-    Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
-
-    uint64_t Offset = 0;
-    uint64_t Size = -1ULL;
-
-    // Try to look through constant GEPs.
-    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1)) {
-      if (!GEP->hasAllConstantIndices()) break;
-
-      // Get the current byte offset into the thing. Use the original
-      // operand in case we're looking through a bitcast.
-      SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
-      if (!GEP->getPointerOperandType()->isPointerTy())
-        return 0;
-      Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
-
-      Op1 = GEP->getPointerOperand()->stripPointerCasts();
-
-      // Make sure we're not a constant offset from an external
-      // global.
-      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1))
-        if (!GV->hasDefinitiveInitializer()) break;
-    }
-
-    // If we've stripped down to a single global variable that we
-    // can know the size of then just return that.
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
-      if (GV->hasDefinitiveInitializer()) {
-        Constant *C = GV->getInitializer();
-        Size = TD->getTypeAllocSize(C->getType());
-      } else {
-        // Can't determine size of the GV.
-        Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow);
-        return ReplaceInstUsesWith(CI, RetVal);
-      }
-    } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
-      // Get alloca size.
-      if (AI->getAllocatedType()->isSized()) {
-        Size = TD->getTypeAllocSize(AI->getAllocatedType());
-        if (AI->isArrayAllocation()) {
-          const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize());
-          if (!C) break;
-          Size *= C->getZExtValue();
-        }
-      }
-    } else if (CallInst *MI = extractMallocCall(Op1)) {
-      // Get allocation size.
-      Type* MallocType = getMallocAllocatedType(MI);
-      if (MallocType && MallocType->isSized())
-        if (Value *NElems = getMallocArraySize(MI, TD, true))
-          if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
-            Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType);
-    }
-
-    // Do not return "I don't know" here. Later optimization passes could
-    // make it possible to evaluate objectsize to a constant.
-    if (Size == -1ULL)
-      break;
-
-    if (Size < Offset) {
-      // Out of bound reference? Negative index normalized to large
-      // index? Just return "I don't know".
-      return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow));
-    }
-    return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset));
+    uint64_t Size;
+    if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
+      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
+    return 0;
   }
   case Intrinsic::bswap:
     // bswap(bswap(x)) -> x
     if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
       if (Operand->getIntrinsicID() == Intrinsic::bswap)
        return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
-    
+
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
     if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
       if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
@@ -336,7 +287,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         return new TruncInst(V, TI->getType());
       }
     }
-    
+
     break;
   case Intrinsic::powi:
     if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
@@ -361,14 +312,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
-                      KnownZero, KnownOne);
+    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
     unsigned TrailingZeros = KnownOne.countTrailingZeros();
     APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
     if ((Mask & KnownZero) == Mask)
       return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                  APInt(BitWidth, TrailingZeros)));
-    
+
     }
     break;
   case Intrinsic::ctlz: {
@@ -380,31 +330,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     uint32_t BitWidth = IT->getBitWidth();
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
-                      KnownZero, KnownOne);
+    ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
     unsigned LeadingZeros = KnownOne.countLeadingZeros();
     APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
     if ((Mask & KnownZero) == Mask)
       return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                  APInt(BitWidth, LeadingZeros)));
-    
+
     }
     break;
   case Intrinsic::uadd_with_overflow: {
     Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
     IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
     uint32_t BitWidth = IT->getBitWidth();
-    APInt Mask = APInt::getSignBit(BitWidth);
     APInt LHSKnownZero(BitWidth, 0);
     APInt LHSKnownOne(BitWidth, 0);
-    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
     bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
     bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
     if (LHSKnownNegative || LHSKnownPositive) {
       APInt RHSKnownZero(BitWidth, 0);
       APInt RHSKnownOne(BitWidth, 0);
-      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+      ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
       bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
       bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
       if (LHSKnownNegative && RHSKnownNegative) {
@@ -450,7 +398,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // X + undef -> undef
     if (isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-    
+
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X + 0 -> {X, false}
       if (RHS->isZero()) {
@@ -471,7 +419,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     if (isa<UndefValue>(II->getArgOperand(0)) ||
         isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-    
+
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X - 0 -> {X, false}
       if (RHS->isZero()) {
@@ -479,7 +427,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = 
+        Constant *Struct =
           ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
@@ -488,14 +436,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::umul_with_overflow: {
     Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
     unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
-    APInt Mask = APInt::getAllOnesValue(BitWidth);
     APInt LHSKnownZero(BitWidth, 0);
     APInt LHSKnownOne(BitWidth, 0);
-    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
     APInt RHSKnownZero(BitWidth, 0);
     APInt RHSKnownOne(BitWidth, 0);
-    ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+    ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
 
     // Get the largest possible values for each operand.
     APInt LHSMax = ~LHSKnownZero;
@@ -528,19 +475,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // X * undef -> undef
     if (isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-    
+
     if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X*0 -> {0, false}
       if (RHSI->isZero())
         return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
-      
+
       // X * 1 -> {X, false}
       if (RHSI->equalsInt(1)) {
         Constant *V[] = {
           UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
-        Constant *Struct = 
+        Constant *Struct =
           ConstantStruct::get(cast<StructType>(II->getType()), V);
         return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
@@ -559,7 +506,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ppc_altivec_stvxl:
     // Turn stvx -> store if the pointer is known aligned.
     if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
-      Type *OpPtrTy = 
+      Type *OpPtrTy =
         PointerType::getUnqual(II->getArgOperand(0)->getType());
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
       return new StoreInst(II->getArgOperand(0), Ptr);
@@ -570,7 +517,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_sse2_storeu_dq:
     // Turn X86 storeu -> store if the pointer is known aligned.
     if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
-      Type *OpPtrTy = 
+      Type *OpPtrTy =
         PointerType::getUnqual(II->getArgOperand(1)->getType());
       Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
       return new StoreInst(II->getArgOperand(1), Ptr);
@@ -626,7 +573,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
       assert(Mask->getType()->getVectorNumElements() == 16 &&
              "Bad type for intrinsic!");
-      
+
       // Check that all of the elements are integer constants or undefs.
       bool AllEltsOk = true;
       for (unsigned i = 0; i != 16; ++i) {
@@ -637,7 +584,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           break;
         }
       }
-      
+
       if (AllEltsOk) {
         // Cast the input vectors to byte vectors.
         Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
@@ -645,24 +592,24 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                             Mask->getType());
         Value *Result = UndefValue::get(Op0->getType());
-        
+
         // Only extract each element once.
         Value *ExtractedElts[32];
         memset(ExtractedElts, 0, sizeof(ExtractedElts));
-        
+
         for (unsigned i = 0; i != 16; ++i) {
           if (isa<UndefValue>(Mask->getAggregateElement(i)))
             continue;
-          unsigned Idx = 
+          unsigned Idx =
             cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
           Idx &= 31;  // Match the hardware behavior.
-          
+
           if (ExtractedElts[Idx] == 0) {
-            ExtractedElts[Idx] = 
+            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
                                            Builder->getInt32(Idx&15));
           }
-          
+
           // Insert this value into the result vector.
           Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                                                 Builder->getInt32(i));
@@ -698,6 +645,57 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
+  case Intrinsic::arm_neon_vmulls:
+  case Intrinsic::arm_neon_vmullu: {
+    Value *Arg0 = II->getArgOperand(0);
+    Value *Arg1 = II->getArgOperand(1);
+
+    // Handle mul by zero first:
+    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
+      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
+    }
+
+    // Check for constant LHS & RHS - in this case we just simplify.
+    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu);
+    VectorType *NewVT = cast<VectorType>(II->getType());
+    unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth();
+    if (ConstantDataVector *CV0 = dyn_cast<ConstantDataVector>(Arg0)) {
+      if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
+        VectorType* VT = cast<VectorType>(CV0->getType());
+        SmallVector<Constant*, 4> NewElems;
+        for (unsigned i = 0; i < VT->getNumElements(); ++i) {
+          APInt CV0E =
+            (cast<ConstantInt>(CV0->getAggregateElement(i)))->getValue();
+          CV0E = Zext ? CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
+          APInt CV1E =
+            (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
+          CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
+          NewElems.push_back(
+            ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
+        }
+        return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
+      }
+
+      // Couldn't simplify - cannonicalize constant to the RHS.
+      std::swap(Arg0, Arg1);
+    }
+
+    // Handle mul by one:
+    if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
+      if (ConstantInt *Splat =
+            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
+        if (Splat->isOne()) {
+          if (Zext)
+            return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
+          // else
+          return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
+        }
+      }
+    }
+
+    break;
+  }
+
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.
@@ -708,14 +706,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         return EraseInstFromFunction(CI);
       }
     }
-    
+
     // Scan down this block to see if there is another stack restore in the
     // same block without an intervening call/alloca.
     BasicBlock::iterator BI = II;
     TerminatorInst *TI = II->getParent()->getTerminator();
     bool CannotRemove = false;
     for (++BI; &*BI != TI; ++BI) {
-      if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+      if (isa<AllocaInst>(BI)) {
        CannotRemove = true;
        break;
      }
@@ -733,12 +731,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         }
       }
     }
-    
+
     // If the stack restore is in a return, resume, or unwind block and if there
     // are no allocas or calls between the restore and the return, nuke the
     // restore.
-    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) ||
-                          isa<UnwindInst>(TI)))
+    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
       return EraseInstFromFunction(CI);
     break;
   }
@@ -753,11 +750,11 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
   return visitCallSite(&II);
 }
 
-/// isSafeToEliminateVarargsCast - If this cast does not affect the value 
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
 /// passed through the varargs area, we can eliminate the use of the cast.
 static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                          const CastInst * const CI,
-                                         const TargetData * const TD,
+                                         const DataLayout * const TD,
                                          const int ix) {
   if (!CI->isLosslessCast())
     return false;
@@ -768,7 +765,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
   if (!CS.isByValArgument(ix))
     return true;
 
-  Type* SrcTy = 
+  Type* SrcTy =
           cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
   Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
   if (!SrcTy->isSized() || !DstTy->isSized())
@@ -812,14 +809,14 @@ public:
 } // end anonymous namespace
 
 // Try to fold some different type of calls here.
-// Currently we're only working with the checking functions, memcpy_chk, 
+// Currently we're only working with the checking functions, memcpy_chk,
 // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
 // strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
   if (CI->getCalledFunction() == 0) return 0;
 
   InstCombineFortifiedLibCalls Simplifier(this);
-  Simplifier.fold(CI, TD);
+  Simplifier.fold(CI, TD, TLI);
   return Simplifier.NewInstruction;
 }
 
@@ -903,6 +900,9 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
 // visitCallSite - Improvements for call and invoke instructions.
 //
 Instruction *InstCombiner::visitCallSite(CallSite CS) {
+  if (isAllocLikeFn(CS.getInstruction(), TLI))
+    return visitAllocSite(*CS.getInstruction());
+
   bool Changed = false;
 
   // If the callee is a pointer to a function, attempt to move any casts to the
@@ -921,7 +921,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
         !CalleeF->isDeclaration()) {
       Instruction *OldCall = CS.getInstruction();
       new StoreInst(ConstantInt::getTrue(Callee->getContext()),
-                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), 
+                UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                                   OldCall);
       // If OldCall dues not return void then replaceAllUsesWith undef.
       // This allows ValueHandlers and custom metadata to adjust itself.
       if (!OldCall->getType()->isVoidTy())
         ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
       if (isa<CallInst>(OldCall))
         return EraseInstFromFunction(*OldCall);
-      
+
       // We cannot remove an invoke, because it would change the CFG, just
       // change the callee to a null pointer.
       cast<InvokeInst>(OldCall)->setCalledFunction(
@@ -938,24 +938,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
   if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
-    // This instruction is not reachable, just remove it.  We insert a store to
-    // undef so that we know that this code is not reachable, despite the fact
-    // that we can't modify the CFG here.
-    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
-               UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
-                  CS.getInstruction());
-
     // If CS does not return void then replaceAllUsesWith undef.
     // This allows ValueHandlers and custom metadata to adjust itself.
     if (!CS.getInstruction()->getType()->isVoidTy())
       ReplaceInstUsesWith(*CS.getInstruction(),
                           UndefValue::get(CS.getInstruction()->getType()));
-    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
-      // Don't break the CFG, insert a dummy cond branch.
-      BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
-                         ConstantInt::getTrue(Callee->getContext()), II);
+    if (isa<InvokeInst>(CS.getInstruction())) {
+      // Can't remove an invoke because we cannot change the CFG.
+      return 0;
     }
+
+    // This instruction is not reachable, just remove it.  We insert a store to
+    // undef so that we know that this code is not reachable, despite the fact
+    // that we can't modify the CFG here.
+    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+                  CS.getInstruction());
+
     return EraseInstFromFunction(*CS.getInstruction());
   }
@@ -984,7 +984,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
       Changed = true;
     }
 
-  // Try to optimize the call if possible, we require TargetData for most of
+  // Try to optimize the call if possible, we require DataLayout for most of
   // this.  None of these calls are seen as possibly dead so go ahead and
   // delete the instruction now.
   if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
@@ -1037,7 +1037,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
     Attributes RAttrs = CallerPAL.getRetAttributes();
-    if (RAttrs & Attribute::typeIncompatible(NewRetTy))
+    if (RAttrs & Attributes::typeIncompatible(NewRetTy))
       return false;   // Attribute not compatible with transformed value.
   }
 
@@ -1067,16 +1067,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
       return false;   // Cannot transform this parameter value.
 
     Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
-    if (Attrs & Attribute::typeIncompatible(ParamTy))
+    if (Attrs & Attributes::typeIncompatible(ParamTy))
      return false;   // Attribute not compatible with transformed value.
-    
+
     // If the parameter is passed as a byval argument, then we have to have a
     // sized type and the sized type has to have the same size as the old type.
     if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
       PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
       if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
         return false;
-      
+
       Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
       if (TD->getTypeAllocSize(CurElTy) !=
           TD->getTypeAllocSize(ParamPTy->getElementType()))
@@ -1104,12 +1104,30 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
     if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
       return false;
+
+    // If both the callee and the cast type are varargs, we still have to make
+    // sure the number of fixed parameters are the same or we have the same
+    // ABI issues as if we introduce a varargs call.
+    if (FT->isVarArg() &&
+        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
+        FT->getNumParams() !=
+        cast<FunctionType>(APTy->getElementType())->getNumParams())
+      return false;
   }
 
-  // If we're casting varargs to non-varargs, that may not be allowable
-  // under the ABI, so conservatively don't do anything.
-  if (FT->getNumParams() < NumActualArgs && FT->isVarArg())
-    return false;
+  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
+      !CallerPAL.isEmpty())
+    // In this case we have more arguments than the new function type, but we
+    // won't be dropping them.  Check that these extra arguments have attributes
+    // that are compatible with being a vararg call argument.
+    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
+      if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams())
+        break;
+      Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs;
+      if (PAttrs & Attribute::VarArgsIncompatible)
+        return false;
+    }
+
   // Okay, we decided that this is a safe thing to do: go ahead and start
   // inserting cast instructions as necessary.
@@ -1123,7 +1141,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 
     // If the return value is not being used, the type may not be compatible
     // with the existing attributes.  Wipe out any problematic attributes.
-    RAttrs &= ~Attribute::typeIncompatible(NewRetTy);
+    RAttrs &= ~Attributes::typeIncompatible(NewRetTy);
 
     // Add the new return attributes.
     if (RAttrs)
@@ -1181,8 +1199,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   if (NewRetTy->isVoidTy())
     Caller->setName("");   // Void type should not have a name.
 
-  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),
-                                                     attrVec.end());
+  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec);
 
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1348,14 +1365,13 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
 
       // Replace the trampoline call with a direct call.  Let the generic
       // code sort out any function type mismatches.
-      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, 
+      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                                 FTy->isVarArg());
       Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
-       NestF : ConstantExpr::getBitCast(NestF, 
+       NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
-      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
-                                                   NewAttrs.end());
+      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs);
 
       Instruction *NewCaller;
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
@@ -1381,7 +1397,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
     // parameter, there is no need to adjust the argument list.  Let the generic
     // code sort out any function type mismatches.
     Constant *NewCallee =
-      NestF->getType() == PTy ? NestF : 
+      NestF->getType() == PTy ? NestF :
                                 ConstantExpr::getBitCast(NestF, PTy);
     CS.setCalledFunction(NewCallee);
     return CS.getInstruction();
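
For readers skimming the patch, here is a minimal source-level sketch of the memcpy fold that SimplifyMemTransfer performs above; it is not part of the diff, and the struct and function names are hypothetical. When the copied type reduces to a single scalar such as {{{double}}} or [1 x double] and the length is 1/2/4/8 bytes, the call can be rewritten as one scalar load/store instead of a copy through i8*.

    // Hypothetical illustration only: assumes InstCombine runs on the IR
    // generated for this function; the names below are made up for the example.
    #include <cstring>

    struct Wrapped { double d; };   // reduces to a single 'double'

    void copyWrapped(Wrapped *dst, const Wrapped *src) {
      // 8-byte fixed-size memcpy: foldable to a single double load + store.
      std::memcpy(dst, src, sizeof(Wrapped));
    }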