X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FUtils%2FInlineFunction.cpp;h=ee440e13da7a0a55cc2dbd6ea53aff6e1fe1927c;hb=529919ff310cbfce1ba55ea252ff738d5b56b93d;hp=f0a9f2b1fcb354dc088555c534ab4cdb51100f90;hpb=6408a8b836f801d27f61219c245a35b44ad10f00;p=oota-llvm.git diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index f0a9f2b1fcb..ee440e13da7 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -13,10 +13,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" @@ -24,14 +30,29 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CommandLine.h" +#include using namespace llvm; +static cl::opt +EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), + cl::Hidden, + cl::desc("Convert noalias attributes to metadata during inlining.")); + +static cl::opt +PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", + cl::init(true), cl::Hidden, + cl::desc("Convert align attributes to assumptions during inlining.")); + bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, bool InsertLifetime) { return InlineFunction(CallSite(CI), IFI, InsertLifetime); @@ -84,7 +105,7 @@ namespace { /// split the landing pad block after the landingpad instruction and jump /// to there. void forwardResume(ResumeInst *RI, - SmallPtrSet &InlinedLPads); + SmallPtrSetImpl &InlinedLPads); /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind /// destination block for the given basic block, using the values for the @@ -143,7 +164,7 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. void InvokeInliningInfo::forwardResume(ResumeInst *RI, - SmallPtrSet &InlinedLPads) { + SmallPtrSetImpl &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); BasicBlock *Src = RI->getParent(); @@ -233,9 +254,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // Append the clauses from the outer landing pad instruction into the inlined // landing pad instructions. LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); - for (SmallPtrSet::iterator I = InlinedLPads.begin(), - E = InlinedLPads.end(); I != E; ++I) { - LandingPadInst *InlinedLPad = *I; + for (LandingPadInst *InlinedLPad : InlinedLPads) { unsigned OuterNum = OuterLPad->getNumClauses(); InlinedLPad->reserveClauses(OuterNum); for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) @@ -260,6 +279,384 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); } +/// CloneAliasScopeMetadata - When inlining a function that contains noalias +/// scope metadata, this metadata needs to be cloned so that the inlined blocks +/// have different "unqiue scopes" at every call site. Were this not done, then +/// aliasing scopes from a function inlined into a caller multiple times could +/// not be differentiated (and this would lead to miscompiles because the +/// non-aliasing property communicated by the metadata could have +/// call-site-specific control dependencies). +static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { + const Function *CalledFunc = CS.getCalledFunction(); + SetVector MD; + + // Note: We could only clone the metadata if it is already used in the + // caller. I'm omitting that check here because it might confuse + // inter-procedural alias analysis passes. We can revisit this if it becomes + // an efficiency or overhead problem. + + for (Function::const_iterator I = CalledFunc->begin(), IE = CalledFunc->end(); + I != IE; ++I) + for (BasicBlock::const_iterator J = I->begin(), JE = I->end(); J != JE; ++J) { + if (const MDNode *M = J->getMetadata(LLVMContext::MD_alias_scope)) + MD.insert(M); + if (const MDNode *M = J->getMetadata(LLVMContext::MD_noalias)) + MD.insert(M); + } + + if (MD.empty()) + return; + + // Walk the existing metadata, adding the complete (perhaps cyclic) chain to + // the set. + SmallVector Queue(MD.begin(), MD.end()); + while (!Queue.empty()) { + const MDNode *M = cast(Queue.pop_back_val()); + for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) + if (const MDNode *M1 = dyn_cast(M->getOperand(i))) + if (MD.insert(M1)) + Queue.push_back(M1); + } + + // Now we have a complete set of all metadata in the chains used to specify + // the noalias scopes and the lists of those scopes. + SmallVector DummyNodes; + DenseMap MDMap; + for (SetVector::iterator I = MD.begin(), IE = MD.end(); + I != IE; ++I) { + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[*I].reset(DummyNodes.back().get()); + } + + // Create new metadata nodes to replace the dummy nodes, replacing old + // metadata references with either a dummy node or an already-created new + // node. + for (SetVector::iterator I = MD.begin(), IE = MD.end(); + I != IE; ++I) { + SmallVector NewOps; + for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) { + const Metadata *V = (*I)->getOperand(i); + if (const MDNode *M = dyn_cast(V)) + NewOps.push_back(MDMap[M]); + else + NewOps.push_back(const_cast(V)); + } + + MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); + MDTuple *TempM = cast(MDMap[*I]); + assert(TempM->isTemporary() && "Expected temporary node"); + + TempM->replaceAllUsesWith(NewM); + } + + // Now replace the metadata in the new inlined instructions with the + // repacements from the map. + for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (!VMI->second) + continue; + + Instruction *NI = dyn_cast(VMI->second); + if (!NI) + continue; + + if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { + MDNode *NewMD = MDMap[M]; + // If the call site also had alias scope metadata (a list of scopes to + // which instructions inside it might belong), propagate those scopes to + // the inlined instructions. + if (MDNode *CSM = + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + NewMD = MDNode::concatenate(NewMD, CSM); + NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); + } else if (NI->mayReadOrWriteMemory()) { + if (MDNode *M = + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + NI->setMetadata(LLVMContext::MD_alias_scope, M); + } + + if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) { + MDNode *NewMD = MDMap[M]; + // If the call site also had noalias metadata (a list of scopes with + // which instructions inside it don't alias), propagate those scopes to + // the inlined instructions. + if (MDNode *CSM = + CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + NewMD = MDNode::concatenate(NewMD, CSM); + NI->setMetadata(LLVMContext::MD_noalias, NewMD); + } else if (NI->mayReadOrWriteMemory()) { + if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + NI->setMetadata(LLVMContext::MD_noalias, M); + } + } +} + +/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then +/// add new alias scopes for each noalias argument, tag the mapped noalias +/// parameters with noalias metadata specifying the new scope, and tag all +/// non-derived loads, stores and memory intrinsics with the new alias scopes. +static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, + const DataLayout &DL, AliasAnalysis *AA) { + if (!EnableNoAliasConversion) + return; + + const Function *CalledFunc = CS.getCalledFunction(); + SmallVector NoAliasArgs; + + for (Function::const_arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); I != E; ++I) { + if (I->hasNoAliasAttr() && !I->hasNUses(0)) + NoAliasArgs.push_back(I); + } + + if (NoAliasArgs.empty()) + return; + + // To do a good job, if a noalias variable is captured, we need to know if + // the capture point dominates the particular use we're considering. + DominatorTree DT; + DT.recalculate(const_cast(*CalledFunc)); + + // noalias indicates that pointer values based on the argument do not alias + // pointer values which are not based on it. So we add a new "scope" for each + // noalias function argument. Accesses using pointers based on that argument + // become part of that alias scope, accesses using pointers not based on that + // argument are tagged as noalias with that scope. + + DenseMap NewScopes; + MDBuilder MDB(CalledFunc->getContext()); + + // Create a new scope domain for this function. + MDNode *NewDomain = + MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); + for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { + const Argument *A = NoAliasArgs[i]; + + std::string Name = CalledFunc->getName(); + if (A->hasName()) { + Name += ": %"; + Name += A->getName(); + } else { + Name += ": argument "; + Name += utostr(i); + } + + // Note: We always create a new anonymous root here. This is true regardless + // of the linkage of the callee because the aliasing "scope" is not just a + // property of the callee, but also all control dependencies in the caller. + MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); + NewScopes.insert(std::make_pair(A, NewScope)); + } + + // Iterate over all new instructions in the map; for all memory-access + // instructions, add the alias scope metadata. + for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (const Instruction *I = dyn_cast(VMI->first)) { + if (!VMI->second) + continue; + + Instruction *NI = dyn_cast(VMI->second); + if (!NI) + continue; + + bool IsArgMemOnlyCall = false, IsFuncCall = false; + SmallVector PtrArgs; + + if (const LoadInst *LI = dyn_cast(I)) + PtrArgs.push_back(LI->getPointerOperand()); + else if (const StoreInst *SI = dyn_cast(I)) + PtrArgs.push_back(SI->getPointerOperand()); + else if (const VAArgInst *VAAI = dyn_cast(I)) + PtrArgs.push_back(VAAI->getPointerOperand()); + else if (const AtomicCmpXchgInst *CXI = dyn_cast(I)) + PtrArgs.push_back(CXI->getPointerOperand()); + else if (const AtomicRMWInst *RMWI = dyn_cast(I)) + PtrArgs.push_back(RMWI->getPointerOperand()); + else if (ImmutableCallSite ICS = ImmutableCallSite(I)) { + // If we know that the call does not access memory, then we'll still + // know that about the inlined clone of this call site, and we don't + // need to add metadata. + if (ICS.doesNotAccessMemory()) + continue; + + IsFuncCall = true; + if (AA) { + AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(ICS); + if (MRB == AliasAnalysis::OnlyAccessesArgumentPointees || + MRB == AliasAnalysis::OnlyReadsArgumentPointees) + IsArgMemOnlyCall = true; + } + + for (ImmutableCallSite::arg_iterator AI = ICS.arg_begin(), + AE = ICS.arg_end(); AI != AE; ++AI) { + // We need to check the underlying objects of all arguments, not just + // the pointer arguments, because we might be passing pointers as + // integers, etc. + // However, if we know that the call only accesses pointer arguments, + // then we only need to check the pointer arguments. + if (IsArgMemOnlyCall && !(*AI)->getType()->isPointerTy()) + continue; + + PtrArgs.push_back(*AI); + } + } + + // If we found no pointers, then this instruction is not suitable for + // pairing with an instruction to receive aliasing metadata. + // However, if this is a call, this we might just alias with none of the + // noalias arguments. + if (PtrArgs.empty() && !IsFuncCall) + continue; + + // It is possible that there is only one underlying object, but you + // need to go through several PHIs to see it, and thus could be + // repeated in the Objects list. + SmallPtrSet ObjSet; + SmallVector Scopes, NoAliases; + + SmallSetVector NAPtrArgs; + for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) { + SmallVector Objects; + GetUnderlyingObjects(const_cast(PtrArgs[i]), + Objects, DL, /* MaxLookup = */ 0); + + for (Value *O : Objects) + ObjSet.insert(O); + } + + // Figure out if we're derived from anything that is not a noalias + // argument. + bool CanDeriveViaCapture = false, UsesAliasingPtr = false; + for (const Value *V : ObjSet) { + // Is this value a constant that cannot be derived from any pointer + // value (we need to exclude constant expressions, for example, that + // are formed from arithmetic on global symbols). + bool IsNonPtrConst = isa(V) || isa(V) || + isa(V) || + isa(V) || isa(V); + if (IsNonPtrConst) + continue; + + // If this is anything other than a noalias argument, then we cannot + // completely describe the aliasing properties using alias.scope + // metadata (and, thus, won't add any). + if (const Argument *A = dyn_cast(V)) { + if (!A->hasNoAliasAttr()) + UsesAliasingPtr = true; + } else { + UsesAliasingPtr = true; + } + + // If this is not some identified function-local object (which cannot + // directly alias a noalias argument), or some other argument (which, + // by definition, also cannot alias a noalias argument), then we could + // alias a noalias argument that has been captured). + if (!isa(V) && + !isIdentifiedFunctionLocal(const_cast(V))) + CanDeriveViaCapture = true; + } + + // A function call can always get captured noalias pointers (via other + // parameters, globals, etc.). + if (IsFuncCall && !IsArgMemOnlyCall) + CanDeriveViaCapture = true; + + // First, we want to figure out all of the sets with which we definitely + // don't alias. Iterate over all noalias set, and add those for which: + // 1. The noalias argument is not in the set of objects from which we + // definitely derive. + // 2. The noalias argument has not yet been captured. + // An arbitrary function that might load pointers could see captured + // noalias arguments via other noalias arguments or globals, and so we + // must always check for prior capture. + for (const Argument *A : NoAliasArgs) { + if (!ObjSet.count(A) && (!CanDeriveViaCapture || + // It might be tempting to skip the + // PointerMayBeCapturedBefore check if + // A->hasNoCaptureAttr() is true, but this is + // incorrect because nocapture only guarantees + // that no copies outlive the function, not + // that the value cannot be locally captured. + !PointerMayBeCapturedBefore(A, + /* ReturnCaptures */ false, + /* StoreCaptures */ false, I, &DT))) + NoAliases.push_back(NewScopes[A]); + } + + if (!NoAliases.empty()) + NI->setMetadata(LLVMContext::MD_noalias, + MDNode::concatenate( + NI->getMetadata(LLVMContext::MD_noalias), + MDNode::get(CalledFunc->getContext(), NoAliases))); + + // Next, we want to figure out all of the sets to which we might belong. + // We might belong to a set if the noalias argument is in the set of + // underlying objects. If there is some non-noalias argument in our list + // of underlying objects, then we cannot add a scope because the fact + // that some access does not alias with any set of our noalias arguments + // cannot itself guarantee that it does not alias with this access + // (because there is some pointer of unknown origin involved and the + // other access might also depend on this pointer). We also cannot add + // scopes to arbitrary functions unless we know they don't access any + // non-parameter pointer-values. + bool CanAddScopes = !UsesAliasingPtr; + if (CanAddScopes && IsFuncCall) + CanAddScopes = IsArgMemOnlyCall; + + if (CanAddScopes) + for (const Argument *A : NoAliasArgs) { + if (ObjSet.count(A)) + Scopes.push_back(NewScopes[A]); + } + + if (!Scopes.empty()) + NI->setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(CalledFunc->getContext(), Scopes))); + } + } +} + +/// If the inlined function has non-byval align arguments, then +/// add @llvm.assume-based alignment assumptions to preserve this information. +static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { + if (!PreserveAlignmentAssumptions) + return; + auto &DL = CS.getCaller()->getParent()->getDataLayout(); + + // To avoid inserting redundant assumptions, we should check for assumptions + // already in the caller. To do this, we might need a DT of the caller. + DominatorTree DT; + bool DTCalculated = false; + + Function *CalledFunc = CS.getCalledFunction(); + for (Function::arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); + I != E; ++I) { + unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0; + if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) { + if (!DTCalculated) { + DT.recalculate(const_cast(*CS.getInstruction()->getParent() + ->getParent())); + DTCalculated = true; + } + + // If we can already prove the asserted alignment in the context of the + // caller, then don't bother inserting the assumption. + Value *Arg = CS.getArgument(I->getArgNo()); + if (getKnownAlignment(Arg, DL, CS.getInstruction(), + &IFI.ACT->getAssumptionCache(*CalledFunc), + &DT) >= Align) + continue; + + IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, Arg, Align); + } + } +} + /// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee /// into the caller, update the specified callgraph to reflect the changes we /// made. Note that it's possible that not all code was copied over, so only @@ -327,31 +724,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS, static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI) { - LLVMContext &Context = Src->getContext(); - Type *VoidPtrTy = Type::getInt8PtrTy(Context); Type *AggTy = cast(Src->getType())->getElementType(); - Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) }; - Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); - IRBuilder<> builder(InsertBlock->begin()); - Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp"); - Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp"); - - Value *Size; - if (IFI.DL == nullptr) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = ConstantInt::get(Type::getInt64Ty(Context), - IFI.DL->getTypeStoreSize(AggTy)); + IRBuilder<> Builder(InsertBlock->begin()); + + Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. - Value *CallArgs[] = { - DstCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1), - ConstantInt::getFalse(Context) // isVolatile - }; - builder.CreateCall(MemCpyFn, CallArgs); + Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); } /// HandleByValArgument - When inlining a call site that has a byval argument, @@ -363,6 +744,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, PointerType *ArgTy = cast(Arg->getType()); Type *AggTy = ArgTy->getElementType(); + Function *Caller = TheCall->getParent()->getParent(); + // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and // temporary. @@ -373,10 +756,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. return Arg; + const DataLayout &DL = Caller->getParent()->getDataLayout(); + // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(Arg, ByValAlignment, - IFI.DL) >= ByValAlignment) + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, + &IFI.ACT->getAssumptionCache(*Caller)) >= + ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad @@ -384,17 +770,14 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, } // Create the alloca. If we have DataLayout, use nice alignment. - unsigned Align = 1; - if (IFI.DL) - Align = IFI.DL->getPrefTypeAlignment(AggTy); - + unsigned Align = + Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); + // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). Align = std::max(Align, ByValAlignment); - Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), &*Caller->begin()->begin()); IFI.StaticAllocas.push_back(cast(NewAlloca)); @@ -439,20 +822,42 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { return false; } -/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to -/// recursively update InlinedAtEntry of a DebugLoc. -static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, - const DebugLoc &InlinedAtDL, - LLVMContext &Ctx) { - if (MDNode *IA = DL.getInlinedAt(Ctx)) { - DebugLoc NewInlinedAtDL - = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx); - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), - NewInlinedAtDL.getAsMDNode(Ctx)); +/// Rebuild the entire inlined-at chain for this instruction so that the top of +/// the chain now is inlined-at the new call site. +static DebugLoc +updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, + LLVMContext &Ctx, + DenseMap &IANodes) { + SmallVector InlinedAtLocations; + MDLocation *Last = InlinedAtNode; + DebugLoc CurInlinedAt = DL; + + // Gather all the inlined-at nodes + while (MDLocation *IA = + cast_or_null(CurInlinedAt.getInlinedAt(Ctx))) { + // Skip any we've already built nodes for + if (MDLocation *Found = IANodes[IA]) { + Last = Found; + break; + } + + InlinedAtLocations.push_back(IA); + CurInlinedAt = DebugLoc::getFromDILocation(IA); } - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), - InlinedAtDL.getAsMDNode(Ctx)); + // Starting from the top, rebuild the nodes to point to the new inlined-at + // location (then rebuilding the rest of the chain behind it) and update the + // map of already-constructed inlined-at nodes. + for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend(); + I != E; ++I) { + const MDLocation *MD = *I; + Last = IANodes[MD] = MDLocation::getDistinct( + Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); + } + + // And finally create the normal location for this instruction, referring to + // the new inlined-at chain. + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last); } /// fixupLineNumbers - Update inlined instructions' line numbers to @@ -463,6 +868,20 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, if (TheCallDL.isUnknown()) return; + auto &Ctx = Fn->getContext(); + auto *InlinedAtNode = cast(TheCallDL.getAsMDNode(Ctx)); + + // Create a unique call site, not to be confused with any other call from the + // same location. + InlinedAtNode = MDLocation::getDistinct( + Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), + InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); + + // Cache the inlined-at nodes as they're built so they are reused, without + // this every instruction's inlined-at chain would become distinct from each + // other. + DenseMap IANodes; + for (; FI != Fn->end(); ++FI) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { @@ -472,47 +891,33 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, // originates from the call location. This is important for // ((__always_inline__, __nodebug__)) functions which must use caller // location for all instructions in their function body. + + // Don't update static allocas, as they may get moved later. + if (auto *AI = dyn_cast(BI)) + if (isa(AI->getArraySize())) + continue; + BI->setDebugLoc(TheCallDL); } else { - BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); + BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); if (DbgValueInst *DVI = dyn_cast(BI)) { LLVMContext &Ctx = BI->getContext(); MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DVI->setOperand(2, createInlinedVariable(DVI->getVariable(), - InlinedAt, Ctx)); + DVI->setOperand(2, MetadataAsValue::get( + Ctx, createInlinedVariable(DVI->getVariable(), + InlinedAt, Ctx))); + } else if (DbgDeclareInst *DDI = dyn_cast(BI)) { + LLVMContext &Ctx = BI->getContext(); + MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); + DDI->setOperand(1, MetadataAsValue::get( + Ctx, createInlinedVariable(DDI->getVariable(), + InlinedAt, Ctx))); } } } } } -/// Returns a musttail call instruction if one immediately precedes the given -/// return instruction with an optional bitcast instruction between them. -static CallInst *getPrecedingMustTailCall(ReturnInst *RI) { - Instruction *Prev = RI->getPrevNode(); - if (!Prev) - return nullptr; - - if (Value *RV = RI->getReturnValue()) { - if (RV != Prev) - return nullptr; - - // Look through the optional bitcast. - if (auto *BI = dyn_cast(Prev)) { - RV = BI->getOperand(0); - Prev = BI->getPrevNode(); - if (!Prev || RV != Prev) - return nullptr; - } - } - - if (auto *CI = dyn_cast(Prev)) { - if (CI->isMustTailCall()) - return CI; - } - return nullptr; -} - /// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs @@ -601,6 +1006,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Keep a list of pair (dst, src) to emit byval initializations. SmallVector, 4> ByValInit; + auto &DL = Caller->getParent()->getDataLayout(); + assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -626,13 +1033,18 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, VMap[I] = ActualArg; } + // Add alignment assumptions if necessary. We do this before the inlined + // instructions are actually cloned into the caller so that we can easily + // check what will be known at the start of the inlined code. + AddAlignmentAssumptions(CS, IFI); + // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, IFI.DL, TheCall); + &InlinedFunctionInfo, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -648,6 +1060,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Update inlined instructions' line number information. fixupLineNumbers(Caller, FirstNewBlock, TheCall); + + // Clone existing noalias metadata if necessary. + CloneAliasScopeMetadata(CS, VMap); + + // Add noalias metadata if necessary. + AddAliasScopeMetadata(CS, VMap, DL, IFI.AA); + + // FIXME: We could register any cloned assumptions instead of clearing the + // whole function's cache. + if (IFI.ACT) + IFI.ACT->getAssumptionCache(*Caller).clear(); } // If there are any alloca instructions in the block that used to be the entry @@ -689,6 +1112,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, FirstNewBlock->getInstList(), AI, I); } + // Move any dbg.declares describing the allocas into the entry basic block. + DIBuilder DIB(*Caller->getParent()); + for (auto &AI : IFI.StaticAllocas) + replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); } bool InlinedMustTailCalls = false; @@ -746,18 +1173,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast(AI->getArraySize())) { - if (IFI.DL) { - Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType); - uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); - assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); - // Check that array size doesn't saturate uint64_t and doesn't - // overflow when it's multiplied by type size. - if (AllocaArraySize != ~0ULL && - UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { - AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), - AllocaArraySize * AllocaTypeSize); - } + auto &DL = Caller->getParent()->getDataLayout(); + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); } } @@ -765,7 +1191,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (ReturnInst *RI : Returns) { // Don't insert llvm.lifetime.end calls between a musttail call and a // return. The return kills all local allocas. - if (InlinedMustTailCalls && getPrecedingMustTailCall(RI)) + if (InlinedMustTailCalls && + RI->getParent()->getTerminatingMustTailCall()) continue; IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); } @@ -789,7 +1216,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (ReturnInst *RI : Returns) { // Don't insert llvm.stackrestore calls between a musttail call and a // return. The return will restore the stack pointer. - if (InlinedMustTailCalls && getPrecedingMustTailCall(RI)) + if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) continue; IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); } @@ -812,7 +1239,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Handle the returns preceded by musttail calls separately. SmallVector NormalReturns; for (ReturnInst *RI : Returns) { - CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI); + CallInst *ReturnedMustTail = + RI->getParent()->getTerminatingMustTailCall(); if (!ReturnedMustTail) { NormalReturns.push_back(RI); continue; @@ -1016,7 +1444,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { - if (Value *V = SimplifyInstruction(PHI, IFI.DL)) { + auto &DL = Caller->getParent()->getDataLayout(); + if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, + &IFI.ACT->getAssumptionCache(*Caller))) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); }