X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FUtils%2FInlineFunction.cpp;h=15cb26fbbd67af7ed7761371bfbeaf1ebaff8899;hp=19a35d68cffea11c1a76a0bed0f28f7d7d3ab12d;hb=d03868bb86e3091612f81d4123dc00970f9f8286;hpb=3512034554649610c9727daecc29925d34bb3d2a

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 19a35d68cff..15cb26fbbd6 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -29,6 +30,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
@@ -42,22 +44,27 @@ using namespace llvm;
 
 static cl::opt<bool>
-EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(false),
+EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
   cl::Hidden,
   cl::desc("Convert noalias attributes to metadata during inlining."));
 
+static cl::opt<bool>
+PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
+                             cl::init(true), cl::Hidden,
+                             cl::desc("Convert align attributes to assumptions during inlining."));
+
 bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
-                          bool InsertLifetime) {
-  return InlineFunction(CallSite(CI), IFI, InsertLifetime);
+                          AAResults *CalleeAAR, bool InsertLifetime) {
+  return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime);
 }
 bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
-                          bool InsertLifetime) {
-  return InlineFunction(CallSite(II), IFI, InsertLifetime);
+                          AAResults *CalleeAAR, bool InsertLifetime) {
+  return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime);
 }
 
 namespace {
-  /// A class for recording information about inlining through an invoke.
-  class InvokeInliningInfo {
+  /// A class for recording information about inlining a landing pad.
+  class LandingPadInliningInfo {
     BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind.
     BasicBlock *InnerResumeDest; ///< Destination for the callee's resume.
     LandingPadInst *CallerLPad;  ///< LandingPadInst associated with the invoke.
@@ -65,7 +72,7 @@ namespace {
     SmallVector<Value*, 8> UnwindDestPHIValues;
 
   public:
-    InvokeInliningInfo(InvokeInst *II)
+    LandingPadInliningInfo(InvokeInst *II)
       : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
         CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
       // If there are PHI nodes in the unwind destination block, we need to keep
@@ -82,7 +89,7 @@ namespace {
       CallerLPad = cast<LandingPadInst>(I);
     }
 
-    /// getOuterResumeDest - The outer unwind destination is the target of
+    /// The outer unwind destination is the target of
     /// unwind edges introduced for calls within the inlined function.
     BasicBlock *getOuterResumeDest() const {
       return OuterResumeDest;
@@ -92,17 +99,16 @@ namespace {
 
     LandingPadInst *getLandingPadInst() const { return CallerLPad; }
 
-    /// forwardResume - Forward the 'resume' instruction to the caller's landing
-    /// pad block.  When the landing pad block has only one predecessor, this is
+    /// Forward the 'resume' instruction to the caller's landing pad block.
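
// Illustrative sketch (assumed block/value names, not text from this patch):
// forwarding a resume as described above looks like this in IR. Before, the
// inlined body ends in a resume and the caller's landing pad is one block:
//
//   inlined.bb:
//     %rv = landingpad { i8*, i32 } cleanup
//     resume { i8*, i32 } %rv
//
// After splitting the caller's landing pad and creating the inner resume
// destination, the resume becomes a branch and a phi merges the EH values:
//
//   lpad:                                  ; caller's landing pad
//     %lp = landingpad { i8*, i32 } cleanup
//     br label %lpad.body
//
//   lpad.body:                             ; inner resume destination
//     %eh = phi { i8*, i32 } [ %lp, %lpad ], [ %rv, %inlined.bb ]
//     ... original landing pad code, now using %eh ...
//
//   inlined.bb:
//     %rv = landingpad { i8*, i32 } cleanup
//     br label %lpad.body
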
+ /// When the landing pad block has only one predecessor, this is /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. void forwardResume(ResumeInst *RI, SmallPtrSetImpl &InlinedLPads); - /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind - /// destination block for the given basic block, using the values for the - /// original invoke's source block. + /// Add incoming-PHI values to the unwind destination block for the given + /// basic block, using the values for the original invoke's source block. void addIncomingPHIValuesFor(BasicBlock *BB) const { addIncomingPHIValuesForInto(BB, OuterResumeDest); } @@ -117,8 +123,8 @@ namespace { }; } -/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts. -BasicBlock *InvokeInliningInfo::getInnerResumeDest() { +/// Get or create a target for the branch from ResumeInsts. +BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; // Split the landing pad. @@ -152,12 +158,12 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { return InnerResumeDest; } -/// forwardResume - Forward the 'resume' instruction to the caller's landing pad -/// block. When the landing pad block has only one predecessor, this is a simple +/// Forward the 'resume' instruction to the caller's landing pad block. +/// When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI, - SmallPtrSetImpl &InlinedLPads) { +void LandingPadInliningInfo::forwardResume( + ResumeInst *RI, SmallPtrSetImpl &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); BasicBlock *Src = RI->getParent(); @@ -171,13 +177,13 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, RI->eraseFromParent(); } -/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into -/// an invoke, we have to turn all of the calls that can throw into -/// invokes. This function analyze BB to see if there are any calls, and if so, +/// When we inline a basic block into an invoke, +/// we have to turn all of the calls that can throw into invokes. +/// This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. -static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, - InvokeInliningInfo &Invoke) { +static BasicBlock * +HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = BBI++; @@ -200,8 +206,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Create the new invoke instruction. 
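
// Sketched in IR (illustrative names, not from this patch), the rewrite built
// here turns a potentially-throwing call
//
//   bb:
//     %r = call i32 @f(i32 %x)
//     ... uses of %r ...
//
// into an invoke whose unwind edge reaches the caller's handler, with the
// remainder of the block split off as the normal destination:
//
//   bb:
//     %r = invoke i32 @f(i32 %x)
//            to label %bb.noexc unwind label %unwind.edge
//
//   bb.noexc:
//     ... uses of %r ...
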
ImmutableCallSite CS(CI); SmallVector InvokeArgs(CS.arg_begin(), CS.arg_end()); - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, - Invoke.getOuterResumeDest(), + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs, CI->getName(), BB); II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); @@ -213,22 +218,19 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Delete the original call Split->getInstList().pop_front(); - - // Update any PHI nodes in the exceptional block to indicate that there is - // now a new entry in them. - Invoke.addIncomingPHIValuesFor(BB); - return; + return BB; } + return nullptr; } -/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls +/// If we inlined an invoke site, we need to convert calls /// in the body of the inlined function into invokes. /// /// II is the invoke instruction being inlined. FirstNewBlock is the first /// block of the inlined code (the last block is the end of the function), /// and InlineCodeInfo is information about the code that got inlined. -static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, - ClonedCodeInfo &InlinedCodeInfo) { +static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *InvokeDest = II->getUnwindDest(); Function *Caller = FirstNewBlock->getParent(); @@ -236,7 +238,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to // rewrite. - InvokeInliningInfo Invoke(II); + LandingPadInliningInfo Invoke(II); // Get all of the inlined landing pad instructions. SmallPtrSet InlinedLPads; @@ -258,7 +260,11 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) - HandleCallsInBlockInlinedThroughInvoke(BB, Invoke); + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + BB, Invoke.getOuterResumeDest())) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + Invoke.addIncomingPHIValuesFor(NewBB); // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast(BB->getTerminator())) @@ -272,8 +278,103 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); } -/// CloneAliasScopeMetadata - When inlining a function that contains noalias -/// scope metadata, this metadata needs to be cloned so that the inlined blocks +/// If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. 
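
// Both HandleInlinedLandingPad above and HandleInlinedEHPad below must patch
// the PHI nodes of the unwind destination whenever they introduce a new edge
// into it. An illustrative IR sketch (assumed names) of that bookkeeping:
//
//   unwind.dest:
//     %v = phi i32 [ %a, %invoke.bb ]       ; before inlining
//
// Once a call in the inlined body is rewritten into an invoke that also
// unwinds to %unwind.dest, the PHI needs a matching entry that carries the
// value recorded from the original invoke's block:
//
//   unwind.dest:
//     %v = phi i32 [ %a, %invoke.bb ], [ %a, %inlined.call.bb ]
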
+static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Function *Caller = FirstNewBlock->getParent(); + + assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); + + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing the + // edge from this block. + SmallVector UnwindDestPHIValues; + llvm::BasicBlock *InvokeBB = II->getParent(); + for (Instruction &I : *UnwindDest) { + // Save the value to use for this edge. + PHINode *PHI = dyn_cast(&I); + if (!PHI) + break; + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // Add incoming-PHI values to the unwind destination block for the given basic + // block, using the values for the original invoke's source block. + auto UpdatePHINodes = [&](BasicBlock *Src) { + BasicBlock::iterator I = UnwindDest->begin(); + for (Value *V : UnwindDestPHIValues) { + PHINode *PHI = cast(I); + PHI->addIncoming(V, Src); + ++I; + } + }; + + // Forward EH terminator instructions to the caller's invoke destination. + // This is as simple as connect all the instructions which 'unwind to caller' + // to the invoke destination. + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; + ++BB) { + Instruction *I = BB->getFirstNonPHI(); + if (I->isEHPad()) { + if (auto *CEPI = dyn_cast(I)) { + if (CEPI->unwindsToCaller()) { + CatchEndPadInst::Create(CEPI->getContext(), UnwindDest, CEPI); + CEPI->eraseFromParent(); + UpdatePHINodes(BB); + } + } else if (auto *CEPI = dyn_cast(I)) { + if (CEPI->unwindsToCaller()) { + CleanupEndPadInst::Create(CEPI->getCleanupPad(), UnwindDest, CEPI); + CEPI->eraseFromParent(); + UpdatePHINodes(BB); + } + } else if (auto *TPI = dyn_cast(I)) { + if (TPI->unwindsToCaller()) { + SmallVector TerminatePadArgs; + for (Value *Operand : TPI->operands()) + TerminatePadArgs.push_back(Operand); + TerminatePadInst::Create(TPI->getContext(), UnwindDest, TPI); + TPI->eraseFromParent(); + UpdatePHINodes(BB); + } + } else { + assert(isa(I) || isa(I)); + } + } + + if (auto *CRI = dyn_cast(BB->getTerminator())) { + if (CRI->unwindsToCaller()) { + CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); + CRI->eraseFromParent(); + UpdatePHINodes(BB); + } + } + } + + if (InlinedCodeInfo.ContainsCalls) + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; + ++BB) + if (BasicBlock *NewBB = + HandleCallsInBlockInlinedThroughInvoke(BB, UnwindDest)) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + UpdatePHINodes(NewBB); + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + UnwindDest->removePredecessor(InvokeBB); +} + +/// When inlining a function that contains noalias scope metadata, +/// this metadata needs to be cloned so that the inlined blocks /// have different "unqiue scopes" at every call site. 
Were this not done, then /// aliasing scopes from a function inlined into a caller multiple times could /// not be differentiated (and this would lead to miscompiles because the @@ -302,7 +403,7 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // Walk the existing metadata, adding the complete (perhaps cyclic) chain to // the set. - SmallVector Queue(MD.begin(), MD.end()); + SmallVector Queue(MD.begin(), MD.end()); while (!Queue.empty()) { const MDNode *M = cast(Queue.pop_back_val()); for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) @@ -313,13 +414,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // Now we have a complete set of all metadata in the chains used to specify // the noalias scopes and the lists of those scopes. - SmallVector DummyNodes; - DenseMap > MDMap; + SmallVector DummyNodes; + DenseMap MDMap; for (SetVector::iterator I = MD.begin(), IE = MD.end(); I != IE; ++I) { - MDNode *Dummy = MDNode::getTemporary(CalledFunc->getContext(), None); - DummyNodes.push_back(Dummy); - MDMap[*I] = Dummy; + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[*I].reset(DummyNodes.back().get()); } // Create new metadata nodes to replace the dummy nodes, replacing old @@ -327,17 +427,18 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // node. for (SetVector::iterator I = MD.begin(), IE = MD.end(); I != IE; ++I) { - SmallVector NewOps; + SmallVector NewOps; for (unsigned i = 0, ie = (*I)->getNumOperands(); i != ie; ++i) { - const Value *V = (*I)->getOperand(i); + const Metadata *V = (*I)->getOperand(i); if (const MDNode *M = dyn_cast(V)) NewOps.push_back(MDMap[M]); else - NewOps.push_back(const_cast(V)); + NewOps.push_back(const_cast(V)); } - MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps), - *TempM = MDMap[*I]; + MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); + MDTuple *TempM = cast(MDMap[*I]); + assert(TempM->isTemporary() && "Expected temporary node"); TempM->replaceAllUsesWith(NewM); } @@ -359,12 +460,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // which instructions inside it might belong), propagate those scopes to // the inlined instructions. if (MDNode *CSM = - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) NewMD = MDNode::concatenate(NewMD, CSM); NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); } else if (NI->mayReadOrWriteMemory()) { if (MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope)) NI->setMetadata(LLVMContext::MD_alias_scope, M); } @@ -374,27 +475,22 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { // which instructions inside it don't alias), propagate those scopes to // the inlined instructions. if (MDNode *CSM = - CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) NewMD = MDNode::concatenate(NewMD, CSM); NI->setMetadata(LLVMContext::MD_noalias, NewMD); } else if (NI->mayReadOrWriteMemory()) { - if (MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) + if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) NI->setMetadata(LLVMContext::MD_noalias, M); } } - - // Now that everything has been replaced, delete the dummy nodes. 
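
// The temporary-node idiom used above, in isolation (a sketch; Ctx and NewOps
// stand in for this function's values):
//
//   TempMDTuple Dummy = MDTuple::getTemporary(Ctx, None); // forward reference
//   // ... build NewOps, which may reference Dummy in cyclic scope chains ...
//   MDNode *NewM = MDNode::get(Ctx, NewOps);              // uniqued final node
//   Dummy->replaceAllUsesWith(NewM);                      // resolve the cycle
//
// Because TempMDTuple is an owning smart pointer, the temporaries are freed
// automatically, which is why the deleteTemporary() loop removed just below
// is no longer needed.
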
- for (unsigned i = 0, ie = DummyNodes.size(); i != ie; ++i) - MDNode::deleteTemporary(DummyNodes[i]); } -/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then -/// add new alias scopes for each noalias argument, tag the mapped noalias +/// If the inlined function has noalias arguments, +/// then add new alias scopes for each noalias argument, tag the mapped noalias /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, - const DataLayout *DL) { + const DataLayout &DL, AAResults *CalleeAAR) { if (!EnableNoAliasConversion) return; @@ -458,6 +554,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, if (!NI) continue; + bool IsArgMemOnlyCall = false, IsFuncCall = false; SmallVector PtrArgs; if (const LoadInst *LI = dyn_cast(I)) @@ -471,34 +568,46 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, else if (const AtomicRMWInst *RMWI = dyn_cast(I)) PtrArgs.push_back(RMWI->getPointerOperand()); else if (ImmutableCallSite ICS = ImmutableCallSite(I)) { - // If we know that the call does not access memory, then we'll still - // know that about the inlined clone of this call site, and we don't - // need to add metadata. + // If we know that the call does not access memory, then we'll still + // know that about the inlined clone of this call site, and we don't + // need to add metadata. if (ICS.doesNotAccessMemory()) continue; + IsFuncCall = true; + if (CalleeAAR) { + FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS); + if (MRB == FMRB_OnlyAccessesArgumentPointees || + MRB == FMRB_OnlyReadsArgumentPointees) + IsArgMemOnlyCall = true; + } + for (ImmutableCallSite::arg_iterator AI = ICS.arg_begin(), - AE = ICS.arg_end(); AI != AE; ++AI) - // We need to check the underlying objects of all arguments, not just - // the pointer arguments, because we might be passing pointers as - // integers, etc. - // FIXME: If we know that the call only accesses pointer arguments, + AE = ICS.arg_end(); AI != AE; ++AI) { + // We need to check the underlying objects of all arguments, not just + // the pointer arguments, because we might be passing pointers as + // integers, etc. + // However, if we know that the call only accesses pointer arguments, // then we only need to check the pointer arguments. + if (IsArgMemOnlyCall && !(*AI)->getType()->isPointerTy()) + continue; + PtrArgs.push_back(*AI); + } } // If we found no pointers, then this instruction is not suitable for // pairing with an instruction to receive aliasing metadata. // However, if this is a call, this we might just alias with none of the // noalias arguments. - if (PtrArgs.empty() && !isa(I) && !isa(I)) + if (PtrArgs.empty() && !IsFuncCall) continue; // It is possible that there is only one underlying object, but you // need to go through several PHIs to see it, and thus could be // repeated in the Objects list. SmallPtrSet ObjSet; - SmallVector Scopes, NoAliases; + SmallVector Scopes, NoAliases; SmallSetVector NAPtrArgs; for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) { @@ -510,23 +619,59 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, ObjSet.insert(O); } - // Figure out if we're derived from anyhing that is not a noalias + // Figure out if we're derived from anything that is not a noalias // argument. 
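
// Why the capture checks built below matter (an illustrative example, not
// from this patch): nocapture alone does not make a noalias scope safe,
// because a pointer may be captured locally without any copy outliving the
// function:
//
//   define void @g(i32* noalias nocapture %p) {
//     %slot = alloca i32*
//     store i32* %p, i32** %slot    ; local capture, allowed under nocapture
//     %q = load i32*, i32** %slot
//     call void @use(i32* %q)       ; can reach the same memory as %p
//     ret void
//   }
//
// Hence the code below always asks PointerMayBeCapturedBefore rather than
// trusting the nocapture attribute.
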
- bool CanDeriveViaCapture = false; - for (const Value *V : ObjSet) - if (!isIdentifiedFunctionLocal(const_cast(V))) { - CanDeriveViaCapture = true; - break; + bool CanDeriveViaCapture = false, UsesAliasingPtr = false; + for (const Value *V : ObjSet) { + // Is this value a constant that cannot be derived from any pointer + // value (we need to exclude constant expressions, for example, that + // are formed from arithmetic on global symbols). + bool IsNonPtrConst = isa(V) || isa(V) || + isa(V) || + isa(V) || isa(V); + if (IsNonPtrConst) + continue; + + // If this is anything other than a noalias argument, then we cannot + // completely describe the aliasing properties using alias.scope + // metadata (and, thus, won't add any). + if (const Argument *A = dyn_cast(V)) { + if (!A->hasNoAliasAttr()) + UsesAliasingPtr = true; + } else { + UsesAliasingPtr = true; } - + + // If this is not some identified function-local object (which cannot + // directly alias a noalias argument), or some other argument (which, + // by definition, also cannot alias a noalias argument), then we could + // alias a noalias argument that has been captured). + if (!isa(V) && + !isIdentifiedFunctionLocal(const_cast(V))) + CanDeriveViaCapture = true; + } + + // A function call can always get captured noalias pointers (via other + // parameters, globals, etc.). + if (IsFuncCall && !IsArgMemOnlyCall) + CanDeriveViaCapture = true; + // First, we want to figure out all of the sets with which we definitely // don't alias. Iterate over all noalias set, and add those for which: // 1. The noalias argument is not in the set of objects from which we // definitely derive. // 2. The noalias argument has not yet been captured. + // An arbitrary function that might load pointers could see captured + // noalias arguments via other noalias arguments or globals, and so we + // must always check for prior capture. for (const Argument *A : NoAliasArgs) { if (!ObjSet.count(A) && (!CanDeriveViaCapture || - A->hasNoCaptureAttr() || + // It might be tempting to skip the + // PointerMayBeCapturedBefore check if + // A->hasNoCaptureAttr() is true, but this is + // incorrect because nocapture only guarantees + // that no copies outlive the function, not + // that the value cannot be locally captured. !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false, /* StoreCaptures */ false, I, &DT))) @@ -534,36 +679,81 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, } if (!NoAliases.empty()) - NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_noalias), - MDNode::get(CalledFunc->getContext(), NoAliases))); + NI->setMetadata(LLVMContext::MD_noalias, + MDNode::concatenate( + NI->getMetadata(LLVMContext::MD_noalias), + MDNode::get(CalledFunc->getContext(), NoAliases))); + // Next, we want to figure out all of the sets to which we might belong. - // We might below to a set if: - // 1. The noalias argument is in the set of underlying objects - // or - // 2. There is some non-noalias argument in our list and the no-alias - // argument has been captured. - - for (const Argument *A : NoAliasArgs) { - if (ObjSet.count(A) || (CanDeriveViaCapture && - PointerMayBeCapturedBefore(A, - /* ReturnCaptures */ false, - /* StoreCaptures */ false, - I, &DT))) - Scopes.push_back(NewScopes[A]); - } + // We might belong to a set if the noalias argument is in the set of + // underlying objects. 
If there is some non-noalias argument in our list + // of underlying objects, then we cannot add a scope because the fact + // that some access does not alias with any set of our noalias arguments + // cannot itself guarantee that it does not alias with this access + // (because there is some pointer of unknown origin involved and the + // other access might also depend on this pointer). We also cannot add + // scopes to arbitrary functions unless we know they don't access any + // non-parameter pointer-values. + bool CanAddScopes = !UsesAliasingPtr; + if (CanAddScopes && IsFuncCall) + CanAddScopes = IsArgMemOnlyCall; + + if (CanAddScopes) + for (const Argument *A : NoAliasArgs) { + if (ObjSet.count(A)) + Scopes.push_back(NewScopes[A]); + } if (!Scopes.empty()) - NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_alias_scope), - MDNode::get(CalledFunc->getContext(), Scopes))); + NI->setMetadata( + LLVMContext::MD_alias_scope, + MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(CalledFunc->getContext(), Scopes))); } } } -/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee -/// into the caller, update the specified callgraph to reflect the changes we -/// made. Note that it's possible that not all code was copied over, so only +/// If the inlined function has non-byval align arguments, then +/// add @llvm.assume-based alignment assumptions to preserve this information. +static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { + if (!PreserveAlignmentAssumptions) + return; + auto &DL = CS.getCaller()->getParent()->getDataLayout(); + + // To avoid inserting redundant assumptions, we should check for assumptions + // already in the caller. To do this, we might need a DT of the caller. + DominatorTree DT; + bool DTCalculated = false; + + Function *CalledFunc = CS.getCalledFunction(); + for (Function::arg_iterator I = CalledFunc->arg_begin(), + E = CalledFunc->arg_end(); + I != E; ++I) { + unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0; + if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) { + if (!DTCalculated) { + DT.recalculate(const_cast(*CS.getInstruction()->getParent() + ->getParent())); + DTCalculated = true; + } + + // If we can already prove the asserted alignment in the context of the + // caller, then don't bother inserting the assumption. + Value *Arg = CS.getArgument(I->getArgNo()); + if (getKnownAlignment(Arg, DL, CS.getInstruction(), + &IFI.ACT->getAssumptionCache(*CS.getCaller()), + &DT) >= Align) + continue; + + IRBuilder<>(CS.getInstruction()) + .CreateAlignmentAssumption(DL, Arg, Align); + } + } +} + +/// Once we have cloned code over from a callee into the caller, +/// update the specified callgraph to reflect the changes we made. +/// Note that it's possible that not all code was copied over, so only /// some edges of the callgraph may remain. static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, @@ -598,8 +788,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. Instruction *NewCall = dyn_cast(VMI->second); - if (!NewCall) continue; + if (!NewCall) + continue; + // We do not treat intrinsic calls like real function calls because we + // expect them to become inline code; do not add an edge for an intrinsic. 
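
// For reference, the assumption built by AddAlignmentAssumptions above
// expands to the following IR pattern (illustrative operand names taken from
// IRBuilder's CreateAlignmentAssumption; shown for Align == 32):
//
//   %ptrint    = ptrtoint float* %arg to i64
//   %maskedptr = and i64 %ptrint, 31
//   %maskcond  = icmp eq i64 %maskedptr, 0
//   call void @llvm.assume(i1 %maskcond)
//
// Later passes recover the alignment from such assumptions through the
// AssumptionCache, which is why IFI.ACT is threaded into getKnownAlignment.
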
+ CallSite CS = CallSite(NewCall); + if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic()) + continue; + // Remember that this call site got inlined for the client of // InlineFunction. IFI.InlinedCalls.push_back(NewCall); @@ -628,34 +825,18 @@ static void UpdateCallGraphAfterInlining(CallSite CS, static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI) { - LLVMContext &Context = Src->getContext(); - Type *VoidPtrTy = Type::getInt8PtrTy(Context); Type *AggTy = cast(Src->getType())->getElementType(); - Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) }; - Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); - IRBuilder<> builder(InsertBlock->begin()); - Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp"); - Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp"); - - Value *Size; - if (IFI.DL == nullptr) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = ConstantInt::get(Type::getInt64Ty(Context), - IFI.DL->getTypeStoreSize(AggTy)); + IRBuilder<> Builder(InsertBlock->begin()); + + Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. - Value *CallArgs[] = { - DstCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1), - ConstantInt::getFalse(Context) // isVolatile - }; - builder.CreateCall(MemCpyFn, CallArgs); + Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); } -/// HandleByValArgument - When inlining a call site that has a byval argument, +/// When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, @@ -664,6 +845,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, PointerType *ArgTy = cast(Arg->getType()); Type *AggTy = ArgTy->getElementType(); + Function *Caller = TheCall->getParent()->getParent(); + // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and // temporary. @@ -674,10 +857,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment. return Arg; + const DataLayout &DL = Caller->getParent()->getDataLayout(); + // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. - if (getOrEnforceKnownAlignment(Arg, ByValAlignment, - IFI.DL) >= ByValAlignment) + if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, + &IFI.ACT->getAssumptionCache(*Caller)) >= + ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad @@ -685,17 +871,14 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, } // Create the alloca. If we have DataLayout, use nice alignment. - unsigned Align = 1; - if (IFI.DL) - Align = IFI.DL->getPrefTypeAlignment(AggTy); - + unsigned Align = + Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy); + // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). 
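
// The explicit copy emitted by HandleByValArgumentInit above, sketched in IR
// (illustrative, for a 16-byte aggregate; alignment 1 as explained there):
//
//   %agg.tmp = alloca %struct.S
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16,
//                                        i32 1, i1 false)
//
// where %dst and %src are the new alloca and the caller's byval pointer;
// CreateMemCpy inserts the i8* bitcasts itself.
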
Align = std::max(Align, ByValAlignment); - Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), &*Caller->begin()->begin()); IFI.StaticAllocas.push_back(cast(NewAlloca)); @@ -705,8 +888,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, return NewAlloca; } -// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime -// intrinsic. +// Check whether this Value is used by a lifetime intrinsic. static bool isUsedByLifetimeMarker(Value *V) { for (User *U : V->users()) { if (IntrinsicInst *II = dyn_cast(U)) { @@ -721,7 +903,7 @@ static bool isUsedByLifetimeMarker(Value *V) { return false; } -// hasLifetimeMarkers - Check whether the given alloca already has +// Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { Type *Ty = AI->getType(); @@ -740,64 +922,96 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { return false; } -/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to -/// recursively update InlinedAtEntry of a DebugLoc. -static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, - const DebugLoc &InlinedAtDL, - LLVMContext &Ctx) { - if (MDNode *IA = DL.getInlinedAt(Ctx)) { - DebugLoc NewInlinedAtDL - = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx); - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), - NewInlinedAtDL.getAsMDNode(Ctx)); +/// Rebuild the entire inlined-at chain for this instruction so that the top of +/// the chain now is inlined-at the new call site. +static DebugLoc +updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, + DenseMap &IANodes) { + SmallVector InlinedAtLocations; + DILocation *Last = InlinedAtNode; + DILocation *CurInlinedAt = DL; + + // Gather all the inlined-at nodes + while (DILocation *IA = CurInlinedAt->getInlinedAt()) { + // Skip any we've already built nodes for + if (DILocation *Found = IANodes[IA]) { + Last = Found; + break; + } + + InlinedAtLocations.push_back(IA); + CurInlinedAt = IA; } - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), - InlinedAtDL.getAsMDNode(Ctx)); + // Starting from the top, rebuild the nodes to point to the new inlined-at + // location (then rebuilding the rest of the chain behind it) and update the + // map of already-constructed inlined-at nodes. + for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(), + InlinedAtLocations.rend())) { + Last = IANodes[MD] = DILocation::getDistinct( + Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); + } + + // And finally create the normal location for this instruction, referring to + // the new inlined-at chain. + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); } -/// fixupLineNumbers - Update inlined instructions' line numbers to +/// Update inlined instructions' line numbers to /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { DebugLoc TheCallDL = TheCall->getDebugLoc(); - if (TheCallDL.isUnknown()) + if (!TheCallDL) return; + auto &Ctx = Fn->getContext(); + DILocation *InlinedAtNode = TheCallDL; + + // Create a unique call site, not to be confused with any other call from the + // same location. 
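
// Shape of the result (an illustrative sketch): after inlining @f, which had
// itself inlined @g, into @h at some call site, every debug location's
// inlined-at chain is rebuilt so its deepest link is the new distinct node:
//
//   !g.loc = !DILocation(line: 7, scope: !g.sp, inlinedAt: !f.loc)
//   !f.loc = !DILocation(line: 3, scope: !f.sp, inlinedAt: !callsite)
//   !callsite = distinct !DILocation(line: 42, scope: !h.sp)
//
// The distinct node keeps two inlined copies from the same source line from
// being folded together by metadata uniquing.
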
+ InlinedAtNode = DILocation::getDistinct( + Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), + InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); + + // Cache the inlined-at nodes as they're built so they are reused, without + // this every instruction's inlined-at chain would become distinct from each + // other. + DenseMap IANodes; + for (; FI != Fn->end(); ++FI) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (DL.isUnknown()) { + if (!DL) { // If the inlined instruction has no line number, make it look as if it // originates from the call location. This is important for // ((__always_inline__, __nodebug__)) functions which must use caller // location for all instructions in their function body. + + // Don't update static allocas, as they may get moved later. + if (auto *AI = dyn_cast(BI)) + if (isa(AI->getArraySize())) + continue; + BI->setDebugLoc(TheCallDL); } else { - BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); - if (DbgValueInst *DVI = dyn_cast(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DVI->setOperand(2, createInlinedVariable(DVI->getVariable(), - InlinedAt, Ctx)); - } + BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); } } } } -/// InlineFunction - This function inlines the called function into the basic -/// block of the caller. This returns false if it is not possible to inline -/// this call. The program is still in a well defined state if this occurs -/// though. +/// This function inlines the called function into the basic block of the +/// caller. This returns false if it is not possible to inline this call. +/// The program is still in a well defined state if this occurs though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - bool InsertLifetime) { + AAResults *CalleeAAR, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); @@ -829,35 +1043,23 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } // Get the personality function from the callee if it contains a landing pad. - Value *CalleePersonality = nullptr; - for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); - I != E; ++I) - if (const InvokeInst *II = dyn_cast(I->getTerminator())) { - const BasicBlock *BB = II->getUnwindDest(); - const LandingPadInst *LP = BB->getLandingPadInst(); - CalleePersonality = LP->getPersonalityFn(); - break; - } + Constant *CalledPersonality = + CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn() : nullptr; // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. - if (CalleePersonality) { - for (Function::const_iterator I = Caller->begin(), E = Caller->end(); - I != E; ++I) - if (const InvokeInst *II = dyn_cast(I->getTerminator())) { - const BasicBlock *BB = II->getUnwindDest(); - const LandingPadInst *LP = BB->getLandingPadInst(); - - // If the personality functions match, then we can perform the - // inlining. 
Otherwise, we can't inline. - // TODO: This isn't 100% true. Some personality functions are proper - // supersets of others and can be used in place of the other. - if (LP->getPersonalityFn() != CalleePersonality) - return false; - - break; - } + Constant *CallerPersonality = + Caller->hasPersonalityFn() ? Caller->getPersonalityFn() : nullptr; + if (CalledPersonality) { + if (!CallerPersonality) + Caller->setPersonalityFn(CalledPersonality); + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + else if (CalledPersonality != CallerPersonality) + return false; } // Get an iterator to the last basic block in the function, which will have @@ -875,6 +1077,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Keep a list of pair (dst, src) to emit byval initializations. SmallVector, 4> ByValInit; + auto &DL = Caller->getParent()->getDataLayout(); + assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -900,13 +1104,18 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, VMap[I] = ActualArg; } + // Add alignment assumptions if necessary. We do this before the inlined + // instructions are actually cloned into the caller so that we can easily + // check what will be known at the start of the inlined code. + AddAlignmentAssumptions(CS, IFI); + // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo, IFI.DL, TheCall); + &InlinedFunctionInfo, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -927,7 +1136,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAliasScopeMetadata(CS, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, IFI.DL); + AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); + + // FIXME: We could register any cloned assumptions instead of clearing the + // whole function's cache. + if (IFI.ACT) + IFI.ACT->getAssumptionCache(*Caller).clear(); } // If there are any alloca instructions in the block that used to be the entry @@ -969,6 +1183,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, FirstNewBlock->getInstList(), AI, I); } + // Move any dbg.declares describing the allocas into the entry basic block. + DIBuilder DIB(*Caller->getParent()); + for (auto &AI : IFI.StaticAllocas) + replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); } bool InlinedMustTailCalls = false; @@ -1026,18 +1244,21 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast(AI->getArraySize())) { - if (IFI.DL) { - Type *AllocaType = AI->getAllocatedType(); - uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType); - uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); - assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); - // Check that array size doesn't saturate uint64_t and doesn't - // overflow when it's multiplied by type size. 
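
// The size computed here feeds the lifetime intrinsics (illustrative IR for
// a 16-byte alloca; the builder adds the i8* bitcast):
//
//   %p = bitcast %struct.S* %agg.i to i8*
//   call void @llvm.lifetime.start(i64 16, i8* %p)
//   ... inlined body ...
//   call void @llvm.lifetime.end(i64 16, i8* %p)
//
// When AllocaSize stays null, the builder emits i64 -1 ("unknown size"), so
// the overflow and zero-size checks here decide between a precise size and
// none at all.
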
-        if (AllocaArraySize != ~0ULL &&
-            UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
-          AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
-                                        AllocaArraySize * AllocaTypeSize);
-        }
+      auto &DL = Caller->getParent()->getDataLayout();
+      Type *AllocaType = AI->getAllocatedType();
+      uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+      uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+
+      // Don't add markers for zero-sized allocas.
+      if (AllocaArraySize == 0)
+        continue;
+
+      // Check that array size doesn't saturate uint64_t and doesn't
+      // overflow when it's multiplied by type size.
+      if (AllocaArraySize != ~0ULL &&
+          UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+        AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+                                      AllocaArraySize * AllocaTypeSize);
+      }
     }
@@ -1063,7 +1284,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
     // Insert the llvm.stacksave.
     CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
-                             .CreateCall(StackSave, "savedstack");
+                             .CreateCall(StackSave, {}, "savedstack");
 
     // Insert a call to llvm.stackrestore before any return instructions in the
     // inlined function.
@@ -1078,8 +1299,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
   // If we are inlining for an invoke instruction, we must make sure to rewrite
   // any call instructions into invoke instructions.
-  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
-    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+  if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
+    BasicBlock *UnwindDest = II->getUnwindDest();
+    Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
+    if (isa<LandingPadInst>(FirstNonPHI)) {
+      HandleInlinedLandingPad(II, FirstNewBlock, InlinedFunctionInfo);
+    } else {
+      HandleInlinedEHPad(II, FirstNewBlock, InlinedFunctionInfo);
+    }
+  }
 
   // Handle any inlined musttail call sites. In order for a new call site to be
   // musttail, the source of the clone and the inlined call site must have been
@@ -1298,7 +1526,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   // the entries are the same or undef).  If so, remove the PHI so it doesn't
   // block other optimizations.
   if (PHI) {
-    if (Value *V = SimplifyInstruction(PHI, IFI.DL)) {
+    auto &DL = Caller->getParent()->getDataLayout();
+    if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr,
+                                       &IFI.ACT->getAssumptionCache(*Caller))) {
       PHI->replaceAllUsesWith(V);
       PHI->eraseFromParent();
     }
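
For callers of the updated API, a minimal usage sketch (the pass plumbing,
variable names, and the InlineFunctionInfo constructor shape here are
assumptions for illustration, not part of this patch):

  // Inside a pass that owns an AssumptionCacheTracker (ACT) and has computed
  // alias-analysis results for the callee (CalleeAAR):
  InlineFunctionInfo IFI(/*cg=*/nullptr, ACT);
  if (InlineFunction(CS, IFI, &CalleeAAR))
    ++NumInlined; // hypothetical bookkeeping in the calling pass

InsertLifetime defaults to true, so lifetime markers are added for the
callee's static allocas unless the caller of the API opts out.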