X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FIPO%2FInliner.cpp;h=dc710d14157bdd0102e1d108e6702772861addfb;hp=9975333976ded7fc43afc15dc67a36eb10a91f6c;hb=54fec07ec00b4449393a66e8e2e62fd241781f80;hpb=f91f5af802bd4487c49ee17cd0d3e46c6456263e diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 9975333976d..dc710d14157 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -14,22 +14,22 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "inline" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Transforms/IPO/InlinerPass.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); @@ -37,6 +37,11 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); +// This weirdly named statistic tracks the number of times that, when attempting +// to inline a function A into B, we analyze the callers of B in order to see +// if those would be more profitable and blocked inline steps. +STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); + static cl::opt InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); @@ -59,14 +64,48 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) /// getAnalysisUsage - For this class, we declare that we require and preserve /// the call graph. If the derived class implements this method, it should /// always explicitly call the implementation here. -void Inliner::getAnalysisUsage(AnalysisUsage &Info) const { - CallGraphSCCPass::getAnalysisUsage(Info); +void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { + CallGraphSCCPass::getAnalysisUsage(AU); } typedef DenseMap > InlinedArrayAllocasTy; +/// \brief If the inlined function had a higher stack protection level than the +/// calling function, then bump up the caller's stack protection level. +static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { + // If upgrading the SSP attribute, clear out the old SSP Attributes first. + // Having multiple SSP attributes doesn't actually hurt, but it adds useless + // clutter to the IR. + AttrBuilder B; + B.addAttribute(Attribute::StackProtect) + .addAttribute(Attribute::StackProtectStrong); + AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(), + AttributeSet::FunctionIndex, + B); + AttributeSet CallerAttr = Caller->getAttributes(), + CalleeAttr = Callee->getAttributes(); + + if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectReq)) { + Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); + Caller->addFnAttr(Attribute::StackProtectReq); + } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong) && + !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectReq)) { + Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); + Caller->addFnAttr(Attribute::StackProtectStrong); + } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtect) && + !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectReq) && + !CallerAttr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong)) + Caller->addFnAttr(Attribute::StackProtect); +} + /// InlineCallIfPossible - If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. /// @@ -77,7 +116,8 @@ InlinedArrayAllocasTy; /// any new allocas to the set if not possible. static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, - int InlineHistory, bool InsertLifetime) { + int InlineHistory, bool InsertLifetime, + const DataLayout *TD) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); @@ -86,13 +126,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, if (!InlineFunction(CS, IFI, InsertLifetime)) return false; - // If the inlined function had a higher stack protection level than the - // calling function, then bump up the caller's stack protection level. - if (Callee->hasFnAttr(Attribute::StackProtectReq)) - Caller->addFnAttr(Attribute::StackProtectReq); - else if (Callee->hasFnAttr(Attribute::StackProtect) && - !Caller->hasFnAttr(Attribute::StackProtectReq)) - Caller->addFnAttr(Attribute::StackProtect); + AdjustCallerSSPLevel(Caller, Callee); // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, @@ -156,6 +190,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, bool MergedAwayAlloca = false; for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { AllocaInst *AvailableAlloca = AllocasForType[i]; + + unsigned Align1 = AI->getAlignment(), + Align2 = AvailableAlloca->getAlignment(); + // If we don't have data layout information, and only one alloca is using + // the target default, then we can't safely merge them because we can't + // pick the greater alignment. + if (!TD && (!Align1 || !Align2) && Align1 != Align2) + continue; // The available alloca has to be in the right function, not in some other // function in this SCC. @@ -173,6 +215,20 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, << *AvailableAlloca << '\n'); AI->replaceAllUsesWith(AvailableAlloca); + + if (Align1 != Align2) { + if (!Align1 || !Align2) { + assert(TD && "DataLayout required to compare default alignments"); + unsigned TypeAlign = TD->getABITypeAlignment(AI->getAllocatedType()); + + Align1 = Align1 ? Align1 : TypeAlign; + Align2 = Align2 ? Align2 : TypeAlign; + } + + if (Align1 > Align2) + AvailableAlloca->setAlignment(AI->getAlignment()); + } + AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; @@ -197,19 +253,28 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, } unsigned Inliner::getInlineThreshold(CallSite CS) const { - int thres = InlineThreshold; + int thres = InlineThreshold; // -inline-threshold or else selected by + // overall opt level - // Listen to optsize when -inline-limit is not given. + // If -inline-threshold is not given, listen to the optsize attribute when it + // would decrease the threshold. Function *Caller = CS.getCaller(); - if (Caller && !Caller->isDeclaration() && - Caller->hasFnAttr(Attribute::OptimizeForSize) && - InlineLimit.getNumOccurrences() == 0) + bool OptSize = Caller && !Caller->isDeclaration() && + Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); + if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && + OptSizeThreshold < thres) thres = OptSizeThreshold; - // Listen to inlinehint when it would increase the threshold. + // Listen to the inlinehint attribute when it would increase the threshold + // and the caller does not need to minimize its size. Function *Callee = CS.getCalledFunction(); - if (HintThreshold > thres && Callee && !Callee->isDeclaration() && - Callee->hasFnAttr(Attribute::InlineHint)) + bool InlineHint = Callee && !Callee->isDeclaration() && + Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::InlineHint); + if (InlineHint && HintThreshold > thres + && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize)) thres = HintThreshold; return thres; @@ -232,14 +297,10 @@ bool Inliner::shouldInline(CallSite CS) { return false; } - int Cost = IC.getValue(); Function *Caller = CS.getCaller(); - int CurrentThreshold = getInlineThreshold(CS); - float FudgeFactor = getInlineFudgeFactor(CS); - int AdjThreshold = (int)(CurrentThreshold * FudgeFactor); - if (Cost >= AdjThreshold) { - DEBUG(dbgs() << " NOT Inlining: cost=" << Cost - << ", thres=" << AdjThreshold + if (!IC) { + DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() + << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << "\n"); return false; } @@ -256,12 +317,17 @@ bool Inliner::shouldInline(CallSite CS) { // are used. Thus we will always have the opportunity to make local inlining // decisions. Importantly the linkonce-ODR linkage covers inline functions // and templates in C++. + // + // FIXME: All of this logic should be sunk into getInlineCost. It relies on + // the internal implementation of the inline cost metrics rather than + // treating them as truly abstract units etc. if (Caller->hasLocalLinkage() || Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) { int TotalSecondaryCost = 0; - bool outerCallsFound = false; + // The candidate cost to be imposed upon the current function. + int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1); // This bool tracks what happens if we do NOT inline C into B. - bool callerWillBeRemoved = true; + bool callerWillBeRemoved = Caller->hasLocalLinkage(); // This bool tracks what happens if we DO inline C into B. bool inliningPreventsSomeOuterInline = false; for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); @@ -277,26 +343,20 @@ bool Inliner::shouldInline(CallSite CS) { } InlineCost IC2 = getInlineCost(CS2); - if (IC2.isNever()) + ++NumCallerCallersAnalyzed; + if (!IC2) { callerWillBeRemoved = false; - if (IC2.isAlways() || IC2.isNever()) + continue; + } + if (IC2.isAlways()) continue; - outerCallsFound = true; - int Cost2 = IC2.getValue(); - int CurrentThreshold2 = getInlineThreshold(CS2); - float FudgeFactor2 = getInlineFudgeFactor(CS2); - - if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) - callerWillBeRemoved = false; - - // See if we have this case. We subtract off the penalty - // for the call instruction, which we would be deleting. - if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && - Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= - (int)(CurrentThreshold2 * FudgeFactor2)) { + // See if inlining or original callsite would erase the cost delta of + // this callsite. We subtract off the penalty for the call instruction, + // which we would be deleting. + if (IC2.getCostDelta() <= CandidateCost) { inliningPreventsSomeOuterInline = true; - TotalSecondaryCost += Cost2; + TotalSecondaryCost += IC2.getCost(); } } // If all outer calls to Caller would get inlined, the cost for the last @@ -306,17 +366,16 @@ bool Inliner::shouldInline(CallSite CS) { if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end()) TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; - if (outerCallsFound && inliningPreventsSomeOuterInline && - TotalSecondaryCost < Cost) { - DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << - " Cost = " << Cost << + if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) { + DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << + " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); return false; } } - DEBUG(dbgs() << " Inlining: cost=" << Cost - << ", thres=" << AdjThreshold + DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() + << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << '\n'); return true; } @@ -335,41 +394,10 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, return false; } -/// \brief Simplify arguments going into a particular callsite. -/// -/// This is important to do each time we add a callsite due to inlining so that -/// constants and other entities which feed into inline cost estimation are -/// properly recognized when analyzing the new callsite. Consider: -/// void outer(int x) { -/// if (x < 42) -/// return inner(42 - x); -/// ... -/// } -/// void inner(int x) { -/// ... -/// } -/// -/// The inliner gives calls to 'outer' with a constant argument a bonus because -/// it will delete one side of a branch. But the resulting call to 'inner' -/// will, after inlining, also have a constant operand. We need to do just -/// enough constant folding to expose this for callsite arguments. The rest -/// will be taken care of after the inliner finishes running. -static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) { - // FIXME: It would be nice to avoid this smallvector if RAUW doesn't - // invalidate operand iterators in any cases. - SmallVector, 4> SimplifiedArgs; - for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - if (Instruction *Inst = dyn_cast(*I)) - if (Value *SimpleArg = SimplifyInstruction(Inst, TD)) - SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg)); - for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx) - SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second); -} - bool Inliner::runOnSCC(CallGraphSCC &SCC) { - CallGraph &CG = getAnalysis(); - const TargetData *TD = getAnalysisIfAvailable(); + CallGraph &CG = getAnalysis().getCallGraph(); + const DataLayout *TD = getAnalysisIfAvailable(); + const TargetLibraryInfo *TLI = getAnalysisIfAvailable(); SmallPtrSet SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); @@ -448,15 +476,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. - if (isInstructionTriviallyDead(CS.getInstruction())) { + if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) { DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. CG[Caller]->removeCallEdgeFor(CS); CS.getInstruction()->eraseFromParent(); ++NumCallsDeleted; - // Update the cached cost info with the missing call - growCachedCostInfo(Caller, NULL); } else { // We can only inline direct calls to non-declarations. if (Callee == 0 || Callee->isDeclaration()) continue; @@ -479,7 +505,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // Attempt to inline the function. if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime)) + InlineHistoryID, InsertLifetime, TD)) continue; ++NumInlined; @@ -494,14 +520,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); i != e; ++i) { Value *Ptr = InlineInfo.InlinedCalls[i]; - CallSite NewCS = Ptr; - simplifyCallSiteArguments(TD, NewCS); - CallSites.push_back(std::make_pair(NewCS, NewHistoryID)); + CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); } } - - // Update the cached cost info with the inlined call. - growCachedCostInfo(Caller, Callee); } // If we inlined or deleted the last possible call site to the function, @@ -521,8 +542,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // Remove any call graph edges from the callee to its callees. CalleeNode->removeAllCalledFunctions(); - resetCachedCostInfo(Callee); - // Removing the node for callee from the call graph and delete it. delete CG.removeFunctionFromModule(CalleeNode); ++NumDeleted; @@ -570,7 +589,9 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Handle the case when this function is called and we only want to care // about always-inline functions. This is a bit of a hack to share code // between here and the InlineAlways pass. - if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline)) + if (AlwaysInlineOnly && + !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::AlwaysInline)) continue; // If the only remaining users of the function are dead constants, remove @@ -601,14 +622,13 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Note that it doesn't matter that we are iterating over a non-stable order // here to do this, it doesn't matter which order the functions are deleted // in. - std::sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); + array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), FunctionsToRemove.end()); for (SmallVectorImpl::iterator I = FunctionsToRemove.begin(), E = FunctionsToRemove.end(); I != E; ++I) { - resetCachedCostInfo((*I)->getFunction()); delete CG.removeFunctionFromModule(*I); ++NumDeleted; }