//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "inline"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include <set>
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "inline"
+
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+// This weirdly named statistic tracks the number of times that, when attempting
+// to inline a function A into B, we analyze the callers of B in order to see
+// if those would be more profitable and blocked inline steps.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
cl::desc("Threshold for inlining functions with inline hint"));
+// We instroduce this threshold to help performance of instrumentation based
+// PGO before we actually hook up inliner with analysis passes such as BPI and
+// BFI.
+static cl::opt<int>
+ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225),
+ cl::desc("Threshold for inlining functions with cold attribute"));
+
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
Inliner::Inliner(char &ID)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {}
-Inliner::Inliner(char &ID, int Threshold)
- : CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
+Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
+ InlineLimit : Threshold),
+ InsertLifetime(InsertLifetime) {}
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
/// always explicitly call the implementation here.
-void Inliner::getAnalysisUsage(AnalysisUsage &Info) const {
- CallGraphSCCPass::getAnalysisUsage(Info);
+void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionTracker>();
+ CallGraphSCCPass::getAnalysisUsage(AU);
}
-typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> >
+typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
InlinedArrayAllocasTy;
+/// \brief If the inlined function had a higher stack protection level than the
+/// calling function, then bump up the caller's stack protection level.
+static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
+ // If upgrading the SSP attribute, clear out the old SSP Attributes first.
+ // Having multiple SSP attributes doesn't actually hurt, but it adds useless
+ // clutter to the IR.
+ AttrBuilder B;
+ B.addAttribute(Attribute::StackProtect)
+ .addAttribute(Attribute::StackProtectStrong);
+ AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(),
+ AttributeSet::FunctionIndex,
+ B);
+ AttributeSet CallerAttr = Caller->getAttributes(),
+ CalleeAttr = Callee->getAttributes();
+
+ if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::StackProtectReq);
+ } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq)) {
+ Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller->addFnAttr(Attribute::StackProtectStrong);
+ } else if (CalleeAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtect) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectReq) &&
+ !CallerAttr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackProtectStrong))
+ Caller->addFnAttr(Attribute::StackProtect);
+}
+
/// InlineCallIfPossible - If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
- InlinedArrayAllocasTy &InlinedArrayAllocas) {
+ InlinedArrayAllocasTy &InlinedArrayAllocas,
+ int InlineHistory, bool InsertLifetime,
+ const DataLayout *DL) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
// Try to inline the function. Get the list of static allocas that were
// inlined.
- if (!InlineFunction(CS, IFI))
+ if (!InlineFunction(CS, IFI, InsertLifetime))
return false;
- // If the inlined function had a higher stack protection level than the
- // calling function, then bump up the caller's stack protection level.
- if (Callee->hasFnAttr(Attribute::StackProtectReq))
- Caller->addFnAttr(Attribute::StackProtectReq);
- else if (Callee->hasFnAttr(Attribute::StackProtect) &&
- !Caller->hasFnAttr(Attribute::StackProtectReq))
- Caller->addFnAttr(Attribute::StackProtect);
+ AdjustCallerSSPLevel(Caller, Callee);
-
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
// then we know that they have disjoint lifetimes and that we can merge them.
//
SmallPtrSet<AllocaInst*, 16> UsedAllocas;
+ // When processing our SCC, check to see if CS was inlined from some other
+ // call site. For example, if we're processing "A" in this code:
+ // A() { B() }
+ // B() { x = alloca ... C() }
+ // C() { y = alloca ... }
+ // Assume that C was not inlined into B initially, and so we're processing A
+ // and decide to inline B into A. Doing this makes an alloca available for
+ // reuse and makes a callsite (C) available for inlining. When we process
+ // the C call site we don't want to do any alloca merging between X and Y
+ // because their scopes are not disjoint. We could make this smarter by
+ // keeping track of the inline history for each alloca in the
+ // InlinedArrayAllocas but this isn't likely to be a significant win.
+ if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
+ return true;
+
// Loop over all the allocas we have so far and see if they can be merged with
// a previously inlined alloca. If not, remember that we had it.
for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
// Don't bother trying to merge array allocations (they will usually be
// canonicalized to be an allocation *of* an array), or allocations whose
// type is not itself an array (because we're afraid of pessimizing SRoA).
- const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
- if (ATy == 0 || AI->isArrayAllocation())
+ ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
+ if (!ATy || AI->isArrayAllocation())
continue;
// Get the list of all available allocas for this array type.
bool MergedAwayAlloca = false;
for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
AllocaInst *AvailableAlloca = AllocasForType[i];
+
+ unsigned Align1 = AI->getAlignment(),
+ Align2 = AvailableAlloca->getAlignment();
+ // If we don't have data layout information, and only one alloca is using
+ // the target default, then we can't safely merge them because we can't
+ // pick the greater alignment.
+ if (!DL && (!Align1 || !Align2) && Align1 != Align2)
+ continue;
// The available alloca has to be in the right function, not in some other
// function in this SCC.
// Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
// success!
- DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI);
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
+ << *AvailableAlloca << '\n');
AI->replaceAllUsesWith(AvailableAlloca);
+
+ if (Align1 != Align2) {
+ if (!Align1 || !Align2) {
+ assert(DL && "DataLayout required to compare default alignments");
+ unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType());
+
+ Align1 = Align1 ? Align1 : TypeAlign;
+ Align2 = Align2 ? Align2 : TypeAlign;
+ }
+
+ if (Align1 > Align2)
+ AvailableAlloca->setAlignment(AI->getAlignment());
+ }
+
AI->eraseFromParent();
MergedAwayAlloca = true;
++NumMergedAllocas;
+ IFI.StaticAllocas[AllocaNo] = nullptr;
break;
}
// If we already nuked the alloca, we're done with it.
if (MergedAwayAlloca)
continue;
-
+
// If we were unable to merge away the alloca either because there are no
// allocas of the right type available or because we reused them all
// already, remember that this alloca came from an inlined function and mark
}
unsigned Inliner::getInlineThreshold(CallSite CS) const {
- int thres = InlineThreshold;
+ int thres = InlineThreshold; // -inline-threshold or else selected by
+ // overall opt level
- // Listen to optsize when -inline-limit is not given.
+ // If -inline-threshold is not given, listen to the optsize attribute when it
+ // would decrease the threshold.
Function *Caller = CS.getCaller();
- if (Caller && !Caller->isDeclaration() &&
- Caller->hasFnAttr(Attribute::OptimizeForSize) &&
- InlineLimit.getNumOccurrences() == 0)
+ bool OptSize = Caller && !Caller->isDeclaration() &&
+ Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize);
+ if (!(InlineLimit.getNumOccurrences() > 0) && OptSize &&
+ OptSizeThreshold < thres)
thres = OptSizeThreshold;
- // Listen to inlinehint when it would increase the threshold.
+ // Listen to the inlinehint attribute when it would increase the threshold
+ // and the caller does not need to minimize its size.
Function *Callee = CS.getCalledFunction();
- if (HintThreshold > thres && Callee && !Callee->isDeclaration() &&
- Callee->hasFnAttr(Attribute::InlineHint))
+ bool InlineHint = Callee && !Callee->isDeclaration() &&
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::InlineHint);
+ if (InlineHint && HintThreshold > thres
+ && !Caller->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize))
thres = HintThreshold;
+ // Listen to the cold attribute when it would decrease the threshold.
+ bool ColdCallee = Callee && !Callee->isDeclaration() &&
+ Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Cold);
+ // Command line argument for InlineLimit will override the default
+ // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
+ // do not use the default cold threshold even if it is smaller.
+ if ((InlineLimit.getNumOccurrences() == 0 ||
+ ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
+ ColdThreshold < thres)
+ thres = ColdThreshold;
+
return thres;
}
+static void emitAnalysis(CallSite CS, const Twine &Msg) {
+ Function *Caller = CS.getCaller();
+ LLVMContext &Ctx = Caller->getContext();
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+}
+
/// shouldInline - Return true if the inliner should attempt to inline
/// at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
+ " should always be inlined (cost=always)");
return true;
}
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+ " should never be inlined (cost=never)"));
return false;
}
- int Cost = IC.getValue();
Function *Caller = CS.getCaller();
- int CurrentThreshold = getInlineThreshold(CS);
- float FudgeFactor = getInlineFudgeFactor(CS);
- int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
- if (Cost >= AdjThreshold) {
- DEBUG(dbgs() << " NOT Inlining: cost=" << Cost
- << ", thres=" << AdjThreshold
+ if (!IC) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+ " too costly to inline (cost=") +
+ Twine(IC.getCost()) + ", threshold=" +
+ Twine(IC.getCostDelta() + IC.getCost()) + ")");
return false;
}
- // Try to detect the case where the current inlining candidate caller
- // (call it B) is a static function and is an inlining candidate elsewhere,
- // and the current candidate callee (call it C) is large enough that
- // inlining it into B would make B too big to inline later. In these
- // circumstances it may be best not to inline C into B, but to inline B
- // into its callers.
- if (Caller->hasLocalLinkage()) {
+ // Try to detect the case where the current inlining candidate caller (call
+ // it B) is a static or linkonce-ODR function and is an inlining candidate
+ // elsewhere, and the current candidate callee (call it C) is large enough
+ // that inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B into
+ // its callers.
+ //
+ // This only applies to static and linkonce-ODR functions because those are
+ // expected to be available for inlining in the translation units where they
+ // are used. Thus we will always have the opportunity to make local inlining
+ // decisions. Importantly the linkonce-ODR linkage covers inline functions
+ // and templates in C++.
+ //
+ // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+ // the internal implementation of the inline cost metrics rather than
+ // treating them as truly abstract units etc.
+ if (Caller->hasLocalLinkage() || Caller->hasLinkOnceODRLinkage()) {
int TotalSecondaryCost = 0;
- bool outerCallsFound = false;
- bool allOuterCallsWillBeInlined = true;
- bool someOuterCallWouldNotBeInlined = false;
- for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
- I != E; ++I) {
- CallSite CS2(*I);
+ // The candidate cost to be imposed upon the current function.
+ int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ // This bool tracks what happens if we do NOT inline C into B.
+ bool callerWillBeRemoved = Caller->hasLocalLinkage();
+ // This bool tracks what happens if we DO inline C into B.
+ bool inliningPreventsSomeOuterInline = false;
+ for (User *U : Caller->users()) {
+ CallSite CS2(U);
// If this isn't a call to Caller (it could be some other sort
- // of reference) skip it.
- if (!CS2 || CS2.getCalledFunction() != Caller)
+ // of reference) skip it. Such references will prevent the caller
+ // from being removed.
+ if (!CS2 || CS2.getCalledFunction() != Caller) {
+ callerWillBeRemoved = false;
continue;
+ }
InlineCost IC2 = getInlineCost(CS2);
- if (IC2.isNever())
- allOuterCallsWillBeInlined = false;
- if (IC2.isAlways() || IC2.isNever())
+ ++NumCallerCallersAnalyzed;
+ if (!IC2) {
+ callerWillBeRemoved = false;
+ continue;
+ }
+ if (IC2.isAlways())
continue;
- outerCallsFound = true;
- int Cost2 = IC2.getValue();
- int CurrentThreshold2 = getInlineThreshold(CS2);
- float FudgeFactor2 = getInlineFudgeFactor(CS2);
-
- if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
- allOuterCallsWillBeInlined = false;
-
- // See if we have this case. We subtract off the penalty
- // for the call instruction, which we would be deleting.
- if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
- Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
- (int)(CurrentThreshold2 * FudgeFactor2)) {
- someOuterCallWouldNotBeInlined = true;
- TotalSecondaryCost += Cost2;
+ // See if inlining or original callsite would erase the cost delta of
+ // this callsite. We subtract off the penalty for the call instruction,
+ // which we would be deleting.
+ if (IC2.getCostDelta() <= CandidateCost) {
+ inliningPreventsSomeOuterInline = true;
+ TotalSecondaryCost += IC2.getCost();
}
}
// If all outer calls to Caller would get inlined, the cost for the last
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+ if (callerWillBeRemoved && !Caller->use_empty())
TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
- if (outerCallsFound && someOuterCallWouldNotBeInlined &&
- TotalSecondaryCost < Cost) {
- DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
- " Cost = " << Cost <<
+ if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << IC.getCost() <<
", outer Cost = " << TotalSecondaryCost << '\n');
+ emitAnalysis(
+ CS, Twine("Not inlining. Cost of inlining " +
+ CS.getCalledFunction()->getName() +
+ " increases the cost of inlining " +
+ CS.getCaller()->getName() + " in other contexts"));
return false;
}
}
- DEBUG(dbgs() << " Inlining: cost=" << Cost
- << ", thres=" << AdjThreshold
+ DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << '\n');
+ emitAnalysis(
+ CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
+ CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
+ " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
return true;
}
return false;
}
-
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
- CallGraph &CG = getAnalysis<CallGraph>();
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
+ const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
SmallPtrSet<Function*, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, TD);
+ InlineFunctionInfo InlineInfo(&CG, DL, AA, AT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
// just delete the call instead of trying to inline it, regardless of
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
- if (isInstructionTriviallyDead(CS.getInstruction())) {
+ if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
DEBUG(dbgs() << " -> Deleting dead call: "
<< *CS.getInstruction() << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
CG[Caller]->removeCallEdgeFor(CS);
CS.getInstruction()->eraseFromParent();
++NumCallsDeleted;
- // Update the cached cost info with the missing call
- growCachedCostInfo(Caller, NULL);
} else {
// We can only inline direct calls to non-declarations.
- if (Callee == 0 || Callee->isDeclaration()) continue;
+ if (!Callee || Callee->isDeclaration()) continue;
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
- // itself. If so, we'd be recursively inlinling the same function,
+ // itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
// infinitely inline.
int InlineHistoryID = CallSites[CSi].second;
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue;
-
+ LLVMContext &CallerCtx = Caller->getContext();
+
+ // Get DebugLoc to report. CS will be invalid after Inliner.
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+
// If the policy determines that we should inline this function,
// try to do so.
- if (!shouldInline(CS))
+ if (!shouldInline(CS)) {
+ emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() +
+ " will not be inlined into " +
+ Caller->getName()));
continue;
+ }
// Attempt to inline the function.
- if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ InlineHistoryID, InsertLifetime, DL)) {
+ emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() +
+ " will not be inlined into " +
+ Caller->getName()));
continue;
+ }
++NumInlined;
-
+
+ // Report the inline decision.
+ emitOptimizationRemark(
+ CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() + " inlined into " + Caller->getName()));
+
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
if (!InlineInfo.InlinedCalls.empty()) {
CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
}
}
-
- // Update the cached cost info with the inlined call.
- growCachedCostInfo(Caller, Callee);
}
// If we inlined or deleted the last possible call site to the function,
// Remove any call graph edges from the callee to its callees.
CalleeNode->removeAllCalledFunctions();
- resetCachedCostInfo(Callee);
-
// Removing the node for callee from the call graph and delete it.
delete CG.removeFunctionFromModule(CalleeNode);
++NumDeleted;
/// removeDeadFunctions - Remove dead functions that are not included in
/// DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG,
- SmallPtrSet<const Function *, 16> *DNR) {
- SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+ SmallVector<CallGraphNode*, 16> FunctionsToRemove;
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
CallGraphNode *CGN = I->second;
- if (CGN->getFunction() == 0)
- continue;
-
Function *F = CGN->getFunction();
-
+ if (!F || F->isDeclaration())
+ continue;
+
+ // Handle the case when this function is called and we only want to care
+ // about always-inline functions. This is a bit of a hack to share code
+ // between here and the InlineAlways pass.
+ if (AlwaysInlineOnly &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline))
+ continue;
+
// If the only remaining users of the function are dead constants, remove
// them.
F->removeDeadConstantUsers();
- if (DNR && DNR->count(F))
- continue;
- if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
- !F->hasAvailableExternallyLinkage())
+ if (!F->isDefTriviallyDead())
continue;
- if (!F->use_empty())
+
+ // It is unsafe to drop a function with discardable linkage from a COMDAT
+ // without also dropping the other members of the COMDAT.
+ // The inliner doesn't visit non-function entities which are in COMDAT
+ // groups so it is unsafe to do so *unless* the linkage is local.
+ if (!F->hasLocalLinkage() && F->hasComdat())
continue;
// Remove any call graph edges from the function to its callees.
CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
// Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.insert(CGN);
+ FunctionsToRemove.push_back(CGN);
}
+ if (FunctionsToRemove.empty())
+ return false;
// Now that we know which functions to delete, do so. We didn't want to do
// this inline, because that would invalidate our CallGraph::iterator
// objects. :(
//
- // Note that it doesn't matter that we are iterating over a non-stable set
+ // Note that it doesn't matter that we are iterating over a non-stable order
// here to do this, it doesn't matter which order the functions are deleted
// in.
- bool Changed = false;
- for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
- E = FunctionsToRemove.end(); I != E; ++I) {
- resetCachedCostInfo((*I)->getFunction());
+ array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+ FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+ FunctionsToRemove.end()),
+ FunctionsToRemove.end());
+ for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end();
+ I != E; ++I) {
delete CG.removeFunctionFromModule(*I);
++NumDeleted;
- Changed = true;
}
-
- return Changed;
+ return true;
}