From 28f872f8a1945635f30763805c1418a90c6b345e Mon Sep 17 00:00:00 2001
From: Bob Wilson
Date: Mon, 19 Nov 2012 07:04:35 +0000
Subject: [PATCH] Clean up handling of always-inline functions in the inliner.

This patch moves the isInlineViable function from the InlineAlways pass
into the InlineCostAnalyzer and then changes the InlineCost computation
to use that simple check for always-inline functions. All the
special-case checks for AlwaysInline in the CallAnalyzer can then go
away.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168300 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/InlineCost.h  |   3 +
 lib/Analysis/InlineCost.cpp         | 176 +++++++++++++++++-----------
 lib/Transforms/IPO/InlineAlways.cpp |  54 ++-------
 3 files changed, 118 insertions(+), 115 deletions(-)

diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index af07d381690..82a3a566c92 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -129,6 +129,9 @@ namespace llvm {
     // Note: This is used by out-of-tree passes, please do not remove without
     // adding a replacement API.
     InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold);
+
+    /// \brief Minimal filter to detect invalid constructs for inlining.
+    bool isInlineViable(Function &Callee);
   };
 }
 
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 0da54134638..458e25503d7 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -49,7 +49,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 
   int Threshold;
   int Cost;
-  const bool AlwaysInline;
 
   bool IsCallerRecursive;
   bool IsRecursiveCall;
@@ -128,7 +127,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
 public:
   CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold)
     : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
-      AlwaysInline(F.getFnAttributes().hasAttribute(Attributes::AlwaysInline)),
       IsCallerRecursive(false), IsRecursiveCall(false),
       ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
       NumInstructions(0), NumVectorInstructions(0),
@@ -142,7 +140,6 @@ public:
 
   int getThreshold() { return Threshold; }
   int getCost() { return Cost; }
-  bool isAlwaysInline() { return AlwaysInline; }
 
   // Keep a bunch of stats about the cost savings found so we can print them
   // out when debugging.
@@ -281,9 +278,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
                       Ty->getPrimitiveSizeInBits());
   }
 
-  // We will happily inline static alloca instructions or dynamic alloca
-  // instructions in always-inline situations.
-  if (AlwaysInline || I.isStaticAlloca())
+  // We will happily inline static alloca instructions.
+  if (I.isStaticAlloca())
     return Base::visitAlloca(I);
 
   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
@@ -743,7 +739,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
 
     // Check if we've passed the threshold so we don't spin in huge basic
     // blocks that will never inline.
-    if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+    if (Cost > (Threshold + VectorBonus))
       return false;
   }
 
@@ -805,70 +801,69 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   int SingleBBBonus = Threshold / 2;
   Threshold += SingleBBBonus;
 
-  // Unless we are always-inlining, perform some tweaks to the cost and
-  // threshold based on the direct callsite information.
-  if (!AlwaysInline) {
-    // We want to more aggressively inline vector-dense kernels, so up the
-    // threshold, and we'll lower it if the % of vector instructions gets too
-    // low.
-    assert(NumInstructions == 0);
-    assert(NumVectorInstructions == 0);
-    FiftyPercentVectorBonus = Threshold;
-    TenPercentVectorBonus = Threshold / 2;
-
-    // Give out bonuses per argument, as the instructions setting them up will
-    // be gone after inlining.
-    for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
-      if (TD && CS.isByValArgument(I)) {
-        // We approximate the number of loads and stores needed by dividing the
-        // size of the byval type by the target's pointer size.
-        PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
-        unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
-        unsigned PointerSize = TD->getPointerSizeInBits();
-        // Ceiling division.
-        unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
-
-        // If it generates more than 8 stores it is likely to be expanded as an
-        // inline memcpy so we take that as an upper bound. Otherwise we assume
-        // one load and one store per word copied.
-        // FIXME: The maxStoresPerMemcpy setting from the target should be used
-        // here instead of a magic number of 8, but it's not available via
-        // DataLayout.
-        NumStores = std::min(NumStores, 8U);
-
-        Cost -= 2 * NumStores * InlineConstants::InstrCost;
-      } else {
-        // For non-byval arguments subtract off one instruction per call
-        // argument.
-        Cost -= InlineConstants::InstrCost;
-      }
+  // Perform some tweaks to the cost and threshold based on the direct
+  // callsite information.
+
+  // We want to more aggressively inline vector-dense kernels, so up the
+  // threshold, and we'll lower it if the % of vector instructions gets too
+  // low.
+  assert(NumInstructions == 0);
+  assert(NumVectorInstructions == 0);
+  FiftyPercentVectorBonus = Threshold;
+  TenPercentVectorBonus = Threshold / 2;
+
+  // Give out bonuses per argument, as the instructions setting them up will
+  // be gone after inlining.
+  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
+    if (TD && CS.isByValArgument(I)) {
+      // We approximate the number of loads and stores needed by dividing the
+      // size of the byval type by the target's pointer size.
+      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
+      unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
+      unsigned PointerSize = TD->getPointerSizeInBits();
+      // Ceiling division.
+      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
+
+      // If it generates more than 8 stores it is likely to be expanded as an
+      // inline memcpy so we take that as an upper bound. Otherwise we assume
+      // one load and one store per word copied.
+      // FIXME: The maxStoresPerMemcpy setting from the target should be used
+      // here instead of a magic number of 8, but it's not available via
+      // DataLayout.
+      NumStores = std::min(NumStores, 8U);
+
+      Cost -= 2 * NumStores * InlineConstants::InstrCost;
+    } else {
+      // For non-byval arguments subtract off one instruction per call
+      // argument.
+      Cost -= InlineConstants::InstrCost;
     }
+  }
 
-    // If there is only one call of the function, and it has internal linkage,
-    // the cost of inlining it drops dramatically.
-    if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
-      Cost += InlineConstants::LastCallToStaticBonus;
-
-    // If the instruction after the call, or if the normal destination of the
-    // invoke is an unreachable instruction, the function is noreturn. As such,
-    // there is little point in inlining this unless there is literally zero
-    // cost.
-    Instruction *Instr = CS.getInstruction();
-    if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
-      if (isa<UnreachableInst>(II->getNormalDest()->begin()))
-        Threshold = 1;
-    } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+  // If there is only one call of the function, and it has internal linkage,
+  // the cost of inlining it drops dramatically.
+  if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
+    Cost += InlineConstants::LastCallToStaticBonus;
+
+  // If the instruction after the call, or if the normal destination of the
+  // invoke is an unreachable instruction, the function is noreturn. As such,
+  // there is little point in inlining this unless there is literally zero
+  // cost.
+  Instruction *Instr = CS.getInstruction();
+  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
+    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
       Threshold = 1;
+  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
+    Threshold = 1;
 
-    // If this function uses the coldcc calling convention, prefer not to inline
-    // it.
-    if (F.getCallingConv() == CallingConv::Cold)
-      Cost += InlineConstants::ColdccPenalty;
+  // If this function uses the coldcc calling convention, prefer not to inline
+  // it.
+  if (F.getCallingConv() == CallingConv::Cold)
+    Cost += InlineConstants::ColdccPenalty;
 
-    // Check if we're done. This can happen due to bonuses and penalties.
-    if (Cost > Threshold)
-      return false;
-  }
+  // Check if we're done. This can happen due to bonuses and penalties.
+  if (Cost > Threshold)
+    return false;
 
   if (F.empty())
     return true;
@@ -930,7 +925,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
     // Bail out the moment we cross the threshold. This means we'll under-count
     // the cost, but only when undercounting doesn't matter.
-    if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+    if (Cost > (Threshold + VectorBonus))
      break;
 
     BasicBlock *BB = BBWorklist[Idx];
@@ -1015,7 +1010,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
 
   Threshold += VectorBonus;
 
-  return AlwaysInline || Cost < Threshold;
+  return Cost < Threshold;
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1040,10 +1035,22 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) {
 
 InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
                                              int Threshold) {
+  // Cannot inline indirect calls.
+  if (!Callee)
+    return llvm::InlineCost::getNever();
+
+  // Calls to functions with always-inline attributes should be inlined
+  // whenever possible.
+  if (Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline)) {
+    if (isInlineViable(*Callee))
+      return llvm::InlineCost::getAlways();
+    return llvm::InlineCost::getNever();
+  }
+
   // Don't inline functions which can be redefined at link-time to mean
   // something else. Don't inline functions marked noinline or call sites
   // marked noinline.
-  if (!Callee || Callee->mayBeOverridden() ||
+  if (Callee->mayBeOverridden() ||
       Callee->getFnAttributes().hasAttribute(Attributes::NoInline) ||
       CS.isNoInline())
     return llvm::InlineCost::getNever();
@@ -1059,9 +1066,36 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
   // Check if there was a reason to force inlining or no inlining.
   if (!ShouldInline && CA.getCost() < CA.getThreshold())
     return InlineCost::getNever();
-  if (ShouldInline && (CA.isAlwaysInline() ||
-                       CA.getCost() >= CA.getThreshold()))
+  if (ShouldInline && CA.getCost() >= CA.getThreshold())
     return InlineCost::getAlways();
 
   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
 }
+
+bool InlineCostAnalyzer::isInlineViable(Function &F) {
+  bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+    // Disallow inlining of functions which contain an indirect branch.
+    if (isa<IndirectBrInst>(BI->getTerminator()))
+      return false;
+
+    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+         ++II) {
+      CallSite CS(II);
+      if (!CS)
+        continue;
+
+      // Disallow recursive calls.
+      if (&F == CS.getCalledFunction())
+        return false;
+
+      // Disallow calls which expose returns-twice to a function not previously
+      // attributed as such.
+      if (!ReturnsTwice && CS.isCall() &&
+          cast<CallInst>(CS.getInstruction())->canReturnTwice())
+        return false;
+    }
+  }
+
+  return true;
+}
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index b1c36c15db0..6f4b810acc9 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,6 +32,7 @@ namespace {
 
 // AlwaysInliner only inlines functions that are marked as "always inline".
 class AlwaysInliner : public Inliner {
+  InlineCostAnalyzer CA;
 public:
   // Use extremely low threshold.
   AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
@@ -63,35 +64,6 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
   return new AlwaysInliner(InsertLifetime);
 }
 
-/// \brief Minimal filter to detect invalid constructs for inlining.
-static bool isInlineViable(Function &F) {
-  bool ReturnsTwice =F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice);
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    // Disallow inlining of functions which contain an indirect branch.
-    if (isa<IndirectBrInst>(BI->getTerminator()))
-      return false;
-
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
-         ++II) {
-      CallSite CS(II);
-      if (!CS)
-        continue;
-
-      // Disallow recursive calls.
-      if (&F == CS.getCalledFunction())
-        return false;
-
-      // Disallow calls which expose returns-twice to a function not previously
-      // attributed as such.
-      if (!ReturnsTwice && CS.isCall() &&
-          cast<CallInst>(CS.getInstruction())->canReturnTwice())
-        return false;
-    }
-  }
-
-  return true;
-}
-
 /// \brief Get the inline cost for the always-inliner.
 ///
 /// The always inliner *only* handles functions which are marked with the
@@ -106,27 +78,21 @@ static bool isInlineViable(Function &F) {
 /// likely not worth it in practice.
 InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
   Function *Callee = CS.getCalledFunction();
-  // We assume indirect calls aren't calling an always-inline function.
-  if (!Callee) return InlineCost::getNever();
-
-  // We can't inline calls to external functions.
-  // FIXME: We shouldn't even get here.
-  if (Callee->isDeclaration()) return InlineCost::getNever();
-
-  // Return never for anything not marked as always inline.
-  if (!Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline))
-    return InlineCost::getNever();
 
-  // Do some minimal analysis to preclude non-viable functions.
-  if (!isInlineViable(*Callee))
-    return InlineCost::getNever();
+  // Only inline direct calls to functions with always-inline attributes
+  // that are viable for inlining. FIXME: We shouldn't even get here for
+  // declarations.
+  if (Callee && !Callee->isDeclaration() &&
+      Callee->getFnAttributes().hasAttribute(Attributes::AlwaysInline) &&
+      CA.isInlineViable(*Callee))
+    return InlineCost::getAlways();
 
-  // Otherwise, force inlining.
-  return InlineCost::getAlways();
+  return InlineCost::getNever();
 }
 
 // doInitialization - Initializes the vector of functions that have not
 // been annotated with the "always inline" attribute.
 bool AlwaysInliner::doInitialization(CallGraph &CG) {
+  CA.setDataLayout(getAnalysisIfAvailable<DataLayout>());
   return false;
 }
-- 
2.34.1
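
A quick illustration of what the viability filter screens out (hypothetical code, not part of the patch; the file and function names are invented). GNU C's computed goto lowers to LLVM's indirectbr terminator, the first of isInlineViable's three rejections; the other two are direct recursion and a call that can return twice (e.g. setjmp) inside a function not itself attributed returns_twice. With this change, getInlineCost reports such a callee as never-inlinable even though it carries the always-inline attribute, rather than relying on the special cases deleted from CallAnalyzer.

    /* dispatch.c -- hypothetical example, not part of the patch.
       The computed goto below becomes an 'indirectbr' terminator in
       LLVM IR, so isInlineViable() returns false for this function
       and the always-inliner refuses to force-inline it. */
    __attribute__((always_inline)) static inline int dispatch(int op) {
      static void *targets[] = { &&add_one, &&add_two };
      goto *targets[op & 1];  /* indirect branch */
    add_one:
      return op + 1;
    add_two:
      return op + 2;
    }

    int caller(int x) {
      /* InlineCostAnalyzer::getInlineCost now answers
         InlineCost::getNever() for this call site, and
         AlwaysInliner::getInlineCost agrees. */
      return dispatch(x);
    }

Hoisting the check into InlineCostAnalyzer means the regular inliner and the always-inliner now give the same answer for always-inline callees, which is what allows the AlwaysInline special cases in CallAnalyzer to be removed.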