From c738af5a7f115bed34f6b05660d8ce35c03726f2 Mon Sep 17 00:00:00 2001 From: Diego Novillo Date: Fri, 27 Nov 2015 23:14:51 +0000 Subject: [PATCH] SamplePGO - Add initial support for inliner annotations. This adds two thresholds to the sample profiler to affect inlining decisions: the concepts of global hotness and coldness. Functions that have accumulated more than a certain fraction of samples at runtime are annotated with the InlineHint attribute. Conversely, functions that accumulate less than a certain fraction of samples are annotated with the Cold attribute. This is very similar to the hints emitted by Clang when using instrumentation profiles. Notice that this is a very blunt instrument. A function may have globally collected a significant fraction of samples, but that does not necessarily mean that every callsite for that function is hot. Ideally, we would annotate each callsite with the samples collected at that callsite. This way, the inliner can incorporate all these weights into its cost model. Once the inliner offers this functionality, we can change the hints emitted here to a more precise per-callsite annotation. For now, this provides some speedup on our internal benchmarks. I've observed speedups of up to 23% (though the geometric mean is about 3%). I expect these numbers to improve as the inliner gets better annotations. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254212 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/SampleProfile.cpp | 80 ++++++++++++++++++- .../SampleProfile/Inputs/inline-hint.prof | 3 + test/Transforms/SampleProfile/inline-hint.ll | 38 +++++++++ 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/SampleProfile/Inputs/inline-hint.prof create mode 100644 test/Transforms/SampleProfile/inline-hint.ll diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 69194eac078..5cb71f71e70 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -75,6 +75,16 @@ static cl::opt SampleProfileHotThreshold( "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"), cl::desc("Inlined functions that account for more than N% of all samples " "collected in the parent function, will be inlined again.")); +static cl::opt SampleProfileGlobalHotThreshold( + "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"), + cl::desc("Top-level functions that account for more than N% of all samples " + "collected in the profile, will be marked as hot for the inliner " + "to consider.")); +static cl::opt SampleProfileGlobalColdThreshold( + "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"), + cl::desc("Top-level functions that account for less than N% of all samples " + "collected in the profile, will be marked as cold for the inliner " + "to consider.")); namespace { typedef DenseMap BlockWeightMap; @@ -96,7 +106,8 @@ public: SampleProfileLoader(StringRef Name = SampleProfileFile) : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(), - Samples(nullptr), Filename(Name), ProfileIsValid(false) { + Samples(nullptr), Filename(Name), ProfileIsValid(false), + TotalCollectedSamples(0) { initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); } @@ -121,6 +132,7 @@ protected: const 
FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; bool inlineHotFunctions(Function &F); + bool emitInlineHints(Function &F); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -185,6 +197,12 @@ protected: /// \brief Flag indicating whether the profile input loaded successfully. bool ProfileIsValid; + + /// \brief Total number of samples collected in this profile. + /// + /// This is the sum of all the samples collected in all the functions executed + /// at runtime. + uint64_t TotalCollectedSamples; }; class SampleCoverageTracker { @@ -582,6 +600,60 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return FS; } +/// \brief Emit an inline hint if \p F is globally hot or cold. +/// +/// If \p F consumes a significant fraction of samples (indicated by +/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the +/// inliner to consider the function hot. +/// +/// If \p F consumes a small fraction of samples (indicated by +/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner +/// to consider the function cold. +/// +/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a +/// function globally hot or cold, we should be annotating individual callsites. +/// This is not currently possible, but work on the inliner will eventually +/// provide this ability. See http://reviews.llvm.org/D15003 for details and +/// discussion. +/// +/// \returns True if either attribute was applied to \p F. 
+bool SampleProfileLoader::emitInlineHints(Function &F) { + if (TotalCollectedSamples == 0) + return false; + + uint64_t FunctionSamples = Samples->getTotalSamples(); + double SamplesPercent = + (double)FunctionSamples / (double)TotalCollectedSamples * 100.0; + + // If the function's sample share is at or above the hot threshold, mark + // it globally hot. + if (SamplesPercent >= SampleProfileGlobalHotThreshold) { + F.addFnAttr(llvm::Attribute::InlineHint); + emitOptimizationRemark( + F.getContext(), DEBUG_TYPE, F, DebugLoc(), + Twine("Applied inline hint to globally hot function '" + F.getName() + + "' with " + Twine(std::to_string(SamplesPercent)) + + "% of samples (threshold: " + + Twine(std::to_string(SampleProfileGlobalHotThreshold)) + "%)")); + return true; + } + + // If the function's sample share is at or below the cold threshold, mark + // it globally cold. + if (SamplesPercent <= SampleProfileGlobalColdThreshold) { + F.addFnAttr(llvm::Attribute::Cold); + emitOptimizationRemark( + F.getContext(), DEBUG_TYPE, F, DebugLoc(), + Twine("Applied cold hint to globally cold function '" + F.getName() + + "' with " + Twine(std::to_string(SamplesPercent)) + + "% of samples (threshold: " + + Twine(std::to_string(SampleProfileGlobalColdThreshold)) + "%)")); + return true; + } + + return false; +} + /// \brief Iteratively inline hot callsites of a function. /// /// Iteratively traverse all callsites of the function \p F, and find if @@ -1088,6 +1160,8 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { DEBUG(dbgs() << "Line number for the first instruction in " << F.getName() << ": " << getFunctionLoc(F) << "\n"); + Changed |= emitInlineHints(F); + Changed |= inlineHotFunctions(F); // Compute basic block weights. @@ -1165,6 +1239,10 @@ bool SampleProfileLoader::runOnModule(Module &M) { if (!ProfileIsValid) return false; + // Compute the total number of samples collected in this profile. 
+ for (const auto &I : Reader->getProfiles()) + TotalCollectedSamples += I.second.getTotalSamples(); + bool retval = false; for (auto &F : M) if (!F.isDeclaration()) { diff --git a/test/Transforms/SampleProfile/Inputs/inline-hint.prof b/test/Transforms/SampleProfile/Inputs/inline-hint.prof new file mode 100644 index 00000000000..a6840346eb4 --- /dev/null +++ b/test/Transforms/SampleProfile/Inputs/inline-hint.prof @@ -0,0 +1,3 @@ +_Z6hot_fnRxi:700:0 +_Z7cold_fnRxi:1:0 +other:299:0 diff --git a/test/Transforms/SampleProfile/inline-hint.ll b/test/Transforms/SampleProfile/inline-hint.ll new file mode 100644 index 00000000000..16c4e64ec5b --- /dev/null +++ b/test/Transforms/SampleProfile/inline-hint.ll @@ -0,0 +1,38 @@ +; RUN: opt %s -sample-profile -sample-profile-file=%S/Inputs/inline-hint.prof -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s +; +; CHECK: Applied cold hint to globally cold function '_Z7cold_fnRxi' with 0.1 +define void @_Z7cold_fnRxi() !dbg !4 { +entry: + ret void, !dbg !29 +} + +; CHECK: Applied inline hint to globally hot function '_Z6hot_fnRxi' with 70.0 +define void @_Z6hot_fnRxi() #0 !dbg !10 { +entry: + ret void, !dbg !38 +} + +!llvm.module.flags = !{!17, !18} +!llvm.ident = !{!19} + +!1 = !DIFile(filename: "inline-hint.cc", directory: ".") +!2 = !{} +!3 = !{!4, !10, !11, !14} +!4 = distinct !DISubprogram(name: "cold_fn", linkageName: "_Z7cold_fnRxi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null, !7, !9} +!7 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !8, size: 64, align: 64) +!8 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed) +!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!10 = distinct !DISubprogram(name: "hot_fn", linkageName: "_Z6hot_fnRxi", scope: !1, file: !1, line: 7, type: !5, isLocal: false, 
isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2) +!11 = distinct !DISubprogram(name: "compute", linkageName: "_Z7computex", scope: !1, file: !1, line: 11, type: !12, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, variables: !2) +!12 = !DISubroutineType(types: !13) +!13 = !{!8, !8} +!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !15, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, variables: !2) +!15 = !DISubroutineType(types: !16) +!16 = !{!9} +!17 = !{i32 2, !"Dwarf Version", i32 4} +!18 = !{i32 2, !"Debug Info Version", i32 3} +!19 = !{!"clang version 3.8.0 (trunk 254067) (llvm/trunk 254079)"} +!29 = !DILocation(line: 5, column: 1, scope: !4) +!38 = !DILocation(line: 9, column: 1, scope: !10) -- 2.34.1