From 77226a03dca98e6237c1068f2652fe41bea7b687 Mon Sep 17 00:00:00 2001 From: Diego Novillo Date: Fri, 24 May 2013 12:26:52 +0000 Subject: [PATCH] Add a new function attribute 'cold' to functions. Other than recognizing the attribute, the patch does little else. It changes the branch probability analyzer so that edges into blocks postdominated by a cold function are given low weight. Added analysis and code generation tests. Added documentation for the new attribute. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182638 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.rst | 5 ++ include/llvm-c/Core.h | 1 + include/llvm/Analysis/BranchProbabilityInfo.h | 4 + include/llvm/IR/Attributes.h | 1 + lib/Analysis/BranchProbabilityInfo.cpp | 81 +++++++++++++++++++ lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 2 + lib/AsmParser/LLToken.h | 1 + lib/IR/Attributes.cpp | 3 + lib/IR/Verifier.cpp | 3 +- test/Analysis/BranchProbabilityInfo/basic.ll | 58 +++++++++++++ test/CodeGen/X86/block-placement.ll | 32 ++++++++ test/Feature/cold.ll | 9 +++ 13 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 test/Feature/cold.ll diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 5e32eba7f72..e902159bf08 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -812,6 +812,11 @@ example: This attribute indicates that the inliner should attempt to inline this function into callers whenever possible, ignoring any active inlining size threshold for this caller. +``cold`` + This attribute indicates that this function is rarely called. When + computing edge weights, basic blocks post-dominated by a cold + function call are also considered to be cold; and, thus, given low + weight. ``nonlazybind`` This attribute suppresses lazy symbol binding for the function. This may make calls to the function faster, at the cost of extra program diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 0920c5d0b0d..409c0f15c55 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -166,6 +166,7 @@ typedef enum { and the path forward agreed upon. LLVMAddressSafety = 1ULL << 32, LLVMStackProtectStrongAttribute = 1ULL<<33 + LLVMCold = 1ULL << 34 */ } LLVMAttribute; diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index 6c23f7c3aeb..4ff7121728e 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -131,11 +131,15 @@ private: /// \brief Track the set of blocks directly succeeded by a returning block. SmallPtrSet PostDominatedByUnreachable; + /// \brief Track the set of blocks that always lead to a cold call. + SmallPtrSet PostDominatedByColdCall; + /// \brief Get sum of the block successors' weights. uint32_t getSumForBlock(const BasicBlock *BB) const; bool calcUnreachableHeuristics(BasicBlock *BB); bool calcMetadataWeights(BasicBlock *BB); + bool calcColdCallHeuristics(BasicBlock *BB); bool calcPointerHeuristics(BasicBlock *BB); bool calcLoopBranchHeuristics(BasicBlock *BB); bool calcZeroHeuristics(BasicBlock *BB); diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 2c7da6485dc..0d14709fe9f 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -68,6 +68,7 @@ public: ///< 0 means unaligned (different from align(1)) AlwaysInline, ///< inline=always ByVal, ///< Pass structure by value + Cold, ///< Marks function as being in a cold path. InlineHint, ///< Source said inlining was desirable InReg, ///< Force argument to be passed in register MinSize, ///< Function must be optimized for size first diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 6c5885601fa..54c148ed293 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -69,6 +69,20 @@ static const uint32_t UR_TAKEN_WEIGHT = 1; /// easily subsume it. static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; +/// \brief Weight for a branch taken going into a cold block. +/// +/// This is the weight for a branch taken toward a block marked +/// cold. A block is marked cold if it's postdominated by a +/// block containing a call to a cold function. Cold functions +/// are those marked with attribute 'cold'. +static const uint32_t CC_TAKEN_WEIGHT = 4; + +/// \brief Weight for a branch not-taken into a cold block. +/// +/// This is the weight for a branch not taken toward a block marked +/// cold. +static const uint32_t CC_NONTAKEN_WEIGHT = 64; + static const uint32_t PH_TAKEN_WEIGHT = 20; static const uint32_t PH_NONTAKEN_WEIGHT = 12; @@ -193,6 +207,69 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { return true; } +/// \brief Calculate edge weights for edges leading to cold blocks. +/// +/// A cold block is one post-dominated by a block with a call to a +/// cold function. Those edges are unlikely to be taken, so we give +/// them relatively low weight. +/// +/// Return true if we could compute the weights for cold edges. +/// Return false, otherwise. +bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) + return false; + + // Determine which successors are post-dominated by a cold block. + SmallVector ColdEdges; + ColdEdges.reserve(TI->getNumSuccessors()); + SmallVector NormalEdges; + NormalEdges.reserve(TI->getNumSuccessors()); + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + if (PostDominatedByColdCall.count(*I)) + ColdEdges.push_back(I.getSuccessorIndex()); + else + NormalEdges.push_back(I.getSuccessorIndex()); + + // If all successors are in the set of blocks post-dominated by cold calls, + // this block is in the set post-dominated by cold calls. + if (ColdEdges.size() == TI->getNumSuccessors()) + PostDominatedByColdCall.insert(BB); + else { + // Otherwise, if the block itself contains a cold function, add it to the + // set of blocks postdominated by a cold call. + assert(!PostDominatedByColdCall.count(BB)); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast(I)) + if (CI->hasFnAttr(Attribute::Cold)) { + PostDominatedByColdCall.insert(BB); + break; + } + } + + // Skip probabilities if this block has a single successor. + if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) + return false; + + uint32_t ColdWeight = + std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT); + for (SmallVector::iterator I = ColdEdges.begin(), + E = ColdEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, ColdWeight); + + if (NormalEdges.empty()) + return true; + uint32_t NormalWeight = std::max( + CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT); + for (SmallVector::iterator I = NormalEdges.begin(), + E = NormalEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, NormalWeight); + + return true; +} + // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion // between two pointer or pointer and NULL will fail. bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { @@ -397,6 +474,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { LastF = &F; // Store the last function we ran on for printing. LI = &getAnalysis(); assert(PostDominatedByUnreachable.empty()); + assert(PostDominatedByColdCall.empty()); // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. @@ -408,6 +486,8 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { continue; if (calcMetadataWeights(*I)) continue; + if (calcColdCallHeuristics(*I)) + continue; if (calcLoopBranchHeuristics(*I)) continue; if (calcPointerHeuristics(*I)) @@ -420,6 +500,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { } PostDominatedByUnreachable.clear(); + PostDominatedByColdCall.clear(); return false; } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index e7a9f2ad1e3..82d9975ca68 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -564,6 +564,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(alwaysinline); KEYWORD(byval); + KEYWORD(cold); KEYWORD(inlinehint); KEYWORD(inreg); KEYWORD(minsize); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 62d8070d186..b22d251f9e9 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -909,6 +909,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, continue; } case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; + case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; @@ -1222,6 +1223,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_alignstack: case lltok::kw_alwaysinline: + case lltok::kw_cold: case lltok::kw_inlinehint: case lltok::kw_minsize: case lltok::kw_naked: diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 3bf54fa1cc6..e889a2bfd0e 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -96,6 +96,7 @@ namespace lltok { kw_alwaysinline, kw_sanitize_address, kw_byval, + kw_cold, kw_inlinehint, kw_inreg, kw_minsize, diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 4fe6f9ddc59..e48ebb13352 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -217,6 +217,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "uwtable"; if (hasAttribute(Attribute::ZExt)) return "zeroext"; + if (hasAttribute(Attribute::Cold)) + return "cold"; // FIXME: These should be output like this: // @@ -396,6 +398,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::SanitizeMemory: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; case Attribute::Returned: return 1ULL << 39; + case Attribute::Cold: return 1ULL << 40; } llvm_unreachable("Unsupported attribute type"); } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index d106173b521..a94c1566a20 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -692,7 +692,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, I->getKindAsEnum() == Attribute::SanitizeMemory || I->getKindAsEnum() == Attribute::MinSize || I->getKindAsEnum() == Attribute::NoDuplicate || - I->getKindAsEnum() == Attribute::NoBuiltin) { + I->getKindAsEnum() == Attribute::NoBuiltin || + I->getKindAsEnum() == Attribute::Cold) { if (!isFunction) CheckFailed("Attribute '" + I->getKindAsString() + "' only applies to functions!", V); diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll index 08adfa8a36f..c6e1bde81df 100644 --- a/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/test/Analysis/BranchProbabilityInfo/basic.ll @@ -115,3 +115,61 @@ return: } !2 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64} + +declare void @coldfunc() cold + +define i32 @test5(i32 %a, i32 %b, i1 %flag) { +; CHECK: Printing analysis {{.*}} for function 'test5' +entry: + br i1 %flag, label %then, label %else +; CHECK: edge entry -> then probability is 4 / 68 +; CHECK: edge entry -> else probability is 64 / 68 + +then: + call void @coldfunc() + br label %exit +; CHECK: edge then -> exit probability is 16 / 16 = 100% + +else: + br label %exit +; CHECK: edge else -> exit probability is 16 / 16 = 100% + +exit: + %result = phi i32 [ %a, %then ], [ %b, %else ] + ret i32 %result +} + +declare i32 @regular_function(i32 %i) + +define i32 @test_cold_call_sites(i32* %a) { +; Test that edges to blocks post-dominated by cold call sites +; are marked as not expected to be taken. +; TODO(dnovillo) The calls to regular_function should not be merged, but +; they are currently being merged. Convert this into a code generation test +; after that is fixed. + +; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites' +; CHECK: edge entry -> then probability is 4 / 68 = 5.88235% +; CHECK: edge entry -> else probability is 64 / 68 = 94.1176% [HOT edge] + +entry: + %gep1 = getelementptr i32* %a, i32 1 + %val1 = load i32* %gep1 + %cond1 = icmp ugt i32 %val1, 1 + br i1 %cond1, label %then, label %else + +then: + ; This function is not declared cold, but this call site is. + %val4 = call i32 @regular_function(i32 %val1) cold + br label %exit + +else: + %gep2 = getelementptr i32* %a, i32 2 + %val2 = load i32* %gep2 + %val3 = call i32 @regular_function(i32 %val2) + br label %exit + +exit: + %ret = phi i32 [ %val4, %then ], [ %val3, %else ] + ret i32 %ret +} diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index 271fb425051..be627e08e0e 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -1089,3 +1089,35 @@ while.end: store double %rra.0, double* %arrayidx34, align 8 br label %for.cond } + +declare void @cold_function() cold + +define i32 @test_cold_calls(i32* %a) { +; Test that edges to blocks post-dominated by cold calls are +; marked as not expected to be taken. They should be laid out +; at the bottom. +; CHECK: test_cold_calls: +; CHECK: %entry +; CHECK: %else +; CHECK: %exit +; CHECK: %then + +entry: + %gep1 = getelementptr i32* %a, i32 1 + %val1 = load i32* %gep1 + %cond1 = icmp ugt i32 %val1, 1 + br i1 %cond1, label %then, label %else + +then: + call void @cold_function() + br label %exit + +else: + %gep2 = getelementptr i32* %a, i32 2 + %val2 = load i32* %gep2 + br label %exit + +exit: + %ret = phi i32 [ %val1, %then ], [ %val2, %else ] + ret i32 %ret +} diff --git a/test/Feature/cold.ll b/test/Feature/cold.ll new file mode 100644 index 00000000000..dcf79c5ba39 --- /dev/null +++ b/test/Feature/cold.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +; CHECK: @fun() #0 +define void @fun() #0 { + ret void +} + +; CHECK: attributes #0 = { cold } +attributes #0 = { cold } -- 2.34.1