From: Nadav Rotem
Date: Wed, 19 Sep 2012 08:08:04 +0000 (+0000)
Subject: Prevent inlining of callees which allocate lots of memory into a recursive caller.
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=92df026f0da91dc65ef6186e97ff87b1f53e8cd0

Prevent inlining of callees which allocate lots of memory into a recursive caller.

Example:

void foo() {
  ... foo();   // I'm recursive!
  bar();
}

bar() {
  int a[1000];  // large stack size
}

rdar://10853263

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164207 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 0cba135222b..f8de5335067 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -36,6 +36,9 @@ namespace llvm {
   const int LastCallToStaticBonus = -15000;
   const int ColdccPenalty = 2000;
   const int NoreturnPenalty = 10000;
+  /// Do not inline functions which allocate this many bytes on the stack
+  /// when the caller is recursive.
+  const int TotalAllocaSizeRecursiveCaller = 1024;
 }

 /// \brief Represents the cost of inlining a function.
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 82f53f7b0f6..1a94665096b 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -51,9 +51,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   int Cost;
   const bool AlwaysInline;

-  bool IsRecursive;
+  bool IsCallerRecursive;
+  bool IsRecursiveCall;
   bool ExposesReturnsTwice;
   bool HasDynamicAlloca;
+  /// Number of bytes allocated statically by the callee.
+  uint64_t AllocatedSize;
   unsigned NumInstructions, NumVectorInstructions;
   int FiftyPercentVectorBonus, TenPercentVectorBonus;
   int VectorBonus;
@@ -126,7 +129,8 @@ public:
   CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
     : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
       AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
-      IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
+      IsCallerRecursive(false), IsRecursiveCall(false),
+      ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
       NumInstructions(0), NumVectorInstructions(0),
       FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
       NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
@@ -269,6 +273,13 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
   // FIXME: Check whether inlining will turn a dynamic alloca into a static
   // alloca, and handle that case.

+  // Accumulate the allocated size.
+  if (I.isStaticAlloca()) {
+    Type *Ty = I.getAllocatedType();
+    AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
+                      Ty->getPrimitiveSizeInBits());
+  }
+
   // We will happily inline static alloca instructions or dynamic alloca
   // instructions in always-inline situations.
   if (AlwaysInline || I.isStaticAlloca())
@@ -625,7 +636,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
     if (F == CS.getInstruction()->getParent()->getParent()) {
       // This flag will fully abort the analysis, so don't bother with anything
       // else.
-      IsRecursive = true;
+      IsRecursiveCall = true;
       return false;
     }

@@ -712,7 +723,14 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
     Cost += InlineConstants::InstrCost;

     // If the visit this instruction detected an uninlinable pattern, abort.
-    if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+      return false;
+
+    // If the caller is a recursive function then we don't want to inline
+    // functions which allocate a lot of stack space because it would increase
+    // the caller stack usage dramatically.
+    if (IsCallerRecursive &&
+        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
       return false;

     if (NumVectorInstructions > NumInstructions/2)
@@ -831,12 +849,14 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
     Cost += InlineConstants::LastCallToStaticBonus;

   // If the instruction after the call, or if the normal destination of the
-  // invoke is an unreachable instruction, the function is noreturn. As such,
-  // there is little point in inlining this unless there is literally zero cost.
-  if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+  // invoke is an unreachable instruction, the function is noreturn. As such,
+  // there is little point in inlining this unless there is literally zero
+  // cost.
+  Instruction *Instr = CS.getInstruction();
+  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
     if (isa<UnreachableInst>(II->getNormalDest()->begin()))
       Threshold = 1;
-  } else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction())))
+  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
     Threshold = 1;

   // If this function uses the coldcc calling convention, prefer not to inline
@@ -852,6 +872,20 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   if (F.empty())
     return true;

+  Function *Caller = CS.getInstruction()->getParent()->getParent();
+  // Check if the caller function is recursive itself.
+  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
+       U != E; ++U) {
+    CallSite Site(cast<User>(*U));
+    if (!Site)
+      continue;
+    Instruction *I = Site.getInstruction();
+    if (I->getParent()->getParent() == Caller) {
+      IsCallerRecursive = true;
+      break;
+    }
+  }
+
   // Track whether we've seen a return instruction. The first return
   // instruction is free, as at least one will usually disappear in inlining.
   bool HasReturn = false;
@@ -908,9 +942,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {

     // We never want to inline functions that contain an indirectbr. This is
     // incorrect because all the blockaddress's (in static global initializers
-    // for example) would be referring to the original function, and this indirect
-    // jump would jump from the inlined copy of the function into the original
-    // function which is extremely undefined behavior.
+    // for example) would be referring to the original function, and this
+    // indirect jump would jump from the inlined copy of the function into the
+    // original function which is extremely undefined behavior.
     // FIXME: This logic isn't really right; we can safely inline functions
     // with indirectbr's as long as no other function or global references the
     // blockaddress of a block within the current function. And as a QOI issue,
@@ -928,8 +962,16 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
     // Analyze the cost of this block. If we blow through the threshold, this
     // returns false, and we can bail on out.
     if (!analyzeBlock(BB)) {
-      if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
         return false;
+
+      // If the caller is a recursive function then we don't want to inline
+      // functions which allocate a lot of stack space because it would increase
+      // the caller stack usage dramatically.
+      if (IsCallerRecursive &&
+          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
+        return false;
+
       break;
     }

@@ -955,7 +997,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {

     // If we're unable to select a particular successor, just count all of
     // them.
-    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
+    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
+         ++TIdx)
       BBWorklist.insert(TI->getSuccessor(TIdx));

     // If we had any successors at this point, than post-inlining is likely to
@@ -1003,7 +1046,8 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
       Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
     return llvm::InlineCost::getNever();

-  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n");
+  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
+        << "...\n");

   CallAnalyzer CA(TD, *Callee, Threshold);
   bool ShouldInline = CA.analyzeCall(CS);
diff --git a/test/Transforms/Inline/recurseive.ll b/test/Transforms/Inline/recurseive.ll
new file mode 100644
index 00000000000..5fe8d1639ca
--- /dev/null
+++ b/test/Transforms/Inline/recurseive.ll
@@ -0,0 +1,38 @@
+; RUN: opt %s -inline -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+; rdar://10853263
+
+; Make sure that the callee is still here.
+; CHECK: define i32 @callee
+define i32 @callee(i32 %param) {
+  %yyy = alloca [100000 x i8]
+  %r = bitcast [100000 x i8]* %yyy to i8*
+  call void @foo2(i8* %r)
+  ret i32 4
+}
+
+; CHECK: define i32 @caller
+; CHECK-NEXT: entry:
+; CHECK-NOT: alloca
+; CHECK: ret
+define i32 @caller(i32 %param) {
+entry:
+  %t = call i32 @foo(i32 %param)
+  %cmp = icmp eq i32 %t, -1
+  br i1 %cmp, label %exit, label %cont
+
+cont:
+  %r = call i32 @caller(i32 %t)
+  %f = call i32 @callee(i32 %r)
+  br label %cont
+
+exit:
+  ret i32 4
+}
+
+declare void @foo2(i8* %in)
+
+declare i32 @foo(i32 %param)
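
For illustration only, and not part of the patch: a self-contained C sketch of the scenario the commit message describes, using the hypothetical foo/bar names from the example above. If bar() were inlined into the recursive foo(), every active recursion level would carry bar()'s large frame, which is what the new InlineConstants::TotalAllocaSizeRecursiveCaller limit (1024 bytes of static allocas in the callee) is meant to prevent.

#include <string.h>

/* Callee with a large static stack allocation (well above 1024 bytes). */
void bar(void) {
  char a[100000];          /* large static alloca in the callee */
  memset(a, 1, sizeof(a)); /* use the buffer so it is not optimized away */
}

/* Recursive caller; inlining bar() here would multiply its frame by the
 * recursion depth. */
void foo(int depth) {
  if (depth == 0)
    return;
  foo(depth - 1);          /* I'm recursive! */
  bar();                   /* candidate call site for inlining */
}

int main(void) {
  foo(100);
  return 0;
}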