int Cost = IC.getValue();
int CurrentThreshold = InlineThreshold;
- Function *Fn = CS.getCaller();
- if (Fn && !Fn->isDeclaration() &&
- Fn->hasFnAttr(Attribute::OptimizeForSize) &&
+ Function *Caller = CS.getCaller();
+ if (Caller && !Caller->isDeclaration() &&
+ Caller->hasFnAttr(Attribute::OptimizeForSize) &&
InlineLimit.getNumOccurrences() == 0 &&
InlineThreshold != 50)
CurrentThreshold = 50;
return false;
}
+ // Try to detect the case where the current inlining candidate caller
+ // (call it B) is a static function and is an inlining candidate elsewhere,
+ // and the current candidate callee (call it C) is large enough that
+ // inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B
+ // into its callers.
+ if (Caller->hasLocalLinkage()) {
+ int TotalSecondaryCost = 0;
+ bool outerCallsFound = false;
+ bool allOuterCallsWillBeInlined = true;
+ bool someOuterCallWouldNotBeInlined = false;
+ for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
+ I != E; ++I) {
+ CallSite CS2 = CallSite::get(*I);
+
+ // If this isn't a call to Caller (it could be some other sort
+ // of reference) skip it.
+ if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
+ continue;
+
+ InlineCost IC2 = getInlineCost(CS2);
+ if (IC2.isNever())
+ allOuterCallsWillBeInlined = false;
+ if (IC2.isAlways() || IC2.isNever())
+ continue;
+
+ outerCallsFound = true;
+ int Cost2 = IC2.getValue();
+ int CurrentThreshold2 = InlineThreshold;
+ Function *Caller2 = CS2.getCaller();
+ if (Caller2 && !Caller2->isDeclaration() &&
+ Caller2->hasFnAttr(Attribute::OptimizeForSize) &&
+ InlineThreshold != 50)
+ CurrentThreshold2 = 50;
+
+ float FudgeFactor2 = getInlineFudgeFactor(CS2);
+
+ if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
+ allOuterCallsWillBeInlined = false;
+
+ // See if we have this case. The magic 6 is what InlineCost assigns
+ // for the call instruction, which we would be deleting.
+ if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
+ Cost2 + Cost - 6 >= (int)(CurrentThreshold2 * FudgeFactor2)) {
+ someOuterCallWouldNotBeInlined = true;
+ TotalSecondaryCost += Cost2;
+ }
+ }
+ // If all outer calls to Caller would get inlined, the cost for the last
+ // one is set very low by getInlineCost, in anticipation that Caller will
+ // be removed entirely. We did not account for this above unless there
+ // is only one caller of Caller.
+ if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+ TotalSecondaryCost -= 15000;
+
+ if (outerCallsFound && someOuterCallWouldNotBeInlined &&
+ TotalSecondaryCost < Cost) {
+ DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << Cost <<
+ ", outer Cost = " << TotalSecondaryCost << '\n');
+ return false;
+ }
+ }
+
DEBUG(errs() << " Inlining: cost=" << Cost
- << ", Call: " << *CS.getInstruction() << "\n");
+ << ", Call: " << *CS.getInstruction() << '\n');
return true;
}
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
CallSite CS = CallSite::get(I);
- // If this this isn't a call, or it is a call to an intrinsic, it can
+ // If this isn't a call, or it is a call to an intrinsic, it can
// never be inlined.
if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
continue;
// try to do so.
if (!shouldInline(CS))
continue;
-
+
Function *Caller = CS.getCaller();
// Attempt to inline the function...
if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
--- /dev/null
+; RUN: opt < %s -inline -S | FileCheck %s
+; Test that bar and bar2 are both inlined throughout and removed.
+@A = weak global i32 0 ; <i32*> [#uses=1]
+@B = weak global i32 0 ; <i32*> [#uses=1]
+@C = weak global i32 0 ; <i32*> [#uses=1]
+
+define fastcc void @foo(i32 %X) {
+entry:
+; CHECK: @foo
+ %ALL = alloca i32, align 4 ; <i32*> [#uses=1]
+ %tmp1 = and i32 %X, 1 ; <i32> [#uses=1]
+ %tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
+
+cond_true: ; preds = %entry
+ store i32 1, i32* @A
+ br label %cond_next
+
+cond_next: ; preds = %cond_true, %entry
+ %tmp4 = and i32 %X, 2 ; <i32> [#uses=1]
+ %tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
+ br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
+
+cond_true5: ; preds = %cond_next
+ store i32 1, i32* @B
+ br label %cond_next7
+
+cond_next7: ; preds = %cond_true5, %cond_next
+ %tmp10 = and i32 %X, 4 ; <i32> [#uses=1]
+ %tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1]
+ br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
+
+cond_true11: ; preds = %cond_next7
+ store i32 1, i32* @C
+ br label %cond_next13
+
+cond_next13: ; preds = %cond_true11, %cond_next7
+ %tmp16 = and i32 %X, 8 ; <i32> [#uses=1]
+ %tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1]
+ br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
+
+cond_true17: ; preds = %cond_next13
+ call void @ext( i32* %ALL )
+ ret void
+
+UnifiedReturnBlock: ; preds = %cond_next13
+ ret void
+}
+
+; CHECK-NOT: @bar
+define internal fastcc void @bar(i32 %X) {
+entry:
+ %ALL = alloca i32, align 4 ; <i32*> [#uses=1]
+ %tmp1 = and i32 %X, 1 ; <i32> [#uses=1]
+ %tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
+
+cond_true: ; preds = %entry
+ store i32 1, i32* @A
+ br label %cond_next
+
+cond_next: ; preds = %cond_true, %entry
+ %tmp4 = and i32 %X, 2 ; <i32> [#uses=1]
+ %tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
+ br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
+
+cond_true5: ; preds = %cond_next
+ store i32 1, i32* @B
+ br label %cond_next7
+
+cond_next7: ; preds = %cond_true5, %cond_next
+ %tmp10 = and i32 %X, 4 ; <i32> [#uses=1]
+ %tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1]
+ br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
+
+cond_true11: ; preds = %cond_next7
+ store i32 1, i32* @C
+ br label %cond_next13
+
+cond_next13: ; preds = %cond_true11, %cond_next7
+ %tmp16 = and i32 %X, 8 ; <i32> [#uses=1]
+ %tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1]
+ br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
+
+cond_true17: ; preds = %cond_next13
+ call void @foo( i32 %X )
+ ret void
+
+UnifiedReturnBlock: ; preds = %cond_next13
+ ret void
+}
+
+define internal fastcc void @bar2(i32 %X) {
+entry:
+ call void @foo( i32 %X )
+ ret void
+}
+
+declare void @ext(i32*)
+
+define void @test(i32 %X) {
+entry:
+; CHECK: test
+; CHECK-NOT: @bar
+ tail call fastcc void @bar( i32 %X )
+ tail call fastcc void @bar( i32 %X )
+ tail call fastcc void @bar2( i32 %X )
+ tail call fastcc void @bar2( i32 %X )
+ ret void
+; CHECK: ret
+}