fix PR5009 by making CGSCCPM realize that a call was devirtualized

author Chris Lattner <sabre@nondot.org>

Sat, 1 May 2010 06:38:43 +0000 (06:38 +0000)

committer Chris Lattner <sabre@nondot.org>

Sat, 1 May 2010 06:38:43 +0000 (06:38 +0000)
author Chris Lattner <sabre@nondot.org>
Sat, 1 May 2010 06:38:43 +0000 (06:38 +0000)
committer Chris Lattner <sabre@nondot.org>
Sat, 1 May 2010 06:38:43 +0000 (06:38 +0000)
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp

index 288a7ef15830c28b2cdeae81993e31309b3e63e7..0c01ee5b828486d9981216921ec9414bf17559c1 100644 (file)
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -191,6 +191,10 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
      
      // Walk the function body looking for call sites.  Sync up the call sites in
      // CGN with those actually in the function.
+
+    // Keep track of the number of direct and indirect calls that were
+    // invalidated and removed.
+    unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0;
      
      // Get the set of call sites currently in the function.
      for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
@@ -209,6 +213,12 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
          assert(!CheckingMode &&
                 "CallGraphSCCPass did not update the CallGraph correctly!");
          
+        // If this was an indirect call site, count it.
+        if (I->second->getFunction() == 0)
+          ++NumIndirectRemoved;
+        else 
+          ++NumDirectRemoved;
+        
          // Just remove the edge from the set of callees, keep track of whether
          // I points to the last element of the vector.
          bool WasLast = I + 1 == E;
@@ -230,6 +240,9 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
      }
      
      // Loop over all of the instructions in the function, getting the callsites.
+    // Keep track of the number of direct/indirect calls added.
+    unsigned NumDirectAdded = 0, NumIndirectAdded = 0;
+    
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
        for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
          CallSite CS = CallSite::get(I);
@@ -286,19 +299,34 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
          assert(!CheckingMode &&
                 "CallGraphSCCPass did not update the CallGraph correctly!");
  
-        // If the call site didn't exist in the CGN yet, add it.  We assume that
-        // newly introduced call sites won't be indirect.  This could be fixed
-        // in the future.
+        // If the call site didn't exist in the CGN yet, add it.
          CallGraphNode *CalleeNode;
-        if (Function *Callee = CS.getCalledFunction())
+        if (Function *Callee = CS.getCalledFunction()) {
            CalleeNode = CG.getOrInsertFunction(Callee);
-        else
+          ++NumDirectAdded;
+        } else {
            CalleeNode = CG.getCallsExternalNode();
+          ++NumIndirectAdded;
+        }
          
          CGN->addCalledFunction(CS, CalleeNode);
          MadeChange = true;
        }
      
+    // We scanned the old callgraph node, removing invalidated call sites and
+    // then added back newly found call sites.  One thing that can happen is
+    // that an old indirect call site was deleted and replaced with a new direct
+    // call.  In this case, we have devirtualized a call, and CGSCCPM would like
+    // to iteratively optimize the new code.  Unfortunately, we don't really
+    // have a great way to detect when this happens.  As an approximation, we
+    // just look at whether the number of indirect calls is reduced and the
+    // number of direct calls is increased.  There are tons of ways to fool this
+    // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a
+    // direct call) but this is close enough.
+    if (NumIndirectRemoved > NumIndirectAdded &&
+        NumDirectRemoved < NumDirectAdded)
+      DevirtualizedCall = true;
+    
      // After scanning this function, if we still have entries in callsites, then
      // they are dangling pointers.  WeakVH should save us for this, so abort if
      // this happens.
@@ -315,6 +343,9 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
            for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
              I != E; ++I)
                (*I)->dump();
+          if (DevirtualizedCall)
+            dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
+
           } else {
             dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
           }
diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll

new file mode 100644 (file)

index 0000000..0a50786
--- /dev/null
+++ b/test/Transforms/Inline/devirtualize-3.ll
@@ -0,0 +1,79 @@
+; RUN: opt -inline -S -scalarrepl -gvn -instcombine %s | FileCheck %s
+; PR5009
+
+; CHECK: define i32 @main() 
+; CHECK-NEXT: entry:
+; CHECK-NEXT:  call void @exit(i32 38) 
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.cont_t = type { void (i8*, i32)*, i8* }
+%struct.foo_sf_t = type { %struct.cont_t*, i32 }
+
+define i32 @main() nounwind ssp {
+entry:
+  %cont = alloca %struct.cont_t, align 8          ; <%struct.cont_t*> [#uses=4]
+  %tmp = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
+  %tmp1 = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=2]
+  store void (i8*, i32)* bitcast (void (%struct.cont_t*, i32)* @quit to void (i8*, i32)*), void (i8*, i32)** %tmp1
+  %tmp2 = load void (i8*, i32)** %tmp1            ; <void (i8*, i32)*> [#uses=1]
+  store void (i8*, i32)* %tmp2, void (i8*, i32)** %tmp
+  %tmp3 = getelementptr inbounds %struct.cont_t* %cont, i32 0, i32 1 ; <i8**> [#uses=1]
+  store i8* null, i8** %tmp3
+  call void @foo(%struct.cont_t* %cont)
+  ret i32 0
+}
+
+define internal void @quit(%struct.cont_t* %cont, i32 %rcode) nounwind ssp {
+entry:
+  call void @exit(i32 %rcode) noreturn
+  unreachable
+}
+
+define internal void @foo(%struct.cont_t* %c) nounwind ssp {
+entry:
+  %sf = alloca %struct.foo_sf_t, align 8          ; <%struct.foo_sf_t*> [#uses=3]
+  %next = alloca %struct.cont_t, align 8          ; <%struct.cont_t*> [#uses=3]
+  %tmp = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
+  store %struct.cont_t* %c, %struct.cont_t** %tmp
+  %tmp2 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1]
+  store i32 2, i32* %tmp2
+  %tmp4 = getelementptr inbounds %struct.cont_t* %next, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
+  store void (i8*, i32)* bitcast (void (%struct.foo_sf_t*, i32)* @foo2 to void (i8*, i32)*), void (i8*, i32)** %tmp4
+  %tmp5 = getelementptr inbounds %struct.cont_t* %next, i32 0, i32 1 ; <i8**> [#uses=1]
+  %conv = bitcast %struct.foo_sf_t* %sf to i8*    ; <i8*> [#uses=1]
+  store i8* %conv, i8** %tmp5
+  call void @bar(%struct.cont_t* %next, i32 14)
+  ret void
+}
+
+define internal void @foo2(%struct.foo_sf_t* %sf, i32 %y) nounwind ssp {
+entry:
+  %tmp1 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
+  %tmp2 = load %struct.cont_t** %tmp1             ; <%struct.cont_t*> [#uses=1]
+  %tmp3 = getelementptr inbounds %struct.cont_t* %tmp2, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
+  %tmp4 = load void (i8*, i32)** %tmp3            ; <void (i8*, i32)*> [#uses=1]
+  %tmp6 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
+  %tmp7 = load %struct.cont_t** %tmp6             ; <%struct.cont_t*> [#uses=1]
+  %conv = bitcast %struct.cont_t* %tmp7 to i8*    ; <i8*> [#uses=1]
+  %tmp9 = getelementptr inbounds %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1]
+  %tmp10 = load i32* %tmp9                        ; <i32> [#uses=1]
+  %mul = mul i32 %tmp10, %y                       ; <i32> [#uses=1]
+  call void %tmp4(i8* %conv, i32 %mul)
+  ret void
+}
+
+define internal void @bar(%struct.cont_t* %c, i32 %y) nounwind ssp {
+entry:
+  %tmp1 = getelementptr inbounds %struct.cont_t* %c, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
+  %tmp2 = load void (i8*, i32)** %tmp1            ; <void (i8*, i32)*> [#uses=1]
+  %tmp4 = getelementptr inbounds %struct.cont_t* %c, i32 0, i32 1 ; <i8**> [#uses=1]
+  %tmp5 = load i8** %tmp4                         ; <i8*> [#uses=1]
+  %add = add nsw i32 %y, 5                        ; <i32> [#uses=1]
+  call void %tmp2(i8* %tmp5, i32 %add)
+  ret void
+}
+
+declare void @exit(i32) noreturn
+
author	Chris Lattner <sabre@nondot.org>
	Sat, 1 May 2010 06:38:43 +0000 (06:38 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Sat, 1 May 2010 06:38:43 +0000 (06:38 +0000)
lib/Analysis/IPA/CallGraphSCCPass.cpp		patch \| blob \| history
test/Transforms/Inline/devirtualize-3.ll	[new file with mode: 0644]	patch \| blob