1 //===- BottomUpClosure.cpp - Compute bottom-up interprocedural closure ----===//
3 // This file implements the BUDataStructures class, which represents the
4 // Bottom-Up Interprocedural closure of the data structure graph over the
5 // program. This is useful for applications like pool allocation, but **not**
6 // applications like alias analysis.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/Analysis/DataStructure.h"
11 #include "llvm/Analysis/DSGraph.h"
12 #include "llvm/Module.h"
13 #include "Support/Statistic.h"
14 #include "Support/hash_map"
// MaxSCC - Statistic recording the largest SCC size seen in the call graph;
// calculateGraphs raises it as SCCs are identified.
17 Statistic<> MaxSCC("budatastructure", "Maximum SCC Size in Call Graph");
// X - RegisterAnalysis object for BUDataStructures, constructed with the name
// "budatastructure" and a human-readable description.
19 RegisterAnalysis<BUDataStructures>
20 X("budatastructure", "Bottom-up Data Structure Analysis");
25 static bool isVAHackFn(const Function *F) {
26 return F->getName() == "printf" || F->getName() == "sscanf" ||
27 F->getName() == "fprintf" || F->getName() == "open" ||
28 F->getName() == "sprintf" || F->getName() == "fputs" ||
29 F->getName() == "fscanf";
32 // isCompleteNode - Return true if we know all of the targets of this node, and
33 // if the call sites are not external.
35 static inline bool isCompleteNode(DSNode *N) {
36 if (N->isIncomplete()) return false;
37 const std::vector<GlobalValue*> &Callees = N->getGlobals();
38 for (unsigned i = 0, e = Callees.size(); i != e; ++i)
39 if (Callees[i]->isExternal())
40 if (!isVAHackFn(cast<Function>(Callees[i])))
41 return false; // External function found...
42 return true; // otherwise ok
// CallSiteIterator - Iterates over the callee functions reachable through a
// vector of DSCallSites.  The position is the pair (CallSite, CallSiteEntry):
// an index into *FCs, plus an index into the callee node's globals that
// operator* uses for call sites that are not direct calls.
// NOTE(review): this listing appears to omit some lines (for example, no
// declaration of the CallSite member and few closing braces are visible);
// confirm details against the complete file.
45 struct CallSiteIterator {
46 // FCs - The call sites being iterated over; the edges out of the current
// node are the call site targets...
47 std::vector<DSCallSite> *FCs;
// CallSiteEntry - Index into the callee node's globals (see operator*).
49 unsigned CallSiteEntry;
// Begin iterator ctor: start at call site 0, entry 0, then advance to the
// first valid callee.
51 CallSiteIterator(std::vector<DSCallSite> &CS) : FCs(&CS) {
52 CallSite = 0; CallSiteEntry = 0;
53 advanceToValidCallee();
56 // End iterator ctor...
// The bool parameter is unnamed and unused; it only selects this overload.
// The end position is CallSite == FCs->size() with CallSiteEntry == 0.
57 CallSiteIterator(std::vector<DSCallSite> &CS, bool) : FCs(&CS) {
58 CallSite = FCs->size(); CallSiteEntry = 0;
// advanceToValidCallee - Scan forward from the current position while
// CallSite is in range.  A direct call is only considered at
// CallSiteEntry == 0, and only when its callee is not external or is accepted
// by isVAHackFn.  For other call sites the callee node's globals are
// consulted when we are already part-way through the node
// (CallSiteEntry != 0) or the node passes isCompleteNode.
// NOTE(review): the statements that return or step CallSite/CallSiteEntry
// are not visible in this listing.
61 void advanceToValidCallee() {
62 while (CallSite < FCs->size()) {
63 if ((*FCs)[CallSite].isDirectCall()) {
64 if (CallSiteEntry == 0 && // direct call only has one target...
65 (!(*FCs)[CallSite].getCalleeFunc()->isExternal() ||
66 isVAHackFn((*FCs)[CallSite].getCalleeFunc()))) // If not external
69 DSNode *CalleeNode = (*FCs)[CallSite].getCalleeNode();
70 if (CallSiteEntry || isCompleteNode(CalleeNode)) {
71 const std::vector<GlobalValue*> &Callees = CalleeNode->getGlobals();
73 if (CallSiteEntry < Callees.size())
// begin/end factories.  The begin() overloads return the call site vector
// itself and rely on the implicit conversion provided by the one-argument
// constructor; the end() overloads use the (vector, bool) constructor.
82 static CallSiteIterator begin(DSGraph &G) { return G.getAuxFunctionCalls(); }
83 static CallSiteIterator end(DSGraph &G) {
84 return CallSiteIterator(G.getAuxFunctionCalls(), true);
86 static CallSiteIterator begin(std::vector<DSCallSite> &CSs) { return CSs; }
87 static CallSiteIterator end(std::vector<DSCallSite> &CSs) {
88 return CallSiteIterator(CSs, true);
// Two iterators are equal when both indices match; FCs is not compared.
90 bool operator==(const CallSiteIterator &CSI) const {
91 return CallSite == CSI.CallSite && CallSiteEntry == CSI.CallSiteEntry;
93 bool operator!=(const CallSiteIterator &CSI) const { return !operator==(CSI);}
// Accessors for the index of the current call site and the call site itself.
95 unsigned getCallSiteIdx() const { return CallSite; }
96 DSCallSite &getCallSite() const { return (*FCs)[CallSite]; }
// operator* - The current callee: the callee function of a direct call,
// otherwise entry CallSiteEntry of the callee node's globals, cast to a
// Function.
98 Function *operator*() const {
99 if ((*FCs)[CallSite].isDirectCall()) {
100 return (*FCs)[CallSite].getCalleeFunc();
102 DSNode *Node = (*FCs)[CallSite].getCalleeNode();
103 return cast<Function>(Node->getGlobals()[CallSiteEntry]);
107 CallSiteIterator& operator++() { // Preincrement
109 advanceToValidCallee();
// Postincrement: returns a copy of the iterator taken before advancing.
112 CallSiteIterator operator++(int) { // Postincrement
113 CallSiteIterator tmp = *this; ++*this; return tmp;
119 // run - Calculate the bottom up data structure graphs for each function in the
// module M: first starting from the function returned by M.getMainFunction(),
// then for every remaining non-external function that has no entry in DSInfo.
// NOTE(review): some lines of this function (including its return statement
// and any guard on MainFunc being null) are not visible in this listing.
122 bool BUDataStructures::run(Module &M) {
123 LocalDataStructures &LocalDSA = getAnalysis<LocalDataStructures>();
// The globals graph is constructed from the local analysis' globals graph.
124 GlobalsGraph = new DSGraph(LocalDSA.getGlobalsGraph());
125 GlobalsGraph->setPrintAuxCalls();
127 Function *MainFunc = M.getMainFunction();
129 calculateReachableGraphs(MainFunc);
131 // Calculate the graphs for any functions that are unreachable from main...
132 for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
133 if (!I->isExternal() && DSInfo.find(I) == DSInfo.end()) {
// Report the function on std::cerr before processing it.
136 std::cerr << "*** Function unreachable from main: "
137 << I->getName() << "\n";
139 calculateReachableGraphs(I); // Calculate all graphs...
// calculateReachableGraphs - Start the recursive calculateGraphs traversal at
// F with a fresh function stack and a fresh visit-number map.
// NOTE(review): NextID is passed below but its declaration is not visible in
// this listing.
144 void BUDataStructures::calculateReachableGraphs(Function *F) {
145 std::vector<Function*> Stack;
146 hash_map<Function*, unsigned> ValMap;
148 calculateGraphs(F, Stack, NextID, ValMap);
// getOrCreateGraph - Return the graph recorded for F in DSInfo.  If there is
// none yet, a new DSGraph is constructed from F's graph in
// LocalDataStructures, attached to GlobalsGraph, and its aux call list is
// initialized from its function call list.
// NOTE(review): the return statement for the creation path is not visible in
// this listing.
151 DSGraph &BUDataStructures::getOrCreateGraph(Function *F) {
152 // Has the graph already been created?
// Graph is a reference to the map slot, so assigning it below records the
// new graph in DSInfo.
153 DSGraph *&Graph = DSInfo[F];
154 if (Graph) return *Graph;
156 // Copy the local version into DSInfo...
157 Graph = new DSGraph(getAnalysis<LocalDataStructures>().getDSGraph(*F));
159 Graph->setGlobalsGraph(GlobalsGraph);
160 Graph->setPrintAuxCalls();
162 // Start with a copy of the original call sites...
163 Graph->getAuxFunctionCalls() = Graph->getFunctionCalls();
// calculateGraphs - Recursively visit F and the callees of its resolvable
// call sites.  F takes the next number from NextID (MyID); Min tracks the
// smallest number obtained for any callee.  When F turns out to be part of a
// larger SCC, Min is returned.  Otherwise the SCC is processed here: a
// single-function SCC goes through calculateGraph (and the function is
// revisited if resolvable call sites remain), while a multi-function SCC has
// inlineNonSCCGraphs and then calculateSCCGraph applied to its members, after
// which the SCC's functions are erased from Stack and MyID is returned.
// NOTE(review): several statements are not visible in this listing (e.g. the
// NextID parameter declaration, the declaration of M, the updates to Stack
// and ValMap, and the loop heads matching the two `while` lines); confirm
// against the complete file.
167 unsigned BUDataStructures::calculateGraphs(Function *F,
168 std::vector<Function*> &Stack,
170 hash_map<Function*, unsigned> &ValMap) {
171 assert(ValMap.find(F) == ValMap.end() && "Shouldn't revisit functions!");
172 unsigned Min = NextID++, MyID = Min;
176 if (F->isExternal()) { // sprintf, fprintf, sscanf, etc...
183 DSGraph &Graph = getOrCreateGraph(F);
185 // The edges out of the current node are the call site targets...
186 for (CallSiteIterator I = CallSiteIterator::begin(Graph),
187 E = CallSiteIterator::end(Graph); I != E; ++I) {
188 Function *Callee = *I;
190 // Have we visited the destination function yet?
191 hash_map<Function*, unsigned>::iterator It = ValMap.find(Callee);
192 if (It == ValMap.end()) // No, visit it now.
193 M = calculateGraphs(Callee, Stack, NextID, ValMap);
194 else // Yes, get its number.
196 if (M < Min) Min = M;
199 assert(ValMap[F] == MyID && "SCC construction assumption wrong!");
201 return Min; // This is part of a larger SCC!
203 // If this is a new SCC, process it now.
204 if (Stack.back() == F) { // Special case the single "SCC" case here.
205 DEBUG(std::cerr << "Visiting single node SCC #: " << MyID << " fn: "
206 << F->getName() << "\n");
208 DSGraph &G = calculateGraph(*F);
210 if (MaxSCC < 1) MaxSCC = 1;
212 // Should we revisit the graph?
// Resolvable call sites remain in G's aux call list when begin != end.
213 if (CallSiteIterator::begin(G) != CallSiteIterator::end(G)) {
215 return calculateGraphs(F, Stack, NextID, ValMap);
222 // SCCFunctions - Keep track of the functions in the current SCC
224 hash_set<Function*> SCCFunctions;
227 std::vector<Function*>::iterator FirstInSCC = Stack.end();
231 SCCFunctions.insert(NF);
234 std::cerr << "Identified SCC #: " << MyID << " of size: "
235 << (Stack.end()-FirstInSCC) << "\n";
237 // Compute the Max SCC Size...
238 if (MaxSCC < unsigned(Stack.end()-FirstInSCC))
239 MaxSCC = Stack.end()-FirstInSCC;
// First pass over the SCC members: inline graphs of callees outside the SCC.
241 std::vector<Function*>::iterator I = Stack.end();
245 /*DEBUG*/(std::cerr << " Fn #" << (Stack.end()-I) << "/"
246 << (Stack.end()-FirstInSCC) << " in SCC: "
248 DSGraph &G = getDSGraph(**I);
249 std::cerr << " [" << G.getGraphSize() << "+"
250 << G.getAuxFunctionCalls().size() << "] ";
253 // Eliminate all call sites in the SCC that are not to functions that are
// themselves in the SCC.
255 inlineNonSCCGraphs(**I, SCCFunctions);
258 std::cerr << "after Non-SCC's [" << G.getGraphSize() << "+"
259 << G.getAuxFunctionCalls().size() << "]\n";
261 } while (I != FirstInSCC);
// Second pass over the SCC members: resolve the calls within the SCC.
267 /*DEBUG*/(std::cerr << " Fn #" << (Stack.end()-I) << "/"
268 << (Stack.end()-FirstInSCC) << " in SCC: "
270 DSGraph &G = getDSGraph(**I);
271 std::cerr << " [" << G.getGraphSize() << "+"
272 << G.getAuxFunctionCalls().size() << "] ";
274 // Inline all graphs into the SCC nodes...
275 calculateSCCGraph(**I, SCCFunctions);
278 std::cerr << "after [" << G.getGraphSize() << "+"
279 << G.getAuxFunctionCalls().size() << "]\n";
281 } while (I != FirstInSCC);
284 std::cerr << "DONE with SCC #: " << MyID << "\n";
286 // We never have to revisit "SCC" processed functions...
288 // Drop the stuff we don't need from the end of the stack
289 Stack.erase(FirstInSCC, Stack.end());
293 return MyID; // == Min
297 // releaseMemory - If the pass pipeline is done with this pass, we can release
298 // our memory... here...
// NOTE(review): the body of the loop over DSInfo and the remainder of this
// function are not visible in this listing; presumably each DSGraph stored in
// DSInfo is freed and the map is then emptied -- confirm against the complete
// file.
300 void BUDataStructures::releaseMemory() {
301 for (hash_map<const Function*, DSGraph*>::iterator I = DSInfo.begin(),
302 E = DSInfo.end(); I != E; ++I)
305 // Empty map so next time memory is released, data structures are not
// calculateGraph - Make one pass over the resolvable call sites in F's graph.
// Calls to external functions are ignored, self-recursive calls are merged
// with the graph itself, and for any other callee the callee's graph is
// merged in at the call site.  Call sites the iterator skips are copied back
// to the graph's aux call list as unresolvable.  Afterwards the incomplete
// markers are recomputed and dead nodes are removed.
// NOTE(review): some lines (closing braces, the final return, and parts of
// the writeGraphToFile calls) are not visible in this listing.
312 DSGraph &BUDataStructures::calculateGraph(Function &F) {
313 DSGraph &Graph = getDSGraph(F);
314 DEBUG(std::cerr << " [BU] Calculating graph for: " << F.getName() << "\n");
316 // Move our call site list into TempFCs so that inlined call sites go into the
317 // new call site list and don't invalidate our iterators!
318 std::vector<DSCallSite> TempFCs;
319 std::vector<DSCallSite> &AuxCallsList = Graph.getAuxFunctionCalls();
320 TempFCs.swap(AuxCallsList);
322 // Loop over all of the resolvable call sites
// LastCallSiteIdx starts at ~0U so that incrementing it wraps around to
// index 0 (unsigned arithmetic) when no call site has been processed yet.
323 unsigned LastCallSiteIdx = ~0U;
324 for (CallSiteIterator I = CallSiteIterator::begin(TempFCs),
325 E = CallSiteIterator::end(TempFCs); I != E; ++I) {
326 // If we skipped over any call sites, they must be unresolvable, copy them
327 // to the real call site list.
329 for (; LastCallSiteIdx < I.getCallSiteIdx(); ++LastCallSiteIdx)
330 AuxCallsList.push_back(TempFCs[LastCallSiteIdx]);
331 LastCallSiteIdx = I.getCallSiteIdx();
333 // Resolve the current call...
334 Function *Callee = *I;
335 DSCallSite &CS = I.getCallSite();
337 if (Callee->isExternal()) {
338 // Ignore this case, simple varargs functions we cannot stub out!
339 } else if (Callee == &F) {
340 // Self recursion... simply link up the formal arguments with the
341 // actual arguments...
342 DEBUG(std::cerr << " Self Inlining: " << F.getName() << "\n");
344 // Handle self recursion by resolving the arguments and return value
345 Graph.mergeInGraph(CS, Graph, 0);
348 // Get the data structure graph for the called function.
350 DSGraph &GI = getDSGraph(*Callee); // Graph to inline
352 DEBUG(std::cerr << " Inlining graph for " << Callee->getName()
353 << "[" << GI.getGraphSize() << "+"
354 << GI.getAuxFunctionCalls().size() << "] into: " << F.getName()
355 << "[" << Graph.getGraphSize() << "+"
356 << Graph.getAuxFunctionCalls().size() << "]\n");
358 Graph.writeGraphToFile(std::cerr, "bu_" + F.getName() + "_before_" +
362 // Merge the callee's graph (GI) into Graph at call site CS
363 Graph.mergeInGraph(CS, GI,
364 DSGraph::KeepModRefBits |
365 DSGraph::StripAllocaBit | DSGraph::DontCloneCallNodes);
368 Graph.writeGraphToFile(std::cerr, "bu_" + F.getName() + "_after_" +
374 // Make sure to catch any leftover unresolvable calls...
375 for (++LastCallSiteIdx; LastCallSiteIdx < TempFCs.size(); ++LastCallSiteIdx)
376 AuxCallsList.push_back(TempFCs[LastCallSiteIdx]);
380 // Recompute the Incomplete markers. If there are any function calls left
381 // now that are complete, we must loop!
382 Graph.maskIncompleteMarkers();
383 Graph.markIncompleteNodes(DSGraph::MarkFormalArgs);
384 // FIXME: materialize nodes from the globals graph as necessary...
385 Graph.removeDeadNodes(DSGraph::KeepUnreachableGlobals);
387 DEBUG(std::cerr << " [BU] Done inlining: " << F.getName() << " ["
388 << Graph.getGraphSize() << "+" << Graph.getAuxFunctionCalls().size()
391 //Graph.writeGraphToFile(std::cerr, "bu_" + F.getName());
397 // inlineNonSCCGraphs - This method is almost like the other two calculate graph
398 // methods. This one is used to inline function graphs (from functions outside
399 // of the SCC) into functions in the SCC. It is not supposed to touch functions
400 // IN the SCC at all.
// Call sites whose callee is in SCCFunctions are pushed back onto the aux
// call list unchanged; call sites the iterator skips are copied back as
// unresolvable.
// NOTE(review): some lines (closing braces and the final return) are not
// visible in this listing.
402 DSGraph &BUDataStructures::inlineNonSCCGraphs(Function &F,
403 hash_set<Function*> &SCCFunctions){
404 DSGraph &Graph = getDSGraph(F);
405 DEBUG(std::cerr << " [BU] Inlining Non-SCC graphs for: "
406 << F.getName() << "\n");
408 // Move our call site list into TempFCs so that inlined call sites go into the
409 // new call site list and don't invalidate our iterators!
410 std::vector<DSCallSite> TempFCs;
411 std::vector<DSCallSite> &AuxCallsList = Graph.getAuxFunctionCalls();
412 TempFCs.swap(AuxCallsList);
414 // Loop over all of the resolvable call sites
// LastCallSiteIdx starts at ~0U so that incrementing it wraps around to
// index 0 (unsigned arithmetic) when no call site has been processed yet.
415 unsigned LastCallSiteIdx = ~0U;
416 for (CallSiteIterator I = CallSiteIterator::begin(TempFCs),
417 E = CallSiteIterator::end(TempFCs); I != E; ++I) {
418 // If we skipped over any call sites, they must be unresolvable, copy them
419 // to the real call site list.
421 for (; LastCallSiteIdx < I.getCallSiteIdx(); ++LastCallSiteIdx)
422 AuxCallsList.push_back(TempFCs[LastCallSiteIdx]);
423 LastCallSiteIdx = I.getCallSiteIdx();
425 // Resolve the current call...
426 Function *Callee = *I;
427 DSCallSite &CS = I.getCallSite();
429 if (Callee->isExternal()) {
430 // Ignore this case, simple varargs functions we cannot stub out!
431 } else if (SCCFunctions.count(Callee)) {
432 // Calling a function in the SCC, ignore it for now!
433 DEBUG(std::cerr << " SCC CallSite for: " << Callee->getName() << "\n");
434 AuxCallsList.push_back(CS);
436 // Get the data structure graph for the called function.
438 DSGraph &GI = getDSGraph(*Callee); // Graph to inline
440 DEBUG(std::cerr << " Inlining graph for " << Callee->getName()
441 << "[" << GI.getGraphSize() << "+"
442 << GI.getAuxFunctionCalls().size() << "] into: " << F.getName()
443 << "[" << Graph.getGraphSize() << "+"
444 << Graph.getAuxFunctionCalls().size() << "]\n");
446 // Merge the callee's graph (GI) into Graph at call site CS
447 Graph.mergeInGraph(CS, GI,
448 DSGraph::KeepModRefBits | DSGraph::StripAllocaBit |
449 DSGraph::DontCloneCallNodes);
453 // Make sure to catch any leftover unresolvable calls...
454 for (++LastCallSiteIdx; LastCallSiteIdx < TempFCs.size(); ++LastCallSiteIdx)
455 AuxCallsList.push_back(TempFCs[LastCallSiteIdx]);
459 // Recompute the Incomplete markers. If there are any function calls left
460 // now that are complete, we must loop!
461 Graph.maskIncompleteMarkers();
462 Graph.markIncompleteNodes(DSGraph::MarkFormalArgs);
463 Graph.removeDeadNodes(DSGraph::KeepUnreachableGlobals);
465 DEBUG(std::cerr << " [BU] Done Non-SCC inlining: " << F.getName() << " ["
466 << Graph.getGraphSize() << "+" << Graph.getAuxFunctionCalls().size()
468 //Graph.writeGraphToFile(std::cerr, "nscc_" + F.getName());
// calculateSCCGraph - Repeatedly resolve call sites in F's graph until no
// resolvable call site remains.  Within one pass, the first call site inlined
// for a callee that is in SCCFunctions is remembered in SCCCallSiteMap; later
// call sites to the same callee are merged with that remembered call site
// instead of inlining the callee's graph again.  Call sites the iterator
// skips are gathered in UnresolvableCalls and appended back to the aux call
// list after each pass.  Once the loop ends, the incomplete markers are
// recomputed and dead nodes are removed.
// NOTE(review): some lines (closing braces, the condition guarding the early
// break, and the final return) are not visible in this listing.
473 DSGraph &BUDataStructures::calculateSCCGraph(Function &F,
474 hash_set<Function*> &SCCFunctions){
475 DSGraph &Graph = getDSGraph(F);
476 DEBUG(std::cerr << " [BU] Calculating SCC graph for: " << F.getName()<<"\n");
478 std::vector<DSCallSite> UnresolvableCalls;
479 hash_map<Function*, DSCallSite> SCCCallSiteMap;
480 std::vector<DSCallSite> &AuxCallsList = Graph.getAuxFunctionCalls();
482 while (1) { // Loop until we run out of resolvable call sites!
483 // Move our call site list into TempFCs so that inlined call sites go into
484 // the new call site list and don't invalidate our iterators!
485 std::vector<DSCallSite> TempFCs;
486 TempFCs.swap(AuxCallsList);
488 // Loop over all of the resolvable call sites
// LastCallSiteIdx starts at ~0U so that incrementing it wraps around to
// index 0 (unsigned arithmetic) when no call site has been processed yet.
489 unsigned LastCallSiteIdx = ~0U;
490 CallSiteIterator I = CallSiteIterator::begin(TempFCs),
491 E = CallSiteIterator::end(TempFCs);
// Swap the call sites back into the graph's list before leaving the loop.
493 TempFCs.swap(AuxCallsList);
494 break; // Done when no resolvable call sites exist
497 for (; I != E; ++I) {
498 // If we skipped over any call sites, they must be unresolvable, copy them
499 // to the unresolvable site list.
501 for (; LastCallSiteIdx < I.getCallSiteIdx(); ++LastCallSiteIdx)
502 UnresolvableCalls.push_back(TempFCs[LastCallSiteIdx]);
503 LastCallSiteIdx = I.getCallSiteIdx();
505 // Resolve the current call...
506 Function *Callee = *I;
507 DSCallSite &CS = I.getCallSite();
509 if (Callee->isExternal()) {
510 // Ignore this case, simple varargs functions we cannot stub out!
511 } else if (Callee == &F) {
512 // Self recursion... simply link up the formal arguments with the
513 // actual arguments...
514 DEBUG(std::cerr << " Self Inlining: " << F.getName() << "\n");
516 // Handle self recursion by resolving the arguments and return value
517 Graph.mergeInGraph(CS, Graph, 0);
518 } else if (SCCCallSiteMap.count(Callee)) {
519 // We have already seen a call site in the SCC for this function, just
520 // merge the two call sites together and we are done.
521 SCCCallSiteMap.find(Callee)->second.mergeWith(CS);
523 // Get the data structure graph for the called function.
525 DSGraph &GI = getDSGraph(*Callee); // Graph to inline
526 DEBUG(std::cerr << " Inlining graph for " << Callee->getName()
527 << "[" << GI.getGraphSize() << "+"
528 << GI.getAuxFunctionCalls().size() << "] into: " << F.getName()
529 << "[" << Graph.getGraphSize() << "+"
530 << Graph.getAuxFunctionCalls().size() << "]\n");
532 // Merge the callee's graph (GI) into Graph at call site CS
533 Graph.mergeInGraph(CS, GI,
534 DSGraph::KeepModRefBits | DSGraph::StripAllocaBit |
535 DSGraph::DontCloneCallNodes);
// Remember this call site so later calls to the same SCC member are merged
// with it rather than inlined again (see the SCCCallSiteMap branch).
537 if (SCCFunctions.count(Callee))
538 SCCCallSiteMap.insert(std::make_pair(Callee, CS));
542 // Make sure to catch any leftover unresolvable calls...
543 for (++LastCallSiteIdx; LastCallSiteIdx < TempFCs.size(); ++LastCallSiteIdx)
544 UnresolvableCalls.push_back(TempFCs[LastCallSiteIdx]);
547 // Reset the SCCCallSiteMap...
548 SCCCallSiteMap.clear();
// Put the unresolvable call sites back on the graph's aux call list.
550 AuxCallsList.insert(AuxCallsList.end(), UnresolvableCalls.begin(),
551 UnresolvableCalls.end());
552 UnresolvableCalls.clear();
555 // Recompute the Incomplete markers. If there are any function calls left
556 // now that are complete, we must loop!
557 Graph.maskIncompleteMarkers();
558 Graph.markIncompleteNodes(DSGraph::MarkFormalArgs);
560 // FIXME: materialize nodes from the globals graph as necessary...
562 Graph.removeDeadNodes(DSGraph::KeepUnreachableGlobals);
564 DEBUG(std::cerr << " [BU] Done inlining: " << F.getName() << " ["
565 << Graph.getGraphSize() << "+" << Graph.getAuxFunctionCalls().size()
567 //Graph.writeGraphToFile(std::cerr, "bu_" + F.getName());