LLVM API Documentation

TopDownClosure.cpp

Go to the documentation of this file.
00001 //===- TopDownClosure.cpp - Compute the top-down interprocedure closure ---===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the TDDataStructures class, which represents the
00011 // Top-down Interprocedural closure of the data structure graph over the
00012 // program.  This is useful (but not strictly necessary?) for applications
00013 // like pointer analysis.
00014 //
00015 //===----------------------------------------------------------------------===//
00016 
00017 #include "llvm/Analysis/DataStructure/DataStructure.h"
00018 #include "llvm/Module.h"
00019 #include "llvm/DerivedTypes.h"
00020 #include "llvm/Analysis/DataStructure/DSGraph.h"
00021 #include "llvm/Support/Debug.h"
00022 #include "llvm/Support/Timer.h"
00023 #include "llvm/ADT/Statistic.h"
00024 #include <iostream>
00025 using namespace llvm;
00026 
00027 #if 0
00028 #define TIME_REGION(VARNAME, DESC) \
00029    NamedRegionTimer VARNAME(DESC)
00030 #else
00031 #define TIME_REGION(VARNAME, DESC)
00032 #endif
00033 
00034 namespace {
00035   RegisterAnalysis<TDDataStructures>   // Register the pass
00036   Y("tddatastructure", "Top-down Data Structure Analysis");
00037 
00038   Statistic<> NumTDInlines("tddatastructures", "Number of graphs inlined");
00039 }
00040 
00041 void TDDataStructures::markReachableFunctionsExternallyAccessible(DSNode *N,
00042                                                    hash_set<DSNode*> &Visited) {
00043   if (!N || Visited.count(N)) return;
00044   Visited.insert(N);
00045 
00046   for (unsigned i = 0, e = N->getNumLinks(); i != e; ++i) {
00047     DSNodeHandle &NH = N->getLink(i*N->getPointerSize());
00048     if (DSNode *NN = NH.getNode()) {
00049       std::vector<Function*> Functions;
00050       NN->addFullFunctionList(Functions);
00051       ArgsRemainIncomplete.insert(Functions.begin(), Functions.end());
00052       markReachableFunctionsExternallyAccessible(NN, Visited);
00053     }
00054   }
00055 }
00056 
00057 
00058 // run - Calculate the top down data structure graphs for each function in the
00059 // program.
00060 //
00061 bool TDDataStructures::runOnModule(Module &M) {
00062   BUInfo = &getAnalysis<BUDataStructures>();
00063   GlobalECs = BUInfo->getGlobalECs();
00064   GlobalsGraph = new DSGraph(BUInfo->getGlobalsGraph(), GlobalECs);
00065   GlobalsGraph->setPrintAuxCalls();
00066 
00067   // Figure out which functions must not mark their arguments complete because
00068   // they are accessible outside this compilation unit.  Currently, these
00069   // arguments are functions which are reachable by global variables in the
00070   // globals graph.
00071   const DSScalarMap &GGSM = GlobalsGraph->getScalarMap();
00072   hash_set<DSNode*> Visited;
00073   for (DSScalarMap::global_iterator I=GGSM.global_begin(), E=GGSM.global_end();
00074        I != E; ++I) {
00075     DSNode *N = GGSM.find(*I)->second.getNode();
00076     if (N->isIncomplete())
00077       markReachableFunctionsExternallyAccessible(N, Visited);
00078   }
00079 
00080   // Loop over unresolved call nodes.  Any functions passed into (but not
00081   // returned!) from unresolvable call nodes may be invoked outside of the
00082   // current module.
00083   for (DSGraph::afc_iterator I = GlobalsGraph->afc_begin(),
00084          E = GlobalsGraph->afc_end(); I != E; ++I)
00085     for (unsigned arg = 0, e = I->getNumPtrArgs(); arg != e; ++arg)
00086       markReachableFunctionsExternallyAccessible(I->getPtrArg(arg).getNode(),
00087                                                  Visited);
00088   Visited.clear();
00089 
00090   // Functions without internal linkage also have unknown incoming arguments!
00091   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
00092     if (!I->isExternal() && !I->hasInternalLinkage())
00093       ArgsRemainIncomplete.insert(I);
00094 
00095   // We want to traverse the call graph in reverse post-order.  To do this, we
00096   // calculate a post-order traversal, then reverse it.
00097   hash_set<DSGraph*> VisitedGraph;
00098   std::vector<DSGraph*> PostOrder;
00099 
00100 #if 0
00101 {TIME_REGION(XXX, "td:Copy graphs");
00102 
00103   // Visit each of the graphs in reverse post-order now!
00104   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
00105     if (!I->isExternal())
00106       getOrCreateDSGraph(*I);
00107   return false;
00108 }
00109 #endif
00110 
00111 
00112 {TIME_REGION(XXX, "td:Compute postorder");
00113 
00114   // Calculate top-down from main...
00115   if (Function *F = M.getMainFunction())
00116     ComputePostOrder(*F, VisitedGraph, PostOrder);
00117 
00118   // Next calculate the graphs for each unreachable function...
00119   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
00120     ComputePostOrder(*I, VisitedGraph, PostOrder);
00121 
00122   VisitedGraph.clear();   // Release memory!
00123 }
00124 
00125 {TIME_REGION(XXX, "td:Inline stuff");
00126 
00127   // Visit each of the graphs in reverse post-order now!
00128   while (!PostOrder.empty()) {
00129     InlineCallersIntoGraph(*PostOrder.back());
00130     PostOrder.pop_back();
00131   }
00132 }
00133 
00134   // Free the IndCallMap.
00135   while (!IndCallMap.empty()) {
00136     delete IndCallMap.begin()->second;
00137     IndCallMap.erase(IndCallMap.begin());
00138   }
00139 
00140 
00141   ArgsRemainIncomplete.clear();
00142   GlobalsGraph->removeTriviallyDeadNodes();
00143 
00144   return false;
00145 }
00146 
00147 
00148 DSGraph &TDDataStructures::getOrCreateDSGraph(Function &F) {
00149   DSGraph *&G = DSInfo[&F];
00150   if (G == 0) { // Not created yet?  Clone BU graph...
00151     G = new DSGraph(getAnalysis<BUDataStructures>().getDSGraph(F), GlobalECs,
00152                     DSGraph::DontCloneAuxCallNodes);
00153     assert(G->getAuxFunctionCalls().empty() && "Cloned aux calls?");
00154     G->setPrintAuxCalls();
00155     G->setGlobalsGraph(GlobalsGraph);
00156 
00157     // Note that this graph is the graph for ALL of the function in the SCC, not
00158     // just F.
00159     for (DSGraph::retnodes_iterator RI = G->retnodes_begin(),
00160            E = G->retnodes_end(); RI != E; ++RI)
00161       if (RI->first != &F)
00162         DSInfo[RI->first] = G;
00163   }
00164   return *G;
00165 }
00166 
00167 
00168 void TDDataStructures::ComputePostOrder(Function &F,hash_set<DSGraph*> &Visited,
00169                                         std::vector<DSGraph*> &PostOrder) {
00170   if (F.isExternal()) return;
00171   DSGraph &G = getOrCreateDSGraph(F);
00172   if (Visited.count(&G)) return;
00173   Visited.insert(&G);
00174 
00175   // Recursively traverse all of the callee graphs.
00176   for (DSGraph::fc_iterator CI = G.fc_begin(), CE = G.fc_end(); CI != CE; ++CI){
00177     Instruction *CallI = CI->getCallSite().getInstruction();
00178     for (BUDataStructures::callee_iterator I = BUInfo->callee_begin(CallI),
00179            E = BUInfo->callee_end(CallI); I != E; ++I)
00180       ComputePostOrder(*I->second, Visited, PostOrder);
00181   }
00182 
00183   PostOrder.push_back(&G);
00184 }
00185 
00186 
00187 
00188 
00189 
00190 // releaseMemory - If the pass pipeline is done with this pass, we can release
00191 // our memory... here...
00192 //
00193 // FIXME: This should be releaseMemory and will work fine, except that LoadVN
00194 // has no way to extend the lifetime of the pass, which screws up ds-aa.
00195 //
00196 void TDDataStructures::releaseMyMemory() {
00197   for (hash_map<Function*, DSGraph*>::iterator I = DSInfo.begin(),
00198          E = DSInfo.end(); I != E; ++I) {
00199     I->second->getReturnNodes().erase(I->first);
00200     if (I->second->getReturnNodes().empty())
00201       delete I->second;
00202   }
00203 
00204   // Empty map so next time memory is released, data structures are not
00205   // re-deleted.
00206   DSInfo.clear();
00207   delete GlobalsGraph;
00208   GlobalsGraph = 0;
00209 }
00210 
00211 /// InlineCallersIntoGraph - Inline all of the callers of the specified DS graph
00212 /// into it, then recompute completeness of nodes in the resultant graph.
00213 void TDDataStructures::InlineCallersIntoGraph(DSGraph &DSG) {
00214   // Inline caller graphs into this graph.  First step, get the list of call
00215   // sites that call into this graph.
00216   std::vector<CallerCallEdge> EdgesFromCaller;
00217   std::map<DSGraph*, std::vector<CallerCallEdge> >::iterator
00218     CEI = CallerEdges.find(&DSG);
00219   if (CEI != CallerEdges.end()) {
00220     std::swap(CEI->second, EdgesFromCaller);
00221     CallerEdges.erase(CEI);
00222   }
00223 
00224   // Sort the caller sites to provide a by-caller-graph ordering.
00225   std::sort(EdgesFromCaller.begin(), EdgesFromCaller.end());
00226 
00227 
00228   // Merge information from the globals graph into this graph.  FIXME: This is
00229   // stupid.  Instead of us cloning information from the GG into this graph,
00230   // then having RemoveDeadNodes clone it back, we should do all of this as a
00231   // post-pass over all of the graphs.  We need to take cloning out of
00232   // removeDeadNodes and gut removeDeadNodes at the same time first though. :(
00233   {
00234     DSGraph &GG = *DSG.getGlobalsGraph();
00235     ReachabilityCloner RC(DSG, GG,
00236                           DSGraph::DontCloneCallNodes |
00237                           DSGraph::DontCloneAuxCallNodes);
00238     for (DSScalarMap::global_iterator
00239            GI = DSG.getScalarMap().global_begin(),
00240            E = DSG.getScalarMap().global_end(); GI != E; ++GI)
00241       RC.getClonedNH(GG.getNodeForValue(*GI));
00242   }
00243 
00244   DEBUG(std::cerr << "[TD] Inlining callers into '" << DSG.getFunctionNames()
00245         << "'\n");
00246 
00247   // Iteratively inline caller graphs into this graph.
00248   while (!EdgesFromCaller.empty()) {
00249     DSGraph &CallerGraph = *EdgesFromCaller.back().CallerGraph;
00250 
00251     // Iterate through all of the call sites of this graph, cloning and merging
00252     // any nodes required by the call.
00253     ReachabilityCloner RC(DSG, CallerGraph,
00254                           DSGraph::DontCloneCallNodes |
00255                           DSGraph::DontCloneAuxCallNodes);
00256 
00257     // Inline all call sites from this caller graph.
00258     do {
00259       const DSCallSite &CS = *EdgesFromCaller.back().CS;
00260       Function &CF = *EdgesFromCaller.back().CalledFunction;
00261       DEBUG(std::cerr << "   [TD] Inlining graph into Fn '"
00262             << CF.getName() << "' from ");
00263       if (CallerGraph.getReturnNodes().empty())
00264         DEBUG(std::cerr << "SYNTHESIZED INDIRECT GRAPH");
00265       else
00266         DEBUG (std::cerr << "Fn '"
00267                << CS.getCallSite().getInstruction()->
00268                getParent()->getParent()->getName() << "'");
00269       DEBUG(std::cerr << ": " << CF.getFunctionType()->getNumParams()
00270             << " args\n");
00271 
00272       // Get the formal argument and return nodes for the called function and
00273       // merge them with the cloned subgraph.
00274       DSCallSite T1 = DSG.getCallSiteForArguments(CF);
00275       RC.mergeCallSite(T1, CS);
00276       ++NumTDInlines;
00277 
00278       EdgesFromCaller.pop_back();
00279     } while (!EdgesFromCaller.empty() &&
00280              EdgesFromCaller.back().CallerGraph == &CallerGraph);
00281   }
00282 
00283 
00284   // Next, now that this graph is finalized, we need to recompute the
00285   // incompleteness markers for this graph and remove unreachable nodes.
00286   DSG.maskIncompleteMarkers();
00287 
00288   // If any of the functions has incomplete incoming arguments, don't mark any
00289   // of them as complete.
00290   bool HasIncompleteArgs = false;
00291   for (DSGraph::retnodes_iterator I = DSG.retnodes_begin(),
00292          E = DSG.retnodes_end(); I != E; ++I)
00293     if (ArgsRemainIncomplete.count(I->first)) {
00294       HasIncompleteArgs = true;
00295       break;
00296     }
00297 
00298   // Recompute the Incomplete markers.  Depends on whether args are complete
00299   unsigned Flags
00300     = HasIncompleteArgs ? DSGraph::MarkFormalArgs : DSGraph::IgnoreFormalArgs;
00301   DSG.markIncompleteNodes(Flags | DSGraph::IgnoreGlobals);
00302 
00303   // Delete dead nodes.  Treat globals that are unreachable as dead also.
00304   DSG.removeDeadNodes(DSGraph::RemoveUnreachableGlobals);
00305 
00306   // We are done with computing the current TD Graph!  Finally, before we can
00307   // finish processing this function, we figure out which functions it calls and
00308   // records these call graph edges, so that we have them when we process the
00309   // callee graphs.
00310   if (DSG.fc_begin() == DSG.fc_end()) return;
00311 
00312   // Loop over all the call sites and all the callees at each call site, and add
00313   // edges to the CallerEdges structure for each callee.
00314   for (DSGraph::fc_iterator CI = DSG.fc_begin(), E = DSG.fc_end();
00315        CI != E; ++CI) {
00316 
00317     // Handle direct calls efficiently.
00318     if (CI->isDirectCall()) {
00319       if (!CI->getCalleeFunc()->isExternal() &&
00320           !DSG.getReturnNodes().count(CI->getCalleeFunc()))
00321         CallerEdges[&getDSGraph(*CI->getCalleeFunc())]
00322           .push_back(CallerCallEdge(&DSG, &*CI, CI->getCalleeFunc()));
00323       continue;
00324     }
00325 
00326     Instruction *CallI = CI->getCallSite().getInstruction();
00327     // For each function in the invoked function list at this call site...
00328     BUDataStructures::callee_iterator IPI =
00329       BUInfo->callee_begin(CallI), IPE = BUInfo->callee_end(CallI);
00330 
00331     // Skip over all calls to this graph (SCC calls).
00332     while (IPI != IPE && &getDSGraph(*IPI->second) == &DSG)
00333       ++IPI;
00334 
00335     // All SCC calls?
00336     if (IPI == IPE) continue;
00337 
00338     Function *FirstCallee = IPI->second;
00339     ++IPI;
00340 
00341     // Skip over more SCC calls.
00342     while (IPI != IPE && &getDSGraph(*IPI->second) == &DSG)
00343       ++IPI;
00344 
00345     // If there is exactly one callee from this call site, remember the edge in
00346     // CallerEdges.
00347     if (IPI == IPE) {
00348       if (!FirstCallee->isExternal())
00349         CallerEdges[&getDSGraph(*FirstCallee)]
00350           .push_back(CallerCallEdge(&DSG, &*CI, FirstCallee));
00351       continue;
00352     }
00353 
00354     // Otherwise, there are multiple callees from this call site, so it must be
00355     // an indirect call.  Chances are that there will be other call sites with
00356     // this set of targets.  If so, we don't want to do M*N inlining operations,
00357     // so we build up a new, private, graph that represents the calls of all
00358     // calls to this set of functions.
00359     std::vector<Function*> Callees;
00360     for (BUDataStructures::ActualCalleesTy::const_iterator I =
00361            BUInfo->callee_begin(CallI), E = BUInfo->callee_end(CallI);
00362          I != E; ++I)
00363       if (!I->second->isExternal())
00364         Callees.push_back(I->second);
00365     std::sort(Callees.begin(), Callees.end());
00366 
00367     std::map<std::vector<Function*>, DSGraph*>::iterator IndCallRecI =
00368       IndCallMap.lower_bound(Callees);
00369 
00370     DSGraph *IndCallGraph;
00371 
00372     // If we already have this graph, recycle it.
00373     if (IndCallRecI != IndCallMap.end() && IndCallRecI->first == Callees) {
00374       std::cerr << "  [TD] *** Reuse of indcall graph for " << Callees.size()
00375                 << " callees!\n";
00376       IndCallGraph = IndCallRecI->second;
00377     } else {
00378       // Otherwise, create a new DSGraph to represent this.
00379       IndCallGraph = new DSGraph(DSG.getGlobalECs(), DSG.getTargetData());
00380 
00381       // Make a nullary dummy call site, which will eventually get some content
00382       // merged into it.  The actual callee function doesn't matter here, so we
00383       // just pass it something to keep the ctor happy.
00384       std::vector<DSNodeHandle> ArgDummyVec;
00385       DSCallSite DummyCS(CI->getCallSite(), DSNodeHandle(), Callees[0]/*dummy*/,
00386                          ArgDummyVec);
00387       IndCallGraph->getFunctionCalls().push_back(DummyCS);
00388 
00389       IndCallRecI = IndCallMap.insert(IndCallRecI,
00390                                       std::make_pair(Callees, IndCallGraph));
00391 
00392       // Additionally, make sure that each of the callees inlines this graph
00393       // exactly once.
00394       DSCallSite *NCS = &IndCallGraph->getFunctionCalls().front();
00395       for (unsigned i = 0, e = Callees.size(); i != e; ++i) {
00396         DSGraph& CalleeGraph = getDSGraph(*Callees[i]);
00397         if (&CalleeGraph != &DSG)
00398           CallerEdges[&CalleeGraph].push_back(CallerCallEdge(IndCallGraph, NCS,
00399                                                              Callees[i]));
00400       }
00401     }
00402 
00403     // Now that we know which graph to use for this, merge the caller
00404     // information into the graph, based on information from the call site.
00405     ReachabilityCloner RC(*IndCallGraph, DSG, 0);
00406     RC.mergeCallSite(IndCallGraph->getFunctionCalls().front(), *CI);
00407   }
00408 }
00409 
00410 
00411 static const Function *getFnForValue(const Value *V) {
00412   if (const Instruction *I = dyn_cast<Instruction>(V))
00413     return I->getParent()->getParent();
00414   else if (const Argument *A = dyn_cast<Argument>(V))
00415     return A->getParent();
00416   else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
00417     return BB->getParent();
00418   return 0;
00419 }
00420 
00421 void TDDataStructures::deleteValue(Value *V) {
00422   if (const Function *F = getFnForValue(V)) {  // Function local value?
00423     // If this is a function local value, just delete it from the scalar map!
00424     getDSGraph(*F).getScalarMap().eraseIfExists(V);
00425     return;
00426   }
00427 
00428   if (Function *F = dyn_cast<Function>(V)) {
00429     assert(getDSGraph(*F).getReturnNodes().size() == 1 &&
00430            "cannot handle scc's");
00431     delete DSInfo[F];
00432     DSInfo.erase(F);
00433     return;
00434   }
00435 
00436   assert(!isa<GlobalVariable>(V) && "Do not know how to delete GV's yet!");
00437 }
00438 
00439 void TDDataStructures::copyValue(Value *From, Value *To) {
00440   if (From == To) return;
00441   if (const Function *F = getFnForValue(From)) {  // Function local value?
00442     // If this is a function local value, just delete it from the scalar map!
00443     getDSGraph(*F).getScalarMap().copyScalarIfExists(From, To);
00444     return;
00445   }
00446 
00447   if (Function *FromF = dyn_cast<Function>(From)) {
00448     Function *ToF = cast<Function>(To);
00449     assert(!DSInfo.count(ToF) && "New Function already exists!");
00450     DSGraph *NG = new DSGraph(getDSGraph(*FromF), GlobalECs);
00451     DSInfo[ToF] = NG;
00452     assert(NG->getReturnNodes().size() == 1 && "Cannot copy SCC's yet!");
00453 
00454     // Change the Function* is the returnnodes map to the ToF.
00455     DSNodeHandle Ret = NG->retnodes_begin()->second;
00456     NG->getReturnNodes().clear();
00457     NG->getReturnNodes()[ToF] = Ret;
00458     return;
00459   }
00460 
00461   if (const Function *F = getFnForValue(To)) {
00462     DSGraph &G = getDSGraph(*F);
00463     G.getScalarMap().copyScalarIfExists(From, To);
00464     return;
00465   }
00466 
00467   std::cerr << *From;
00468   std::cerr << *To;
00469   assert(0 && "Do not know how to copy this yet!");
00470   abort();
00471 }