LLVM API Documentation
00001 //===- TopDownClosure.cpp - Compute the top-down interprocedure closure ---===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the TDDataStructures class, which represents the 00011 // Top-down Interprocedural closure of the data structure graph over the 00012 // program. This is useful (but not strictly necessary?) for applications 00013 // like pointer analysis. 00014 // 00015 //===----------------------------------------------------------------------===// 00016 00017 #include "llvm/Analysis/DataStructure/DataStructure.h" 00018 #include "llvm/Module.h" 00019 #include "llvm/DerivedTypes.h" 00020 #include "llvm/Analysis/DataStructure/DSGraph.h" 00021 #include "llvm/Support/Debug.h" 00022 #include "llvm/Support/Timer.h" 00023 #include "llvm/ADT/Statistic.h" 00024 #include <iostream> 00025 using namespace llvm; 00026 00027 #if 0 00028 #define TIME_REGION(VARNAME, DESC) \ 00029 NamedRegionTimer VARNAME(DESC) 00030 #else 00031 #define TIME_REGION(VARNAME, DESC) 00032 #endif 00033 00034 namespace { 00035 RegisterAnalysis<TDDataStructures> // Register the pass 00036 Y("tddatastructure", "Top-down Data Structure Analysis"); 00037 00038 Statistic<> NumTDInlines("tddatastructures", "Number of graphs inlined"); 00039 } 00040 00041 void TDDataStructures::markReachableFunctionsExternallyAccessible(DSNode *N, 00042 hash_set<DSNode*> &Visited) { 00043 if (!N || Visited.count(N)) return; 00044 Visited.insert(N); 00045 00046 for (unsigned i = 0, e = N->getNumLinks(); i != e; ++i) { 00047 DSNodeHandle &NH = N->getLink(i*N->getPointerSize()); 00048 if (DSNode *NN = NH.getNode()) { 00049 std::vector<Function*> Functions; 00050 NN->addFullFunctionList(Functions); 00051 ArgsRemainIncomplete.insert(Functions.begin(), Functions.end()); 00052 markReachableFunctionsExternallyAccessible(NN, Visited); 00053 } 00054 } 00055 } 00056 00057 00058 // run - Calculate the top down data structure graphs for each function in the 00059 // program. 00060 // 00061 bool TDDataStructures::runOnModule(Module &M) { 00062 BUInfo = &getAnalysis<BUDataStructures>(); 00063 GlobalECs = BUInfo->getGlobalECs(); 00064 GlobalsGraph = new DSGraph(BUInfo->getGlobalsGraph(), GlobalECs); 00065 GlobalsGraph->setPrintAuxCalls(); 00066 00067 // Figure out which functions must not mark their arguments complete because 00068 // they are accessible outside this compilation unit. Currently, these 00069 // arguments are functions which are reachable by global variables in the 00070 // globals graph. 00071 const DSScalarMap &GGSM = GlobalsGraph->getScalarMap(); 00072 hash_set<DSNode*> Visited; 00073 for (DSScalarMap::global_iterator I=GGSM.global_begin(), E=GGSM.global_end(); 00074 I != E; ++I) { 00075 DSNode *N = GGSM.find(*I)->second.getNode(); 00076 if (N->isIncomplete()) 00077 markReachableFunctionsExternallyAccessible(N, Visited); 00078 } 00079 00080 // Loop over unresolved call nodes. Any functions passed into (but not 00081 // returned!) from unresolvable call nodes may be invoked outside of the 00082 // current module. 00083 for (DSGraph::afc_iterator I = GlobalsGraph->afc_begin(), 00084 E = GlobalsGraph->afc_end(); I != E; ++I) 00085 for (unsigned arg = 0, e = I->getNumPtrArgs(); arg != e; ++arg) 00086 markReachableFunctionsExternallyAccessible(I->getPtrArg(arg).getNode(), 00087 Visited); 00088 Visited.clear(); 00089 00090 // Functions without internal linkage also have unknown incoming arguments! 00091 for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) 00092 if (!I->isExternal() && !I->hasInternalLinkage()) 00093 ArgsRemainIncomplete.insert(I); 00094 00095 // We want to traverse the call graph in reverse post-order. To do this, we 00096 // calculate a post-order traversal, then reverse it. 00097 hash_set<DSGraph*> VisitedGraph; 00098 std::vector<DSGraph*> PostOrder; 00099 00100 #if 0 00101 {TIME_REGION(XXX, "td:Copy graphs"); 00102 00103 // Visit each of the graphs in reverse post-order now! 00104 for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) 00105 if (!I->isExternal()) 00106 getOrCreateDSGraph(*I); 00107 return false; 00108 } 00109 #endif 00110 00111 00112 {TIME_REGION(XXX, "td:Compute postorder"); 00113 00114 // Calculate top-down from main... 00115 if (Function *F = M.getMainFunction()) 00116 ComputePostOrder(*F, VisitedGraph, PostOrder); 00117 00118 // Next calculate the graphs for each unreachable function... 00119 for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) 00120 ComputePostOrder(*I, VisitedGraph, PostOrder); 00121 00122 VisitedGraph.clear(); // Release memory! 00123 } 00124 00125 {TIME_REGION(XXX, "td:Inline stuff"); 00126 00127 // Visit each of the graphs in reverse post-order now! 00128 while (!PostOrder.empty()) { 00129 InlineCallersIntoGraph(*PostOrder.back()); 00130 PostOrder.pop_back(); 00131 } 00132 } 00133 00134 // Free the IndCallMap. 00135 while (!IndCallMap.empty()) { 00136 delete IndCallMap.begin()->second; 00137 IndCallMap.erase(IndCallMap.begin()); 00138 } 00139 00140 00141 ArgsRemainIncomplete.clear(); 00142 GlobalsGraph->removeTriviallyDeadNodes(); 00143 00144 return false; 00145 } 00146 00147 00148 DSGraph &TDDataStructures::getOrCreateDSGraph(Function &F) { 00149 DSGraph *&G = DSInfo[&F]; 00150 if (G == 0) { // Not created yet? Clone BU graph... 00151 G = new DSGraph(getAnalysis<BUDataStructures>().getDSGraph(F), GlobalECs, 00152 DSGraph::DontCloneAuxCallNodes); 00153 assert(G->getAuxFunctionCalls().empty() && "Cloned aux calls?"); 00154 G->setPrintAuxCalls(); 00155 G->setGlobalsGraph(GlobalsGraph); 00156 00157 // Note that this graph is the graph for ALL of the function in the SCC, not 00158 // just F. 00159 for (DSGraph::retnodes_iterator RI = G->retnodes_begin(), 00160 E = G->retnodes_end(); RI != E; ++RI) 00161 if (RI->first != &F) 00162 DSInfo[RI->first] = G; 00163 } 00164 return *G; 00165 } 00166 00167 00168 void TDDataStructures::ComputePostOrder(Function &F,hash_set<DSGraph*> &Visited, 00169 std::vector<DSGraph*> &PostOrder) { 00170 if (F.isExternal()) return; 00171 DSGraph &G = getOrCreateDSGraph(F); 00172 if (Visited.count(&G)) return; 00173 Visited.insert(&G); 00174 00175 // Recursively traverse all of the callee graphs. 00176 for (DSGraph::fc_iterator CI = G.fc_begin(), CE = G.fc_end(); CI != CE; ++CI){ 00177 Instruction *CallI = CI->getCallSite().getInstruction(); 00178 for (BUDataStructures::callee_iterator I = BUInfo->callee_begin(CallI), 00179 E = BUInfo->callee_end(CallI); I != E; ++I) 00180 ComputePostOrder(*I->second, Visited, PostOrder); 00181 } 00182 00183 PostOrder.push_back(&G); 00184 } 00185 00186 00187 00188 00189 00190 // releaseMemory - If the pass pipeline is done with this pass, we can release 00191 // our memory... here... 00192 // 00193 // FIXME: This should be releaseMemory and will work fine, except that LoadVN 00194 // has no way to extend the lifetime of the pass, which screws up ds-aa. 00195 // 00196 void TDDataStructures::releaseMyMemory() { 00197 for (hash_map<Function*, DSGraph*>::iterator I = DSInfo.begin(), 00198 E = DSInfo.end(); I != E; ++I) { 00199 I->second->getReturnNodes().erase(I->first); 00200 if (I->second->getReturnNodes().empty()) 00201 delete I->second; 00202 } 00203 00204 // Empty map so next time memory is released, data structures are not 00205 // re-deleted. 00206 DSInfo.clear(); 00207 delete GlobalsGraph; 00208 GlobalsGraph = 0; 00209 } 00210 00211 /// InlineCallersIntoGraph - Inline all of the callers of the specified DS graph 00212 /// into it, then recompute completeness of nodes in the resultant graph. 00213 void TDDataStructures::InlineCallersIntoGraph(DSGraph &DSG) { 00214 // Inline caller graphs into this graph. First step, get the list of call 00215 // sites that call into this graph. 00216 std::vector<CallerCallEdge> EdgesFromCaller; 00217 std::map<DSGraph*, std::vector<CallerCallEdge> >::iterator 00218 CEI = CallerEdges.find(&DSG); 00219 if (CEI != CallerEdges.end()) { 00220 std::swap(CEI->second, EdgesFromCaller); 00221 CallerEdges.erase(CEI); 00222 } 00223 00224 // Sort the caller sites to provide a by-caller-graph ordering. 00225 std::sort(EdgesFromCaller.begin(), EdgesFromCaller.end()); 00226 00227 00228 // Merge information from the globals graph into this graph. FIXME: This is 00229 // stupid. Instead of us cloning information from the GG into this graph, 00230 // then having RemoveDeadNodes clone it back, we should do all of this as a 00231 // post-pass over all of the graphs. We need to take cloning out of 00232 // removeDeadNodes and gut removeDeadNodes at the same time first though. :( 00233 { 00234 DSGraph &GG = *DSG.getGlobalsGraph(); 00235 ReachabilityCloner RC(DSG, GG, 00236 DSGraph::DontCloneCallNodes | 00237 DSGraph::DontCloneAuxCallNodes); 00238 for (DSScalarMap::global_iterator 00239 GI = DSG.getScalarMap().global_begin(), 00240 E = DSG.getScalarMap().global_end(); GI != E; ++GI) 00241 RC.getClonedNH(GG.getNodeForValue(*GI)); 00242 } 00243 00244 DEBUG(std::cerr << "[TD] Inlining callers into '" << DSG.getFunctionNames() 00245 << "'\n"); 00246 00247 // Iteratively inline caller graphs into this graph. 00248 while (!EdgesFromCaller.empty()) { 00249 DSGraph &CallerGraph = *EdgesFromCaller.back().CallerGraph; 00250 00251 // Iterate through all of the call sites of this graph, cloning and merging 00252 // any nodes required by the call. 00253 ReachabilityCloner RC(DSG, CallerGraph, 00254 DSGraph::DontCloneCallNodes | 00255 DSGraph::DontCloneAuxCallNodes); 00256 00257 // Inline all call sites from this caller graph. 00258 do { 00259 const DSCallSite &CS = *EdgesFromCaller.back().CS; 00260 Function &CF = *EdgesFromCaller.back().CalledFunction; 00261 DEBUG(std::cerr << " [TD] Inlining graph into Fn '" 00262 << CF.getName() << "' from "); 00263 if (CallerGraph.getReturnNodes().empty()) 00264 DEBUG(std::cerr << "SYNTHESIZED INDIRECT GRAPH"); 00265 else 00266 DEBUG (std::cerr << "Fn '" 00267 << CS.getCallSite().getInstruction()-> 00268 getParent()->getParent()->getName() << "'"); 00269 DEBUG(std::cerr << ": " << CF.getFunctionType()->getNumParams() 00270 << " args\n"); 00271 00272 // Get the formal argument and return nodes for the called function and 00273 // merge them with the cloned subgraph. 00274 DSCallSite T1 = DSG.getCallSiteForArguments(CF); 00275 RC.mergeCallSite(T1, CS); 00276 ++NumTDInlines; 00277 00278 EdgesFromCaller.pop_back(); 00279 } while (!EdgesFromCaller.empty() && 00280 EdgesFromCaller.back().CallerGraph == &CallerGraph); 00281 } 00282 00283 00284 // Next, now that this graph is finalized, we need to recompute the 00285 // incompleteness markers for this graph and remove unreachable nodes. 00286 DSG.maskIncompleteMarkers(); 00287 00288 // If any of the functions has incomplete incoming arguments, don't mark any 00289 // of them as complete. 00290 bool HasIncompleteArgs = false; 00291 for (DSGraph::retnodes_iterator I = DSG.retnodes_begin(), 00292 E = DSG.retnodes_end(); I != E; ++I) 00293 if (ArgsRemainIncomplete.count(I->first)) { 00294 HasIncompleteArgs = true; 00295 break; 00296 } 00297 00298 // Recompute the Incomplete markers. Depends on whether args are complete 00299 unsigned Flags 00300 = HasIncompleteArgs ? DSGraph::MarkFormalArgs : DSGraph::IgnoreFormalArgs; 00301 DSG.markIncompleteNodes(Flags | DSGraph::IgnoreGlobals); 00302 00303 // Delete dead nodes. Treat globals that are unreachable as dead also. 00304 DSG.removeDeadNodes(DSGraph::RemoveUnreachableGlobals); 00305 00306 // We are done with computing the current TD Graph! Finally, before we can 00307 // finish processing this function, we figure out which functions it calls and 00308 // records these call graph edges, so that we have them when we process the 00309 // callee graphs. 00310 if (DSG.fc_begin() == DSG.fc_end()) return; 00311 00312 // Loop over all the call sites and all the callees at each call site, and add 00313 // edges to the CallerEdges structure for each callee. 00314 for (DSGraph::fc_iterator CI = DSG.fc_begin(), E = DSG.fc_end(); 00315 CI != E; ++CI) { 00316 00317 // Handle direct calls efficiently. 00318 if (CI->isDirectCall()) { 00319 if (!CI->getCalleeFunc()->isExternal() && 00320 !DSG.getReturnNodes().count(CI->getCalleeFunc())) 00321 CallerEdges[&getDSGraph(*CI->getCalleeFunc())] 00322 .push_back(CallerCallEdge(&DSG, &*CI, CI->getCalleeFunc())); 00323 continue; 00324 } 00325 00326 Instruction *CallI = CI->getCallSite().getInstruction(); 00327 // For each function in the invoked function list at this call site... 00328 BUDataStructures::callee_iterator IPI = 00329 BUInfo->callee_begin(CallI), IPE = BUInfo->callee_end(CallI); 00330 00331 // Skip over all calls to this graph (SCC calls). 00332 while (IPI != IPE && &getDSGraph(*IPI->second) == &DSG) 00333 ++IPI; 00334 00335 // All SCC calls? 00336 if (IPI == IPE) continue; 00337 00338 Function *FirstCallee = IPI->second; 00339 ++IPI; 00340 00341 // Skip over more SCC calls. 00342 while (IPI != IPE && &getDSGraph(*IPI->second) == &DSG) 00343 ++IPI; 00344 00345 // If there is exactly one callee from this call site, remember the edge in 00346 // CallerEdges. 00347 if (IPI == IPE) { 00348 if (!FirstCallee->isExternal()) 00349 CallerEdges[&getDSGraph(*FirstCallee)] 00350 .push_back(CallerCallEdge(&DSG, &*CI, FirstCallee)); 00351 continue; 00352 } 00353 00354 // Otherwise, there are multiple callees from this call site, so it must be 00355 // an indirect call. Chances are that there will be other call sites with 00356 // this set of targets. If so, we don't want to do M*N inlining operations, 00357 // so we build up a new, private, graph that represents the calls of all 00358 // calls to this set of functions. 00359 std::vector<Function*> Callees; 00360 for (BUDataStructures::ActualCalleesTy::const_iterator I = 00361 BUInfo->callee_begin(CallI), E = BUInfo->callee_end(CallI); 00362 I != E; ++I) 00363 if (!I->second->isExternal()) 00364 Callees.push_back(I->second); 00365 std::sort(Callees.begin(), Callees.end()); 00366 00367 std::map<std::vector<Function*>, DSGraph*>::iterator IndCallRecI = 00368 IndCallMap.lower_bound(Callees); 00369 00370 DSGraph *IndCallGraph; 00371 00372 // If we already have this graph, recycle it. 00373 if (IndCallRecI != IndCallMap.end() && IndCallRecI->first == Callees) { 00374 std::cerr << " [TD] *** Reuse of indcall graph for " << Callees.size() 00375 << " callees!\n"; 00376 IndCallGraph = IndCallRecI->second; 00377 } else { 00378 // Otherwise, create a new DSGraph to represent this. 00379 IndCallGraph = new DSGraph(DSG.getGlobalECs(), DSG.getTargetData()); 00380 00381 // Make a nullary dummy call site, which will eventually get some content 00382 // merged into it. The actual callee function doesn't matter here, so we 00383 // just pass it something to keep the ctor happy. 00384 std::vector<DSNodeHandle> ArgDummyVec; 00385 DSCallSite DummyCS(CI->getCallSite(), DSNodeHandle(), Callees[0]/*dummy*/, 00386 ArgDummyVec); 00387 IndCallGraph->getFunctionCalls().push_back(DummyCS); 00388 00389 IndCallRecI = IndCallMap.insert(IndCallRecI, 00390 std::make_pair(Callees, IndCallGraph)); 00391 00392 // Additionally, make sure that each of the callees inlines this graph 00393 // exactly once. 00394 DSCallSite *NCS = &IndCallGraph->getFunctionCalls().front(); 00395 for (unsigned i = 0, e = Callees.size(); i != e; ++i) { 00396 DSGraph& CalleeGraph = getDSGraph(*Callees[i]); 00397 if (&CalleeGraph != &DSG) 00398 CallerEdges[&CalleeGraph].push_back(CallerCallEdge(IndCallGraph, NCS, 00399 Callees[i])); 00400 } 00401 } 00402 00403 // Now that we know which graph to use for this, merge the caller 00404 // information into the graph, based on information from the call site. 00405 ReachabilityCloner RC(*IndCallGraph, DSG, 0); 00406 RC.mergeCallSite(IndCallGraph->getFunctionCalls().front(), *CI); 00407 } 00408 } 00409 00410 00411 static const Function *getFnForValue(const Value *V) { 00412 if (const Instruction *I = dyn_cast<Instruction>(V)) 00413 return I->getParent()->getParent(); 00414 else if (const Argument *A = dyn_cast<Argument>(V)) 00415 return A->getParent(); 00416 else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V)) 00417 return BB->getParent(); 00418 return 0; 00419 } 00420 00421 void TDDataStructures::deleteValue(Value *V) { 00422 if (const Function *F = getFnForValue(V)) { // Function local value? 00423 // If this is a function local value, just delete it from the scalar map! 00424 getDSGraph(*F).getScalarMap().eraseIfExists(V); 00425 return; 00426 } 00427 00428 if (Function *F = dyn_cast<Function>(V)) { 00429 assert(getDSGraph(*F).getReturnNodes().size() == 1 && 00430 "cannot handle scc's"); 00431 delete DSInfo[F]; 00432 DSInfo.erase(F); 00433 return; 00434 } 00435 00436 assert(!isa<GlobalVariable>(V) && "Do not know how to delete GV's yet!"); 00437 } 00438 00439 void TDDataStructures::copyValue(Value *From, Value *To) { 00440 if (From == To) return; 00441 if (const Function *F = getFnForValue(From)) { // Function local value? 00442 // If this is a function local value, just delete it from the scalar map! 00443 getDSGraph(*F).getScalarMap().copyScalarIfExists(From, To); 00444 return; 00445 } 00446 00447 if (Function *FromF = dyn_cast<Function>(From)) { 00448 Function *ToF = cast<Function>(To); 00449 assert(!DSInfo.count(ToF) && "New Function already exists!"); 00450 DSGraph *NG = new DSGraph(getDSGraph(*FromF), GlobalECs); 00451 DSInfo[ToF] = NG; 00452 assert(NG->getReturnNodes().size() == 1 && "Cannot copy SCC's yet!"); 00453 00454 // Change the Function* is the returnnodes map to the ToF. 00455 DSNodeHandle Ret = NG->retnodes_begin()->second; 00456 NG->getReturnNodes().clear(); 00457 NG->getReturnNodes()[ToF] = Ret; 00458 return; 00459 } 00460 00461 if (const Function *F = getFnForValue(To)) { 00462 DSGraph &G = getDSGraph(*F); 00463 G.getScalarMap().copyScalarIfExists(From, To); 00464 return; 00465 } 00466 00467 std::cerr << *From; 00468 std::cerr << *To; 00469 assert(0 && "Do not know how to copy this yet!"); 00470 abort(); 00471 }