LLVM API Documentation
00001 //===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines a very simple implementation of Andersen's interprocedural 00011 // alias analysis. This implementation does not include any of the fancy 00012 // features that make Andersen's reasonably efficient (like cycle elimination or 00013 // variable substitution), but it should be useful for getting precision 00014 // numbers and can be extended in the future. 00015 // 00016 // In pointer analysis terms, this is a subset-based, flow-insensitive, 00017 // field-insensitive, and context-insensitive algorithm pointer algorithm. 00018 // 00019 // This algorithm is implemented as three stages: 00020 // 1. Object identification. 00021 // 2. Inclusion constraint identification. 00022 // 3. Inclusion constraint solving. 00023 // 00024 // The object identification stage identifies all of the memory objects in the 00025 // program, which includes globals, heap allocated objects, and stack allocated 00026 // objects. 00027 // 00028 // The inclusion constraint identification stage finds all inclusion constraints 00029 // in the program by scanning the program, looking for pointer assignments and 00030 // other statements that effect the points-to graph. For a statement like "A = 00031 // B", this statement is processed to indicate that A can point to anything that 00032 // B can point to. Constraints can handle copies, loads, and stores. 00033 // 00034 // The inclusion constraint solving phase iteratively propagates the inclusion 00035 // constraints until a fixed point is reached. This is an O(N^3) algorithm. 00036 // 00037 // In the initial pass, all indirect function calls are completely ignored. As 00038 // the analysis discovers new targets of function pointers, it iteratively 00039 // resolves a precise (and conservative) call graph. Also related, this 00040 // analysis initially assumes that all internal functions have known incoming 00041 // pointers. If we find that an internal function's address escapes outside of 00042 // the program, we update this assumption. 00043 // 00044 // Future Improvements: 00045 // This implementation of Andersen's algorithm is extremely slow. To make it 00046 // scale reasonably well, the inclusion constraints could be sorted (easy), 00047 // offline variable substitution would be a huge win (straight-forward), and 00048 // online cycle elimination (trickier) might help as well. 00049 // 00050 //===----------------------------------------------------------------------===// 00051 00052 #define DEBUG_TYPE "anders-aa" 00053 #include "llvm/Constants.h" 00054 #include "llvm/DerivedTypes.h" 00055 #include "llvm/Instructions.h" 00056 #include "llvm/Module.h" 00057 #include "llvm/Pass.h" 00058 #include "llvm/Support/InstIterator.h" 00059 #include "llvm/Support/InstVisitor.h" 00060 #include "llvm/Analysis/AliasAnalysis.h" 00061 #include "llvm/Support/Debug.h" 00062 #include "llvm/ADT/Statistic.h" 00063 #include <set> 00064 using namespace llvm; 00065 00066 namespace { 00067 Statistic<> 00068 NumIters("anders-aa", "Number of iterations to reach convergence"); 00069 Statistic<> 00070 NumConstraints("anders-aa", "Number of constraints"); 00071 Statistic<> 00072 NumNodes("anders-aa", "Number of nodes"); 00073 Statistic<> 00074 NumEscapingFunctions("anders-aa", "Number of internal functions that escape"); 00075 Statistic<> 00076 NumIndirectCallees("anders-aa", "Number of indirect callees found"); 00077 00078 class Andersens : public ModulePass, public AliasAnalysis, 00079 private InstVisitor<Andersens> { 00080 /// Node class - This class is used to represent a memory object in the 00081 /// program, and is the primitive used to build the points-to graph. 00082 class Node { 00083 std::vector<Node*> Pointees; 00084 Value *Val; 00085 public: 00086 Node() : Val(0) {} 00087 Node *setValue(Value *V) { 00088 assert(Val == 0 && "Value already set for this node!"); 00089 Val = V; 00090 return this; 00091 } 00092 00093 /// getValue - Return the LLVM value corresponding to this node. 00094 Value *getValue() const { return Val; } 00095 00096 typedef std::vector<Node*>::const_iterator iterator; 00097 iterator begin() const { return Pointees.begin(); } 00098 iterator end() const { return Pointees.end(); } 00099 00100 /// addPointerTo - Add a pointer to the list of pointees of this node, 00101 /// returning true if this caused a new pointer to be added, or false if 00102 /// we already knew about the points-to relation. 00103 bool addPointerTo(Node *N) { 00104 std::vector<Node*>::iterator I = std::lower_bound(Pointees.begin(), 00105 Pointees.end(), 00106 N); 00107 if (I != Pointees.end() && *I == N) 00108 return false; 00109 Pointees.insert(I, N); 00110 return true; 00111 } 00112 00113 /// intersects - Return true if the points-to set of this node intersects 00114 /// with the points-to set of the specified node. 00115 bool intersects(Node *N) const; 00116 00117 /// intersectsIgnoring - Return true if the points-to set of this node 00118 /// intersects with the points-to set of the specified node on any nodes 00119 /// except for the specified node to ignore. 00120 bool intersectsIgnoring(Node *N, Node *Ignoring) const; 00121 00122 // Constraint application methods. 00123 bool copyFrom(Node *N); 00124 bool loadFrom(Node *N); 00125 bool storeThrough(Node *N); 00126 }; 00127 00128 /// GraphNodes - This vector is populated as part of the object 00129 /// identification stage of the analysis, which populates this vector with a 00130 /// node for each memory object and fills in the ValueNodes map. 00131 std::vector<Node> GraphNodes; 00132 00133 /// ValueNodes - This map indicates the Node that a particular Value* is 00134 /// represented by. This contains entries for all pointers. 00135 std::map<Value*, unsigned> ValueNodes; 00136 00137 /// ObjectNodes - This map contains entries for each memory object in the 00138 /// program: globals, alloca's and mallocs. 00139 std::map<Value*, unsigned> ObjectNodes; 00140 00141 /// ReturnNodes - This map contains an entry for each function in the 00142 /// program that returns a value. 00143 std::map<Function*, unsigned> ReturnNodes; 00144 00145 /// VarargNodes - This map contains the entry used to represent all pointers 00146 /// passed through the varargs portion of a function call for a particular 00147 /// function. An entry is not present in this map for functions that do not 00148 /// take variable arguments. 00149 std::map<Function*, unsigned> VarargNodes; 00150 00151 /// Constraint - Objects of this structure are used to represent the various 00152 /// constraints identified by the algorithm. The constraints are 'copy', 00153 /// for statements like "A = B", 'load' for statements like "A = *B", and 00154 /// 'store' for statements like "*A = B". 00155 struct Constraint { 00156 enum ConstraintType { Copy, Load, Store } Type; 00157 Node *Dest, *Src; 00158 00159 Constraint(ConstraintType Ty, Node *D, Node *S) 00160 : Type(Ty), Dest(D), Src(S) {} 00161 }; 00162 00163 /// Constraints - This vector contains a list of all of the constraints 00164 /// identified by the program. 00165 std::vector<Constraint> Constraints; 00166 00167 /// EscapingInternalFunctions - This set contains all of the internal 00168 /// functions that are found to escape from the program. If the address of 00169 /// an internal function is passed to an external function or otherwise 00170 /// escapes from the analyzed portion of the program, we must assume that 00171 /// any pointer arguments can alias the universal node. This set keeps 00172 /// track of those functions we are assuming to escape so far. 00173 std::set<Function*> EscapingInternalFunctions; 00174 00175 /// IndirectCalls - This contains a list of all of the indirect call sites 00176 /// in the program. Since the call graph is iteratively discovered, we may 00177 /// need to add constraints to our graph as we find new targets of function 00178 /// pointers. 00179 std::vector<CallSite> IndirectCalls; 00180 00181 /// IndirectCallees - For each call site in the indirect calls list, keep 00182 /// track of the callees that we have discovered so far. As the analysis 00183 /// proceeds, more callees are discovered, until the call graph finally 00184 /// stabilizes. 00185 std::map<CallSite, std::vector<Function*> > IndirectCallees; 00186 00187 /// This enum defines the GraphNodes indices that correspond to important 00188 /// fixed sets. 00189 enum { 00190 UniversalSet = 0, 00191 NullPtr = 1, 00192 NullObject = 2, 00193 }; 00194 00195 public: 00196 bool runOnModule(Module &M) { 00197 InitializeAliasAnalysis(this); 00198 IdentifyObjects(M); 00199 CollectConstraints(M); 00200 DEBUG(PrintConstraints()); 00201 SolveConstraints(); 00202 DEBUG(PrintPointsToGraph()); 00203 00204 // Free the constraints list, as we don't need it to respond to alias 00205 // requests. 00206 ObjectNodes.clear(); 00207 ReturnNodes.clear(); 00208 VarargNodes.clear(); 00209 EscapingInternalFunctions.clear(); 00210 std::vector<Constraint>().swap(Constraints); 00211 return false; 00212 } 00213 00214 void releaseMemory() { 00215 // FIXME: Until we have transitively required passes working correctly, 00216 // this cannot be enabled! Otherwise, using -count-aa with the pass 00217 // causes memory to be freed too early. :( 00218 #if 0 00219 // The memory objects and ValueNodes data structures at the only ones that 00220 // are still live after construction. 00221 std::vector<Node>().swap(GraphNodes); 00222 ValueNodes.clear(); 00223 #endif 00224 } 00225 00226 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 00227 AliasAnalysis::getAnalysisUsage(AU); 00228 AU.setPreservesAll(); // Does not transform code 00229 } 00230 00231 //------------------------------------------------ 00232 // Implement the AliasAnalysis API 00233 // 00234 AliasResult alias(const Value *V1, unsigned V1Size, 00235 const Value *V2, unsigned V2Size); 00236 void getMustAliases(Value *P, std::vector<Value*> &RetVals); 00237 bool pointsToConstantMemory(const Value *P); 00238 00239 virtual void deleteValue(Value *V) { 00240 ValueNodes.erase(V); 00241 getAnalysis<AliasAnalysis>().deleteValue(V); 00242 } 00243 00244 virtual void copyValue(Value *From, Value *To) { 00245 ValueNodes[To] = ValueNodes[From]; 00246 getAnalysis<AliasAnalysis>().copyValue(From, To); 00247 } 00248 00249 private: 00250 /// getNode - Return the node corresponding to the specified pointer scalar. 00251 /// 00252 Node *getNode(Value *V) { 00253 if (Constant *C = dyn_cast<Constant>(V)) 00254 if (!isa<GlobalValue>(C)) 00255 return getNodeForConstantPointer(C); 00256 00257 std::map<Value*, unsigned>::iterator I = ValueNodes.find(V); 00258 if (I == ValueNodes.end()) { 00259 V->dump(); 00260 assert(I != ValueNodes.end() && 00261 "Value does not have a node in the points-to graph!"); 00262 } 00263 return &GraphNodes[I->second]; 00264 } 00265 00266 /// getObject - Return the node corresponding to the memory object for the 00267 /// specified global or allocation instruction. 00268 Node *getObject(Value *V) { 00269 std::map<Value*, unsigned>::iterator I = ObjectNodes.find(V); 00270 assert(I != ObjectNodes.end() && 00271 "Value does not have an object in the points-to graph!"); 00272 return &GraphNodes[I->second]; 00273 } 00274 00275 /// getReturnNode - Return the node representing the return value for the 00276 /// specified function. 00277 Node *getReturnNode(Function *F) { 00278 std::map<Function*, unsigned>::iterator I = ReturnNodes.find(F); 00279 assert(I != ReturnNodes.end() && "Function does not return a value!"); 00280 return &GraphNodes[I->second]; 00281 } 00282 00283 /// getVarargNode - Return the node representing the variable arguments 00284 /// formal for the specified function. 00285 Node *getVarargNode(Function *F) { 00286 std::map<Function*, unsigned>::iterator I = VarargNodes.find(F); 00287 assert(I != VarargNodes.end() && "Function does not take var args!"); 00288 return &GraphNodes[I->second]; 00289 } 00290 00291 /// getNodeValue - Get the node for the specified LLVM value and set the 00292 /// value for it to be the specified value. 00293 Node *getNodeValue(Value &V) { 00294 return getNode(&V)->setValue(&V); 00295 } 00296 00297 void IdentifyObjects(Module &M); 00298 void CollectConstraints(Module &M); 00299 void SolveConstraints(); 00300 00301 Node *getNodeForConstantPointer(Constant *C); 00302 Node *getNodeForConstantPointerTarget(Constant *C); 00303 void AddGlobalInitializerConstraints(Node *N, Constant *C); 00304 void AddConstraintsForNonInternalLinkage(Function *F); 00305 void AddConstraintsForCall(CallSite CS, Function *F); 00306 00307 00308 void PrintNode(Node *N); 00309 void PrintConstraints(); 00310 void PrintPointsToGraph(); 00311 00312 //===------------------------------------------------------------------===// 00313 // Instruction visitation methods for adding constraints 00314 // 00315 friend class InstVisitor<Andersens>; 00316 void visitReturnInst(ReturnInst &RI); 00317 void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); } 00318 void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); } 00319 void visitCallSite(CallSite CS); 00320 void visitAllocationInst(AllocationInst &AI); 00321 void visitLoadInst(LoadInst &LI); 00322 void visitStoreInst(StoreInst &SI); 00323 void visitGetElementPtrInst(GetElementPtrInst &GEP); 00324 void visitPHINode(PHINode &PN); 00325 void visitCastInst(CastInst &CI); 00326 void visitSelectInst(SelectInst &SI); 00327 void visitVANext(VANextInst &I); 00328 void visitVAArg(VAArgInst &I); 00329 void visitInstruction(Instruction &I); 00330 }; 00331 00332 RegisterOpt<Andersens> X("anders-aa", 00333 "Andersen's Interprocedural Alias Analysis"); 00334 RegisterAnalysisGroup<AliasAnalysis, Andersens> Y; 00335 } 00336 00337 //===----------------------------------------------------------------------===// 00338 // AliasAnalysis Interface Implementation 00339 //===----------------------------------------------------------------------===// 00340 00341 AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size, 00342 const Value *V2, unsigned V2Size) { 00343 Node *N1 = getNode((Value*)V1); 00344 Node *N2 = getNode((Value*)V2); 00345 00346 // Check to see if the two pointers are known to not alias. They don't alias 00347 // if their points-to sets do not intersect. 00348 if (!N1->intersectsIgnoring(N2, &GraphNodes[NullObject])) 00349 return NoAlias; 00350 00351 return AliasAnalysis::alias(V1, V1Size, V2, V2Size); 00352 } 00353 00354 /// getMustAlias - We can provide must alias information if we know that a 00355 /// pointer can only point to a specific function or the null pointer. 00356 /// Unfortunately we cannot determine must-alias information for global 00357 /// variables or any other memory memory objects because we do not track whether 00358 /// a pointer points to the beginning of an object or a field of it. 00359 void Andersens::getMustAliases(Value *P, std::vector<Value*> &RetVals) { 00360 Node *N = getNode(P); 00361 Node::iterator I = N->begin(); 00362 if (I != N->end()) { 00363 // If there is exactly one element in the points-to set for the object... 00364 ++I; 00365 if (I == N->end()) { 00366 Node *Pointee = *N->begin(); 00367 00368 // If a function is the only object in the points-to set, then it must be 00369 // the destination. Note that we can't handle global variables here, 00370 // because we don't know if the pointer is actually pointing to a field of 00371 // the global or to the beginning of it. 00372 if (Value *V = Pointee->getValue()) { 00373 if (Function *F = dyn_cast<Function>(V)) 00374 RetVals.push_back(F); 00375 } else { 00376 // If the object in the points-to set is the null object, then the null 00377 // pointer is a must alias. 00378 if (Pointee == &GraphNodes[NullObject]) 00379 RetVals.push_back(Constant::getNullValue(P->getType())); 00380 } 00381 } 00382 } 00383 00384 AliasAnalysis::getMustAliases(P, RetVals); 00385 } 00386 00387 /// pointsToConstantMemory - If we can determine that this pointer only points 00388 /// to constant memory, return true. In practice, this means that if the 00389 /// pointer can only point to constant globals, functions, or the null pointer, 00390 /// return true. 00391 /// 00392 bool Andersens::pointsToConstantMemory(const Value *P) { 00393 Node *N = getNode((Value*)P); 00394 for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) { 00395 if (Value *V = (*I)->getValue()) { 00396 if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) && 00397 !cast<GlobalVariable>(V)->isConstant())) 00398 return AliasAnalysis::pointsToConstantMemory(P); 00399 } else { 00400 if (*I != &GraphNodes[NullObject]) 00401 return AliasAnalysis::pointsToConstantMemory(P); 00402 } 00403 } 00404 00405 return true; 00406 } 00407 00408 //===----------------------------------------------------------------------===// 00409 // Object Identification Phase 00410 //===----------------------------------------------------------------------===// 00411 00412 /// IdentifyObjects - This stage scans the program, adding an entry to the 00413 /// GraphNodes list for each memory object in the program (global stack or 00414 /// heap), and populates the ValueNodes and ObjectNodes maps for these objects. 00415 /// 00416 void Andersens::IdentifyObjects(Module &M) { 00417 unsigned NumObjects = 0; 00418 00419 // Object #0 is always the universal set: the object that we don't know 00420 // anything about. 00421 assert(NumObjects == UniversalSet && "Something changed!"); 00422 ++NumObjects; 00423 00424 // Object #1 always represents the null pointer. 00425 assert(NumObjects == NullPtr && "Something changed!"); 00426 ++NumObjects; 00427 00428 // Object #2 always represents the null object (the object pointed to by null) 00429 assert(NumObjects == NullObject && "Something changed!"); 00430 ++NumObjects; 00431 00432 // Add all the globals first. 00433 for (Module::giterator I = M.gbegin(), E = M.gend(); I != E; ++I) { 00434 ObjectNodes[I] = NumObjects++; 00435 ValueNodes[I] = NumObjects++; 00436 } 00437 00438 // Add nodes for all of the functions and the instructions inside of them. 00439 for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { 00440 // The function itself is a memory object. 00441 ValueNodes[F] = NumObjects++; 00442 ObjectNodes[F] = NumObjects++; 00443 if (isa<PointerType>(F->getFunctionType()->getReturnType())) 00444 ReturnNodes[F] = NumObjects++; 00445 if (F->getFunctionType()->isVarArg()) 00446 VarargNodes[F] = NumObjects++; 00447 00448 // Add nodes for all of the incoming pointer arguments. 00449 for (Function::aiterator I = F->abegin(), E = F->aend(); I != E; ++I) 00450 if (isa<PointerType>(I->getType())) 00451 ValueNodes[I] = NumObjects++; 00452 00453 // Scan the function body, creating a memory object for each heap/stack 00454 // allocation in the body of the function and a node to represent all 00455 // pointer values defined by instructions and used as operands. 00456 for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { 00457 // If this is an heap or stack allocation, create a node for the memory 00458 // object. 00459 if (isa<PointerType>(II->getType())) { 00460 ValueNodes[&*II] = NumObjects++; 00461 if (AllocationInst *AI = dyn_cast<AllocationInst>(&*II)) 00462 ObjectNodes[AI] = NumObjects++; 00463 } 00464 } 00465 } 00466 00467 // Now that we know how many objects to create, make them all now! 00468 GraphNodes.resize(NumObjects); 00469 NumNodes += NumObjects; 00470 } 00471 00472 //===----------------------------------------------------------------------===// 00473 // Constraint Identification Phase 00474 //===----------------------------------------------------------------------===// 00475 00476 /// getNodeForConstantPointer - Return the node corresponding to the constant 00477 /// pointer itself. 00478 Andersens::Node *Andersens::getNodeForConstantPointer(Constant *C) { 00479 assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); 00480 00481 if (isa<ConstantPointerNull>(C)) 00482 return &GraphNodes[NullPtr]; 00483 else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) 00484 return getNode(GV); 00485 else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 00486 switch (CE->getOpcode()) { 00487 case Instruction::GetElementPtr: 00488 return getNodeForConstantPointer(CE->getOperand(0)); 00489 case Instruction::Cast: 00490 if (isa<PointerType>(CE->getOperand(0)->getType())) 00491 return getNodeForConstantPointer(CE->getOperand(0)); 00492 else 00493 return &GraphNodes[UniversalSet]; 00494 default: 00495 std::cerr << "Constant Expr not yet handled: " << *CE << "\n"; 00496 assert(0); 00497 } 00498 } else { 00499 assert(0 && "Unknown constant pointer!"); 00500 } 00501 return 0; 00502 } 00503 00504 /// getNodeForConstantPointerTarget - Return the node POINTED TO by the 00505 /// specified constant pointer. 00506 Andersens::Node *Andersens::getNodeForConstantPointerTarget(Constant *C) { 00507 assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); 00508 00509 if (isa<ConstantPointerNull>(C)) 00510 return &GraphNodes[NullObject]; 00511 else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) 00512 return getObject(GV); 00513 else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 00514 switch (CE->getOpcode()) { 00515 case Instruction::GetElementPtr: 00516 return getNodeForConstantPointerTarget(CE->getOperand(0)); 00517 case Instruction::Cast: 00518 if (isa<PointerType>(CE->getOperand(0)->getType())) 00519 return getNodeForConstantPointerTarget(CE->getOperand(0)); 00520 else 00521 return &GraphNodes[UniversalSet]; 00522 default: 00523 std::cerr << "Constant Expr not yet handled: " << *CE << "\n"; 00524 assert(0); 00525 } 00526 } else { 00527 assert(0 && "Unknown constant pointer!"); 00528 } 00529 return 0; 00530 } 00531 00532 /// AddGlobalInitializerConstraints - Add inclusion constraints for the memory 00533 /// object N, which contains values indicated by C. 00534 void Andersens::AddGlobalInitializerConstraints(Node *N, Constant *C) { 00535 if (C->getType()->isFirstClassType()) { 00536 if (isa<PointerType>(C->getType())) 00537 N->addPointerTo(getNodeForConstantPointer(C)); 00538 } else if (C->isNullValue()) { 00539 N->addPointerTo(&GraphNodes[NullObject]); 00540 return; 00541 } else { 00542 // If this is an array or struct, include constraints for each element. 00543 assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C)); 00544 for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) 00545 AddGlobalInitializerConstraints(N, cast<Constant>(C->getOperand(i))); 00546 } 00547 } 00548 00549 void Andersens::AddConstraintsForNonInternalLinkage(Function *F) { 00550 for (Function::aiterator I = F->abegin(), E = F->aend(); I != E; ++I) 00551 if (isa<PointerType>(I->getType())) 00552 // If this is an argument of an externally accessible function, the 00553 // incoming pointer might point to anything. 00554 Constraints.push_back(Constraint(Constraint::Copy, getNode(I), 00555 &GraphNodes[UniversalSet])); 00556 } 00557 00558 00559 /// CollectConstraints - This stage scans the program, adding a constraint to 00560 /// the Constraints list for each instruction in the program that induces a 00561 /// constraint, and setting up the initial points-to graph. 00562 /// 00563 void Andersens::CollectConstraints(Module &M) { 00564 // First, the universal set points to itself. 00565 GraphNodes[UniversalSet].addPointerTo(&GraphNodes[UniversalSet]); 00566 00567 // Next, the null pointer points to the null object. 00568 GraphNodes[NullPtr].addPointerTo(&GraphNodes[NullObject]); 00569 00570 // Next, add any constraints on global variables and their initializers. 00571 for (Module::giterator I = M.gbegin(), E = M.gend(); I != E; ++I) { 00572 // Associate the address of the global object as pointing to the memory for 00573 // the global: &G = <G memory> 00574 Node *Object = getObject(I); 00575 Object->setValue(I); 00576 getNodeValue(*I)->addPointerTo(Object); 00577 00578 if (I->hasInitializer()) { 00579 AddGlobalInitializerConstraints(Object, I->getInitializer()); 00580 } else { 00581 // If it doesn't have an initializer (i.e. it's defined in another 00582 // translation unit), it points to the universal set. 00583 Constraints.push_back(Constraint(Constraint::Copy, Object, 00584 &GraphNodes[UniversalSet])); 00585 } 00586 } 00587 00588 for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { 00589 // Make the function address point to the function object. 00590 getNodeValue(*F)->addPointerTo(getObject(F)->setValue(F)); 00591 00592 // Set up the return value node. 00593 if (isa<PointerType>(F->getFunctionType()->getReturnType())) 00594 getReturnNode(F)->setValue(F); 00595 if (F->getFunctionType()->isVarArg()) 00596 getVarargNode(F)->setValue(F); 00597 00598 // Set up incoming argument nodes. 00599 for (Function::aiterator I = F->abegin(), E = F->aend(); I != E; ++I) 00600 if (isa<PointerType>(I->getType())) 00601 getNodeValue(*I); 00602 00603 if (!F->hasInternalLinkage()) 00604 AddConstraintsForNonInternalLinkage(F); 00605 00606 if (!F->isExternal()) { 00607 // Scan the function body, creating a memory object for each heap/stack 00608 // allocation in the body of the function and a node to represent all 00609 // pointer values defined by instructions and used as operands. 00610 visit(F); 00611 } else { 00612 // External functions that return pointers return the universal set. 00613 if (isa<PointerType>(F->getFunctionType()->getReturnType())) 00614 Constraints.push_back(Constraint(Constraint::Copy, 00615 getReturnNode(F), 00616 &GraphNodes[UniversalSet])); 00617 00618 // Any pointers that are passed into the function have the universal set 00619 // stored into them. 00620 for (Function::aiterator I = F->abegin(), E = F->aend(); I != E; ++I) 00621 if (isa<PointerType>(I->getType())) { 00622 // Pointers passed into external functions could have anything stored 00623 // through them. 00624 Constraints.push_back(Constraint(Constraint::Store, getNode(I), 00625 &GraphNodes[UniversalSet])); 00626 // Memory objects passed into external function calls can have the 00627 // universal set point to them. 00628 Constraints.push_back(Constraint(Constraint::Copy, 00629 &GraphNodes[UniversalSet], 00630 getNode(I))); 00631 } 00632 00633 // If this is an external varargs function, it can also store pointers 00634 // into any pointers passed through the varargs section. 00635 if (F->getFunctionType()->isVarArg()) 00636 Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F), 00637 &GraphNodes[UniversalSet])); 00638 } 00639 } 00640 NumConstraints += Constraints.size(); 00641 } 00642 00643 00644 void Andersens::visitInstruction(Instruction &I) { 00645 #ifdef NDEBUG 00646 return; // This function is just a big assert. 00647 #endif 00648 if (isa<BinaryOperator>(I)) 00649 return; 00650 // Most instructions don't have any effect on pointer values. 00651 switch (I.getOpcode()) { 00652 case Instruction::Br: 00653 case Instruction::Switch: 00654 case Instruction::Unwind: 00655 case Instruction::Unreachable: 00656 case Instruction::Free: 00657 case Instruction::Shl: 00658 case Instruction::Shr: 00659 return; 00660 default: 00661 // Is this something we aren't handling yet? 00662 std::cerr << "Unknown instruction: " << I; 00663 abort(); 00664 } 00665 } 00666 00667 void Andersens::visitAllocationInst(AllocationInst &AI) { 00668 getNodeValue(AI)->addPointerTo(getObject(&AI)->setValue(&AI)); 00669 } 00670 00671 void Andersens::visitReturnInst(ReturnInst &RI) { 00672 if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType())) 00673 // return V --> <Copy/retval{F}/v> 00674 Constraints.push_back(Constraint(Constraint::Copy, 00675 getReturnNode(RI.getParent()->getParent()), 00676 getNode(RI.getOperand(0)))); 00677 } 00678 00679 void Andersens::visitLoadInst(LoadInst &LI) { 00680 if (isa<PointerType>(LI.getType())) 00681 // P1 = load P2 --> <Load/P1/P2> 00682 Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI), 00683 getNode(LI.getOperand(0)))); 00684 } 00685 00686 void Andersens::visitStoreInst(StoreInst &SI) { 00687 if (isa<PointerType>(SI.getOperand(0)->getType())) 00688 // store P1, P2 --> <Store/P2/P1> 00689 Constraints.push_back(Constraint(Constraint::Store, 00690 getNode(SI.getOperand(1)), 00691 getNode(SI.getOperand(0)))); 00692 } 00693 00694 void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) { 00695 // P1 = getelementptr P2, ... --> <Copy/P1/P2> 00696 Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP), 00697 getNode(GEP.getOperand(0)))); 00698 } 00699 00700 void Andersens::visitPHINode(PHINode &PN) { 00701 if (isa<PointerType>(PN.getType())) { 00702 Node *PNN = getNodeValue(PN); 00703 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) 00704 // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ... 00705 Constraints.push_back(Constraint(Constraint::Copy, PNN, 00706 getNode(PN.getIncomingValue(i)))); 00707 } 00708 } 00709 00710 void Andersens::visitCastInst(CastInst &CI) { 00711 Value *Op = CI.getOperand(0); 00712 if (isa<PointerType>(CI.getType())) { 00713 if (isa<PointerType>(Op->getType())) { 00714 // P1 = cast P2 --> <Copy/P1/P2> 00715 Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), 00716 getNode(CI.getOperand(0)))); 00717 } else { 00718 // P1 = cast int --> <Copy/P1/Univ> 00719 Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), 00720 &GraphNodes[UniversalSet])); 00721 } 00722 } else if (isa<PointerType>(Op->getType())) { 00723 // int = cast P1 --> <Copy/Univ/P1> 00724 Constraints.push_back(Constraint(Constraint::Copy, 00725 &GraphNodes[UniversalSet], 00726 getNode(CI.getOperand(0)))); 00727 } 00728 } 00729 00730 void Andersens::visitSelectInst(SelectInst &SI) { 00731 if (isa<PointerType>(SI.getType())) { 00732 Node *SIN = getNodeValue(SI); 00733 // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3> 00734 Constraints.push_back(Constraint(Constraint::Copy, SIN, 00735 getNode(SI.getOperand(1)))); 00736 Constraints.push_back(Constraint(Constraint::Copy, SIN, 00737 getNode(SI.getOperand(2)))); 00738 } 00739 } 00740 00741 void Andersens::visitVANext(VANextInst &I) { 00742 // FIXME: Implement 00743 assert(0 && "vanext not handled yet!"); 00744 } 00745 void Andersens::visitVAArg(VAArgInst &I) { 00746 assert(0 && "vaarg not handled yet!"); 00747 } 00748 00749 /// AddConstraintsForCall - Add constraints for a call with actual arguments 00750 /// specified by CS to the function specified by F. Note that the types of 00751 /// arguments might not match up in the case where this is an indirect call and 00752 /// the function pointer has been casted. If this is the case, do something 00753 /// reasonable. 00754 void Andersens::AddConstraintsForCall(CallSite CS, Function *F) { 00755 if (isa<PointerType>(CS.getType())) { 00756 Node *CSN = getNode(CS.getInstruction()); 00757 if (isa<PointerType>(F->getFunctionType()->getReturnType())) { 00758 Constraints.push_back(Constraint(Constraint::Copy, CSN, 00759 getReturnNode(F))); 00760 } else { 00761 // If the function returns a non-pointer value, handle this just like we 00762 // treat a nonpointer cast to pointer. 00763 Constraints.push_back(Constraint(Constraint::Copy, CSN, 00764 &GraphNodes[UniversalSet])); 00765 } 00766 } else if (isa<PointerType>(F->getFunctionType()->getReturnType())) { 00767 Constraints.push_back(Constraint(Constraint::Copy, 00768 &GraphNodes[UniversalSet], 00769 getReturnNode(F))); 00770 } 00771 00772 Function::aiterator AI = F->abegin(), AE = F->aend(); 00773 CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end(); 00774 for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) 00775 if (isa<PointerType>(AI->getType())) { 00776 if (isa<PointerType>((*ArgI)->getType())) { 00777 // Copy the actual argument into the formal argument. 00778 Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), 00779 getNode(*ArgI))); 00780 } else { 00781 Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), 00782 &GraphNodes[UniversalSet])); 00783 } 00784 } else if (isa<PointerType>((*ArgI)->getType())) { 00785 Constraints.push_back(Constraint(Constraint::Copy, 00786 &GraphNodes[UniversalSet], 00787 getNode(*ArgI))); 00788 } 00789 00790 // Copy all pointers passed through the varargs section to the varargs node. 00791 if (F->getFunctionType()->isVarArg()) 00792 for (; ArgI != ArgE; ++ArgI) 00793 if (isa<PointerType>((*ArgI)->getType())) 00794 Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F), 00795 getNode(*ArgI))); 00796 // If more arguments are passed in than we track, just drop them on the floor. 00797 } 00798 00799 void Andersens::visitCallSite(CallSite CS) { 00800 if (isa<PointerType>(CS.getType())) 00801 getNodeValue(*CS.getInstruction()); 00802 00803 if (Function *F = CS.getCalledFunction()) { 00804 AddConstraintsForCall(CS, F); 00805 } else { 00806 // We don't handle indirect call sites yet. Keep track of them for when we 00807 // discover the call graph incrementally. 00808 IndirectCalls.push_back(CS); 00809 } 00810 } 00811 00812 //===----------------------------------------------------------------------===// 00813 // Constraint Solving Phase 00814 //===----------------------------------------------------------------------===// 00815 00816 /// intersects - Return true if the points-to set of this node intersects 00817 /// with the points-to set of the specified node. 00818 bool Andersens::Node::intersects(Node *N) const { 00819 iterator I1 = begin(), I2 = N->begin(), E1 = end(), E2 = N->end(); 00820 while (I1 != E1 && I2 != E2) { 00821 if (*I1 == *I2) return true; 00822 if (*I1 < *I2) 00823 ++I1; 00824 else 00825 ++I2; 00826 } 00827 return false; 00828 } 00829 00830 /// intersectsIgnoring - Return true if the points-to set of this node 00831 /// intersects with the points-to set of the specified node on any nodes 00832 /// except for the specified node to ignore. 00833 bool Andersens::Node::intersectsIgnoring(Node *N, Node *Ignoring) const { 00834 iterator I1 = begin(), I2 = N->begin(), E1 = end(), E2 = N->end(); 00835 while (I1 != E1 && I2 != E2) { 00836 if (*I1 == *I2) { 00837 if (*I1 != Ignoring) return true; 00838 ++I1; ++I2; 00839 } else if (*I1 < *I2) 00840 ++I1; 00841 else 00842 ++I2; 00843 } 00844 return false; 00845 } 00846 00847 // Copy constraint: all edges out of the source node get copied to the 00848 // destination node. This returns true if a change is made. 00849 bool Andersens::Node::copyFrom(Node *N) { 00850 // Use a mostly linear-time merge since both of the lists are sorted. 00851 bool Changed = false; 00852 iterator I = N->begin(), E = N->end(); 00853 unsigned i = 0; 00854 while (I != E && i != Pointees.size()) { 00855 if (Pointees[i] < *I) { 00856 ++i; 00857 } else if (Pointees[i] == *I) { 00858 ++i; ++I; 00859 } else { 00860 // We found a new element to copy over. 00861 Changed = true; 00862 Pointees.insert(Pointees.begin()+i, *I); 00863 ++i; ++I; 00864 } 00865 } 00866 00867 if (I != E) { 00868 Pointees.insert(Pointees.end(), I, E); 00869 Changed = true; 00870 } 00871 00872 return Changed; 00873 } 00874 00875 bool Andersens::Node::loadFrom(Node *N) { 00876 bool Changed = false; 00877 for (iterator I = N->begin(), E = N->end(); I != E; ++I) 00878 Changed |= copyFrom(*I); 00879 return Changed; 00880 } 00881 00882 bool Andersens::Node::storeThrough(Node *N) { 00883 bool Changed = false; 00884 for (iterator I = begin(), E = end(); I != E; ++I) 00885 Changed |= (*I)->copyFrom(N); 00886 return Changed; 00887 } 00888 00889 00890 /// SolveConstraints - This stage iteratively processes the constraints list 00891 /// propagating constraints (adding edges to the Nodes in the points-to graph) 00892 /// until a fixed point is reached. 00893 /// 00894 void Andersens::SolveConstraints() { 00895 bool Changed = true; 00896 unsigned Iteration = 0; 00897 while (Changed) { 00898 Changed = false; 00899 ++NumIters; 00900 DEBUG(std::cerr << "Starting iteration #" << Iteration++ << "!\n"); 00901 00902 // Loop over all of the constraints, applying them in turn. 00903 for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { 00904 Constraint &C = Constraints[i]; 00905 switch (C.Type) { 00906 case Constraint::Copy: 00907 Changed |= C.Dest->copyFrom(C.Src); 00908 break; 00909 case Constraint::Load: 00910 Changed |= C.Dest->loadFrom(C.Src); 00911 break; 00912 case Constraint::Store: 00913 Changed |= C.Dest->storeThrough(C.Src); 00914 break; 00915 default: 00916 assert(0 && "Unknown constraint!"); 00917 } 00918 } 00919 00920 if (Changed) { 00921 // Check to see if any internal function's addresses have been passed to 00922 // external functions. If so, we have to assume that their incoming 00923 // arguments could be anything. If there are any internal functions in 00924 // the universal node that we don't know about, we must iterate. 00925 for (Node::iterator I = GraphNodes[UniversalSet].begin(), 00926 E = GraphNodes[UniversalSet].end(); I != E; ++I) 00927 if (Function *F = dyn_cast_or_null<Function>((*I)->getValue())) 00928 if (F->hasInternalLinkage() && 00929 EscapingInternalFunctions.insert(F).second) { 00930 // We found a function that is just now escaping. Mark it as if it 00931 // didn't have internal linkage. 00932 AddConstraintsForNonInternalLinkage(F); 00933 DEBUG(std::cerr << "Found escaping internal function: " 00934 << F->getName() << "\n"); 00935 ++NumEscapingFunctions; 00936 } 00937 00938 // Check to see if we have discovered any new callees of the indirect call 00939 // sites. If so, add constraints to the analysis. 00940 for (unsigned i = 0, e = IndirectCalls.size(); i != e; ++i) { 00941 CallSite CS = IndirectCalls[i]; 00942 std::vector<Function*> &KnownCallees = IndirectCallees[CS]; 00943 Node *CN = getNode(CS.getCalledValue()); 00944 00945 for (Node::iterator NI = CN->begin(), E = CN->end(); NI != E; ++NI) 00946 if (Function *F = dyn_cast_or_null<Function>((*NI)->getValue())) { 00947 std::vector<Function*>::iterator IP = 00948 std::lower_bound(KnownCallees.begin(), KnownCallees.end(), F); 00949 if (IP == KnownCallees.end() || *IP != F) { 00950 // Add the constraints for the call now. 00951 AddConstraintsForCall(CS, F); 00952 DEBUG(std::cerr << "Found actual callee '" 00953 << F->getName() << "' for call: " 00954 << *CS.getInstruction() << "\n"); 00955 ++NumIndirectCallees; 00956 KnownCallees.insert(IP, F); 00957 } 00958 } 00959 } 00960 } 00961 } 00962 } 00963 00964 00965 00966 //===----------------------------------------------------------------------===// 00967 // Debugging Output 00968 //===----------------------------------------------------------------------===// 00969 00970 void Andersens::PrintNode(Node *N) { 00971 if (N == &GraphNodes[UniversalSet]) { 00972 std::cerr << "<universal>"; 00973 return; 00974 } else if (N == &GraphNodes[NullPtr]) { 00975 std::cerr << "<nullptr>"; 00976 return; 00977 } else if (N == &GraphNodes[NullObject]) { 00978 std::cerr << "<null>"; 00979 return; 00980 } 00981 00982 assert(N->getValue() != 0 && "Never set node label!"); 00983 Value *V = N->getValue(); 00984 if (Function *F = dyn_cast<Function>(V)) { 00985 if (isa<PointerType>(F->getFunctionType()->getReturnType()) && 00986 N == getReturnNode(F)) { 00987 std::cerr << F->getName() << ":retval"; 00988 return; 00989 } else if (F->getFunctionType()->isVarArg() && N == getVarargNode(F)) { 00990 std::cerr << F->getName() << ":vararg"; 00991 return; 00992 } 00993 } 00994 00995 if (Instruction *I = dyn_cast<Instruction>(V)) 00996 std::cerr << I->getParent()->getParent()->getName() << ":"; 00997 else if (Argument *Arg = dyn_cast<Argument>(V)) 00998 std::cerr << Arg->getParent()->getName() << ":"; 00999 01000 if (V->hasName()) 01001 std::cerr << V->getName(); 01002 else 01003 std::cerr << "(unnamed)"; 01004 01005 if (isa<GlobalValue>(V) || isa<AllocationInst>(V)) 01006 if (N == getObject(V)) 01007 std::cerr << "<mem>"; 01008 } 01009 01010 void Andersens::PrintConstraints() { 01011 std::cerr << "Constraints:\n"; 01012 for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { 01013 std::cerr << " #" << i << ": "; 01014 Constraint &C = Constraints[i]; 01015 if (C.Type == Constraint::Store) 01016 std::cerr << "*"; 01017 PrintNode(C.Dest); 01018 std::cerr << " = "; 01019 if (C.Type == Constraint::Load) 01020 std::cerr << "*"; 01021 PrintNode(C.Src); 01022 std::cerr << "\n"; 01023 } 01024 } 01025 01026 void Andersens::PrintPointsToGraph() { 01027 std::cerr << "Points-to graph:\n"; 01028 for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) { 01029 Node *N = &GraphNodes[i]; 01030 std::cerr << "[" << (N->end() - N->begin()) << "] "; 01031 PrintNode(N); 01032 std::cerr << "\t--> "; 01033 for (Node::iterator I = N->begin(), E = N->end(); I != E; ++I) { 01034 if (I != N->begin()) std::cerr << ", "; 01035 PrintNode(*I); 01036 } 01037 std::cerr << "\n"; 01038 } 01039 }