LLVM API Documentation
00001 //===-- ProgramInfo.cpp - Compute and cache info about a program ----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the ProgramInfo and related classes, by sorting through 00011 // the loaded Module. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/Debugger/ProgramInfo.h" 00016 #include "llvm/Constants.h" 00017 #include "llvm/DerivedTypes.h" 00018 #include "llvm/Intrinsics.h" 00019 #include "llvm/Instructions.h" 00020 #include "llvm/Module.h" 00021 #include "llvm/Debugger/SourceFile.h" 00022 #include "llvm/Debugger/SourceLanguage.h" 00023 #include "llvm/Support/FileUtilities.h" 00024 #include "llvm/Support/SlowOperationInformer.h" 00025 #include "llvm/ADT/STLExtras.h" 00026 #include <iostream> 00027 00028 using namespace llvm; 00029 00030 /// getGlobalVariablesUsing - Return all of the global variables which have the 00031 /// specified value in their initializer somewhere. 00032 static void getGlobalVariablesUsing(Value *V, 00033 std::vector<GlobalVariable*> &Found) { 00034 for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { 00035 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I)) 00036 Found.push_back(GV); 00037 else if (Constant *C = dyn_cast<Constant>(*I)) 00038 getGlobalVariablesUsing(C, Found); 00039 } 00040 } 00041 00042 /// getStringValue - Turn an LLVM constant pointer that eventually points to a 00043 /// global into a string value. Return an empty string if we can't do it. 00044 /// 00045 static std::string getStringValue(Value *V, unsigned Offset = 0) { 00046 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { 00047 if (GV->hasInitializer() && isa<ConstantArray>(GV->getInitializer())) { 00048 ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); 00049 if (Init->isString()) { 00050 std::string Result = Init->getAsString(); 00051 if (Offset < Result.size()) { 00052 // If we are pointing INTO The string, erase the beginning... 00053 Result.erase(Result.begin(), Result.begin()+Offset); 00054 00055 // Take off the null terminator, and any string fragments after it. 00056 std::string::size_type NullPos = Result.find_first_of((char)0); 00057 if (NullPos != std::string::npos) 00058 Result.erase(Result.begin()+NullPos, Result.end()); 00059 return Result; 00060 } 00061 } 00062 } 00063 } else if (Constant *C = dyn_cast<Constant>(V)) { 00064 if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) 00065 return getStringValue(GV, Offset); 00066 else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 00067 if (CE->getOpcode() == Instruction::GetElementPtr) { 00068 // Turn a gep into the specified offset. 00069 if (CE->getNumOperands() == 3 && 00070 cast<Constant>(CE->getOperand(1))->isNullValue() && 00071 isa<ConstantInt>(CE->getOperand(2))) { 00072 return getStringValue(CE->getOperand(0), 00073 Offset+cast<ConstantInt>(CE->getOperand(2))->getRawValue()); 00074 } 00075 } 00076 } 00077 } 00078 return ""; 00079 } 00080 00081 /// getNextStopPoint - Follow the def-use chains of the specified LLVM value, 00082 /// traversing the use chains until we get to a stoppoint. When we do, return 00083 /// the source location of the stoppoint. If we don't find a stoppoint, return 00084 /// null. 00085 static const GlobalVariable *getNextStopPoint(const Value *V, unsigned &LineNo, 00086 unsigned &ColNo) { 00087 // The use-def chains can fork. As such, we pick the lowest numbered one we 00088 // find. 00089 const GlobalVariable *LastDesc = 0; 00090 unsigned LastLineNo = ~0; 00091 unsigned LastColNo = ~0; 00092 00093 for (Value::use_const_iterator UI = V->use_begin(), E = V->use_end(); 00094 UI != E; ++UI) { 00095 bool ShouldRecurse = true; 00096 if (cast<Instruction>(*UI)->getOpcode() == Instruction::PHI) { 00097 // Infinite loops == bad, ignore PHI nodes. 00098 ShouldRecurse = false; 00099 } else if (const CallInst *CI = dyn_cast<CallInst>(*UI)) { 00100 // If we found a stop point, check to see if it is earlier than what we 00101 // already have. If so, remember it. 00102 if (const Function *F = CI->getCalledFunction()) 00103 if (F->getIntrinsicID() == Intrinsic::dbg_stoppoint) { 00104 unsigned CurLineNo = ~0, CurColNo = ~0; 00105 const GlobalVariable *CurDesc = 0; 00106 if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(2))) 00107 CurLineNo = C->getRawValue(); 00108 if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(3))) 00109 CurColNo = C->getRawValue(); 00110 const Value *Op = CI->getOperand(4); 00111 00112 if ((CurDesc = dyn_cast<GlobalVariable>(Op)) && 00113 (LineNo < LastLineNo || 00114 (LineNo == LastLineNo && ColNo < LastColNo))) { 00115 LastDesc = CurDesc; 00116 LastLineNo = CurLineNo; 00117 LastColNo = CurColNo; 00118 } 00119 ShouldRecurse = false; 00120 } 00121 00122 } 00123 00124 // If this is not a phi node or a stopping point, recursively scan the users 00125 // of this instruction to skip over region.begin's and the like. 00126 if (ShouldRecurse) { 00127 unsigned CurLineNo, CurColNo; 00128 if (const GlobalVariable *GV = getNextStopPoint(*UI, CurLineNo,CurColNo)){ 00129 if (LineNo < LastLineNo || (LineNo == LastLineNo && ColNo < LastColNo)){ 00130 LastDesc = GV; 00131 LastLineNo = CurLineNo; 00132 LastColNo = CurColNo; 00133 } 00134 } 00135 } 00136 } 00137 00138 if (LastDesc) { 00139 LineNo = LastLineNo != ~0U ? LastLineNo : 0; 00140 ColNo = LastColNo != ~0U ? LastColNo : 0; 00141 } 00142 return LastDesc; 00143 } 00144 00145 00146 //===----------------------------------------------------------------------===// 00147 // SourceFileInfo implementation 00148 // 00149 00150 SourceFileInfo::SourceFileInfo(const GlobalVariable *Desc, 00151 const SourceLanguage &Lang) 00152 : Language(&Lang), Descriptor(Desc) { 00153 Version = 0; 00154 SourceText = 0; 00155 00156 if (Desc && Desc->hasInitializer()) 00157 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) 00158 if (CS->getNumOperands() > 4) { 00159 if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(CS->getOperand(1))) 00160 Version = CUI->getValue(); 00161 00162 BaseName = getStringValue(CS->getOperand(3)); 00163 Directory = getStringValue(CS->getOperand(4)); 00164 } 00165 } 00166 00167 SourceFileInfo::~SourceFileInfo() { 00168 delete SourceText; 00169 } 00170 00171 SourceFile &SourceFileInfo::getSourceText() const { 00172 // FIXME: this should take into account the source search directories! 00173 if (SourceText == 0) // Read the file in if we haven't already. 00174 if (!Directory.empty() && FileOpenable(Directory+"/"+BaseName)) 00175 SourceText = new SourceFile(Directory+"/"+BaseName, Descriptor); 00176 else 00177 SourceText = new SourceFile(BaseName, Descriptor); 00178 return *SourceText; 00179 } 00180 00181 00182 //===----------------------------------------------------------------------===// 00183 // SourceFunctionInfo implementation 00184 // 00185 SourceFunctionInfo::SourceFunctionInfo(ProgramInfo &PI, 00186 const GlobalVariable *Desc) 00187 : Descriptor(Desc) { 00188 LineNo = ColNo = 0; 00189 if (Desc && Desc->hasInitializer()) 00190 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) 00191 if (CS->getNumOperands() > 2) { 00192 // Entry #1 is the file descriptor. 00193 if (const GlobalVariable *GV = 00194 dyn_cast<GlobalVariable>(CS->getOperand(1))) 00195 SourceFile = &PI.getSourceFile(GV); 00196 00197 // Entry #2 is the function name. 00198 Name = getStringValue(CS->getOperand(2)); 00199 } 00200 } 00201 00202 /// getSourceLocation - This method returns the location of the first stopping 00203 /// point in the function. 00204 void SourceFunctionInfo::getSourceLocation(unsigned &RetLineNo, 00205 unsigned &RetColNo) const { 00206 // If we haven't computed this yet... 00207 if (!LineNo) { 00208 // Look at all of the users of the function descriptor, looking for calls to 00209 // %llvm.dbg.func.start. 00210 for (Value::use_const_iterator UI = Descriptor->use_begin(), 00211 E = Descriptor->use_end(); UI != E; ++UI) 00212 if (const CallInst *CI = dyn_cast<CallInst>(*UI)) 00213 if (const Function *F = CI->getCalledFunction()) 00214 if (F->getIntrinsicID() == Intrinsic::dbg_func_start) { 00215 // We found the start of the function. Check to see if there are 00216 // any stop points on the use-list of the function start. 00217 const GlobalVariable *SD = getNextStopPoint(CI, LineNo, ColNo); 00218 if (SD) { // We found the first stop point! 00219 // This is just a sanity check. 00220 if (getSourceFile().getDescriptor() != SD) 00221 std::cout << "WARNING: first line of function is not in the" 00222 " file that the function descriptor claims it is in.\n"; 00223 break; 00224 } 00225 } 00226 } 00227 RetLineNo = LineNo; RetColNo = ColNo; 00228 } 00229 00230 //===----------------------------------------------------------------------===// 00231 // ProgramInfo implementation 00232 // 00233 00234 ProgramInfo::ProgramInfo(Module *m) : M(m) { 00235 assert(M && "Cannot create program information with a null module!"); 00236 ProgramTimeStamp = getFileTimestamp(M->getModuleIdentifier()); 00237 00238 SourceFilesIsComplete = false; 00239 SourceFunctionsIsComplete = false; 00240 } 00241 00242 ProgramInfo::~ProgramInfo() { 00243 // Delete cached information about source program objects... 00244 for (std::map<const GlobalVariable*, SourceFileInfo*>::iterator 00245 I = SourceFiles.begin(), E = SourceFiles.end(); I != E; ++I) 00246 delete I->second; 00247 for (std::map<const GlobalVariable*, SourceFunctionInfo*>::iterator 00248 I = SourceFunctions.begin(), E = SourceFunctions.end(); I != E; ++I) 00249 delete I->second; 00250 00251 // Delete the source language caches. 00252 for (unsigned i = 0, e = LanguageCaches.size(); i != e; ++i) 00253 delete LanguageCaches[i].second; 00254 } 00255 00256 00257 //===----------------------------------------------------------------------===// 00258 // SourceFileInfo tracking... 00259 // 00260 00261 /// getSourceFile - Return source file information for the specified source file 00262 /// descriptor object, adding it to the collection as needed. This method 00263 /// always succeeds (is unambiguous), and is always efficient. 00264 /// 00265 const SourceFileInfo & 00266 ProgramInfo::getSourceFile(const GlobalVariable *Desc) { 00267 SourceFileInfo *&Result = SourceFiles[Desc]; 00268 if (Result) return *Result; 00269 00270 // Figure out what language this source file comes from... 00271 unsigned LangID = 0; // Zero is unknown language 00272 if (Desc && Desc->hasInitializer()) 00273 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) 00274 if (CS->getNumOperands() > 2) 00275 if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(CS->getOperand(2))) 00276 LangID = CUI->getValue(); 00277 00278 const SourceLanguage &Lang = SourceLanguage::get(LangID); 00279 SourceFileInfo *New = Lang.createSourceFileInfo(Desc, *this); 00280 00281 // FIXME: this should check to see if there is already a Filename/WorkingDir 00282 // pair that matches this one. If so, we shouldn't create the duplicate! 00283 // 00284 SourceFileIndex.insert(std::make_pair(New->getBaseName(), New)); 00285 return *(Result = New); 00286 } 00287 00288 00289 /// getSourceFiles - Index all of the source files in the program and return 00290 /// a mapping of it. This information is lazily computed the first time 00291 /// that it is requested. Since this information can take a long time to 00292 /// compute, the user is given a chance to cancel it. If this occurs, an 00293 /// exception is thrown. 00294 const std::map<const GlobalVariable*, SourceFileInfo*> & 00295 ProgramInfo::getSourceFiles(bool RequiresCompleteMap) { 00296 // If we have a fully populated map, or if the client doesn't need one, just 00297 // return what we have. 00298 if (SourceFilesIsComplete || !RequiresCompleteMap) 00299 return SourceFiles; 00300 00301 // Ok, all of the source file descriptors (compile_unit in dwarf terms), 00302 // should be on the use list of the llvm.dbg.translation_units global. 00303 // 00304 GlobalVariable *Units = 00305 M->getGlobalVariable("llvm.dbg.translation_units", 00306 StructType::get(std::vector<const Type*>())); 00307 if (Units == 0) 00308 throw "Program contains no debugging information!"; 00309 00310 std::vector<GlobalVariable*> TranslationUnits; 00311 getGlobalVariablesUsing(Units, TranslationUnits); 00312 00313 SlowOperationInformer SOI("building source files index"); 00314 00315 // Loop over all of the translation units found, building the SourceFiles 00316 // mapping. 00317 for (unsigned i = 0, e = TranslationUnits.size(); i != e; ++i) { 00318 getSourceFile(TranslationUnits[i]); 00319 SOI.progress(i+1, e); 00320 } 00321 00322 // Ok, if we got this far, then we indexed the whole program. 00323 SourceFilesIsComplete = true; 00324 return SourceFiles; 00325 } 00326 00327 /// getSourceFile - Look up the file with the specified name. If there is 00328 /// more than one match for the specified filename, prompt the user to pick 00329 /// one. If there is no source file that matches the specified name, throw 00330 /// an exception indicating that we can't find the file. Otherwise, return 00331 /// the file information for that file. 00332 const SourceFileInfo &ProgramInfo::getSourceFile(const std::string &Filename) { 00333 std::multimap<std::string, SourceFileInfo*>::const_iterator Start, End; 00334 getSourceFiles(); 00335 tie(Start, End) = SourceFileIndex.equal_range(Filename); 00336 00337 if (Start == End) throw "Could not find source file '" + Filename + "'!"; 00338 const SourceFileInfo &SFI = *Start->second; 00339 ++Start; 00340 if (Start == End) return SFI; 00341 00342 throw "FIXME: Multiple source files with the same name not implemented!"; 00343 } 00344 00345 00346 //===----------------------------------------------------------------------===// 00347 // SourceFunctionInfo tracking... 00348 // 00349 00350 00351 /// getFunction - Return function information for the specified function 00352 /// descriptor object, adding it to the collection as needed. This method 00353 /// always succeeds (is unambiguous), and is always efficient. 00354 /// 00355 const SourceFunctionInfo & 00356 ProgramInfo::getFunction(const GlobalVariable *Desc) { 00357 SourceFunctionInfo *&Result = SourceFunctions[Desc]; 00358 if (Result) return *Result; 00359 00360 // Figure out what language this function comes from... 00361 const GlobalVariable *SourceFileDesc = 0; 00362 if (Desc && Desc->hasInitializer()) 00363 if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer())) 00364 if (CS->getNumOperands() > 0) 00365 if (const GlobalVariable *GV = 00366 dyn_cast<GlobalVariable>(CS->getOperand(1))) 00367 SourceFileDesc = GV; 00368 00369 const SourceLanguage &Lang = getSourceFile(SourceFileDesc).getLanguage(); 00370 return *(Result = Lang.createSourceFunctionInfo(Desc, *this)); 00371 } 00372 00373 00374 // getSourceFunctions - Index all of the functions in the program and return 00375 // them. This information is lazily computed the first time that it is 00376 // requested. Since this information can take a long time to compute, the user 00377 // is given a chance to cancel it. If this occurs, an exception is thrown. 00378 const std::map<const GlobalVariable*, SourceFunctionInfo*> & 00379 ProgramInfo::getSourceFunctions(bool RequiresCompleteMap) { 00380 if (SourceFunctionsIsComplete || !RequiresCompleteMap) 00381 return SourceFunctions; 00382 00383 // Ok, all of the source function descriptors (subprogram in dwarf terms), 00384 // should be on the use list of the llvm.dbg.translation_units global. 00385 // 00386 GlobalVariable *Units = 00387 M->getGlobalVariable("llvm.dbg.globals", 00388 StructType::get(std::vector<const Type*>())); 00389 if (Units == 0) 00390 throw "Program contains no debugging information!"; 00391 00392 std::vector<GlobalVariable*> Functions; 00393 getGlobalVariablesUsing(Units, Functions); 00394 00395 SlowOperationInformer SOI("building functions index"); 00396 00397 // Loop over all of the functions found, building the SourceFunctions mapping. 00398 for (unsigned i = 0, e = Functions.size(); i != e; ++i) { 00399 getFunction(Functions[i]); 00400 SOI.progress(i+1, e); 00401 } 00402 00403 // Ok, if we got this far, then we indexed the whole program. 00404 SourceFunctionsIsComplete = true; 00405 return SourceFunctions; 00406 }