LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

ProgramInfo.cpp

Go to the documentation of this file.
00001 //===-- ProgramInfo.cpp - Compute and cache info about a program ----------===//
00002 // 
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 // 
00008 //===----------------------------------------------------------------------===//
00009 // 
00010 // This file implements the ProgramInfo and related classes, by sorting through
00011 // the loaded Module.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "llvm/Debugger/ProgramInfo.h"
00016 #include "llvm/Constants.h"
00017 #include "llvm/DerivedTypes.h"
00018 #include "llvm/Intrinsics.h"
00019 #include "llvm/Instructions.h"
00020 #include "llvm/Module.h"
00021 #include "llvm/Debugger/SourceFile.h"
00022 #include "llvm/Debugger/SourceLanguage.h"
00023 #include "llvm/Support/FileUtilities.h"
00024 #include "llvm/Support/SlowOperationInformer.h"
00025 #include "llvm/ADT/STLExtras.h"
00026 #include <iostream>
00027 
00028 using namespace llvm;
00029 
00030 /// getGlobalVariablesUsing - Return all of the global variables which have the
00031 /// specified value in their initializer somewhere.
00032 static void getGlobalVariablesUsing(Value *V,
00033                                     std::vector<GlobalVariable*> &Found) {
00034   for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
00035     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I))
00036       Found.push_back(GV);
00037     else if (Constant *C = dyn_cast<Constant>(*I))
00038       getGlobalVariablesUsing(C, Found);
00039   }
00040 }
00041 
00042 /// getStringValue - Turn an LLVM constant pointer that eventually points to a
00043 /// global into a string value.  Return an empty string if we can't do it.
00044 ///
00045 static std::string getStringValue(Value *V, unsigned Offset = 0) {
00046   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
00047     if (GV->hasInitializer() && isa<ConstantArray>(GV->getInitializer())) {
00048       ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
00049       if (Init->isString()) {
00050         std::string Result = Init->getAsString();
00051         if (Offset < Result.size()) {
00052           // If we are pointing INTO The string, erase the beginning...
00053           Result.erase(Result.begin(), Result.begin()+Offset);
00054 
00055           // Take off the null terminator, and any string fragments after it.
00056           std::string::size_type NullPos = Result.find_first_of((char)0);
00057           if (NullPos != std::string::npos)
00058             Result.erase(Result.begin()+NullPos, Result.end());
00059           return Result;
00060         }
00061       }
00062     }
00063   } else if (Constant *C = dyn_cast<Constant>(V)) {
00064     if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
00065       return getStringValue(GV, Offset);
00066     else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
00067       if (CE->getOpcode() == Instruction::GetElementPtr) {
00068         // Turn a gep into the specified offset.
00069         if (CE->getNumOperands() == 3 &&
00070             cast<Constant>(CE->getOperand(1))->isNullValue() &&
00071             isa<ConstantInt>(CE->getOperand(2))) {
00072           return getStringValue(CE->getOperand(0),
00073                    Offset+cast<ConstantInt>(CE->getOperand(2))->getRawValue());
00074         }
00075       }
00076     }
00077   }
00078   return "";
00079 }
00080 
00081 /// getNextStopPoint - Follow the def-use chains of the specified LLVM value,
00082 /// traversing the use chains until we get to a stoppoint.  When we do, return
00083 /// the source location of the stoppoint.  If we don't find a stoppoint, return
00084 /// null.
00085 static const GlobalVariable *getNextStopPoint(const Value *V, unsigned &LineNo,
00086                                               unsigned &ColNo) {
00087   // The use-def chains can fork.  As such, we pick the lowest numbered one we
00088   // find.
00089   const GlobalVariable *LastDesc = 0;
00090   unsigned LastLineNo = ~0;
00091   unsigned LastColNo = ~0;
00092 
00093   for (Value::use_const_iterator UI = V->use_begin(), E = V->use_end();
00094        UI != E; ++UI) {
00095     bool ShouldRecurse = true;
00096     if (cast<Instruction>(*UI)->getOpcode() == Instruction::PHI) {
00097       // Infinite loops == bad, ignore PHI nodes.
00098       ShouldRecurse = false;
00099     } else if (const CallInst *CI = dyn_cast<CallInst>(*UI)) {
00100       // If we found a stop point, check to see if it is earlier than what we
00101       // already have.  If so, remember it.
00102       if (const Function *F = CI->getCalledFunction())
00103         if (F->getIntrinsicID() == Intrinsic::dbg_stoppoint) {
00104           unsigned CurLineNo = ~0, CurColNo = ~0;
00105           const GlobalVariable *CurDesc = 0;
00106           if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(2)))
00107             CurLineNo = C->getRawValue();
00108           if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getOperand(3)))
00109             CurColNo = C->getRawValue();
00110           const Value *Op = CI->getOperand(4);
00111           
00112           if ((CurDesc = dyn_cast<GlobalVariable>(Op)) &&
00113               (LineNo < LastLineNo ||
00114                (LineNo == LastLineNo && ColNo < LastColNo))) {
00115             LastDesc = CurDesc;
00116             LastLineNo = CurLineNo;
00117             LastColNo = CurColNo;            
00118           }
00119           ShouldRecurse = false;
00120         }
00121 
00122     }
00123 
00124     // If this is not a phi node or a stopping point, recursively scan the users
00125     // of this instruction to skip over region.begin's and the like.
00126     if (ShouldRecurse) {
00127       unsigned CurLineNo, CurColNo;
00128       if (const GlobalVariable *GV = getNextStopPoint(*UI, CurLineNo,CurColNo)){
00129         if (LineNo < LastLineNo || (LineNo == LastLineNo && ColNo < LastColNo)){
00130           LastDesc = GV;
00131           LastLineNo = CurLineNo;
00132           LastColNo = CurColNo;            
00133         }
00134       }
00135     }
00136   }
00137   
00138   if (LastDesc) {
00139     LineNo = LastLineNo != ~0U ? LastLineNo : 0;
00140     ColNo  = LastColNo  != ~0U ? LastColNo : 0;
00141   }
00142   return LastDesc;
00143 }
00144 
00145 
00146 //===----------------------------------------------------------------------===//
00147 // SourceFileInfo implementation
00148 //
00149 
00150 SourceFileInfo::SourceFileInfo(const GlobalVariable *Desc,
00151                                const SourceLanguage &Lang)
00152   : Language(&Lang), Descriptor(Desc) {
00153   Version = 0;
00154   SourceText = 0;
00155 
00156   if (Desc && Desc->hasInitializer())
00157     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
00158       if (CS->getNumOperands() > 4) {
00159         if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(CS->getOperand(1)))
00160           Version = CUI->getValue();
00161         
00162         BaseName  = getStringValue(CS->getOperand(3));
00163         Directory = getStringValue(CS->getOperand(4));
00164       }
00165 }
00166 
00167 SourceFileInfo::~SourceFileInfo() {
00168   delete SourceText;
00169 }
00170 
00171 SourceFile &SourceFileInfo::getSourceText() const {
00172   // FIXME: this should take into account the source search directories!
00173   if (SourceText == 0)  // Read the file in if we haven't already.
00174     if (!Directory.empty() && FileOpenable(Directory+"/"+BaseName))
00175       SourceText = new SourceFile(Directory+"/"+BaseName, Descriptor);
00176     else
00177       SourceText = new SourceFile(BaseName, Descriptor);
00178   return *SourceText;
00179 }
00180 
00181 
00182 //===----------------------------------------------------------------------===//
00183 // SourceFunctionInfo implementation
00184 //
00185 SourceFunctionInfo::SourceFunctionInfo(ProgramInfo &PI,
00186                                        const GlobalVariable *Desc)
00187   : Descriptor(Desc) {
00188   LineNo = ColNo = 0;
00189   if (Desc && Desc->hasInitializer())
00190     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
00191       if (CS->getNumOperands() > 2) {
00192         // Entry #1 is the file descriptor.
00193         if (const GlobalVariable *GV = 
00194             dyn_cast<GlobalVariable>(CS->getOperand(1)))
00195           SourceFile = &PI.getSourceFile(GV);
00196 
00197         // Entry #2 is the function name.
00198         Name = getStringValue(CS->getOperand(2));
00199       }
00200 }
00201 
00202 /// getSourceLocation - This method returns the location of the first stopping
00203 /// point in the function.
00204 void SourceFunctionInfo::getSourceLocation(unsigned &RetLineNo,
00205                                            unsigned &RetColNo) const {
00206   // If we haven't computed this yet...
00207   if (!LineNo) {
00208     // Look at all of the users of the function descriptor, looking for calls to
00209     // %llvm.dbg.func.start.
00210     for (Value::use_const_iterator UI = Descriptor->use_begin(),
00211            E = Descriptor->use_end(); UI != E; ++UI)
00212       if (const CallInst *CI = dyn_cast<CallInst>(*UI))
00213         if (const Function *F = CI->getCalledFunction())
00214           if (F->getIntrinsicID() == Intrinsic::dbg_func_start) {
00215             // We found the start of the function.  Check to see if there are
00216             // any stop points on the use-list of the function start.
00217             const GlobalVariable *SD = getNextStopPoint(CI, LineNo, ColNo);
00218             if (SD) {             // We found the first stop point!
00219               // This is just a sanity check.
00220               if (getSourceFile().getDescriptor() != SD)
00221                 std::cout << "WARNING: first line of function is not in the"
00222                   " file that the function descriptor claims it is in.\n";
00223               break;
00224             }
00225           }
00226   }
00227   RetLineNo = LineNo; RetColNo = ColNo;
00228 }
00229 
00230 //===----------------------------------------------------------------------===//
00231 // ProgramInfo implementation
00232 //
00233 
00234 ProgramInfo::ProgramInfo(Module *m) : M(m) {
00235   assert(M && "Cannot create program information with a null module!");
00236   ProgramTimeStamp = getFileTimestamp(M->getModuleIdentifier());
00237 
00238   SourceFilesIsComplete = false;
00239   SourceFunctionsIsComplete = false;
00240 }
00241 
00242 ProgramInfo::~ProgramInfo() {
00243   // Delete cached information about source program objects...
00244   for (std::map<const GlobalVariable*, SourceFileInfo*>::iterator
00245          I = SourceFiles.begin(), E = SourceFiles.end(); I != E; ++I)
00246     delete I->second;
00247   for (std::map<const GlobalVariable*, SourceFunctionInfo*>::iterator
00248          I = SourceFunctions.begin(), E = SourceFunctions.end(); I != E; ++I)
00249     delete I->second;
00250 
00251   // Delete the source language caches.
00252   for (unsigned i = 0, e = LanguageCaches.size(); i != e; ++i)
00253     delete LanguageCaches[i].second;
00254 }
00255 
00256 
00257 //===----------------------------------------------------------------------===//
00258 // SourceFileInfo tracking...
00259 //
00260 
00261 /// getSourceFile - Return source file information for the specified source file
00262 /// descriptor object, adding it to the collection as needed.  This method
00263 /// always succeeds (is unambiguous), and is always efficient.
00264 ///
00265 const SourceFileInfo &
00266 ProgramInfo::getSourceFile(const GlobalVariable *Desc) {
00267   SourceFileInfo *&Result = SourceFiles[Desc];
00268   if (Result) return *Result;
00269 
00270   // Figure out what language this source file comes from...
00271   unsigned LangID = 0;   // Zero is unknown language
00272   if (Desc && Desc->hasInitializer())
00273     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
00274       if (CS->getNumOperands() > 2)
00275         if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(CS->getOperand(2)))
00276           LangID = CUI->getValue();
00277 
00278   const SourceLanguage &Lang = SourceLanguage::get(LangID);
00279   SourceFileInfo *New = Lang.createSourceFileInfo(Desc, *this);
00280 
00281   // FIXME: this should check to see if there is already a Filename/WorkingDir
00282   // pair that matches this one.  If so, we shouldn't create the duplicate!
00283   //
00284   SourceFileIndex.insert(std::make_pair(New->getBaseName(), New));
00285   return *(Result = New);
00286 }
00287 
00288 
00289 /// getSourceFiles - Index all of the source files in the program and return
00290 /// a mapping of it.  This information is lazily computed the first time
00291 /// that it is requested.  Since this information can take a long time to
00292 /// compute, the user is given a chance to cancel it.  If this occurs, an
00293 /// exception is thrown.
00294 const std::map<const GlobalVariable*, SourceFileInfo*> &
00295 ProgramInfo::getSourceFiles(bool RequiresCompleteMap) {
00296   // If we have a fully populated map, or if the client doesn't need one, just
00297   // return what we have.
00298   if (SourceFilesIsComplete || !RequiresCompleteMap)
00299     return SourceFiles;
00300 
00301   // Ok, all of the source file descriptors (compile_unit in dwarf terms),
00302   // should be on the use list of the llvm.dbg.translation_units global.
00303   //
00304   GlobalVariable *Units =
00305     M->getGlobalVariable("llvm.dbg.translation_units",
00306                          StructType::get(std::vector<const Type*>()));
00307   if (Units == 0)
00308     throw "Program contains no debugging information!";
00309 
00310   std::vector<GlobalVariable*> TranslationUnits;
00311   getGlobalVariablesUsing(Units, TranslationUnits);
00312 
00313   SlowOperationInformer SOI("building source files index");
00314 
00315   // Loop over all of the translation units found, building the SourceFiles
00316   // mapping.
00317   for (unsigned i = 0, e = TranslationUnits.size(); i != e; ++i) {
00318     getSourceFile(TranslationUnits[i]);
00319     SOI.progress(i+1, e);
00320   }
00321 
00322   // Ok, if we got this far, then we indexed the whole program.
00323   SourceFilesIsComplete = true;
00324   return SourceFiles;
00325 }
00326 
00327 /// getSourceFile - Look up the file with the specified name.  If there is
00328 /// more than one match for the specified filename, prompt the user to pick
00329 /// one.  If there is no source file that matches the specified name, throw
00330 /// an exception indicating that we can't find the file.  Otherwise, return
00331 /// the file information for that file.
00332 const SourceFileInfo &ProgramInfo::getSourceFile(const std::string &Filename) {
00333   std::multimap<std::string, SourceFileInfo*>::const_iterator Start, End;
00334   getSourceFiles();
00335   tie(Start, End) = SourceFileIndex.equal_range(Filename);
00336   
00337   if (Start == End) throw "Could not find source file '" + Filename + "'!";
00338   const SourceFileInfo &SFI = *Start->second;
00339   ++Start;
00340   if (Start == End) return SFI;
00341 
00342   throw "FIXME: Multiple source files with the same name not implemented!";
00343 }
00344 
00345 
00346 //===----------------------------------------------------------------------===//
00347 // SourceFunctionInfo tracking...
00348 //
00349 
00350 
00351 /// getFunction - Return function information for the specified function
00352 /// descriptor object, adding it to the collection as needed.  This method
00353 /// always succeeds (is unambiguous), and is always efficient.
00354 ///
00355 const SourceFunctionInfo &
00356 ProgramInfo::getFunction(const GlobalVariable *Desc) {
00357   SourceFunctionInfo *&Result = SourceFunctions[Desc];
00358   if (Result) return *Result;
00359 
00360   // Figure out what language this function comes from...
00361   const GlobalVariable *SourceFileDesc = 0;
00362   if (Desc && Desc->hasInitializer())
00363     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
00364       if (CS->getNumOperands() > 0)
00365         if (const GlobalVariable *GV =
00366             dyn_cast<GlobalVariable>(CS->getOperand(1)))
00367           SourceFileDesc = GV;
00368 
00369   const SourceLanguage &Lang = getSourceFile(SourceFileDesc).getLanguage();
00370   return *(Result = Lang.createSourceFunctionInfo(Desc, *this));
00371 }
00372 
00373 
00374 // getSourceFunctions - Index all of the functions in the program and return
00375 // them.  This information is lazily computed the first time that it is
00376 // requested.  Since this information can take a long time to compute, the user
00377 // is given a chance to cancel it.  If this occurs, an exception is thrown.
00378 const std::map<const GlobalVariable*, SourceFunctionInfo*> &
00379 ProgramInfo::getSourceFunctions(bool RequiresCompleteMap) {
00380   if (SourceFunctionsIsComplete || !RequiresCompleteMap)
00381     return SourceFunctions;
00382 
00383   // Ok, all of the source function descriptors (subprogram in dwarf terms),
00384   // should be on the use list of the llvm.dbg.translation_units global.
00385   //
00386   GlobalVariable *Units =
00387     M->getGlobalVariable("llvm.dbg.globals",
00388                          StructType::get(std::vector<const Type*>()));
00389   if (Units == 0)
00390     throw "Program contains no debugging information!";
00391 
00392   std::vector<GlobalVariable*> Functions;
00393   getGlobalVariablesUsing(Units, Functions);
00394 
00395   SlowOperationInformer SOI("building functions index");
00396 
00397   // Loop over all of the functions found, building the SourceFunctions mapping.
00398   for (unsigned i = 0, e = Functions.size(); i != e; ++i) {
00399     getFunction(Functions[i]);
00400     SOI.progress(i+1, e);
00401   }
00402 
00403   // Ok, if we got this far, then we indexed the whole program.
00404   SourceFunctionsIsComplete = true;
00405   return SourceFunctions;
00406 }