LLVM API Documentation

ProgramInfo.cpp

Go to the documentation of this file.
00001 //===-- ProgramInfo.cpp - Compute and cache info about a program ----------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the ProgramInfo and related classes, by sorting through
00011 // the loaded Module.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "llvm/Debugger/ProgramInfo.h"
00016 #include "llvm/Constants.h"
00017 #include "llvm/DerivedTypes.h"
00018 #include "llvm/Intrinsics.h"
00019 #include "llvm/IntrinsicInst.h"
00020 #include "llvm/Instructions.h"
00021 #include "llvm/Module.h"
00022 #include "llvm/Debugger/SourceFile.h"
00023 #include "llvm/Debugger/SourceLanguage.h"
00024 #include "llvm/Support/SlowOperationInformer.h"
00025 #include "llvm/ADT/STLExtras.h"
00026 #include <iostream>
00027 
00028 using namespace llvm;
00029 
00030 /// getGlobalVariablesUsing - Return all of the global variables which have the
00031 /// specified value in their initializer somewhere.
00032 static void getGlobalVariablesUsing(Value *V,
00033                                     std::vector<GlobalVariable*> &Found) {
00034   for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
00035     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(*I))
00036       Found.push_back(GV);
00037     else if (Constant *C = dyn_cast<Constant>(*I))
00038       getGlobalVariablesUsing(C, Found);
00039   }
00040 }
00041 
00042 /// getNextStopPoint - Follow the def-use chains of the specified LLVM value,
00043 /// traversing the use chains until we get to a stoppoint.  When we do, return
00044 /// the source location of the stoppoint.  If we don't find a stoppoint, return
00045 /// null.
00046 static const GlobalVariable *getNextStopPoint(const Value *V, unsigned &LineNo,
00047                                               unsigned &ColNo) {
00048   // The use-def chains can fork.  As such, we pick the lowest numbered one we
00049   // find.
00050   const GlobalVariable *LastDesc = 0;
00051   unsigned LastLineNo = ~0;
00052   unsigned LastColNo = ~0;
00053 
00054   for (Value::use_const_iterator UI = V->use_begin(), E = V->use_end();
00055        UI != E; ++UI) {
00056     bool ShouldRecurse = true;
00057     if (cast<Instruction>(*UI)->getOpcode() == Instruction::PHI) {
00058       // Infinite loops == bad, ignore PHI nodes.
00059       ShouldRecurse = false;
00060     } else if (const CallInst *CI = dyn_cast<CallInst>(*UI)) {
00061       
00062       // If we found a stop point, check to see if it is earlier than what we
00063       // already have.  If so, remember it.
00064       if (const Function *F = CI->getCalledFunction())
00065         if (const DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(CI)) {
00066           unsigned CurLineNo = SPI->getLine();
00067           unsigned CurColNo = SPI->getColumn();
00068           const GlobalVariable *CurDesc = 0;
00069           const Value *Op = SPI->getContext();
00070 
00071           if ((CurDesc = dyn_cast<GlobalVariable>(Op)) &&
00072               (LineNo < LastLineNo ||
00073                (LineNo == LastLineNo && ColNo < LastColNo))) {
00074             LastDesc = CurDesc;
00075             LastLineNo = CurLineNo;
00076             LastColNo = CurColNo;
00077           }
00078           ShouldRecurse = false;
00079         }
00080     }
00081 
00082     // If this is not a phi node or a stopping point, recursively scan the users
00083     // of this instruction to skip over region.begin's and the like.
00084     if (ShouldRecurse) {
00085       unsigned CurLineNo, CurColNo;
00086       if (const GlobalVariable *GV = getNextStopPoint(*UI, CurLineNo,CurColNo)){
00087         if (LineNo < LastLineNo || (LineNo == LastLineNo && ColNo < LastColNo)){
00088           LastDesc = GV;
00089           LastLineNo = CurLineNo;
00090           LastColNo = CurColNo;
00091         }
00092       }
00093     }
00094   }
00095 
00096   if (LastDesc) {
00097     LineNo = LastLineNo != ~0U ? LastLineNo : 0;
00098     ColNo  = LastColNo  != ~0U ? LastColNo : 0;
00099   }
00100   return LastDesc;
00101 }
00102 
00103 
00104 //===----------------------------------------------------------------------===//
00105 // SourceFileInfo implementation
00106 //
00107 
00108 SourceFileInfo::SourceFileInfo(const GlobalVariable *Desc,
00109                                const SourceLanguage &Lang)
00110   : Language(&Lang), Descriptor(Desc) {
00111   Version = 0;
00112   SourceText = 0;
00113 
00114   if (Desc && Desc->hasInitializer())
00115     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
00116       if (CS->getNumOperands() > 4) {
00117         if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(CS->getOperand(1)))
00118           Version = CUI->getValue();
00119 
00120         BaseName  = CS->getOperand(3)->getStringValue();
00121         Directory = CS->getOperand(4)->getStringValue();
00122       }
00123 }
00124 
00125 SourceFileInfo::~SourceFileInfo() {
00126   delete SourceText;
00127 }
00128 
00129 SourceFile &SourceFileInfo::getSourceText() const {
00130   // FIXME: this should take into account the source search directories!
00131   if (SourceText == 0) { // Read the file in if we haven't already.
00132     sys::Path tmpPath;
00133     if (!Directory.empty())
00134       tmpPath.set(Directory);
00135     tmpPath.appendComponent(BaseName);
00136     if (tmpPath.canRead())
00137       SourceText = new SourceFile(tmpPath.toString(), Descriptor);
00138     else
00139       SourceText = new SourceFile(BaseName, Descriptor);
00140   }
00141   return *SourceText;
00142 }
00143 
00144 
00145 //===----------------------------------------------------------------------===//
00146 // SourceFunctionInfo implementation
00147 //
00148 SourceFunctionInfo::SourceFunctionInfo(ProgramInfo &PI,
00149                                        const GlobalVariable *Desc)
00150   : Descriptor(Desc) {
00151   LineNo = ColNo = 0;
00152   if (Desc && Desc->hasInitializer())
00153     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
00154       if (CS->getNumOperands() > 2) {
00155         // Entry #1 is the file descriptor.
00156         if (const GlobalVariable *GV =
00157             dyn_cast<GlobalVariable>(CS->getOperand(1)))
00158           SourceFile = &PI.getSourceFile(GV);
00159 
00160         // Entry #2 is the function name.
00161         Name = CS->getOperand(2)->getStringValue();
00162       }
00163 }
00164 
00165 /// getSourceLocation - This method returns the location of the first stopping
00166 /// point in the function.
00167 void SourceFunctionInfo::getSourceLocation(unsigned &RetLineNo,
00168                                            unsigned &RetColNo) const {
00169   // If we haven't computed this yet...
00170   if (!LineNo) {
00171     // Look at all of the users of the function descriptor, looking for calls to
00172     // %llvm.dbg.func.start.
00173     for (Value::use_const_iterator UI = Descriptor->use_begin(),
00174            E = Descriptor->use_end(); UI != E; ++UI)
00175       if (const CallInst *CI = dyn_cast<CallInst>(*UI))
00176         if (const Function *F = CI->getCalledFunction())
00177           if (F->getIntrinsicID() == Intrinsic::dbg_func_start) {
00178             // We found the start of the function.  Check to see if there are
00179             // any stop points on the use-list of the function start.
00180             const GlobalVariable *SD = getNextStopPoint(CI, LineNo, ColNo);
00181             if (SD) {             // We found the first stop point!
00182               // This is just a sanity check.
00183               if (getSourceFile().getDescriptor() != SD)
00184                 std::cout << "WARNING: first line of function is not in the"
00185                   " file that the function descriptor claims it is in.\n";
00186               break;
00187             }
00188           }
00189   }
00190   RetLineNo = LineNo; RetColNo = ColNo;
00191 }
00192 
00193 //===----------------------------------------------------------------------===//
00194 // ProgramInfo implementation
00195 //
00196 
00197 ProgramInfo::ProgramInfo(Module *m) : M(m), ProgramTimeStamp(0,0) {
00198   assert(M && "Cannot create program information with a null module!");
00199   sys::Path modulePath(M->getModuleIdentifier());
00200   ProgramTimeStamp = modulePath.getTimestamp();
00201 
00202   SourceFilesIsComplete = false;
00203   SourceFunctionsIsComplete = false;
00204 }
00205 
00206 ProgramInfo::~ProgramInfo() {
00207   // Delete cached information about source program objects...
00208   for (std::map<const GlobalVariable*, SourceFileInfo*>::iterator
00209          I = SourceFiles.begin(), E = SourceFiles.end(); I != E; ++I)
00210     delete I->second;
00211   for (std::map<const GlobalVariable*, SourceFunctionInfo*>::iterator
00212          I = SourceFunctions.begin(), E = SourceFunctions.end(); I != E; ++I)
00213     delete I->second;
00214 
00215   // Delete the source language caches.
00216   for (unsigned i = 0, e = LanguageCaches.size(); i != e; ++i)
00217     delete LanguageCaches[i].second;
00218 }
00219 
00220 
00221 //===----------------------------------------------------------------------===//
00222 // SourceFileInfo tracking...
00223 //
00224 
00225 /// getSourceFile - Return source file information for the specified source file
00226 /// descriptor object, adding it to the collection as needed.  This method
00227 /// always succeeds (is unambiguous), and is always efficient.
00228 ///
00229 const SourceFileInfo &
00230 ProgramInfo::getSourceFile(const GlobalVariable *Desc) {
00231   SourceFileInfo *&Result = SourceFiles[Desc];
00232   if (Result) return *Result;
00233 
00234   // Figure out what language this source file comes from...
00235   unsigned LangID = 0;   // Zero is unknown language
00236   if (Desc && Desc->hasInitializer())
00237     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
00238       if (CS->getNumOperands() > 2)
00239         if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(CS->getOperand(2)))
00240           LangID = CUI->getValue();
00241 
00242   const SourceLanguage &Lang = SourceLanguage::get(LangID);
00243   SourceFileInfo *New = Lang.createSourceFileInfo(Desc, *this);
00244 
00245   // FIXME: this should check to see if there is already a Filename/WorkingDir
00246   // pair that matches this one.  If so, we shouldn't create the duplicate!
00247   //
00248   SourceFileIndex.insert(std::make_pair(New->getBaseName(), New));
00249   return *(Result = New);
00250 }
00251 
00252 
00253 /// getSourceFiles - Index all of the source files in the program and return
00254 /// a mapping of it.  This information is lazily computed the first time
00255 /// that it is requested.  Since this information can take a long time to
00256 /// compute, the user is given a chance to cancel it.  If this occurs, an
00257 /// exception is thrown.
00258 const std::map<const GlobalVariable*, SourceFileInfo*> &
00259 ProgramInfo::getSourceFiles(bool RequiresCompleteMap) {
00260   // If we have a fully populated map, or if the client doesn't need one, just
00261   // return what we have.
00262   if (SourceFilesIsComplete || !RequiresCompleteMap)
00263     return SourceFiles;
00264 
00265   // Ok, all of the source file descriptors (compile_unit in dwarf terms),
00266   // should be on the use list of the llvm.dbg.translation_units global.
00267   //
00268   GlobalVariable *Units =
00269     M->getGlobalVariable("llvm.dbg.translation_units",
00270                          StructType::get(std::vector<const Type*>()));
00271   if (Units == 0)
00272     throw "Program contains no debugging information!";
00273 
00274   std::vector<GlobalVariable*> TranslationUnits;
00275   getGlobalVariablesUsing(Units, TranslationUnits);
00276 
00277   SlowOperationInformer SOI("building source files index");
00278 
00279   // Loop over all of the translation units found, building the SourceFiles
00280   // mapping.
00281   for (unsigned i = 0, e = TranslationUnits.size(); i != e; ++i) {
00282     getSourceFile(TranslationUnits[i]);
00283     if (SOI.progress(i+1, e))
00284       throw "While building source files index, operation cancelled.";
00285   }
00286 
00287   // Ok, if we got this far, then we indexed the whole program.
00288   SourceFilesIsComplete = true;
00289   return SourceFiles;
00290 }
00291 
00292 /// getSourceFile - Look up the file with the specified name.  If there is
00293 /// more than one match for the specified filename, prompt the user to pick
00294 /// one.  If there is no source file that matches the specified name, throw
00295 /// an exception indicating that we can't find the file.  Otherwise, return
00296 /// the file information for that file.
00297 const SourceFileInfo &ProgramInfo::getSourceFile(const std::string &Filename) {
00298   std::multimap<std::string, SourceFileInfo*>::const_iterator Start, End;
00299   getSourceFiles();
00300   tie(Start, End) = SourceFileIndex.equal_range(Filename);
00301 
00302   if (Start == End) throw "Could not find source file '" + Filename + "'!";
00303   const SourceFileInfo &SFI = *Start->second;
00304   ++Start;
00305   if (Start == End) return SFI;
00306 
00307   throw "FIXME: Multiple source files with the same name not implemented!";
00308 }
00309 
00310 
00311 //===----------------------------------------------------------------------===//
00312 // SourceFunctionInfo tracking...
00313 //
00314 
00315 
00316 /// getFunction - Return function information for the specified function
00317 /// descriptor object, adding it to the collection as needed.  This method
00318 /// always succeeds (is unambiguous), and is always efficient.
00319 ///
00320 const SourceFunctionInfo &
00321 ProgramInfo::getFunction(const GlobalVariable *Desc) {
00322   SourceFunctionInfo *&Result = SourceFunctions[Desc];
00323   if (Result) return *Result;
00324 
00325   // Figure out what language this function comes from...
00326   const GlobalVariable *SourceFileDesc = 0;
00327   if (Desc && Desc->hasInitializer())
00328     if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Desc->getInitializer()))
00329       if (CS->getNumOperands() > 0)
00330         if (const GlobalVariable *GV =
00331             dyn_cast<GlobalVariable>(CS->getOperand(1)))
00332           SourceFileDesc = GV;
00333 
00334   const SourceLanguage &Lang = getSourceFile(SourceFileDesc).getLanguage();
00335   return *(Result = Lang.createSourceFunctionInfo(Desc, *this));
00336 }
00337 
00338 
00339 // getSourceFunctions - Index all of the functions in the program and return
00340 // them.  This information is lazily computed the first time that it is
00341 // requested.  Since this information can take a long time to compute, the user
00342 // is given a chance to cancel it.  If this occurs, an exception is thrown.
00343 const std::map<const GlobalVariable*, SourceFunctionInfo*> &
00344 ProgramInfo::getSourceFunctions(bool RequiresCompleteMap) {
00345   if (SourceFunctionsIsComplete || !RequiresCompleteMap)
00346     return SourceFunctions;
00347 
00348   // Ok, all of the source function descriptors (subprogram in dwarf terms),
00349   // should be on the use list of the llvm.dbg.translation_units global.
00350   //
00351   GlobalVariable *Units =
00352     M->getGlobalVariable("llvm.dbg.globals",
00353                          StructType::get(std::vector<const Type*>()));
00354   if (Units == 0)
00355     throw "Program contains no debugging information!";
00356 
00357   std::vector<GlobalVariable*> Functions;
00358   getGlobalVariablesUsing(Units, Functions);
00359 
00360   SlowOperationInformer SOI("building functions index");
00361 
00362   // Loop over all of the functions found, building the SourceFunctions mapping.
00363   for (unsigned i = 0, e = Functions.size(); i != e; ++i) {
00364     getFunction(Functions[i]);
00365     if (SOI.progress(i+1, e))
00366       throw "While functions index, operation cancelled.";
00367   }
00368 
00369   // Ok, if we got this far, then we indexed the whole program.
00370   SourceFunctionsIsComplete = true;
00371   return SourceFunctions;
00372 }