LLVM API Documentation

Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

ReaderWrappers.cpp

Go to the documentation of this file.
00001 //===- ReaderWrappers.cpp - Parse bytecode from file or buffer  -----------===//
00002 // 
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 // 
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements loading and parsing a bytecode file and parsing a
00011 // bytecode module from a given buffer.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "llvm/Bytecode/Analyzer.h"
00016 #include "llvm/Bytecode/Reader.h"
00017 #include "Reader.h"
00018 #include "llvm/Module.h"
00019 #include "llvm/Instructions.h"
00020 #include "llvm/Support/FileUtilities.h"
00021 #include "llvm/ADT/StringExtras.h"
00022 #include "llvm/Config/unistd.h"
00023 #include <cerrno>
00024 using namespace llvm;
00025 
00026 //===----------------------------------------------------------------------===//
00027 // BytecodeFileReader - Read from an mmap'able file descriptor.
00028 //
00029 
00030 namespace {
00031   /// BytecodeFileReader - parses a bytecode file from a file
00032   ///
00033   class BytecodeFileReader : public BytecodeReader {
00034   private:
00035     unsigned char *Buffer;
00036     unsigned Length;
00037 
00038     BytecodeFileReader(const BytecodeFileReader&); // Do not implement
00039     void operator=(const BytecodeFileReader &BFR); // Do not implement
00040 
00041   public:
00042     BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0);
00043     ~BytecodeFileReader();
00044   };
00045 }
00046 
/// ErrnoMessage - Build an error string of the form
/// "<strerror text>, while trying to <descr>".
///
/// savedErrNum is an errno value captured by the caller at the point of
/// failure; descr describes the operation that was being attempted.
static std::string ErrnoMessage (int savedErrNum, const std::string &descr) {
  // descr is only read, so it is taken by reference rather than copied.
  std::string Msg(::strerror(savedErrNum));
  Msg += ", while trying to ";
  Msg += descr;
  return Msg;
}
00050 
00051 BytecodeFileReader::BytecodeFileReader(const std::string &Filename,
00052                                        llvm::BytecodeHandler* H ) 
00053   : BytecodeReader(H)
00054 {
00055   Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length);
00056   if (Buffer == 0)
00057     throw "Error reading file '" + Filename + "'.";
00058 
00059   try {
00060     // Parse the bytecode we mmapped in
00061     ParseBytecode(Buffer, Length, Filename);
00062   } catch (...) {
00063     UnmapFileFromAddressSpace(Buffer, Length);
00064     throw;
00065   }
00066 }
00067 
00068 BytecodeFileReader::~BytecodeFileReader() {
00069   // Unmmap the bytecode...
00070   UnmapFileFromAddressSpace(Buffer, Length);
00071 }
00072 
00073 //===----------------------------------------------------------------------===//
00074 // BytecodeBufferReader - Read from a memory buffer
00075 //
00076 
00077 namespace {
00078   /// BytecodeBufferReader - parses a bytecode file from a buffer
00079   ///
00080   class BytecodeBufferReader : public BytecodeReader {
00081   private:
00082     const unsigned char *Buffer;
00083     bool MustDelete;
00084 
00085     BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement
00086     void operator=(const BytecodeBufferReader &BFR);   // Do not implement
00087 
00088   public:
00089     BytecodeBufferReader(const unsigned char *Buf, unsigned Length,
00090                          const std::string &ModuleID,
00091                          llvm::BytecodeHandler* Handler = 0);
00092     ~BytecodeBufferReader();
00093 
00094   };
00095 }
00096 
00097 BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf,
00098                                            unsigned Length,
00099                                            const std::string &ModuleID,
00100                                            llvm::BytecodeHandler* H )
00101   : BytecodeReader(H)
00102 {
00103   // If not aligned, allocate a new buffer to hold the bytecode...
00104   const unsigned char *ParseBegin = 0;
00105   if (reinterpret_cast<uint64_t>(Buf) & 3) {
00106     Buffer = new unsigned char[Length+4];
00107     unsigned Offset = 4 - ((intptr_t)Buffer & 3);   // Make sure it's aligned
00108     ParseBegin = Buffer + Offset;
00109     memcpy((unsigned char*)ParseBegin, Buf, Length);    // Copy it over
00110     MustDelete = true;
00111   } else {
00112     // If we don't need to copy it over, just use the caller's copy
00113     ParseBegin = Buffer = Buf;
00114     MustDelete = false;
00115   }
00116   try {
00117     ParseBytecode(ParseBegin, Length, ModuleID);
00118   } catch (...) {
00119     if (MustDelete) delete [] Buffer;
00120     throw;
00121   }
00122 }
00123 
00124 BytecodeBufferReader::~BytecodeBufferReader() {
00125   if (MustDelete) delete [] Buffer;
00126 }
00127 
00128 //===----------------------------------------------------------------------===//
00129 //  BytecodeStdinReader - Read bytecode from Standard Input
00130 //
00131 
00132 namespace {
00133   /// BytecodeStdinReader - parses a bytecode file from stdin
00134   /// 
00135   class BytecodeStdinReader : public BytecodeReader {
00136   private:
00137     std::vector<unsigned char> FileData;
00138     unsigned char *FileBuf;
00139 
00140     BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement
00141     void operator=(const BytecodeStdinReader &BFR);  // Do not implement
00142 
00143   public:
00144     BytecodeStdinReader( llvm::BytecodeHandler* H = 0 );
00145   };
00146 }
00147 
00148 BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H ) 
00149   : BytecodeReader(H)
00150 {
00151   int BlockSize;
00152   unsigned char Buffer[4096*4];
00153 
00154   // Read in all of the data from stdin, we cannot mmap stdin...
00155   while ((BlockSize = ::read(0 /*stdin*/, Buffer, 4096*4))) {
00156     if (BlockSize == -1)
00157       throw ErrnoMessage(errno, "read from standard input");
00158     
00159     FileData.insert(FileData.end(), Buffer, Buffer+BlockSize);
00160   }
00161 
00162   if (FileData.empty())
00163     throw std::string("Standard Input empty!");
00164 
00165   FileBuf = &FileData[0];
00166   ParseBytecode(FileBuf, FileData.size(), "<stdin>");
00167 }
00168 
00169 //===----------------------------------------------------------------------===//
00170 //  Varargs transmogrification code...
00171 //
00172 
00173 // CheckVarargs - This is used to automatically translate old-style varargs to
00174 // new style varargs for backwards compatibility.
00175 static ModuleProvider *CheckVarargs(ModuleProvider *MP) {
00176   Module *M = MP->getModule();
00177   
00178   // Check to see if va_start takes arguments...
00179   Function *F = M->getNamedFunction("llvm.va_start");
00180   if (F == 0) return MP;  // No varargs use, just return.
00181 
00182   if (F->getFunctionType()->getNumParams() == 0)
00183     return MP;  // Modern varargs processing, just return.
00184 
00185   // If we get to this point, we know that we have an old-style module.
00186   // Materialize the whole thing to perform the rewriting.
00187   MP->materializeModule();
00188 
00189   // If the user is making use of obsolete varargs intrinsics, adjust them for
00190   // the user.
00191   if (Function *F = M->getNamedFunction("llvm.va_start")) {
00192     assert(F->asize() == 1 && "Obsolete va_start takes 1 argument!");
00193         
00194     const Type *RetTy = F->getFunctionType()->getParamType(0);
00195     RetTy = cast<PointerType>(RetTy)->getElementType();
00196     Function *NF = M->getOrInsertFunction("llvm.va_start", RetTy, 0);
00197         
00198     for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; )
00199       if (CallInst *CI = dyn_cast<CallInst>(*I++)) {
00200         Value *V = new CallInst(NF, "", CI);
00201         new StoreInst(V, CI->getOperand(1), CI);
00202         CI->getParent()->getInstList().erase(CI);
00203       }
00204     F->setName("");
00205   }
00206 
00207   if (Function *F = M->getNamedFunction("llvm.va_end")) {
00208     assert(F->asize() == 1 && "Obsolete va_end takes 1 argument!");
00209     const Type *ArgTy = F->getFunctionType()->getParamType(0);
00210     ArgTy = cast<PointerType>(ArgTy)->getElementType();
00211     Function *NF = M->getOrInsertFunction("llvm.va_end", Type::VoidTy,
00212                                                   ArgTy, 0);
00213         
00214     for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; )
00215       if (CallInst *CI = dyn_cast<CallInst>(*I++)) {
00216         Value *V = new LoadInst(CI->getOperand(1), "", CI);
00217         new CallInst(NF, V, "", CI);
00218         CI->getParent()->getInstList().erase(CI);
00219       }
00220     F->setName("");
00221   }
00222       
00223   if (Function *F = M->getNamedFunction("llvm.va_copy")) {
00224     assert(F->asize() == 2 && "Obsolete va_copy takes 2 argument!");
00225     const Type *ArgTy = F->getFunctionType()->getParamType(0);
00226     ArgTy = cast<PointerType>(ArgTy)->getElementType();
00227     Function *NF = M->getOrInsertFunction("llvm.va_copy", ArgTy,
00228                                                   ArgTy, 0);
00229         
00230     for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; )
00231       if (CallInst *CI = dyn_cast<CallInst>(*I++)) {
00232         Value *V = new CallInst(NF, CI->getOperand(2), "", CI);
00233         new StoreInst(V, CI->getOperand(1), CI);
00234         CI->getParent()->getInstList().erase(CI);
00235       }
00236     F->setName("");
00237   }
00238   return MP;
00239 }
00240 
00241 //===----------------------------------------------------------------------===//
00242 // Wrapper functions
00243 //===----------------------------------------------------------------------===//
00244 
00245 /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a
00246 /// buffer
00247 ModuleProvider* 
00248 llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer,
00249                                       unsigned Length,
00250                                       const std::string &ModuleID,
00251                                       BytecodeHandler* H ) {
00252   return CheckVarargs(
00253       new BytecodeBufferReader(Buffer, Length, ModuleID, H));
00254 }
00255 
00256 /// ParseBytecodeBuffer - Parse a given bytecode buffer
00257 ///
00258 Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length,
00259                                   const std::string &ModuleID,
00260                                   std::string *ErrorStr){
00261   try {
00262     std::auto_ptr<ModuleProvider>
00263       AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID));
00264     return AMP->releaseModule();
00265   } catch (std::string &err) {
00266     if (ErrorStr) *ErrorStr = err;
00267     return 0;
00268   }
00269 }
00270 
00271 /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file
00272 ///
00273 ModuleProvider *llvm::getBytecodeModuleProvider(const std::string &Filename,
00274                                                 BytecodeHandler* H) {
00275   if (Filename != std::string("-"))        // Read from a file...
00276     return CheckVarargs(new BytecodeFileReader(Filename,H));
00277   else                                     // Read from stdin
00278     return CheckVarargs(new BytecodeStdinReader(H));
00279 }
00280 
00281 /// ParseBytecodeFile - Parse the given bytecode file
00282 ///
00283 Module *llvm::ParseBytecodeFile(const std::string &Filename,
00284                                 std::string *ErrorStr) {
00285   try {
00286     std::auto_ptr<ModuleProvider> AMP(getBytecodeModuleProvider(Filename));
00287     return AMP->releaseModule();
00288   } catch (std::string &err) {
00289     if (ErrorStr) *ErrorStr = err;
00290     return 0;
00291   }
00292 }
00293 
00294 // AnalyzeBytecodeFile - analyze one file
00295 Module* llvm::AnalyzeBytecodeFile(
00296   const std::string &Filename,  ///< File to analyze
00297   BytecodeAnalysis& bca,        ///< Statistical output
00298   std::string *ErrorStr,        ///< Error output
00299   std::ostream* output          ///< Dump output
00300 )
00301 {
00302   try {
00303     BytecodeHandler* analyzerHandler =createBytecodeAnalyzerHandler(bca,output);
00304     std::auto_ptr<ModuleProvider> AMP(
00305       getBytecodeModuleProvider(Filename,analyzerHandler));
00306     return AMP->releaseModule();
00307   } catch (std::string &err) {
00308     if (ErrorStr) *ErrorStr = err;
00309     return 0;
00310   }
00311 }
00312 
00313 // AnalyzeBytecodeBuffer - analyze a buffer
00314 Module* llvm::AnalyzeBytecodeBuffer(
00315   const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
00316   unsigned Length,             ///< Size of the bytecode buffer
00317   const std::string& ModuleID, ///< Identifier for the module
00318   BytecodeAnalysis& bca,       ///< The results of the analysis
00319   std::string* ErrorStr,       ///< Errors, if any.
00320   std::ostream* output         ///< Dump output, if any
00321 )
00322 {
00323   try {
00324     BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output);
00325     std::auto_ptr<ModuleProvider>
00326       AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, hdlr));
00327     return AMP->releaseModule();
00328   } catch (std::string &err) {
00329     if (ErrorStr) *ErrorStr = err;
00330     return 0;
00331   }
00332 }
00333 
00334 bool llvm::GetBytecodeDependentLibraries(const std::string &fname, 
00335                                          Module::LibraryListType& deplibs) {
00336   try {
00337     std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fname));
00338     Module* M = AMP->releaseModule();
00339 
00340     deplibs = M->getLibraries();
00341     delete M;
00342     return true;
00343   } catch (...) {
00344     deplibs.clear();
00345     return false;
00346   }
00347 }
00348 
00349 namespace {
00350 void getSymbols(Module*M, std::vector<std::string>& symbols) {
00351   // Loop over global variables
00352   for (Module::giterator GI = M->gbegin(), GE=M->gend(); GI != GE; ++GI) {
00353     if (GI->hasInitializer()) {
00354       std::string name ( GI->getName() );
00355       if (!name.empty()) {
00356         symbols.push_back(name);
00357       }
00358     }
00359   }
00360 
00361   //Loop over functions
00362   for (Module::iterator FI = M->begin(), FE=M->end(); FI != FE; ++FI) {
00363     if (!FI->isExternal()) {
00364       std::string name ( FI->getName() );
00365       if (!name.empty()) {
00366         symbols.push_back(name);
00367       }
00368     }
00369   }
00370 }
00371 }
00372 
00373 // Get just the externally visible defined symbols from the bytecode
00374 bool llvm::GetBytecodeSymbols(const sys::Path& fName,
00375                               std::vector<std::string>& symbols) {
00376   try {
00377     std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fName.get()));
00378 
00379     // Get the module from the provider
00380     Module* M = AMP->materializeModule();
00381 
00382     // Get the symbols
00383     getSymbols(M, symbols);
00384 
00385     // Done with the module
00386     return true;
00387 
00388   } catch (...) {
00389     return false;
00390   }
00391 }
00392 
00393 ModuleProvider* 
00394 llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length,
00395                          const std::string& ModuleID,
00396                          std::vector<std::string>& symbols) {
00397 
00398   ModuleProvider* MP = 0;
00399   try {
00400     // Get the module provider
00401     MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID);
00402 
00403     // Get the module from the provider
00404     Module* M = MP->materializeModule();
00405 
00406     // Get the symbols
00407     getSymbols(M, symbols);
00408 
00409     // Done with the module. Note that ModuleProvider will delete the
00410     // Module when it is deleted. Also note that its the caller's responsibility
00411     // to delete the ModuleProvider.
00412     return MP;
00413 
00414   } catch (...) {
00415     // We delete only the ModuleProvider here because its destructor will
00416     // also delete the Module (we used materializeModule not releaseModule).
00417     delete MP;
00418   }
00419   return 0;
00420 }
00421 // vim: sw=2 ai