LLVM API Documentation

ReaderWrappers.cpp

Go to the documentation of this file.
00001 //===- ReaderWrappers.cpp - Parse bytecode from file or buffer  -----------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements loading and parsing a bytecode file and parsing a
00011 // bytecode module from a given buffer.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "llvm/Bytecode/Analyzer.h"
00016 #include "llvm/Bytecode/Reader.h"
00017 #include "Reader.h"
00018 #include "llvm/Module.h"
00019 #include "llvm/Instructions.h"
00020 #include "llvm/ADT/StringExtras.h"
00021 #include "llvm/System/MappedFile.h"
00022 #include <cerrno>
00023 #include <iostream>
00024 #include <memory>
00025 
00026 using namespace llvm;
00027 
00028 //===----------------------------------------------------------------------===//
00029 // BytecodeFileReader - Read from an mmap'able file descriptor.
00030 //
00031 
00032 namespace {
00033   /// BytecodeFileReader - parses a bytecode file from a file
00034   ///
00035   class BytecodeFileReader : public BytecodeReader {
00036   private:
00037     sys::MappedFile mapFile;
00038 
00039     BytecodeFileReader(const BytecodeFileReader&); // Do not implement
00040     void operator=(const BytecodeFileReader &BFR); // Do not implement
00041 
00042   public:
00043     BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0);
00044   };
00045 }
00046 
00047 BytecodeFileReader::BytecodeFileReader(const std::string &Filename,
00048                                        llvm::BytecodeHandler* H )
00049   : BytecodeReader(H)
00050   , mapFile( sys::Path(Filename))
00051 {
00052   mapFile.map();
00053   unsigned char* buffer = reinterpret_cast<unsigned char*>(mapFile.base());
00054   ParseBytecode(buffer, mapFile.size(), Filename);
00055 }
00056 
00057 //===----------------------------------------------------------------------===//
00058 // BytecodeBufferReader - Read from a memory buffer
00059 //
00060 
00061 namespace {
00062   /// BytecodeBufferReader - parses a bytecode file from a buffer
00063   ///
00064   class BytecodeBufferReader : public BytecodeReader {
00065   private:
00066     const unsigned char *Buffer;
00067     bool MustDelete;
00068 
00069     BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement
00070     void operator=(const BytecodeBufferReader &BFR);   // Do not implement
00071 
00072   public:
00073     BytecodeBufferReader(const unsigned char *Buf, unsigned Length,
00074                          const std::string &ModuleID,
00075                          llvm::BytecodeHandler* Handler = 0);
00076     ~BytecodeBufferReader();
00077 
00078   };
00079 }
00080 
00081 BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf,
00082                                            unsigned Length,
00083                                            const std::string &ModuleID,
00084                                            llvm::BytecodeHandler* H )
00085   : BytecodeReader(H)
00086 {
00087   // If not aligned, allocate a new buffer to hold the bytecode...
00088   const unsigned char *ParseBegin = 0;
00089   if (reinterpret_cast<uint64_t>(Buf) & 3) {
00090     Buffer = new unsigned char[Length+4];
00091     unsigned Offset = 4 - ((intptr_t)Buffer & 3);   // Make sure it's aligned
00092     ParseBegin = Buffer + Offset;
00093     memcpy((unsigned char*)ParseBegin, Buf, Length);    // Copy it over
00094     MustDelete = true;
00095   } else {
00096     // If we don't need to copy it over, just use the caller's copy
00097     ParseBegin = Buffer = Buf;
00098     MustDelete = false;
00099   }
00100   try {
00101     ParseBytecode(ParseBegin, Length, ModuleID);
00102   } catch (...) {
00103     if (MustDelete) delete [] Buffer;
00104     throw;
00105   }
00106 }
00107 
00108 BytecodeBufferReader::~BytecodeBufferReader() {
00109   if (MustDelete) delete [] Buffer;
00110 }
00111 
00112 //===----------------------------------------------------------------------===//
00113 //  BytecodeStdinReader - Read bytecode from Standard Input
00114 //
00115 
00116 namespace {
00117   /// BytecodeStdinReader - parses a bytecode file from stdin
00118   ///
00119   class BytecodeStdinReader : public BytecodeReader {
00120   private:
00121     std::vector<unsigned char> FileData;
00122     unsigned char *FileBuf;
00123 
00124     BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement
00125     void operator=(const BytecodeStdinReader &BFR);  // Do not implement
00126 
00127   public:
00128     BytecodeStdinReader( llvm::BytecodeHandler* H = 0 );
00129   };
00130 }
00131 
00132 BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H )
00133   : BytecodeReader(H)
00134 {
00135   char Buffer[4096*4];
00136 
00137   // Read in all of the data from stdin, we cannot mmap stdin...
00138   while (std::cin.good()) {
00139     std::cin.read(Buffer, 4096*4);
00140     int BlockSize = std::cin.gcount();
00141     if (0 >= BlockSize)
00142       break;
00143     FileData.insert(FileData.end(), Buffer, Buffer+BlockSize);
00144   }
00145 
00146   if (FileData.empty())
00147     throw std::string("Standard Input empty!");
00148 
00149   FileBuf = &FileData[0];
00150   ParseBytecode(FileBuf, FileData.size(), "<stdin>");
00151 }
00152 
00153 //===----------------------------------------------------------------------===//
00154 // Varargs transmogrification code...
00155 //
00156 
00157 // CheckVarargs - This is used to automatically translate old-style varargs to
00158 // new style varargs for backwards compatibility.
00159 static ModuleProvider* CheckVarargs(ModuleProvider* MP) {
00160   Module* M = MP->getModule();
00161 
00162   // check to see if va_start takes arguements...
00163   Function* F = M->getNamedFunction("llvm.va_start");
00164   if(F == 0) return MP; //No varargs use, just return.
00165 
00166   if (F->getFunctionType()->getNumParams() == 1)
00167     return MP; // Modern varargs processing, just return.
00168 
00169   // If we get to this point, we know that we have an old-style module.
00170   // Materialize the whole thing to perform the rewriting.
00171   MP->materializeModule();
00172 
00173   if(Function* F = M->getNamedFunction("llvm.va_start")) {
00174     assert(F->arg_size() == 0 && "Obsolete va_start takes 0 argument!");
00175 
00176     //foo = va_start()
00177     // ->
00178     //bar = alloca typeof(foo)
00179     //va_start(bar)
00180     //foo = load bar
00181 
00182     const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
00183     const Type* ArgTy = F->getFunctionType()->getReturnType();
00184     const Type* ArgTyPtr = PointerType::get(ArgTy);
00185     Function* NF = M->getOrInsertFunction("llvm.va_start",
00186                                           RetTy, ArgTyPtr, (Type *)0);
00187 
00188     for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
00189       if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
00190         AllocaInst* bar = new AllocaInst(ArgTy, 0, "vastart.fix.1", CI);
00191         new CallInst(NF, bar, "", CI);
00192         Value* foo = new LoadInst(bar, "vastart.fix.2", CI);
00193         CI->replaceAllUsesWith(foo);
00194         CI->getParent()->getInstList().erase(CI);
00195       }
00196     F->setName("");
00197   }
00198 
00199   if(Function* F = M->getNamedFunction("llvm.va_end")) {
00200     assert(F->arg_size() == 1 && "Obsolete va_end takes 1 argument!");
00201     //vaend foo
00202     // ->
00203     //bar = alloca 1 of typeof(foo)
00204     //vaend bar
00205     const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
00206     const Type* ArgTy = F->getFunctionType()->getParamType(0);
00207     const Type* ArgTyPtr = PointerType::get(ArgTy);
00208     Function* NF = M->getOrInsertFunction("llvm.va_end",
00209                                           RetTy, ArgTyPtr, (Type *)0);
00210 
00211     for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
00212       if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
00213         AllocaInst* bar = new AllocaInst(ArgTy, 0, "vaend.fix.1", CI);
00214         new StoreInst(CI->getOperand(1), bar, CI);
00215         new CallInst(NF, bar, "", CI);
00216         CI->getParent()->getInstList().erase(CI);
00217       }
00218     F->setName("");
00219   }
00220 
00221   if(Function* F = M->getNamedFunction("llvm.va_copy")) {
00222     assert(F->arg_size() == 1 && "Obsolete va_copy takes 1 argument!");
00223     //foo = vacopy(bar)
00224     // ->
00225     //a = alloca 1 of typeof(foo)
00226     //b = alloca 1 of typeof(foo)
00227     //store bar -> b
00228     //vacopy(a, b)
00229     //foo = load a
00230 
00231     const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
00232     const Type* ArgTy = F->getFunctionType()->getReturnType();
00233     const Type* ArgTyPtr = PointerType::get(ArgTy);
00234     Function* NF = M->getOrInsertFunction("llvm.va_copy",
00235                                           RetTy, ArgTyPtr, ArgTyPtr, (Type *)0);
00236 
00237     for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
00238       if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
00239         AllocaInst* a = new AllocaInst(ArgTy, 0, "vacopy.fix.1", CI);
00240         AllocaInst* b = new AllocaInst(ArgTy, 0, "vacopy.fix.2", CI);
00241         new StoreInst(CI->getOperand(1), b, CI);
00242         new CallInst(NF, a, b, "", CI);
00243         Value* foo = new LoadInst(a, "vacopy.fix.3", CI);
00244         CI->replaceAllUsesWith(foo);
00245         CI->getParent()->getInstList().erase(CI);
00246       }
00247     F->setName("");
00248   }
00249   return MP;
00250 }
00251 
00252 //===----------------------------------------------------------------------===//
00253 // Wrapper functions
00254 //===----------------------------------------------------------------------===//
00255 
00256 /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a
00257 /// buffer
00258 ModuleProvider*
00259 llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer,
00260                                       unsigned Length,
00261                                       const std::string &ModuleID,
00262                                       BytecodeHandler* H ) {
00263   return CheckVarargs(
00264      new BytecodeBufferReader(Buffer, Length, ModuleID, H));
00265 }
00266 
00267 /// ParseBytecodeBuffer - Parse a given bytecode buffer
00268 ///
00269 Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length,
00270                                   const std::string &ModuleID,
00271                                   std::string *ErrorStr){
00272   try {
00273     std::auto_ptr<ModuleProvider>
00274       AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID));
00275     return AMP->releaseModule();
00276   } catch (std::string &err) {
00277     if (ErrorStr) *ErrorStr = err;
00278     return 0;
00279   }
00280 }
00281 
00282 /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file
00283 ///
00284 ModuleProvider *llvm::getBytecodeModuleProvider(const std::string &Filename,
00285                                                 BytecodeHandler* H) {
00286   if (Filename != std::string("-"))        // Read from a file...
00287     return CheckVarargs(new BytecodeFileReader(Filename,H));
00288   else                                     // Read from stdin
00289     return CheckVarargs(new BytecodeStdinReader(H));
00290 }
00291 
00292 /// ParseBytecodeFile - Parse the given bytecode file
00293 ///
00294 Module *llvm::ParseBytecodeFile(const std::string &Filename,
00295                                 std::string *ErrorStr) {
00296   try {
00297     std::auto_ptr<ModuleProvider> AMP(getBytecodeModuleProvider(Filename));
00298     return AMP->releaseModule();
00299   } catch (std::string &err) {
00300     if (ErrorStr) *ErrorStr = err;
00301     return 0;
00302   }
00303 }
00304 
00305 // AnalyzeBytecodeFile - analyze one file
00306 Module* llvm::AnalyzeBytecodeFile(
00307   const std::string &Filename,  ///< File to analyze
00308   BytecodeAnalysis& bca,        ///< Statistical output
00309   std::string *ErrorStr,        ///< Error output
00310   std::ostream* output          ///< Dump output
00311 )
00312 {
00313   try {
00314     BytecodeHandler* analyzerHandler =createBytecodeAnalyzerHandler(bca,output);
00315     std::auto_ptr<ModuleProvider> AMP(
00316       getBytecodeModuleProvider(Filename,analyzerHandler));
00317     return AMP->releaseModule();
00318   } catch (std::string &err) {
00319     if (ErrorStr) *ErrorStr = err;
00320     return 0;
00321   }
00322 }
00323 
00324 // AnalyzeBytecodeBuffer - analyze a buffer
00325 Module* llvm::AnalyzeBytecodeBuffer(
00326   const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
00327   unsigned Length,             ///< Size of the bytecode buffer
00328   const std::string& ModuleID, ///< Identifier for the module
00329   BytecodeAnalysis& bca,       ///< The results of the analysis
00330   std::string* ErrorStr,       ///< Errors, if any.
00331   std::ostream* output         ///< Dump output, if any
00332 )
00333 {
00334   try {
00335     BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output);
00336     std::auto_ptr<ModuleProvider>
00337       AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, hdlr));
00338     return AMP->releaseModule();
00339   } catch (std::string &err) {
00340     if (ErrorStr) *ErrorStr = err;
00341     return 0;
00342   }
00343 }
00344 
00345 bool llvm::GetBytecodeDependentLibraries(const std::string &fname,
00346                                          Module::LibraryListType& deplibs) {
00347   try {
00348     std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fname));
00349     Module* M = AMP->releaseModule();
00350 
00351     deplibs = M->getLibraries();
00352     delete M;
00353     return true;
00354   } catch (...) {
00355     deplibs.clear();
00356     return false;
00357   }
00358 }
00359 
00360 static void getSymbols(Module*M, std::vector<std::string>& symbols) {
00361   // Loop over global variables
00362   for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI)
00363     if (!GI->isExternal() && !GI->hasInternalLinkage())
00364       if (!GI->getName().empty())
00365         symbols.push_back(GI->getName());
00366 
00367   // Loop over functions.
00368   for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
00369     if (!FI->isExternal() && !FI->hasInternalLinkage())
00370       if (!FI->getName().empty())
00371         symbols.push_back(FI->getName());
00372 }
00373 
00374 // Get just the externally visible defined symbols from the bytecode
00375 bool llvm::GetBytecodeSymbols(const sys::Path& fName,
00376                               std::vector<std::string>& symbols) {
00377   try {
00378     std::auto_ptr<ModuleProvider> AMP(
00379         getBytecodeModuleProvider(fName.toString()));
00380 
00381     // Get the module from the provider
00382     Module* M = AMP->materializeModule();
00383 
00384     // Get the symbols
00385     getSymbols(M, symbols);
00386 
00387     // Done with the module
00388     return true;
00389 
00390   } catch (...) {
00391     return false;
00392   }
00393 }
00394 
00395 ModuleProvider*
00396 llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length,
00397                          const std::string& ModuleID,
00398                          std::vector<std::string>& symbols) {
00399 
00400   ModuleProvider* MP = 0;
00401   try {
00402     // Get the module provider
00403     MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID);
00404 
00405     // Get the module from the provider
00406     Module* M = MP->materializeModule();
00407 
00408     // Get the symbols
00409     getSymbols(M, symbols);
00410 
00411     // Done with the module. Note that ModuleProvider will delete the
00412     // Module when it is deleted. Also note that its the caller's responsibility
00413     // to delete the ModuleProvider.
00414     return MP;
00415 
00416   } catch (...) {
00417     // We delete only the ModuleProvider here because its destructor will
00418     // also delete the Module (we used materializeModule not releaseModule).
00419     delete MP;
00420   }
00421   return 0;
00422 }