LLVM API Documentation

ReaderWrappers.cpp

Go to the documentation of this file.
00001 //===- ReaderWrappers.cpp - Parse bytecode from file or buffer  -----------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file was developed by the LLVM research group and is distributed under
00006 // the University of Illinois Open Source License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements loading and parsing a bytecode file and parsing a
00011 // bytecode module from a given buffer.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "llvm/Bytecode/Analyzer.h"
00016 #include "llvm/Bytecode/Reader.h"
00017 #include "Reader.h"
00018 #include "llvm/Module.h"
00019 #include "llvm/Instructions.h"
00020 #include "llvm/ADT/StringExtras.h"
00021 #include "llvm/System/MappedFile.h"
00022 #include "llvm/System/Program.h"
00023 #include <cerrno>
00024 #include <iostream>
00025 #include <memory>
00026 
00027 using namespace llvm;
00028 
00029 //===----------------------------------------------------------------------===//
00030 // BytecodeFileReader - Read from an mmap'able file descriptor.
00031 //
00032 
00033 namespace {
00034   /// BytecodeFileReader - parses a bytecode file from a file
00035   ///
00036   class BytecodeFileReader : public BytecodeReader {
00037   private:
00038     sys::MappedFile mapFile;
00039 
00040     BytecodeFileReader(const BytecodeFileReader&); // Do not implement
00041     void operator=(const BytecodeFileReader &BFR); // Do not implement
00042 
00043   public:
00044     BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0);
00045   };
00046 }
00047 
00048 BytecodeFileReader::BytecodeFileReader(const std::string &Filename,
00049                                        llvm::BytecodeHandler* H )
00050   : BytecodeReader(H)
00051   , mapFile( sys::Path(Filename))
00052 {
00053   mapFile.map();
00054   unsigned char* buffer = reinterpret_cast<unsigned char*>(mapFile.base());
00055   ParseBytecode(buffer, mapFile.size(), Filename);
00056 }
00057 
00058 //===----------------------------------------------------------------------===//
00059 // BytecodeBufferReader - Read from a memory buffer
00060 //
00061 
00062 namespace {
00063   /// BytecodeBufferReader - parses a bytecode file from a buffer
00064   ///
00065   class BytecodeBufferReader : public BytecodeReader {
00066   private:
00067     const unsigned char *Buffer;
00068     bool MustDelete;
00069 
00070     BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement
00071     void operator=(const BytecodeBufferReader &BFR);   // Do not implement
00072 
00073   public:
00074     BytecodeBufferReader(const unsigned char *Buf, unsigned Length,
00075                          const std::string &ModuleID,
00076                          llvm::BytecodeHandler* Handler = 0);
00077     ~BytecodeBufferReader();
00078 
00079   };
00080 }
00081 
00082 BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf,
00083                                            unsigned Length,
00084                                            const std::string &ModuleID,
00085                                            llvm::BytecodeHandler* H )
00086   : BytecodeReader(H)
00087 {
00088   // If not aligned, allocate a new buffer to hold the bytecode...
00089   const unsigned char *ParseBegin = 0;
00090   if (reinterpret_cast<uint64_t>(Buf) & 3) {
00091     Buffer = new unsigned char[Length+4];
00092     unsigned Offset = 4 - ((intptr_t)Buffer & 3);   // Make sure it's aligned
00093     ParseBegin = Buffer + Offset;
00094     memcpy((unsigned char*)ParseBegin, Buf, Length);    // Copy it over
00095     MustDelete = true;
00096   } else {
00097     // If we don't need to copy it over, just use the caller's copy
00098     ParseBegin = Buffer = Buf;
00099     MustDelete = false;
00100   }
00101   try {
00102     ParseBytecode(ParseBegin, Length, ModuleID);
00103   } catch (...) {
00104     if (MustDelete) delete [] Buffer;
00105     throw;
00106   }
00107 }
00108 
00109 BytecodeBufferReader::~BytecodeBufferReader() {
00110   if (MustDelete) delete [] Buffer;
00111 }
00112 
00113 //===----------------------------------------------------------------------===//
00114 //  BytecodeStdinReader - Read bytecode from Standard Input
00115 //
00116 
00117 namespace {
00118   /// BytecodeStdinReader - parses a bytecode file from stdin
00119   ///
00120   class BytecodeStdinReader : public BytecodeReader {
00121   private:
00122     std::vector<unsigned char> FileData;
00123     unsigned char *FileBuf;
00124 
00125     BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement
00126     void operator=(const BytecodeStdinReader &BFR);  // Do not implement
00127 
00128   public:
00129     BytecodeStdinReader( llvm::BytecodeHandler* H = 0 );
00130   };
00131 }
00132 
00133 BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H )
00134   : BytecodeReader(H)
00135 {
00136   sys::Program::ChangeStdinToBinary();
00137   char Buffer[4096*4];
00138 
00139   // Read in all of the data from stdin, we cannot mmap stdin...
00140   while (std::cin.good()) {
00141     std::cin.read(Buffer, 4096*4);
00142     int BlockSize = std::cin.gcount();
00143     if (0 >= BlockSize)
00144       break;
00145     FileData.insert(FileData.end(), Buffer, Buffer+BlockSize);
00146   }
00147 
00148   if (FileData.empty())
00149     throw std::string("Standard Input empty!");
00150 
00151   FileBuf = &FileData[0];
00152   ParseBytecode(FileBuf, FileData.size(), "<stdin>");
00153 }
00154 
00155 //===----------------------------------------------------------------------===//
00156 // Varargs transmogrification code...
00157 //
00158 
00159 // CheckVarargs - This is used to automatically translate old-style varargs to
00160 // new style varargs for backwards compatibility.
00161 static ModuleProvider* CheckVarargs(ModuleProvider* MP) {
00162   Module* M = MP->getModule();
00163 
00164   // check to see if va_start takes arguements...
00165   Function* F = M->getNamedFunction("llvm.va_start");
00166   if(F == 0) return MP; //No varargs use, just return.
00167 
00168   if (F->getFunctionType()->getNumParams() == 1)
00169     return MP; // Modern varargs processing, just return.
00170 
00171   // If we get to this point, we know that we have an old-style module.
00172   // Materialize the whole thing to perform the rewriting.
00173   if (MP->materializeModule() == 0)
00174     return 0;
00175 
00176   if(Function* F = M->getNamedFunction("llvm.va_start")) {
00177     assert(F->arg_size() == 0 && "Obsolete va_start takes 0 argument!");
00178 
00179     //foo = va_start()
00180     // ->
00181     //bar = alloca typeof(foo)
00182     //va_start(bar)
00183     //foo = load bar
00184 
00185     const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
00186     const Type* ArgTy = F->getFunctionType()->getReturnType();
00187     const Type* ArgTyPtr = PointerType::get(ArgTy);
00188     Function* NF = M->getOrInsertFunction("llvm.va_start",
00189                                           RetTy, ArgTyPtr, (Type *)0);
00190 
00191     for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
00192       if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
00193         AllocaInst* bar = new AllocaInst(ArgTy, 0, "vastart.fix.1", CI);
00194         new CallInst(NF, bar, "", CI);
00195         Value* foo = new LoadInst(bar, "vastart.fix.2", CI);
00196         CI->replaceAllUsesWith(foo);
00197         CI->getParent()->getInstList().erase(CI);
00198       }
00199     F->setName("");
00200   }
00201 
00202   if(Function* F = M->getNamedFunction("llvm.va_end")) {
00203     assert(F->arg_size() == 1 && "Obsolete va_end takes 1 argument!");
00204     //vaend foo
00205     // ->
00206     //bar = alloca 1 of typeof(foo)
00207     //vaend bar
00208     const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
00209     const Type* ArgTy = F->getFunctionType()->getParamType(0);
00210     const Type* ArgTyPtr = PointerType::get(ArgTy);
00211     Function* NF = M->getOrInsertFunction("llvm.va_end",
00212                                           RetTy, ArgTyPtr, (Type *)0);
00213 
00214     for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
00215       if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
00216         AllocaInst* bar = new AllocaInst(ArgTy, 0, "vaend.fix.1", CI);
00217         new StoreInst(CI->getOperand(1), bar, CI);
00218         new CallInst(NF, bar, "", CI);
00219         CI->getParent()->getInstList().erase(CI);
00220       }
00221     F->setName("");
00222   }
00223 
00224   if(Function* F = M->getNamedFunction("llvm.va_copy")) {
00225     assert(F->arg_size() == 1 && "Obsolete va_copy takes 1 argument!");
00226     //foo = vacopy(bar)
00227     // ->
00228     //a = alloca 1 of typeof(foo)
00229     //b = alloca 1 of typeof(foo)
00230     //store bar -> b
00231     //vacopy(a, b)
00232     //foo = load a
00233 
00234     const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
00235     const Type* ArgTy = F->getFunctionType()->getReturnType();
00236     const Type* ArgTyPtr = PointerType::get(ArgTy);
00237     Function* NF = M->getOrInsertFunction("llvm.va_copy",
00238                                           RetTy, ArgTyPtr, ArgTyPtr, (Type *)0);
00239 
00240     for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
00241       if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
00242         AllocaInst* a = new AllocaInst(ArgTy, 0, "vacopy.fix.1", CI);
00243         AllocaInst* b = new AllocaInst(ArgTy, 0, "vacopy.fix.2", CI);
00244         new StoreInst(CI->getOperand(1), b, CI);
00245         new CallInst(NF, a, b, "", CI);
00246         Value* foo = new LoadInst(a, "vacopy.fix.3", CI);
00247         CI->replaceAllUsesWith(foo);
00248         CI->getParent()->getInstList().erase(CI);
00249       }
00250     F->setName("");
00251   }
00252   return MP;
00253 }
00254 
00255 //===----------------------------------------------------------------------===//
00256 // Wrapper functions
00257 //===----------------------------------------------------------------------===//
00258 
00259 /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a
00260 /// buffer
00261 ModuleProvider*
00262 llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer,
00263                                       unsigned Length,
00264                                       const std::string &ModuleID,
00265                                       BytecodeHandler* H ) {
00266   return CheckVarargs(
00267      new BytecodeBufferReader(Buffer, Length, ModuleID, H));
00268 }
00269 
00270 /// ParseBytecodeBuffer - Parse a given bytecode buffer
00271 ///
00272 Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length,
00273                                   const std::string &ModuleID,
00274                                   std::string *ErrorStr){
00275   try {
00276     std::auto_ptr<ModuleProvider>
00277       AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID));
00278     return AMP->releaseModule();
00279   } catch (std::string &err) {
00280     if (ErrorStr) *ErrorStr = err;
00281     return 0;
00282   }
00283 }
00284 
00285 /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file
00286 ///
00287 ModuleProvider *llvm::getBytecodeModuleProvider(const std::string &Filename,
00288                                                 BytecodeHandler* H) {
00289   if (Filename != std::string("-"))        // Read from a file...
00290     return CheckVarargs(new BytecodeFileReader(Filename,H));
00291   else                                     // Read from stdin
00292     return CheckVarargs(new BytecodeStdinReader(H));
00293 }
00294 
00295 /// ParseBytecodeFile - Parse the given bytecode file
00296 ///
00297 Module *llvm::ParseBytecodeFile(const std::string &Filename,
00298                                 std::string *ErrorStr) {
00299   try {
00300     std::auto_ptr<ModuleProvider> AMP(getBytecodeModuleProvider(Filename));
00301     return AMP->releaseModule();
00302   } catch (std::string &err) {
00303     if (ErrorStr) *ErrorStr = err;
00304     return 0;
00305   }
00306 }
00307 
00308 // AnalyzeBytecodeFile - analyze one file
00309 Module* llvm::AnalyzeBytecodeFile(
00310   const std::string &Filename,  ///< File to analyze
00311   BytecodeAnalysis& bca,        ///< Statistical output
00312   std::string *ErrorStr,        ///< Error output
00313   std::ostream* output          ///< Dump output
00314 )
00315 {
00316   try {
00317     BytecodeHandler* analyzerHandler =createBytecodeAnalyzerHandler(bca,output);
00318     std::auto_ptr<ModuleProvider> AMP(
00319       getBytecodeModuleProvider(Filename,analyzerHandler));
00320     return AMP->releaseModule();
00321   } catch (std::string &err) {
00322     if (ErrorStr) *ErrorStr = err;
00323     return 0;
00324   }
00325 }
00326 
00327 // AnalyzeBytecodeBuffer - analyze a buffer
00328 Module* llvm::AnalyzeBytecodeBuffer(
00329   const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
00330   unsigned Length,             ///< Size of the bytecode buffer
00331   const std::string& ModuleID, ///< Identifier for the module
00332   BytecodeAnalysis& bca,       ///< The results of the analysis
00333   std::string* ErrorStr,       ///< Errors, if any.
00334   std::ostream* output         ///< Dump output, if any
00335 )
00336 {
00337   try {
00338     BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output);
00339     std::auto_ptr<ModuleProvider>
00340       AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, hdlr));
00341     return AMP->releaseModule();
00342   } catch (std::string &err) {
00343     if (ErrorStr) *ErrorStr = err;
00344     return 0;
00345   }
00346 }
00347 
00348 bool llvm::GetBytecodeDependentLibraries(const std::string &fname,
00349                                          Module::LibraryListType& deplibs) {
00350   try {
00351     std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fname));
00352     Module* M = AMP->releaseModule();
00353 
00354     deplibs = M->getLibraries();
00355     delete M;
00356     return true;
00357   } catch (...) {
00358     deplibs.clear();
00359     return false;
00360   }
00361 }
00362 
00363 static void getSymbols(Module*M, std::vector<std::string>& symbols) {
00364   // Loop over global variables
00365   for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI)
00366     if (!GI->isExternal() && !GI->hasInternalLinkage())
00367       if (!GI->getName().empty())
00368         symbols.push_back(GI->getName());
00369 
00370   // Loop over functions.
00371   for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
00372     if (!FI->isExternal() && !FI->hasInternalLinkage())
00373       if (!FI->getName().empty())
00374         symbols.push_back(FI->getName());
00375 }
00376 
00377 // Get just the externally visible defined symbols from the bytecode
00378 bool llvm::GetBytecodeSymbols(const sys::Path& fName,
00379                               std::vector<std::string>& symbols) {
00380   std::auto_ptr<ModuleProvider> AMP(
00381       getBytecodeModuleProvider(fName.toString()));
00382 
00383   // Get the module from the provider
00384   Module* M = AMP->materializeModule();
00385   if (M == 0) return false;
00386 
00387   // Get the symbols
00388   getSymbols(M, symbols);
00389 
00390   // Done with the module
00391   return true;
00392 }
00393 
00394 ModuleProvider*
00395 llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length,
00396                          const std::string& ModuleID,
00397                          std::vector<std::string>& symbols) {
00398 
00399   ModuleProvider* MP = 0;
00400   try {
00401     // Get the module provider
00402     MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID);
00403 
00404     // Get the module from the provider
00405     Module* M = MP->materializeModule();
00406     if (M == 0) return 0;
00407 
00408     // Get the symbols
00409     getSymbols(M, symbols);
00410 
00411     // Done with the module. Note that ModuleProvider will delete the
00412     // Module when it is deleted. Also note that its the caller's responsibility
00413     // to delete the ModuleProvider.
00414     return MP;
00415 
00416   } catch (...) {
00417     // We delete only the ModuleProvider here because its destructor will
00418     // also delete the Module (we used materializeModule not releaseModule).
00419     delete MP;
00420   }
00421   return 0;
00422 }