// LLVM API Documentation
00001 //===- ReaderWrappers.cpp - Parse bytecode from file or buffer -----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements loading and parsing a bytecode file and parsing a 00011 // bytecode module from a given buffer. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/Bytecode/Analyzer.h" 00016 #include "llvm/Bytecode/Reader.h" 00017 #include "Reader.h" 00018 #include "llvm/Module.h" 00019 #include "llvm/Instructions.h" 00020 #include "llvm/ADT/StringExtras.h" 00021 #include "llvm/System/MappedFile.h" 00022 #include <cerrno> 00023 #include <iostream> 00024 #include <memory> 00025 00026 using namespace llvm; 00027 00028 //===----------------------------------------------------------------------===// 00029 // BytecodeFileReader - Read from an mmap'able file descriptor. 
00030 // 00031 00032 namespace { 00033 /// BytecodeFileReader - parses a bytecode file from a file 00034 /// 00035 class BytecodeFileReader : public BytecodeReader { 00036 private: 00037 sys::MappedFile mapFile; 00038 00039 BytecodeFileReader(const BytecodeFileReader&); // Do not implement 00040 void operator=(const BytecodeFileReader &BFR); // Do not implement 00041 00042 public: 00043 BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0); 00044 }; 00045 } 00046 00047 BytecodeFileReader::BytecodeFileReader(const std::string &Filename, 00048 llvm::BytecodeHandler* H ) 00049 : BytecodeReader(H) 00050 , mapFile( sys::Path(Filename)) 00051 { 00052 mapFile.map(); 00053 unsigned char* buffer = reinterpret_cast<unsigned char*>(mapFile.base()); 00054 ParseBytecode(buffer, mapFile.size(), Filename); 00055 } 00056 00057 //===----------------------------------------------------------------------===// 00058 // BytecodeBufferReader - Read from a memory buffer 00059 // 00060 00061 namespace { 00062 /// BytecodeBufferReader - parses a bytecode file from a buffer 00063 /// 00064 class BytecodeBufferReader : public BytecodeReader { 00065 private: 00066 const unsigned char *Buffer; 00067 bool MustDelete; 00068 00069 BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement 00070 void operator=(const BytecodeBufferReader &BFR); // Do not implement 00071 00072 public: 00073 BytecodeBufferReader(const unsigned char *Buf, unsigned Length, 00074 const std::string &ModuleID, 00075 llvm::BytecodeHandler* Handler = 0); 00076 ~BytecodeBufferReader(); 00077 00078 }; 00079 } 00080 00081 BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf, 00082 unsigned Length, 00083 const std::string &ModuleID, 00084 llvm::BytecodeHandler* H ) 00085 : BytecodeReader(H) 00086 { 00087 // If not aligned, allocate a new buffer to hold the bytecode... 
00088 const unsigned char *ParseBegin = 0; 00089 if (reinterpret_cast<uint64_t>(Buf) & 3) { 00090 Buffer = new unsigned char[Length+4]; 00091 unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned 00092 ParseBegin = Buffer + Offset; 00093 memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over 00094 MustDelete = true; 00095 } else { 00096 // If we don't need to copy it over, just use the caller's copy 00097 ParseBegin = Buffer = Buf; 00098 MustDelete = false; 00099 } 00100 try { 00101 ParseBytecode(ParseBegin, Length, ModuleID); 00102 } catch (...) { 00103 if (MustDelete) delete [] Buffer; 00104 throw; 00105 } 00106 } 00107 00108 BytecodeBufferReader::~BytecodeBufferReader() { 00109 if (MustDelete) delete [] Buffer; 00110 } 00111 00112 //===----------------------------------------------------------------------===// 00113 // BytecodeStdinReader - Read bytecode from Standard Input 00114 // 00115 00116 namespace { 00117 /// BytecodeStdinReader - parses a bytecode file from stdin 00118 /// 00119 class BytecodeStdinReader : public BytecodeReader { 00120 private: 00121 std::vector<unsigned char> FileData; 00122 unsigned char *FileBuf; 00123 00124 BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement 00125 void operator=(const BytecodeStdinReader &BFR); // Do not implement 00126 00127 public: 00128 BytecodeStdinReader( llvm::BytecodeHandler* H = 0 ); 00129 }; 00130 } 00131 00132 BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H ) 00133 : BytecodeReader(H) 00134 { 00135 char Buffer[4096*4]; 00136 00137 // Read in all of the data from stdin, we cannot mmap stdin... 
00138 while (std::cin.good()) { 00139 std::cin.read(Buffer, 4096*4); 00140 int BlockSize = std::cin.gcount(); 00141 if (0 >= BlockSize) 00142 break; 00143 FileData.insert(FileData.end(), Buffer, Buffer+BlockSize); 00144 } 00145 00146 if (FileData.empty()) 00147 throw std::string("Standard Input empty!"); 00148 00149 FileBuf = &FileData[0]; 00150 ParseBytecode(FileBuf, FileData.size(), "<stdin>"); 00151 } 00152 00153 //===----------------------------------------------------------------------===// 00154 // Varargs transmogrification code... 00155 // 00156 00157 // CheckVarargs - This is used to automatically translate old-style varargs to 00158 // new style varargs for backwards compatibility. 00159 static ModuleProvider* CheckVarargs(ModuleProvider* MP) { 00160 Module* M = MP->getModule(); 00161 00162 // check to see if va_start takes arguements... 00163 Function* F = M->getNamedFunction("llvm.va_start"); 00164 if(F == 0) return MP; //No varargs use, just return. 00165 00166 if (F->getFunctionType()->getNumParams() == 1) 00167 return MP; // Modern varargs processing, just return. 00168 00169 // If we get to this point, we know that we have an old-style module. 00170 // Materialize the whole thing to perform the rewriting. 
00171 MP->materializeModule(); 00172 00173 if(Function* F = M->getNamedFunction("llvm.va_start")) { 00174 assert(F->arg_size() == 0 && "Obsolete va_start takes 0 argument!"); 00175 00176 //foo = va_start() 00177 // -> 00178 //bar = alloca typeof(foo) 00179 //va_start(bar) 00180 //foo = load bar 00181 00182 const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); 00183 const Type* ArgTy = F->getFunctionType()->getReturnType(); 00184 const Type* ArgTyPtr = PointerType::get(ArgTy); 00185 Function* NF = M->getOrInsertFunction("llvm.va_start", 00186 RetTy, ArgTyPtr, (Type *)0); 00187 00188 for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) 00189 if (CallInst* CI = dyn_cast<CallInst>(*I++)) { 00190 AllocaInst* bar = new AllocaInst(ArgTy, 0, "vastart.fix.1", CI); 00191 new CallInst(NF, bar, "", CI); 00192 Value* foo = new LoadInst(bar, "vastart.fix.2", CI); 00193 CI->replaceAllUsesWith(foo); 00194 CI->getParent()->getInstList().erase(CI); 00195 } 00196 F->setName(""); 00197 } 00198 00199 if(Function* F = M->getNamedFunction("llvm.va_end")) { 00200 assert(F->arg_size() == 1 && "Obsolete va_end takes 1 argument!"); 00201 //vaend foo 00202 // -> 00203 //bar = alloca 1 of typeof(foo) 00204 //vaend bar 00205 const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); 00206 const Type* ArgTy = F->getFunctionType()->getParamType(0); 00207 const Type* ArgTyPtr = PointerType::get(ArgTy); 00208 Function* NF = M->getOrInsertFunction("llvm.va_end", 00209 RetTy, ArgTyPtr, (Type *)0); 00210 00211 for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) 00212 if (CallInst* CI = dyn_cast<CallInst>(*I++)) { 00213 AllocaInst* bar = new AllocaInst(ArgTy, 0, "vaend.fix.1", CI); 00214 new StoreInst(CI->getOperand(1), bar, CI); 00215 new CallInst(NF, bar, "", CI); 00216 CI->getParent()->getInstList().erase(CI); 00217 } 00218 F->setName(""); 00219 } 00220 00221 if(Function* F = M->getNamedFunction("llvm.va_copy")) { 00222 assert(F->arg_size() == 1 && 
"Obsolete va_copy takes 1 argument!"); 00223 //foo = vacopy(bar) 00224 // -> 00225 //a = alloca 1 of typeof(foo) 00226 //b = alloca 1 of typeof(foo) 00227 //store bar -> b 00228 //vacopy(a, b) 00229 //foo = load a 00230 00231 const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); 00232 const Type* ArgTy = F->getFunctionType()->getReturnType(); 00233 const Type* ArgTyPtr = PointerType::get(ArgTy); 00234 Function* NF = M->getOrInsertFunction("llvm.va_copy", 00235 RetTy, ArgTyPtr, ArgTyPtr, (Type *)0); 00236 00237 for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) 00238 if (CallInst* CI = dyn_cast<CallInst>(*I++)) { 00239 AllocaInst* a = new AllocaInst(ArgTy, 0, "vacopy.fix.1", CI); 00240 AllocaInst* b = new AllocaInst(ArgTy, 0, "vacopy.fix.2", CI); 00241 new StoreInst(CI->getOperand(1), b, CI); 00242 new CallInst(NF, a, b, "", CI); 00243 Value* foo = new LoadInst(a, "vacopy.fix.3", CI); 00244 CI->replaceAllUsesWith(foo); 00245 CI->getParent()->getInstList().erase(CI); 00246 } 00247 F->setName(""); 00248 } 00249 return MP; 00250 } 00251 00252 //===----------------------------------------------------------------------===// 00253 // Wrapper functions 00254 //===----------------------------------------------------------------------===// 00255 00256 /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a 00257 /// buffer 00258 ModuleProvider* 00259 llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer, 00260 unsigned Length, 00261 const std::string &ModuleID, 00262 BytecodeHandler* H ) { 00263 return CheckVarargs( 00264 new BytecodeBufferReader(Buffer, Length, ModuleID, H)); 00265 } 00266 00267 /// ParseBytecodeBuffer - Parse a given bytecode buffer 00268 /// 00269 Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length, 00270 const std::string &ModuleID, 00271 std::string *ErrorStr){ 00272 try { 00273 std::auto_ptr<ModuleProvider> 00274 AMP(getBytecodeBufferModuleProvider(Buffer, Length, 
ModuleID)); 00275 return AMP->releaseModule(); 00276 } catch (std::string &err) { 00277 if (ErrorStr) *ErrorStr = err; 00278 return 0; 00279 } 00280 } 00281 00282 /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file 00283 /// 00284 ModuleProvider *llvm::getBytecodeModuleProvider(const std::string &Filename, 00285 BytecodeHandler* H) { 00286 if (Filename != std::string("-")) // Read from a file... 00287 return CheckVarargs(new BytecodeFileReader(Filename,H)); 00288 else // Read from stdin 00289 return CheckVarargs(new BytecodeStdinReader(H)); 00290 } 00291 00292 /// ParseBytecodeFile - Parse the given bytecode file 00293 /// 00294 Module *llvm::ParseBytecodeFile(const std::string &Filename, 00295 std::string *ErrorStr) { 00296 try { 00297 std::auto_ptr<ModuleProvider> AMP(getBytecodeModuleProvider(Filename)); 00298 return AMP->releaseModule(); 00299 } catch (std::string &err) { 00300 if (ErrorStr) *ErrorStr = err; 00301 return 0; 00302 } 00303 } 00304 00305 // AnalyzeBytecodeFile - analyze one file 00306 Module* llvm::AnalyzeBytecodeFile( 00307 const std::string &Filename, ///< File to analyze 00308 BytecodeAnalysis& bca, ///< Statistical output 00309 std::string *ErrorStr, ///< Error output 00310 std::ostream* output ///< Dump output 00311 ) 00312 { 00313 try { 00314 BytecodeHandler* analyzerHandler =createBytecodeAnalyzerHandler(bca,output); 00315 std::auto_ptr<ModuleProvider> AMP( 00316 getBytecodeModuleProvider(Filename,analyzerHandler)); 00317 return AMP->releaseModule(); 00318 } catch (std::string &err) { 00319 if (ErrorStr) *ErrorStr = err; 00320 return 0; 00321 } 00322 } 00323 00324 // AnalyzeBytecodeBuffer - analyze a buffer 00325 Module* llvm::AnalyzeBytecodeBuffer( 00326 const unsigned char* Buffer, ///< Pointer to start of bytecode buffer 00327 unsigned Length, ///< Size of the bytecode buffer 00328 const std::string& ModuleID, ///< Identifier for the module 00329 BytecodeAnalysis& bca, ///< The results of the analysis 00330 
std::string* ErrorStr, ///< Errors, if any. 00331 std::ostream* output ///< Dump output, if any 00332 ) 00333 { 00334 try { 00335 BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output); 00336 std::auto_ptr<ModuleProvider> 00337 AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, hdlr)); 00338 return AMP->releaseModule(); 00339 } catch (std::string &err) { 00340 if (ErrorStr) *ErrorStr = err; 00341 return 0; 00342 } 00343 } 00344 00345 bool llvm::GetBytecodeDependentLibraries(const std::string &fname, 00346 Module::LibraryListType& deplibs) { 00347 try { 00348 std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fname)); 00349 Module* M = AMP->releaseModule(); 00350 00351 deplibs = M->getLibraries(); 00352 delete M; 00353 return true; 00354 } catch (...) { 00355 deplibs.clear(); 00356 return false; 00357 } 00358 } 00359 00360 static void getSymbols(Module*M, std::vector<std::string>& symbols) { 00361 // Loop over global variables 00362 for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI) 00363 if (!GI->isExternal() && !GI->hasInternalLinkage()) 00364 if (!GI->getName().empty()) 00365 symbols.push_back(GI->getName()); 00366 00367 // Loop over functions. 00368 for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) 00369 if (!FI->isExternal() && !FI->hasInternalLinkage()) 00370 if (!FI->getName().empty()) 00371 symbols.push_back(FI->getName()); 00372 } 00373 00374 // Get just the externally visible defined symbols from the bytecode 00375 bool llvm::GetBytecodeSymbols(const sys::Path& fName, 00376 std::vector<std::string>& symbols) { 00377 try { 00378 std::auto_ptr<ModuleProvider> AMP( 00379 getBytecodeModuleProvider(fName.toString())); 00380 00381 // Get the module from the provider 00382 Module* M = AMP->materializeModule(); 00383 00384 // Get the symbols 00385 getSymbols(M, symbols); 00386 00387 // Done with the module 00388 return true; 00389 00390 } catch (...) 
{ 00391 return false; 00392 } 00393 } 00394 00395 ModuleProvider* 00396 llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length, 00397 const std::string& ModuleID, 00398 std::vector<std::string>& symbols) { 00399 00400 ModuleProvider* MP = 0; 00401 try { 00402 // Get the module provider 00403 MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID); 00404 00405 // Get the module from the provider 00406 Module* M = MP->materializeModule(); 00407 00408 // Get the symbols 00409 getSymbols(M, symbols); 00410 00411 // Done with the module. Note that ModuleProvider will delete the 00412 // Module when it is deleted. Also note that its the caller's responsibility 00413 // to delete the ModuleProvider. 00414 return MP; 00415 00416 } catch (...) { 00417 // We delete only the ModuleProvider here because its destructor will 00418 // also delete the Module (we used materializeModule not releaseModule). 00419 delete MP; 00420 } 00421 return 0; 00422 }