LLVM API Documentation
00001 //===- ReaderWrappers.cpp - Parse bytecode from file or buffer -----------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements loading and parsing a bytecode file and parsing a 00011 // bytecode module from a given buffer. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/Bytecode/Analyzer.h" 00016 #include "llvm/Bytecode/Reader.h" 00017 #include "Reader.h" 00018 #include "llvm/Module.h" 00019 #include "llvm/Instructions.h" 00020 #include "llvm/ADT/StringExtras.h" 00021 #include "llvm/System/MappedFile.h" 00022 #include "llvm/System/Program.h" 00023 #include <cerrno> 00024 #include <iostream> 00025 #include <memory> 00026 00027 using namespace llvm; 00028 00029 //===----------------------------------------------------------------------===// 00030 // BytecodeFileReader - Read from an mmap'able file descriptor. 00031 // 00032 00033 namespace { 00034 /// BytecodeFileReader - parses a bytecode file from a file 00035 /// 00036 class BytecodeFileReader : public BytecodeReader { 00037 private: 00038 sys::MappedFile mapFile; 00039 00040 BytecodeFileReader(const BytecodeFileReader&); // Do not implement 00041 void operator=(const BytecodeFileReader &BFR); // Do not implement 00042 00043 public: 00044 BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0); 00045 }; 00046 } 00047 00048 BytecodeFileReader::BytecodeFileReader(const std::string &Filename, 00049 llvm::BytecodeHandler* H ) 00050 : BytecodeReader(H) 00051 , mapFile( sys::Path(Filename)) 00052 { 00053 mapFile.map(); 00054 unsigned char* buffer = reinterpret_cast<unsigned char*>(mapFile.base()); 00055 ParseBytecode(buffer, mapFile.size(), Filename); 00056 } 00057 00058 //===----------------------------------------------------------------------===// 00059 // BytecodeBufferReader - Read from a memory buffer 00060 // 00061 00062 namespace { 00063 /// BytecodeBufferReader - parses a bytecode file from a buffer 00064 /// 00065 class BytecodeBufferReader : public BytecodeReader { 00066 private: 00067 const unsigned char *Buffer; 00068 bool MustDelete; 00069 00070 BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement 00071 void operator=(const BytecodeBufferReader &BFR); // Do not implement 00072 00073 public: 00074 BytecodeBufferReader(const unsigned char *Buf, unsigned Length, 00075 const std::string &ModuleID, 00076 llvm::BytecodeHandler* Handler = 0); 00077 ~BytecodeBufferReader(); 00078 00079 }; 00080 } 00081 00082 BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf, 00083 unsigned Length, 00084 const std::string &ModuleID, 00085 llvm::BytecodeHandler* H ) 00086 : BytecodeReader(H) 00087 { 00088 // If not aligned, allocate a new buffer to hold the bytecode... 00089 const unsigned char *ParseBegin = 0; 00090 if (reinterpret_cast<uint64_t>(Buf) & 3) { 00091 Buffer = new unsigned char[Length+4]; 00092 unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned 00093 ParseBegin = Buffer + Offset; 00094 memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over 00095 MustDelete = true; 00096 } else { 00097 // If we don't need to copy it over, just use the caller's copy 00098 ParseBegin = Buffer = Buf; 00099 MustDelete = false; 00100 } 00101 try { 00102 ParseBytecode(ParseBegin, Length, ModuleID); 00103 } catch (...) { 00104 if (MustDelete) delete [] Buffer; 00105 throw; 00106 } 00107 } 00108 00109 BytecodeBufferReader::~BytecodeBufferReader() { 00110 if (MustDelete) delete [] Buffer; 00111 } 00112 00113 //===----------------------------------------------------------------------===// 00114 // BytecodeStdinReader - Read bytecode from Standard Input 00115 // 00116 00117 namespace { 00118 /// BytecodeStdinReader - parses a bytecode file from stdin 00119 /// 00120 class BytecodeStdinReader : public BytecodeReader { 00121 private: 00122 std::vector<unsigned char> FileData; 00123 unsigned char *FileBuf; 00124 00125 BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement 00126 void operator=(const BytecodeStdinReader &BFR); // Do not implement 00127 00128 public: 00129 BytecodeStdinReader( llvm::BytecodeHandler* H = 0 ); 00130 }; 00131 } 00132 00133 BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H ) 00134 : BytecodeReader(H) 00135 { 00136 sys::Program::ChangeStdinToBinary(); 00137 char Buffer[4096*4]; 00138 00139 // Read in all of the data from stdin, we cannot mmap stdin... 00140 while (std::cin.good()) { 00141 std::cin.read(Buffer, 4096*4); 00142 int BlockSize = std::cin.gcount(); 00143 if (0 >= BlockSize) 00144 break; 00145 FileData.insert(FileData.end(), Buffer, Buffer+BlockSize); 00146 } 00147 00148 if (FileData.empty()) 00149 throw std::string("Standard Input empty!"); 00150 00151 FileBuf = &FileData[0]; 00152 ParseBytecode(FileBuf, FileData.size(), "<stdin>"); 00153 } 00154 00155 //===----------------------------------------------------------------------===// 00156 // Varargs transmogrification code... 00157 // 00158 00159 // CheckVarargs - This is used to automatically translate old-style varargs to 00160 // new style varargs for backwards compatibility. 00161 static ModuleProvider* CheckVarargs(ModuleProvider* MP) { 00162 Module* M = MP->getModule(); 00163 00164 // check to see if va_start takes arguements... 00165 Function* F = M->getNamedFunction("llvm.va_start"); 00166 if(F == 0) return MP; //No varargs use, just return. 00167 00168 if (F->getFunctionType()->getNumParams() == 1) 00169 return MP; // Modern varargs processing, just return. 00170 00171 // If we get to this point, we know that we have an old-style module. 00172 // Materialize the whole thing to perform the rewriting. 00173 if (MP->materializeModule() == 0) 00174 return 0; 00175 00176 if(Function* F = M->getNamedFunction("llvm.va_start")) { 00177 assert(F->arg_size() == 0 && "Obsolete va_start takes 0 argument!"); 00178 00179 //foo = va_start() 00180 // -> 00181 //bar = alloca typeof(foo) 00182 //va_start(bar) 00183 //foo = load bar 00184 00185 const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); 00186 const Type* ArgTy = F->getFunctionType()->getReturnType(); 00187 const Type* ArgTyPtr = PointerType::get(ArgTy); 00188 Function* NF = M->getOrInsertFunction("llvm.va_start", 00189 RetTy, ArgTyPtr, (Type *)0); 00190 00191 for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) 00192 if (CallInst* CI = dyn_cast<CallInst>(*I++)) { 00193 AllocaInst* bar = new AllocaInst(ArgTy, 0, "vastart.fix.1", CI); 00194 new CallInst(NF, bar, "", CI); 00195 Value* foo = new LoadInst(bar, "vastart.fix.2", CI); 00196 CI->replaceAllUsesWith(foo); 00197 CI->getParent()->getInstList().erase(CI); 00198 } 00199 F->setName(""); 00200 } 00201 00202 if(Function* F = M->getNamedFunction("llvm.va_end")) { 00203 assert(F->arg_size() == 1 && "Obsolete va_end takes 1 argument!"); 00204 //vaend foo 00205 // -> 00206 //bar = alloca 1 of typeof(foo) 00207 //vaend bar 00208 const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); 00209 const Type* ArgTy = F->getFunctionType()->getParamType(0); 00210 const Type* ArgTyPtr = PointerType::get(ArgTy); 00211 Function* NF = M->getOrInsertFunction("llvm.va_end", 00212 RetTy, ArgTyPtr, (Type *)0); 00213 00214 for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) 00215 if (CallInst* CI = dyn_cast<CallInst>(*I++)) { 00216 AllocaInst* bar = new AllocaInst(ArgTy, 0, "vaend.fix.1", CI); 00217 new StoreInst(CI->getOperand(1), bar, CI); 00218 new CallInst(NF, bar, "", CI); 00219 CI->getParent()->getInstList().erase(CI); 00220 } 00221 F->setName(""); 00222 } 00223 00224 if(Function* F = M->getNamedFunction("llvm.va_copy")) { 00225 assert(F->arg_size() == 1 && "Obsolete va_copy takes 1 argument!"); 00226 //foo = vacopy(bar) 00227 // -> 00228 //a = alloca 1 of typeof(foo) 00229 //b = alloca 1 of typeof(foo) 00230 //store bar -> b 00231 //vacopy(a, b) 00232 //foo = load a 00233 00234 const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); 00235 const Type* ArgTy = F->getFunctionType()->getReturnType(); 00236 const Type* ArgTyPtr = PointerType::get(ArgTy); 00237 Function* NF = M->getOrInsertFunction("llvm.va_copy", 00238 RetTy, ArgTyPtr, ArgTyPtr, (Type *)0); 00239 00240 for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) 00241 if (CallInst* CI = dyn_cast<CallInst>(*I++)) { 00242 AllocaInst* a = new AllocaInst(ArgTy, 0, "vacopy.fix.1", CI); 00243 AllocaInst* b = new AllocaInst(ArgTy, 0, "vacopy.fix.2", CI); 00244 new StoreInst(CI->getOperand(1), b, CI); 00245 new CallInst(NF, a, b, "", CI); 00246 Value* foo = new LoadInst(a, "vacopy.fix.3", CI); 00247 CI->replaceAllUsesWith(foo); 00248 CI->getParent()->getInstList().erase(CI); 00249 } 00250 F->setName(""); 00251 } 00252 return MP; 00253 } 00254 00255 //===----------------------------------------------------------------------===// 00256 // Wrapper functions 00257 //===----------------------------------------------------------------------===// 00258 00259 /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a 00260 /// buffer 00261 ModuleProvider* 00262 llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer, 00263 unsigned Length, 00264 const std::string &ModuleID, 00265 BytecodeHandler* H ) { 00266 return CheckVarargs( 00267 new BytecodeBufferReader(Buffer, Length, ModuleID, H)); 00268 } 00269 00270 /// ParseBytecodeBuffer - Parse a given bytecode buffer 00271 /// 00272 Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length, 00273 const std::string &ModuleID, 00274 std::string *ErrorStr){ 00275 try { 00276 std::auto_ptr<ModuleProvider> 00277 AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID)); 00278 return AMP->releaseModule(); 00279 } catch (std::string &err) { 00280 if (ErrorStr) *ErrorStr = err; 00281 return 0; 00282 } 00283 } 00284 00285 /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file 00286 /// 00287 ModuleProvider *llvm::getBytecodeModuleProvider(const std::string &Filename, 00288 BytecodeHandler* H) { 00289 if (Filename != std::string("-")) // Read from a file... 00290 return CheckVarargs(new BytecodeFileReader(Filename,H)); 00291 else // Read from stdin 00292 return CheckVarargs(new BytecodeStdinReader(H)); 00293 } 00294 00295 /// ParseBytecodeFile - Parse the given bytecode file 00296 /// 00297 Module *llvm::ParseBytecodeFile(const std::string &Filename, 00298 std::string *ErrorStr) { 00299 try { 00300 std::auto_ptr<ModuleProvider> AMP(getBytecodeModuleProvider(Filename)); 00301 return AMP->releaseModule(); 00302 } catch (std::string &err) { 00303 if (ErrorStr) *ErrorStr = err; 00304 return 0; 00305 } 00306 } 00307 00308 // AnalyzeBytecodeFile - analyze one file 00309 Module* llvm::AnalyzeBytecodeFile( 00310 const std::string &Filename, ///< File to analyze 00311 BytecodeAnalysis& bca, ///< Statistical output 00312 std::string *ErrorStr, ///< Error output 00313 std::ostream* output ///< Dump output 00314 ) 00315 { 00316 try { 00317 BytecodeHandler* analyzerHandler =createBytecodeAnalyzerHandler(bca,output); 00318 std::auto_ptr<ModuleProvider> AMP( 00319 getBytecodeModuleProvider(Filename,analyzerHandler)); 00320 return AMP->releaseModule(); 00321 } catch (std::string &err) { 00322 if (ErrorStr) *ErrorStr = err; 00323 return 0; 00324 } 00325 } 00326 00327 // AnalyzeBytecodeBuffer - analyze a buffer 00328 Module* llvm::AnalyzeBytecodeBuffer( 00329 const unsigned char* Buffer, ///< Pointer to start of bytecode buffer 00330 unsigned Length, ///< Size of the bytecode buffer 00331 const std::string& ModuleID, ///< Identifier for the module 00332 BytecodeAnalysis& bca, ///< The results of the analysis 00333 std::string* ErrorStr, ///< Errors, if any. 00334 std::ostream* output ///< Dump output, if any 00335 ) 00336 { 00337 try { 00338 BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output); 00339 std::auto_ptr<ModuleProvider> 00340 AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, hdlr)); 00341 return AMP->releaseModule(); 00342 } catch (std::string &err) { 00343 if (ErrorStr) *ErrorStr = err; 00344 return 0; 00345 } 00346 } 00347 00348 bool llvm::GetBytecodeDependentLibraries(const std::string &fname, 00349 Module::LibraryListType& deplibs) { 00350 try { 00351 std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fname)); 00352 Module* M = AMP->releaseModule(); 00353 00354 deplibs = M->getLibraries(); 00355 delete M; 00356 return true; 00357 } catch (...) { 00358 deplibs.clear(); 00359 return false; 00360 } 00361 } 00362 00363 static void getSymbols(Module*M, std::vector<std::string>& symbols) { 00364 // Loop over global variables 00365 for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI) 00366 if (!GI->isExternal() && !GI->hasInternalLinkage()) 00367 if (!GI->getName().empty()) 00368 symbols.push_back(GI->getName()); 00369 00370 // Loop over functions. 00371 for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) 00372 if (!FI->isExternal() && !FI->hasInternalLinkage()) 00373 if (!FI->getName().empty()) 00374 symbols.push_back(FI->getName()); 00375 } 00376 00377 // Get just the externally visible defined symbols from the bytecode 00378 bool llvm::GetBytecodeSymbols(const sys::Path& fName, 00379 std::vector<std::string>& symbols) { 00380 std::auto_ptr<ModuleProvider> AMP( 00381 getBytecodeModuleProvider(fName.toString())); 00382 00383 // Get the module from the provider 00384 Module* M = AMP->materializeModule(); 00385 if (M == 0) return false; 00386 00387 // Get the symbols 00388 getSymbols(M, symbols); 00389 00390 // Done with the module 00391 return true; 00392 } 00393 00394 ModuleProvider* 00395 llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length, 00396 const std::string& ModuleID, 00397 std::vector<std::string>& symbols) { 00398 00399 ModuleProvider* MP = 0; 00400 try { 00401 // Get the module provider 00402 MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID); 00403 00404 // Get the module from the provider 00405 Module* M = MP->materializeModule(); 00406 if (M == 0) return 0; 00407 00408 // Get the symbols 00409 getSymbols(M, symbols); 00410 00411 // Done with the module. Note that ModuleProvider will delete the 00412 // Module when it is deleted. Also note that its the caller's responsibility 00413 // to delete the ModuleProvider. 00414 return MP; 00415 00416 } catch (...) { 00417 // We delete only the ModuleProvider here because its destructor will 00418 // also delete the Module (we used materializeModule not releaseModule). 00419 delete MP; 00420 } 00421 return 0; 00422 }