LLVM API Documentation
//===- ReaderWrappers.cpp - Parse bytecode from file or buffer -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements loading and parsing a bytecode file and parsing a
// bytecode module from a given buffer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Bytecode/Analyzer.h"
#include "llvm/Bytecode/Reader.h"
#include "Reader.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/unistd.h"
#include <cerrno>
#include <cstring>
#include <memory>
using namespace llvm;

//===----------------------------------------------------------------------===//
// BytecodeFileReader - Read from an mmap'able file descriptor.
00028 // 00029 00030 namespace { 00031 /// BytecodeFileReader - parses a bytecode file from a file 00032 /// 00033 class BytecodeFileReader : public BytecodeReader { 00034 private: 00035 unsigned char *Buffer; 00036 unsigned Length; 00037 00038 BytecodeFileReader(const BytecodeFileReader&); // Do not implement 00039 void operator=(const BytecodeFileReader &BFR); // Do not implement 00040 00041 public: 00042 BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0); 00043 ~BytecodeFileReader(); 00044 }; 00045 } 00046 00047 static std::string ErrnoMessage (int savedErrNum, std::string descr) { 00048 return ::strerror(savedErrNum) + std::string(", while trying to ") + descr; 00049 } 00050 00051 BytecodeFileReader::BytecodeFileReader(const std::string &Filename, 00052 llvm::BytecodeHandler* H ) 00053 : BytecodeReader(H) 00054 { 00055 Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length); 00056 if (Buffer == 0) 00057 throw "Error reading file '" + Filename + "'."; 00058 00059 try { 00060 // Parse the bytecode we mmapped in 00061 ParseBytecode(Buffer, Length, Filename); 00062 } catch (...) { 00063 UnmapFileFromAddressSpace(Buffer, Length); 00064 throw; 00065 } 00066 } 00067 00068 BytecodeFileReader::~BytecodeFileReader() { 00069 // Unmmap the bytecode... 
00070 UnmapFileFromAddressSpace(Buffer, Length); 00071 } 00072 00073 //===----------------------------------------------------------------------===// 00074 // BytecodeBufferReader - Read from a memory buffer 00075 // 00076 00077 namespace { 00078 /// BytecodeBufferReader - parses a bytecode file from a buffer 00079 /// 00080 class BytecodeBufferReader : public BytecodeReader { 00081 private: 00082 const unsigned char *Buffer; 00083 bool MustDelete; 00084 00085 BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement 00086 void operator=(const BytecodeBufferReader &BFR); // Do not implement 00087 00088 public: 00089 BytecodeBufferReader(const unsigned char *Buf, unsigned Length, 00090 const std::string &ModuleID, 00091 llvm::BytecodeHandler* Handler = 0); 00092 ~BytecodeBufferReader(); 00093 00094 }; 00095 } 00096 00097 BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf, 00098 unsigned Length, 00099 const std::string &ModuleID, 00100 llvm::BytecodeHandler* H ) 00101 : BytecodeReader(H) 00102 { 00103 // If not aligned, allocate a new buffer to hold the bytecode... 00104 const unsigned char *ParseBegin = 0; 00105 if (reinterpret_cast<uint64_t>(Buf) & 3) { 00106 Buffer = new unsigned char[Length+4]; 00107 unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned 00108 ParseBegin = Buffer + Offset; 00109 memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over 00110 MustDelete = true; 00111 } else { 00112 // If we don't need to copy it over, just use the caller's copy 00113 ParseBegin = Buffer = Buf; 00114 MustDelete = false; 00115 } 00116 try { 00117 ParseBytecode(ParseBegin, Length, ModuleID); 00118 } catch (...) 
{ 00119 if (MustDelete) delete [] Buffer; 00120 throw; 00121 } 00122 } 00123 00124 BytecodeBufferReader::~BytecodeBufferReader() { 00125 if (MustDelete) delete [] Buffer; 00126 } 00127 00128 //===----------------------------------------------------------------------===// 00129 // BytecodeStdinReader - Read bytecode from Standard Input 00130 // 00131 00132 namespace { 00133 /// BytecodeStdinReader - parses a bytecode file from stdin 00134 /// 00135 class BytecodeStdinReader : public BytecodeReader { 00136 private: 00137 std::vector<unsigned char> FileData; 00138 unsigned char *FileBuf; 00139 00140 BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement 00141 void operator=(const BytecodeStdinReader &BFR); // Do not implement 00142 00143 public: 00144 BytecodeStdinReader( llvm::BytecodeHandler* H = 0 ); 00145 }; 00146 } 00147 00148 BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H ) 00149 : BytecodeReader(H) 00150 { 00151 int BlockSize; 00152 unsigned char Buffer[4096*4]; 00153 00154 // Read in all of the data from stdin, we cannot mmap stdin... 00155 while ((BlockSize = ::read(0 /*stdin*/, Buffer, 4096*4))) { 00156 if (BlockSize == -1) 00157 throw ErrnoMessage(errno, "read from standard input"); 00158 00159 FileData.insert(FileData.end(), Buffer, Buffer+BlockSize); 00160 } 00161 00162 if (FileData.empty()) 00163 throw std::string("Standard Input empty!"); 00164 00165 FileBuf = &FileData[0]; 00166 ParseBytecode(FileBuf, FileData.size(), "<stdin>"); 00167 } 00168 00169 //===----------------------------------------------------------------------===// 00170 // Varargs transmogrification code... 00171 // 00172 00173 // CheckVarargs - This is used to automatically translate old-style varargs to 00174 // new style varargs for backwards compatibility. 00175 static ModuleProvider *CheckVarargs(ModuleProvider *MP) { 00176 Module *M = MP->getModule(); 00177 00178 // Check to see if va_start takes arguments... 
00179 Function *F = M->getNamedFunction("llvm.va_start"); 00180 if (F == 0) return MP; // No varargs use, just return. 00181 00182 if (F->getFunctionType()->getNumParams() == 0) 00183 return MP; // Modern varargs processing, just return. 00184 00185 // If we get to this point, we know that we have an old-style module. 00186 // Materialize the whole thing to perform the rewriting. 00187 MP->materializeModule(); 00188 00189 // If the user is making use of obsolete varargs intrinsics, adjust them for 00190 // the user. 00191 if (Function *F = M->getNamedFunction("llvm.va_start")) { 00192 assert(F->asize() == 1 && "Obsolete va_start takes 1 argument!"); 00193 00194 const Type *RetTy = F->getFunctionType()->getParamType(0); 00195 RetTy = cast<PointerType>(RetTy)->getElementType(); 00196 Function *NF = M->getOrInsertFunction("llvm.va_start", RetTy, 0); 00197 00198 for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ) 00199 if (CallInst *CI = dyn_cast<CallInst>(*I++)) { 00200 Value *V = new CallInst(NF, "", CI); 00201 new StoreInst(V, CI->getOperand(1), CI); 00202 CI->getParent()->getInstList().erase(CI); 00203 } 00204 F->setName(""); 00205 } 00206 00207 if (Function *F = M->getNamedFunction("llvm.va_end")) { 00208 assert(F->asize() == 1 && "Obsolete va_end takes 1 argument!"); 00209 const Type *ArgTy = F->getFunctionType()->getParamType(0); 00210 ArgTy = cast<PointerType>(ArgTy)->getElementType(); 00211 Function *NF = M->getOrInsertFunction("llvm.va_end", Type::VoidTy, 00212 ArgTy, 0); 00213 00214 for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ) 00215 if (CallInst *CI = dyn_cast<CallInst>(*I++)) { 00216 Value *V = new LoadInst(CI->getOperand(1), "", CI); 00217 new CallInst(NF, V, "", CI); 00218 CI->getParent()->getInstList().erase(CI); 00219 } 00220 F->setName(""); 00221 } 00222 00223 if (Function *F = M->getNamedFunction("llvm.va_copy")) { 00224 assert(F->asize() == 2 && "Obsolete va_copy takes 2 argument!"); 00225 const Type 
*ArgTy = F->getFunctionType()->getParamType(0); 00226 ArgTy = cast<PointerType>(ArgTy)->getElementType(); 00227 Function *NF = M->getOrInsertFunction("llvm.va_copy", ArgTy, 00228 ArgTy, 0); 00229 00230 for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ) 00231 if (CallInst *CI = dyn_cast<CallInst>(*I++)) { 00232 Value *V = new CallInst(NF, CI->getOperand(2), "", CI); 00233 new StoreInst(V, CI->getOperand(1), CI); 00234 CI->getParent()->getInstList().erase(CI); 00235 } 00236 F->setName(""); 00237 } 00238 return MP; 00239 } 00240 00241 //===----------------------------------------------------------------------===// 00242 // Wrapper functions 00243 //===----------------------------------------------------------------------===// 00244 00245 /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a 00246 /// buffer 00247 ModuleProvider* 00248 llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer, 00249 unsigned Length, 00250 const std::string &ModuleID, 00251 BytecodeHandler* H ) { 00252 return CheckVarargs( 00253 new BytecodeBufferReader(Buffer, Length, ModuleID, H)); 00254 } 00255 00256 /// ParseBytecodeBuffer - Parse a given bytecode buffer 00257 /// 00258 Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length, 00259 const std::string &ModuleID, 00260 std::string *ErrorStr){ 00261 try { 00262 std::auto_ptr<ModuleProvider> 00263 AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID)); 00264 return AMP->releaseModule(); 00265 } catch (std::string &err) { 00266 if (ErrorStr) *ErrorStr = err; 00267 return 0; 00268 } 00269 } 00270 00271 /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file 00272 /// 00273 ModuleProvider *llvm::getBytecodeModuleProvider(const std::string &Filename, 00274 BytecodeHandler* H) { 00275 if (Filename != std::string("-")) // Read from a file... 
00276 return CheckVarargs(new BytecodeFileReader(Filename,H)); 00277 else // Read from stdin 00278 return CheckVarargs(new BytecodeStdinReader(H)); 00279 } 00280 00281 /// ParseBytecodeFile - Parse the given bytecode file 00282 /// 00283 Module *llvm::ParseBytecodeFile(const std::string &Filename, 00284 std::string *ErrorStr) { 00285 try { 00286 std::auto_ptr<ModuleProvider> AMP(getBytecodeModuleProvider(Filename)); 00287 return AMP->releaseModule(); 00288 } catch (std::string &err) { 00289 if (ErrorStr) *ErrorStr = err; 00290 return 0; 00291 } 00292 } 00293 00294 // AnalyzeBytecodeFile - analyze one file 00295 Module* llvm::AnalyzeBytecodeFile( 00296 const std::string &Filename, ///< File to analyze 00297 BytecodeAnalysis& bca, ///< Statistical output 00298 std::string *ErrorStr, ///< Error output 00299 std::ostream* output ///< Dump output 00300 ) 00301 { 00302 try { 00303 BytecodeHandler* analyzerHandler =createBytecodeAnalyzerHandler(bca,output); 00304 std::auto_ptr<ModuleProvider> AMP( 00305 getBytecodeModuleProvider(Filename,analyzerHandler)); 00306 return AMP->releaseModule(); 00307 } catch (std::string &err) { 00308 if (ErrorStr) *ErrorStr = err; 00309 return 0; 00310 } 00311 } 00312 00313 // AnalyzeBytecodeBuffer - analyze a buffer 00314 Module* llvm::AnalyzeBytecodeBuffer( 00315 const unsigned char* Buffer, ///< Pointer to start of bytecode buffer 00316 unsigned Length, ///< Size of the bytecode buffer 00317 const std::string& ModuleID, ///< Identifier for the module 00318 BytecodeAnalysis& bca, ///< The results of the analysis 00319 std::string* ErrorStr, ///< Errors, if any. 
00320 std::ostream* output ///< Dump output, if any 00321 ) 00322 { 00323 try { 00324 BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output); 00325 std::auto_ptr<ModuleProvider> 00326 AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, hdlr)); 00327 return AMP->releaseModule(); 00328 } catch (std::string &err) { 00329 if (ErrorStr) *ErrorStr = err; 00330 return 0; 00331 } 00332 } 00333 00334 bool llvm::GetBytecodeDependentLibraries(const std::string &fname, 00335 Module::LibraryListType& deplibs) { 00336 try { 00337 std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fname)); 00338 Module* M = AMP->releaseModule(); 00339 00340 deplibs = M->getLibraries(); 00341 delete M; 00342 return true; 00343 } catch (...) { 00344 deplibs.clear(); 00345 return false; 00346 } 00347 } 00348 00349 namespace { 00350 void getSymbols(Module*M, std::vector<std::string>& symbols) { 00351 // Loop over global variables 00352 for (Module::giterator GI = M->gbegin(), GE=M->gend(); GI != GE; ++GI) { 00353 if (GI->hasInitializer()) { 00354 std::string name ( GI->getName() ); 00355 if (!name.empty()) { 00356 symbols.push_back(name); 00357 } 00358 } 00359 } 00360 00361 //Loop over functions 00362 for (Module::iterator FI = M->begin(), FE=M->end(); FI != FE; ++FI) { 00363 if (!FI->isExternal()) { 00364 std::string name ( FI->getName() ); 00365 if (!name.empty()) { 00366 symbols.push_back(name); 00367 } 00368 } 00369 } 00370 } 00371 } 00372 00373 // Get just the externally visible defined symbols from the bytecode 00374 bool llvm::GetBytecodeSymbols(const sys::Path& fName, 00375 std::vector<std::string>& symbols) { 00376 try { 00377 std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fName.get())); 00378 00379 // Get the module from the provider 00380 Module* M = AMP->materializeModule(); 00381 00382 // Get the symbols 00383 getSymbols(M, symbols); 00384 00385 // Done with the module 00386 return true; 00387 00388 } catch (...) 
{ 00389 return false; 00390 } 00391 } 00392 00393 ModuleProvider* 00394 llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length, 00395 const std::string& ModuleID, 00396 std::vector<std::string>& symbols) { 00397 00398 ModuleProvider* MP = 0; 00399 try { 00400 // Get the module provider 00401 MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID); 00402 00403 // Get the module from the provider 00404 Module* M = MP->materializeModule(); 00405 00406 // Get the symbols 00407 getSymbols(M, symbols); 00408 00409 // Done with the module. Note that ModuleProvider will delete the 00410 // Module when it is deleted. Also note that its the caller's responsibility 00411 // to delete the ModuleProvider. 00412 return MP; 00413 00414 } catch (...) { 00415 // We delete only the ModuleProvider here because its destructor will 00416 // also delete the Module (we used materializeModule not releaseModule). 00417 delete MP; 00418 } 00419 return 0; 00420 } 00421 // vim: sw=2 ai