LLVM API Documentation
00001 //===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Reid Spencer and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Builds up standard unix archive files (.a) containing LLVM bytecode. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "ArchiveInternals.h" 00015 #include "llvm/Bytecode/Reader.h" 00016 #include <memory> 00017 00018 using namespace llvm; 00019 00020 /// Read a variable-bit-rate encoded unsigned integer 00021 inline unsigned readInteger(const char*&At, const char*End) { 00022 unsigned Shift = 0; 00023 unsigned Result = 0; 00024 00025 do { 00026 if (At == End) 00027 throw std::string("Ran out of data reading vbr_uint!"); 00028 Result |= (unsigned)((*At++) & 0x7F) << Shift; 00029 Shift += 7; 00030 } while (At[-1] & 0x80); 00031 return Result; 00032 } 00033 00034 // Completely parse the Archive's symbol table and populate symTab member var. 00035 void 00036 Archive::parseSymbolTable(const void* data, unsigned size) { 00037 const char* At = (const char*) data; 00038 const char* End = At + size; 00039 while (At < End) { 00040 unsigned offset = readInteger(At, End); 00041 unsigned length = readInteger(At, End); 00042 if (At + length > End) 00043 throw std::string("malformed symbol table"); 00044 // we don't care if it can't be inserted (duplicate entry) 00045 symTab.insert(std::make_pair(std::string(At, length), offset)); 00046 At += length; 00047 } 00048 symTabSize = size; 00049 } 00050 00051 // This member parses an ArchiveMemberHeader that is presumed to be pointed to 00052 // by At. The At pointer is updated to the byte just after the header, which 00053 // can be variable in size. 00054 ArchiveMember* 00055 Archive::parseMemberHeader(const char*& At, const char* End) { 00056 assert(At + sizeof(ArchiveMemberHeader) < End && "Not enough data"); 00057 00058 // Cast archive member header 00059 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; 00060 At += sizeof(ArchiveMemberHeader); 00061 00062 // Instantiate the ArchiveMember to be filled 00063 ArchiveMember* member = new ArchiveMember(this); 00064 00065 // Extract the size and determine if the file is 00066 // compressed or not (negative length). 00067 int flags = 0; 00068 int MemberSize = atoi(Hdr->size); 00069 if (MemberSize < 0) { 00070 flags |= ArchiveMember::CompressedFlag; 00071 MemberSize = -MemberSize; 00072 } 00073 00074 // Check the size of the member for sanity 00075 if (At + MemberSize > End) 00076 throw std::string("invalid member length in archive file"); 00077 00078 // Check the member signature 00079 if (!Hdr->checkSignature()) 00080 throw std::string("invalid file member signature"); 00081 00082 // Convert and check the member name 00083 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol 00084 // table. The special name "//" and 14 blanks is for a string table, used 00085 // for long file names. This library doesn't generate either of those but 00086 // it will accept them. If the name starts with #1/ and the remainder is 00087 // digits, then those digits specify the length of the name that is 00088 // stored immediately following the header. The special name 00089 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bytecode. 00090 // Anything else is a regular, short filename that is terminated with 00091 // a '/' and blanks. 00092 00093 std::string pathname; 00094 switch (Hdr->name[0]) { 00095 case '#': 00096 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { 00097 if (isdigit(Hdr->name[3])) { 00098 unsigned len = atoi(&Hdr->name[3]); 00099 pathname.assign(At, len); 00100 At += len; 00101 MemberSize -= len; 00102 flags |= ArchiveMember::HasLongFilenameFlag; 00103 } else 00104 throw std::string("invalid long filename"); 00105 } else if (Hdr->name[1] == '_' && 00106 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { 00107 // The member is using a long file name (>15 chars) format. 00108 // This format is standard for 4.4BSD and Mac OSX operating 00109 // systems. LLVM uses it similarly. In this format, the 00110 // remainder of the name field (after #1/) specifies the 00111 // length of the file name which occupy the first bytes of 00112 // the member's data. The pathname already has the #1/ stripped. 00113 pathname.assign(ARFILE_LLVM_SYMTAB_NAME); 00114 flags |= ArchiveMember::LLVMSymbolTableFlag; 00115 } 00116 break; 00117 case '/': 00118 if (Hdr->name[1]== '/') { 00119 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { 00120 pathname.assign(ARFILE_STRTAB_NAME); 00121 flags |= ArchiveMember::StringTableFlag; 00122 } else { 00123 throw std::string("invalid string table name"); 00124 } 00125 } else if (Hdr->name[1] == ' ') { 00126 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { 00127 pathname.assign(ARFILE_SVR4_SYMTAB_NAME); 00128 flags |= ArchiveMember::SVR4SymbolTableFlag; 00129 } else { 00130 throw std::string("invalid SVR4 symbol table name"); 00131 } 00132 } else if (isdigit(Hdr->name[1])) { 00133 unsigned index = atoi(&Hdr->name[1]); 00134 if (index < strtab.length()) { 00135 const char* namep = strtab.c_str() + index; 00136 const char* endp = strtab.c_str() + strtab.length(); 00137 const char* p = namep; 00138 const char* last_p = p; 00139 while (p < endp) { 00140 if (*p == '\n' && *last_p == '/') { 00141 pathname.assign(namep, last_p - namep); 00142 flags |= ArchiveMember::HasLongFilenameFlag; 00143 break; 00144 } 00145 last_p = p; 00146 p++; 00147 } 00148 if (p >= endp) 00149 throw std::string("missing name termiantor in string table"); 00150 } else { 00151 throw std::string("name index beyond string table"); 00152 } 00153 } 00154 break; 00155 case '_': 00156 if (Hdr->name[1] == '_' && 00157 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { 00158 pathname.assign(ARFILE_BSD4_SYMTAB_NAME); 00159 flags |= ArchiveMember::BSD4SymbolTableFlag; 00160 break; 00161 } 00162 /* FALL THROUGH */ 00163 00164 default: 00165 char* slash = (char*) memchr(Hdr->name, '/', 16); 00166 if (slash == 0) 00167 slash = Hdr->name + 16; 00168 pathname.assign(Hdr->name, slash - Hdr->name); 00169 break; 00170 } 00171 00172 // Determine if this is a bytecode file 00173 switch (sys::IdentifyFileType(At, 4)) { 00174 case sys::BytecodeFileType: 00175 flags |= ArchiveMember::BytecodeFlag; 00176 break; 00177 case sys::CompressedBytecodeFileType: 00178 flags |= ArchiveMember::CompressedBytecodeFlag; 00179 flags &= ~ArchiveMember::CompressedFlag; 00180 break; 00181 default: 00182 flags &= ~(ArchiveMember::BytecodeFlag| 00183 ArchiveMember::CompressedBytecodeFlag); 00184 break; 00185 } 00186 00187 // Fill in fields of the ArchiveMember 00188 member->next = 0; 00189 member->prev = 0; 00190 member->parent = this; 00191 member->path.set(pathname); 00192 member->info.fileSize = MemberSize; 00193 member->info.modTime.fromEpochTime(atoi(Hdr->date)); 00194 unsigned int mode; 00195 sscanf(Hdr->mode, "%o", &mode); 00196 member->info.mode = mode; 00197 member->info.user = atoi(Hdr->uid); 00198 member->info.group = atoi(Hdr->gid); 00199 member->flags = flags; 00200 member->data = At; 00201 00202 return member; 00203 } 00204 00205 void 00206 Archive::checkSignature() { 00207 // Check the magic string at file's header 00208 if (mapfile->size() < 8 || memcmp(base, ARFILE_MAGIC, 8)) 00209 throw std::string("invalid signature for an archive file"); 00210 } 00211 00212 // This function loads the entire archive and fully populates its ilist with 00213 // the members of the archive file. This is typically used in preparation for 00214 // editing the contents of the archive. 00215 void 00216 Archive::loadArchive() { 00217 00218 // Set up parsing 00219 members.clear(); 00220 symTab.clear(); 00221 const char *At = base; 00222 const char *End = base + mapfile->size(); 00223 00224 checkSignature(); 00225 At += 8; // Skip the magic string. 00226 00227 bool seenSymbolTable = false; 00228 bool foundFirstFile = false; 00229 while (At < End) { 00230 // parse the member header 00231 const char* Save = At; 00232 ArchiveMember* mbr = parseMemberHeader(At, End); 00233 00234 // check if this is the foreign symbol table 00235 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 00236 // We just save this but don't do anything special 00237 // with it. It doesn't count as the "first file". 00238 if (foreignST) { 00239 // What? Multiple foreign symbol tables? Just chuck it 00240 // and retain the last one found. 00241 delete foreignST; 00242 } 00243 foreignST = mbr; 00244 At += mbr->getSize(); 00245 if ((intptr_t(At) & 1) == 1) 00246 At++; 00247 } else if (mbr->isStringTable()) { 00248 // Simply suck the entire string table into a string 00249 // variable. This will be used to get the names of the 00250 // members that use the "/ddd" format for their names 00251 // (SVR4 style long names). 00252 strtab.assign(At, mbr->getSize()); 00253 At += mbr->getSize(); 00254 if ((intptr_t(At) & 1) == 1) 00255 At++; 00256 delete mbr; 00257 } else if (mbr->isLLVMSymbolTable()) { 00258 // This is the LLVM symbol table for the archive. If we've seen it 00259 // already, its an error. Otherwise, parse the symbol table and move on. 00260 if (seenSymbolTable) 00261 throw std::string("invalid archive: multiple symbol tables"); 00262 parseSymbolTable(mbr->getData(), mbr->getSize()); 00263 seenSymbolTable = true; 00264 At += mbr->getSize(); 00265 if ((intptr_t(At) & 1) == 1) 00266 At++; 00267 delete mbr; // We don't need this member in the list of members. 00268 } else { 00269 // This is just a regular file. If its the first one, save its offset. 00270 // Otherwise just push it on the list and move on to the next file. 00271 if (!foundFirstFile) { 00272 firstFileOffset = Save - base; 00273 foundFirstFile = true; 00274 } 00275 members.push_back(mbr); 00276 At += mbr->getSize(); 00277 if ((intptr_t(At) & 1) == 1) 00278 At++; 00279 } 00280 } 00281 } 00282 00283 // Open and completely load the archive file. 00284 Archive* 00285 Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage) { 00286 try { 00287 std::auto_ptr<Archive> result ( new Archive(file, true)); 00288 result->loadArchive(); 00289 return result.release(); 00290 } catch (const std::string& msg) { 00291 if (ErrorMessage) { 00292 *ErrorMessage = msg; 00293 } 00294 return 0; 00295 } 00296 } 00297 00298 // Get all the bytecode modules from the archive 00299 bool 00300 Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { 00301 00302 for (iterator I=begin(), E=end(); I != E; ++I) { 00303 if (I->isBytecode() || I->isCompressedBytecode()) { 00304 std::string FullMemberName = archPath.toString() + 00305 "(" + I->getPath().toString() + ")"; 00306 Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(), 00307 I->getSize(), FullMemberName, ErrMessage); 00308 if (!M) 00309 return true; 00310 00311 Modules.push_back(M); 00312 } 00313 } 00314 return false; 00315 } 00316 00317 // Load just the symbol table from the archive file 00318 void 00319 Archive::loadSymbolTable() { 00320 00321 // Set up parsing 00322 members.clear(); 00323 symTab.clear(); 00324 const char *At = base; 00325 const char *End = base + mapfile->size(); 00326 00327 // Make sure we're dealing with an archive 00328 checkSignature(); 00329 00330 At += 8; // Skip signature 00331 00332 // Parse the first file member header 00333 const char* FirstFile = At; 00334 ArchiveMember* mbr = parseMemberHeader(At, End); 00335 00336 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 00337 // Skip the foreign symbol table, we don't do anything with it 00338 At += mbr->getSize(); 00339 if ((intptr_t(At) & 1) == 1) 00340 At++; 00341 delete mbr; 00342 00343 // Read the next one 00344 FirstFile = At; 00345 mbr = parseMemberHeader(At, End); 00346 } 00347 00348 if (mbr->isStringTable()) { 00349 // Process the string table entry 00350 strtab.assign((const char*)mbr->getData(), mbr->getSize()); 00351 At += mbr->getSize(); 00352 if ((intptr_t(At) & 1) == 1) 00353 At++; 00354 delete mbr; 00355 // Get the next one 00356 FirstFile = At; 00357 mbr = parseMemberHeader(At, End); 00358 } 00359 00360 // See if its the symbol table 00361 if (mbr->isLLVMSymbolTable()) { 00362 parseSymbolTable(mbr->getData(), mbr->getSize()); 00363 At += mbr->getSize(); 00364 if ((intptr_t(At) & 1) == 1) 00365 At++; 00366 FirstFile = At; 00367 } else { 00368 // There's no symbol table in the file. We have to rebuild it from scratch 00369 // because the intent of this method is to get the symbol table loaded so 00370 // it can be searched efficiently. 00371 // Add the member to the members list 00372 members.push_back(mbr); 00373 } 00374 00375 firstFileOffset = FirstFile - base; 00376 } 00377 00378 // Open the archive and load just the symbol tables 00379 Archive* 00380 Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) { 00381 try { 00382 std::auto_ptr<Archive> result ( new Archive(file, true) ); 00383 result->loadSymbolTable(); 00384 return result.release(); 00385 } catch (const std::string& msg) { 00386 if (ErrorMessage) { 00387 *ErrorMessage = msg; 00388 } 00389 return 0; 00390 } 00391 } 00392 00393 // Look up one symbol in the symbol table and return a ModuleProvider for the 00394 // module that defines that symbol. 00395 ModuleProvider* 00396 Archive::findModuleDefiningSymbol(const std::string& symbol) { 00397 SymTabType::iterator SI = symTab.find(symbol); 00398 if (SI == symTab.end()) 00399 return 0; 00400 00401 // The symbol table was previously constructed assuming that the members were 00402 // written without the symbol table header. Because VBR encoding is used, the 00403 // values could not be adjusted to account for the offset of the symbol table 00404 // because that could affect the size of the symbol table due to VBR encoding. 00405 // We now have to account for this by adjusting the offset by the size of the 00406 // symbol table and its header. 00407 unsigned fileOffset = 00408 SI->second + // offset in symbol-table-less file 00409 firstFileOffset; // add offset to first "real" file in archive 00410 00411 // See if the module is already loaded 00412 ModuleMap::iterator MI = modules.find(fileOffset); 00413 if (MI != modules.end()) 00414 return MI->second.first; 00415 00416 // Module hasn't been loaded yet, we need to load it 00417 const char* modptr = base + fileOffset; 00418 ArchiveMember* mbr = parseMemberHeader(modptr, base + mapfile->size()); 00419 00420 // Now, load the bytecode module to get the ModuleProvider 00421 std::string FullMemberName = archPath.toString() + "(" + 00422 mbr->getPath().toString() + ")"; 00423 ModuleProvider* mp = getBytecodeBufferModuleProvider( 00424 (const unsigned char*) mbr->getData(), mbr->getSize(), 00425 FullMemberName, 0); 00426 00427 modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); 00428 00429 return mp; 00430 } 00431 00432 // Look up multiple symbols in the symbol table and return a set of 00433 // ModuleProviders that define those symbols. 00434 void 00435 Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, 00436 std::set<ModuleProvider*>& result) 00437 { 00438 assert(mapfile && base && "Can't findModulesDefiningSymbols on new archive"); 00439 if (symTab.empty()) { 00440 // We don't have a symbol table, so we must build it now but lets also 00441 // make sure that we populate the modules table as we do this to ensure 00442 // that we don't load them twice when findModuleDefiningSymbol is called 00443 // below. 00444 00445 // Get a pointer to the first file 00446 const char* At = ((const char*)base) + firstFileOffset; 00447 const char* End = ((const char*)base) + mapfile->size(); 00448 00449 while ( At < End) { 00450 // Compute the offset to be put in the symbol table 00451 unsigned offset = At - base - firstFileOffset; 00452 00453 // Parse the file's header 00454 ArchiveMember* mbr = parseMemberHeader(At, End); 00455 00456 // If it contains symbols 00457 if (mbr->isBytecode() || mbr->isCompressedBytecode()) { 00458 // Get the symbols 00459 std::vector<std::string> symbols; 00460 std::string FullMemberName = archPath.toString() + "(" + 00461 mbr->getPath().toString() + ")"; 00462 ModuleProvider* MP = GetBytecodeSymbols((const unsigned char*)At, 00463 mbr->getSize(), FullMemberName, symbols); 00464 00465 if (MP) { 00466 // Insert the module's symbols into the symbol table 00467 for (std::vector<std::string>::iterator I = symbols.begin(), 00468 E=symbols.end(); I != E; ++I ) { 00469 symTab.insert(std::make_pair(*I, offset)); 00470 } 00471 // Insert the ModuleProvider and the ArchiveMember into the table of 00472 // modules. 00473 modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); 00474 } else { 00475 throw std::string("Can't parse bytecode member: ") + 00476 mbr->getPath().toString(); 00477 } 00478 } 00479 00480 // Go to the next file location 00481 At += mbr->getSize(); 00482 if ((intptr_t(At) & 1) == 1) 00483 At++; 00484 } 00485 } 00486 00487 // At this point we have a valid symbol table (one way or another) so we 00488 // just use it to quickly find the symbols requested. 00489 00490 for (std::set<std::string>::iterator I=symbols.begin(), 00491 E=symbols.end(); I != E;) { 00492 // See if this symbol exists 00493 ModuleProvider* mp = findModuleDefiningSymbol(*I); 00494 if (mp) { 00495 // The symbol exists, insert the ModuleProvider into our result, 00496 // duplicates wil be ignored 00497 result.insert(mp); 00498 00499 // Remove the symbol now that its been resolved, being careful to 00500 // post-increment the iterator. 00501 symbols.erase(I++); 00502 } else { 00503 ++I; 00504 } 00505 } 00506 } 00507 00508 bool Archive::isBytecodeArchive() { 00509 // Make sure the symTab has been loaded. In most cases this should have been 00510 // done when the archive was constructed, but still, this is just in case. 00511 if (!symTab.size()) 00512 loadSymbolTable(); 00513 00514 // Now that we know it's been loaded, return true 00515 // if it has a size 00516 if (symTab.size()) return true; 00517 00518 //We still can't be sure it isn't a bytecode archive 00519 loadArchive(); 00520 00521 std::vector<Module *> Modules; 00522 std::string ErrorMessage; 00523 00524 // Scan the archive, trying to load a bytecode member. We only load one to 00525 // see if this works. 00526 for (iterator I = begin(), E = end(); I != E; ++I) { 00527 if (!I->isBytecode() && !I->isCompressedBytecode()) 00528 continue; 00529 00530 std::string FullMemberName = 00531 archPath.toString() + "(" + I->getPath().toString() + ")"; 00532 Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(), 00533 I->getSize(), FullMemberName); 00534 if (!M) 00535 return false; // Couldn't parse bytecode, not a bytecode archive. 00536 delete M; 00537 return true; 00538 } 00539 00540 return false; 00541 }