LLVM API Documentation
00001 //===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Reid Spencer and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Builds up standard unix archive files (.a) containing LLVM bytecode. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "ArchiveInternals.h" 00015 #include "llvm/Bytecode/Reader.h" 00016 #include <memory> 00017 00018 using namespace llvm; 00019 00020 /// Read a variable-bit-rate encoded unsigned integer 00021 inline unsigned readInteger(const char*&At, const char*End){ 00022 unsigned Shift = 0; 00023 unsigned Result = 0; 00024 00025 do { 00026 if (At == End) 00027 return Result; 00028 Result |= (unsigned)((*At++) & 0x7F) << Shift; 00029 Shift += 7; 00030 } while (At[-1] & 0x80); 00031 return Result; 00032 } 00033 00034 // Completely parse the Archive's symbol table and populate symTab member var. 00035 bool 00036 Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) { 00037 const char* At = (const char*) data; 00038 const char* End = At + size; 00039 while (At < End) { 00040 unsigned offset = readInteger(At, End); 00041 if (At == End) { 00042 if (error) 00043 *error = "Ran out of data reading vbr_uint for symtab offset!"; 00044 return false; 00045 } 00046 unsigned length = readInteger(At, End); 00047 if (At == End) { 00048 if (error) 00049 *error = "Ran out of data reading vbr_uint for symtab length!"; 00050 return false; 00051 } 00052 if (At + length > End) { 00053 if (error) 00054 *error = "Malformed symbol table: length not consistent with size"; 00055 return false; 00056 } 00057 // we don't care if it can't be inserted (duplicate entry) 00058 symTab.insert(std::make_pair(std::string(At, length), offset)); 00059 At += length; 00060 } 00061 symTabSize = size; 00062 return true; 00063 } 00064 00065 // This member parses an ArchiveMemberHeader that is presumed to be pointed to 00066 // by At. The At pointer is updated to the byte just after the header, which 00067 // can be variable in size. 00068 ArchiveMember* 00069 Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) 00070 { 00071 assert(At + sizeof(ArchiveMemberHeader) < End && "Not enough data"); 00072 00073 // Cast archive member header 00074 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; 00075 At += sizeof(ArchiveMemberHeader); 00076 00077 // Extract the size and determine if the file is 00078 // compressed or not (negative length). 00079 int flags = 0; 00080 int MemberSize = atoi(Hdr->size); 00081 if (MemberSize < 0) { 00082 flags |= ArchiveMember::CompressedFlag; 00083 MemberSize = -MemberSize; 00084 } 00085 00086 // Check the size of the member for sanity 00087 if (At + MemberSize > End) { 00088 if (error) 00089 *error = "invalid member length in archive file"; 00090 return 0; 00091 } 00092 00093 // Check the member signature 00094 if (!Hdr->checkSignature()) { 00095 if (error) 00096 *error = "invalid file member signature"; 00097 return 0; 00098 } 00099 00100 // Convert and check the member name 00101 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol 00102 // table. The special name "//" and 14 blanks is for a string table, used 00103 // for long file names. This library doesn't generate either of those but 00104 // it will accept them. If the name starts with #1/ and the remainder is 00105 // digits, then those digits specify the length of the name that is 00106 // stored immediately following the header. The special name 00107 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bytecode. 00108 // Anything else is a regular, short filename that is terminated with 00109 // a '/' and blanks. 00110 00111 std::string pathname; 00112 switch (Hdr->name[0]) { 00113 case '#': 00114 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { 00115 if (isdigit(Hdr->name[3])) { 00116 unsigned len = atoi(&Hdr->name[3]); 00117 pathname.assign(At, len); 00118 At += len; 00119 MemberSize -= len; 00120 flags |= ArchiveMember::HasLongFilenameFlag; 00121 } else { 00122 if (error) 00123 *error = "invalid long filename"; 00124 return 0; 00125 } 00126 } else if (Hdr->name[1] == '_' && 00127 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { 00128 // The member is using a long file name (>15 chars) format. 00129 // This format is standard for 4.4BSD and Mac OSX operating 00130 // systems. LLVM uses it similarly. In this format, the 00131 // remainder of the name field (after #1/) specifies the 00132 // length of the file name which occupy the first bytes of 00133 // the member's data. The pathname already has the #1/ stripped. 00134 pathname.assign(ARFILE_LLVM_SYMTAB_NAME); 00135 flags |= ArchiveMember::LLVMSymbolTableFlag; 00136 } 00137 break; 00138 case '/': 00139 if (Hdr->name[1]== '/') { 00140 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { 00141 pathname.assign(ARFILE_STRTAB_NAME); 00142 flags |= ArchiveMember::StringTableFlag; 00143 } else { 00144 if (error) 00145 *error = "invalid string table name"; 00146 return 0; 00147 } 00148 } else if (Hdr->name[1] == ' ') { 00149 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { 00150 pathname.assign(ARFILE_SVR4_SYMTAB_NAME); 00151 flags |= ArchiveMember::SVR4SymbolTableFlag; 00152 } else { 00153 if (error) 00154 *error = "invalid SVR4 symbol table name"; 00155 return 0; 00156 } 00157 } else if (isdigit(Hdr->name[1])) { 00158 unsigned index = atoi(&Hdr->name[1]); 00159 if (index < strtab.length()) { 00160 const char* namep = strtab.c_str() + index; 00161 const char* endp = strtab.c_str() + strtab.length(); 00162 const char* p = namep; 00163 const char* last_p = p; 00164 while (p < endp) { 00165 if (*p == '\n' && *last_p == '/') { 00166 pathname.assign(namep, last_p - namep); 00167 flags |= ArchiveMember::HasLongFilenameFlag; 00168 break; 00169 } 00170 last_p = p; 00171 p++; 00172 } 00173 if (p >= endp) { 00174 if (error) 00175 *error = "missing name termiantor in string table"; 00176 return 0; 00177 } 00178 } else { 00179 if (error) 00180 *error = "name index beyond string table"; 00181 return 0; 00182 } 00183 } 00184 break; 00185 case '_': 00186 if (Hdr->name[1] == '_' && 00187 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { 00188 pathname.assign(ARFILE_BSD4_SYMTAB_NAME); 00189 flags |= ArchiveMember::BSD4SymbolTableFlag; 00190 break; 00191 } 00192 /* FALL THROUGH */ 00193 00194 default: 00195 char* slash = (char*) memchr(Hdr->name, '/', 16); 00196 if (slash == 0) 00197 slash = Hdr->name + 16; 00198 pathname.assign(Hdr->name, slash - Hdr->name); 00199 break; 00200 } 00201 00202 // Determine if this is a bytecode file 00203 switch (sys::IdentifyFileType(At, 4)) { 00204 case sys::BytecodeFileType: 00205 flags |= ArchiveMember::BytecodeFlag; 00206 break; 00207 case sys::CompressedBytecodeFileType: 00208 flags |= ArchiveMember::CompressedBytecodeFlag; 00209 flags &= ~ArchiveMember::CompressedFlag; 00210 break; 00211 default: 00212 flags &= ~(ArchiveMember::BytecodeFlag| 00213 ArchiveMember::CompressedBytecodeFlag); 00214 break; 00215 } 00216 00217 // Instantiate the ArchiveMember to be filled 00218 ArchiveMember* member = new ArchiveMember(this); 00219 00220 // Fill in fields of the ArchiveMember 00221 member->next = 0; 00222 member->prev = 0; 00223 member->parent = this; 00224 member->path.set(pathname); 00225 member->info.fileSize = MemberSize; 00226 member->info.modTime.fromEpochTime(atoi(Hdr->date)); 00227 unsigned int mode; 00228 sscanf(Hdr->mode, "%o", &mode); 00229 member->info.mode = mode; 00230 member->info.user = atoi(Hdr->uid); 00231 member->info.group = atoi(Hdr->gid); 00232 member->flags = flags; 00233 member->data = At; 00234 00235 return member; 00236 } 00237 00238 bool 00239 Archive::checkSignature(std::string* error) { 00240 // Check the magic string at file's header 00241 if (mapfile->size() < 8 || memcmp(base, ARFILE_MAGIC, 8)) { 00242 if (error) 00243 *error = "invalid signature for an archive file"; 00244 return false; 00245 } 00246 return true; 00247 } 00248 00249 // This function loads the entire archive and fully populates its ilist with 00250 // the members of the archive file. This is typically used in preparation for 00251 // editing the contents of the archive. 00252 bool 00253 Archive::loadArchive(std::string* error) { 00254 00255 // Set up parsing 00256 members.clear(); 00257 symTab.clear(); 00258 const char *At = base; 00259 const char *End = base + mapfile->size(); 00260 00261 if (!checkSignature(error)) 00262 return false; 00263 00264 At += 8; // Skip the magic string. 00265 00266 bool seenSymbolTable = false; 00267 bool foundFirstFile = false; 00268 while (At < End) { 00269 // parse the member header 00270 const char* Save = At; 00271 ArchiveMember* mbr = parseMemberHeader(At, End, error); 00272 if (!mbr) 00273 return false; 00274 00275 // check if this is the foreign symbol table 00276 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 00277 // We just save this but don't do anything special 00278 // with it. It doesn't count as the "first file". 00279 if (foreignST) { 00280 // What? Multiple foreign symbol tables? Just chuck it 00281 // and retain the last one found. 00282 delete foreignST; 00283 } 00284 foreignST = mbr; 00285 At += mbr->getSize(); 00286 if ((intptr_t(At) & 1) == 1) 00287 At++; 00288 } else if (mbr->isStringTable()) { 00289 // Simply suck the entire string table into a string 00290 // variable. This will be used to get the names of the 00291 // members that use the "/ddd" format for their names 00292 // (SVR4 style long names). 00293 strtab.assign(At, mbr->getSize()); 00294 At += mbr->getSize(); 00295 if ((intptr_t(At) & 1) == 1) 00296 At++; 00297 delete mbr; 00298 } else if (mbr->isLLVMSymbolTable()) { 00299 // This is the LLVM symbol table for the archive. If we've seen it 00300 // already, its an error. Otherwise, parse the symbol table and move on. 00301 if (seenSymbolTable) { 00302 if (error) 00303 *error = "invalid archive: multiple symbol tables"; 00304 return false; 00305 } 00306 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error)) 00307 return false; 00308 seenSymbolTable = true; 00309 At += mbr->getSize(); 00310 if ((intptr_t(At) & 1) == 1) 00311 At++; 00312 delete mbr; // We don't need this member in the list of members. 00313 } else { 00314 // This is just a regular file. If its the first one, save its offset. 00315 // Otherwise just push it on the list and move on to the next file. 00316 if (!foundFirstFile) { 00317 firstFileOffset = Save - base; 00318 foundFirstFile = true; 00319 } 00320 members.push_back(mbr); 00321 At += mbr->getSize(); 00322 if ((intptr_t(At) & 1) == 1) 00323 At++; 00324 } 00325 } 00326 return true; 00327 } 00328 00329 // Open and completely load the archive file. 00330 Archive* 00331 Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage) 00332 { 00333 std::auto_ptr<Archive> result ( new Archive(file, true)); 00334 if (!result->loadArchive(ErrorMessage)) 00335 return 0; 00336 return result.release(); 00337 } 00338 00339 // Get all the bytecode modules from the archive 00340 bool 00341 Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { 00342 00343 for (iterator I=begin(), E=end(); I != E; ++I) { 00344 if (I->isBytecode() || I->isCompressedBytecode()) { 00345 std::string FullMemberName = archPath.toString() + 00346 "(" + I->getPath().toString() + ")"; 00347 Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(), 00348 I->getSize(), FullMemberName, ErrMessage); 00349 if (!M) 00350 return true; 00351 00352 Modules.push_back(M); 00353 } 00354 } 00355 return false; 00356 } 00357 00358 // Load just the symbol table from the archive file 00359 bool 00360 Archive::loadSymbolTable(std::string* ErrorMsg) { 00361 00362 // Set up parsing 00363 members.clear(); 00364 symTab.clear(); 00365 const char *At = base; 00366 const char *End = base + mapfile->size(); 00367 00368 // Make sure we're dealing with an archive 00369 if (!checkSignature(ErrorMsg)) 00370 return false; 00371 00372 At += 8; // Skip signature 00373 00374 // Parse the first file member header 00375 const char* FirstFile = At; 00376 ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg); 00377 if (!mbr) 00378 return false; 00379 00380 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 00381 // Skip the foreign symbol table, we don't do anything with it 00382 At += mbr->getSize(); 00383 if ((intptr_t(At) & 1) == 1) 00384 At++; 00385 delete mbr; 00386 00387 // Read the next one 00388 FirstFile = At; 00389 mbr = parseMemberHeader(At, End, ErrorMsg); 00390 if (!mbr) { 00391 delete mbr; 00392 return false; 00393 } 00394 } 00395 00396 if (mbr->isStringTable()) { 00397 // Process the string table entry 00398 strtab.assign((const char*)mbr->getData(), mbr->getSize()); 00399 At += mbr->getSize(); 00400 if ((intptr_t(At) & 1) == 1) 00401 At++; 00402 delete mbr; 00403 // Get the next one 00404 FirstFile = At; 00405 mbr = parseMemberHeader(At, End, ErrorMsg); 00406 if (!mbr) { 00407 delete mbr; 00408 return false; 00409 } 00410 } 00411 00412 // See if its the symbol table 00413 if (mbr->isLLVMSymbolTable()) { 00414 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) { 00415 delete mbr; 00416 return false; 00417 } 00418 00419 At += mbr->getSize(); 00420 if ((intptr_t(At) & 1) == 1) 00421 At++; 00422 delete mbr; 00423 // Can't be any more symtab headers so just advance 00424 FirstFile = At; 00425 } else { 00426 // There's no symbol table in the file. We have to rebuild it from scratch 00427 // because the intent of this method is to get the symbol table loaded so 00428 // it can be searched efficiently. 00429 // Add the member to the members list 00430 members.push_back(mbr); 00431 } 00432 00433 firstFileOffset = FirstFile - base; 00434 return true; 00435 } 00436 00437 // Open the archive and load just the symbol tables 00438 Archive* 00439 Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) { 00440 std::auto_ptr<Archive> result ( new Archive(file, true) ); 00441 if (!result->loadSymbolTable(ErrorMessage)) 00442 return 0; 00443 return result.release(); 00444 } 00445 00446 // Look up one symbol in the symbol table and return a ModuleProvider for the 00447 // module that defines that symbol. 00448 ModuleProvider* 00449 Archive::findModuleDefiningSymbol(const std::string& symbol, 00450 std::string* ErrMsg) { 00451 SymTabType::iterator SI = symTab.find(symbol); 00452 if (SI == symTab.end()) 00453 return 0; 00454 00455 // The symbol table was previously constructed assuming that the members were 00456 // written without the symbol table header. Because VBR encoding is used, the 00457 // values could not be adjusted to account for the offset of the symbol table 00458 // because that could affect the size of the symbol table due to VBR encoding. 00459 // We now have to account for this by adjusting the offset by the size of the 00460 // symbol table and its header. 00461 unsigned fileOffset = 00462 SI->second + // offset in symbol-table-less file 00463 firstFileOffset; // add offset to first "real" file in archive 00464 00465 // See if the module is already loaded 00466 ModuleMap::iterator MI = modules.find(fileOffset); 00467 if (MI != modules.end()) 00468 return MI->second.first; 00469 00470 // Module hasn't been loaded yet, we need to load it 00471 const char* modptr = base + fileOffset; 00472 ArchiveMember* mbr = parseMemberHeader(modptr, base + mapfile->size(),ErrMsg); 00473 if (!mbr) 00474 return false; 00475 00476 // Now, load the bytecode module to get the ModuleProvider 00477 std::string FullMemberName = archPath.toString() + "(" + 00478 mbr->getPath().toString() + ")"; 00479 ModuleProvider* mp = getBytecodeBufferModuleProvider( 00480 (const unsigned char*) mbr->getData(), mbr->getSize(), 00481 FullMemberName, 0); 00482 00483 modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); 00484 00485 return mp; 00486 } 00487 00488 // Look up multiple symbols in the symbol table and return a set of 00489 // ModuleProviders that define those symbols. 00490 bool 00491 Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, 00492 std::set<ModuleProvider*>& result, 00493 std::string* error) 00494 { 00495 assert(mapfile && base && "Can't findModulesDefiningSymbols on new archive"); 00496 if (symTab.empty()) { 00497 // We don't have a symbol table, so we must build it now but lets also 00498 // make sure that we populate the modules table as we do this to ensure 00499 // that we don't load them twice when findModuleDefiningSymbol is called 00500 // below. 00501 00502 // Get a pointer to the first file 00503 const char* At = ((const char*)base) + firstFileOffset; 00504 const char* End = ((const char*)base) + mapfile->size(); 00505 00506 while ( At < End) { 00507 // Compute the offset to be put in the symbol table 00508 unsigned offset = At - base - firstFileOffset; 00509 00510 // Parse the file's header 00511 ArchiveMember* mbr = parseMemberHeader(At, End, error); 00512 if (!mbr) 00513 return false; 00514 00515 // If it contains symbols 00516 if (mbr->isBytecode() || mbr->isCompressedBytecode()) { 00517 // Get the symbols 00518 std::vector<std::string> symbols; 00519 std::string FullMemberName = archPath.toString() + "(" + 00520 mbr->getPath().toString() + ")"; 00521 ModuleProvider* MP = GetBytecodeSymbols((const unsigned char*)At, 00522 mbr->getSize(), FullMemberName, symbols); 00523 00524 if (MP) { 00525 // Insert the module's symbols into the symbol table 00526 for (std::vector<std::string>::iterator I = symbols.begin(), 00527 E=symbols.end(); I != E; ++I ) { 00528 symTab.insert(std::make_pair(*I, offset)); 00529 } 00530 // Insert the ModuleProvider and the ArchiveMember into the table of 00531 // modules. 00532 modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); 00533 } else { 00534 if (error) 00535 *error = "Can't parse bytecode member: " + 00536 mbr->getPath().toString(); 00537 delete mbr; 00538 return false; 00539 } 00540 } 00541 00542 // Go to the next file location 00543 At += mbr->getSize(); 00544 if ((intptr_t(At) & 1) == 1) 00545 At++; 00546 } 00547 } 00548 00549 // At this point we have a valid symbol table (one way or another) so we 00550 // just use it to quickly find the symbols requested. 00551 00552 for (std::set<std::string>::iterator I=symbols.begin(), 00553 E=symbols.end(); I != E;) { 00554 // See if this symbol exists 00555 ModuleProvider* mp = findModuleDefiningSymbol(*I,error); 00556 if (mp) { 00557 // The symbol exists, insert the ModuleProvider into our result, 00558 // duplicates wil be ignored 00559 result.insert(mp); 00560 00561 // Remove the symbol now that its been resolved, being careful to 00562 // post-increment the iterator. 00563 symbols.erase(I++); 00564 } else { 00565 ++I; 00566 } 00567 } 00568 return true; 00569 } 00570 00571 bool Archive::isBytecodeArchive() { 00572 // Make sure the symTab has been loaded. In most cases this should have been 00573 // done when the archive was constructed, but still, this is just in case. 00574 if (!symTab.size()) 00575 if (!loadSymbolTable(0)) 00576 return false; 00577 00578 // Now that we know it's been loaded, return true 00579 // if it has a size 00580 if (symTab.size()) return true; 00581 00582 //We still can't be sure it isn't a bytecode archive 00583 if (!loadArchive(0)) 00584 return false; 00585 00586 std::vector<Module *> Modules; 00587 std::string ErrorMessage; 00588 00589 // Scan the archive, trying to load a bytecode member. We only load one to 00590 // see if this works. 00591 for (iterator I = begin(), E = end(); I != E; ++I) { 00592 if (!I->isBytecode() && !I->isCompressedBytecode()) 00593 continue; 00594 00595 std::string FullMemberName = 00596 archPath.toString() + "(" + I->getPath().toString() + ")"; 00597 Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(), 00598 I->getSize(), FullMemberName); 00599 if (!M) 00600 return false; // Couldn't parse bytecode, not a bytecode archive. 00601 delete M; 00602 return true; 00603 } 00604 00605 return false; 00606 }