LLVM API Documentation
00001 //===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by Reid Spencer and is distributed under the 00006 // University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Builds up standard unix archive files (.a) containing LLVM bytecode. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "ArchiveInternals.h" 00015 #include "llvm/Bytecode/Reader.h" 00016 00017 using namespace llvm; 00018 00019 /// Read a variable-bit-rate encoded unsigned integer 00020 inline unsigned readInteger(const char*&At, const char*End) { 00021 unsigned Shift = 0; 00022 unsigned Result = 0; 00023 00024 do { 00025 if (At == End) 00026 throw std::string("Ran out of data reading vbr_uint!"); 00027 Result |= (unsigned)((*At++) & 0x7F) << Shift; 00028 Shift += 7; 00029 } while (At[-1] & 0x80); 00030 return Result; 00031 } 00032 00033 // Completely parse the Archive's symbol table and populate symTab member var. 00034 void 00035 Archive::parseSymbolTable(const void* data, unsigned size) { 00036 const char* At = (const char*) data; 00037 const char* End = At + size; 00038 while (At < End) { 00039 unsigned offset = readInteger(At, End); 00040 unsigned length = readInteger(At, End); 00041 if (At + length > End) 00042 throw std::string("malformed symbol table"); 00043 // we don't care if it can't be inserted (duplicate entry) 00044 symTab.insert(std::make_pair(std::string(At, length), offset)); 00045 At += length; 00046 } 00047 symTabSize = size; 00048 } 00049 00050 // This member parses an ArchiveMemberHeader that is presumed to be pointed to 00051 // by At. The At pointer is updated to the byte just after the header, which 00052 // can be variable in size. 00053 ArchiveMember* 00054 Archive::parseMemberHeader(const char*& At, const char* End) { 00055 assert(At + sizeof(ArchiveMemberHeader) < End && "Not enough data"); 00056 00057 // Cast archive member header 00058 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; 00059 At += sizeof(ArchiveMemberHeader); 00060 00061 // Instantiate the ArchiveMember to be filled 00062 ArchiveMember* member = new ArchiveMember(this); 00063 00064 // Extract the size and determine if the file is 00065 // compressed or not (negative length). 00066 int flags = 0; 00067 int MemberSize = atoi(Hdr->size); 00068 if (MemberSize < 0) { 00069 flags |= ArchiveMember::CompressedFlag; 00070 MemberSize = -MemberSize; 00071 } 00072 00073 // Check the size of the member for sanity 00074 if (At + MemberSize > End) 00075 throw std::string("invalid member length in archive file"); 00076 00077 // Check the member signature 00078 if (!Hdr->checkSignature()) 00079 throw std::string("invalid file member signature"); 00080 00081 // Convert and check the member name 00082 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol 00083 // table. The special name "//" and 14 blanks is for a string table, used 00084 // for long file names. This library doesn't generate either of those but 00085 // it will accept them. If the name starts with #1/ and the remainder is 00086 // digits, then those digits specify the length of the name that is 00087 // stored immediately following the header. The special name 00088 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bytecode. 00089 // Anything else is a regular, short filename that is terminated with 00090 // a '/' and blanks. 00091 00092 std::string pathname; 00093 unsigned index; 00094 switch (Hdr->name[0]) { 00095 case '#': 00096 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { 00097 if (isdigit(Hdr->name[3])) { 00098 unsigned len = atoi(&Hdr->name[3]); 00099 pathname.assign(At, len); 00100 At += len; 00101 MemberSize -= len; 00102 flags |= ArchiveMember::HasLongFilenameFlag; 00103 } else 00104 throw std::string("invalid long filename"); 00105 } else if (Hdr->name[1] == '_' && 00106 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { 00107 // The member is using a long file name (>15 chars) format. 00108 // This format is standard for 4.4BSD and Mac OSX operating 00109 // systems. LLVM uses it similarly. In this format, the 00110 // remainder of the name field (after #1/) specifies the 00111 // length of the file name which occupy the first bytes of 00112 // the member's data. The pathname already has the #1/ stripped. 00113 pathname.assign(ARFILE_LLVM_SYMTAB_NAME); 00114 flags |= ArchiveMember::LLVMSymbolTableFlag; 00115 } 00116 break; 00117 case '/': 00118 if (Hdr->name[1]== '/') { 00119 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { 00120 pathname.assign(ARFILE_STRTAB_NAME); 00121 flags |= ArchiveMember::StringTableFlag; 00122 } else { 00123 throw std::string("invalid string table name"); 00124 } 00125 } else if (Hdr->name[1] == ' ') { 00126 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { 00127 pathname.assign(ARFILE_SVR4_SYMTAB_NAME); 00128 flags |= ArchiveMember::SVR4SymbolTableFlag; 00129 } else { 00130 throw std::string("invalid SVR4 symbol table name"); 00131 } 00132 } else if (isdigit(Hdr->name[1])) { 00133 unsigned index = atoi(&Hdr->name[1]); 00134 if (index < strtab.length()) { 00135 const char* namep = strtab.c_str() + index; 00136 const char* endp = strtab.c_str() + strtab.length(); 00137 const char* p = namep; 00138 const char* last_p = p; 00139 while (p < endp) { 00140 if (*p == '\n' && *last_p == '/') { 00141 pathname.assign(namep, last_p - namep); 00142 flags |= ArchiveMember::HasLongFilenameFlag; 00143 break; 00144 } 00145 last_p = p; 00146 p++; 00147 } 00148 if (p >= endp) 00149 throw std::string("missing name termiantor in string table"); 00150 } else { 00151 throw std::string("name index beyond string table"); 00152 } 00153 } 00154 break; 00155 case '_': 00156 if (Hdr->name[1] == '_' && 00157 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { 00158 pathname.assign(ARFILE_BSD4_SYMTAB_NAME); 00159 flags |= ArchiveMember::BSD4SymbolTableFlag; 00160 break; 00161 } 00162 /* FALL THROUGH */ 00163 00164 default: 00165 char* slash = (char*) memchr(Hdr->name, '/', 16); 00166 if (slash == 0) 00167 slash = Hdr->name + 16; 00168 pathname.assign(Hdr->name, slash - Hdr->name); 00169 break; 00170 } 00171 00172 // Determine if this is a bytecode file 00173 switch (sys::IdentifyFileType(At, 4)) { 00174 case sys::BytecodeFileType: 00175 flags |= ArchiveMember::BytecodeFlag; 00176 break; 00177 case sys::CompressedBytecodeFileType: 00178 flags |= ArchiveMember::CompressedBytecodeFlag; 00179 flags &= ~ArchiveMember::CompressedFlag; 00180 break; 00181 default: 00182 flags &= ~(ArchiveMember::BytecodeFlag| 00183 ArchiveMember::CompressedBytecodeFlag); 00184 break; 00185 } 00186 00187 // Fill in fields of the ArchiveMember 00188 member->next = 0; 00189 member->prev = 0; 00190 member->parent = this; 00191 member->path.setFile(pathname); 00192 member->info.fileSize = MemberSize; 00193 member->info.modTime.fromEpochTime(atoi(Hdr->date)); 00194 sscanf(Hdr->mode, "%o", &(member->info.mode)); 00195 member->info.user = atoi(Hdr->uid); 00196 member->info.group = atoi(Hdr->gid); 00197 member->flags = flags; 00198 member->data = At; 00199 00200 return member; 00201 } 00202 00203 void 00204 Archive::checkSignature() { 00205 // Check the magic string at file's header 00206 if (mapfile->size() < 8 || memcmp(base, ARFILE_MAGIC, 8)) 00207 throw std::string("invalid signature for an archive file"); 00208 } 00209 00210 // This function loads the entire archive and fully populates its ilist with 00211 // the members of the archive file. This is typically used in preparation for 00212 // editing the contents of the archive. 00213 void 00214 Archive::loadArchive() { 00215 00216 // Set up parsing 00217 members.clear(); 00218 symTab.clear(); 00219 const char *At = base; 00220 const char *End = base + mapfile->size(); 00221 00222 checkSignature(); 00223 At += 8; // Skip the magic string. 00224 00225 bool seenSymbolTable = false; 00226 bool foundFirstFile = false; 00227 while (At < End) { 00228 // parse the member header 00229 const char* Save = At; 00230 ArchiveMember* mbr = parseMemberHeader(At, End); 00231 00232 // check if this is the foreign symbol table 00233 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 00234 // We just save this but don't do anything special 00235 // with it. It doesn't count as the "first file". 00236 if (foreignST) { 00237 // What? Multiple foreign symbol tables? Just chuck it 00238 // and retain the last one found. 00239 delete foreignST; 00240 } 00241 foreignST = mbr; 00242 At += mbr->getSize(); 00243 if ((intptr_t(At) & 1) == 1) 00244 At++; 00245 } else if (mbr->isStringTable()) { 00246 // Simply suck the entire string table into a string 00247 // variable. This will be used to get the names of the 00248 // members that use the "/ddd" format for their names 00249 // (SVR4 style long names). 00250 strtab.assign(At, mbr->getSize()); 00251 At += mbr->getSize(); 00252 if ((intptr_t(At) & 1) == 1) 00253 At++; 00254 delete mbr; 00255 } else if (mbr->isLLVMSymbolTable()) { 00256 // This is the LLVM symbol table for the archive. If we've seen it 00257 // already, its an error. Otherwise, parse the symbol table and move on. 00258 if (seenSymbolTable) 00259 throw std::string("invalid archive: multiple symbol tables"); 00260 parseSymbolTable(mbr->getData(), mbr->getSize()); 00261 seenSymbolTable = true; 00262 At += mbr->getSize(); 00263 if ((intptr_t(At) & 1) == 1) 00264 At++; 00265 delete mbr; // We don't need this member in the list of members. 00266 } else { 00267 // This is just a regular file. If its the first one, save its offset. 00268 // Otherwise just push it on the list and move on to the next file. 00269 if (!foundFirstFile) { 00270 firstFileOffset = Save - base; 00271 foundFirstFile = true; 00272 } 00273 members.push_back(mbr); 00274 At += mbr->getSize(); 00275 if ((intptr_t(At) & 1) == 1) 00276 At++; 00277 } 00278 } 00279 } 00280 00281 // Open and completely load the archive file. 00282 Archive* 00283 Archive::OpenAndLoad(const sys::Path& file) { 00284 00285 Archive* result = new Archive(file, true); 00286 00287 result->loadArchive(); 00288 00289 return result; 00290 } 00291 00292 // Get all the bytecode modules from the archive 00293 bool 00294 Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) { 00295 00296 for (iterator I=begin(), E=end(); I != E; ++I) { 00297 if (I->isBytecode() || I->isCompressedBytecode()) { 00298 std::string FullMemberName = archPath.get() + 00299 "(" + I->getPath().get() + ")"; 00300 Module* M = ParseBytecodeBuffer((const unsigned char*)I->getData(), 00301 I->getSize(), FullMemberName, ErrMessage); 00302 if (!M) 00303 return true; 00304 00305 Modules.push_back(M); 00306 } 00307 } 00308 return false; 00309 } 00310 00311 // Load just the symbol table from the archive file 00312 void 00313 Archive::loadSymbolTable() { 00314 00315 // Set up parsing 00316 members.clear(); 00317 symTab.clear(); 00318 const char *At = base; 00319 const char *End = base + mapfile->size(); 00320 00321 // Make sure we're dealing with an archive 00322 checkSignature(); 00323 00324 At += 8; // Skip signature 00325 00326 // Parse the first file member header 00327 const char* FirstFile = At; 00328 ArchiveMember* mbr = parseMemberHeader(At, End); 00329 00330 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 00331 // Skip the foreign symbol table, we don't do anything with it 00332 At += mbr->getSize(); 00333 if ((intptr_t(At) & 1) == 1) 00334 At++; 00335 delete mbr; 00336 00337 // Read the next one 00338 FirstFile = At; 00339 mbr = parseMemberHeader(At, End); 00340 } 00341 00342 if (mbr->isStringTable()) { 00343 // Process the string table entry 00344 strtab.assign((const char*)mbr->getData(), mbr->getSize()); 00345 At += mbr->getSize(); 00346 if ((intptr_t(At) & 1) == 1) 00347 At++; 00348 delete mbr; 00349 // Get the next one 00350 FirstFile = At; 00351 mbr = parseMemberHeader(At, End); 00352 } 00353 00354 // See if its the symbol table 00355 if (mbr->isLLVMSymbolTable()) { 00356 parseSymbolTable(mbr->getData(), mbr->getSize()); 00357 At += mbr->getSize(); 00358 if ((intptr_t(At) & 1) == 1) 00359 At++; 00360 FirstFile = At; 00361 } else { 00362 // There's no symbol table in the file. We have to rebuild it from scratch 00363 // because the intent of this method is to get the symbol table loaded so 00364 // it can be searched efficiently. 00365 // Add the member to the members list 00366 members.push_back(mbr); 00367 } 00368 00369 firstFileOffset = FirstFile - base; 00370 } 00371 00372 // Open the archive and load just the symbol tables 00373 Archive* 00374 Archive::OpenAndLoadSymbols(const sys::Path& file) { 00375 Archive* result = new Archive(file, true); 00376 00377 result->loadSymbolTable(); 00378 00379 return result; 00380 } 00381 00382 // Look up one symbol in the symbol table and return a ModuleProvider for the 00383 // module that defines that symbol. 00384 ModuleProvider* 00385 Archive::findModuleDefiningSymbol(const std::string& symbol) { 00386 SymTabType::iterator SI = symTab.find(symbol); 00387 if (SI == symTab.end()) 00388 return 0; 00389 00390 // The symbol table was previously constructed assuming that the members were 00391 // written without the symbol table header. Because VBR encoding is used, the 00392 // values could not be adjusted to account for the offset of the symbol table 00393 // because that could affect the size of the symbol table due to VBR encoding. 00394 // We now have to account for this by adjusting the offset by the size of the 00395 // symbol table and its header. 00396 unsigned fileOffset = 00397 SI->second + // offset in symbol-table-less file 00398 firstFileOffset; // add offset to first "real" file in archive 00399 00400 // See if the module is already loaded 00401 ModuleMap::iterator MI = modules.find(fileOffset); 00402 if (MI != modules.end()) 00403 return MI->second.first; 00404 00405 // Module hasn't been loaded yet, we need to load it 00406 const char* modptr = base + fileOffset; 00407 ArchiveMember* mbr = parseMemberHeader(modptr, base + mapfile->size()); 00408 00409 // Now, load the bytecode module to get the ModuleProvider 00410 std::string FullMemberName = archPath.get() + "(" + 00411 mbr->getPath().get() + ")"; 00412 ModuleProvider* mp = getBytecodeBufferModuleProvider( 00413 (const unsigned char*) mbr->getData(), mbr->getSize(), 00414 FullMemberName, 0); 00415 00416 modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); 00417 00418 return mp; 00419 } 00420 00421 // Look up multiple symbols in the symbol table and return a set of 00422 // ModuleProviders that define those symbols. 00423 void 00424 Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, 00425 std::set<ModuleProvider*>& result) 00426 { 00427 assert(mapfile && base && "Can't findModulesDefiningSymbols on new archive"); 00428 if (symTab.empty()) { 00429 // We don't have a symbol table, so we must build it now but lets also 00430 // make sure that we populate the modules table as we do this to ensure 00431 // that we don't load them twice when findModuleDefiningSymbol is called 00432 // below. 00433 00434 // Get a pointer to the first file 00435 const char* At = ((const char*)base) + firstFileOffset; 00436 const char* End = ((const char*)base) + mapfile->size(); 00437 00438 while ( At < End) { 00439 // Compute the offset to be put in the symbol table 00440 unsigned offset = At - base - firstFileOffset; 00441 00442 // Parse the file's header 00443 ArchiveMember* mbr = parseMemberHeader(At, End); 00444 00445 // If it contains symbols 00446 if (mbr->isBytecode() || mbr->isCompressedBytecode()) { 00447 // Get the symbols 00448 std::vector<std::string> symbols; 00449 std::string FullMemberName = archPath.get() + "(" + 00450 mbr->getPath().get() + ")"; 00451 ModuleProvider* MP = GetBytecodeSymbols((const unsigned char*)At, 00452 mbr->getSize(), FullMemberName, symbols); 00453 00454 if (MP) { 00455 // Insert the module's symbols into the symbol table 00456 for (std::vector<std::string>::iterator I = symbols.begin(), 00457 E=symbols.end(); I != E; ++I ) { 00458 symTab.insert(std::make_pair(*I, offset)); 00459 } 00460 // Insert the ModuleProvider and the ArchiveMember into the table of 00461 // modules. 00462 modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); 00463 } else { 00464 throw std::string("Can't parse bytecode member: ") + 00465 mbr->getPath().get(); 00466 } 00467 } 00468 00469 // Go to the next file location 00470 At += mbr->getSize(); 00471 if ((intptr_t(At) & 1) == 1) 00472 At++; 00473 } 00474 } 00475 00476 // At this point we have a valid symbol table (one way or another) so we 00477 // just use it to quickly find the symbols requested. 00478 00479 for (std::set<std::string>::iterator I=symbols.begin(), 00480 E=symbols.end(); I != E;) { 00481 // See if this symbol exists 00482 ModuleProvider* mp = findModuleDefiningSymbol(*I); 00483 if (mp) { 00484 // The symbol exists, insert the ModuleProvider into our result, 00485 // duplicates wil be ignored 00486 result.insert(mp); 00487 00488 // Remove the symbol now that its been resolved, being careful to 00489 // post-increment the iterator. 00490 symbols.erase(I++); 00491 } else { 00492 ++I; 00493 } 00494 } 00495 }