LLVM API Documentation
00001 //===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file was developed by the LLVM research group and is distributed under 00006 // the University of Illinois Open Source License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file contains routines to handle linking together LLVM bytecode files, 00011 // and to handle annoying things like static libraries. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "llvm/Linker.h" 00016 #include "llvm/Module.h" 00017 #include "llvm/ModuleProvider.h" 00018 #include "llvm/PassManager.h" 00019 #include "llvm/ADT/SetOperations.h" 00020 #include "llvm/Bytecode/Reader.h" 00021 #include "llvm/Bytecode/Archive.h" 00022 #include "llvm/Bytecode/WriteBytecodePass.h" 00023 #include "llvm/Target/TargetData.h" 00024 #include "llvm/Transforms/IPO.h" 00025 #include "llvm/Transforms/Scalar.h" 00026 #include "llvm/Config/config.h" 00027 #include "llvm/Support/CommandLine.h" 00028 #include "llvm/Support/FileUtilities.h" 00029 #include "llvm/Support/Timer.h" 00030 #include "llvm/System/Signals.h" 00031 #include "llvm/Support/SystemUtils.h" 00032 #include <algorithm> 00033 #include <fstream> 00034 #include <memory> 00035 #include <set> 00036 using namespace llvm; 00037 00038 /// FindLib - Try to convert Filename into the name of a file that we can open, 00039 /// if it does not already name a file we can open, by first trying to open 00040 /// Filename, then libFilename.[suffix] for each of a set of several common 00041 /// library suffixes, in each of the directories in Paths and the directory 00042 /// named by the value of the environment variable LLVM_LIB_SEARCH_PATH. Returns 00043 /// an empty string if no matching file can be found. 00044 /// 00045 std::string llvm::FindLib(const std::string &Filename, 00046 const std::vector<std::string> &Paths, 00047 bool SharedObjectOnly) { 00048 // Determine if the pathname can be found as it stands. 00049 if (FileOpenable(Filename)) 00050 return Filename; 00051 00052 // If that doesn't work, convert the name into a library name. 00053 std::string LibName = "lib" + Filename; 00054 00055 // Iterate over the directories in Paths to see if we can find the library 00056 // there. 00057 for (unsigned Index = 0; Index != Paths.size(); ++Index) { 00058 std::string Directory = Paths[Index] + "/"; 00059 00060 if (!SharedObjectOnly && FileOpenable(Directory + LibName + ".bca")) 00061 return Directory + LibName + ".bca"; 00062 00063 if (FileOpenable(Directory + LibName + LTDL_SHLIB_EXT)) 00064 return Directory + LibName + LTDL_SHLIB_EXT; 00065 00066 if (!SharedObjectOnly && FileOpenable(Directory + LibName + ".a")) 00067 return Directory + LibName + ".a"; 00068 } 00069 00070 // One last hope: Check LLVM_LIB_SEARCH_PATH. 00071 char *SearchPath = getenv("LLVM_LIB_SEARCH_PATH"); 00072 if (SearchPath == NULL) 00073 return std::string(); 00074 00075 LibName = std::string(SearchPath) + "/" + LibName; 00076 if (FileOpenable(LibName)) 00077 return LibName; 00078 00079 return std::string(); 00080 } 00081 00082 /// GetAllDefinedSymbols - Modifies its parameter DefinedSymbols to contain the 00083 /// name of each externally-visible symbol defined in M. 00084 /// 00085 void llvm::GetAllDefinedSymbols(Module *M, 00086 std::set<std::string> &DefinedSymbols) { 00087 for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) 00088 if (I->hasName() && !I->isExternal() && !I->hasInternalLinkage()) 00089 DefinedSymbols.insert(I->getName()); 00090 for (Module::giterator I = M->gbegin(), E = M->gend(); I != E; ++I) 00091 if (I->hasName() && !I->isExternal() && !I->hasInternalLinkage()) 00092 DefinedSymbols.insert(I->getName()); 00093 } 00094 00095 /// GetAllUndefinedSymbols - calculates the set of undefined symbols that still 00096 /// exist in an LLVM module. This is a bit tricky because there may be two 00097 /// symbols with the same name but different LLVM types that will be resolved to 00098 /// each other but aren't currently (thus we need to treat it as resolved). 00099 /// 00100 /// Inputs: 00101 /// M - The module in which to find undefined symbols. 00102 /// 00103 /// Outputs: 00104 /// UndefinedSymbols - A set of C++ strings containing the name of all 00105 /// undefined symbols. 00106 /// 00107 void 00108 llvm::GetAllUndefinedSymbols(Module *M, 00109 std::set<std::string> &UndefinedSymbols) { 00110 std::set<std::string> DefinedSymbols; 00111 UndefinedSymbols.clear(); 00112 00113 for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) 00114 if (I->hasName()) { 00115 if (I->isExternal()) 00116 UndefinedSymbols.insert(I->getName()); 00117 else if (!I->hasInternalLinkage()) 00118 DefinedSymbols.insert(I->getName()); 00119 } 00120 for (Module::giterator I = M->gbegin(), E = M->gend(); I != E; ++I) 00121 if (I->hasName()) { 00122 if (I->isExternal()) 00123 UndefinedSymbols.insert(I->getName()); 00124 else if (!I->hasInternalLinkage()) 00125 DefinedSymbols.insert(I->getName()); 00126 } 00127 00128 // Prune out any defined symbols from the undefined symbols set... 00129 for (std::set<std::string>::iterator I = UndefinedSymbols.begin(); 00130 I != UndefinedSymbols.end(); ) 00131 if (DefinedSymbols.count(*I)) 00132 UndefinedSymbols.erase(I++); // This symbol really is defined! 00133 else 00134 ++I; // Keep this symbol in the undefined symbols list 00135 } 00136 00137 00138 /// LoadObject - Read in and parse the bytecode file named by FN and return the 00139 /// module it contains (wrapped in an auto_ptr), or 0 and set ErrorMessage if an 00140 /// error occurs. 00141 /// 00142 static std::auto_ptr<Module> LoadObject(const std::string &FN, 00143 std::string &ErrorMessage) { 00144 std::string ParserErrorMessage; 00145 Module *Result = ParseBytecodeFile(FN, &ParserErrorMessage); 00146 if (Result) return std::auto_ptr<Module>(Result); 00147 ErrorMessage = "Bytecode file '" + FN + "' could not be loaded"; 00148 if (ParserErrorMessage.size()) ErrorMessage += ": " + ParserErrorMessage; 00149 return std::auto_ptr<Module>(); 00150 } 00151 00152 /// LinkInArchive - opens an archive library and link in all objects which 00153 /// provide symbols that are currently undefined. 00154 /// 00155 /// Inputs: 00156 /// M - The module in which to link the archives. 00157 /// Filename - The pathname of the archive. 00158 /// Verbose - Flags whether verbose messages should be printed. 00159 /// 00160 /// Outputs: 00161 /// ErrorMessage - A C++ string detailing what error occurred, if any. 00162 /// 00163 /// Return Value: 00164 /// TRUE - An error occurred. 00165 /// FALSE - No errors. 00166 /// 00167 bool llvm::LinkInArchive(Module *M, 00168 const std::string &Filename, 00169 std::string* ErrorMessage, 00170 bool Verbose) 00171 { 00172 // Find all of the symbols currently undefined in the bytecode program. 00173 // If all the symbols are defined, the program is complete, and there is 00174 // no reason to link in any archive files. 00175 std::set<std::string> UndefinedSymbols; 00176 GetAllUndefinedSymbols(M, UndefinedSymbols); 00177 00178 if (UndefinedSymbols.empty()) { 00179 if (Verbose) std::cerr << " No symbols undefined, don't link library!\n"; 00180 return false; // No need to link anything in! 00181 } 00182 00183 // Open the archive file 00184 if (Verbose) std::cerr << " Loading archive file '" << Filename << "'\n"; 00185 std::auto_ptr<Archive> AutoArch ( 00186 Archive::OpenAndLoadSymbols(sys::Path(Filename))); 00187 00188 Archive* arch = AutoArch.get(); 00189 00190 // Save a set of symbols that are not defined by the archive. Since we're 00191 // entering a loop, there's no point searching for these multiple times. This 00192 // variable is used to "set_subtract" from the set of undefined symbols. 00193 std::set<std::string> NotDefinedByArchive; 00194 00195 // While we are linking in object files, loop. 00196 while (true) { 00197 00198 // Find the modules we need to link into the target module 00199 std::set<ModuleProvider*> Modules; 00200 arch->findModulesDefiningSymbols(UndefinedSymbols, Modules); 00201 00202 // If we didn't find any more modules to link this time, we are done 00203 // searching this archive. 00204 if (Modules.empty()) 00205 break; 00206 00207 // Any symbols remaining in UndefinedSymbols after 00208 // findModulesDefiningSymbols are ones that the archive does not define. So 00209 // we add them to the NotDefinedByArchive variable now. 00210 NotDefinedByArchive.insert(UndefinedSymbols.begin(), 00211 UndefinedSymbols.end()); 00212 00213 // Loop over all the ModuleProviders that we got back from the archive 00214 for (std::set<ModuleProvider*>::iterator I=Modules.begin(), E=Modules.end(); 00215 I != E; ++I) { 00216 00217 // Get the module we must link in. 00218 std::auto_ptr<Module> AutoModule( (*I)->releaseModule() ); 00219 Module* aModule = AutoModule.get(); 00220 00221 // Link it in 00222 if (LinkModules(M, aModule, ErrorMessage)) 00223 return true; // Couldn't link in the module 00224 } 00225 00226 // Get the undefined symbols from the aggregate module. This recomputes the 00227 // symbols we still need after the new modules have been linked in. 00228 GetAllUndefinedSymbols(M, UndefinedSymbols); 00229 00230 // At this point we have two sets of undefined symbols: UndefinedSymbols 00231 // which holds the undefined symbols from all the modules, and 00232 // NotDefinedByArchive which holds symbols we know the archive doesn't 00233 // define. There's no point searching for symbols that we won't find in the 00234 // archive so we subtract these sets. 00235 set_subtract<std::set<std::string>,std::set<std::string> >( 00236 UndefinedSymbols,NotDefinedByArchive); 00237 00238 // If there's no symbols left, no point in continuing to search the 00239 // archive. 00240 if (UndefinedSymbols.empty()) 00241 break; 00242 } 00243 00244 return false; 00245 } 00246 00247 /// LinkInFile - opens a bytecode file and links in all objects which 00248 /// provide symbols that are currently undefined. 00249 /// 00250 /// Inputs: 00251 /// HeadModule - The module in which to link the bytecode file. 00252 /// Filename - The pathname of the bytecode file. 00253 /// Verbose - Flags whether verbose messages should be printed. 00254 /// 00255 /// Outputs: 00256 /// ErrorMessage - A C++ string detailing what error occurred, if any. 00257 /// 00258 /// Return Value: 00259 /// TRUE - An error occurred. 00260 /// FALSE - No errors. 00261 /// 00262 static bool LinkInFile(Module *HeadModule, 00263 const std::string &Filename, 00264 std::string &ErrorMessage, 00265 bool Verbose) 00266 { 00267 std::auto_ptr<Module> M(LoadObject(Filename, ErrorMessage)); 00268 if (M.get() == 0) return true; 00269 bool Result = LinkModules(HeadModule, M.get(), &ErrorMessage); 00270 if (Verbose) std::cerr << "Linked in bytecode file '" << Filename << "'\n"; 00271 return Result; 00272 } 00273 00274 /// LinkFiles - takes a module and a list of files and links them all together. 00275 /// It locates the file either in the current directory, as its absolute 00276 /// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH. 00277 /// 00278 /// Inputs: 00279 /// progname - The name of the program (infamous argv[0]). 00280 /// HeadModule - The module under which all files will be linked. 00281 /// Files - A vector of C++ strings indicating the LLVM bytecode filenames 00282 /// to be linked. The names can refer to a mixture of pure LLVM 00283 /// bytecode files and archive (ar) formatted files. 00284 /// Verbose - Flags whether verbose output should be printed while linking. 00285 /// 00286 /// Outputs: 00287 /// HeadModule - The module will have the specified LLVM bytecode files linked 00288 /// in. 00289 /// 00290 /// Return value: 00291 /// FALSE - No errors. 00292 /// TRUE - Some error occurred. 00293 /// 00294 bool llvm::LinkFiles(const char *progname, Module *HeadModule, 00295 const std::vector<std::string> &Files, bool Verbose) { 00296 // String in which to receive error messages. 00297 std::string ErrorMessage; 00298 00299 // Full pathname of the file 00300 std::string Pathname; 00301 00302 // Get the library search path from the environment 00303 char *SearchPath = getenv("LLVM_LIB_SEARCH_PATH"); 00304 00305 for (unsigned i = 0; i < Files.size(); ++i) { 00306 // Determine where this file lives. 00307 if (FileOpenable(Files[i])) { 00308 Pathname = Files[i]; 00309 } else { 00310 if (SearchPath == NULL) { 00311 std::cerr << progname << ": Cannot find linker input file '" 00312 << Files[i] << "'\n"; 00313 std::cerr << progname 00314 << ": Warning: Your LLVM_LIB_SEARCH_PATH is unset.\n"; 00315 return true; 00316 } 00317 00318 Pathname = std::string(SearchPath)+"/"+Files[i]; 00319 if (!FileOpenable(Pathname)) { 00320 std::cerr << progname << ": Cannot find linker input file '" 00321 << Files[i] << "'\n"; 00322 return true; 00323 } 00324 } 00325 00326 // A user may specify an ar archive without -l, perhaps because it 00327 // is not installed as a library. Detect that and link the library. 00328 if (IsArchive(Pathname)) { 00329 if (Verbose) 00330 std::cerr << "Trying to link archive '" << Pathname << "'\n"; 00331 00332 if (LinkInArchive(HeadModule, Pathname, &ErrorMessage, Verbose)) { 00333 std::cerr << progname << ": Error linking in archive '" << Pathname 00334 << "': " << ErrorMessage << "\n"; 00335 return true; 00336 } 00337 } else if (IsBytecode(Pathname)) { 00338 if (Verbose) 00339 std::cerr << "Trying to link bytecode file '" << Pathname << "'\n"; 00340 00341 if (LinkInFile(HeadModule, Pathname, ErrorMessage, Verbose)) { 00342 std::cerr << progname << ": Error linking in bytecode file '" 00343 << Pathname << "': " << ErrorMessage << "\n"; 00344 return true; 00345 } 00346 } else { 00347 std::cerr << progname << ": Warning: invalid file `" << Pathname 00348 << "' ignored.\n"; 00349 } 00350 } 00351 00352 return false; 00353 } 00354 00355 /// LinkOneLibrary - links one library of any kind into the HeadModule 00356 static inline void LinkOneLibrary(const char*progname, Module* HeadModule, 00357 const std::string& Lib, 00358 const std::vector<std::string>& LibPaths, 00359 bool Verbose, bool Native) { 00360 00361 // String in which to receive error messages. 00362 std::string ErrorMessage; 00363 00364 // Determine where this library lives. 00365 std::string Pathname = FindLib(Lib, LibPaths); 00366 if (Pathname.empty()) { 00367 // If the pathname does not exist, then simply return if we're doing a 00368 // native link and give a warning if we're doing a bytecode link. 00369 if (!Native) { 00370 std::cerr << progname << ": WARNING: Cannot find library -l" 00371 << Lib << "\n"; 00372 return; 00373 } 00374 } 00375 00376 // A user may specify an ar archive without -l, perhaps because it 00377 // is not installed as a library. Detect that and link the library. 00378 if (IsArchive(Pathname)) { 00379 if (Verbose) 00380 std::cerr << "Trying to link archive '" << Pathname << "' (-l" 00381 << Lib << ")\n"; 00382 00383 if (LinkInArchive(HeadModule, Pathname, &ErrorMessage, Verbose)) { 00384 std::cerr << progname << ": " << ErrorMessage 00385 << ": Error linking in archive '" << Pathname << "' (-l" 00386 << Lib << ")\n"; 00387 exit(1); 00388 } 00389 } else { 00390 std::cerr << progname << ": WARNING: Supposed library -l" 00391 << Lib << " isn't a library.\n"; 00392 } 00393 } 00394 00395 /// LinkLibraries - takes the specified library files and links them into the 00396 /// main bytecode object file. 00397 /// 00398 /// Inputs: 00399 /// progname - The name of the program (infamous argv[0]). 00400 /// HeadModule - The module into which all necessary libraries will be linked. 00401 /// Libraries - The list of libraries to link into the module. 00402 /// LibPaths - The list of library paths in which to find libraries. 00403 /// Verbose - Flags whether verbose messages should be printed. 00404 /// Native - Flags whether native code is being generated. 00405 /// 00406 /// Outputs: 00407 /// HeadModule - The module will have all necessary libraries linked in. 00408 /// 00409 /// Return value: 00410 /// FALSE - No error. 00411 /// TRUE - Error. 00412 /// 00413 void llvm::LinkLibraries(const char *progname, Module *HeadModule, 00414 const std::vector<std::string> &Libraries, 00415 const std::vector<std::string> &LibPaths, 00416 bool Verbose, bool Native) { 00417 00418 // Process the set of libraries we've been provided 00419 for (unsigned i = 0; i < Libraries.size(); ++i) { 00420 LinkOneLibrary(progname,HeadModule,Libraries[i],LibPaths,Verbose,Native); 00421 } 00422 00423 // At this point we have processed all the libraries provided to us. Since 00424 // we have an aggregated module at this point, the dependent libraries in 00425 // that module should also be aggregated with duplicates eliminated. This is 00426 // now the time to process the dependent libraries to resolve any remaining 00427 // symbols. 00428 const Module::LibraryListType& DepLibs = HeadModule->getLibraries(); 00429 for (Module::LibraryListType::const_iterator I = DepLibs.begin(), 00430 E = DepLibs.end(); I != E; ++I) { 00431 LinkOneLibrary(progname,HeadModule,*I,LibPaths,Verbose,Native); 00432 } 00433 }