1 //===- gccld.cpp - LLVM 'ld' compatible linker ----------------------------===//
3 // This utility is intended to be compatible with GCC, and follows standard
4 // system 'ld' conventions. As such, the default output file is ./a.out.
5 // Additionally, this program outputs a shell script that is used to invoke LLI
6 // to execute the program. In this manner, the generated executable (a.out for
7 // example), is directly executable, whereas the bytecode file actually lives in
8 // the a.out.bc file generated by this program. Also, Force is on by default.
10 // Note that if someone (or a script) deletes the executable program generated,
11 // the .bc file will be left around. Considering that this is a temporary hack,
12 // I'm not too worried about this.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/Utils/Linker.h"
17 #include "llvm/Module.h"
18 #include "llvm/PassManager.h"
19 #include "llvm/Bytecode/Reader.h"
20 #include "llvm/Bytecode/WriteBytecodePass.h"
21 #include "llvm/Target/TargetData.h"
22 #include "llvm/Transforms/IPO.h"
23 #include "llvm/Transforms/Scalar.h"
24 #include "Support/CommandLine.h"
25 #include "Support/Signals.h"
30 #include <sys/types.h> // For FileExists
// Command-line option declarations for the linker.
// NOTE(review): the option type that precedes each declaration, and the tail
// of some argument lists, are elided in this listing; the notes below describe
// only the arguments that are visible here.
//
// Positional arguments: the input bytecode files.
35 InputFilenames(cl::Positional, cl::desc("<input bytecode files>"),
// -o <filename>: output filename, initialised to "a.out".
39 OutputFilename("o", cl::desc("Override output filename"), cl::init("a.out"),
40 cl::value_desc("filename"));
// -v: print progress information (tested throughout the file as `Verbose`).
43 Verbose("v", cl::desc("Print information about actions taken"));
// -L<directory>: library search path entry (prefix-style option).
46 LibPaths("L", cl::desc("Specify a library search path"), cl::Prefix,
47 cl::value_desc("directory"));
// -l<library prefix>: library to link against (prefix-style option).
50 Libraries("l", cl::desc("Specify libraries to link to"), cl::Prefix,
51 cl::value_desc("library prefix"));
// -s: strip symbol information from the output.
54 Strip("s", cl::desc("Strip symbol info from executable"));
// -disable-internalize: skip the internalize pass added in main().
57 NoInternalize("disable-internalize",
58 cl::desc("Do not mark all symbols as internal"));
// -export-dynamic: declared as an alias of NoInternalize.
60 ExportDynamic("export-dynamic", cl::desc("Alias for -disable-internalize"),
61 cl::aliasopt(NoInternalize));
// -link-as-library: produce a bytecode library; main() then writes the
// bytecode directly to the output filename and emits no launcher script.
64 LinkAsLibrary("link-as-library", cl::desc("Link the .bc files together as a"
65 " library, not an executable"));
67 // Compatibility options that are ignored, but supported by LD
// Each of these is hidden from the help output and described as ignored.
69 CO3("soname", cl::Hidden, cl::desc("Compatibility option: ignored"));
71 CO4("version-script", cl::Hidden, cl::desc("Compatibility option: ignored"));
73 CO5("eh-frame-hdr", cl::Hidden, cl::desc("Compatibility option: ignored"));
75 CO6("r", cl::Hidden, cl::desc("Compatibility option: ignored"));
78 // FileExists - Return true if the specified string is an openable file...
// The test is a single stat() call on FN: the result is true whenever stat()
// does not return -1. No attempt is made to actually open the file.
// NOTE(review): the declaration of StatBuf is elided in this listing.
79 static inline bool FileExists(const std::string &FN) {
81 return stat(FN.c_str(), &StatBuf) != -1;
85 // LoadObject - Read the specified "object file", which should not search the
86 // library path to find it.
//
// FN is taken by value because it may be rewritten below to a path under
// $LLVM_LIB_SEARCH_PATH. A successfully parsed module is returned wrapped in
// an auto_ptr. On the failure paths shown, OutErrorMessage is set and a null
// auto_ptr is returned.
87 static inline std::auto_ptr<Module> LoadObject(std::string FN,
88 std::string &OutErrorMessage) {
89 if (Verbose) std::cerr << "Loading '" << FN << "'\n";
90 if (!FileExists(FN)) {
91 // Attempt to load from the LLVM_LIB_SEARCH_PATH directory... if we would
92 // otherwise fail. This is used to locate objects like crtend.o.
94 char *SearchPath = getenv("LLVM_LIB_SEARCH_PATH");
95 if (SearchPath && FileExists(std::string(SearchPath)+"/"+FN))
96 FN = std::string(SearchPath)+"/"+FN;
// NOTE(review): the branch structure around the next two statements is elided
// in this listing; presumably they form the else-arm taken when the fallback
// lookup also fails -- confirm against the full file.
98 OutErrorMessage = "could not find input file '" + FN + "'!";
99 return std::auto_ptr<Module>();
// Parse the file; the parser's own message (if any) is appended to the
// "corrupt" diagnostic below.
103 std::string ErrorMessage;
104 Module *Result = ParseBytecodeFile(FN, &ErrorMessage);
105 if (Result) return std::auto_ptr<Module>(Result);
107 OutErrorMessage = "Bytecode file '" + FN + "' corrupt!";
108 if (ErrorMessage.size()) OutErrorMessage += ": " + ErrorMessage;
109 return std::auto_ptr<Module>();
// LoadSingleLibraryObject - Load Filename through LoadObject. When loading
// fails and -v was given, the failure (plus LoadObject's message, if any) is
// written to stderr.
// NOTE(review): the end of this function, including its return statement, is
// elided in this listing.
113 static Module *LoadSingleLibraryObject(const std::string &Filename) {
114 std::string ErrorMessage;
115 std::auto_ptr<Module> M = LoadObject(Filename, ErrorMessage);
116 if (M.get() == 0 && Verbose) {
117 std::cerr << "Error loading '" + Filename + "'";
118 if (!ErrorMessage.empty()) std::cerr << ": " << ErrorMessage;
// IsArchive - Returns true iff FILENAME appears to be the name of an ar
// archive file. It determines this by checking the magic string at the
// beginning of the file.
//
// Returns false when the file cannot be opened or is shorter than the magic
// string, so the comparison never looks at bytes that were not actually read.
static bool IsArchive(const std::string &filename) {
  // Global header of an ar archive: "!<arch>" followed by a newline.
  static const char ArchiveMagic[] = "!<arch>\012";
  const unsigned MagicLen = sizeof(ArchiveMagic) - 1;  // exclude trailing NUL

  // Fixed-size buffer; the magic length is a compile-time constant, so no
  // variable-length array is needed.
  char buf[sizeof(ArchiveMagic)];

  // Open in binary mode: this is a byte-for-byte signature check.
  std::ifstream f(filename.c_str(), std::ios::in | std::ios::binary);
  if (!f) return false;

  f.read(buf, MagicLen);
  if (f.gcount() != static_cast<std::streamsize>(MagicLen))
    return false;  // Short file: cannot be an archive.

  return std::string(buf, MagicLen) == std::string(ArchiveMagic, MagicLen);
}
137 // LoadLibraryExactName - This looks for a file with a known name and tries to
138 // load it, similarly to LoadLibraryFromDirectory().
//
// Loaded modules are added to Objects: archives go through ReadArchiveFile,
// anything else through LoadSingleLibraryObject. The visible success path
// returns false, i.e. false means "loaded".
// NOTE(review): the statement that sets isArchive and the remaining return
// paths are elided in this listing.
139 static inline bool LoadLibraryExactName(const std::string &FileName,
140 std::vector<Module*> &Objects, bool &isArchive) {
141 if (Verbose) std::cerr << " Considering '" << FileName << "'\n";
142 if (FileExists(FileName)) {
143 if (IsArchive(FileName)) {
144 std::string ErrorMessage;
145 if (Verbose) std::cerr << " Loading '" << FileName << "'\n";
// ReadArchiveFile's result is negated here, so a false result from it is
// treated as success.
146 if (!ReadArchiveFile(FileName, Objects, &ErrorMessage)) {
148 return false; // Success!
// Archive could not be read: report it on stderr.
151 std::cerr << " Error loading archive '" + FileName + "'";
152 if (!ErrorMessage.empty()) std::cerr << ": " << ErrorMessage;
// Not an archive: treat the file as a single bytecode object.
156 if (Module *M = LoadSingleLibraryObject(FileName)) {
158 Objects.push_back(M);
166 // LoadLibrary - Try to load a library named LIBNAME that contains
167 // LLVM bytecode. If SEARCH is true, then search for a file named
168 // libLIBNAME.{a,so,bc} in the current library search path. Otherwise,
169 // assume LIBNAME is the real name of the library file. This method puts
170 // the loaded modules into the Objects list, and sets isArchive to true if
171 // a .a file was loaded. It returns true if no library is found or if an
172 // error occurs; otherwise it returns false.
//
// When nothing can be loaded, ErrorMessage is set to a "library not found"
// diagnostic naming -lLIBNAME.
// NOTE(review): the test of `search`, the loop increment, the continuation of
// each LoadLibraryExactName call, and the return statements are elided in this
// listing.
174 static inline bool LoadLibrary(const std::string &LibName,
175 std::vector<Module*> &Objects, bool &isArchive,
176 bool search, std::string &ErrorMessage) {
178 // First, try the current directory. Then, iterate over the
179 // directories in LibPaths, looking for a suitable match for LibName
181 for (unsigned NextLibPathIdx = 0; NextLibPathIdx != LibPaths.size();
183 std::string Directory = LibPaths[NextLibPathIdx] + "/";
// Within each directory the candidates are tried in the order .a, .so, .bc.
184 if (!LoadLibraryExactName(Directory + "lib" + LibName + ".a",
187 if (!LoadLibraryExactName(Directory + "lib" + LibName + ".so",
190 if (!LoadLibraryExactName(Directory + "lib" + LibName + ".bc",
195 // If they said no searching, then assume LibName is the real name.
196 if (!LoadLibraryExactName(LibName, Objects, isArchive))
199 ErrorMessage = "error linking library '-l" + LibName+ "': library not found!";
203 static void GetAllDefinedSymbols(Module *M,
204 std::set<std::string> &DefinedSymbols) {
205 for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
206 if (I->hasName() && !I->isExternal() && !I->hasInternalLinkage())
207 DefinedSymbols.insert(I->getName());
208 for (Module::giterator I = M->gbegin(), E = M->gend(); I != E; ++I)
209 if (I->hasName() && !I->isExternal() && !I->hasInternalLinkage())
210 DefinedSymbols.insert(I->getName());
213 // GetAllUndefinedSymbols - This calculates the set of undefined symbols that
214 // still exist in an LLVM module. This is a bit tricky because there may be two
215 // symbols with the same name, but different LLVM types that will be resolved to
216 // each other, but aren't currently (thus we need to treat it as resolved).
//
// UndefinedSymbols is cleared first and then refilled, so any previous
// contents are discarded. Names are collected into two sets, and every name
// that ends up in both is removed from UndefinedSymbols at the end.
218 static void GetAllUndefinedSymbols(Module *M,
219 std::set<std::string> &UndefinedSymbols) {
220 std::set<std::string> DefinedSymbols;
221 UndefinedSymbols.clear(); // Start out empty
// NOTE(review): in both loops below the conditions that guard the first insert
// are elided in this listing; presumably named, external entries are the ones
// recorded as undefined -- confirm against the full file.
223 for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
226 UndefinedSymbols.insert(I->getName());
227 else if (!I->hasInternalLinkage())
228 DefinedSymbols.insert(I->getName());
230 for (Module::giterator I = M->gbegin(), E = M->gend(); I != E; ++I)
233 UndefinedSymbols.insert(I->getName());
234 else if (!I->hasInternalLinkage())
235 DefinedSymbols.insert(I->getName());
238 // Prune out any defined symbols from the undefined symbols set...
// The loop header has no increment: the iterator is advanced either by the
// post-increment inside erase() (so it moves on before the erased element is
// invalidated) or by the explicit ++I.
239 for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
240 I != UndefinedSymbols.end(); )
241 if (DefinedSymbols.count(*I))
242 UndefinedSymbols.erase(I++); // This symbol really is defined!
244 ++I; // Keep this symbol in the undefined symbols list
// LinkLibrary - Link into M those objects of library LibName that define a
// symbol M still leaves undefined. `search` is forwarded to LoadLibrary.
// Returns true on failure (library could not be loaded, or LinkModules
// reported an error, with ErrorMessage filled in by the callee); returns
// false when M has no undefined symbols and nothing needs linking.
// NOTE(review): the declarations of isArchive and Linked, the handling of
// non-archive libraries, and the final return are elided in this listing.
248 static bool LinkLibrary(Module *M, const std::string &LibName,
249 bool search, std::string &ErrorMessage) {
250 std::set<std::string> UndefinedSymbols;
251 GetAllUndefinedSymbols(M, UndefinedSymbols);
252 if (UndefinedSymbols.empty()) {
253 if (Verbose) std::cerr << " No symbols undefined, don't link library!\n";
254 return false; // No need to link anything in!
257 std::vector<Module*> Objects;
259 if (LoadLibrary(LibName, Objects, isArchive, search, ErrorMessage))
262 // Figure out which symbols are defined by all of the modules in the .a file
// DefinedSymbols[i] holds the exported names of Objects[i]; the two vectors
// are kept index-aligned for the rest of the function.
263 std::vector<std::set<std::string> > DefinedSymbols;
264 DefinedSymbols.resize(Objects.size());
265 for (unsigned i = 0; i != Objects.size(); ++i)
266 GetAllDefinedSymbols(Objects[i], DefinedSymbols[i]);
// Repeat whole passes over the remaining objects for as long as a pass links
// something in: a newly linked object can introduce new undefined symbols
// that an object skipped earlier satisfies.
269 while (Linked) { // While we are linking in object files, loop.
272 for (unsigned i = 0; i != Objects.size(); ++i) {
273 // Consider whether we need to link in this module... we only need to
274 // link it in if it defines some symbol which is so far undefined.
276 const std::set<std::string> &DefSymbols = DefinedSymbols[i];
278 bool ObjectRequired = false;
279 for (std::set<std::string>::iterator I = UndefinedSymbols.begin(),
280 E = UndefinedSymbols.end(); I != E; ++I)
281 if (DefSymbols.count(*I)) {
283 std::cerr << " Found object providing symbol '" << *I << "'...\n";
284 ObjectRequired = true;
288 // We DO need to link this object into the program...
289 if (ObjectRequired) {
290 if (LinkModules(M, Objects[i], &ErrorMessage))
291 return true; // Couldn't link in the right object file...
293 // Since we have linked in this object, delete it from the list of
294 // objects to consider in this archive file.
// Removal is done by swapping entry i with the last entry and popping the
// back, in both vectors, so they stay index-aligned.
// NOTE(review): the matching pop_back on Objects is elided in this listing.
295 std::swap(Objects[i], Objects.back());
296 std::swap(DefinedSymbols[i], DefinedSymbols.back());
298 DefinedSymbols.pop_back();
// i is unsigned: when i == 0 this wraps around, and the loop's ++i wraps it
// back to 0, so the element swapped into slot i is examined next.
299 --i; // Do not skip an entry
301 // The undefined symbols set should have shrunk.
302 GetAllUndefinedSymbols(M, UndefinedSymbols);
303 Linked = true; // We have linked something in!
// PrintAndReturn - Write "<progname><Extra>: <Message>" followed by a newline
// to stderr. Extra defaults to the empty string.
// NOTE(review): the return statement is elided in this listing; callers in
// main() use the result as the process exit status.
311 static int PrintAndReturn(const char *progname, const std::string &Message,
312 const std::string &Extra = "") {
313 std::cerr << progname << Extra << ": " << Message << "\n";
// main - Link all inputs and libraries into one module, run the clean-up and
// optimisation passes, write the bytecode, and (unless -link-as-library) emit
// a shell script that runs the bytecode through lli.
// NOTE(review): a number of lines of this function (closing braces, some
// conditions, the PassManager declaration, the final return) are elided in
// this listing.
318 int main(int argc, char **argv) {
319 cl::ParseCommandLineOptions(argc, argv, " llvm linker for GCC\n");
// The first input file seeds the composite module that everything else is
// linked into. NOTE(review): InputFilenames[0] is read without a visible size
// check; presumably the option declaration requires at least one input --
// confirm.
321 std::string ErrorMessage;
322 std::auto_ptr<Module> Composite(LoadObject(InputFilenames[0], ErrorMessage));
323 if (Composite.get() == 0)
324 return PrintAndReturn(argv[0], ErrorMessage);
326 // We always look first in the current directory when searching for libraries.
327 LibPaths.insert(LibPaths.begin(), ".");
329 // If the user specified an extra search path in their environment, respect it.
// It is appended, so it is consulted after "." and every -L directory.
330 if (char *SearchPath = getenv("LLVM_LIB_SEARCH_PATH"))
331 LibPaths.push_back(SearchPath);
// Remaining inputs (index 1 onward) are linked in command-line order.
333 for (unsigned i = 1; i < InputFilenames.size(); ++i) {
334 // A user may specify an ar archive without -l, perhaps because it
335 // is not installed as a library. Detect that and link the library.
336 if (IsArchive(InputFilenames[i])) {
337 if (Verbose) std::cerr << "Linking archive '" << InputFilenames[i]
// search == false: the archive path is used exactly as given.
339 if (LinkLibrary(Composite.get(), InputFilenames[i], false, ErrorMessage))
340 return PrintAndReturn(argv[0], ErrorMessage,
341 ": error linking in '" + InputFilenames[i] + "'");
// Otherwise the input is loaded as a single bytecode object and linked in.
345 std::auto_ptr<Module> M(LoadObject(InputFilenames[i], ErrorMessage));
347 return PrintAndReturn(argv[0], ErrorMessage);
349 if (Verbose) std::cerr << "Linking in '" << InputFilenames[i] << "'\n";
351 if (LinkModules(Composite.get(), M.get(), &ErrorMessage))
352 return PrintAndReturn(argv[0], ErrorMessage,
353 ": error linking in '" + InputFilenames[i] + "'");
356 // Remove any consecutive duplicates of the same library...
357 Libraries.erase(std::unique(Libraries.begin(), Libraries.end()),
360 // Link in all of the libraries next...
// search == true: each name is resolved through the library search path.
361 for (unsigned i = 0; i != Libraries.size(); ++i) {
362 if (Verbose) std::cerr << "Linking in library: -l" << Libraries[i] << "\n";
363 if (LinkLibrary(Composite.get(), Libraries[i], true, ErrorMessage))
364 return PrintAndReturn(argv[0], ErrorMessage);
367 // In addition to just linking the input from GCC, we also want to spiff it up
368 // a little bit. Do this now.
// Passes are only queued here; they all execute in the single Passes.run()
// call further down.
372 // Add an appropriate TargetData instance for this module...
373 Passes.add(new TargetData("gccld", Composite.get()));
375 // Linking modules together can lead to duplicated global constants, only keep
376 // one copy of each constant...
378 Passes.add(createConstantMergePass());
380 // If the -s command line option was specified, strip the symbols out of the
381 // resulting program to make it smaller. -s is a GCC option that we are
385 Passes.add(createSymbolStrippingPass());
387 // Often if the programmer does not specify proper prototypes for the
388 // functions they are calling, they end up calling a vararg version of the
389 // function that does not get a body filled in (the real function has typed
390 // arguments). This pass merges the two functions.
392 Passes.add(createFunctionResolvingPass());
394 if (!NoInternalize) {
395 // Now that composite has been compiled, scan through the module, looking
396 // for a main function. If main is defined, mark all other functions
399 Passes.add(createInternalizePass());
402 // Remove unused arguments from functions...
404 Passes.add(createDeadArgEliminationPass());
406 // The FuncResolve pass may leave cruft around if functions were prototyped
407 // differently than they were defined. Remove this cruft.
409 Passes.add(createInstructionCombiningPass());
411 // Delete basic blocks, which optimization passes may have killed...
413 Passes.add(createCFGSimplificationPass());
415 // Now that we have optimized the program, discard unreachable functions...
417 Passes.add(createGlobalDCEPass());
419 // Add the pass that writes bytecode to the output file...
// For an executable the bytecode goes to "<output>.bc" and <output> itself
// becomes the launcher script; with -link-as-library the bytecode is written
// directly to <output>.
420 std::string RealBytecodeOutput = OutputFilename;
421 if (!LinkAsLibrary) RealBytecodeOutput += ".bc";
422 std::ofstream Out(RealBytecodeOutput.c_str());
424 return PrintAndReturn(argv[0], "error opening '" + RealBytecodeOutput +
426 Passes.add(new WriteBytecodePass(&Out)); // Write bytecode to file...
428 // Make sure that the Out file gets unlink'd from the disk if we get a SIGINT
429 RemoveFileOnSignal(RealBytecodeOutput);
431 // Run our queue of passes all at once now, efficiently.
432 Passes.run(*Composite.get());
435 if (!LinkAsLibrary) {
436 // Output the script to start the program...
437 std::ofstream Out2(OutputFilename.c_str());
439 return PrintAndReturn(argv[0], "error opening '" + OutputFilename +
// NOTE(review): `$0.bc` and `$*` are unquoted in the generated script, so a
// script path or arguments containing whitespace are re-split by the shell;
// "$0.bc" "$@" would forward them intact.
441 Out2 << "#!/bin/sh\nlli -q -abort-on-exception $0.bc $*\n";
444 // Make the script executable...
// The chmod() results below are not checked.
445 chmod(OutputFilename.c_str(), 0755);
447 // Make the bytecode file directly executable in LLEE as well
448 chmod(RealBytecodeOutput.c_str(), 0755);