1 //===- gccld.cpp - LLVM 'ld' compatible linker ----------------------------===//
3 // This utility is intended to be compatible with GCC, and follows standard
4 // system 'ld' conventions. As such, the default output file is ./a.out.
5 // Additionally, this program outputs a shell script that is used to invoke LLI
6 // to execute the program. In this manner, the generated executable (a.out for
7 // example), is directly executable, whereas the bytecode file actually lives in
8 // the a.out.bc file generated by this program. Also, Force is on by default.
10 // Note that if someone (or a script) deletes the executable program generated,
11 // the .bc file will be left around. Considering that this is a temporary hack,
12 // I'm not too worried about this.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/Utils/Linker.h"
17 #include "llvm/Module.h"
18 #include "llvm/PassManager.h"
19 #include "llvm/Bytecode/Reader.h"
20 #include "llvm/Bytecode/WriteBytecodePass.h"
21 #include "llvm/Target/TargetData.h"
22 #include "llvm/Transforms/IPO.h"
23 #include "llvm/Transforms/Scalar.h"
24 #include "Support/FileUtilities.h"
25 #include "Support/SystemUtils.h"
26 #include "Support/CommandLine.h"
27 #include "Support/Signals.h"
28 #include "Config/unistd.h"
// Positional arguments: the bytecode inputs.  main() loads the first one as
// the initial module and links each remaining one into it.
36 InputFilenames(cl::Positional, cl::desc("<input bytecode files>"),
// -o: name of the output; defaults to "a.out".
40 OutputFilename("o", cl::desc("Override output filename"), cl::init("a.out"),
41 cl::value_desc("filename"));
// -v: when set, progress messages are written to std::cerr.
44 Verbose("v", cl::desc("Print information about actions taken"));
// -L<directory>: directories searched by LoadLibrary().  main() also puts "."
// at the front of this list and appends $LLVM_LIB_SEARCH_PATH when it is set.
47 LibPaths("L", cl::desc("Specify a library search path"), cl::Prefix,
48 cl::value_desc("directory"));
// -l<name>: libraries that main() links in after all positional inputs.
51 Libraries("l", cl::desc("Specify libraries to link to"), cl::Prefix,
52 cl::value_desc("library prefix"));
// -s: strip symbol information from the result.
55 Strip("s", cl::desc("Strip symbol info from executable"));
// -disable-internalize: when set, main() does not add the internalize pass.
58 NoInternalize("disable-internalize",
59 cl::desc("Do not mark all symbols as internal"));
// -export-dynamic: alias of -disable-internalize.
61 ExportDynamic("export-dynamic", cl::desc("Alias for -disable-internalize"),
62 cl::aliasopt(NoInternalize));
// -link-as-library: main() then writes the bytecode directly to the output
// name (no ".bc" suffix) and skips the executable-generation step.
65 LinkAsLibrary("link-as-library", cl::desc("Link the .bc files together as a"
66 " library, not an executable"));
// -native: request a native binary instead of the lli launcher script.
69 Native("native", cl::desc("Generate a native binary instead of a shell script"));
71 // Compatibility options that are ignored, but supported by LD
73 CO3("soname", cl::Hidden, cl::desc("Compatibility option: ignored"));
75 CO4("version-script", cl::Hidden, cl::desc("Compatibility option: ignored"));
77 CO5("eh-frame-hdr", cl::Hidden, cl::desc("Compatibility option: ignored"));
79 CO6("r", cl::Hidden, cl::desc("Compatibility option: ignored"));
// FileExists - Return true if a file system entry named FN exists.  Only
// existence is tested (access() with F_OK); read permission is not checked, so
// a true result does not guarantee that the file can be opened.
static inline bool FileExists(const std::string &FN) {
  return access(FN.c_str(), F_OK) != -1;
}
88 // LoadObject - Read the specified "object file", which should not search the
89 // library path to find it.
//
// FN is taken by value because it may be rewritten below to the path found
// under LLVM_LIB_SEARCH_PATH.  On success the parsed Module is returned; on
// failure a null auto_ptr is returned and OutErrorMessage is set to a
// description of the problem.
90 static inline std::auto_ptr<Module> LoadObject(std::string FN,
91 std::string &OutErrorMessage) {
92 if (Verbose) std::cerr << "Loading '" << FN << "'\n";
93 if (!FileExists(FN)) {
94 // Attempt to load from the LLVM_LIB_SEARCH_PATH directory... if we would
95 // otherwise fail. This is used to locate objects like crtend.o.
97 char *SearchPath = getenv("LLVM_LIB_SEARCH_PATH");
98 if (SearchPath && FileExists(std::string(SearchPath)+"/"+FN))
99 FN = std::string(SearchPath)+"/"+FN;
// Failure path: report that the input could not be found and hand back a
// null pointer.
101 OutErrorMessage = "could not find input file '" + FN + "'!";
102 return std::auto_ptr<Module>();
// Parse the bytecode; ErrorMessage receives the parser's own diagnostic.
106 std::string ErrorMessage;
107 Module *Result = ParseBytecodeFile(FN, &ErrorMessage);
108 if (Result) return std::auto_ptr<Module>(Result);
// Parsing failed: report the file as corrupt, appending the parser's message
// when it supplied one.
110 OutErrorMessage = "Bytecode file '" + FN + "' corrupt!";
111 if (ErrorMessage.size()) OutErrorMessage += ": " + ErrorMessage;
112 return std::auto_ptr<Module>();
// LoadSingleLibraryObject - Load Filename through LoadObject().  When loading
// fails and -v (Verbose) is set, the failure and any error text from
// LoadObject() are printed to std::cerr.
// NOTE(review): the tail of this function, including its return statement, is
// not visible here; LoadLibraryExactName() treats a non-null result as a
// loaded module -- confirm that a null pointer is returned on failure.
116 static Module *LoadSingleLibraryObject(const std::string &Filename) {
117 std::string ErrorMessage;
118 std::auto_ptr<Module> M = LoadObject(Filename, ErrorMessage);
119 if (M.get() == 0 && Verbose) {
120 std::cerr << "Error loading '" + Filename + "'";
121 if (!ErrorMessage.empty()) std::cerr << ": " << ErrorMessage;
// IsArchive - Returns true iff FILENAME appears to be the name of an ar
// archive file.  It determines this by checking that the file begins with the
// eight-byte ar magic string "!<arch>\n".  A file that cannot be opened, or
// that is shorter than the magic string, is reported as not being an archive.
static bool IsArchive(const std::string &filename) {
  static const char ArchiveMagic[] = "!<arch>\012";
  // Length of the magic string, excluding the terminating NUL.
  const std::string::size_type MagicSize = sizeof(ArchiveMagic) - 1;

  // Open in binary mode so no newline translation can alter the magic bytes.
  std::ifstream f(filename.c_str(), std::ios::in | std::ios::binary);
  if (!f) return false;

  // The buffer size is a compile-time constant, so no variable-length array
  // (a compiler extension, not standard C++) is needed.
  char buf[sizeof(ArchiveMagic)];
  f.read(buf, static_cast<std::streamsize>(MagicSize));

  // Only compare when the whole magic string was actually read; after a short
  // or failed read the remainder of buf holds indeterminate bytes.
  if (f.gcount() != static_cast<std::streamsize>(MagicSize)) return false;
  return std::string(buf, MagicSize) == ArchiveMagic;
}
140 // LoadLibraryExactName - This looks for a file with a known name and tries to
141 // load it, similarly to LoadLibraryFromDirectory().
//
// Note the inverted result convention: false is returned on success (callers
// test `!LoadLibraryExactName(...)`).  Modules that were loaded are added to
// Objects.
142 static inline bool LoadLibraryExactName(const std::string &FileName,
143 std::vector<Module*> &Objects, bool &isArchive) {
144 if (Verbose) std::cerr << " Considering '" << FileName << "'\n";
145 if (FileExists(FileName)) {
// An ar archive is handed to ReadArchiveFile(), which receives Objects and an
// error string (presumably filling Objects with the archive's members --
// confirm against ReadArchiveFile's declaration).
146 if (IsArchive(FileName)) {
147 std::string ErrorMessage;
148 if (Verbose) std::cerr << " Loading '" << FileName << "'\n";
149 if (!ReadArchiveFile(FileName, Objects, &ErrorMessage)) {
151 return false; // Success!
// Archive could not be read: print what went wrong.
154 std::cerr << " Error loading archive '" + FileName + "'";
155 if (!ErrorMessage.empty()) std::cerr << ": " << ErrorMessage;
// Not an archive: load the file as one bytecode module and record it.
159 if (Module *M = LoadSingleLibraryObject(FileName)) {
161 Objects.push_back(M);
169 // LoadLibrary - Try to load a library named LIBNAME that contains
170 // LLVM bytecode. If SEARCH is true, then search for a file named
171 // libLIBNAME.{a,so,bc} in the current library search path. Otherwise,
172 // assume LIBNAME is the real name of the library file. This method puts
173 // the loaded modules into the Objects list, and sets isArchive to true if
174 // a .a file was loaded. It returns true if no library is found or if an
175 // error occurs; otherwise it returns false.
//
// When nothing can be loaded, ErrorMessage is set to a "library not found"
// description.
177 static inline bool LoadLibrary(const std::string &LibName,
178 std::vector<Module*> &Objects, bool &isArchive,
179 bool search, std::string &ErrorMessage) {
181 // First, try the current directory. Then, iterate over the
182 // directories in LibPaths, looking for a suitable match for LibName
// (main() inserts "." at the front of LibPaths, which is how the current
// directory comes to be tried first.)
184 for (unsigned NextLibPathIdx = 0; NextLibPathIdx != LibPaths.size();
186 std::string Directory = LibPaths[NextLibPathIdx] + "/";
// In each directory try the .a, .so and .bc spellings, in that order.
// LoadLibraryExactName() returns false when it succeeds.
187 if (!LoadLibraryExactName(Directory + "lib" + LibName + ".a",
190 if (!LoadLibraryExactName(Directory + "lib" + LibName + ".so",
193 if (!LoadLibraryExactName(Directory + "lib" + LibName + ".bc",
198 // If they said no searching, then assume LibName is the real name.
199 if (!LoadLibraryExactName(LibName, Objects, isArchive))
// Nothing matched: describe the failure for the caller.
202 ErrorMessage = "error linking library '-l" + LibName+ "': library not found!";
206 static void GetAllDefinedSymbols(Module *M,
207 std::set<std::string> &DefinedSymbols) {
208 for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
209 if (I->hasName() && !I->isExternal() && !I->hasInternalLinkage())
210 DefinedSymbols.insert(I->getName());
211 for (Module::giterator I = M->gbegin(), E = M->gend(); I != E; ++I)
212 if (I->hasName() && !I->isExternal() && !I->hasInternalLinkage())
213 DefinedSymbols.insert(I->getName());
216 // GetAllUndefinedSymbols - This calculates the set of undefined symbols that
217 // still exist in an LLVM module. This is a bit tricky because there may be two
218 // symbols with the same name, but different LLVM types that will be resolved to
219 // each other, but aren't currently (thus we need to treat it as resolved).
//
// UndefinedSymbols is cleared first and then refilled, so any previous
// contents are discarded.
221 static void GetAllUndefinedSymbols(Module *M,
222 std::set<std::string> &UndefinedSymbols) {
223 std::set<std::string> DefinedSymbols;
224 UndefinedSymbols.clear(); // Start out empty
// First range of the module (begin()/end()): sort each name into the
// undefined set or, when it does not have internal linkage, the defined set.
226 for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
229 UndefinedSymbols.insert(I->getName());
230 else if (!I->hasInternalLinkage())
231 DefinedSymbols.insert(I->getName());
// Same classification for the gbegin()/gend() range.
233 for (Module::giterator I = M->gbegin(), E = M->gend(); I != E; ++I)
236 UndefinedSymbols.insert(I->getName());
237 else if (!I->hasInternalLinkage())
238 DefinedSymbols.insert(I->getName());
241 // Prune out any defined symbols from the undefined symbols set...
// The loop header has no increment: erase(I++) advances the iterator before
// the erased element is invalidated, and the other branch increments
// explicitly.
242 for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
243 I != UndefinedSymbols.end(); )
244 if (DefinedSymbols.count(*I))
245 UndefinedSymbols.erase(I++); // This symbol really is defined!
247 ++I; // Keep this symbol in the undefined symbols list
// LinkLibrary - Link into M those modules of the library LibName that define
// a symbol M currently leaves undefined.  `search` and ErrorMessage are passed
// through to LoadLibrary().  Returns true when linking a required module
// fails; returns false when M has no undefined symbols to begin with.
251 static bool LinkLibrary(Module *M, const std::string &LibName,
252 bool search, std::string &ErrorMessage) {
253 std::set<std::string> UndefinedSymbols;
254 GetAllUndefinedSymbols(M, UndefinedSymbols);
255 if (UndefinedSymbols.empty()) {
256 if (Verbose) std::cerr << " No symbols undefined, don't link library!\n";
257 return false; // No need to link anything in!
// Load every module the library provides into Objects.
260 std::vector<Module*> Objects;
262 if (LoadLibrary(LibName, Objects, isArchive, search, ErrorMessage))
265 // Figure out which symbols are defined by all of the modules in the .a file
// DefinedSymbols[i] holds the symbols defined by Objects[i]; the two vectors
// are kept index-aligned throughout.
266 std::vector<std::set<std::string> > DefinedSymbols;
267 DefinedSymbols.resize(Objects.size());
268 for (unsigned i = 0; i != Objects.size(); ++i)
269 GetAllDefinedSymbols(Objects[i], DefinedSymbols[i]);
// Linking one module can introduce new undefined symbols that another module
// of the same library satisfies, so keep sweeping while a sweep links
// something.
272 while (Linked) { // While we are linking in object files, loop.
275 for (unsigned i = 0; i != Objects.size(); ++i) {
276 // Consider whether we need to link in this module... we only need to
277 // link it in if it defines some symbol which is so far undefined.
279 const std::set<std::string> &DefSymbols = DefinedSymbols[i];
281 bool ObjectRequired = false;
282 for (std::set<std::string>::iterator I = UndefinedSymbols.begin(),
283 E = UndefinedSymbols.end(); I != E; ++I)
284 if (DefSymbols.count(*I)) {
286 std::cerr << " Found object providing symbol '" << *I << "'...\n";
287 ObjectRequired = true;
291 // We DO need to link this object into the program...
292 if (ObjectRequired) {
293 if (LinkModules(M, Objects[i], &ErrorMessage))
294 return true; // Couldn't link in the right object file...
296 // Since we have linked in this object, delete it from the list of
297 // objects to consider in this archive file.
// Swap-with-last then pop: removes entry i without shifting the rest; the
// element swapped into slot i is re-examined because of the --i below.
298 std::swap(Objects[i], Objects.back());
299 std::swap(DefinedSymbols[i], DefinedSymbols.back());
301 DefinedSymbols.pop_back();
302 --i; // Do not skip an entry
304 // The undefined symbols set should have shrunk.
305 GetAllUndefinedSymbols(M, UndefinedSymbols);
306 Linked = true; // We have linked something in!
// PrintAndReturn - Write "<progname><Extra>: <Message>" followed by a newline
// to std::cerr.  main() returns this function's result as the process exit
// status when it reports a fatal error.
// NOTE(review): the return statement itself is not visible in this excerpt.
314 static int PrintAndReturn(const char *progname, const std::string &Message,
315 const std::string &Extra = "") {
316 std::cerr << progname << Extra << ": " << Message << "\n";
322 // Function: copy_env()
325 // This function takes an array of environment variables and makes a
326 // copy of it. This copy can then be manipulated any way the caller likes
327 // without affecting the process's real environment.
330 // envp - An array of C strings containing an environment.
336 // NULL - An error occurred.
337 // Otherwise, a pointer to a new array of C strings is returned. Every string
338 // in the array is a duplicate of the one in the original array (i.e. we do
339 // not copy the char *'s from one array to another).
//
// The array and every string in it are allocated with new[]; nothing in this
// function releases them.
342 copy_env (char ** const envp)
344 // The new environment list
347 // The number of entries in the old environment list
351 // Count the number of entries in the old list;
353 for (entries = 0; envp[entries] != NULL; entries++)
359 // Add one more entry for the NULL pointer that ends the list.
364 // If there are no entries at all, just return NULL.
372 // Allocate a new environment list.
// NOTE(review): a plain new-expression reports failure by throwing
// std::bad_alloc rather than yielding NULL, so this comparison is presumably
// never true.  Also, a parenthesized type-id followed by an array bound is not
// the standard spelling; `new char *[entries]` is -- confirm with the
// toolchain in use.
374 if ((newenv = new (char *) [entries]) == NULL)
380 // Make a copy of the list. Don't forget the NULL that ends the list.
383 while (envp[entries] != NULL)
// Each string gets its own buffer, sized for the text plus the terminator.
385 newenv[entries] = new char[strlen (envp[entries]) + 1];
386 strcpy (newenv[entries], envp[entries]);
389 newenv[entries] = NULL;
396 // Function: remove_env()
399 // Remove the specified environment variable from the environment array.
402 // name - The name of the variable to remove. It cannot be NULL.
403 // envp - The array of environment variables. It cannot be NULL.
406 // envp - The pointer to the specified variable name is removed.
412 // This is mainly done because functions to remove items from the environment
413 // are not available across all platforms. In particular, Solaris does not
414 // seem to have an unsetenv() function or a setenv() function (or they are
415 // undocumented if they do exist).
//
// The array is modified in place, so callers should pass a private copy such
// as the one produced by copy_env() (as main() does).
418 remove_env (const char * name, char ** const envp)
420 // Pointer for scanning arrays
423 // Index for selecting elements of the environment array
426 for (index=0; envp[index] != NULL; index++)
429 // Find the first equals sign in the array and make it an EOS character.
// Terminating the entry at '=' leaves just the variable name, so the strcmp
// below compares names only.  strchr yields NULL for an entry without '='.
431 p = strchr (envp[index], '=');
442 // Compare the two strings. If they are equal, zap this string.
443 // Otherwise, restore it.
445 if (!strcmp (name, envp[index]))
// main - Link the input bytecode files and the requested libraries into one
// module, run a fixed sequence of cleanup passes over it, and write the result
// as bytecode.  Unless -link-as-library is given, an executable form is then
// produced as described by the comments below (native binary or lli launcher
// script).  envp is the process environment; a sanitized copy of it is passed
// to the programs run through ExecWait().
459 int main(int argc, char **argv, char ** envp) {
460 cl::ParseCommandLineOptions(argc, argv, " llvm linker for GCC\n");
// The first positional input seeds the composite module that everything else
// is linked into.
462 std::string ErrorMessage;
463 std::auto_ptr<Module> Composite(LoadObject(InputFilenames[0], ErrorMessage));
464 if (Composite.get() == 0)
465 return PrintAndReturn(argv[0], ErrorMessage);
467 // We always look first in the current directory when searching for libraries.
468 LibPaths.insert(LibPaths.begin(), ".");
470 // If the user specified an extra search path in their environment, respect it.
471 if (char *SearchPath = getenv("LLVM_LIB_SEARCH_PATH"))
472 LibPaths.push_back(SearchPath);
// Link the remaining positional inputs (index 0 is already the composite).
474 for (unsigned i = 1; i < InputFilenames.size(); ++i) {
475 // A user may specify an ar archive without -l, perhaps because it
476 // is not installed as a library. Detect that and link the library.
477 if (IsArchive(InputFilenames[i])) {
478 if (Verbose) std::cerr << "Linking archive '" << InputFilenames[i]
// search == false: the name is used as given, not expanded to lib<name>.*
480 if (LinkLibrary(Composite.get(), InputFilenames[i], false, ErrorMessage))
481 return PrintAndReturn(argv[0], ErrorMessage,
482 ": error linking in '" + InputFilenames[i] + "'");
// Load the input as a single bytecode object and link it in.
486 std::auto_ptr<Module> M(LoadObject(InputFilenames[i], ErrorMessage));
488 return PrintAndReturn(argv[0], ErrorMessage);
490 if (Verbose) std::cerr << "Linking in '" << InputFilenames[i] << "'\n";
492 if (LinkModules(Composite.get(), M.get(), &ErrorMessage))
493 return PrintAndReturn(argv[0], ErrorMessage,
494 ": error linking in '" + InputFilenames[i] + "'");
497 // Remove any consecutive duplicates of the same library...
498 Libraries.erase(std::unique(Libraries.begin(), Libraries.end()),
501 // Link in all of the libraries next...
502 for (unsigned i = 0; i != Libraries.size(); ++i) {
503 if (Verbose) std::cerr << "Linking in library: -l" << Libraries[i] << "\n";
// search == true: look for lib<name>.{a,so,bc} along LibPaths.
504 if (LinkLibrary(Composite.get(), Libraries[i], true, ErrorMessage))
505 return PrintAndReturn(argv[0], ErrorMessage);
508 // In addition to just linking the input from GCC, we also want to spiff it up
509 // a little bit. Do this now.
513 // Add an appropriate TargetData instance for this module...
514 Passes.add(new TargetData("gccld", Composite.get()));
516 // Linking modules together can lead to duplicated global constants, only keep
517 // one copy of each constant...
519 Passes.add(createConstantMergePass());
521 // If the -s command line option was specified, strip the symbols out of the
522 // resulting program to make it smaller. -s is a GCC option that we are
526 Passes.add(createSymbolStrippingPass());
528 // Often if the programmer does not specify proper prototypes for the
529 // functions they are calling, they end up calling a vararg version of the
530 // function that does not get a body filled in (the real function has typed
531 // arguments). This pass merges the two functions.
533 Passes.add(createFunctionResolvingPass());
535 if (!NoInternalize) {
536 // Now that composite has been compiled, scan through the module, looking
537 // for a main function. If main is defined, mark all other functions
540 Passes.add(createInternalizePass());
543 // Remove unused arguments from functions...
545 Passes.add(createDeadArgEliminationPass());
547 // The FuncResolve pass may leave cruft around if functions were prototyped
548 // differently than they were defined. Remove this cruft.
550 Passes.add(createInstructionCombiningPass());
552 // Delete basic blocks, which optimization passes may have killed...
554 Passes.add(createCFGSimplificationPass());
556 // Now that we have optimized the program, discard unreachable functions...
558 Passes.add(createGlobalDCEPass());
560 // Add the pass that writes bytecode to the output file...
// For an executable the bytecode goes to "<output>.bc" so that the output name
// itself stays free for the launcher script or native binary; with
// -link-as-library the bytecode is written to the output name directly.
561 std::string RealBytecodeOutput = OutputFilename;
562 if (!LinkAsLibrary) RealBytecodeOutput += ".bc";
563 std::ofstream Out(RealBytecodeOutput.c_str());
565 return PrintAndReturn(argv[0], "error opening '" + RealBytecodeOutput +
567 Passes.add(new WriteBytecodePass(&Out)); // Write bytecode to file...
569 // Make sure that the Out file gets unlink'd from the disk if we get a SIGINT
570 RemoveFileOnSignal(RealBytecodeOutput);
572 // Run our queue of passes all at once now, efficiently.
573 Passes.run(*Composite.get());
576 if (!LinkAsLibrary) {
578 // If the user wants to generate a native executable, compile it from the
581 // Otherwise, create a script that will run the bytecode through the JIT.
586 // Remove these environment variables from the environment of the
587 // programs that we will execute. It appears that GCC sets these
588 // environment variables so that the programs it uses can configure
589 // themselves identically.
591 // However, when we invoke GCC below, we want it to use its normal
592 // configuration. Hence, we must sanitize its environment.
594 char ** clean_env = copy_env (envp);
595 if (clean_env == NULL)
597 return PrintAndReturn (argv[0], "Failed to duplicate environment");
599 remove_env ("LIBRARY_PATH", clean_env);
600 remove_env ("COLLECT_GCC_OPTIONS", clean_env);
601 remove_env ("GCC_EXEC_PREFIX", clean_env);
602 remove_env ("COMPILER_PATH", clean_env);
603 remove_env ("COLLECT_GCC", clean_env);
606 // Determine the locations of the llc and gcc programs.
608 std::string llc=FindExecutable ("llc", argv[0]);
609 std::string gcc=FindExecutable ("gcc", argv[0]);
612 return PrintAndReturn (argv[0], "Failed to find llc");
617 return PrintAndReturn (argv[0], "Failed to find gcc");
621 // Run LLC to convert the bytecode file into assembly code.
// cmd is the argument vector handed to ExecWait(); the assembly is written to
// "<output>.s" and the bytecode file is llc's input.
624 std::string AssemblyFile = OutputFilename + ".s";
626 cmd[0] = llc.c_str();
629 cmd[3] = AssemblyFile.c_str();
630 cmd[4] = RealBytecodeOutput.c_str();
632 if ((ExecWait (cmd, clean_env)) == -1)
634 return PrintAndReturn (argv[0], "Failed to compile bytecode");
638 // Run GCC to assemble and link the program into native code.
641 // We can't just assemble and link the file with the system assembler
642 // and linker because we don't know where to put the _start symbol.
643 // GCC mysteriously knows how to do it.
645 cmd[0] = gcc.c_str();
647 cmd[2] = OutputFilename.c_str();
648 cmd[3] = AssemblyFile.c_str();
650 if ((ExecWait (cmd, clean_env)) == -1)
652 return PrintAndReturn (argv[0], "Failed to link native code file");
656 // The assembly file is no longer needed. Remove it, but do not exit
657 // if we fail to unlink it.
659 if (((access (AssemblyFile.c_str(), F_OK)) != -1) &&
660 ((unlink (AssemblyFile.c_str())) == -1))
662 std::cerr << "Warning: Failed to unlink " << AssemblyFile << "\n";
667 // Output the script to start the program...
668 std::ofstream Out2(OutputFilename.c_str());
670 return PrintAndReturn(argv[0], "error opening '" + OutputFilename +
// The script runs lli on "$0.bc", i.e. on the bytecode file that was written
// next to the script under the name "<output>.bc".
672 Out2 << "#!/bin/sh\nlli -q $0.bc $*\n";
676 // Make the script executable...
677 MakeFileExecutable (OutputFilename);
679 // Make the bytecode file readable and directly executable in LLEE as well
680 MakeFileExecutable (RealBytecodeOutput);
681 MakeFileReadable (RealBytecodeOutput);