1 //=== MachOWriter.h - Target-independent Mach-O writer support --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Nate Begeman and is distributed under the
6 // University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the MachOWriter class.
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_CODEGEN_MACHOWRITER_H
15 #define LLVM_CODEGEN_MACHOWRITER_H
17 #include "llvm/DerivedTypes.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/Target/TargetData.h"
20 #include "llvm/Target/TargetMachine.h"
26 class MachineCodeEmitter;
27 class MachOCodeEmitter;
29 /// MachOWriter - This class implements the common target-independent code for
30 /// writing Mach-O files. Targets should derive a class from this to
31 /// parameterize the output format.
33 class MachOWriter : public MachineFunctionPass {
34 friend class MachOCodeEmitter;
36 MachineCodeEmitter &getMachineCodeEmitter() const {
37 return *(MachineCodeEmitter*)MCE;
42 typedef std::vector<unsigned char> DataBuffer;
45 MachOWriter(std::ostream &O, TargetMachine &TM);
47 /// Output stream to send the resultant object file to.
51 /// Target machine description.
55 /// Mang - The object used to perform name mangling for this module.
59 /// MCE - The MachineCodeEmitter object that we are exposing to emit machine
60 /// code for functions to the .o file.
61 MachOCodeEmitter *MCE;
63 /// is64Bit/isLittleEndian - This information is inferred from the target
64 /// machine directly, indicating what header values and flags to set.
65 bool is64Bit, isLittleEndian;
67 /// doInitialization - Emit the file header and all of the global variables
68 /// for the module to the Mach-O file.
69 bool doInitialization(Module &M);
71 bool runOnMachineFunction(MachineFunction &MF);
73 /// doFinalization - Now that the module has been completely processed, emit
74 /// the Mach-O file to 'O'.
75 bool doFinalization(Module &M);
77 /// MachOHeader - This struct contains the header information about a
78 /// specific architecture type/subtype pair that is emitted to the file.
80 uint32_t magic; // mach magic number identifier
81 uint32_t cputype; // cpu specifier
82 uint32_t cpusubtype; // machine specifier
83 uint32_t filetype; // type of file
84 uint32_t ncmds; // number of load commands
85 uint32_t sizeofcmds; // the size of all the load commands
86 uint32_t flags; // flags
87 uint32_t reserved; // 64-bit only
89 /// HeaderData - The actual data for the header which we are building
90 /// up for emission to the file.
91 DataBuffer HeaderData;
93 // Constants for the cputype field
94 // see <mach/machine.h>
95 enum { CPU_TYPE_I386 = 7,
96 CPU_TYPE_X86_64 = 7 | 0x1000000,
99 CPU_TYPE_POWERPC = 18,
100 CPU_TYPE_POWERPC64 = 18 | 0x1000000
103 // Constants for the cpusubtype field
104 // see <mach/machine.h>
105 enum { CPU_SUBTYPE_I386_ALL = 3,
106 CPU_SUBTYPE_X86_64_ALL = 3,
107 CPU_SUBTYPE_ARM_ALL = 0,
108 CPU_SUBTYPE_SPARC_ALL = 0,
109 CPU_SUBTYPE_POWERPC_ALL = 0
112 // Constants for the filetype field
113 // see <mach-o/loader.h> for additional info on the various types
114 enum { MH_OBJECT = 1, // relocatable object file
115 MH_EXECUTE = 2, // demand paged executable file
116 MH_FVMLIB = 3, // fixed VM shared library file
117 MH_CORE = 4, // core file
118 MH_PRELOAD = 5, // preloaded executable file
119 MH_DYLIB = 6, // dynamically bound shared library
120 MH_DYLINKER = 7, // dynamic link editor
121 MH_BUNDLE = 8, // dynamically bound bundle file
122 MH_DYLIB_STUB = 9, // shared library stub for static linking only
123 MH_DSYM = 10 // companion file with only debug sections
126 // Constants for the flags field
127 enum { MH_NOUNDEFS = 1 << 0,
128 // the object file has no undefined references
129 MH_INCRLINK = 1 << 1,
130 // the object file is the output of an incremental link against
131 // a base file and cannot be link edited again
132 MH_DYLDLINK = 1 << 2,
133 // the object file is input for the dynamic linker and cannot be
134 // statically link edited again.
135 MH_BINDATLOAD = 1 << 3,
136 // the object file's undefined references are bound by the
137 // dynamic linker when loaded.
138 MH_PREBOUND = 1 << 4,
139 // the file has its dynamic undefined references prebound
140 MH_SPLIT_SEGS = 1 << 5,
141 // the file has its read-only and read-write segments split
142 // see <mach/shared_memory_server.h>
143 MH_LAZY_INIT = 1 << 6,
144 // the shared library init routine is to be run lazily via
145 // catching memory faults to its writable segments (obsolete)
146 MH_TWOLEVEL = 1 << 7,
147 // the image is using two-level namespace bindings
148 MH_FORCE_FLAT = 1 << 8,
149 // the executable is forcing all images to use flat namespace
// MH_NOMULTIDEFS is 0x200 in <mach-o/loader.h>, i.e. bit 9. The previous
// value (1 << 8) made it indistinguishable from MH_FORCE_FLAT.
151 MH_NOMULTIDEFS = 1 << 9,
152 // this umbrella guarantees no multiple definitions of symbols
153 // in its sub-images so the two-level namespace hints can
155 MH_NOFIXPREBINDING = 1 << 10,
156 // do not have dyld notify the prebinding agent about this
158 MH_PREBINDABLE = 1 << 11,
159 // the binary is not prebound but can have its prebinding
160 // redone. only used when MH_PREBOUND is not set.
161 MH_ALLMODSBOUND = 1 << 12,
162 // indicates that this binary binds to all two-level namespace
163 // modules of its dependent libraries. Only used when
164 // MH_PREBINDABLE and MH_TWOLEVEL are both set.
165 MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
166 // safe to divide up the sections into sub-sections via symbols
167 // for dead code stripping.
168 MH_CANONICAL = 1 << 14,
169 // the binary has been canonicalized via the unprebind operation
170 MH_WEAK_DEFINES = 1 << 15,
171 // the final linked image contains external weak symbols
172 MH_BINDS_TO_WEAK = 1 << 16,
173 // the final linked image uses weak symbols
174 MH_ALLOW_STACK_EXECUTION = 1 << 17
175 // When this bit is set, all stacks in the task will be given
176 // stack execution privilege. Only used in MH_EXECUTE filetype
/// Default constructor: every header field starts at zero.
179 MachOHeader() : magic(0), cputype(0), cpusubtype(0), filetype(0),
180 ncmds(0), sizeofcmds(0), flags(0), reserved(0) { }
182 /// cmdSize - This routine returns the size of the MachOHeader as written
183 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
184 unsigned cmdSize(bool is64Bit) const {
186 return 8 * sizeof(uint32_t);
188 return 7 * sizeof(uint32_t);
191 /// setMagic - This routine sets the appropriate value for the 'magic'
192 /// field based on pointer size and endianness.
193 void setMagic(bool isLittleEndian, bool is64Bit) {
195 if (is64Bit) magic = 0xcffaedfe;
196 else magic = 0xcefaedfe;
198 if (is64Bit) magic = 0xfeedfacf;
199 else magic = 0xfeedface;
203 /// Header - An instance of MachOHeader that we will update while we build
204 /// the file, and then emit during finalization.
209 /// MachOSegment - This struct contains the necessary information to
210 /// emit the load commands for each section in the file.
211 struct MachOSegment {
212 uint32_t cmd; // LC_SEGMENT or LC_SEGMENT_64
213 uint32_t cmdsize; // Total size of this struct and section commands
214 std::string segname; // segment name
215 uint64_t vmaddr; // address of this segment
216 uint64_t vmsize; // size of this segment, may be larger than filesize
217 uint64_t fileoff; // offset in file
218 uint64_t filesize; // amount to read from file
219 uint32_t maxprot; // maximum VM protection
220 uint32_t initprot; // initial VM protection
221 uint32_t nsects; // number of sections in this segment
222 uint32_t flags; // flags
224 // Constants for the vm protection fields
225 // see <mach-o/vm_prot.h>
226 enum { VM_PROT_NONE = 0x00,
227 VM_PROT_READ = 0x01, // read permission
228 VM_PROT_WRITE = 0x02, // write permission
229 VM_PROT_EXECUTE = 0x04, // execute permission,
233 // Constants for the cmd field
234 // see <mach-o/loader.h>
235 enum { LC_SEGMENT = 0x01, // segment of this file to be mapped
236 LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped
239 /// cmdSize - This routine returns the size of the MachOSegment as written
240 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
241 unsigned cmdSize(bool is64Bit) const {
243 return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
245 return 10 * sizeof(uint32_t) + 16; // addresses only 32 bits
/// Construct a segment load command named \p seg. The command id is
/// LC_SEGMENT_64 when \p is64Bit is true and LC_SEGMENT otherwise; both
/// protection fields start as VM_PROT_ALL and every other numeric field
/// starts at zero.
248 MachOSegment(const std::string &seg, bool is64Bit)
249 : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
250 vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
251 initprot(VM_PROT_ALL), nsects(0), flags(0) { }
254 /// MachOSection - This struct contains information about each section in a
255 /// particular segment that is emitted to the file. This is eventually
256 /// turned into the SectionCommand in the load command for a particular
258 struct MachOSection {
259 std::string sectname; // name of this section,
260 std::string segname; // segment this section goes in
261 uint64_t addr; // memory address of this section
262 uint64_t size; // size in bytes of this section
263 uint32_t offset; // file offset of this section
264 uint32_t align; // section alignment (power of 2)
265 uint32_t reloff; // file offset of relocation entries
266 uint32_t nreloc; // number of relocation entries
267 uint32_t flags; // flags (section type and attributes)
268 uint32_t reserved1; // reserved (for offset or index)
269 uint32_t reserved2; // reserved (for count or sizeof)
270 uint32_t reserved3; // reserved (64 bit only)
272 /// A unique number for this section, which will be used to match symbols
273 /// to the correct section.
276 /// SectionData - The actual data for this section which we are building
277 /// up for emission to the file.
278 DataBuffer SectionData;
280 // Constants for the section types (low 8 bits of flags field)
281 // see <mach-o/loader.h>
282 enum { S_REGULAR = 0,
285 // zero fill on demand section
286 S_CSTRING_LITERALS = 2,
287 // section with only literal C strings
288 S_4BYTE_LITERALS = 3,
289 // section with only 4 byte literals
290 S_8BYTE_LITERALS = 4,
291 // section with only 8 byte literals
292 S_LITERAL_POINTERS = 5,
293 // section with only pointers to literals
294 S_NON_LAZY_SYMBOL_POINTERS = 6,
295 // section with only non-lazy symbol pointers
296 S_LAZY_SYMBOL_POINTERS = 7,
297 // section with only lazy symbol pointers
299 // section with only symbol stubs
300 // byte size of stub in the reserved2 field
301 S_MOD_INIT_FUNC_POINTERS = 9,
302 // section with only function pointers for initialization
303 S_MOD_TERM_FUNC_POINTERS = 10,
304 // section with only function pointers for termination
306 // section contains symbols that are coalesced
308 // zero fill on demand section (that can be larger than 4GB)
310 // section with only pairs of function pointers for interposing
311 S_16BYTE_LITERALS = 14
312 // section with only 16 byte literals
315 // Constants for the section flags (high 24 bits of flags field)
316 // see <mach-o/loader.h>
// Bit 31 is built from an unsigned literal: shifting a signed 1 into the
// sign bit of a 32-bit int is not portable across C++ language revisions.
317 enum { S_ATTR_PURE_INSTRUCTIONS = 1U << 31,
318 // section contains only true machine instructions
319 S_ATTR_NO_TOC = 1 << 30,
320 // section contains coalesced symbols that are not to be in a
321 // ranlib table of contents
322 S_ATTR_STRIP_STATIC_SYMS = 1 << 29,
323 // ok to strip static symbols in this section in files with the
325 S_ATTR_NO_DEAD_STRIP = 1 << 28,
327 S_ATTR_LIVE_SUPPORT = 1 << 27,
328 // blocks are live if they reference live blocks
329 S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
330 // used with i386 code stubs written on by dyld
331 S_ATTR_DEBUG = 1 << 25,
333 S_ATTR_SOME_INSTRUCTIONS = 1 << 10,
334 // section contains some machine instructions
335 S_ATTR_EXT_RELOC = 1 << 9,
336 // section has external relocation entries
337 S_ATTR_LOC_RELOC = 1 << 8
338 // section has local relocation entries
341 /// cmdSize - This routine returns the size of the MachOSection as written
342 /// to disk, depending on whether the destination is a 64 bit Mach-O file.
343 unsigned cmdSize(bool is64Bit) const {
345 return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
347 return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits
/// Construct a section named \p sect that lives in segment \p seg. The
/// numeric fields initialized below all start at zero.
350 MachOSection(const std::string &seg, const std::string &sect)
351 : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(0),
352 reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
356 /// SectionList - This is the list of sections that we have emitted to the
357 /// file. Once the file has been completely built, the segment load command
358 /// SectionCommands are constructed from this info.
359 std::list<MachOSection> SectionList;
361 /// SectionLookup - This is a mapping from section name to SectionList entry
362 std::map<std::string, MachOSection*> SectionLookup;
364 /// getSection - Return the section with the specified name, creating a new
365 /// section if one does not already exist.
366 MachOSection &getSection(const std::string &seg, const std::string &sect,
367 unsigned Flags = 0) {
// The lookup key is the segment name concatenated with the section name.
368 MachOSection *&SN = SectionLookup[seg+sect];
371 SectionList.push_back(MachOSection(seg, sect));
// SectionList is a std::list, so the address of the new element stays valid
// as further sections are appended.
372 SN = &SectionList.back();
// Index is read after the push_back, so the first section created is 1.
373 SN->Index = SectionList.size();
374 SN->flags = MachOSection::S_REGULAR | Flags;
377 MachOSection &getTextSection() {
378 return getSection("__TEXT", "__text",
379 MachOSection::S_ATTR_PURE_INSTRUCTIONS |
380 MachOSection::S_ATTR_SOME_INSTRUCTIONS);
382 MachOSection &getBSSSection() {
383 return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL);
385 MachOSection &getDataSection() {
386 return getSection("__DATA", "__data");
388 MachOSection &getConstSection(const Type *Ty) {
389 // FIXME: support cstring literals and pointer literal
390 if (Ty->isPrimitiveType()) {
391 unsigned Size = TM.getTargetData()->getTypeSize(Ty);
393 default: break; // Fall through to __TEXT,__const
395 return getSection("__TEXT", "__literal4",
396 MachOSection::S_4BYTE_LITERALS);
398 return getSection("__TEXT", "__literal8",
399 MachOSection::S_8BYTE_LITERALS);
401 return getSection("__TEXT", "__literal16",
402 MachOSection::S_16BYTE_LITERALS);
405 return getSection("__TEXT", "__const");
408 /// MachOSymTab - This struct contains information about the offsets and
409 /// size of symbol table information.
412 uint32_t cmd; // LC_SYMTAB
413 uint32_t cmdsize; // sizeof( MachOSymTab )
414 uint32_t symoff; // symbol table offset
415 uint32_t nsyms; // number of symbol table entries
416 uint32_t stroff; // string table offset
417 uint32_t strsize; // string table size in bytes
419 // Constants for the cmd field
420 // see <mach-o/loader.h>
421 enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info
/// Default constructor: cmd is LC_SYMTAB, cmdsize is fixed at six 32-bit
/// words (one per field of this struct), and all offsets/counts start at zero.
424 MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0),
425 nsyms(0), stroff(0), strsize(0) { }
428 /// MachODySymTab - This struct contains information about the offsets and
429 /// size of dynamic symbol table information.
431 struct MachODySymTab {
432 uint32_t cmd; // LC_DYSYMTAB
433 uint32_t cmdsize; // sizeof( MachODySymTab )
434 uint32_t ilocalsym; // index to local symbols
435 uint32_t nlocalsym; // number of local symbols
436 uint32_t iextdefsym; // index to externally defined symbols
437 uint32_t nextdefsym; // number of externally defined symbols
438 uint32_t iundefsym; // index to undefined symbols
439 uint32_t nundefsym; // number of undefined symbols
440 uint32_t tocoff; // file offset to table of contents
441 uint32_t ntoc; // number of entries in table of contents
442 uint32_t modtaboff; // file offset to module table
443 uint32_t nmodtab; // number of module table entries
444 uint32_t extrefsymoff; // offset to referenced symbol table
445 uint32_t nextrefsyms; // number of referenced symbol table entries
446 uint32_t indirectsymoff; // file offset to the indirect symbol table
447 uint32_t nindirectsyms; // number of indirect symbol table entries
448 uint32_t extreloff; // offset to external relocation entries
449 uint32_t nextrel; // number of external relocation entries
450 uint32_t locreloff; // offset to local relocation entries
451 uint32_t nlocrel; // number of local relocation entries
453 // Constants for the cmd field
454 // see <mach-o/loader.h>
455 enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info
/// Default constructor: cmd is LC_DYSYMTAB, cmdsize is fixed at twenty
/// 32-bit words (one per field of this struct), and every index, offset and
/// count starts at zero.
458 MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
459 ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
460 iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
461 nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
462 nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
465 /// SymTab - The "stab" style symbol table information
467 /// DySymTab - symbol table info for the dynamic link editor
468 MachODySymTab DySymTab;
470 /// MachOSym - This struct contains information about each symbol that is
471 /// added to logical symbol table for the module. This is eventually
472 /// turned into a real symbol table in the file.
474 const GlobalValue *GV; // The global value this corresponds to.
475 uint32_t n_strx; // index into the string table
476 uint8_t n_type; // type flag
477 uint8_t n_sect; // section number or NO_SECT
478 int16_t n_desc; // see <mach-o/stab.h>
479 uint64_t n_value; // value for this symbol (or stab offset)
481 // Constants for the n_sect field
482 // see <mach-o/nlist.h>
483 enum { NO_SECT = 0 }; // symbol is not in any section
485 // Constants for the n_type field
486 // see <mach-o/nlist.h>
487 enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT
488 N_ABS = 0x2, // absolute, n_sect == NO_SECT
489 N_SECT = 0xe, // defined in section number n_sect
490 N_PBUD = 0xc, // prebound undefined (defined in a dylib)
491 N_INDR = 0xa // indirect
493 // The following bits are OR'd into the types above. For example, a type
494 // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
495 enum { N_EXT = 0x01, // external symbol bit
496 N_PEXT = 0x10 // private external symbol bit
499 // Constants for the n_desc field
500 // see <mach-o/loader.h>
501 enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0,
502 REFERENCE_FLAG_UNDEFINED_LAZY = 1,
503 REFERENCE_FLAG_DEFINED = 2,
504 REFERENCE_FLAG_PRIVATE_DEFINED = 3,
505 REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4,
506 REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5
508 enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
509 N_WEAK_REF = 0x0040, // symbol is weak referenced
510 N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition
513 /// entrySize - This routine returns the size of a symbol table entry as
// NOTE(review): 12 bytes is the size of the 32-bit `struct nlist` entry in
// <mach-o/nlist.h>; a `struct nlist_64` entry is 16 bytes. Confirm this value
// is not used to size the symbol table of a 64-bit output file.
515 static unsigned entrySize() { return 12; }
517 MachOSym(const GlobalValue *gv, uint8_t sect) : GV(gv), n_strx(0),
518 n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect), n_desc(0),
522 /// SymbolTable - This is the list of symbols we have emitted to the file.
523 /// This actually gets rearranged before emission to the file (to put the
524 /// local symbols first in the list).
525 std::vector<MachOSym> SymbolTable;
527 /// PendingSyms - This is a list of externally defined symbols that we have
528 /// been asked to emit, but have not seen a reference to. When a reference
529 /// is seen, the symbol will move from this list to the SymbolTable.
530 std::vector<MachOSym> PendingSyms;
532 /// DynamicSymbolTable - This is just a vector of indices into
533 /// SymbolTable to aid in emitting the DYSYMTAB load command.
534 std::vector<unsigned> DynamicSymbolTable;
536 /// StringTable - The table of strings referenced by SymbolTable entries
537 std::vector<std::string> StringTable;
539 // align - Emit padding into the file until the current output position is
540 // aligned to the specified power of two boundary.
541 static void align(DataBuffer &Output, unsigned Boundary) {
542 assert(Boundary && (Boundary & (Boundary-1)) == 0 &&
543 "Must align to 2^k boundary");
544 size_t Size = Output.size();
545 if (Size & (Boundary-1)) {
546 // Add padding to get alignment to the correct place.
547 size_t Pad = Boundary-(Size & (Boundary-1));
548 Output.resize(Size+Pad);
552 void outbyte(DataBuffer &Output, unsigned char X) {
555 void outhalf(DataBuffer &Output, unsigned short X) {
556 if (isLittleEndian) {
557 Output.push_back(X&255);
558 Output.push_back(X >> 8);
560 Output.push_back(X >> 8);
561 Output.push_back(X&255);
564 void outword(DataBuffer &Output, unsigned X) {
565 if (isLittleEndian) {
566 Output.push_back((X >> 0) & 255);
567 Output.push_back((X >> 8) & 255);
568 Output.push_back((X >> 16) & 255);
569 Output.push_back((X >> 24) & 255);
571 Output.push_back((X >> 24) & 255);
572 Output.push_back((X >> 16) & 255);
573 Output.push_back((X >> 8) & 255);
574 Output.push_back((X >> 0) & 255);
577 void outxword(DataBuffer &Output, uint64_t X) {
578 if (isLittleEndian) {
579 Output.push_back(unsigned(X >> 0) & 255);
580 Output.push_back(unsigned(X >> 8) & 255);
581 Output.push_back(unsigned(X >> 16) & 255);
582 Output.push_back(unsigned(X >> 24) & 255);
583 Output.push_back(unsigned(X >> 32) & 255);
584 Output.push_back(unsigned(X >> 40) & 255);
585 Output.push_back(unsigned(X >> 48) & 255);
586 Output.push_back(unsigned(X >> 56) & 255);
588 Output.push_back(unsigned(X >> 56) & 255);
589 Output.push_back(unsigned(X >> 48) & 255);
590 Output.push_back(unsigned(X >> 40) & 255);
591 Output.push_back(unsigned(X >> 32) & 255);
592 Output.push_back(unsigned(X >> 24) & 255);
593 Output.push_back(unsigned(X >> 16) & 255);
594 Output.push_back(unsigned(X >> 8) & 255);
595 Output.push_back(unsigned(X >> 0) & 255);
598 void outaddr32(DataBuffer &Output, unsigned X) {
601 void outaddr64(DataBuffer &Output, uint64_t X) {
604 void outaddr(DataBuffer &Output, uint64_t X) {
606 outword(Output, (unsigned)X);
610 void outstring(DataBuffer &Output, std::string &S, unsigned Length) {
611 unsigned len_to_copy = S.length() < Length ? S.length() : Length;
612 unsigned len_to_fill = S.length() < Length ? Length-S.length() : 0;
614 for (unsigned i = 0; i < len_to_copy; ++i)
615 outbyte(Output, S[i]);
617 for (unsigned i = 0; i < len_to_fill; ++i)
622 void AddSymbolToSection(MachOSection &MOS, GlobalVariable *GV);
623 void EmitGlobal(GlobalVariable *GV);
624 void EmitHeaderAndLoadCommands();
626 void EmitRelocations();
627 void EmitSymbolTable();
628 void EmitStringTable();