1 //===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "MCTargetDesc/X86MCTargetDesc.h"
11 #include "MCTargetDesc/X86FixupKinds.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/MC/MCAsmLayout.h"
14 #include "llvm/MC/MCAssembler.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCMachObjectWriter.h"
17 #include "llvm/MC/MCSectionMachO.h"
18 #include "llvm/MC/MCValue.h"
19 #include "llvm/Object/MachOFormat.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/Format.h"
24 using namespace llvm::object;
// Mach-O target writer for X86. It derives from MCMachObjectTargetWriter and
// turns fixups into Mach-O relocation entries, with separate code paths for
// the 32-bit and the 64-bit object format.
// NOTE(review): this listing omits some original lines (see the gaps in the
// embedded numbering), so a few parameters and closing braces are not shown.
27 class X86MachObjectWriter : public MCMachObjectTargetWriter {
// Records a scattered relocation entry. The bool result is tested by
// RecordX86Relocation; presumably false means "could not use a scattered
// entry, fall back to a regular one" -- confirm against the definition.
28 bool RecordScatteredRelocation(MachObjectWriter *Writer,
29 const MCAssembler &Asm,
30 const MCAsmLayout &Layout,
31 const MCFragment *Fragment,
35 uint64_t &FixedValue);
// Records the relocation for a 32-bit TLVP (thread-local variable) reference.
36 void RecordTLVPRelocation(MachObjectWriter *Writer,
37 const MCAssembler &Asm,
38 const MCAsmLayout &Layout,
39 const MCFragment *Fragment,
42 uint64_t &FixedValue);
// Relocation recording for the 32-bit object format.
44 void RecordX86Relocation(MachObjectWriter *Writer,
45 const MCAssembler &Asm,
46 const MCAsmLayout &Layout,
47 const MCFragment *Fragment,
50 uint64_t &FixedValue);
// Relocation recording for the 64-bit object format.
51 void RecordX86_64Relocation(MachObjectWriter *Writer,
52 const MCAssembler &Asm,
53 const MCAsmLayout &Layout,
54 const MCFragment *Fragment,
57 uint64_t &FixedValue);
// Forwards the configuration to the base class; aggressive symbol folding is
// enabled exactly when the writer targets 64-bit.
59 X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
61 : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
62 /*UseAggressiveSymbolFolding=*/Is64Bit) {}
// Entry point for recording a relocation: dispatches to the 64-bit or the
// 32-bit implementation depending on Writer->is64Bit().
64 void RecordRelocation(MachObjectWriter *Writer,
65 const MCAssembler &Asm, const MCAsmLayout &Layout,
66 const MCFragment *Fragment, const MCFixup &Fixup,
67 MCValue Target, uint64_t &FixedValue) {
68 if (Writer->is64Bit())
69 RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
72 RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
// Returns true when Kind is one of the two RIP-relative 4-byte fixup kinds
// (reloc_riprel_4byte or reloc_riprel_4byte_movq_load).
78 static bool isFixupKindRIPRel(unsigned Kind) {
79 return Kind == X86::reloc_riprel_4byte ||
80 Kind == X86::reloc_riprel_4byte_movq_load;
// Maps a fixup kind to the log2 of its size in bytes:
//   FK_Data_1 -> 0, FK_Data_2 -> 1, FK_Data_4 and the X86 4-byte kinds -> 2,
//   FK_Data_8 -> 3.
// NOTE(review): the switch header is not visible in this listing; the
// llvm_unreachable below is presumably the default case for any other kind.
83 static unsigned getFixupKindLog2Size(unsigned Kind) {
86 llvm_unreachable("invalid fixup kind!");
88 case FK_Data_1: return 0;
90 case FK_Data_2: return 1;
92 // FIXME: Remove these!!!
// The three X86-specific kinds below share the 4-byte size of FK_Data_4.
93 case X86::reloc_riprel_4byte:
94 case X86::reloc_riprel_4byte_movq_load:
95 case X86::reloc_signed_4byte:
96 case FK_Data_4: return 2;
97 case FK_Data_8: return 3;
// Records the Mach-O relocation entry (or entries) for one fixup in a 64-bit
// object. Three shapes of Target are distinguished below:
//   * an absolute constant,
//   * a difference "A - B + constant", which yields two entries (one typed
//     RIT_X86_64_Unsigned for A, one typed RIT_X86_64_Subtractor for B),
//   * a single symbol plus constant, whose relocation type depends on whether
//     the fixup is PC-relative / RIP-relative and on the symbol modifier.
// Unsupported combinations are rejected with report_fatal_error.
// NOTE(review): this listing omits some original lines (see the gaps in the
// embedded numbering), e.g. the declarations of Value, Index and Type and
// several if/else lines; the comments only describe what is visible.
101 void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
102 const MCAssembler &Asm,
103 const MCAsmLayout &Layout,
104 const MCFragment *Fragment,
105 const MCFixup &Fixup,
107 uint64_t &FixedValue) {
// Classify the fixup: PC-relative, RIP-relative, and log2 of its byte size.
108 unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
109 unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
110 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
// FixupOffset is built from the fragment's layout offset, FixupAddress from
// the fragment's address; both add the fixup's offset inside the fragment.
113 uint32_t FixupOffset =
114 Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
115 uint32_t FixupAddress =
116 Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
119 unsigned IsExtern = 0;
// Start from the constant addend of the target expression.
122 Value = Target.getConstant();
125 // Compensate for the relocation offset, Darwin x86_64 relocations only have
126 // the addend and appear to have attempted to define it to be the actual
127 // expression addend without the PCrel bias. However, instructions with data
128 // following the relocation are not accommodated for (see comment below
129 // regarding SIGNED{1,2,4}), so it isn't exactly that either.
130 Value += 1LL << Log2Size;
133 if (Target.isAbsolute()) { // constant
134 // SymbolNum of 0 indicates the absolute section.
135 Type = macho::RIT_X86_64_Unsigned;
138 // FIXME: I believe this is broken, I don't think the linker can understand
139 // it. I think it would require a local relocation, but I'm not sure if that
140 // would work either. The official way to get an absolute PCrel relocation
141 // is to use an absolute symbol (which we don't support yet).
144 Type = macho::RIT_X86_64_Branch;
146 } else if (Target.getSymB()) { // A - B + constant
// Look up the symbol data and the containing atom (base symbol) of A and B.
147 const MCSymbol *A = &Target.getSymA()->getSymbol();
148 MCSymbolData &A_SD = Asm.getSymbolData(*A);
149 const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
151 const MCSymbol *B = &Target.getSymB()->getSymbol();
152 MCSymbolData &B_SD = Asm.getSymbolData(*B);
153 const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
155 // Neither symbol can be modified.
156 if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
157 Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
158 report_fatal_error("unsupported relocation of modified symbol");
160 // We don't support PCrel relocations of differences. Darwin 'as' doesn't
161 // implement most of these correctly.
163 report_fatal_error("unsupported pc-relative relocation of difference");
165 // The support for the situation where one or both of the symbols would
166 // require a local relocation is handled just like if the symbols were
167 // external. This is certainly used in the case of debug sections where the
168 // section has only temporary symbols and thus the symbols don't have base
169 // symbols. This is encoded using the section ordinal and non-extern
170 // relocation entries.
172 // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
173 // single SIGNED relocation); reject it for now. Except the case where both
174 // symbols don't have a base, equal but both NULL.
175 if (A_Base == B_Base && A_Base)
176 report_fatal_error("unsupported relocation with identical base");
178 // A subtraction expression where both symbols are undefined is a
179 // non-relocatable expression.
180 if (A->isUndefined() && B->isUndefined())
181 report_fatal_error("unsupported relocation with subtraction expression");
// Fold each symbol's distance from its base (or its full address when it has
// no base) into the addend: added for A, subtracted for B.
183 Value += Writer->getSymbolAddress(&A_SD, Layout) -
184 (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
185 Value -= Writer->getSymbolAddress(&B_SD, Layout) -
186 (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
// Index for A: the base symbol's index, or the 1-based ordinal of the section
// holding A (the selecting condition is not visible in this listing).
189 Index = A_Base->getIndex();
193 Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
196 Type = macho::RIT_X86_64_Unsigned;
// The entry for A is added right here; the Index/Type set for B further down
// are presumably consumed by the shared entry-building code at the end of
// this function -- confirm against the full source.
198 macho::RelocationEntry MRE;
199 MRE.Word0 = FixupOffset;
200 MRE.Word1 = ((Index << 0) |
205 Writer->addRelocation(Fragment->getParent(), MRE);
// Same index selection as above, now for B.
208 Index = B_Base->getIndex();
212 Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
215 Type = macho::RIT_X86_64_Subtractor;
// Single-symbol case: look up the symbol, its data and its atom base.
217 const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
218 MCSymbolData &SD = Asm.getSymbolData(*Symbol);
219 const MCSymbolData *Base = Asm.getAtom(&SD);
221 // Relocations inside debug sections always use local relocations when
222 // possible. This seems to be done because the debugger doesn't fully
223 // understand x86_64 relocation entries, and expects to find values that
224 // have already been fixed up.
225 if (Symbol->isInSection()) {
226 const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
227 Fragment->getParent()->getSection());
228 if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
232 // x86_64 almost always uses external relocations, except when there is no
233 // symbol to use as a base address (a local symbol with no preceding
234 // non-local symbol).
236 Index = Base->getIndex();
239 // Add the local offset, if needed.
241 Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
242 } else if (Symbol->isInSection() && !Symbol->isVariable()) {
243 // The index is the section ordinal (1-based).
244 Index = SD.getFragment()->getParent()->getOrdinal() + 1;
246 Value += Writer->getSymbolAddress(&SD, Layout);
// Subtract the address just past the fixup (fixup address plus fixup size).
249 Value -= FixupAddress + (1 << Log2Size);
250 } else if (Symbol->isVariable()) {
// Note: this local "Value" is the variable's defining expression and shadows
// the numeric addend of the same name inside this branch.
251 const MCExpr *Value = Symbol->getVariableValue();
253 bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
254 Writer->getSectionAddressMap());
259 report_fatal_error("unsupported relocation of variable '" +
260 Symbol->getName() + "'");
263 report_fatal_error("unsupported relocation of undefined symbol '" +
264 Symbol->getName() + "'");
// The symbol modifier (e.g. GOTPCREL, TLVP) drives the relocation type below.
267 MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
270 if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
271 // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
272 // rewrite the movq to an leaq at link time if the symbol ends up in
273 // the same linkage unit.
274 if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
275 Type = macho::RIT_X86_64_GOTLoad;
277 Type = macho::RIT_X86_64_GOT;
278 } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
279 Type = macho::RIT_X86_64_TLV;
280 } else if (Modifier != MCSymbolRefExpr::VK_None) {
281 report_fatal_error("unsupported symbol modifier in relocation");
283 Type = macho::RIT_X86_64_Signed;
285 // The Darwin x86_64 relocation format has a problem where it cannot
286 // encode an address (L<foo> + <constant>) which is outside the atom
287 // containing L<foo>. Generally, this shouldn't occur but it does
288 // happen when we have a RIPrel instruction with data following the
289 // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
290 // adjustment Darwin x86_64 uses, the offset is still negative and the
291 // linker has no way to recognize this.
293 // To work around this, Darwin uses several special relocation types
294 // to indicate the offsets. However, the specification or
295 // implementation of these seems to also be incomplete; they should
296 // adjust the addend as well based on the actual encoded instruction
297 // (the additional bias), but instead appear to just look at the final
// The switch value is the negated sum of the constant addend and the fixup
// size; results of 1, 2 or 4 select the matching SIGNED{1,2,4} type.
299 switch (-(Target.getConstant() + (1LL << Log2Size))) {
300 case 1: Type = macho::RIT_X86_64_Signed1; break;
301 case 2: Type = macho::RIT_X86_64_Signed2; break;
302 case 4: Type = macho::RIT_X86_64_Signed4; break;
306 if (Modifier != MCSymbolRefExpr::VK_None)
307 report_fatal_error("unsupported symbol modifier in branch "
310 Type = macho::RIT_X86_64_Branch;
313 if (Modifier == MCSymbolRefExpr::VK_GOT) {
314 Type = macho::RIT_X86_64_GOT;
315 } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
316 // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
317 // case all we do is set the PCrel bit in the relocation entry; this is
318 // used with exception handling, for example. The source is required to
319 // include any necessary offset directly.
320 Type = macho::RIT_X86_64_GOT;
322 } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
323 report_fatal_error("TLVP symbol modifier should have been rip-rel");
324 } else if (Modifier != MCSymbolRefExpr::VK_None)
325 report_fatal_error("unsupported symbol modifier in relocation");
327 Type = macho::RIT_X86_64_Unsigned;
// A signed 4-byte fixup on this path would need 32-bit absolute addressing,
// which is rejected.
328 unsigned Kind = Fixup.getKind();
329 if (Kind == X86::reloc_signed_4byte)
330 report_fatal_error("32-bit absolute addressing is not supported in "
336 // x86_64 always writes custom values into the fixups.
339 // struct relocation_info (8 bytes)
// Word0 carries the fixup offset; Word1 packs the index (low bits) together
// with further fields whose lines are not visible in this listing.
340 macho::RelocationEntry MRE;
341 MRE.Word0 = FixupOffset;
342 MRE.Word1 = ((Index << 0) |
347 Writer->addRelocation(Fragment->getParent(), MRE);
// Records a scattered relocation entry for a fixup in a 32-bit object. When
// the target is a difference (Target.getSymB() is set) a PAIR entry is added
// as well. Both symbols of a difference must be defined; otherwise a fatal
// error is reported. FixedValue is adjusted by the section addresses of the
// symbols involved.
// NOTE(review): the return statements are not visible in this listing;
// judging from the caller and the comments below, the result presumably tells
// whether a scattered entry was actually recorded -- confirm.
350 bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
351 const MCAssembler &Asm,
352 const MCAsmLayout &Layout,
353 const MCFragment *Fragment,
354 const MCFixup &Fixup,
357 uint64_t &FixedValue) {
358 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
359 unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
360 unsigned Type = macho::RIT_Vanilla;
// Symbol A must live in a fragment, i.e. be defined.
363 const MCSymbol *A = &Target.getSymA()->getSymbol();
364 MCSymbolData *A_SD = &Asm.getSymbolData(*A);
366 if (!A_SD->getFragment())
367 report_fatal_error("symbol '" + A->getName() +
368 "' can not be undefined in a subtraction expression");
// Value is A's address; the address of A's section is added to FixedValue.
370 uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
371 uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
372 FixedValue += SecAddr;
375 if (const MCSymbolRefExpr *B = Target.getSymB()) {
376 MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
378 if (!B_SD->getFragment())
379 report_fatal_error("symbol '" + B->getSymbol().getName() +
380 "' can not be undefined in a subtraction expression");
382 // Select the appropriate difference relocation type.
384 // Note that there is no longer any semantic difference between these two
385 // relocation types from the linker's point of view, this is done solely for
386 // pedantic compatibility with 'as'.
387 Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
388 (unsigned)macho::RIT_Generic_LocalDifference;
// Value2 is B's address; the address of B's section is removed from
// FixedValue.
389 Value2 = Writer->getSymbolAddress(B_SD, Layout);
390 FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
393 // Relocations are written out in reverse order, so the PAIR comes first.
394 if (Type == macho::RIT_Difference ||
395 Type == macho::RIT_Generic_LocalDifference) {
396 // If the offset is too large to fit in a scattered relocation,
397 // we're hosed. It's an unfortunate limitation of the MachO format.
398 if (FixupOffset > 0xffffff) {
// Format the offending offset in hex for the diagnostic.
400 format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
401 Asm.getContext().FatalError(Fixup.getLoc(),
402 Twine("Section too large, can't encode "
403 "r_address (") + Buffer +
404 ") into 24 bits of scattered "
405 "relocation entry.");
406 llvm_unreachable("fatal error returned?!");
// PAIR entry: address field 0, type RIT_Pair, flagged as scattered.
409 macho::RelocationEntry MRE;
410 MRE.Word0 = ((0 << 0) |
411 (macho::RIT_Pair << 24) |
414 macho::RF_Scattered);
416 Writer->addRelocation(Fragment->getParent(), MRE);
418 // If the offset is more than 24-bits, it won't fit in a scattered
419 // relocation offset field, so we fall back to using a non-scattered
420 // relocation. This is a bit risky, as if the offset reaches out of
421 // the block and the linker is doing scattered loading on this
422 // symbol, things can go badly.
424 // Required for 'as' compatibility.
425 if (FixupOffset > 0xffffff)
// Main entry: fixup offset in the low bits, flagged as scattered.
429 macho::RelocationEntry MRE;
430 MRE.Word0 = ((FixupOffset << 0) |
434 macho::RF_Scattered);
436 Writer->addRelocation(Fragment->getParent(), MRE);
// Records the relocation for a 32-bit TLVP reference. The entry uses the
// symbol index of Target's A symbol, sets the extern bit and has type
// RIT_Generic_TLV. When a B symbol is present, FixedValue is recomputed
// relative to B (see below).
// NOTE(review): some original lines (e.g. the assignment of MRE.Word0 and the
// branch taken without a B symbol) are not visible in this listing.
440 void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
441 const MCAssembler &Asm,
442 const MCAsmLayout &Layout,
443 const MCFragment *Fragment,
444 const MCFixup &Fixup,
446 uint64_t &FixedValue) {
447 assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
449 "Should only be called with a 32-bit TLVP relocation!");
451 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
// Value is the fragment's layout offset plus the fixup's offset within it.
452 uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
453 unsigned IsPCRel = 0;
455 // Get the symbol data.
456 MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
457 unsigned Index = SD_A->getIndex();
459 // We're only going to have a second symbol in pic mode and it'll be a
460 // subtraction from the picbase. For 32-bit pic the addend is the difference
461 // between the picbase and the next address. For 32-bit static the addend is
463 if (Target.getSymB()) {
464 // If this is a subtraction then we're pcrel.
465 uint32_t FixupAddress =
466 Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
467 MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
// FixedValue = (fixup address - address of B + constant) + fixup size.
469 FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
470 Target.getConstant());
471 FixedValue += 1ULL << Log2Size;
476 // struct relocation_info (8 bytes)
477 macho::RelocationEntry MRE;
479 MRE.Word1 = ((Index << 0) |
482 (1 << 27) | // Extern
483 (macho::RIT_Generic_TLV << 28)); // Type
484 Writer->addRelocation(Fragment->getParent(), MRE);
// Records the relocation for one fixup in a 32-bit object. The visible code
// handles, in order:
//   * TLVP references, delegated to RecordTLVPRelocation,
//   * differences (Target.getSymB() set), delegated to
//     RecordScatteredRelocation,
//   * a symbol that does not require an extern relocation and has a non-zero
//     offset, tried as a scattered relocation first,
//   * everything else, emitted as a regular RIT_Vanilla entry whose index is
//     either the symbol index or a 1-based section ordinal.
// NOTE(review): this listing omits some original lines (see the gaps in the
// embedded numbering), including early returns and some if/else lines.
487 void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
488 const MCAssembler &Asm,
489 const MCAsmLayout &Layout,
490 const MCFragment *Fragment,
491 const MCFixup &Fixup,
493 uint64_t &FixedValue) {
494 unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
495 unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
497 // If this is a 32-bit TLVP reloc it's handled a bit differently.
498 if (Target.getSymA() &&
499 Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
500 RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
505 // If this is a difference or a defined symbol plus an offset, then we need a
506 // scattered relocation entry. Differences always require scattered
508 if (Target.getSymB()) {
// The bool result is ignored in this call.
509 RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
510 Target, Log2Size, FixedValue);
514 // Get the symbol data, if any.
515 MCSymbolData *SD = 0;
516 if (Target.getSymA())
517 SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
519 // If this is an internal relocation with an offset, it also needs a scattered
521 uint32_t Offset = Target.getConstant();
// Adds the fixup size to the offset (the guarding condition, if any, is not
// visible in this listing).
523 Offset += 1 << Log2Size;
524 // Try to record the scattered relocation if needed. Fall back to non
525 // scattered if necessary (see comments in RecordScatteredRelocation()
527 if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD) &&
528 RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
529 Target, Log2Size, FixedValue))
// Regular (non-scattered) entry from here on.
533 uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
535 unsigned IsExtern = 0;
538 if (Target.isAbsolute()) { // constant
539 // SymbolNum of 0 indicates the absolute section.
541 // FIXME: Currently, these are never generated (see code below). I cannot
542 // find a case where they are actually emitted.
543 Type = macho::RIT_Vanilla;
545 // Resolve constant variables.
546 if (SD->getSymbol().isVariable()) {
548 if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
549 Res, Layout, Writer->getSectionAddressMap())) {
555 // Check whether we need an external or internal relocation.
556 if (Writer->doesSymbolRequireExternRelocation(SD)) {
558 Index = SD->getIndex();
559 // For external relocations, make sure to offset the fixup value to
560 // compensate for the addend of the symbol address, if it was
561 // undefined. This occurs with weak definitions, for example.
562 if (!SD->Symbol->isUndefined())
563 FixedValue -= Layout.getSymbolOffset(SD);
565 // The index is the section ordinal (1-based).
566 const MCSectionData &SymSD = Asm.getSectionData(
567 SD->getSymbol().getSection());
568 Index = SymSD.getOrdinal() + 1;
// Internal relocation: the fixup value additionally carries the address of
// the symbol's section.
569 FixedValue += Writer->getSectionAddress(&SymSD);
// Removes the address of the section that contains the fixup (the guarding
// condition is not visible in this listing).
572 FixedValue -= Writer->getSectionAddress(Fragment->getParent());
574 Type = macho::RIT_Vanilla;
577 // struct relocation_info (8 bytes)
// Word0 carries the fixup offset; Word1 packs the index (low bits) together
// with further fields whose lines are not visible in this listing.
578 macho::RelocationEntry MRE;
579 MRE.Word0 = FixupOffset;
580 MRE.Word1 = ((Index << 0) |
585 Writer->addRelocation(Fragment->getParent(), MRE);
// Factory: allocates an X86MachObjectWriter with `new` and passes it, together
// with the output stream OS and a little-endian flag, to
// createMachObjectWriter, whose result is returned.
// NOTE(review): the raw `new` suggests createMachObjectWriter takes ownership
// of the target writer -- confirm against its declaration. Some parameter
// lines are not visible in this listing.
588 MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
591 uint32_t CPUSubtype) {
592 return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
595 OS, /*IsLittleEndian=*/true);