From ef8ad01b02bd6decc99f587bf800c58fb765384a Mon Sep 17 00:00:00 2001 From: Frederic Riss Date: Wed, 2 Sep 2015 16:49:13 +0000 Subject: [PATCH] Reapply r246012 [dsymutil] Emit real dSYM companion binaries. With a fix for big endian machines. Thanks to Daniel Sanders for the debugging! Original commit message: The binaries containing the linked DWARF generated by dsymutil are not standard relocatable object files like the ones emitted previously. They should be dSYM companion files, which means they have a different file type in the header, but also a couple other peculiarities: - they contain the segments and sections from the original binary in their load commands, but not the actual contents. This means they get an address and a size, but their offset is always 0 (but these are not virtual sections) - they also contain all the defined symbols from the original binary This makes MC a really bad fit to emit this kind of binary. The approach that was used in this patch is to leverage MC's section layout for the debug sections, but to use a replacement for MachObjectWriter that lives in MachOUtils.cpp. Some of the low-level helpers from MachObjectWriter were reused too. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246673 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/tools/dsymutil/Inputs/basic.macho.i386 | Bin 0 -> 9080 bytes test/tools/dsymutil/X86/dsym-companion.test | 339 ++++++++++++++++ tools/dsymutil/DwarfLinker.cpp | 10 +- tools/dsymutil/MachOUtils.cpp | 426 ++++++++++++++++++++ tools/dsymutil/MachOUtils.h | 6 + 5 files changed, 778 insertions(+), 3 deletions(-) create mode 100755 test/tools/dsymutil/Inputs/basic.macho.i386 create mode 100644 test/tools/dsymutil/X86/dsym-companion.test diff --git a/test/tools/dsymutil/Inputs/basic.macho.i386 b/test/tools/dsymutil/Inputs/basic.macho.i386 new file mode 100755 index 0000000000000000000000000000000000000000..ee6be096fe5a1e060efc9db2dc33dc57d4079f85 GIT binary patch literal 9080 zcmeHNO>7%Q6rLq67;55xatH;QRb{mhfo+&I{UbhJn zk|hI*WeNGh1;K#}m-dhg2RNl_OM0s4g$o>dppsFGlmmyTM1=3#-ElUKxg#`Bc{|^m zdGp?z&toaGIt!~paGXdLR&$SEiJ;(nY6~lTK;A zjLzN~E{_Uv68)F4P?cBBRW0j3iHDRYre>#9FIp3-))3TB-c&78mI|`ET+EaUa?!2y zo$vW#ZaDXYh@nKvobB4m+FL`!^z%h}V(iS|oNT#RESFSY?>eGE433BEz>>QnWu{u~ zT6^n=nLF$Lw-}7)@1IlCr{AznOsl<+#sKOkpGzQ+`yibAkPvmca9nXqvscIkKTGBx zfzHBy9<7T25d_>1dV^ZSis+li;3F=(n*t)vQ0CY4(*Yk%NPIp{`y#a_4Z&ofj%x(?0>}5F=ew4@(k9>dEC=sVU7tX$GAQr)>Y}<`~D?=D$FcbCMc@lZI&4oEouG1w~p%fP+y z*AV}G^fNv&Looh%C^6$J%z8}xK7cm;iRnXszHcF>KXD#sY83~dN@VP+lYJqX?e+v6MLWAtPCDb0$C70syIn%wQmt%~1%WQEMw9|^fw zaW2@{&rZqpbB*@~81D(dZSDu)UiVS|c(TtI23axPE5a*UG6;VnAggXIBQyAA3cgm5 Fe*ib5U4j4r literal 0 HcmV?d00001 diff --git a/test/tools/dsymutil/X86/dsym-companion.test b/test/tools/dsymutil/X86/dsym-companion.test new file mode 100644 index 00000000000..4327a2953d9 --- /dev/null +++ b/test/tools/dsymutil/X86/dsym-companion.test @@ -0,0 +1,339 @@ +RUN: llvm-dsymutil -o - %p/../Inputs/basic.macho.i386 -f | llvm-readobj -file-headers -program-headers -sections -symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK32 +RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. 
%p/../Inputs/basic.macho.x86_64 -f | llvm-readobj -file-headers -program-headers -sections -symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK64 + +This test checks that the dSYM companion binaries generated in 32 and 64 bits +are correct. The checks are pretty strict (we check even the offsets and sizes +of the sections) in order to test the VM address layout algorithm. As the +debug sections are generated, this is a bit risky, but I don't expect +llvm-dsymutil's output to change much for these tiny C programs so this should +be OK. +The 32-bit version doesn't have object files, thus it has basically no debug +sections. + +CHECK32: Format: Mach-O 32-bit i386 +CHECK32: Arch: i386 +CHECK32: AddressSize: 32bit +CHECK64: Format: Mach-O 64-bit x86-64 +CHECK64: Arch: x86_64 +CHECK64: AddressSize: 64bit +CHECK: MachHeader { +CHECK32: Magic: Magic (0xFEEDFACE) +CHECK32: CpuType: X86 (0x7) +CHECK32: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3) +CHECK64: Magic: Magic64 (0xFEEDFACF) +CHECK64: CpuType: X86-64 (0x1000007) +CHECK64: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3) +CHECK: FileType: DWARFSymbol (0xA) +CHECK: NumOfLoadCommands: 7 +CHECK: Flags [ (0x0) +CHECK: } +CHECK: Sections [ +CHECK: Section { +CHECK: Name: __text +CHECK: Segment: __TEXT +CHECK32: Address: 0x1E90 +CHECK32: Size: 0x11A +CHECK64: Address: 0x100000EA0 +CHECK64: Size: 0x109 +CHECK: Offset: 0 +CHECK: Alignment: 4 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x800004) +CHECK: PureInstructions (0x800000) +CHECK: SomeInstructions (0x4) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __unwind_info +CHECK: Segment: __TEXT +CHECK32: Address: 0x1FAC +CHECK64: Address: 0x100000FAC +CHECK: Size: 0x48 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } 
+CHECK32: Section { +CHECK32: Name: __nl_symbol_ptr +CHECK32: Segment: __DATA +CHECK32: Address: 0x2000 +CHECK32: Size: 0x4 +CHECK32: Offset: 0 +CHECK32: Alignment: 2 +CHECK32: RelocationOffset: 0x0 +CHECK32: RelocationCount: 0 +CHECK32: Type: 0x6 +CHECK32: Attributes [ (0x0) +CHECK32: ] +CHECK32: Reserved1: 0x0 +CHECK32: Reserved2: 0x0 +CHECK32: } +CHECK: Section { +CHECK: Name: __data +CHECK: Segment: __DATA +CHECK32: Address: 0x2004 +CHECK64: Address: 0x100001000 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __common +CHECK: Segment: __DATA +CHECK32: Address: 0x2008 +CHECK64: Address: 0x100001004 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: LocReloc (0x1) +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __bss +CHECK: Segment: __DATA +CHECK32: Address: 0x200C +CHECK64: Address: 0x100001008 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: LocReloc (0x1) +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK64: Section { +CHECK64: Name: __debug_line +CHECK64: Segment: __DWARF +CHECK64: Address: 0x100003000 +CHECK64: Size: 0xEA +CHECK64: Offset: 8192 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_pubnames +CHECK64: Segment: __DWARF +CHECK64: Address: 0x1000030EA +CHECK64: Size: 0x7F +CHECK64: Offset: 8426 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: 
RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_pubtypes +CHECK64: Segment: __DWARF +CHECK64: Address: 0x100003169 +CHECK64: Size: 0x57 +CHECK64: Offset: 8553 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_aranges +CHECK64: Segment: __DWARF +CHECK64: Address: 0x1000031C0 +CHECK64: Size: 0xB0 +CHECK64: Offset: 8640 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_info +CHECK64: Segment: __DWARF +CHECK64: Address: 0x100003270 +CHECK64: Size: 0x1BC +CHECK64: Offset: 8816 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK: Section { +CHECK: Name: __debug_abbrev +CHECK: Segment: __DWARF +CHECK32: Address: 0x4000 +CHECK32: Size: 0x1 +CHECK32: Offset: 8192 +CHECK64: Address: 0x10000342C +CHECK64: Size: 0x8F +CHECK64: Offset: 9260 +CHECK: Alignment: 0 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __debug_str +CHECK: Segment: __DWARF +CHECK32: Address: 0x4001 +CHECK32: Size: 0x1 +CHECK32: Offset: 8193 +CHECK64: Address: 0x1000034BB +CHECK64: Size: 0x9F +CHECK64: Offset: 9403 +CHECK: Alignment: 0 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 
+CHECK: Reserved2: 0x0 +CHECK: } +CHECK: ] +CHECK: Symbols [ +CHECK: Symbol { +CHECK: Name: _inc (2) +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F20 +CHECK64: Value: 0x100000F20 +CHECK: } +CHECK: Symbol { +CHECK: Name: _inc (2) +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F90 +CHECK64: Value: 0x100000F90 +CHECK: } +CHECK: Symbol { +CHECK: Name: _baz (7) +CHECK: Type: Section (0xE) +CHECK: Section: __data +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x2004 +CHECK64: Value: 0x100001000 +CHECK: } +CHECK: Symbol { +CHECK: Name: _private_int (12) +CHECK: Type: Section (0xE) +CHECK: Section: __bss +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x200C +CHECK64: Value: 0x100001008 +CHECK: } +CHECK: Symbol { +CHECK: Name: __mh_execute_header (25) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x10) +CHECK: ReferencedDynamically (0x10) +CHECK: ] +CHECK32: Value: 0x1000 +CHECK64: Value: 0x100000000 +CHECK: } +CHECK: Symbol { +CHECK: Name: _bar (45) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F40 +CHECK64: Value: 0x100000F40 +CHECK: } +CHECK: Symbol { +CHECK: Name: _foo (50) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1EC0 +CHECK64: Value: 0x100000ED0 +CHECK: } +CHECK: Symbol { +CHECK: Name: _main (55) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1E90 +CHECK64: Value: 0x100000EA0 
+CHECK: } +CHECK: Symbol { +CHECK: Name: _val (61) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __common +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x2008 +CHECK64: Value: 0x100001004 +CHECK: } +CHECK: ] + diff --git a/tools/dsymutil/DwarfLinker.cpp b/tools/dsymutil/DwarfLinker.cpp index ed06dba8ab3..84e4e4b31a2 100644 --- a/tools/dsymutil/DwarfLinker.cpp +++ b/tools/dsymutil/DwarfLinker.cpp @@ -10,6 +10,7 @@ #include "BinaryHolder.h" #include "DebugMap.h" #include "dsymutil.h" +#include "MachOUtils.h" #include "NonRelocatableStringpool.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/StringMap.h" @@ -475,7 +476,7 @@ public: bool init(Triple TheTriple, StringRef OutputFilename); /// \brief Dump the file to the disk. - bool finish(); + bool finish(const DebugMap &); AsmPrinter &getAsmPrinter() const { return *Asm; } @@ -617,7 +618,10 @@ bool DwarfStreamer::init(Triple TheTriple, StringRef OutputFilename) { return true; } -bool DwarfStreamer::finish() { +bool DwarfStreamer::finish(const DebugMap &DM) { + if (DM.getTriple().isOSDarwin() && !DM.getBinaryPath().empty()) + return MachOUtils::generateDsymCompanion(DM, *MS, *OutFile); + MS->Finish(); return true; } @@ -3057,7 +3061,7 @@ bool DwarfLinker::link(const DebugMap &Map) { Streamer->emitStrings(StringPool); } - return Options.NoOutput ? true : Streamer->finish(); + return Options.NoOutput ? 
true : Streamer->finish(Map); } } diff --git a/tools/dsymutil/MachOUtils.cpp b/tools/dsymutil/MachOUtils.cpp index 8f4d2610b8f..b6438789465 100644 --- a/tools/dsymutil/MachOUtils.cpp +++ b/tools/dsymutil/MachOUtils.cpp @@ -8,7 +8,16 @@ //===----------------------------------------------------------------------===// #include "MachOUtils.h" +#include "BinaryHolder.h" +#include "DebugMap.h" #include "dsymutil.h" +#include "NonRelocatableStringpool.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Object/MachO.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Program.h" #include "llvm/Support/raw_ostream.h" @@ -88,6 +97,423 @@ bool generateUniversalBinary(SmallVectorImpl &ArchFiles, return Options.NoOutput ? true : runLipo(Args); } + +// Return a MachO::segment_command_64 that holds the same values as +// the passed MachO::segment_command. We do that to avoid having to +// duplicate the logic for 32bits and 64bits segments. +struct MachO::segment_command_64 adaptFrom32bits(MachO::segment_command Seg) { + MachO::segment_command_64 Seg64; + Seg64.cmd = Seg.cmd; + Seg64.cmdsize = Seg.cmdsize; + memcpy(Seg64.segname, Seg.segname, sizeof(Seg.segname)); + Seg64.vmaddr = Seg.vmaddr; + Seg64.vmsize = Seg.vmsize; + Seg64.fileoff = Seg.fileoff; + Seg64.filesize = Seg.filesize; + Seg64.maxprot = Seg.maxprot; + Seg64.initprot = Seg.initprot; + Seg64.nsects = Seg.nsects; + Seg64.flags = Seg.flags; + return Seg64; +} + +// Iterate on all \a Obj segments, and apply \a Handler to them. 
+template +static void iterateOnSegments(const object::MachOObjectFile &Obj, + FunctionTy Handler) { + for (const auto &LCI : Obj.load_commands()) { + MachO::segment_command_64 Segment; + if (LCI.C.cmd == MachO::LC_SEGMENT) + Segment = adaptFrom32bits(Obj.getSegmentLoadCommand(LCI)); + else if (LCI.C.cmd == MachO::LC_SEGMENT_64) + Segment = Obj.getSegment64LoadCommand(LCI); + else + continue; + + Handler(Segment); + } +} + +// Transfer the symbols described by \a NList to \a NewSymtab which is +// just the raw contents of the symbol table for the dSYM companion file. +// \returns whether the symbol was transferred or not. +template +static bool transferSymbol(NListTy NList, bool IsLittleEndian, + StringRef Strings, SmallVectorImpl &NewSymtab, + NonRelocatableStringpool &NewStrings, + bool &InDebugNote) { + // Do not transfer undefined symbols, we want real addresses. + if ((NList.n_type & MachO::N_TYPE) == MachO::N_UNDF) + return false; + + StringRef Name = StringRef(Strings.begin() + NList.n_strx); + if (InDebugNote) { + InDebugNote = + (NList.n_type != MachO::N_SO) || (!Name.empty() && Name[0] != '\0'); + return false; + } else if (NList.n_type == MachO::N_SO) { + InDebugNote = true; + return false; + } + + // FIXME: The + 1 is here to mimic dsymutil-classic that has 2 empty + // strings at the start of the generated string table (There is + // corresponding code in the string table emission). + NList.n_strx = NewStrings.getStringOffset(Name) + 1; + if (IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(NList); + + NewSymtab.append(reinterpret_cast(&NList), + reinterpret_cast(&NList + 1)); + return true; +} + +// Wrapper around transferSymbol to transfer all of \a Obj symbols +// to \a NewSymtab. This function does not write in the output file. +// \returns the number of symbols in \a NewSymtab. 
+static unsigned transferSymbols(const object::MachOObjectFile &Obj, + SmallVectorImpl &NewSymtab, + NonRelocatableStringpool &NewStrings) { + unsigned Syms = 0; + StringRef Strings = Obj.getStringTableData(); + bool IsLittleEndian = Obj.isLittleEndian(); + bool InDebugNote = false; + + if (Obj.is64Bit()) { + for (const object::SymbolRef &Symbol : Obj.symbols()) { + object::DataRefImpl DRI = Symbol.getRawDataRefImpl(); + if (transferSymbol(Obj.getSymbol64TableEntry(DRI), IsLittleEndian, + Strings, NewSymtab, NewStrings, InDebugNote)) + ++Syms; + } + } else { + for (const object::SymbolRef &Symbol : Obj.symbols()) { + object::DataRefImpl DRI = Symbol.getRawDataRefImpl(); + if (transferSymbol(Obj.getSymbolTableEntry(DRI), IsLittleEndian, Strings, + NewSymtab, NewStrings, InDebugNote)) + ++Syms; + } + } + return Syms; +} + +static MachO::section +getSection(const object::MachOObjectFile &Obj, + const MachO::segment_command &Seg, + const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) { + return Obj.getSection(LCI, Idx); +} + +static MachO::section_64 +getSection(const object::MachOObjectFile &Obj, + const MachO::segment_command_64 &Seg, + const object::MachOObjectFile::LoadCommandInfo &LCI, unsigned Idx) { + return Obj.getSection64(LCI, Idx); +} + +// Transfer \a Segment from \a Obj to the output file. This calls into \a Writer +// to write these load commands directly in the output file at the current +// position. +// The function also tries to find a hole in the address map to fit the __DWARF +// segment of \a DwarfSegmentSize size. \a EndAddress is updated to point at the +// highest segment address. +// When the __LINKEDIT segment is transferred, its offset and size are set resp. +// to \a LinkeditOffset and \a LinkeditSize. 
+template +static void transferSegmentAndSections( + const object::MachOObjectFile::LoadCommandInfo &LCI, SegmentTy Segment, + const object::MachOObjectFile &Obj, MCObjectWriter &Writer, + uint64_t LinkeditOffset, uint64_t LinkeditSize, uint64_t DwarfSegmentSize, + uint64_t &GapForDwarf, uint64_t &EndAddress) { + if (StringRef("__DWARF") == Segment.segname) + return; + + Segment.fileoff = Segment.filesize = 0; + + if (StringRef("__LINKEDIT") == Segment.segname) { + Segment.fileoff = LinkeditOffset; + Segment.filesize = LinkeditSize; + } + + // Check if the end address of the last segment and our current + // start address leave a sufficient gap to store the __DWARF + // segment. + uint64_t PrevEndAddress = EndAddress; + EndAddress = RoundUpToAlignment(EndAddress, 0x1000); + if (GapForDwarf == UINT64_MAX && Segment.vmaddr > EndAddress && + Segment.vmaddr - EndAddress >= DwarfSegmentSize) + GapForDwarf = EndAddress; + + // The segments are not necessarily sorted by their vmaddr. + EndAddress = + std::max(PrevEndAddress, Segment.vmaddr + Segment.vmsize); + unsigned nsects = Segment.nsects; + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Segment); + Writer.writeBytes( + StringRef(reinterpret_cast(&Segment), sizeof(Segment))); + for (unsigned i = 0; i < nsects; ++i) { + auto Sect = getSection(Obj, Segment, LCI, i); + Sect.offset = Sect.reloff = Sect.nreloc = 0; + if (Obj.isLittleEndian() != sys::IsLittleEndianHost) + MachO::swapStruct(Sect); + Writer.writeBytes(StringRef(reinterpret_cast(&Sect), sizeof(Sect))); + } +} + +// Write the __DWARF segment load command to the output file. 
+static void createDwarfSegment(uint64_t VMAddr, uint64_t FileOffset, + uint64_t FileSize, unsigned NumSections, + MCAsmLayout &Layout, MachObjectWriter &Writer) { + Writer.writeSegmentLoadCommand("__DWARF", NumSections, VMAddr, + RoundUpToAlignment(FileSize, 0x1000), + FileOffset, FileSize, /* MaxProt */ 7, + /* InitProt =*/3); + + for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { + MCSection *Sec = Layout.getSectionOrder()[i]; + if (Sec->begin() == Sec->end() || !Layout.getSectionFileSize(Sec)) + continue; + + unsigned Align = Sec->getAlignment(); + if (Align > 1) { + VMAddr = RoundUpToAlignment(VMAddr, Align); + FileOffset = RoundUpToAlignment(FileOffset, Align); + } + Writer.writeSection(Layout, *Sec, VMAddr, FileOffset, 0, 0, 0); + + FileOffset += Layout.getSectionAddressSize(Sec); + VMAddr += Layout.getSectionAddressSize(Sec); + } +} + +static bool isExecutable(const object::MachOObjectFile &Obj) { + if (Obj.is64Bit()) + return Obj.getHeader64().filetype != MachO::MH_OBJECT; + else + return Obj.getHeader().filetype != MachO::MH_OBJECT; +} + +static bool hasLinkEditSegment(const object::MachOObjectFile &Obj) { + bool HasLinkEditSegment = false; + iterateOnSegments(Obj, [&](const MachO::segment_command_64 &Segment) { + if (StringRef("__LINKEDIT") == Segment.segname) + HasLinkEditSegment = true; + }); + return HasLinkEditSegment; +} + +static unsigned segmentLoadCommandSize(bool Is64Bit, unsigned NumSections) { + if (Is64Bit) + return sizeof(MachO::segment_command_64) + + NumSections * sizeof(MachO::section_64); + + return sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); +} + +// Stream a dSYM companion binary file corresponding to the binary referenced +// by \a DM to \a OutFile. The passed \a MS MCStreamer is setup to write to +// \a OutFile and it must be using a MachObjectWriter object to do so. 
+bool generateDsymCompanion(const DebugMap &DM, MCStreamer &MS, + raw_fd_ostream &OutFile) { + auto &ObjectStreamer = static_cast(MS); + MCAssembler &MCAsm = ObjectStreamer.getAssembler(); + auto &Writer = static_cast(MCAsm.getWriter()); + MCAsmLayout Layout(MCAsm); + + MCAsm.layout(Layout); + + BinaryHolder InputBinaryHolder(false); + auto ErrOrObjs = InputBinaryHolder.GetObjectFiles(DM.getBinaryPath()); + if (auto Error = ErrOrObjs.getError()) + return error(Twine("opening ") + DM.getBinaryPath() + ": " + + Error.message(), + "output file streaming"); + + auto ErrOrInputBinary = + InputBinaryHolder.GetAs(DM.getTriple()); + if (auto Error = ErrOrInputBinary.getError()) + return error(Twine("opening ") + DM.getBinaryPath() + ": " + + Error.message(), + "output file streaming"); + auto &InputBinary = *ErrOrInputBinary; + + bool Is64Bit = Writer.is64Bit(); + MachO::symtab_command SymtabCmd = InputBinary.getSymtabLoadCommand(); + + // Get UUID. + MachO::uuid_command UUIDCmd; + memset(&UUIDCmd, 0, sizeof(UUIDCmd)); + UUIDCmd.cmd = MachO::LC_UUID; + UUIDCmd.cmdsize = sizeof(MachO::uuid_command); + for (auto &LCI : InputBinary.load_commands()) { + if (LCI.C.cmd == MachO::LC_UUID) { + UUIDCmd = InputBinary.getUuidCommand(LCI); + break; + } + } + + // Compute the number of load commands we will need. + unsigned LoadCommandSize = 0; + unsigned NumLoadCommands = 0; + // We will copy the UUID if there is one. + if (UUIDCmd.cmd != 0) { + ++NumLoadCommands; + LoadCommandSize += sizeof(MachO::uuid_command); + } + + // If we have a valid symtab to copy, do it. + bool ShouldEmitSymtab = + isExecutable(InputBinary) && hasLinkEditSegment(InputBinary); + if (ShouldEmitSymtab) { + LoadCommandSize += sizeof(MachO::symtab_command); + ++NumLoadCommands; + } + + unsigned HeaderSize = + Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header); + // We will copy every segment that isn't __DWARF. 
+ iterateOnSegments(InputBinary, [&](const MachO::segment_command_64 &Segment) { + if (StringRef("__DWARF") == Segment.segname) + return; + + ++NumLoadCommands; + LoadCommandSize += segmentLoadCommandSize(Is64Bit, Segment.nsects); + }); + + // We will add our own brand new __DWARF segment if we have debug + // info. + unsigned NumDwarfSections = 0; + uint64_t DwarfSegmentSize = 0; + + for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { + MCSection *Sec = Layout.getSectionOrder()[i]; + if (Sec->begin() == Sec->end()) + continue; + + if (uint64_t Size = Layout.getSectionFileSize(Sec)) { + DwarfSegmentSize = + RoundUpToAlignment(DwarfSegmentSize, Sec->getAlignment()); + DwarfSegmentSize += Size; + ++NumDwarfSections; + } + } + + if (NumDwarfSections) { + ++NumLoadCommands; + LoadCommandSize += segmentLoadCommandSize(Is64Bit, NumDwarfSections); + } + + SmallString<0> NewSymtab; + NonRelocatableStringpool NewStrings; + unsigned NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist); + unsigned NumSyms = 0; + uint64_t NewStringsSize = 0; + if (ShouldEmitSymtab) { + NewSymtab.reserve(SymtabCmd.nsyms * NListSize / 2); + NumSyms = transferSymbols(InputBinary, NewSymtab, NewStrings); + NewStringsSize = NewStrings.getSize() + 1; + } + + uint64_t SymtabStart = LoadCommandSize; + SymtabStart += HeaderSize; + SymtabStart = RoundUpToAlignment(SymtabStart, 0x1000); + + // We gathered all the information we need, start emitting the output file. + Writer.writeHeader(MachO::MH_DSYM, NumLoadCommands, LoadCommandSize, false); + + // Write the load commands. 
+ assert(OutFile.tell() == HeaderSize); + if (UUIDCmd.cmd != 0) { + Writer.write32(UUIDCmd.cmd); + Writer.write32(UUIDCmd.cmdsize); + Writer.writeBytes( + StringRef(reinterpret_cast(UUIDCmd.uuid), 16)); + assert(OutFile.tell() == HeaderSize + sizeof(UUIDCmd)); + } + + assert(SymtabCmd.cmd && "No symbol table."); + uint64_t StringStart = SymtabStart + NumSyms * NListSize; + if (ShouldEmitSymtab) + Writer.writeSymtabLoadCommand(SymtabStart, NumSyms, StringStart, + NewStringsSize); + + uint64_t DwarfSegmentStart = StringStart + NewStringsSize; + DwarfSegmentStart = RoundUpToAlignment(DwarfSegmentStart, 0x1000); + + // Write the load commands for the segments and sections we 'import' from + // the original binary. + uint64_t EndAddress = 0; + uint64_t GapForDwarf = UINT64_MAX; + for (auto &LCI : InputBinary.load_commands()) { + if (LCI.C.cmd == MachO::LC_SEGMENT) + transferSegmentAndSections(LCI, InputBinary.getSegmentLoadCommand(LCI), + InputBinary, Writer, SymtabStart, + StringStart + NewStringsSize - SymtabStart, + DwarfSegmentSize, GapForDwarf, EndAddress); + else if (LCI.C.cmd == MachO::LC_SEGMENT_64) + transferSegmentAndSections(LCI, InputBinary.getSegment64LoadCommand(LCI), + InputBinary, Writer, SymtabStart, + StringStart + NewStringsSize - SymtabStart, + DwarfSegmentSize, GapForDwarf, EndAddress); + } + + uint64_t DwarfVMAddr = RoundUpToAlignment(EndAddress, 0x1000); + uint64_t DwarfVMMax = Is64Bit ? UINT64_MAX : UINT32_MAX; + if (DwarfVMAddr + DwarfSegmentSize > DwarfVMMax || + DwarfVMAddr + DwarfSegmentSize < DwarfVMAddr /* Overflow */) { + // There is no room for the __DWARF segment at the end of the + // address space. Look through segments to find a gap. + DwarfVMAddr = GapForDwarf; + if (DwarfVMAddr == UINT64_MAX) + warn("not enough VM space for the __DWARF segment.", + "output file streaming"); + } + + // Write the load command for the __DWARF segment. 
+ createDwarfSegment(DwarfVMAddr, DwarfSegmentStart, DwarfSegmentSize, + NumDwarfSections, Layout, Writer); + + assert(OutFile.tell() == LoadCommandSize + HeaderSize); + Writer.WriteZeros(SymtabStart - (LoadCommandSize + HeaderSize)); + assert(OutFile.tell() == SymtabStart); + + // Transfer symbols. + if (ShouldEmitSymtab) { + Writer.writeBytes(NewSymtab.str()); + assert(OutFile.tell() == StringStart); + + // Transfer string table. + // FIXME: The NonRelocatableStringpool starts with an empty string, but + // dsymutil-classic starts the reconstructed string table with 2 of these. + // Reproduce that behavior for now (there is corresponding code in + // transferSymbol). + Writer.WriteZeros(1); + typedef NonRelocatableStringpool::MapTy MapTy; + for (auto *Entry = NewStrings.getFirstEntry(); Entry; + Entry = static_cast(Entry->getValue().second)) + Writer.writeBytes( + StringRef(Entry->getKey().data(), Entry->getKey().size() + 1)); + } + + assert(OutFile.tell() == StringStart + NewStringsSize); + + // Pad till the Dwarf segment start. + Writer.WriteZeros(DwarfSegmentStart - (StringStart + NewStringsSize)); + assert(OutFile.tell() == DwarfSegmentStart); + + // Emit the Dwarf sections contents. 
+ for (const MCSection &Sec : MCAsm) { + if (Sec.begin() == Sec.end()) + continue; + + uint64_t Pos = OutFile.tell(); + Writer.WriteZeros(RoundUpToAlignment(Pos, Sec.getAlignment()) - Pos); + MCAsm.writeSectionData(&Sec, Layout); + } + + return true; +} } } } diff --git a/tools/dsymutil/MachOUtils.h b/tools/dsymutil/MachOUtils.h index d6b6f3d61c3..9a63645ab4a 100644 --- a/tools/dsymutil/MachOUtils.h +++ b/tools/dsymutil/MachOUtils.h @@ -13,7 +13,10 @@ #include "llvm/ADT/StringRef.h" namespace llvm { +class MCStreamer; +class raw_fd_ostream; namespace dsymutil { +class DebugMap; struct LinkOptions; namespace MachOUtils { @@ -25,6 +28,9 @@ struct ArchAndFilename { bool generateUniversalBinary(SmallVectorImpl &ArchFiles, StringRef OutputFileName, const LinkOptions &); +bool generateDsymCompanion(const DebugMap &DM, MCStreamer &MS, + raw_fd_ostream &OutFile); + std::string getArchName(StringRef Arch); } } -- 2.34.1