X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=tools%2Fllvm-objdump%2FMachODump.cpp;h=04c72f4856c896e76420ca3d2ec30f93736986f4;hp=16ee58d82877032cf309a5b5a1c3a69ddd9bec84;hb=54a2768153deeac09a643854d230e7d4c60af12a;hpb=4141e003f948b2825cee60935509642546990e35 diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 16ee58d8287..04c72f4856c 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -18,26 +18,28 @@ #include "llvm/ADT/Triple.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/MachO.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" @@ -62,9 +64,81 @@ static cl::opt DSYMFile("dsym", static cl::opt FullLeadingAddr("full-leading-addr", cl::desc("Print full leading address")); -static cl::opt - PrintImmHex("print-imm-hex", - cl::desc("Use hex format for immediate values")); +static cl::opt NoLeadingAddr("no-leading-addr", + cl::desc("Print no leading address")); + +cl::opt llvm::UniversalHeaders("universal-headers", + cl::desc("Print Mach-O universal headers " + "(requires -macho)")); + +cl::opt + llvm::ArchiveHeaders("archive-headers", + cl::desc("Print archive headers for Mach-O archives " + "(requires -macho)")); + +cl::opt + ArchiveMemberOffsets("archive-member-offsets", + cl::desc("Print the offset to each archive member for " + "Mach-O archives (requires -macho and " + "-archive-headers)")); + +cl::opt + llvm::IndirectSymbols("indirect-symbols", + cl::desc("Print indirect symbol table for Mach-O " + "objects (requires -macho)")); + +cl::opt + llvm::DataInCode("data-in-code", + cl::desc("Print the data in code table for Mach-O objects " + "(requires -macho)")); + +cl::opt + llvm::LinkOptHints("link-opt-hints", + cl::desc("Print the linker optimization hints for " + "Mach-O objects (requires -macho)")); + +cl::list + llvm::DumpSections("section", + cl::desc("Prints the specified segment,section for " + "Mach-O objects (requires -macho)")); + +cl::opt + llvm::InfoPlist("info-plist", + cl::desc("Print the info plist section as strings for " + "Mach-O objects (requires -macho)")); + +cl::opt + llvm::DylibsUsed("dylibs-used", + cl::desc("Print the shared libraries used for linked " + "Mach-O files (requires -macho)")); + +cl::opt + llvm::DylibId("dylib-id", + cl::desc("Print the shared library's id for the dylib Mach-O " + "file (requires -macho)")); + +cl::opt + llvm::NonVerbose("non-verbose", + cl::desc("Print the info for Mach-O objects in " + "non-verbose or numeric form (requires -macho)")); + +cl::opt + llvm::ObjcMetaData("objc-meta-data", + cl::desc("Print the Objective-C runtime meta data for " + "Mach-O files (requires -macho)")); + +cl::opt llvm::DisSymName( + "dis-symname", + cl::desc("disassemble just this symbol's instructions (requires -macho")); + +static cl::opt NoSymbolicOperands( + "no-symbolic-operands", + cl::desc("do not symbolic operands when disassembling (requires -macho)")); + +static cl::list + ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"), + cl::ZeroOrMore); +bool ArchAll = false; static std::string ThumbTripleName; @@ -101,19 +175,8 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj, struct SymbolSorter { bool operator()(const SymbolRef &A, const SymbolRef &B) { - SymbolRef::Type AType, BType; - A.getType(AType); - B.getType(BType); - - uint64_t AAddr, BAddr; - if (AType != SymbolRef::ST_Function) - AAddr = 0; - else - A.getAddress(AAddr); - if (BType != SymbolRef::ST_Function) - BAddr = 0; - else - B.getAddress(BAddr); + uint64_t AAddr = (A.getType() != SymbolRef::ST_Function) ? 0 : A.getValue(); + uint64_t BAddr = (B.getType() != SymbolRef::ST_Function) ? 0 : B.getValue(); return AAddr < BAddr; } }; @@ -138,7 +201,7 @@ static bool compareDiceTableEntries(const DiceTableEntry &i, return j.first >= i.first && j.first < i.first + Length; } -static uint64_t DumpDataInCode(const char *bytes, uint64_t Length, +static uint64_t DumpDataInCode(const uint8_t *bytes, uint64_t Length, unsigned short Kind) { uint32_t Value, Size = 1; @@ -147,19 +210,19 @@ static uint64_t DumpDataInCode(const char *bytes, uint64_t Length, case MachO::DICE_KIND_DATA: if (Length >= 4) { if (!NoShowRawInsn) - DumpBytes(StringRef(bytes, 4)); + dumpBytes(ArrayRef(bytes, 4), outs()); Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0]; outs() << "\t.long " << Value; Size = 4; } else if (Length >= 2) { if (!NoShowRawInsn) - DumpBytes(StringRef(bytes, 2)); + dumpBytes(ArrayRef(bytes, 2), outs()); Value = bytes[1] << 8 | bytes[0]; outs() << "\t.short " << Value; Size = 2; } else { if (!NoShowRawInsn) - DumpBytes(StringRef(bytes, 2)); + dumpBytes(ArrayRef(bytes, 2), outs()); Value = bytes[0]; outs() << "\t.byte " << Value; Size = 1; @@ -171,14 +234,14 @@ static uint64_t DumpDataInCode(const char *bytes, uint64_t Length, break; case MachO::DICE_KIND_JUMP_TABLE8: if (!NoShowRawInsn) - DumpBytes(StringRef(bytes, 1)); + dumpBytes(ArrayRef(bytes, 1), outs()); Value = bytes[0]; outs() << "\t.byte " << format("%3u", Value) << "\t@ KIND_JUMP_TABLE8\n"; Size = 1; break; case MachO::DICE_KIND_JUMP_TABLE16: if (!NoShowRawInsn) - DumpBytes(StringRef(bytes, 2)); + dumpBytes(ArrayRef(bytes, 2), outs()); Value = bytes[1] << 8 | bytes[0]; outs() << "\t.short " << format("%5u", Value & 0xffff) << "\t@ KIND_JUMP_TABLE16\n"; @@ -187,7 +250,7 @@ static uint64_t DumpDataInCode(const char *bytes, uint64_t Length, case MachO::DICE_KIND_JUMP_TABLE32: case MachO::DICE_KIND_ABS_JUMP_TABLE32: if (!NoShowRawInsn) - DumpBytes(StringRef(bytes, 4)); + dumpBytes(ArrayRef(bytes, 4), outs()); Value = bytes[3] << 24 | bytes[2] << 16 | bytes[1] << 8 | bytes[0]; outs() << "\t.long " << Value; if (Kind == MachO::DICE_KIND_JUMP_TABLE32) @@ -200,14 +263,18 @@ static uint64_t DumpDataInCode(const char *bytes, uint64_t Length, return Size; } -static void getSectionsAndSymbols(const MachO::mach_header Header, - MachOObjectFile *MachOObj, +static void getSectionsAndSymbols(MachOObjectFile *MachOObj, std::vector &Sections, std::vector &Symbols, SmallVectorImpl &FoundFns, uint64_t &BaseSegmentAddress) { - for (const SymbolRef &Symbol : MachOObj->symbols()) - Symbols.push_back(Symbol); + for (const SymbolRef &Symbol : MachOObj->symbols()) { + ErrorOr SymName = Symbol.getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + if (!SymName->startswith("ltmp")) + Symbols.push_back(Symbol); + } for (const SectionRef &Section : MachOObj->sections()) { StringRef SectName; @@ -215,10 +282,8 @@ static void getSectionsAndSymbols(const MachO::mach_header Header, Sections.push_back(Section); } - MachOObjectFile::LoadCommandInfo Command = - MachOObj->getFirstLoadCommandInfo(); bool BaseSegmentAddressSet = false; - for (unsigned i = 0;; ++i) { + for (const auto &Command : MachOObj->load_commands()) { if (Command.C.cmd == MachO::LC_FUNCTION_STARTS) { // We found a function starts segment, parse the addresses for later // consumption. @@ -234,936 +299,5270 @@ static void getSectionsAndSymbols(const MachO::mach_header Header, BaseSegmentAddress = SLC.vmaddr; } } + } +} - if (i == Header.ncmds - 1) - break; +static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose, + uint32_t n, uint32_t count, + uint32_t stride, uint64_t addr) { + MachO::dysymtab_command Dysymtab = O->getDysymtabLoadCommand(); + uint32_t nindirectsyms = Dysymtab.nindirectsyms; + if (n > nindirectsyms) + outs() << " (entries start past the end of the indirect symbol " + "table) (reserved1 field greater than the table size)"; + else if (n + count > nindirectsyms) + outs() << " (entries extends past the end of the indirect symbol " + "table)"; + outs() << "\n"; + uint32_t cputype = O->getHeader().cputype; + if (cputype & MachO::CPU_ARCH_ABI64) + outs() << "address index"; + else + outs() << "address index"; + if (verbose) + outs() << " name\n"; + else + outs() << "\n"; + for (uint32_t j = 0; j < count && n + j < nindirectsyms; j++) { + if (cputype & MachO::CPU_ARCH_ABI64) + outs() << format("0x%016" PRIx64, addr + j * stride) << " "; else - Command = MachOObj->getNextLoadCommandInfo(Command); + outs() << format("0x%08" PRIx32, addr + j * stride) << " "; + MachO::dysymtab_command Dysymtab = O->getDysymtabLoadCommand(); + uint32_t indirect_symbol = O->getIndirectSymbolTableEntry(Dysymtab, n + j); + if (indirect_symbol == MachO::INDIRECT_SYMBOL_LOCAL) { + outs() << "LOCAL\n"; + continue; + } + if (indirect_symbol == + (MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS)) { + outs() << "LOCAL ABSOLUTE\n"; + continue; + } + if (indirect_symbol == MachO::INDIRECT_SYMBOL_ABS) { + outs() << "ABSOLUTE\n"; + continue; + } + outs() << format("%5u ", indirect_symbol); + if (verbose) { + MachO::symtab_command Symtab = O->getSymtabLoadCommand(); + if (indirect_symbol < Symtab.nsyms) { + symbol_iterator Sym = O->getSymbolByIndex(indirect_symbol); + SymbolRef Symbol = *Sym; + ErrorOr SymName = Symbol.getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + outs() << *SymName; + } else { + outs() << "?"; + } + } + outs() << "\n"; } } -static void DisassembleInputMachO2(StringRef Filename, - MachOObjectFile *MachOOF); +static void PrintIndirectSymbols(MachOObjectFile *O, bool verbose) { + for (const auto &Load : O->load_commands()) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = O->getSegment64LoadCommand(Load); + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = O->getSection64(Load, J); + uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; + if (section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || + section_type == MachO::S_LAZY_SYMBOL_POINTERS || + section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || + section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS || + section_type == MachO::S_SYMBOL_STUBS) { + uint32_t stride; + if (section_type == MachO::S_SYMBOL_STUBS) + stride = Sec.reserved2; + else + stride = 8; + if (stride == 0) { + outs() << "Can't print indirect symbols for (" << Sec.segname << "," + << Sec.sectname << ") " + << "(size of stubs in reserved2 field is zero)\n"; + continue; + } + uint32_t count = Sec.size / stride; + outs() << "Indirect symbols for (" << Sec.segname << "," + << Sec.sectname << ") " << count << " entries"; + uint32_t n = Sec.reserved1; + PrintIndirectSymbolTable(O, verbose, n, count, stride, Sec.addr); + } + } + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = O->getSegmentLoadCommand(Load); + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = O->getSection(Load, J); + uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; + if (section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || + section_type == MachO::S_LAZY_SYMBOL_POINTERS || + section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || + section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS || + section_type == MachO::S_SYMBOL_STUBS) { + uint32_t stride; + if (section_type == MachO::S_SYMBOL_STUBS) + stride = Sec.reserved2; + else + stride = 4; + if (stride == 0) { + outs() << "Can't print indirect symbols for (" << Sec.segname << "," + << Sec.sectname << ") " + << "(size of stubs in reserved2 field is zero)\n"; + continue; + } + uint32_t count = Sec.size / stride; + outs() << "Indirect symbols for (" << Sec.segname << "," + << Sec.sectname << ") " << count << " entries"; + uint32_t n = Sec.reserved1; + PrintIndirectSymbolTable(O, verbose, n, count, stride, Sec.addr); + } + } + } + } +} -void llvm::DisassembleInputMachO(StringRef Filename) { - ErrorOr> BuffOrErr = - MemoryBuffer::getFileOrSTDIN(Filename); - if (std::error_code EC = BuffOrErr.getError()) { - errs() << "llvm-objdump: " << Filename << ": " << EC.message() << "\n"; - return; +static void PrintDataInCodeTable(MachOObjectFile *O, bool verbose) { + MachO::linkedit_data_command DIC = O->getDataInCodeLoadCommand(); + uint32_t nentries = DIC.datasize / sizeof(struct MachO::data_in_code_entry); + outs() << "Data in code table (" << nentries << " entries)\n"; + outs() << "offset length kind\n"; + for (dice_iterator DI = O->begin_dices(), DE = O->end_dices(); DI != DE; + ++DI) { + uint32_t Offset; + DI->getOffset(Offset); + outs() << format("0x%08" PRIx32, Offset) << " "; + uint16_t Length; + DI->getLength(Length); + outs() << format("%6u", Length) << " "; + uint16_t Kind; + DI->getKind(Kind); + if (verbose) { + switch (Kind) { + case MachO::DICE_KIND_DATA: + outs() << "DATA"; + break; + case MachO::DICE_KIND_JUMP_TABLE8: + outs() << "JUMP_TABLE8"; + break; + case MachO::DICE_KIND_JUMP_TABLE16: + outs() << "JUMP_TABLE16"; + break; + case MachO::DICE_KIND_JUMP_TABLE32: + outs() << "JUMP_TABLE32"; + break; + case MachO::DICE_KIND_ABS_JUMP_TABLE32: + outs() << "ABS_JUMP_TABLE32"; + break; + default: + outs() << format("0x%04" PRIx32, Kind); + break; + } + } else + outs() << format("0x%04" PRIx32, Kind); + outs() << "\n"; } - std::unique_ptr Buff = std::move(BuffOrErr.get()); +} + +static void PrintLinkOptHints(MachOObjectFile *O) { + MachO::linkedit_data_command LohLC = O->getLinkOptHintsLoadCommand(); + const char *loh = O->getData().substr(LohLC.dataoff, 1).data(); + uint32_t nloh = LohLC.datasize; + outs() << "Linker optimiztion hints (" << nloh << " total bytes)\n"; + for (uint32_t i = 0; i < nloh;) { + unsigned n; + uint64_t identifier = decodeULEB128((const uint8_t *)(loh + i), &n); + i += n; + outs() << " identifier " << identifier << " "; + if (i >= nloh) + return; + switch (identifier) { + case 1: + outs() << "AdrpAdrp\n"; + break; + case 2: + outs() << "AdrpLdr\n"; + break; + case 3: + outs() << "AdrpAddLdr\n"; + break; + case 4: + outs() << "AdrpLdrGotLdr\n"; + break; + case 5: + outs() << "AdrpAddStr\n"; + break; + case 6: + outs() << "AdrpLdrGotStr\n"; + break; + case 7: + outs() << "AdrpAdd\n"; + break; + case 8: + outs() << "AdrpLdrGot\n"; + break; + default: + outs() << "Unknown identifier value\n"; + break; + } + uint64_t narguments = decodeULEB128((const uint8_t *)(loh + i), &n); + i += n; + outs() << " narguments " << narguments << "\n"; + if (i >= nloh) + return; - std::unique_ptr MachOOF = std::move( - ObjectFile::createMachOObjectFile(Buff.get()->getMemBufferRef()).get()); + for (uint32_t j = 0; j < narguments; j++) { + uint64_t value = decodeULEB128((const uint8_t *)(loh + i), &n); + i += n; + outs() << "\tvalue " << format("0x%" PRIx64, value) << "\n"; + if (i >= nloh) + return; + } + } +} - DisassembleInputMachO2(Filename, MachOOF.get()); +static void PrintDylibs(MachOObjectFile *O, bool JustId) { + unsigned Index = 0; + for (const auto &Load : O->load_commands()) { + if ((JustId && Load.C.cmd == MachO::LC_ID_DYLIB) || + (!JustId && (Load.C.cmd == MachO::LC_ID_DYLIB || + Load.C.cmd == MachO::LC_LOAD_DYLIB || + Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB || + Load.C.cmd == MachO::LC_REEXPORT_DYLIB || + Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB || + Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB))) { + MachO::dylib_command dl = O->getDylibIDLoadCommand(Load); + if (dl.dylib.name < dl.cmdsize) { + const char *p = (const char *)(Load.Ptr) + dl.dylib.name; + if (JustId) + outs() << p << "\n"; + else { + outs() << "\t" << p; + outs() << " (compatibility version " + << ((dl.dylib.compatibility_version >> 16) & 0xffff) << "." + << ((dl.dylib.compatibility_version >> 8) & 0xff) << "." + << (dl.dylib.compatibility_version & 0xff) << ","; + outs() << " current version " + << ((dl.dylib.current_version >> 16) & 0xffff) << "." + << ((dl.dylib.current_version >> 8) & 0xff) << "." + << (dl.dylib.current_version & 0xff) << ")\n"; + } + } else { + outs() << "\tBad offset (" << dl.dylib.name << ") for name of "; + if (Load.C.cmd == MachO::LC_ID_DYLIB) + outs() << "LC_ID_DYLIB "; + else if (Load.C.cmd == MachO::LC_LOAD_DYLIB) + outs() << "LC_LOAD_DYLIB "; + else if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB) + outs() << "LC_LOAD_WEAK_DYLIB "; + else if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB) + outs() << "LC_LAZY_LOAD_DYLIB "; + else if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB) + outs() << "LC_REEXPORT_DYLIB "; + else if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) + outs() << "LC_LOAD_UPWARD_DYLIB "; + else + outs() << "LC_??? "; + outs() << "command " << Index++ << "\n"; + } + } + } } typedef DenseMap SymbolAddressMap; -typedef std::pair BindInfoEntry; -typedef std::vector BindTable; -typedef BindTable::iterator bind_table_iterator; -// The block of info used by the Symbolizer call backs. -struct DisassembleInfo { - bool verbose; - MachOObjectFile *O; - SectionRef S; - SymbolAddressMap *AddrMap; - std::vector *Sections; - const char *class_name; - const char *selector_name; - char *method; - char *demangled_name; - BindTable *bindtable; -}; +static void CreateSymbolAddressMap(MachOObjectFile *O, + SymbolAddressMap *AddrMap) { + // Create a map of symbol addresses to symbol names. + for (const SymbolRef &Symbol : O->symbols()) { + SymbolRef::Type ST = Symbol.getType(); + if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data || + ST == SymbolRef::ST_Other) { + uint64_t Address = Symbol.getValue(); + ErrorOr SymNameOrErr = Symbol.getName(); + if (std::error_code EC = SymNameOrErr.getError()) + report_fatal_error(EC.message()); + StringRef SymName = *SymNameOrErr; + if (!SymName.startswith(".objc")) + (*AddrMap)[Address] = SymName; + } + } +} // GuessSymbolName is passed the address of what might be a symbol and a -// pointer to the DisassembleInfo struct. It returns the name of a symbol +// pointer to the SymbolAddressMap. It returns the name of a symbol // with that address or nullptr if no symbol is found with that address. -static const char *GuessSymbolName(uint64_t value, - struct DisassembleInfo *info) { +static const char *GuessSymbolName(uint64_t value, SymbolAddressMap *AddrMap) { const char *SymbolName = nullptr; // A DenseMap can't lookup up some values. if (value != 0xffffffffffffffffULL && value != 0xfffffffffffffffeULL) { - StringRef name = info->AddrMap->lookup(value); + StringRef name = AddrMap->lookup(value); if (!name.empty()) SymbolName = name.data(); } return SymbolName; } -// SymbolizerGetOpInfo() is the operand information call back function. -// This is called to get the symbolic information for operand(s) of an -// instruction when it is being done. This routine does this from -// the relocation information, symbol table, etc. That block of information -// is a pointer to the struct DisassembleInfo that was passed when the -// disassembler context was created and passed to back to here when -// called back by the disassembler for instruction operands that could have -// relocation information. The address of the instruction containing operand is -// at the Pc parameter. The immediate value the operand has is passed in -// op_info->Value and is at Offset past the start of the instruction and has a -// byte Size of 1, 2 or 4. The symbolc information is returned in TagBuf is the -// LLVMOpInfo1 struct defined in the header "llvm-c/Disassembler.h" as symbol -// names and addends of the symbolic expression to add for the operand. The -// value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic -// information is returned then this function returns 1 else it returns 0. -int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, - uint64_t Size, int TagType, void *TagBuf) { - struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo; - struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf; - unsigned int value = op_info->Value; +static void DumpCstringChar(const char c) { + char p[2]; + p[0] = c; + p[1] = '\0'; + outs().write_escaped(p); +} - // Make sure all fields returned are zero if we don't set them. - memset((void *)op_info, '\0', sizeof(struct LLVMOpInfo1)); - op_info->Value = value; +static void DumpCstringSection(MachOObjectFile *O, const char *sect, + uint32_t sect_size, uint64_t sect_addr, + bool print_addresses) { + for (uint32_t i = 0; i < sect_size; i++) { + if (print_addresses) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, sect_addr + i) << " "; + else + outs() << format("%08" PRIx64, sect_addr + i) << " "; + } + for (; i < sect_size && sect[i] != '\0'; i++) + DumpCstringChar(sect[i]); + if (i < sect_size && sect[i] == '\0') + outs() << "\n"; + } +} - // If the TagType is not the value 1 which it code knows about or if no - // verbose symbolic information is wanted then just return 0, indicating no - // information is being returned. - if (TagType != 1 || info->verbose == false) - return 0; +static void DumpLiteral4(uint32_t l, float f) { + outs() << format("0x%08" PRIx32, l); + if ((l & 0x7f800000) != 0x7f800000) + outs() << format(" (%.16e)\n", f); + else { + if (l == 0x7f800000) + outs() << " (+Infinity)\n"; + else if (l == 0xff800000) + outs() << " (-Infinity)\n"; + else if ((l & 0x00400000) == 0x00400000) + outs() << " (non-signaling Not-a-Number)\n"; + else + outs() << " (signaling Not-a-Number)\n"; + } +} - unsigned int Arch = info->O->getArch(); - if (Arch == Triple::x86) { - if (Size != 1 && Size != 2 && Size != 4 && Size != 0) - return 0; - // First search the section's relocation entries (if any) for an entry - // for this section offset. - uint32_t sect_addr = info->S.getAddress(); - uint32_t sect_offset = (Pc + Offset) - sect_addr; - bool reloc_found = false; - DataRefImpl Rel; - MachO::any_relocation_info RE; - bool isExtern = false; - SymbolRef Symbol; - bool r_scattered = false; - uint32_t r_value, pair_r_value, r_type; - for (const RelocationRef &Reloc : info->S.relocations()) { - uint64_t RelocOffset; - Reloc.getOffset(RelocOffset); - if (RelocOffset == sect_offset) { - Rel = Reloc.getRawDataRefImpl(); - RE = info->O->getRelocation(Rel); - r_type = info->O->getAnyRelocationType(RE); - r_scattered = info->O->isRelocationScattered(RE); - if (r_scattered) { - r_value = info->O->getScatteredRelocationValue(RE); - if (r_type == MachO::GENERIC_RELOC_SECTDIFF || - r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) { - DataRefImpl RelNext = Rel; - info->O->moveRelocationNext(RelNext); - MachO::any_relocation_info RENext; - RENext = info->O->getRelocation(RelNext); - if (info->O->isRelocationScattered(RENext)) - pair_r_value = info->O->getScatteredRelocationValue(RENext); - else - return 0; - } - } else { - isExtern = info->O->getPlainRelocationExternal(RE); - if (isExtern) { - symbol_iterator RelocSym = Reloc.getSymbol(); - Symbol = *RelocSym; - } - } - reloc_found = true; - break; - } - } - if (reloc_found && isExtern) { - StringRef SymName; - Symbol.getName(SymName); - const char *name = SymName.data(); - op_info->AddSymbol.Present = 1; - op_info->AddSymbol.Name = name; - // For i386 extern relocation entries the value in the instruction is - // the offset from the symbol, and value is already set in op_info->Value. - return 1; +static void DumpLiteral4Section(MachOObjectFile *O, const char *sect, + uint32_t sect_size, uint64_t sect_addr, + bool print_addresses) { + for (uint32_t i = 0; i < sect_size; i += sizeof(float)) { + if (print_addresses) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, sect_addr + i) << " "; + else + outs() << format("%08" PRIx64, sect_addr + i) << " "; } - if (reloc_found && (r_type == MachO::GENERIC_RELOC_SECTDIFF || - r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)) { - const char *add = GuessSymbolName(r_value, info); - const char *sub = GuessSymbolName(pair_r_value, info); - uint32_t offset = value - (r_value - pair_r_value); - op_info->AddSymbol.Present = 1; - if (add != nullptr) - op_info->AddSymbol.Name = add; + float f; + memcpy(&f, sect + i, sizeof(float)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(f); + uint32_t l; + memcpy(&l, sect + i, sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(l); + DumpLiteral4(l, f); + } +} + +static void DumpLiteral8(MachOObjectFile *O, uint32_t l0, uint32_t l1, + double d) { + outs() << format("0x%08" PRIx32, l0) << " " << format("0x%08" PRIx32, l1); + uint32_t Hi, Lo; + if (O->isLittleEndian()) { + Hi = l1; + Lo = l0; + } else { + Hi = l0; + Lo = l1; + } + // Hi is the high word, so this is equivalent to if(isfinite(d)) + if ((Hi & 0x7ff00000) != 0x7ff00000) + outs() << format(" (%.16e)\n", d); + else { + if (Hi == 0x7ff00000 && Lo == 0) + outs() << " (+Infinity)\n"; + else if (Hi == 0xfff00000 && Lo == 0) + outs() << " (-Infinity)\n"; + else if ((Hi & 0x00080000) == 0x00080000) + outs() << " (non-signaling Not-a-Number)\n"; + else + outs() << " (signaling Not-a-Number)\n"; + } +} + +static void DumpLiteral8Section(MachOObjectFile *O, const char *sect, + uint32_t sect_size, uint64_t sect_addr, + bool print_addresses) { + for (uint32_t i = 0; i < sect_size; i += sizeof(double)) { + if (print_addresses) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, sect_addr + i) << " "; else - op_info->AddSymbol.Value = r_value; - op_info->SubtractSymbol.Present = 1; - if (sub != nullptr) - op_info->SubtractSymbol.Name = sub; + outs() << format("%08" PRIx64, sect_addr + i) << " "; + } + double d; + memcpy(&d, sect + i, sizeof(double)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(d); + uint32_t l0, l1; + memcpy(&l0, sect + i, sizeof(uint32_t)); + memcpy(&l1, sect + i + sizeof(uint32_t), sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(l0); + sys::swapByteOrder(l1); + } + DumpLiteral8(O, l0, l1, d); + } +} + +static void DumpLiteral16(uint32_t l0, uint32_t l1, uint32_t l2, uint32_t l3) { + outs() << format("0x%08" PRIx32, l0) << " "; + outs() << format("0x%08" PRIx32, l1) << " "; + outs() << format("0x%08" PRIx32, l2) << " "; + outs() << format("0x%08" PRIx32, l3) << "\n"; +} + +static void DumpLiteral16Section(MachOObjectFile *O, const char *sect, + uint32_t sect_size, uint64_t sect_addr, + bool print_addresses) { + for (uint32_t i = 0; i < sect_size; i += 16) { + if (print_addresses) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, sect_addr + i) << " "; else - op_info->SubtractSymbol.Value = pair_r_value; - op_info->Value = offset; - return 1; + outs() << format("%08" PRIx64, sect_addr + i) << " "; } - // TODO: - // Second search the external relocation entries of a fully linked image - // (if any) for an entry that matches this segment offset. - // uint32_t seg_offset = (Pc + Offset); - return 0; - } else if (Arch == Triple::x86_64) { - if (Size != 1 && Size != 2 && Size != 4 && Size != 0) - return 0; - // First search the section's relocation entries (if any) for an entry - // for this section offset. - uint64_t sect_addr = info->S.getAddress(); - uint64_t sect_offset = (Pc + Offset) - sect_addr; - bool reloc_found = false; + uint32_t l0, l1, l2, l3; + memcpy(&l0, sect + i, sizeof(uint32_t)); + memcpy(&l1, sect + i + sizeof(uint32_t), sizeof(uint32_t)); + memcpy(&l2, sect + i + 2 * sizeof(uint32_t), sizeof(uint32_t)); + memcpy(&l3, sect + i + 3 * sizeof(uint32_t), sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(l0); + sys::swapByteOrder(l1); + sys::swapByteOrder(l2); + sys::swapByteOrder(l3); + } + DumpLiteral16(l0, l1, l2, l3); + } +} + +static void DumpLiteralPointerSection(MachOObjectFile *O, + const SectionRef &Section, + const char *sect, uint32_t sect_size, + uint64_t sect_addr, + bool print_addresses) { + // Collect the literal sections in this Mach-O file. + std::vector LiteralSections; + for (const SectionRef &Section : O->sections()) { + DataRefImpl Ref = Section.getRawDataRefImpl(); + uint32_t section_type; + if (O->is64Bit()) { + const MachO::section_64 Sec = O->getSection64(Ref); + section_type = Sec.flags & MachO::SECTION_TYPE; + } else { + const MachO::section Sec = O->getSection(Ref); + section_type = Sec.flags & MachO::SECTION_TYPE; + } + if (section_type == MachO::S_CSTRING_LITERALS || + section_type == MachO::S_4BYTE_LITERALS || + section_type == MachO::S_8BYTE_LITERALS || + section_type == MachO::S_16BYTE_LITERALS) + LiteralSections.push_back(Section); + } + + // Set the size of the literal pointer. + uint32_t lp_size = O->is64Bit() ? 8 : 4; + + // Collect the external relocation symbols for the literal pointers. + std::vector> Relocs; + for (const RelocationRef &Reloc : Section.relocations()) { DataRefImpl Rel; MachO::any_relocation_info RE; bool isExtern = false; - SymbolRef Symbol; - for (const RelocationRef &Reloc : info->S.relocations()) { - uint64_t RelocOffset; - Reloc.getOffset(RelocOffset); - if (RelocOffset == sect_offset) { - Rel = Reloc.getRawDataRefImpl(); - RE = info->O->getRelocation(Rel); - // NOTE: Scattered relocations don't exist on x86_64. - isExtern = info->O->getPlainRelocationExternal(RE); - if (isExtern) { - symbol_iterator RelocSym = Reloc.getSymbol(); - Symbol = *RelocSym; - } - reloc_found = true; - break; - } - } - if (reloc_found && isExtern) { - // The Value passed in will be adjusted by the Pc if the instruction - // adds the Pc. But for x86_64 external relocation entries the Value - // is the offset from the external symbol. - if (info->O->getAnyRelocationPCRel(RE)) - op_info->Value -= Pc + Offset + Size; - StringRef SymName; - Symbol.getName(SymName); - const char *name = SymName.data(); - unsigned Type = info->O->getAnyRelocationType(RE); - if (Type == MachO::X86_64_RELOC_SUBTRACTOR) { - DataRefImpl RelNext = Rel; - info->O->moveRelocationNext(RelNext); - MachO::any_relocation_info RENext = info->O->getRelocation(RelNext); - unsigned TypeNext = info->O->getAnyRelocationType(RENext); - bool isExternNext = info->O->getPlainRelocationExternal(RENext); - unsigned SymbolNum = info->O->getPlainRelocationSymbolNum(RENext); - if (TypeNext == MachO::X86_64_RELOC_UNSIGNED && isExternNext) { - op_info->SubtractSymbol.Present = 1; - op_info->SubtractSymbol.Name = name; - symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum); - Symbol = *RelocSymNext; - StringRef SymNameNext; - Symbol.getName(SymNameNext); - name = SymNameNext.data(); - } - } - // TODO: add the VariantKinds to op_info->VariantKind for relocation types - // like: X86_64_RELOC_TLV, X86_64_RELOC_GOT_LOAD and X86_64_RELOC_GOT. - op_info->AddSymbol.Present = 1; - op_info->AddSymbol.Name = name; - return 1; + Rel = Reloc.getRawDataRefImpl(); + RE = O->getRelocation(Rel); + isExtern = O->getPlainRelocationExternal(RE); + if (isExtern) { + uint64_t RelocOffset = Reloc.getOffset(); + symbol_iterator RelocSym = Reloc.getSymbol(); + Relocs.push_back(std::make_pair(RelocOffset, *RelocSym)); } - // TODO: - // Second search the external relocation entries of a fully linked image - // (if any) for an entry that matches this segment offset. - // uint64_t seg_offset = (Pc + Offset); - return 0; - } else if (Arch == Triple::arm) { - if (Offset != 0 || (Size != 4 && Size != 2)) - return 0; - // First search the section's relocation entries (if any) for an entry - // for this section offset. - uint32_t sect_addr = info->S.getAddress(); - uint32_t sect_offset = (Pc + Offset) - sect_addr; - bool reloc_found = false; - DataRefImpl Rel; - MachO::any_relocation_info RE; - bool isExtern = false; - SymbolRef Symbol; - bool r_scattered = false; - uint32_t r_value, pair_r_value, r_type, r_length, other_half; - for (const RelocationRef &Reloc : info->S.relocations()) { - uint64_t RelocOffset; - Reloc.getOffset(RelocOffset); - if (RelocOffset == sect_offset) { - Rel = Reloc.getRawDataRefImpl(); - RE = info->O->getRelocation(Rel); - r_length = info->O->getAnyRelocationLength(RE); - r_scattered = info->O->isRelocationScattered(RE); - if (r_scattered) { - r_value = info->O->getScatteredRelocationValue(RE); - r_type = info->O->getScatteredRelocationType(RE); - } else { - r_type = info->O->getAnyRelocationType(RE); - isExtern = info->O->getPlainRelocationExternal(RE); - if (isExtern) { - symbol_iterator RelocSym = Reloc.getSymbol(); - Symbol = *RelocSym; - } - } - if (r_type == MachO::ARM_RELOC_HALF || - r_type == MachO::ARM_RELOC_SECTDIFF || - r_type == MachO::ARM_RELOC_LOCAL_SECTDIFF || - r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { - DataRefImpl RelNext = Rel; - info->O->moveRelocationNext(RelNext); - MachO::any_relocation_info RENext; - RENext = info->O->getRelocation(RelNext); - other_half = info->O->getAnyRelocationAddress(RENext) & 0xffff; - if (info->O->isRelocationScattered(RENext)) - pair_r_value = info->O->getScatteredRelocationValue(RENext); - } - reloc_found = true; - break; - } + } + array_pod_sort(Relocs.begin(), Relocs.end()); + + // Dump each literal pointer. + for (uint32_t i = 0; i < sect_size; i += lp_size) { + if (print_addresses) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, sect_addr + i) << " "; + else + outs() << format("%08" PRIx64, sect_addr + i) << " "; } - if (reloc_found && isExtern) { - StringRef SymName; - Symbol.getName(SymName); - const char *name = SymName.data(); - op_info->AddSymbol.Present = 1; - op_info->AddSymbol.Name = name; - if (value != 0) { - switch (r_type) { - case MachO::ARM_RELOC_HALF: - if ((r_length & 0x1) == 1) { - op_info->Value = value << 16 | other_half; - op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; - } else { - op_info->Value = other_half << 16 | value; - op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; - } - break; - default: - break; - } - } else { - switch (r_type) { - case MachO::ARM_RELOC_HALF: - if ((r_length & 0x1) == 1) { - op_info->Value = value << 16 | other_half; - op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; - } else { - op_info->Value = other_half << 16 | value; - op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; - } - break; - default: - break; - } - } - return 1; + uint64_t lp; + if (O->is64Bit()) { + memcpy(&lp, sect + i, sizeof(uint64_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(lp); + } else { + uint32_t li; + memcpy(&li, sect + i, sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(li); + lp = li; } - // If we have a branch that is not an external relocation entry then - // return 0 so the code in tryAddingSymbolicOperand() can use the - // SymbolLookUp call back with the branch target address to look up the - // symbol and possiblity add an annotation for a symbol stub. - if (reloc_found && isExtern == 0 && (r_type == MachO::ARM_RELOC_BR24 || - r_type == MachO::ARM_THUMB_RELOC_BR22)) - return 0; - uint32_t offset = 0; - if (reloc_found) { - if (r_type == MachO::ARM_RELOC_HALF || - r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { - if ((r_length & 0x1) == 1) - value = value << 16 | other_half; - else - value = other_half << 16 | value; - } - if (r_scattered && (r_type != MachO::ARM_RELOC_HALF && - r_type != MachO::ARM_RELOC_HALF_SECTDIFF)) { - offset = value - r_value; - value = r_value; - } + // First look for an external relocation entry for this literal pointer. + auto Reloc = std::find_if( + Relocs.begin(), Relocs.end(), + [&](const std::pair &P) { return P.first == i; }); + if (Reloc != Relocs.end()) { + symbol_iterator RelocSym = Reloc->second; + ErrorOr SymName = RelocSym->getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + outs() << "external relocation entry for symbol:" << *SymName << "\n"; + continue; } - if (reloc_found && r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { - if ((r_length & 0x1) == 1) - op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; - else - op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; - const char *add = GuessSymbolName(r_value, info); - const char *sub = GuessSymbolName(pair_r_value, info); - int32_t offset = value - (r_value - pair_r_value); - op_info->AddSymbol.Present = 1; - if (add != nullptr) - op_info->AddSymbol.Name = add; - else - op_info->AddSymbol.Value = r_value; - op_info->SubtractSymbol.Present = 1; - if (sub != nullptr) - op_info->SubtractSymbol.Name = sub; - else - op_info->SubtractSymbol.Value = pair_r_value; - op_info->Value = offset; - return 1; + // For local references see what the section the literal pointer points to. + auto Sect = std::find_if(LiteralSections.begin(), LiteralSections.end(), + [&](const SectionRef &R) { + return lp >= R.getAddress() && + lp < R.getAddress() + R.getSize(); + }); + if (Sect == LiteralSections.end()) { + outs() << format("0x%" PRIx64, lp) << " (not in a literal section)\n"; + continue; } - if (reloc_found == false) - return 0; + uint64_t SectAddress = Sect->getAddress(); + uint64_t SectSize = Sect->getSize(); - op_info->AddSymbol.Present = 1; - op_info->Value = offset; - if (reloc_found) { - if (r_type == MachO::ARM_RELOC_HALF) { - if ((r_length & 0x1) == 1) - op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; - else - op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; + StringRef SectName; + Sect->getName(SectName); + DataRefImpl Ref = Sect->getRawDataRefImpl(); + StringRef SegmentName = O->getSectionFinalSegmentName(Ref); + outs() << SegmentName << ":" << SectName << ":"; + + uint32_t section_type; + if (O->is64Bit()) { + const MachO::section_64 Sec = O->getSection64(Ref); + section_type = Sec.flags & MachO::SECTION_TYPE; + } else { + const MachO::section Sec = O->getSection(Ref); + section_type = Sec.flags & MachO::SECTION_TYPE; + } + + StringRef BytesStr; + Sect->getContents(BytesStr); + const char *Contents = reinterpret_cast(BytesStr.data()); + + switch (section_type) { + case MachO::S_CSTRING_LITERALS: + for (uint64_t i = lp - SectAddress; i < SectSize && Contents[i] != '\0'; + i++) { + DumpCstringChar(Contents[i]); + } + outs() << "\n"; + break; + case MachO::S_4BYTE_LITERALS: + float f; + memcpy(&f, Contents + (lp - SectAddress), sizeof(float)); + uint32_t l; + memcpy(&l, Contents + (lp - SectAddress), sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(f); + sys::swapByteOrder(l); } + DumpLiteral4(l, f); + break; + case MachO::S_8BYTE_LITERALS: { + double d; + memcpy(&d, Contents + (lp - SectAddress), sizeof(double)); + uint32_t l0, l1; + memcpy(&l0, Contents + (lp - SectAddress), sizeof(uint32_t)); + memcpy(&l1, Contents + (lp - SectAddress) + sizeof(uint32_t), + sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(f); + sys::swapByteOrder(l0); + sys::swapByteOrder(l1); + } + DumpLiteral8(O, l0, l1, d); + break; + } + case MachO::S_16BYTE_LITERALS: { + uint32_t l0, l1, l2, l3; + memcpy(&l0, Contents + (lp - SectAddress), sizeof(uint32_t)); + memcpy(&l1, Contents + (lp - SectAddress) + sizeof(uint32_t), + sizeof(uint32_t)); + memcpy(&l2, Contents + (lp - SectAddress) + 2 * sizeof(uint32_t), + sizeof(uint32_t)); + memcpy(&l3, Contents + (lp - SectAddress) + 3 * sizeof(uint32_t), + sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) { + sys::swapByteOrder(l0); + sys::swapByteOrder(l1); + sys::swapByteOrder(l2); + sys::swapByteOrder(l3); + } + DumpLiteral16(l0, l1, l2, l3); + break; } - const char *add = GuessSymbolName(value, info); - if (add != nullptr) { - op_info->AddSymbol.Name = add; - return 1; } - op_info->AddSymbol.Value = value; - return 1; - } else if (Arch == Triple::aarch64) { - return 0; - } else { - return 0; } } -// GuessCstringPointer is passed the address of what might be a pointer to a -// literal string in a cstring section. If that address is in a cstring section -// it returns a pointer to that string. Else it returns nullptr. -const char *GuessCstringPointer(uint64_t ReferenceValue, - struct DisassembleInfo *info) { - uint32_t LoadCommandCount = info->O->getHeader().ncmds; - MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo(); - for (unsigned I = 0;; ++I) { - if (Load.C.cmd == MachO::LC_SEGMENT_64) { - MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load); - for (unsigned J = 0; J < Seg.nsects; ++J) { - MachO::section_64 Sec = info->O->getSection64(Load, J); - uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; - if (section_type == MachO::S_CSTRING_LITERALS && - ReferenceValue >= Sec.addr && - ReferenceValue < Sec.addr + Sec.size) { - uint64_t sect_offset = ReferenceValue - Sec.addr; - uint64_t object_offset = Sec.offset + sect_offset; - StringRef MachOContents = info->O->getData(); - uint64_t object_size = MachOContents.size(); - const char *object_addr = (const char *)MachOContents.data(); - if (object_offset < object_size) { - const char *name = object_addr + object_offset; - return name; - } else { - return nullptr; - } - } +static void DumpInitTermPointerSection(MachOObjectFile *O, const char *sect, + uint32_t sect_size, uint64_t sect_addr, + SymbolAddressMap *AddrMap, + bool verbose) { + uint32_t stride; + if (O->is64Bit()) + stride = sizeof(uint64_t); + else + stride = sizeof(uint32_t); + for (uint32_t i = 0; i < sect_size; i += stride) { + const char *SymbolName = nullptr; + if (O->is64Bit()) { + outs() << format("0x%016" PRIx64, sect_addr + i * stride) << " "; + uint64_t pointer_value; + memcpy(&pointer_value, sect + i, stride); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(pointer_value); + outs() << format("0x%016" PRIx64, pointer_value); + if (verbose) + SymbolName = GuessSymbolName(pointer_value, AddrMap); + } else { + outs() << format("0x%08" PRIx64, sect_addr + i * stride) << " "; + uint32_t pointer_value; + memcpy(&pointer_value, sect + i, stride); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(pointer_value); + outs() << format("0x%08" PRIx32, pointer_value); + if (verbose) + SymbolName = GuessSymbolName(pointer_value, AddrMap); + } + if (SymbolName) + outs() << " " << SymbolName; + outs() << "\n"; + } +} + +static void DumpRawSectionContents(MachOObjectFile *O, const char *sect, + uint32_t size, uint64_t addr) { + uint32_t cputype = O->getHeader().cputype; + if (cputype == MachO::CPU_TYPE_I386 || cputype == MachO::CPU_TYPE_X86_64) { + uint32_t j; + for (uint32_t i = 0; i < size; i += j, addr += j) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, addr) << "\t"; + else + outs() << format("%08" PRIx64, addr) << "\t"; + for (j = 0; j < 16 && i + j < size; j++) { + uint8_t byte_word = *(sect + i + j); + outs() << format("%02" PRIx32, (uint32_t)byte_word) << " "; } - } else if (Load.C.cmd == MachO::LC_SEGMENT) { - MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load); - for (unsigned J = 0; J < Seg.nsects; ++J) { - MachO::section Sec = info->O->getSection(Load, J); - uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; - if (section_type == MachO::S_CSTRING_LITERALS && - ReferenceValue >= Sec.addr && - ReferenceValue < Sec.addr + Sec.size) { - uint64_t sect_offset = ReferenceValue - Sec.addr; - uint64_t object_offset = Sec.offset + sect_offset; - StringRef MachOContents = info->O->getData(); - uint64_t object_size = MachOContents.size(); - const char *object_addr = (const char *)MachOContents.data(); - if (object_offset < object_size) { - const char *name = object_addr + object_offset; - return name; - } else { - return nullptr; + outs() << "\n"; + } + } else { + uint32_t j; + for (uint32_t i = 0; i < size; i += j, addr += j) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, addr) << "\t"; + else + outs() << format("%08" PRIx64, sect) << "\t"; + for (j = 0; j < 4 * sizeof(int32_t) && i + j < size; + j += sizeof(int32_t)) { + if (i + j + sizeof(int32_t) < size) { + uint32_t long_word; + memcpy(&long_word, sect + i + j, sizeof(int32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(long_word); + outs() << format("%08" PRIx32, long_word) << " "; + } else { + for (uint32_t k = 0; i + j + k < size; k++) { + uint8_t byte_word = *(sect + i + j); + outs() << format("%02" PRIx32, (uint32_t)byte_word) << " "; } } } + outs() << "\n"; } - if (I == LoadCommandCount - 1) - break; - else - Load = info->O->getNextLoadCommandInfo(Load); } - return nullptr; } -// GuessIndirectSymbol returns the name of the indirect symbol for the -// ReferenceValue passed in or nullptr. This is used when ReferenceValue maybe -// an address of a symbol stub or a lazy or non-lazy pointer to associate the -// symbol name being referenced by the stub or pointer. -static const char *GuessIndirectSymbol(uint64_t ReferenceValue, - struct DisassembleInfo *info) { - uint32_t LoadCommandCount = info->O->getHeader().ncmds; - MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo(); - MachO::dysymtab_command Dysymtab = info->O->getDysymtabLoadCommand(); - MachO::symtab_command Symtab = info->O->getSymtabLoadCommand(); - for (unsigned I = 0;; ++I) { - if (Load.C.cmd == MachO::LC_SEGMENT_64) { - MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load); - for (unsigned J = 0; J < Seg.nsects; ++J) { - MachO::section_64 Sec = info->O->getSection64(Load, J); - uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; - if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || - section_type == MachO::S_LAZY_SYMBOL_POINTERS || - section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || - section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS || - section_type == MachO::S_SYMBOL_STUBS) && - ReferenceValue >= Sec.addr && - ReferenceValue < Sec.addr + Sec.size) { - uint32_t stride; - if (section_type == MachO::S_SYMBOL_STUBS) - stride = Sec.reserved2; - else - stride = 8; - if (stride == 0) - return nullptr; - uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride; - if (index < Dysymtab.nindirectsyms) { - uint32_t indirect_symbol = - info->O->getIndirectSymbolTableEntry(Dysymtab, index); - if (indirect_symbol < Symtab.nsyms) { - symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol); - SymbolRef Symbol = *Sym; - StringRef SymName; - Symbol.getName(SymName); - const char *name = SymName.data(); - return name; - } - } - } - } - } else if (Load.C.cmd == MachO::LC_SEGMENT) { - MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load); - for (unsigned J = 0; J < Seg.nsects; ++J) { - MachO::section Sec = info->O->getSection(Load, J); - uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; - if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || - section_type == MachO::S_LAZY_SYMBOL_POINTERS || - section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || - section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS || - section_type == MachO::S_SYMBOL_STUBS) && - ReferenceValue >= Sec.addr && - ReferenceValue < Sec.addr + Sec.size) { - uint32_t stride; - if (section_type == MachO::S_SYMBOL_STUBS) - stride = Sec.reserved2; - else - stride = 4; - if (stride == 0) - return nullptr; - uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride; - if (index < Dysymtab.nindirectsyms) { - uint32_t indirect_symbol = - info->O->getIndirectSymbolTableEntry(Dysymtab, index); - if (indirect_symbol < Symtab.nsyms) { - symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol); - SymbolRef Symbol = *Sym; - StringRef SymName; - Symbol.getName(SymName); - const char *name = SymName.data(); - return name; - } +static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, + StringRef DisSegName, StringRef DisSectName); +static void DumpProtocolSection(MachOObjectFile *O, const char *sect, + uint32_t size, uint32_t addr); + +static void DumpSectionContents(StringRef Filename, MachOObjectFile *O, + bool verbose) { + SymbolAddressMap AddrMap; + if (verbose) + CreateSymbolAddressMap(O, &AddrMap); + + for (unsigned i = 0; i < DumpSections.size(); ++i) { + StringRef DumpSection = DumpSections[i]; + std::pair DumpSegSectName; + DumpSegSectName = DumpSection.split(','); + StringRef DumpSegName, DumpSectName; + if (DumpSegSectName.second.size()) { + DumpSegName = DumpSegSectName.first; + DumpSectName = DumpSegSectName.second; + } else { + DumpSegName = ""; + DumpSectName = DumpSegSectName.first; + } + for (const SectionRef &Section : O->sections()) { + StringRef SectName; + Section.getName(SectName); + DataRefImpl Ref = Section.getRawDataRefImpl(); + StringRef SegName = O->getSectionFinalSegmentName(Ref); + if ((DumpSegName.empty() || SegName == DumpSegName) && + (SectName == DumpSectName)) { + + uint32_t section_flags; + if (O->is64Bit()) { + const MachO::section_64 Sec = O->getSection64(Ref); + section_flags = Sec.flags; + + } else { + const MachO::section Sec = O->getSection(Ref); + section_flags = Sec.flags; + } + uint32_t section_type = section_flags & MachO::SECTION_TYPE; + + StringRef BytesStr; + Section.getContents(BytesStr); + const char *sect = reinterpret_cast(BytesStr.data()); + uint32_t sect_size = BytesStr.size(); + uint64_t sect_addr = Section.getAddress(); + + outs() << "Contents of (" << SegName << "," << SectName + << ") section\n"; + + if (verbose) { + if ((section_flags & MachO::S_ATTR_PURE_INSTRUCTIONS) || + (section_flags & MachO::S_ATTR_SOME_INSTRUCTIONS)) { + DisassembleMachO(Filename, O, SegName, SectName); + continue; } + if (SegName == "__TEXT" && SectName == "__info_plist") { + outs() << sect; + continue; + } + if (SegName == "__OBJC" && SectName == "__protocol") { + DumpProtocolSection(O, sect, sect_size, sect_addr); + continue; + } + switch (section_type) { + case MachO::S_REGULAR: + DumpRawSectionContents(O, sect, sect_size, sect_addr); + break; + case MachO::S_ZEROFILL: + outs() << "zerofill section and has no contents in the file\n"; + break; + case MachO::S_CSTRING_LITERALS: + DumpCstringSection(O, sect, sect_size, sect_addr, !NoLeadingAddr); + break; + case MachO::S_4BYTE_LITERALS: + DumpLiteral4Section(O, sect, sect_size, sect_addr, !NoLeadingAddr); + break; + case MachO::S_8BYTE_LITERALS: + DumpLiteral8Section(O, sect, sect_size, sect_addr, !NoLeadingAddr); + break; + case MachO::S_16BYTE_LITERALS: + DumpLiteral16Section(O, sect, sect_size, sect_addr, !NoLeadingAddr); + break; + case MachO::S_LITERAL_POINTERS: + DumpLiteralPointerSection(O, Section, sect, sect_size, sect_addr, + !NoLeadingAddr); + break; + case MachO::S_MOD_INIT_FUNC_POINTERS: + case MachO::S_MOD_TERM_FUNC_POINTERS: + DumpInitTermPointerSection(O, sect, sect_size, sect_addr, &AddrMap, + verbose); + break; + default: + outs() << "Unknown section type (" + << format("0x%08" PRIx32, section_type) << ")\n"; + DumpRawSectionContents(O, sect, sect_size, sect_addr); + break; + } + } else { + if (section_type == MachO::S_ZEROFILL) + outs() << "zerofill section and has no contents in the file\n"; + else + DumpRawSectionContents(O, sect, sect_size, sect_addr); } } } - if (I == LoadCommandCount - 1) - break; - else - Load = info->O->getNextLoadCommandInfo(Load); } - return nullptr; } -// method_reference() is called passing it the ReferenceName that might be -// a reference it to an Objective-C method call. If so then it allocates and -// assembles a method call string with the values last seen and saved in -// the DisassembleInfo's class_name and selector_name fields. This is saved -// into the method field of the info and any previous string is free'ed. -// Then the class_name field in the info is set to nullptr. The method call -// string is set into ReferenceName and ReferenceType is set to -// LLVMDisassembler_ReferenceType_Out_Objc_Message. If this not a method call -// then both ReferenceType and ReferenceName are left unchanged. -static void method_reference(struct DisassembleInfo *info, - uint64_t *ReferenceType, - const char **ReferenceName) { - if (*ReferenceName != nullptr) { - if (strcmp(*ReferenceName, "_objc_msgSend") == 0) { - if (info->selector_name != NULL) { - if (info->method != nullptr) - free(info->method); - if (info->class_name != nullptr) { - info->method = (char *)malloc(5 + strlen(info->class_name) + - strlen(info->selector_name)); - if (info->method != nullptr) { - strcpy(info->method, "+["); - strcat(info->method, info->class_name); - strcat(info->method, " "); - strcat(info->method, info->selector_name); - strcat(info->method, "]"); - *ReferenceName = info->method; - *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message; - } - } else { - info->method = (char *)malloc(9 + strlen(info->selector_name)); - if (info->method != nullptr) { - strcpy(info->method, "-[%rdi "); - strcat(info->method, info->selector_name); - strcat(info->method, "]"); - *ReferenceName = info->method; - *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message; - } - } - info->class_name = nullptr; - } - } else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) { - if (info->selector_name != NULL) { - if (info->method != nullptr) - free(info->method); - info->method = (char *)malloc(17 + strlen(info->selector_name)); - if (info->method != nullptr) { - strcpy(info->method, "-[[%rdi super] "); - strcat(info->method, info->selector_name); - strcat(info->method, "]"); - *ReferenceName = info->method; - *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message; - } - info->class_name = nullptr; - } +static void DumpInfoPlistSectionContents(StringRef Filename, + MachOObjectFile *O) { + for (const SectionRef &Section : O->sections()) { + StringRef SectName; + Section.getName(SectName); + DataRefImpl Ref = Section.getRawDataRefImpl(); + StringRef SegName = O->getSectionFinalSegmentName(Ref); + if (SegName == "__TEXT" && SectName == "__info_plist") { + outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; + StringRef BytesStr; + Section.getContents(BytesStr); + const char *sect = reinterpret_cast(BytesStr.data()); + outs() << sect; + return; } } } -// GuessPointerPointer() is passed the address of what might be a pointer to -// a reference to an Objective-C class, selector, message ref or cfstring. -// If so the value of the pointer is returned and one of the booleans are set -// to true. If not zero is returned and all the booleans are set to false. -static uint64_t GuessPointerPointer(uint64_t ReferenceValue, - struct DisassembleInfo *info, - bool &classref, bool &selref, bool &msgref, - bool &cfstring) { - classref = false; - selref = false; - msgref = false; - cfstring = false; - uint32_t LoadCommandCount = info->O->getHeader().ncmds; - MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo(); - for (unsigned I = 0;; ++I) { - if (Load.C.cmd == MachO::LC_SEGMENT_64) { - MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load); - for (unsigned J = 0; J < Seg.nsects; ++J) { - MachO::section_64 Sec = info->O->getSection64(Load, J); - if ((strncmp(Sec.sectname, "__objc_selrefs", 16) == 0 || - strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 || - strncmp(Sec.sectname, "__objc_superrefs", 16) == 0 || - strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0 || - strncmp(Sec.sectname, "__cfstring", 16) == 0) && - ReferenceValue >= Sec.addr && - ReferenceValue < Sec.addr + Sec.size) { - uint64_t sect_offset = ReferenceValue - Sec.addr; - uint64_t object_offset = Sec.offset + sect_offset; - StringRef MachOContents = info->O->getData(); - uint64_t object_size = MachOContents.size(); - const char *object_addr = (const char *)MachOContents.data(); - if (object_offset < object_size) { - uint64_t pointer_value; - memcpy(&pointer_value, object_addr + object_offset, - sizeof(uint64_t)); - if (info->O->isLittleEndian() != sys::IsLittleEndianHost) - sys::swapByteOrder(pointer_value); - if (strncmp(Sec.sectname, "__objc_selrefs", 16) == 0) - selref = true; - else if (strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 || - strncmp(Sec.sectname, "__objc_superrefs", 16) == 0) - classref = true; - else if (strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0 && - ReferenceValue + 8 < Sec.addr + Sec.size) { - msgref = true; - memcpy(&pointer_value, object_addr + object_offset + 8, - sizeof(uint64_t)); - if (info->O->isLittleEndian() != sys::IsLittleEndianHost) - sys::swapByteOrder(pointer_value); - } else if (strncmp(Sec.sectname, "__cfstring", 16) == 0) - cfstring = true; - return pointer_value; - } else { - return 0; - } - } - } +// checkMachOAndArchFlags() checks to see if the ObjectFile is a Mach-O file +// and if it is and there is a list of architecture flags is specified then +// check to make sure this Mach-O file is one of those architectures or all +// architectures were specified. If not then an error is generated and this +// routine returns false. Else it returns true. +static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) { + if (isa(O) && !ArchAll && ArchFlags.size() != 0) { + MachOObjectFile *MachO = dyn_cast(O); + bool ArchFound = false; + MachO::mach_header H; + MachO::mach_header_64 H_64; + Triple T; + if (MachO->is64Bit()) { + H_64 = MachO->MachOObjectFile::getHeader64(); + T = MachOObjectFile::getArch(H_64.cputype, H_64.cpusubtype); + } else { + H = MachO->MachOObjectFile::getHeader(); + T = MachOObjectFile::getArch(H.cputype, H.cpusubtype); } - // TODO: Look for LC_SEGMENT for 32-bit Mach-O files. - if (I == LoadCommandCount - 1) + unsigned i; + for (i = 0; i < ArchFlags.size(); ++i) { + if (ArchFlags[i] == T.getArchName()) + ArchFound = true; break; - else - Load = info->O->getNextLoadCommandInfo(Load); + } + if (!ArchFound) { + errs() << "llvm-objdump: file: " + Filename + " does not contain " + << "architecture: " + ArchFlags[i] + "\n"; + return false; + } } - return 0; + return true; } -// get_pointer_64 returns a pointer to the bytes in the object file at the -// Address from a section in the Mach-O file. And indirectly returns the -// offset into the section, number of bytes left in the section past the offset -// and which section is was being referenced. If the Address is not in a -// section nullptr is returned. -const char *get_pointer_64(uint64_t Address, uint32_t &offset, uint32_t &left, - SectionRef &S, DisassembleInfo *info) { - offset = 0; - left = 0; - S = SectionRef(); - for (unsigned SectIdx = 0; SectIdx != info->Sections->size(); SectIdx++) { - uint64_t SectAddress = ((*(info->Sections))[SectIdx]).getAddress(); - uint64_t SectSize = ((*(info->Sections))[SectIdx]).getSize(); - if (Address >= SectAddress && Address < SectAddress + SectSize) { - S = (*(info->Sections))[SectIdx]; - offset = Address - SectAddress; - left = SectSize - offset; - StringRef SectContents; - ((*(info->Sections))[SectIdx]).getContents(SectContents); - return SectContents.data() + offset; - } +static void printObjcMetaData(MachOObjectFile *O, bool verbose); + +// ProcessMachO() is passed a single opened Mach-O file, which may be an +// archive member and or in a slice of a universal file. It prints the +// the file name and header info and then processes it according to the +// command line options. +static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF, + StringRef ArchiveMemberName = StringRef(), + StringRef ArchitectureName = StringRef()) { + // If we are doing some processing here on the Mach-O file print the header + // info. And don't print it otherwise like in the case of printing the + // UniversalHeaders or ArchiveHeaders. + if (Disassemble || PrivateHeaders || ExportsTrie || Rebase || Bind || + LazyBind || WeakBind || IndirectSymbols || DataInCode || LinkOptHints || + DylibsUsed || DylibId || ObjcMetaData || (DumpSections.size() != 0)) { + outs() << Filename; + if (!ArchiveMemberName.empty()) + outs() << '(' << ArchiveMemberName << ')'; + if (!ArchitectureName.empty()) + outs() << " (architecture " << ArchitectureName << ")"; + outs() << ":\n"; } - return nullptr; + + if (Disassemble) + DisassembleMachO(Filename, MachOOF, "__TEXT", "__text"); + if (IndirectSymbols) + PrintIndirectSymbols(MachOOF, !NonVerbose); + if (DataInCode) + PrintDataInCodeTable(MachOOF, !NonVerbose); + if (LinkOptHints) + PrintLinkOptHints(MachOOF); + if (Relocations) + PrintRelocations(MachOOF); + if (SectionHeaders) + PrintSectionHeaders(MachOOF); + if (SectionContents) + PrintSectionContents(MachOOF); + if (DumpSections.size() != 0) + DumpSectionContents(Filename, MachOOF, !NonVerbose); + if (InfoPlist) + DumpInfoPlistSectionContents(Filename, MachOOF); + if (DylibsUsed) + PrintDylibs(MachOOF, false); + if (DylibId) + PrintDylibs(MachOOF, true); + if (SymbolTable) + PrintSymbolTable(MachOOF); + if (UnwindInfo) + printMachOUnwindInfo(MachOOF); + if (PrivateHeaders) + printMachOFileHeader(MachOOF); + if (ObjcMetaData) + printObjcMetaData(MachOOF, !NonVerbose); + if (ExportsTrie) + printExportsTrie(MachOOF); + if (Rebase) + printRebaseTable(MachOOF); + if (Bind) + printBindTable(MachOOF); + if (LazyBind) + printLazyBindTable(MachOOF); + if (WeakBind) + printWeakBindTable(MachOOF); } -// get_symbol_64() returns the name of a symbol (or nullptr) and the address of -// the symbol indirectly through n_value. Based on the relocation information -// for the specified section offset in the specified section reference. -const char *get_symbol_64(uint32_t sect_offset, SectionRef S, - DisassembleInfo *info, uint64_t &n_value) { - n_value = 0; - if (info->verbose == false) - return nullptr; +// printUnknownCPUType() helps print_fat_headers for unknown CPU's. +static void printUnknownCPUType(uint32_t cputype, uint32_t cpusubtype) { + outs() << " cputype (" << cputype << ")\n"; + outs() << " cpusubtype (" << cpusubtype << ")\n"; +} - // See if there is an external relocation entry at the sect_offset. - bool reloc_found = false; - DataRefImpl Rel; - MachO::any_relocation_info RE; - bool isExtern = false; - SymbolRef Symbol; - for (const RelocationRef &Reloc : S.relocations()) { - uint64_t RelocOffset; - Reloc.getOffset(RelocOffset); - if (RelocOffset == sect_offset) { - Rel = Reloc.getRawDataRefImpl(); - RE = info->O->getRelocation(Rel); - if (info->O->isRelocationScattered(RE)) - continue; - isExtern = info->O->getPlainRelocationExternal(RE); - if (isExtern) { - symbol_iterator RelocSym = Reloc.getSymbol(); - Symbol = *RelocSym; - } - reloc_found = true; +// printCPUType() helps print_fat_headers by printing the cputype and +// pusubtype (symbolically for the one's it knows about). +static void printCPUType(uint32_t cputype, uint32_t cpusubtype) { + switch (cputype) { + case MachO::CPU_TYPE_I386: + switch (cpusubtype) { + case MachO::CPU_SUBTYPE_I386_ALL: + outs() << " cputype CPU_TYPE_I386\n"; + outs() << " cpusubtype CPU_SUBTYPE_I386_ALL\n"; + break; + default: + printUnknownCPUType(cputype, cpusubtype); break; } - } - // If there is an external relocation entry for a symbol in this section - // at this section_offset then use that symbol's value for the n_value - // and return its name. - const char *SymbolName = nullptr; - if (reloc_found && isExtern) { - Symbol.getAddress(n_value); - StringRef name; - Symbol.getName(name); - if (!name.empty()) { - SymbolName = name.data(); - return SymbolName; + break; + case MachO::CPU_TYPE_X86_64: + switch (cpusubtype) { + case MachO::CPU_SUBTYPE_X86_64_ALL: + outs() << " cputype CPU_TYPE_X86_64\n"; + outs() << " cpusubtype CPU_SUBTYPE_X86_64_ALL\n"; + break; + case MachO::CPU_SUBTYPE_X86_64_H: + outs() << " cputype CPU_TYPE_X86_64\n"; + outs() << " cpusubtype CPU_SUBTYPE_X86_64_H\n"; + break; + default: + printUnknownCPUType(cputype, cpusubtype); + break; + } + break; + case MachO::CPU_TYPE_ARM: + switch (cpusubtype) { + case MachO::CPU_SUBTYPE_ARM_ALL: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_ALL\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V4T: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V4T\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V5TEJ: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V5TEJ\n"; + break; + case MachO::CPU_SUBTYPE_ARM_XSCALE: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_XSCALE\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V6: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V6\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V6M: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V6M\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V7: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V7\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V7EM: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V7EM\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V7K: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V7K\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V7M: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V7M\n"; + break; + case MachO::CPU_SUBTYPE_ARM_V7S: + outs() << " cputype CPU_TYPE_ARM\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM_V7S\n"; + break; + default: + printUnknownCPUType(cputype, cpusubtype); + break; + } + break; + case MachO::CPU_TYPE_ARM64: + switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM64_ALL: + outs() << " cputype CPU_TYPE_ARM64\n"; + outs() << " cpusubtype CPU_SUBTYPE_ARM64_ALL\n"; + break; + default: + printUnknownCPUType(cputype, cpusubtype); + break; } + break; + default: + printUnknownCPUType(cputype, cpusubtype); + break; } +} - // TODO: For fully linked images, look through the external relocation - // entries off the dynamic symtab command. For these the r_offset is from the - // start of the first writeable segment in the Mach-O file. So the offset - // to this section from that segment is passed to this routine by the caller, - // as the database_offset. Which is the difference of the section's starting - // address and the first writable segment. - // - // NOTE: need add passing the database_offset to this routine. - - // TODO: We did not find an external relocation entry so look up the - // ReferenceValue as an address of a symbol and if found return that symbol's - // name. - // - // NOTE: need add passing the ReferenceValue to this routine. Then that code - // would simply be this: - // SymbolName = GuessSymbolName(ReferenceValue, info); +static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB, + bool verbose) { + outs() << "Fat headers\n"; + if (verbose) + outs() << "fat_magic FAT_MAGIC\n"; + else + outs() << "fat_magic " << format("0x%" PRIx32, MachO::FAT_MAGIC) << "\n"; + + uint32_t nfat_arch = UB->getNumberOfObjects(); + StringRef Buf = UB->getData(); + uint64_t size = Buf.size(); + uint64_t big_size = sizeof(struct MachO::fat_header) + + nfat_arch * sizeof(struct MachO::fat_arch); + outs() << "nfat_arch " << UB->getNumberOfObjects(); + if (nfat_arch == 0) + outs() << " (malformed, contains zero architecture types)\n"; + else if (big_size > size) + outs() << " (malformed, architectures past end of file)\n"; + else + outs() << "\n"; - return SymbolName; + for (uint32_t i = 0; i < nfat_arch; ++i) { + MachOUniversalBinary::ObjectForArch OFA(UB, i); + uint32_t cputype = OFA.getCPUType(); + uint32_t cpusubtype = OFA.getCPUSubType(); + outs() << "architecture "; + for (uint32_t j = 0; i != 0 && j <= i - 1; j++) { + MachOUniversalBinary::ObjectForArch other_OFA(UB, j); + uint32_t other_cputype = other_OFA.getCPUType(); + uint32_t other_cpusubtype = other_OFA.getCPUSubType(); + if (cputype != 0 && cpusubtype != 0 && cputype == other_cputype && + (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) == + (other_cpusubtype & ~MachO::CPU_SUBTYPE_MASK)) { + outs() << "(illegal duplicate architecture) "; + break; + } + } + if (verbose) { + outs() << OFA.getArchTypeName() << "\n"; + printCPUType(cputype, cpusubtype & ~MachO::CPU_SUBTYPE_MASK); + } else { + outs() << i << "\n"; + outs() << " cputype " << cputype << "\n"; + outs() << " cpusubtype " << (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) + << "\n"; + } + if (verbose && + (cpusubtype & MachO::CPU_SUBTYPE_MASK) == MachO::CPU_SUBTYPE_LIB64) + outs() << " capabilities CPU_SUBTYPE_LIB64\n"; + else + outs() << " capabilities " + << format("0x%" PRIx32, + (cpusubtype & MachO::CPU_SUBTYPE_MASK) >> 24) << "\n"; + outs() << " offset " << OFA.getOffset(); + if (OFA.getOffset() > size) + outs() << " (past end of file)"; + if (OFA.getOffset() % (1 << OFA.getAlign()) != 0) + outs() << " (not aligned on it's alignment (2^" << OFA.getAlign() << ")"; + outs() << "\n"; + outs() << " size " << OFA.getSize(); + big_size = OFA.getOffset() + OFA.getSize(); + if (big_size > size) + outs() << " (past end of file)"; + outs() << "\n"; + outs() << " align 2^" << OFA.getAlign() << " (" << (1 << OFA.getAlign()) + << ")\n"; + } } -// These are structs in the Objective-C meta data and read to produce the -// comments for disassembly. While these are part of the ABI they are no -// public defintions. So the are here not in include/llvm/Support/MachO.h . - -// The cfstring object in a 64-bit Mach-O file. -struct cfstring64_t { - uint64_t isa; // class64_t * (64-bit pointer) - uint64_t flags; // flag bits - uint64_t characters; // char * (64-bit pointer) - uint64_t length; // number of non-NULL characters in above +static void printArchiveChild(Archive::Child &C, bool verbose, + bool print_offset) { + if (print_offset) + outs() << C.getChildOffset() << "\t"; + sys::fs::perms Mode = C.getAccessMode(); + if (verbose) { + // FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG. + // But there is nothing in sys::fs::perms for S_IFMT or S_IFREG. + outs() << "-"; + if (Mode & sys::fs::owner_read) + outs() << "r"; + else + outs() << "-"; + if (Mode & sys::fs::owner_write) + outs() << "w"; + else + outs() << "-"; + if (Mode & sys::fs::owner_exe) + outs() << "x"; + else + outs() << "-"; + if (Mode & sys::fs::group_read) + outs() << "r"; + else + outs() << "-"; + if (Mode & sys::fs::group_write) + outs() << "w"; + else + outs() << "-"; + if (Mode & sys::fs::group_exe) + outs() << "x"; + else + outs() << "-"; + if (Mode & sys::fs::others_read) + outs() << "r"; + else + outs() << "-"; + if (Mode & sys::fs::others_write) + outs() << "w"; + else + outs() << "-"; + if (Mode & sys::fs::others_exe) + outs() << "x"; + else + outs() << "-"; + } else { + outs() << format("0%o ", Mode); + } + + unsigned UID = C.getUID(); + outs() << format("%3d/", UID); + unsigned GID = C.getGID(); + outs() << format("%-3d ", GID); + uint64_t Size = C.getRawSize(); + outs() << format("%5" PRId64, Size) << " "; + + StringRef RawLastModified = C.getRawLastModified(); + if (verbose) { + unsigned Seconds; + if (RawLastModified.getAsInteger(10, Seconds)) + outs() << "(date: \"%s\" contains non-decimal chars) " << RawLastModified; + else { + // Since cime(3) returns a 26 character string of the form: + // "Sun Sep 16 01:03:52 1973\n\0" + // just print 24 characters. + time_t t = Seconds; + outs() << format("%.24s ", ctime(&t)); + } + } else { + outs() << RawLastModified << " "; + } + + if (verbose) { + ErrorOr NameOrErr = C.getName(); + if (NameOrErr.getError()) { + StringRef RawName = C.getRawName(); + outs() << RawName << "\n"; + } else { + StringRef Name = NameOrErr.get(); + outs() << Name << "\n"; + } + } else { + StringRef RawName = C.getRawName(); + outs() << RawName << "\n"; + } +} + +static void printArchiveHeaders(Archive *A, bool verbose, bool print_offset) { + if (A->hasSymbolTable()) { + Archive::child_iterator S = A->getSymbolTableChild(); + Archive::Child C = *S; + printArchiveChild(C, verbose, print_offset); + } + for (Archive::child_iterator I = A->child_begin(), E = A->child_end(); I != E; + ++I) { + Archive::Child C = *I; + printArchiveChild(C, verbose, print_offset); + } +} + +// ParseInputMachO() parses the named Mach-O file in Filename and handles the +// -arch flags selecting just those slices as specified by them and also parses +// archive files. Then for each individual Mach-O file ProcessMachO() is +// called to process the file based on the command line options. +void llvm::ParseInputMachO(StringRef Filename) { + // Check for -arch all and verifiy the -arch flags are valid. + for (unsigned i = 0; i < ArchFlags.size(); ++i) { + if (ArchFlags[i] == "all") { + ArchAll = true; + } else { + if (!MachOObjectFile::isValidArch(ArchFlags[i])) { + errs() << "llvm-objdump: Unknown architecture named '" + ArchFlags[i] + + "'for the -arch option\n"; + return; + } + } + } + + // Attempt to open the binary. + ErrorOr> BinaryOrErr = createBinary(Filename); + if (std::error_code EC = BinaryOrErr.getError()) { + errs() << "llvm-objdump: '" << Filename << "': " << EC.message() << ".\n"; + return; + } + Binary &Bin = *BinaryOrErr.get().getBinary(); + + if (Archive *A = dyn_cast(&Bin)) { + outs() << "Archive : " << Filename << "\n"; + if (ArchiveHeaders) + printArchiveHeaders(A, !NonVerbose, ArchiveMemberOffsets); + for (Archive::child_iterator I = A->child_begin(), E = A->child_end(); + I != E; ++I) { + ErrorOr> ChildOrErr = I->getAsBinary(); + if (ChildOrErr.getError()) + continue; + if (MachOObjectFile *O = dyn_cast(&*ChildOrErr.get())) { + if (!checkMachOAndArchFlags(O, Filename)) + return; + ProcessMachO(Filename, O, O->getFileName()); + } + } + return; + } + if (UniversalHeaders) { + if (MachOUniversalBinary *UB = dyn_cast(&Bin)) + printMachOUniversalHeaders(UB, !NonVerbose); + } + if (MachOUniversalBinary *UB = dyn_cast(&Bin)) { + // If we have a list of architecture flags specified dump only those. + if (!ArchAll && ArchFlags.size() != 0) { + // Look for a slice in the universal binary that matches each ArchFlag. + bool ArchFound; + for (unsigned i = 0; i < ArchFlags.size(); ++i) { + ArchFound = false; + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + if (ArchFlags[i] == I->getArchTypeName()) { + ArchFound = true; + ErrorOr> ObjOrErr = + I->getAsObjectFile(); + std::string ArchitectureName = ""; + if (ArchFlags.size() > 1) + ArchitectureName = I->getArchTypeName(); + if (ObjOrErr) { + ObjectFile &O = *ObjOrErr.get(); + if (MachOObjectFile *MachOOF = dyn_cast(&O)) + ProcessMachO(Filename, MachOOF, "", ArchitectureName); + } else if (ErrorOr> AOrErr = + I->getAsArchive()) { + std::unique_ptr &A = *AOrErr; + outs() << "Archive : " << Filename; + if (!ArchitectureName.empty()) + outs() << " (architecture " << ArchitectureName << ")"; + outs() << "\n"; + if (ArchiveHeaders) + printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets); + for (Archive::child_iterator AI = A->child_begin(), + AE = A->child_end(); + AI != AE; ++AI) { + ErrorOr> ChildOrErr = AI->getAsBinary(); + if (ChildOrErr.getError()) + continue; + if (MachOObjectFile *O = + dyn_cast(&*ChildOrErr.get())) + ProcessMachO(Filename, O, O->getFileName(), ArchitectureName); + } + } + } + } + if (!ArchFound) { + errs() << "llvm-objdump: file: " + Filename + " does not contain " + << "architecture: " + ArchFlags[i] + "\n"; + return; + } + } + return; + } + // No architecture flags were specified so if this contains a slice that + // matches the host architecture dump only that. + if (!ArchAll) { + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + if (MachOObjectFile::getHostArch().getArchName() == + I->getArchTypeName()) { + ErrorOr> ObjOrErr = I->getAsObjectFile(); + std::string ArchiveName; + ArchiveName.clear(); + if (ObjOrErr) { + ObjectFile &O = *ObjOrErr.get(); + if (MachOObjectFile *MachOOF = dyn_cast(&O)) + ProcessMachO(Filename, MachOOF); + } else if (ErrorOr> AOrErr = + I->getAsArchive()) { + std::unique_ptr &A = *AOrErr; + outs() << "Archive : " << Filename << "\n"; + if (ArchiveHeaders) + printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets); + for (Archive::child_iterator AI = A->child_begin(), + AE = A->child_end(); + AI != AE; ++AI) { + ErrorOr> ChildOrErr = AI->getAsBinary(); + if (ChildOrErr.getError()) + continue; + if (MachOObjectFile *O = + dyn_cast(&*ChildOrErr.get())) + ProcessMachO(Filename, O, O->getFileName()); + } + } + return; + } + } + } + // Either all architectures have been specified or none have been specified + // and this does not contain the host architecture so dump all the slices. + bool moreThanOneArch = UB->getNumberOfObjects() > 1; + for (MachOUniversalBinary::object_iterator I = UB->begin_objects(), + E = UB->end_objects(); + I != E; ++I) { + ErrorOr> ObjOrErr = I->getAsObjectFile(); + std::string ArchitectureName = ""; + if (moreThanOneArch) + ArchitectureName = I->getArchTypeName(); + if (ObjOrErr) { + ObjectFile &Obj = *ObjOrErr.get(); + if (MachOObjectFile *MachOOF = dyn_cast(&Obj)) + ProcessMachO(Filename, MachOOF, "", ArchitectureName); + } else if (ErrorOr> AOrErr = I->getAsArchive()) { + std::unique_ptr &A = *AOrErr; + outs() << "Archive : " << Filename; + if (!ArchitectureName.empty()) + outs() << " (architecture " << ArchitectureName << ")"; + outs() << "\n"; + if (ArchiveHeaders) + printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets); + for (Archive::child_iterator AI = A->child_begin(), AE = A->child_end(); + AI != AE; ++AI) { + ErrorOr> ChildOrErr = AI->getAsBinary(); + if (ChildOrErr.getError()) + continue; + if (MachOObjectFile *O = + dyn_cast(&*ChildOrErr.get())) { + if (MachOObjectFile *MachOOF = dyn_cast(O)) + ProcessMachO(Filename, MachOOF, MachOOF->getFileName(), + ArchitectureName); + } + } + } + } + return; + } + if (ObjectFile *O = dyn_cast(&Bin)) { + if (!checkMachOAndArchFlags(O, Filename)) + return; + if (MachOObjectFile *MachOOF = dyn_cast(&*O)) { + ProcessMachO(Filename, MachOOF); + } else + errs() << "llvm-objdump: '" << Filename << "': " + << "Object is not a Mach-O file type.\n"; + } else + errs() << "llvm-objdump: '" << Filename << "': " + << "Unrecognized file type.\n"; +} + +typedef std::pair BindInfoEntry; +typedef std::vector BindTable; +typedef BindTable::iterator bind_table_iterator; + +// The block of info used by the Symbolizer call backs. +struct DisassembleInfo { + bool verbose; + MachOObjectFile *O; + SectionRef S; + SymbolAddressMap *AddrMap; + std::vector *Sections; + const char *class_name; + const char *selector_name; + char *method; + char *demangled_name; + uint64_t adrp_addr; + uint32_t adrp_inst; + BindTable *bindtable; }; -// The class object in a 64-bit Mach-O file. -struct class64_t { - uint64_t isa; // class64_t * (64-bit pointer) - uint64_t superclass; // class64_t * (64-bit pointer) - uint64_t cache; // Cache (64-bit pointer) - uint64_t vtable; // IMP * (64-bit pointer) - uint64_t data; // class_ro64_t * (64-bit pointer) -}; +// SymbolizerGetOpInfo() is the operand information call back function. +// This is called to get the symbolic information for operand(s) of an +// instruction when it is being done. This routine does this from +// the relocation information, symbol table, etc. That block of information +// is a pointer to the struct DisassembleInfo that was passed when the +// disassembler context was created and passed to back to here when +// called back by the disassembler for instruction operands that could have +// relocation information. The address of the instruction containing operand is +// at the Pc parameter. The immediate value the operand has is passed in +// op_info->Value and is at Offset past the start of the instruction and has a +// byte Size of 1, 2 or 4. The symbolc information is returned in TagBuf is the +// LLVMOpInfo1 struct defined in the header "llvm-c/Disassembler.h" as symbol +// names and addends of the symbolic expression to add for the operand. The +// value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic +// information is returned then this function returns 1 else it returns 0. +static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, + uint64_t Size, int TagType, void *TagBuf) { + struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo; + struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf; + uint64_t value = op_info->Value; + + // Make sure all fields returned are zero if we don't set them. + memset((void *)op_info, '\0', sizeof(struct LLVMOpInfo1)); + op_info->Value = value; + + // If the TagType is not the value 1 which it code knows about or if no + // verbose symbolic information is wanted then just return 0, indicating no + // information is being returned. + if (TagType != 1 || !info->verbose) + return 0; + + unsigned int Arch = info->O->getArch(); + if (Arch == Triple::x86) { + if (Size != 1 && Size != 2 && Size != 4 && Size != 0) + return 0; + // First search the section's relocation entries (if any) for an entry + // for this section offset. + uint32_t sect_addr = info->S.getAddress(); + uint32_t sect_offset = (Pc + Offset) - sect_addr; + bool reloc_found = false; + DataRefImpl Rel; + MachO::any_relocation_info RE; + bool isExtern = false; + SymbolRef Symbol; + bool r_scattered = false; + uint32_t r_value, pair_r_value, r_type; + for (const RelocationRef &Reloc : info->S.relocations()) { + uint64_t RelocOffset = Reloc.getOffset(); + if (RelocOffset == sect_offset) { + Rel = Reloc.getRawDataRefImpl(); + RE = info->O->getRelocation(Rel); + r_type = info->O->getAnyRelocationType(RE); + r_scattered = info->O->isRelocationScattered(RE); + if (r_scattered) { + r_value = info->O->getScatteredRelocationValue(RE); + if (r_type == MachO::GENERIC_RELOC_SECTDIFF || + r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) { + DataRefImpl RelNext = Rel; + info->O->moveRelocationNext(RelNext); + MachO::any_relocation_info RENext; + RENext = info->O->getRelocation(RelNext); + if (info->O->isRelocationScattered(RENext)) + pair_r_value = info->O->getScatteredRelocationValue(RENext); + else + return 0; + } + } else { + isExtern = info->O->getPlainRelocationExternal(RE); + if (isExtern) { + symbol_iterator RelocSym = Reloc.getSymbol(); + Symbol = *RelocSym; + } + } + reloc_found = true; + break; + } + } + if (reloc_found && isExtern) { + ErrorOr SymName = Symbol.getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + const char *name = SymName->data(); + op_info->AddSymbol.Present = 1; + op_info->AddSymbol.Name = name; + // For i386 extern relocation entries the value in the instruction is + // the offset from the symbol, and value is already set in op_info->Value. + return 1; + } + if (reloc_found && (r_type == MachO::GENERIC_RELOC_SECTDIFF || + r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)) { + const char *add = GuessSymbolName(r_value, info->AddrMap); + const char *sub = GuessSymbolName(pair_r_value, info->AddrMap); + uint32_t offset = value - (r_value - pair_r_value); + op_info->AddSymbol.Present = 1; + if (add != nullptr) + op_info->AddSymbol.Name = add; + else + op_info->AddSymbol.Value = r_value; + op_info->SubtractSymbol.Present = 1; + if (sub != nullptr) + op_info->SubtractSymbol.Name = sub; + else + op_info->SubtractSymbol.Value = pair_r_value; + op_info->Value = offset; + return 1; + } + // TODO: + // Second search the external relocation entries of a fully linked image + // (if any) for an entry that matches this segment offset. + // uint32_t seg_offset = (Pc + Offset); + return 0; + } + if (Arch == Triple::x86_64) { + if (Size != 1 && Size != 2 && Size != 4 && Size != 0) + return 0; + // First search the section's relocation entries (if any) for an entry + // for this section offset. + uint64_t sect_addr = info->S.getAddress(); + uint64_t sect_offset = (Pc + Offset) - sect_addr; + bool reloc_found = false; + DataRefImpl Rel; + MachO::any_relocation_info RE; + bool isExtern = false; + SymbolRef Symbol; + for (const RelocationRef &Reloc : info->S.relocations()) { + uint64_t RelocOffset = Reloc.getOffset(); + if (RelocOffset == sect_offset) { + Rel = Reloc.getRawDataRefImpl(); + RE = info->O->getRelocation(Rel); + // NOTE: Scattered relocations don't exist on x86_64. + isExtern = info->O->getPlainRelocationExternal(RE); + if (isExtern) { + symbol_iterator RelocSym = Reloc.getSymbol(); + Symbol = *RelocSym; + } + reloc_found = true; + break; + } + } + if (reloc_found && isExtern) { + // The Value passed in will be adjusted by the Pc if the instruction + // adds the Pc. But for x86_64 external relocation entries the Value + // is the offset from the external symbol. + if (info->O->getAnyRelocationPCRel(RE)) + op_info->Value -= Pc + Offset + Size; + ErrorOr SymName = Symbol.getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + const char *name = SymName->data(); + unsigned Type = info->O->getAnyRelocationType(RE); + if (Type == MachO::X86_64_RELOC_SUBTRACTOR) { + DataRefImpl RelNext = Rel; + info->O->moveRelocationNext(RelNext); + MachO::any_relocation_info RENext = info->O->getRelocation(RelNext); + unsigned TypeNext = info->O->getAnyRelocationType(RENext); + bool isExternNext = info->O->getPlainRelocationExternal(RENext); + unsigned SymbolNum = info->O->getPlainRelocationSymbolNum(RENext); + if (TypeNext == MachO::X86_64_RELOC_UNSIGNED && isExternNext) { + op_info->SubtractSymbol.Present = 1; + op_info->SubtractSymbol.Name = name; + symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum); + Symbol = *RelocSymNext; + ErrorOr SymNameNext = Symbol.getName(); + if (std::error_code EC = SymNameNext.getError()) + report_fatal_error(EC.message()); + name = SymNameNext->data(); + } + } + // TODO: add the VariantKinds to op_info->VariantKind for relocation types + // like: X86_64_RELOC_TLV, X86_64_RELOC_GOT_LOAD and X86_64_RELOC_GOT. + op_info->AddSymbol.Present = 1; + op_info->AddSymbol.Name = name; + return 1; + } + // TODO: + // Second search the external relocation entries of a fully linked image + // (if any) for an entry that matches this segment offset. + // uint64_t seg_offset = (Pc + Offset); + return 0; + } + if (Arch == Triple::arm) { + if (Offset != 0 || (Size != 4 && Size != 2)) + return 0; + // First search the section's relocation entries (if any) for an entry + // for this section offset. + uint32_t sect_addr = info->S.getAddress(); + uint32_t sect_offset = (Pc + Offset) - sect_addr; + DataRefImpl Rel; + MachO::any_relocation_info RE; + bool isExtern = false; + SymbolRef Symbol; + bool r_scattered = false; + uint32_t r_value, pair_r_value, r_type, r_length, other_half; + auto Reloc = + std::find_if(info->S.relocations().begin(), info->S.relocations().end(), + [&](const RelocationRef &Reloc) { + uint64_t RelocOffset = Reloc.getOffset(); + return RelocOffset == sect_offset; + }); + + if (Reloc == info->S.relocations().end()) + return 0; + + Rel = Reloc->getRawDataRefImpl(); + RE = info->O->getRelocation(Rel); + r_length = info->O->getAnyRelocationLength(RE); + r_scattered = info->O->isRelocationScattered(RE); + if (r_scattered) { + r_value = info->O->getScatteredRelocationValue(RE); + r_type = info->O->getScatteredRelocationType(RE); + } else { + r_type = info->O->getAnyRelocationType(RE); + isExtern = info->O->getPlainRelocationExternal(RE); + if (isExtern) { + symbol_iterator RelocSym = Reloc->getSymbol(); + Symbol = *RelocSym; + } + } + if (r_type == MachO::ARM_RELOC_HALF || + r_type == MachO::ARM_RELOC_SECTDIFF || + r_type == MachO::ARM_RELOC_LOCAL_SECTDIFF || + r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { + DataRefImpl RelNext = Rel; + info->O->moveRelocationNext(RelNext); + MachO::any_relocation_info RENext; + RENext = info->O->getRelocation(RelNext); + other_half = info->O->getAnyRelocationAddress(RENext) & 0xffff; + if (info->O->isRelocationScattered(RENext)) + pair_r_value = info->O->getScatteredRelocationValue(RENext); + } + + if (isExtern) { + ErrorOr SymName = Symbol.getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + const char *name = SymName->data(); + op_info->AddSymbol.Present = 1; + op_info->AddSymbol.Name = name; + switch (r_type) { + case MachO::ARM_RELOC_HALF: + if ((r_length & 0x1) == 1) { + op_info->Value = value << 16 | other_half; + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; + } else { + op_info->Value = other_half << 16 | value; + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; + } + break; + default: + break; + } + return 1; + } + // If we have a branch that is not an external relocation entry then + // return 0 so the code in tryAddingSymbolicOperand() can use the + // SymbolLookUp call back with the branch target address to look up the + // symbol and possiblity add an annotation for a symbol stub. + if (isExtern == 0 && (r_type == MachO::ARM_RELOC_BR24 || + r_type == MachO::ARM_THUMB_RELOC_BR22)) + return 0; + + uint32_t offset = 0; + if (r_type == MachO::ARM_RELOC_HALF || + r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { + if ((r_length & 0x1) == 1) + value = value << 16 | other_half; + else + value = other_half << 16 | value; + } + if (r_scattered && (r_type != MachO::ARM_RELOC_HALF && + r_type != MachO::ARM_RELOC_HALF_SECTDIFF)) { + offset = value - r_value; + value = r_value; + } + + if (r_type == MachO::ARM_RELOC_HALF_SECTDIFF) { + if ((r_length & 0x1) == 1) + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; + else + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; + const char *add = GuessSymbolName(r_value, info->AddrMap); + const char *sub = GuessSymbolName(pair_r_value, info->AddrMap); + int32_t offset = value - (r_value - pair_r_value); + op_info->AddSymbol.Present = 1; + if (add != nullptr) + op_info->AddSymbol.Name = add; + else + op_info->AddSymbol.Value = r_value; + op_info->SubtractSymbol.Present = 1; + if (sub != nullptr) + op_info->SubtractSymbol.Name = sub; + else + op_info->SubtractSymbol.Value = pair_r_value; + op_info->Value = offset; + return 1; + } + + op_info->AddSymbol.Present = 1; + op_info->Value = offset; + if (r_type == MachO::ARM_RELOC_HALF) { + if ((r_length & 0x1) == 1) + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; + else + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; + } + const char *add = GuessSymbolName(value, info->AddrMap); + if (add != nullptr) { + op_info->AddSymbol.Name = add; + return 1; + } + op_info->AddSymbol.Value = value; + return 1; + } + if (Arch == Triple::aarch64) { + if (Offset != 0 || Size != 4) + return 0; + // First search the section's relocation entries (if any) for an entry + // for this section offset. + uint64_t sect_addr = info->S.getAddress(); + uint64_t sect_offset = (Pc + Offset) - sect_addr; + auto Reloc = + std::find_if(info->S.relocations().begin(), info->S.relocations().end(), + [&](const RelocationRef &Reloc) { + uint64_t RelocOffset = Reloc.getOffset(); + return RelocOffset == sect_offset; + }); + + if (Reloc == info->S.relocations().end()) + return 0; + + DataRefImpl Rel = Reloc->getRawDataRefImpl(); + MachO::any_relocation_info RE = info->O->getRelocation(Rel); + uint32_t r_type = info->O->getAnyRelocationType(RE); + if (r_type == MachO::ARM64_RELOC_ADDEND) { + DataRefImpl RelNext = Rel; + info->O->moveRelocationNext(RelNext); + MachO::any_relocation_info RENext = info->O->getRelocation(RelNext); + if (value == 0) { + value = info->O->getPlainRelocationSymbolNum(RENext); + op_info->Value = value; + } + } + // NOTE: Scattered relocations don't exist on arm64. + if (!info->O->getPlainRelocationExternal(RE)) + return 0; + ErrorOr SymName = Reloc->getSymbol()->getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + const char *name = SymName->data(); + op_info->AddSymbol.Present = 1; + op_info->AddSymbol.Name = name; + + switch (r_type) { + case MachO::ARM64_RELOC_PAGE21: + /* @page */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_PAGE; + break; + case MachO::ARM64_RELOC_PAGEOFF12: + /* @pageoff */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_PAGEOFF; + break; + case MachO::ARM64_RELOC_GOT_LOAD_PAGE21: + /* @gotpage */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_GOTPAGE; + break; + case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12: + /* @gotpageoff */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF; + break; + case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21: + /* @tvlppage is not implemented in llvm-mc */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_TLVP; + break; + case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12: + /* @tvlppageoff is not implemented in llvm-mc */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_TLVOFF; + break; + default: + case MachO::ARM64_RELOC_BRANCH26: + op_info->VariantKind = LLVMDisassembler_VariantKind_None; + break; + } + return 1; + } + return 0; +} + +// GuessCstringPointer is passed the address of what might be a pointer to a +// literal string in a cstring section. If that address is in a cstring section +// it returns a pointer to that string. Else it returns nullptr. +static const char *GuessCstringPointer(uint64_t ReferenceValue, + struct DisassembleInfo *info) { + for (const auto &Load : info->O->load_commands()) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load); + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = info->O->getSection64(Load, J); + uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; + if (section_type == MachO::S_CSTRING_LITERALS && + ReferenceValue >= Sec.addr && + ReferenceValue < Sec.addr + Sec.size) { + uint64_t sect_offset = ReferenceValue - Sec.addr; + uint64_t object_offset = Sec.offset + sect_offset; + StringRef MachOContents = info->O->getData(); + uint64_t object_size = MachOContents.size(); + const char *object_addr = (const char *)MachOContents.data(); + if (object_offset < object_size) { + const char *name = object_addr + object_offset; + return name; + } else { + return nullptr; + } + } + } + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load); + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = info->O->getSection(Load, J); + uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; + if (section_type == MachO::S_CSTRING_LITERALS && + ReferenceValue >= Sec.addr && + ReferenceValue < Sec.addr + Sec.size) { + uint64_t sect_offset = ReferenceValue - Sec.addr; + uint64_t object_offset = Sec.offset + sect_offset; + StringRef MachOContents = info->O->getData(); + uint64_t object_size = MachOContents.size(); + const char *object_addr = (const char *)MachOContents.data(); + if (object_offset < object_size) { + const char *name = object_addr + object_offset; + return name; + } else { + return nullptr; + } + } + } + } + } + return nullptr; +} + +// GuessIndirectSymbol returns the name of the indirect symbol for the +// ReferenceValue passed in or nullptr. This is used when ReferenceValue maybe +// an address of a symbol stub or a lazy or non-lazy pointer to associate the +// symbol name being referenced by the stub or pointer. +static const char *GuessIndirectSymbol(uint64_t ReferenceValue, + struct DisassembleInfo *info) { + MachO::dysymtab_command Dysymtab = info->O->getDysymtabLoadCommand(); + MachO::symtab_command Symtab = info->O->getSymtabLoadCommand(); + for (const auto &Load : info->O->load_commands()) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load); + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = info->O->getSection64(Load, J); + uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; + if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || + section_type == MachO::S_LAZY_SYMBOL_POINTERS || + section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || + section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS || + section_type == MachO::S_SYMBOL_STUBS) && + ReferenceValue >= Sec.addr && + ReferenceValue < Sec.addr + Sec.size) { + uint32_t stride; + if (section_type == MachO::S_SYMBOL_STUBS) + stride = Sec.reserved2; + else + stride = 8; + if (stride == 0) + return nullptr; + uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride; + if (index < Dysymtab.nindirectsyms) { + uint32_t indirect_symbol = + info->O->getIndirectSymbolTableEntry(Dysymtab, index); + if (indirect_symbol < Symtab.nsyms) { + symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol); + SymbolRef Symbol = *Sym; + ErrorOr SymName = Symbol.getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + const char *name = SymName->data(); + return name; + } + } + } + } + } else if (Load.C.cmd == MachO::LC_SEGMENT) { + MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load); + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section Sec = info->O->getSection(Load, J); + uint32_t section_type = Sec.flags & MachO::SECTION_TYPE; + if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS || + section_type == MachO::S_LAZY_SYMBOL_POINTERS || + section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS || + section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS || + section_type == MachO::S_SYMBOL_STUBS) && + ReferenceValue >= Sec.addr && + ReferenceValue < Sec.addr + Sec.size) { + uint32_t stride; + if (section_type == MachO::S_SYMBOL_STUBS) + stride = Sec.reserved2; + else + stride = 4; + if (stride == 0) + return nullptr; + uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride; + if (index < Dysymtab.nindirectsyms) { + uint32_t indirect_symbol = + info->O->getIndirectSymbolTableEntry(Dysymtab, index); + if (indirect_symbol < Symtab.nsyms) { + symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol); + SymbolRef Symbol = *Sym; + ErrorOr SymName = Symbol.getName(); + if (std::error_code EC = SymName.getError()) + report_fatal_error(EC.message()); + const char *name = SymName->data(); + return name; + } + } + } + } + } + } + return nullptr; +} + +// method_reference() is called passing it the ReferenceName that might be +// a reference it to an Objective-C method call. If so then it allocates and +// assembles a method call string with the values last seen and saved in +// the DisassembleInfo's class_name and selector_name fields. This is saved +// into the method field of the info and any previous string is free'ed. +// Then the class_name field in the info is set to nullptr. The method call +// string is set into ReferenceName and ReferenceType is set to +// LLVMDisassembler_ReferenceType_Out_Objc_Message. If this not a method call +// then both ReferenceType and ReferenceName are left unchanged. +static void method_reference(struct DisassembleInfo *info, + uint64_t *ReferenceType, + const char **ReferenceName) { + unsigned int Arch = info->O->getArch(); + if (*ReferenceName != nullptr) { + if (strcmp(*ReferenceName, "_objc_msgSend") == 0) { + if (info->selector_name != nullptr) { + if (info->method != nullptr) + free(info->method); + if (info->class_name != nullptr) { + info->method = (char *)malloc(5 + strlen(info->class_name) + + strlen(info->selector_name)); + if (info->method != nullptr) { + strcpy(info->method, "+["); + strcat(info->method, info->class_name); + strcat(info->method, " "); + strcat(info->method, info->selector_name); + strcat(info->method, "]"); + *ReferenceName = info->method; + *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message; + } + } else { + info->method = (char *)malloc(9 + strlen(info->selector_name)); + if (info->method != nullptr) { + if (Arch == Triple::x86_64) + strcpy(info->method, "-[%rdi "); + else if (Arch == Triple::aarch64) + strcpy(info->method, "-[x0 "); + else + strcpy(info->method, "-[r? "); + strcat(info->method, info->selector_name); + strcat(info->method, "]"); + *ReferenceName = info->method; + *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message; + } + } + info->class_name = nullptr; + } + } else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) { + if (info->selector_name != nullptr) { + if (info->method != nullptr) + free(info->method); + info->method = (char *)malloc(17 + strlen(info->selector_name)); + if (info->method != nullptr) { + if (Arch == Triple::x86_64) + strcpy(info->method, "-[[%rdi super] "); + else if (Arch == Triple::aarch64) + strcpy(info->method, "-[[x0 super] "); + else + strcpy(info->method, "-[[r? super] "); + strcat(info->method, info->selector_name); + strcat(info->method, "]"); + *ReferenceName = info->method; + *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message; + } + info->class_name = nullptr; + } + } + } +} + +// GuessPointerPointer() is passed the address of what might be a pointer to +// a reference to an Objective-C class, selector, message ref or cfstring. +// If so the value of the pointer is returned and one of the booleans are set +// to true. If not zero is returned and all the booleans are set to false. +static uint64_t GuessPointerPointer(uint64_t ReferenceValue, + struct DisassembleInfo *info, + bool &classref, bool &selref, bool &msgref, + bool &cfstring) { + classref = false; + selref = false; + msgref = false; + cfstring = false; + for (const auto &Load : info->O->load_commands()) { + if (Load.C.cmd == MachO::LC_SEGMENT_64) { + MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load); + for (unsigned J = 0; J < Seg.nsects; ++J) { + MachO::section_64 Sec = info->O->getSection64(Load, J); + if ((strncmp(Sec.sectname, "__objc_selrefs", 16) == 0 || + strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 || + strncmp(Sec.sectname, "__objc_superrefs", 16) == 0 || + strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0 || + strncmp(Sec.sectname, "__cfstring", 16) == 0) && + ReferenceValue >= Sec.addr && + ReferenceValue < Sec.addr + Sec.size) { + uint64_t sect_offset = ReferenceValue - Sec.addr; + uint64_t object_offset = Sec.offset + sect_offset; + StringRef MachOContents = info->O->getData(); + uint64_t object_size = MachOContents.size(); + const char *object_addr = (const char *)MachOContents.data(); + if (object_offset < object_size) { + uint64_t pointer_value; + memcpy(&pointer_value, object_addr + object_offset, + sizeof(uint64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(pointer_value); + if (strncmp(Sec.sectname, "__objc_selrefs", 16) == 0) + selref = true; + else if (strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 || + strncmp(Sec.sectname, "__objc_superrefs", 16) == 0) + classref = true; + else if (strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0 && + ReferenceValue + 8 < Sec.addr + Sec.size) { + msgref = true; + memcpy(&pointer_value, object_addr + object_offset + 8, + sizeof(uint64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(pointer_value); + } else if (strncmp(Sec.sectname, "__cfstring", 16) == 0) + cfstring = true; + return pointer_value; + } else { + return 0; + } + } + } + } + // TODO: Look for LC_SEGMENT for 32-bit Mach-O files. + } + return 0; +} + +// get_pointer_64 returns a pointer to the bytes in the object file at the +// Address from a section in the Mach-O file. And indirectly returns the +// offset into the section, number of bytes left in the section past the offset +// and which section is was being referenced. If the Address is not in a +// section nullptr is returned. +static const char *get_pointer_64(uint64_t Address, uint32_t &offset, + uint32_t &left, SectionRef &S, + DisassembleInfo *info, + bool objc_only = false) { + offset = 0; + left = 0; + S = SectionRef(); + for (unsigned SectIdx = 0; SectIdx != info->Sections->size(); SectIdx++) { + uint64_t SectAddress = ((*(info->Sections))[SectIdx]).getAddress(); + uint64_t SectSize = ((*(info->Sections))[SectIdx]).getSize(); + if (objc_only) { + StringRef SectName; + ((*(info->Sections))[SectIdx]).getName(SectName); + DataRefImpl Ref = ((*(info->Sections))[SectIdx]).getRawDataRefImpl(); + StringRef SegName = info->O->getSectionFinalSegmentName(Ref); + if (SegName != "__OBJC" && SectName != "__cstring") + continue; + } + if (Address >= SectAddress && Address < SectAddress + SectSize) { + S = (*(info->Sections))[SectIdx]; + offset = Address - SectAddress; + left = SectSize - offset; + StringRef SectContents; + ((*(info->Sections))[SectIdx]).getContents(SectContents); + return SectContents.data() + offset; + } + } + return nullptr; +} + +static const char *get_pointer_32(uint32_t Address, uint32_t &offset, + uint32_t &left, SectionRef &S, + DisassembleInfo *info, + bool objc_only = false) { + return get_pointer_64(Address, offset, left, S, info, objc_only); +} + +// get_symbol_64() returns the name of a symbol (or nullptr) and the address of +// the symbol indirectly through n_value. Based on the relocation information +// for the specified section offset in the specified section reference. +// If no relocation information is found and a non-zero ReferenceValue for the +// symbol is passed, look up that address in the info's AddrMap. +static const char *get_symbol_64(uint32_t sect_offset, SectionRef S, + DisassembleInfo *info, uint64_t &n_value, + uint64_t ReferenceValue = 0) { + n_value = 0; + if (!info->verbose) + return nullptr; + + // See if there is an external relocation entry at the sect_offset. + bool reloc_found = false; + DataRefImpl Rel; + MachO::any_relocation_info RE; + bool isExtern = false; + SymbolRef Symbol; + for (const RelocationRef &Reloc : S.relocations()) { + uint64_t RelocOffset = Reloc.getOffset(); + if (RelocOffset == sect_offset) { + Rel = Reloc.getRawDataRefImpl(); + RE = info->O->getRelocation(Rel); + if (info->O->isRelocationScattered(RE)) + continue; + isExtern = info->O->getPlainRelocationExternal(RE); + if (isExtern) { + symbol_iterator RelocSym = Reloc.getSymbol(); + Symbol = *RelocSym; + } + reloc_found = true; + break; + } + } + // If there is an external relocation entry for a symbol in this section + // at this section_offset then use that symbol's value for the n_value + // and return its name. + const char *SymbolName = nullptr; + if (reloc_found && isExtern) { + n_value = Symbol.getValue(); + ErrorOr NameOrError = Symbol.getName(); + if (std::error_code EC = NameOrError.getError()) + report_fatal_error(EC.message()); + StringRef Name = *NameOrError; + if (!Name.empty()) { + SymbolName = Name.data(); + return SymbolName; + } + } + + // TODO: For fully linked images, look through the external relocation + // entries off the dynamic symtab command. For these the r_offset is from the + // start of the first writeable segment in the Mach-O file. So the offset + // to this section from that segment is passed to this routine by the caller, + // as the database_offset. Which is the difference of the section's starting + // address and the first writable segment. + // + // NOTE: need add passing the database_offset to this routine. + + // We did not find an external relocation entry so look up the ReferenceValue + // as an address of a symbol and if found return that symbol's name. + SymbolName = GuessSymbolName(ReferenceValue, info->AddrMap); + + return SymbolName; +} + +static const char *get_symbol_32(uint32_t sect_offset, SectionRef S, + DisassembleInfo *info, + uint32_t ReferenceValue) { + uint64_t n_value64; + return get_symbol_64(sect_offset, S, info, n_value64, ReferenceValue); +} + +// These are structs in the Objective-C meta data and read to produce the +// comments for disassembly. While these are part of the ABI they are no +// public defintions. So the are here not in include/llvm/Support/MachO.h . + +// The cfstring object in a 64-bit Mach-O file. +struct cfstring64_t { + uint64_t isa; // class64_t * (64-bit pointer) + uint64_t flags; // flag bits + uint64_t characters; // char * (64-bit pointer) + uint64_t length; // number of non-NULL characters in above +}; + +// The class object in a 64-bit Mach-O file. +struct class64_t { + uint64_t isa; // class64_t * (64-bit pointer) + uint64_t superclass; // class64_t * (64-bit pointer) + uint64_t cache; // Cache (64-bit pointer) + uint64_t vtable; // IMP * (64-bit pointer) + uint64_t data; // class_ro64_t * (64-bit pointer) +}; + +struct class32_t { + uint32_t isa; /* class32_t * (32-bit pointer) */ + uint32_t superclass; /* class32_t * (32-bit pointer) */ + uint32_t cache; /* Cache (32-bit pointer) */ + uint32_t vtable; /* IMP * (32-bit pointer) */ + uint32_t data; /* class_ro32_t * (32-bit pointer) */ +}; + +struct class_ro64_t { + uint32_t flags; + uint32_t instanceStart; + uint32_t instanceSize; + uint32_t reserved; + uint64_t ivarLayout; // const uint8_t * (64-bit pointer) + uint64_t name; // const char * (64-bit pointer) + uint64_t baseMethods; // const method_list_t * (64-bit pointer) + uint64_t baseProtocols; // const protocol_list_t * (64-bit pointer) + uint64_t ivars; // const ivar_list_t * (64-bit pointer) + uint64_t weakIvarLayout; // const uint8_t * (64-bit pointer) + uint64_t baseProperties; // const struct objc_property_list (64-bit pointer) +}; + +struct class_ro32_t { + uint32_t flags; + uint32_t instanceStart; + uint32_t instanceSize; + uint32_t ivarLayout; /* const uint8_t * (32-bit pointer) */ + uint32_t name; /* const char * (32-bit pointer) */ + uint32_t baseMethods; /* const method_list_t * (32-bit pointer) */ + uint32_t baseProtocols; /* const protocol_list_t * (32-bit pointer) */ + uint32_t ivars; /* const ivar_list_t * (32-bit pointer) */ + uint32_t weakIvarLayout; /* const uint8_t * (32-bit pointer) */ + uint32_t baseProperties; /* const struct objc_property_list * + (32-bit pointer) */ +}; + +/* Values for class_ro{64,32}_t->flags */ +#define RO_META (1 << 0) +#define RO_ROOT (1 << 1) +#define RO_HAS_CXX_STRUCTORS (1 << 2) + +struct method_list64_t { + uint32_t entsize; + uint32_t count; + /* struct method64_t first; These structures follow inline */ +}; + +struct method_list32_t { + uint32_t entsize; + uint32_t count; + /* struct method32_t first; These structures follow inline */ +}; + +struct method64_t { + uint64_t name; /* SEL (64-bit pointer) */ + uint64_t types; /* const char * (64-bit pointer) */ + uint64_t imp; /* IMP (64-bit pointer) */ +}; + +struct method32_t { + uint32_t name; /* SEL (32-bit pointer) */ + uint32_t types; /* const char * (32-bit pointer) */ + uint32_t imp; /* IMP (32-bit pointer) */ +}; + +struct protocol_list64_t { + uint64_t count; /* uintptr_t (a 64-bit value) */ + /* struct protocol64_t * list[0]; These pointers follow inline */ +}; + +struct protocol_list32_t { + uint32_t count; /* uintptr_t (a 32-bit value) */ + /* struct protocol32_t * list[0]; These pointers follow inline */ +}; + +struct protocol64_t { + uint64_t isa; /* id * (64-bit pointer) */ + uint64_t name; /* const char * (64-bit pointer) */ + uint64_t protocols; /* struct protocol_list64_t * + (64-bit pointer) */ + uint64_t instanceMethods; /* method_list_t * (64-bit pointer) */ + uint64_t classMethods; /* method_list_t * (64-bit pointer) */ + uint64_t optionalInstanceMethods; /* method_list_t * (64-bit pointer) */ + uint64_t optionalClassMethods; /* method_list_t * (64-bit pointer) */ + uint64_t instanceProperties; /* struct objc_property_list * + (64-bit pointer) */ +}; + +struct protocol32_t { + uint32_t isa; /* id * (32-bit pointer) */ + uint32_t name; /* const char * (32-bit pointer) */ + uint32_t protocols; /* struct protocol_list_t * + (32-bit pointer) */ + uint32_t instanceMethods; /* method_list_t * (32-bit pointer) */ + uint32_t classMethods; /* method_list_t * (32-bit pointer) */ + uint32_t optionalInstanceMethods; /* method_list_t * (32-bit pointer) */ + uint32_t optionalClassMethods; /* method_list_t * (32-bit pointer) */ + uint32_t instanceProperties; /* struct objc_property_list * + (32-bit pointer) */ +}; + +struct ivar_list64_t { + uint32_t entsize; + uint32_t count; + /* struct ivar64_t first; These structures follow inline */ +}; + +struct ivar_list32_t { + uint32_t entsize; + uint32_t count; + /* struct ivar32_t first; These structures follow inline */ +}; + +struct ivar64_t { + uint64_t offset; /* uintptr_t * (64-bit pointer) */ + uint64_t name; /* const char * (64-bit pointer) */ + uint64_t type; /* const char * (64-bit pointer) */ + uint32_t alignment; + uint32_t size; +}; + +struct ivar32_t { + uint32_t offset; /* uintptr_t * (32-bit pointer) */ + uint32_t name; /* const char * (32-bit pointer) */ + uint32_t type; /* const char * (32-bit pointer) */ + uint32_t alignment; + uint32_t size; +}; + +struct objc_property_list64 { + uint32_t entsize; + uint32_t count; + /* struct objc_property64 first; These structures follow inline */ +}; + +struct objc_property_list32 { + uint32_t entsize; + uint32_t count; + /* struct objc_property32 first; These structures follow inline */ +}; + +struct objc_property64 { + uint64_t name; /* const char * (64-bit pointer) */ + uint64_t attributes; /* const char * (64-bit pointer) */ +}; + +struct objc_property32 { + uint32_t name; /* const char * (32-bit pointer) */ + uint32_t attributes; /* const char * (32-bit pointer) */ +}; + +struct category64_t { + uint64_t name; /* const char * (64-bit pointer) */ + uint64_t cls; /* struct class_t * (64-bit pointer) */ + uint64_t instanceMethods; /* struct method_list_t * (64-bit pointer) */ + uint64_t classMethods; /* struct method_list_t * (64-bit pointer) */ + uint64_t protocols; /* struct protocol_list_t * (64-bit pointer) */ + uint64_t instanceProperties; /* struct objc_property_list * + (64-bit pointer) */ +}; + +struct category32_t { + uint32_t name; /* const char * (32-bit pointer) */ + uint32_t cls; /* struct class_t * (32-bit pointer) */ + uint32_t instanceMethods; /* struct method_list_t * (32-bit pointer) */ + uint32_t classMethods; /* struct method_list_t * (32-bit pointer) */ + uint32_t protocols; /* struct protocol_list_t * (32-bit pointer) */ + uint32_t instanceProperties; /* struct objc_property_list * + (32-bit pointer) */ +}; + +struct objc_image_info64 { + uint32_t version; + uint32_t flags; +}; +struct objc_image_info32 { + uint32_t version; + uint32_t flags; +}; +struct imageInfo_t { + uint32_t version; + uint32_t flags; +}; +/* masks for objc_image_info.flags */ +#define OBJC_IMAGE_IS_REPLACEMENT (1 << 0) +#define OBJC_IMAGE_SUPPORTS_GC (1 << 1) + +struct message_ref64 { + uint64_t imp; /* IMP (64-bit pointer) */ + uint64_t sel; /* SEL (64-bit pointer) */ +}; + +struct message_ref32 { + uint32_t imp; /* IMP (32-bit pointer) */ + uint32_t sel; /* SEL (32-bit pointer) */ +}; + +// Objective-C 1 (32-bit only) meta data structs. + +struct objc_module_t { + uint32_t version; + uint32_t size; + uint32_t name; /* char * (32-bit pointer) */ + uint32_t symtab; /* struct objc_symtab * (32-bit pointer) */ +}; + +struct objc_symtab_t { + uint32_t sel_ref_cnt; + uint32_t refs; /* SEL * (32-bit pointer) */ + uint16_t cls_def_cnt; + uint16_t cat_def_cnt; + // uint32_t defs[1]; /* void * (32-bit pointer) variable size */ +}; + +struct objc_class_t { + uint32_t isa; /* struct objc_class * (32-bit pointer) */ + uint32_t super_class; /* struct objc_class * (32-bit pointer) */ + uint32_t name; /* const char * (32-bit pointer) */ + int32_t version; + int32_t info; + int32_t instance_size; + uint32_t ivars; /* struct objc_ivar_list * (32-bit pointer) */ + uint32_t methodLists; /* struct objc_method_list ** (32-bit pointer) */ + uint32_t cache; /* struct objc_cache * (32-bit pointer) */ + uint32_t protocols; /* struct objc_protocol_list * (32-bit pointer) */ +}; + +#define CLS_GETINFO(cls, infomask) ((cls)->info & (infomask)) +// class is not a metaclass +#define CLS_CLASS 0x1 +// class is a metaclass +#define CLS_META 0x2 + +struct objc_category_t { + uint32_t category_name; /* char * (32-bit pointer) */ + uint32_t class_name; /* char * (32-bit pointer) */ + uint32_t instance_methods; /* struct objc_method_list * (32-bit pointer) */ + uint32_t class_methods; /* struct objc_method_list * (32-bit pointer) */ + uint32_t protocols; /* struct objc_protocol_list * (32-bit ptr) */ +}; + +struct objc_ivar_t { + uint32_t ivar_name; /* char * (32-bit pointer) */ + uint32_t ivar_type; /* char * (32-bit pointer) */ + int32_t ivar_offset; +}; + +struct objc_ivar_list_t { + int32_t ivar_count; + // struct objc_ivar_t ivar_list[1]; /* variable length structure */ +}; + +struct objc_method_list_t { + uint32_t obsolete; /* struct objc_method_list * (32-bit pointer) */ + int32_t method_count; + // struct objc_method_t method_list[1]; /* variable length structure */ +}; + +struct objc_method_t { + uint32_t method_name; /* SEL, aka struct objc_selector * (32-bit pointer) */ + uint32_t method_types; /* char * (32-bit pointer) */ + uint32_t method_imp; /* IMP, aka function pointer, (*IMP)(id, SEL, ...) + (32-bit pointer) */ +}; + +struct objc_protocol_list_t { + uint32_t next; /* struct objc_protocol_list * (32-bit pointer) */ + int32_t count; + // uint32_t list[1]; /* Protocol *, aka struct objc_protocol_t * + // (32-bit pointer) */ +}; + +struct objc_protocol_t { + uint32_t isa; /* struct objc_class * (32-bit pointer) */ + uint32_t protocol_name; /* char * (32-bit pointer) */ + uint32_t protocol_list; /* struct objc_protocol_list * (32-bit pointer) */ + uint32_t instance_methods; /* struct objc_method_description_list * + (32-bit pointer) */ + uint32_t class_methods; /* struct objc_method_description_list * + (32-bit pointer) */ +}; + +struct objc_method_description_list_t { + int32_t count; + // struct objc_method_description_t list[1]; +}; + +struct objc_method_description_t { + uint32_t name; /* SEL, aka struct objc_selector * (32-bit pointer) */ + uint32_t types; /* char * (32-bit pointer) */ +}; + +inline void swapStruct(struct cfstring64_t &cfs) { + sys::swapByteOrder(cfs.isa); + sys::swapByteOrder(cfs.flags); + sys::swapByteOrder(cfs.characters); + sys::swapByteOrder(cfs.length); +} + +inline void swapStruct(struct class64_t &c) { + sys::swapByteOrder(c.isa); + sys::swapByteOrder(c.superclass); + sys::swapByteOrder(c.cache); + sys::swapByteOrder(c.vtable); + sys::swapByteOrder(c.data); +} + +inline void swapStruct(struct class32_t &c) { + sys::swapByteOrder(c.isa); + sys::swapByteOrder(c.superclass); + sys::swapByteOrder(c.cache); + sys::swapByteOrder(c.vtable); + sys::swapByteOrder(c.data); +} + +inline void swapStruct(struct class_ro64_t &cro) { + sys::swapByteOrder(cro.flags); + sys::swapByteOrder(cro.instanceStart); + sys::swapByteOrder(cro.instanceSize); + sys::swapByteOrder(cro.reserved); + sys::swapByteOrder(cro.ivarLayout); + sys::swapByteOrder(cro.name); + sys::swapByteOrder(cro.baseMethods); + sys::swapByteOrder(cro.baseProtocols); + sys::swapByteOrder(cro.ivars); + sys::swapByteOrder(cro.weakIvarLayout); + sys::swapByteOrder(cro.baseProperties); +} + +inline void swapStruct(struct class_ro32_t &cro) { + sys::swapByteOrder(cro.flags); + sys::swapByteOrder(cro.instanceStart); + sys::swapByteOrder(cro.instanceSize); + sys::swapByteOrder(cro.ivarLayout); + sys::swapByteOrder(cro.name); + sys::swapByteOrder(cro.baseMethods); + sys::swapByteOrder(cro.baseProtocols); + sys::swapByteOrder(cro.ivars); + sys::swapByteOrder(cro.weakIvarLayout); + sys::swapByteOrder(cro.baseProperties); +} + +inline void swapStruct(struct method_list64_t &ml) { + sys::swapByteOrder(ml.entsize); + sys::swapByteOrder(ml.count); +} + +inline void swapStruct(struct method_list32_t &ml) { + sys::swapByteOrder(ml.entsize); + sys::swapByteOrder(ml.count); +} + +inline void swapStruct(struct method64_t &m) { + sys::swapByteOrder(m.name); + sys::swapByteOrder(m.types); + sys::swapByteOrder(m.imp); +} + +inline void swapStruct(struct method32_t &m) { + sys::swapByteOrder(m.name); + sys::swapByteOrder(m.types); + sys::swapByteOrder(m.imp); +} + +inline void swapStruct(struct protocol_list64_t &pl) { + sys::swapByteOrder(pl.count); +} + +inline void swapStruct(struct protocol_list32_t &pl) { + sys::swapByteOrder(pl.count); +} + +inline void swapStruct(struct protocol64_t &p) { + sys::swapByteOrder(p.isa); + sys::swapByteOrder(p.name); + sys::swapByteOrder(p.protocols); + sys::swapByteOrder(p.instanceMethods); + sys::swapByteOrder(p.classMethods); + sys::swapByteOrder(p.optionalInstanceMethods); + sys::swapByteOrder(p.optionalClassMethods); + sys::swapByteOrder(p.instanceProperties); +} + +inline void swapStruct(struct protocol32_t &p) { + sys::swapByteOrder(p.isa); + sys::swapByteOrder(p.name); + sys::swapByteOrder(p.protocols); + sys::swapByteOrder(p.instanceMethods); + sys::swapByteOrder(p.classMethods); + sys::swapByteOrder(p.optionalInstanceMethods); + sys::swapByteOrder(p.optionalClassMethods); + sys::swapByteOrder(p.instanceProperties); +} + +inline void swapStruct(struct ivar_list64_t &il) { + sys::swapByteOrder(il.entsize); + sys::swapByteOrder(il.count); +} + +inline void swapStruct(struct ivar_list32_t &il) { + sys::swapByteOrder(il.entsize); + sys::swapByteOrder(il.count); +} + +inline void swapStruct(struct ivar64_t &i) { + sys::swapByteOrder(i.offset); + sys::swapByteOrder(i.name); + sys::swapByteOrder(i.type); + sys::swapByteOrder(i.alignment); + sys::swapByteOrder(i.size); +} + +inline void swapStruct(struct ivar32_t &i) { + sys::swapByteOrder(i.offset); + sys::swapByteOrder(i.name); + sys::swapByteOrder(i.type); + sys::swapByteOrder(i.alignment); + sys::swapByteOrder(i.size); +} + +inline void swapStruct(struct objc_property_list64 &pl) { + sys::swapByteOrder(pl.entsize); + sys::swapByteOrder(pl.count); +} + +inline void swapStruct(struct objc_property_list32 &pl) { + sys::swapByteOrder(pl.entsize); + sys::swapByteOrder(pl.count); +} + +inline void swapStruct(struct objc_property64 &op) { + sys::swapByteOrder(op.name); + sys::swapByteOrder(op.attributes); +} + +inline void swapStruct(struct objc_property32 &op) { + sys::swapByteOrder(op.name); + sys::swapByteOrder(op.attributes); +} + +inline void swapStruct(struct category64_t &c) { + sys::swapByteOrder(c.name); + sys::swapByteOrder(c.cls); + sys::swapByteOrder(c.instanceMethods); + sys::swapByteOrder(c.classMethods); + sys::swapByteOrder(c.protocols); + sys::swapByteOrder(c.instanceProperties); +} + +inline void swapStruct(struct category32_t &c) { + sys::swapByteOrder(c.name); + sys::swapByteOrder(c.cls); + sys::swapByteOrder(c.instanceMethods); + sys::swapByteOrder(c.classMethods); + sys::swapByteOrder(c.protocols); + sys::swapByteOrder(c.instanceProperties); +} + +inline void swapStruct(struct objc_image_info64 &o) { + sys::swapByteOrder(o.version); + sys::swapByteOrder(o.flags); +} + +inline void swapStruct(struct objc_image_info32 &o) { + sys::swapByteOrder(o.version); + sys::swapByteOrder(o.flags); +} + +inline void swapStruct(struct imageInfo_t &o) { + sys::swapByteOrder(o.version); + sys::swapByteOrder(o.flags); +} + +inline void swapStruct(struct message_ref64 &mr) { + sys::swapByteOrder(mr.imp); + sys::swapByteOrder(mr.sel); +} + +inline void swapStruct(struct message_ref32 &mr) { + sys::swapByteOrder(mr.imp); + sys::swapByteOrder(mr.sel); +} + +inline void swapStruct(struct objc_module_t &module) { + sys::swapByteOrder(module.version); + sys::swapByteOrder(module.size); + sys::swapByteOrder(module.name); + sys::swapByteOrder(module.symtab); +} + +inline void swapStruct(struct objc_symtab_t &symtab) { + sys::swapByteOrder(symtab.sel_ref_cnt); + sys::swapByteOrder(symtab.refs); + sys::swapByteOrder(symtab.cls_def_cnt); + sys::swapByteOrder(symtab.cat_def_cnt); +} + +inline void swapStruct(struct objc_class_t &objc_class) { + sys::swapByteOrder(objc_class.isa); + sys::swapByteOrder(objc_class.super_class); + sys::swapByteOrder(objc_class.name); + sys::swapByteOrder(objc_class.version); + sys::swapByteOrder(objc_class.info); + sys::swapByteOrder(objc_class.instance_size); + sys::swapByteOrder(objc_class.ivars); + sys::swapByteOrder(objc_class.methodLists); + sys::swapByteOrder(objc_class.cache); + sys::swapByteOrder(objc_class.protocols); +} + +inline void swapStruct(struct objc_category_t &objc_category) { + sys::swapByteOrder(objc_category.category_name); + sys::swapByteOrder(objc_category.class_name); + sys::swapByteOrder(objc_category.instance_methods); + sys::swapByteOrder(objc_category.class_methods); + sys::swapByteOrder(objc_category.protocols); +} + +inline void swapStruct(struct objc_ivar_list_t &objc_ivar_list) { + sys::swapByteOrder(objc_ivar_list.ivar_count); +} + +inline void swapStruct(struct objc_ivar_t &objc_ivar) { + sys::swapByteOrder(objc_ivar.ivar_name); + sys::swapByteOrder(objc_ivar.ivar_type); + sys::swapByteOrder(objc_ivar.ivar_offset); +} + +inline void swapStruct(struct objc_method_list_t &method_list) { + sys::swapByteOrder(method_list.obsolete); + sys::swapByteOrder(method_list.method_count); +} + +inline void swapStruct(struct objc_method_t &method) { + sys::swapByteOrder(method.method_name); + sys::swapByteOrder(method.method_types); + sys::swapByteOrder(method.method_imp); +} + +inline void swapStruct(struct objc_protocol_list_t &protocol_list) { + sys::swapByteOrder(protocol_list.next); + sys::swapByteOrder(protocol_list.count); +} + +inline void swapStruct(struct objc_protocol_t &protocol) { + sys::swapByteOrder(protocol.isa); + sys::swapByteOrder(protocol.protocol_name); + sys::swapByteOrder(protocol.protocol_list); + sys::swapByteOrder(protocol.instance_methods); + sys::swapByteOrder(protocol.class_methods); +} + +inline void swapStruct(struct objc_method_description_list_t &mdl) { + sys::swapByteOrder(mdl.count); +} + +inline void swapStruct(struct objc_method_description_t &md) { + sys::swapByteOrder(md.name); + sys::swapByteOrder(md.types); +} + +static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue, + struct DisassembleInfo *info); + +// get_objc2_64bit_class_name() is used for disassembly and is passed a pointer +// to an Objective-C class and returns the class name. It is also passed the +// address of the pointer, so when the pointer is zero as it can be in an .o +// file, that is used to look for an external relocation entry with a symbol +// name. +static const char *get_objc2_64bit_class_name(uint64_t pointer_value, + uint64_t ReferenceValue, + struct DisassembleInfo *info) { + const char *r; + uint32_t offset, left; + SectionRef S; + + // The pointer_value can be 0 in an object file and have a relocation + // entry for the class symbol at the ReferenceValue (the address of the + // pointer). + if (pointer_value == 0) { + r = get_pointer_64(ReferenceValue, offset, left, S, info); + if (r == nullptr || left < sizeof(uint64_t)) + return nullptr; + uint64_t n_value; + const char *symbol_name = get_symbol_64(offset, S, info, n_value); + if (symbol_name == nullptr) + return nullptr; + const char *class_name = strrchr(symbol_name, '$'); + if (class_name != nullptr && class_name[1] == '_' && class_name[2] != '\0') + return class_name + 2; + else + return nullptr; + } + + // The case were the pointer_value is non-zero and points to a class defined + // in this Mach-O file. + r = get_pointer_64(pointer_value, offset, left, S, info); + if (r == nullptr || left < sizeof(struct class64_t)) + return nullptr; + struct class64_t c; + memcpy(&c, r, sizeof(struct class64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(c); + if (c.data == 0) + return nullptr; + r = get_pointer_64(c.data, offset, left, S, info); + if (r == nullptr || left < sizeof(struct class_ro64_t)) + return nullptr; + struct class_ro64_t cro; + memcpy(&cro, r, sizeof(struct class_ro64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(cro); + if (cro.name == 0) + return nullptr; + const char *name = get_pointer_64(cro.name, offset, left, S, info); + return name; +} + +// get_objc2_64bit_cfstring_name is used for disassembly and is passed a +// pointer to a cfstring and returns its name or nullptr. +static const char *get_objc2_64bit_cfstring_name(uint64_t ReferenceValue, + struct DisassembleInfo *info) { + const char *r, *name; + uint32_t offset, left; + SectionRef S; + struct cfstring64_t cfs; + uint64_t cfs_characters; + + r = get_pointer_64(ReferenceValue, offset, left, S, info); + if (r == nullptr || left < sizeof(struct cfstring64_t)) + return nullptr; + memcpy(&cfs, r, sizeof(struct cfstring64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(cfs); + if (cfs.characters == 0) { + uint64_t n_value; + const char *symbol_name = get_symbol_64( + offset + offsetof(struct cfstring64_t, characters), S, info, n_value); + if (symbol_name == nullptr) + return nullptr; + cfs_characters = n_value; + } else + cfs_characters = cfs.characters; + name = get_pointer_64(cfs_characters, offset, left, S, info); + + return name; +} + +// get_objc2_64bit_selref() is used for disassembly and is passed a the address +// of a pointer to an Objective-C selector reference when the pointer value is +// zero as in a .o file and is likely to have a external relocation entry with +// who's symbol's n_value is the real pointer to the selector name. If that is +// the case the real pointer to the selector name is returned else 0 is +// returned +static uint64_t get_objc2_64bit_selref(uint64_t ReferenceValue, + struct DisassembleInfo *info) { + uint32_t offset, left; + SectionRef S; + + const char *r = get_pointer_64(ReferenceValue, offset, left, S, info); + if (r == nullptr || left < sizeof(uint64_t)) + return 0; + uint64_t n_value; + const char *symbol_name = get_symbol_64(offset, S, info, n_value); + if (symbol_name == nullptr) + return 0; + return n_value; +} + +static const SectionRef get_section(MachOObjectFile *O, const char *segname, + const char *sectname) { + for (const SectionRef &Section : O->sections()) { + StringRef SectName; + Section.getName(SectName); + DataRefImpl Ref = Section.getRawDataRefImpl(); + StringRef SegName = O->getSectionFinalSegmentName(Ref); + if (SegName == segname && SectName == sectname) + return Section; + } + return SectionRef(); +} + +static void +walk_pointer_list_64(const char *listname, const SectionRef S, + MachOObjectFile *O, struct DisassembleInfo *info, + void (*func)(uint64_t, struct DisassembleInfo *info)) { + if (S == SectionRef()) + return; + + StringRef SectName; + S.getName(SectName); + DataRefImpl Ref = S.getRawDataRefImpl(); + StringRef SegName = O->getSectionFinalSegmentName(Ref); + outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; + + StringRef BytesStr; + S.getContents(BytesStr); + const char *Contents = reinterpret_cast(BytesStr.data()); + + for (uint32_t i = 0; i < S.getSize(); i += sizeof(uint64_t)) { + uint32_t left = S.getSize() - i; + uint32_t size = left < sizeof(uint64_t) ? left : sizeof(uint64_t); + uint64_t p = 0; + memcpy(&p, Contents + i, size); + if (i + sizeof(uint64_t) > S.getSize()) + outs() << listname << " list pointer extends past end of (" << SegName + << "," << SectName << ") section\n"; + outs() << format("%016" PRIx64, S.getAddress() + i) << " "; + + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(p); + + uint64_t n_value = 0; + const char *name = get_symbol_64(i, S, info, n_value, p); + if (name == nullptr) + name = get_dyld_bind_info_symbolname(S.getAddress() + i, info); + + if (n_value != 0) { + outs() << format("0x%" PRIx64, n_value); + if (p != 0) + outs() << " + " << format("0x%" PRIx64, p); + } else + outs() << format("0x%" PRIx64, p); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + p += n_value; + if (func) + func(p, info); + } +} + +static void +walk_pointer_list_32(const char *listname, const SectionRef S, + MachOObjectFile *O, struct DisassembleInfo *info, + void (*func)(uint32_t, struct DisassembleInfo *info)) { + if (S == SectionRef()) + return; + + StringRef SectName; + S.getName(SectName); + DataRefImpl Ref = S.getRawDataRefImpl(); + StringRef SegName = O->getSectionFinalSegmentName(Ref); + outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; + + StringRef BytesStr; + S.getContents(BytesStr); + const char *Contents = reinterpret_cast(BytesStr.data()); + + for (uint32_t i = 0; i < S.getSize(); i += sizeof(uint32_t)) { + uint32_t left = S.getSize() - i; + uint32_t size = left < sizeof(uint32_t) ? left : sizeof(uint32_t); + uint32_t p = 0; + memcpy(&p, Contents + i, size); + if (i + sizeof(uint32_t) > S.getSize()) + outs() << listname << " list pointer extends past end of (" << SegName + << "," << SectName << ") section\n"; + uint32_t Address = S.getAddress() + i; + outs() << format("%08" PRIx32, Address) << " "; + + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(p); + outs() << format("0x%" PRIx32, p); + + const char *name = get_symbol_32(i, S, info, p); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + if (func) + func(p, info); + } +} + +static void print_layout_map(const char *layout_map, uint32_t left) { + outs() << " layout map: "; + do { + outs() << format("0x%02" PRIx32, (*layout_map) & 0xff) << " "; + left--; + layout_map++; + } while (*layout_map != '\0' && left != 0); + outs() << "\n"; +} + +static void print_layout_map64(uint64_t p, struct DisassembleInfo *info) { + uint32_t offset, left; + SectionRef S; + const char *layout_map; + + if (p == 0) + return; + layout_map = get_pointer_64(p, offset, left, S, info); + print_layout_map(layout_map, left); +} + +static void print_layout_map32(uint32_t p, struct DisassembleInfo *info) { + uint32_t offset, left; + SectionRef S; + const char *layout_map; + + if (p == 0) + return; + layout_map = get_pointer_32(p, offset, left, S, info); + print_layout_map(layout_map, left); +} + +static void print_method_list64_t(uint64_t p, struct DisassembleInfo *info, + const char *indent) { + struct method_list64_t ml; + struct method64_t m; + const char *r; + uint32_t offset, xoffset, left, i; + SectionRef S, xS; + const char *name, *sym_name; + uint64_t n_value; + + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&ml, '\0', sizeof(struct method_list64_t)); + if (left < sizeof(struct method_list64_t)) { + memcpy(&ml, r, left); + outs() << " (method_list_t entends past the end of the section)\n"; + } else + memcpy(&ml, r, sizeof(struct method_list64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(ml); + outs() << indent << "\t\t entsize " << ml.entsize << "\n"; + outs() << indent << "\t\t count " << ml.count << "\n"; + + p += sizeof(struct method_list64_t); + offset += sizeof(struct method_list64_t); + for (i = 0; i < ml.count; i++) { + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&m, '\0', sizeof(struct method64_t)); + if (left < sizeof(struct method64_t)) { + memcpy(&ml, r, left); + outs() << indent << " (method_t entends past the end of the section)\n"; + } else + memcpy(&m, r, sizeof(struct method64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(m); + + outs() << indent << "\t\t name "; + sym_name = get_symbol_64(offset + offsetof(struct method64_t, name), S, + info, n_value, m.name); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (m.name != 0) + outs() << " + " << format("0x%" PRIx64, m.name); + } else + outs() << format("0x%" PRIx64, m.name); + name = get_pointer_64(m.name + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << indent << "\t\t types "; + sym_name = get_symbol_64(offset + offsetof(struct method64_t, types), S, + info, n_value, m.types); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (m.types != 0) + outs() << " + " << format("0x%" PRIx64, m.types); + } else + outs() << format("0x%" PRIx64, m.types); + name = get_pointer_64(m.types + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << indent << "\t\t imp "; + name = get_symbol_64(offset + offsetof(struct method64_t, imp), S, info, + n_value, m.imp); + if (info->verbose && name == nullptr) { + if (n_value != 0) { + outs() << format("0x%" PRIx64, n_value) << " "; + if (m.imp != 0) + outs() << "+ " << format("0x%" PRIx64, m.imp) << " "; + } else + outs() << format("0x%" PRIx64, m.imp) << " "; + } + if (name != nullptr) + outs() << name; + outs() << "\n"; + + p += sizeof(struct method64_t); + offset += sizeof(struct method64_t); + } +} + +static void print_method_list32_t(uint64_t p, struct DisassembleInfo *info, + const char *indent) { + struct method_list32_t ml; + struct method32_t m; + const char *r, *name; + uint32_t offset, xoffset, left, i; + SectionRef S, xS; + + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&ml, '\0', sizeof(struct method_list32_t)); + if (left < sizeof(struct method_list32_t)) { + memcpy(&ml, r, left); + outs() << " (method_list_t entends past the end of the section)\n"; + } else + memcpy(&ml, r, sizeof(struct method_list32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(ml); + outs() << indent << "\t\t entsize " << ml.entsize << "\n"; + outs() << indent << "\t\t count " << ml.count << "\n"; + + p += sizeof(struct method_list32_t); + offset += sizeof(struct method_list32_t); + for (i = 0; i < ml.count; i++) { + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&m, '\0', sizeof(struct method32_t)); + if (left < sizeof(struct method32_t)) { + memcpy(&ml, r, left); + outs() << indent << " (method_t entends past the end of the section)\n"; + } else + memcpy(&m, r, sizeof(struct method32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(m); + + outs() << indent << "\t\t name " << format("0x%" PRIx32, m.name); + name = get_pointer_32(m.name, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << indent << "\t\t types " << format("0x%" PRIx32, m.types); + name = get_pointer_32(m.types, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << indent << "\t\t imp " << format("0x%" PRIx32, m.imp); + name = get_symbol_32(offset + offsetof(struct method32_t, imp), S, info, + m.imp); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + p += sizeof(struct method32_t); + offset += sizeof(struct method32_t); + } +} + +static bool print_method_list(uint32_t p, struct DisassembleInfo *info) { + uint32_t offset, left, xleft; + SectionRef S; + struct objc_method_list_t method_list; + struct objc_method_t method; + const char *r, *methods, *name, *SymbolName; + int32_t i; + + r = get_pointer_32(p, offset, left, S, info, true); + if (r == nullptr) + return true; + + outs() << "\n"; + if (left > sizeof(struct objc_method_list_t)) { + memcpy(&method_list, r, sizeof(struct objc_method_list_t)); + } else { + outs() << "\t\t objc_method_list extends past end of the section\n"; + memset(&method_list, '\0', sizeof(struct objc_method_list_t)); + memcpy(&method_list, r, left); + } + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(method_list); + + outs() << "\t\t obsolete " + << format("0x%08" PRIx32, method_list.obsolete) << "\n"; + outs() << "\t\t method_count " << method_list.method_count << "\n"; + + methods = r + sizeof(struct objc_method_list_t); + for (i = 0; i < method_list.method_count; i++) { + if ((i + 1) * sizeof(struct objc_method_t) > left) { + outs() << "\t\t remaining method's extend past the of the section\n"; + break; + } + memcpy(&method, methods + i * sizeof(struct objc_method_t), + sizeof(struct objc_method_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(method); + + outs() << "\t\t method_name " + << format("0x%08" PRIx32, method.method_name); + if (info->verbose) { + name = get_pointer_32(method.method_name, offset, xleft, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", xleft, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t\t method_types " + << format("0x%08" PRIx32, method.method_types); + if (info->verbose) { + name = get_pointer_32(method.method_types, offset, xleft, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", xleft, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t\t method_imp " + << format("0x%08" PRIx32, method.method_imp) << " "; + if (info->verbose) { + SymbolName = GuessSymbolName(method.method_imp, info->AddrMap); + if (SymbolName != nullptr) + outs() << SymbolName; + } + outs() << "\n"; + } + return false; +} + +static void print_protocol_list64_t(uint64_t p, struct DisassembleInfo *info) { + struct protocol_list64_t pl; + uint64_t q, n_value; + struct protocol64_t pc; + const char *r; + uint32_t offset, xoffset, left, i; + SectionRef S, xS; + const char *name, *sym_name; + + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&pl, '\0', sizeof(struct protocol_list64_t)); + if (left < sizeof(struct protocol_list64_t)) { + memcpy(&pl, r, left); + outs() << " (protocol_list_t entends past the end of the section)\n"; + } else + memcpy(&pl, r, sizeof(struct protocol_list64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(pl); + outs() << " count " << pl.count << "\n"; + + p += sizeof(struct protocol_list64_t); + offset += sizeof(struct protocol_list64_t); + for (i = 0; i < pl.count; i++) { + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + q = 0; + if (left < sizeof(uint64_t)) { + memcpy(&q, r, left); + outs() << " (protocol_t * entends past the end of the section)\n"; + } else + memcpy(&q, r, sizeof(uint64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(q); + + outs() << "\t\t list[" << i << "] "; + sym_name = get_symbol_64(offset, S, info, n_value, q); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (q != 0) + outs() << " + " << format("0x%" PRIx64, q); + } else + outs() << format("0x%" PRIx64, q); + outs() << " (struct protocol_t *)\n"; + + r = get_pointer_64(q + n_value, offset, left, S, info); + if (r == nullptr) + return; + memset(&pc, '\0', sizeof(struct protocol64_t)); + if (left < sizeof(struct protocol64_t)) { + memcpy(&pc, r, left); + outs() << " (protocol_t entends past the end of the section)\n"; + } else + memcpy(&pc, r, sizeof(struct protocol64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(pc); + + outs() << "\t\t\t isa " << format("0x%" PRIx64, pc.isa) << "\n"; + + outs() << "\t\t\t name "; + sym_name = get_symbol_64(offset + offsetof(struct protocol64_t, name), S, + info, n_value, pc.name); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (pc.name != 0) + outs() << " + " << format("0x%" PRIx64, pc.name); + } else + outs() << format("0x%" PRIx64, pc.name); + name = get_pointer_64(pc.name + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << "\t\t\tprotocols " << format("0x%" PRIx64, pc.protocols) << "\n"; + + outs() << "\t\t instanceMethods "; + sym_name = + get_symbol_64(offset + offsetof(struct protocol64_t, instanceMethods), + S, info, n_value, pc.instanceMethods); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (pc.instanceMethods != 0) + outs() << " + " << format("0x%" PRIx64, pc.instanceMethods); + } else + outs() << format("0x%" PRIx64, pc.instanceMethods); + outs() << " (struct method_list_t *)\n"; + if (pc.instanceMethods + n_value != 0) + print_method_list64_t(pc.instanceMethods + n_value, info, "\t"); + + outs() << "\t\t classMethods "; + sym_name = + get_symbol_64(offset + offsetof(struct protocol64_t, classMethods), S, + info, n_value, pc.classMethods); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (pc.classMethods != 0) + outs() << " + " << format("0x%" PRIx64, pc.classMethods); + } else + outs() << format("0x%" PRIx64, pc.classMethods); + outs() << " (struct method_list_t *)\n"; + if (pc.classMethods + n_value != 0) + print_method_list64_t(pc.classMethods + n_value, info, "\t"); + + outs() << "\t optionalInstanceMethods " + << format("0x%" PRIx64, pc.optionalInstanceMethods) << "\n"; + outs() << "\t optionalClassMethods " + << format("0x%" PRIx64, pc.optionalClassMethods) << "\n"; + outs() << "\t instanceProperties " + << format("0x%" PRIx64, pc.instanceProperties) << "\n"; + + p += sizeof(uint64_t); + offset += sizeof(uint64_t); + } +} + +static void print_protocol_list32_t(uint32_t p, struct DisassembleInfo *info) { + struct protocol_list32_t pl; + uint32_t q; + struct protocol32_t pc; + const char *r; + uint32_t offset, xoffset, left, i; + SectionRef S, xS; + const char *name; + + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&pl, '\0', sizeof(struct protocol_list32_t)); + if (left < sizeof(struct protocol_list32_t)) { + memcpy(&pl, r, left); + outs() << " (protocol_list_t entends past the end of the section)\n"; + } else + memcpy(&pl, r, sizeof(struct protocol_list32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(pl); + outs() << " count " << pl.count << "\n"; + + p += sizeof(struct protocol_list32_t); + offset += sizeof(struct protocol_list32_t); + for (i = 0; i < pl.count; i++) { + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + q = 0; + if (left < sizeof(uint32_t)) { + memcpy(&q, r, left); + outs() << " (protocol_t * entends past the end of the section)\n"; + } else + memcpy(&q, r, sizeof(uint32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(q); + outs() << "\t\t list[" << i << "] " << format("0x%" PRIx32, q) + << " (struct protocol_t *)\n"; + r = get_pointer_32(q, offset, left, S, info); + if (r == nullptr) + return; + memset(&pc, '\0', sizeof(struct protocol32_t)); + if (left < sizeof(struct protocol32_t)) { + memcpy(&pc, r, left); + outs() << " (protocol_t entends past the end of the section)\n"; + } else + memcpy(&pc, r, sizeof(struct protocol32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(pc); + outs() << "\t\t\t isa " << format("0x%" PRIx32, pc.isa) << "\n"; + outs() << "\t\t\t name " << format("0x%" PRIx32, pc.name); + name = get_pointer_32(pc.name, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + outs() << "\t\t\tprotocols " << format("0x%" PRIx32, pc.protocols) << "\n"; + outs() << "\t\t instanceMethods " + << format("0x%" PRIx32, pc.instanceMethods) + << " (struct method_list_t *)\n"; + if (pc.instanceMethods != 0) + print_method_list32_t(pc.instanceMethods, info, "\t"); + outs() << "\t\t classMethods " << format("0x%" PRIx32, pc.classMethods) + << " (struct method_list_t *)\n"; + if (pc.classMethods != 0) + print_method_list32_t(pc.classMethods, info, "\t"); + outs() << "\t optionalInstanceMethods " + << format("0x%" PRIx32, pc.optionalInstanceMethods) << "\n"; + outs() << "\t optionalClassMethods " + << format("0x%" PRIx32, pc.optionalClassMethods) << "\n"; + outs() << "\t instanceProperties " + << format("0x%" PRIx32, pc.instanceProperties) << "\n"; + p += sizeof(uint32_t); + offset += sizeof(uint32_t); + } +} + +static void print_indent(uint32_t indent) { + for (uint32_t i = 0; i < indent;) { + if (indent - i >= 8) { + outs() << "\t"; + i += 8; + } else { + for (uint32_t j = i; j < indent; j++) + outs() << " "; + return; + } + } +} + +static bool print_method_description_list(uint32_t p, uint32_t indent, + struct DisassembleInfo *info) { + uint32_t offset, left, xleft; + SectionRef S; + struct objc_method_description_list_t mdl; + struct objc_method_description_t md; + const char *r, *list, *name; + int32_t i; + + r = get_pointer_32(p, offset, left, S, info, true); + if (r == nullptr) + return true; + + outs() << "\n"; + if (left > sizeof(struct objc_method_description_list_t)) { + memcpy(&mdl, r, sizeof(struct objc_method_description_list_t)); + } else { + print_indent(indent); + outs() << " objc_method_description_list extends past end of the section\n"; + memset(&mdl, '\0', sizeof(struct objc_method_description_list_t)); + memcpy(&mdl, r, left); + } + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(mdl); + + print_indent(indent); + outs() << " count " << mdl.count << "\n"; + + list = r + sizeof(struct objc_method_description_list_t); + for (i = 0; i < mdl.count; i++) { + if ((i + 1) * sizeof(struct objc_method_description_t) > left) { + print_indent(indent); + outs() << " remaining list entries extend past the of the section\n"; + break; + } + print_indent(indent); + outs() << " list[" << i << "]\n"; + memcpy(&md, list + i * sizeof(struct objc_method_description_t), + sizeof(struct objc_method_description_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(md); + + print_indent(indent); + outs() << " name " << format("0x%08" PRIx32, md.name); + if (info->verbose) { + name = get_pointer_32(md.name, offset, xleft, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", xleft, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + print_indent(indent); + outs() << " types " << format("0x%08" PRIx32, md.types); + if (info->verbose) { + name = get_pointer_32(md.types, offset, xleft, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", xleft, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + } + return false; +} + +static bool print_protocol_list(uint32_t p, uint32_t indent, + struct DisassembleInfo *info); + +static bool print_protocol(uint32_t p, uint32_t indent, + struct DisassembleInfo *info) { + uint32_t offset, left; + SectionRef S; + struct objc_protocol_t protocol; + const char *r, *name; + + r = get_pointer_32(p, offset, left, S, info, true); + if (r == nullptr) + return true; + + outs() << "\n"; + if (left >= sizeof(struct objc_protocol_t)) { + memcpy(&protocol, r, sizeof(struct objc_protocol_t)); + } else { + print_indent(indent); + outs() << " Protocol extends past end of the section\n"; + memset(&protocol, '\0', sizeof(struct objc_protocol_t)); + memcpy(&protocol, r, left); + } + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(protocol); + + print_indent(indent); + outs() << " isa " << format("0x%08" PRIx32, protocol.isa) + << "\n"; + + print_indent(indent); + outs() << " protocol_name " + << format("0x%08" PRIx32, protocol.protocol_name); + if (info->verbose) { + name = get_pointer_32(protocol.protocol_name, offset, left, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", left, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + print_indent(indent); + outs() << " protocol_list " + << format("0x%08" PRIx32, protocol.protocol_list); + if (print_protocol_list(protocol.protocol_list, indent + 4, info)) + outs() << " (not in an __OBJC section)\n"; + + print_indent(indent); + outs() << " instance_methods " + << format("0x%08" PRIx32, protocol.instance_methods); + if (print_method_description_list(protocol.instance_methods, indent, info)) + outs() << " (not in an __OBJC section)\n"; + + print_indent(indent); + outs() << " class_methods " + << format("0x%08" PRIx32, protocol.class_methods); + if (print_method_description_list(protocol.class_methods, indent, info)) + outs() << " (not in an __OBJC section)\n"; + + return false; +} + +static bool print_protocol_list(uint32_t p, uint32_t indent, + struct DisassembleInfo *info) { + uint32_t offset, left, l; + SectionRef S; + struct objc_protocol_list_t protocol_list; + const char *r, *list; + int32_t i; + + r = get_pointer_32(p, offset, left, S, info, true); + if (r == nullptr) + return true; + + outs() << "\n"; + if (left > sizeof(struct objc_protocol_list_t)) { + memcpy(&protocol_list, r, sizeof(struct objc_protocol_list_t)); + } else { + outs() << "\t\t objc_protocol_list_t extends past end of the section\n"; + memset(&protocol_list, '\0', sizeof(struct objc_protocol_list_t)); + memcpy(&protocol_list, r, left); + } + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(protocol_list); + + print_indent(indent); + outs() << " next " << format("0x%08" PRIx32, protocol_list.next) + << "\n"; + print_indent(indent); + outs() << " count " << protocol_list.count << "\n"; + + list = r + sizeof(struct objc_protocol_list_t); + for (i = 0; i < protocol_list.count; i++) { + if ((i + 1) * sizeof(uint32_t) > left) { + outs() << "\t\t remaining list entries extend past the of the section\n"; + break; + } + memcpy(&l, list + i * sizeof(uint32_t), sizeof(uint32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(l); + + print_indent(indent); + outs() << " list[" << i << "] " << format("0x%08" PRIx32, l); + if (print_protocol(l, indent, info)) + outs() << "(not in an __OBJC section)\n"; + } + return false; +} + +static void print_ivar_list64_t(uint64_t p, struct DisassembleInfo *info) { + struct ivar_list64_t il; + struct ivar64_t i; + const char *r; + uint32_t offset, xoffset, left, j; + SectionRef S, xS; + const char *name, *sym_name, *ivar_offset_p; + uint64_t ivar_offset, n_value; + + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&il, '\0', sizeof(struct ivar_list64_t)); + if (left < sizeof(struct ivar_list64_t)) { + memcpy(&il, r, left); + outs() << " (ivar_list_t entends past the end of the section)\n"; + } else + memcpy(&il, r, sizeof(struct ivar_list64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(il); + outs() << " entsize " << il.entsize << "\n"; + outs() << " count " << il.count << "\n"; + + p += sizeof(struct ivar_list64_t); + offset += sizeof(struct ivar_list64_t); + for (j = 0; j < il.count; j++) { + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&i, '\0', sizeof(struct ivar64_t)); + if (left < sizeof(struct ivar64_t)) { + memcpy(&i, r, left); + outs() << " (ivar_t entends past the end of the section)\n"; + } else + memcpy(&i, r, sizeof(struct ivar64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(i); + + outs() << "\t\t\t offset "; + sym_name = get_symbol_64(offset + offsetof(struct ivar64_t, offset), S, + info, n_value, i.offset); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (i.offset != 0) + outs() << " + " << format("0x%" PRIx64, i.offset); + } else + outs() << format("0x%" PRIx64, i.offset); + ivar_offset_p = get_pointer_64(i.offset + n_value, xoffset, left, xS, info); + if (ivar_offset_p != nullptr && left >= sizeof(*ivar_offset_p)) { + memcpy(&ivar_offset, ivar_offset_p, sizeof(ivar_offset)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(ivar_offset); + outs() << " " << ivar_offset << "\n"; + } else + outs() << "\n"; + + outs() << "\t\t\t name "; + sym_name = get_symbol_64(offset + offsetof(struct ivar64_t, name), S, info, + n_value, i.name); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (i.name != 0) + outs() << " + " << format("0x%" PRIx64, i.name); + } else + outs() << format("0x%" PRIx64, i.name); + name = get_pointer_64(i.name + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << "\t\t\t type "; + sym_name = get_symbol_64(offset + offsetof(struct ivar64_t, type), S, info, + n_value, i.name); + name = get_pointer_64(i.type + n_value, xoffset, left, xS, info); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (i.type != 0) + outs() << " + " << format("0x%" PRIx64, i.type); + } else + outs() << format("0x%" PRIx64, i.type); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << "\t\t\talignment " << i.alignment << "\n"; + outs() << "\t\t\t size " << i.size << "\n"; + + p += sizeof(struct ivar64_t); + offset += sizeof(struct ivar64_t); + } +} + +static void print_ivar_list32_t(uint32_t p, struct DisassembleInfo *info) { + struct ivar_list32_t il; + struct ivar32_t i; + const char *r; + uint32_t offset, xoffset, left, j; + SectionRef S, xS; + const char *name, *ivar_offset_p; + uint32_t ivar_offset; + + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&il, '\0', sizeof(struct ivar_list32_t)); + if (left < sizeof(struct ivar_list32_t)) { + memcpy(&il, r, left); + outs() << " (ivar_list_t entends past the end of the section)\n"; + } else + memcpy(&il, r, sizeof(struct ivar_list32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(il); + outs() << " entsize " << il.entsize << "\n"; + outs() << " count " << il.count << "\n"; + + p += sizeof(struct ivar_list32_t); + offset += sizeof(struct ivar_list32_t); + for (j = 0; j < il.count; j++) { + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&i, '\0', sizeof(struct ivar32_t)); + if (left < sizeof(struct ivar32_t)) { + memcpy(&i, r, left); + outs() << " (ivar_t entends past the end of the section)\n"; + } else + memcpy(&i, r, sizeof(struct ivar32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(i); + + outs() << "\t\t\t offset " << format("0x%" PRIx32, i.offset); + ivar_offset_p = get_pointer_32(i.offset, xoffset, left, xS, info); + if (ivar_offset_p != nullptr && left >= sizeof(*ivar_offset_p)) { + memcpy(&ivar_offset, ivar_offset_p, sizeof(ivar_offset)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(ivar_offset); + outs() << " " << ivar_offset << "\n"; + } else + outs() << "\n"; + + outs() << "\t\t\t name " << format("0x%" PRIx32, i.name); + name = get_pointer_32(i.name, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << "\t\t\t type " << format("0x%" PRIx32, i.type); + name = get_pointer_32(i.type, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << "\t\t\talignment " << i.alignment << "\n"; + outs() << "\t\t\t size " << i.size << "\n"; + + p += sizeof(struct ivar32_t); + offset += sizeof(struct ivar32_t); + } +} + +static void print_objc_property_list64(uint64_t p, + struct DisassembleInfo *info) { + struct objc_property_list64 opl; + struct objc_property64 op; + const char *r; + uint32_t offset, xoffset, left, j; + SectionRef S, xS; + const char *name, *sym_name; + uint64_t n_value; + + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&opl, '\0', sizeof(struct objc_property_list64)); + if (left < sizeof(struct objc_property_list64)) { + memcpy(&opl, r, left); + outs() << " (objc_property_list entends past the end of the section)\n"; + } else + memcpy(&opl, r, sizeof(struct objc_property_list64)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(opl); + outs() << " entsize " << opl.entsize << "\n"; + outs() << " count " << opl.count << "\n"; + + p += sizeof(struct objc_property_list64); + offset += sizeof(struct objc_property_list64); + for (j = 0; j < opl.count; j++) { + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&op, '\0', sizeof(struct objc_property64)); + if (left < sizeof(struct objc_property64)) { + memcpy(&op, r, left); + outs() << " (objc_property entends past the end of the section)\n"; + } else + memcpy(&op, r, sizeof(struct objc_property64)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(op); + + outs() << "\t\t\t name "; + sym_name = get_symbol_64(offset + offsetof(struct objc_property64, name), S, + info, n_value, op.name); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (op.name != 0) + outs() << " + " << format("0x%" PRIx64, op.name); + } else + outs() << format("0x%" PRIx64, op.name); + name = get_pointer_64(op.name + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << "\t\t\tattributes "; + sym_name = + get_symbol_64(offset + offsetof(struct objc_property64, attributes), S, + info, n_value, op.attributes); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (op.attributes != 0) + outs() << " + " << format("0x%" PRIx64, op.attributes); + } else + outs() << format("0x%" PRIx64, op.attributes); + name = get_pointer_64(op.attributes + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + p += sizeof(struct objc_property64); + offset += sizeof(struct objc_property64); + } +} + +static void print_objc_property_list32(uint32_t p, + struct DisassembleInfo *info) { + struct objc_property_list32 opl; + struct objc_property32 op; + const char *r; + uint32_t offset, xoffset, left, j; + SectionRef S, xS; + const char *name; + + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&opl, '\0', sizeof(struct objc_property_list32)); + if (left < sizeof(struct objc_property_list32)) { + memcpy(&opl, r, left); + outs() << " (objc_property_list entends past the end of the section)\n"; + } else + memcpy(&opl, r, sizeof(struct objc_property_list32)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(opl); + outs() << " entsize " << opl.entsize << "\n"; + outs() << " count " << opl.count << "\n"; + + p += sizeof(struct objc_property_list32); + offset += sizeof(struct objc_property_list32); + for (j = 0; j < opl.count; j++) { + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&op, '\0', sizeof(struct objc_property32)); + if (left < sizeof(struct objc_property32)) { + memcpy(&op, r, left); + outs() << " (objc_property entends past the end of the section)\n"; + } else + memcpy(&op, r, sizeof(struct objc_property32)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(op); + + outs() << "\t\t\t name " << format("0x%" PRIx32, op.name); + name = get_pointer_32(op.name, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << "\t\t\tattributes " << format("0x%" PRIx32, op.attributes); + name = get_pointer_32(op.attributes, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + p += sizeof(struct objc_property32); + offset += sizeof(struct objc_property32); + } +} + +static void print_class_ro64_t(uint64_t p, struct DisassembleInfo *info, + bool &is_meta_class) { + struct class_ro64_t cro; + const char *r; + uint32_t offset, xoffset, left; + SectionRef S, xS; + const char *name, *sym_name; + uint64_t n_value; + + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr || left < sizeof(struct class_ro64_t)) + return; + memset(&cro, '\0', sizeof(struct class_ro64_t)); + if (left < sizeof(struct class_ro64_t)) { + memcpy(&cro, r, left); + outs() << " (class_ro_t entends past the end of the section)\n"; + } else + memcpy(&cro, r, sizeof(struct class_ro64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(cro); + outs() << " flags " << format("0x%" PRIx32, cro.flags); + if (cro.flags & RO_META) + outs() << " RO_META"; + if (cro.flags & RO_ROOT) + outs() << " RO_ROOT"; + if (cro.flags & RO_HAS_CXX_STRUCTORS) + outs() << " RO_HAS_CXX_STRUCTORS"; + outs() << "\n"; + outs() << " instanceStart " << cro.instanceStart << "\n"; + outs() << " instanceSize " << cro.instanceSize << "\n"; + outs() << " reserved " << format("0x%" PRIx32, cro.reserved) + << "\n"; + outs() << " ivarLayout " << format("0x%" PRIx64, cro.ivarLayout) + << "\n"; + print_layout_map64(cro.ivarLayout, info); + + outs() << " name "; + sym_name = get_symbol_64(offset + offsetof(struct class_ro64_t, name), S, + info, n_value, cro.name); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (cro.name != 0) + outs() << " + " << format("0x%" PRIx64, cro.name); + } else + outs() << format("0x%" PRIx64, cro.name); + name = get_pointer_64(cro.name + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << " baseMethods "; + sym_name = get_symbol_64(offset + offsetof(struct class_ro64_t, baseMethods), + S, info, n_value, cro.baseMethods); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (cro.baseMethods != 0) + outs() << " + " << format("0x%" PRIx64, cro.baseMethods); + } else + outs() << format("0x%" PRIx64, cro.baseMethods); + outs() << " (struct method_list_t *)\n"; + if (cro.baseMethods + n_value != 0) + print_method_list64_t(cro.baseMethods + n_value, info, ""); + + outs() << " baseProtocols "; + sym_name = + get_symbol_64(offset + offsetof(struct class_ro64_t, baseProtocols), S, + info, n_value, cro.baseProtocols); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (cro.baseProtocols != 0) + outs() << " + " << format("0x%" PRIx64, cro.baseProtocols); + } else + outs() << format("0x%" PRIx64, cro.baseProtocols); + outs() << "\n"; + if (cro.baseProtocols + n_value != 0) + print_protocol_list64_t(cro.baseProtocols + n_value, info); + + outs() << " ivars "; + sym_name = get_symbol_64(offset + offsetof(struct class_ro64_t, ivars), S, + info, n_value, cro.ivars); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (cro.ivars != 0) + outs() << " + " << format("0x%" PRIx64, cro.ivars); + } else + outs() << format("0x%" PRIx64, cro.ivars); + outs() << "\n"; + if (cro.ivars + n_value != 0) + print_ivar_list64_t(cro.ivars + n_value, info); + + outs() << " weakIvarLayout "; + sym_name = + get_symbol_64(offset + offsetof(struct class_ro64_t, weakIvarLayout), S, + info, n_value, cro.weakIvarLayout); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (cro.weakIvarLayout != 0) + outs() << " + " << format("0x%" PRIx64, cro.weakIvarLayout); + } else + outs() << format("0x%" PRIx64, cro.weakIvarLayout); + outs() << "\n"; + print_layout_map64(cro.weakIvarLayout + n_value, info); + + outs() << " baseProperties "; + sym_name = + get_symbol_64(offset + offsetof(struct class_ro64_t, baseProperties), S, + info, n_value, cro.baseProperties); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (cro.baseProperties != 0) + outs() << " + " << format("0x%" PRIx64, cro.baseProperties); + } else + outs() << format("0x%" PRIx64, cro.baseProperties); + outs() << "\n"; + if (cro.baseProperties + n_value != 0) + print_objc_property_list64(cro.baseProperties + n_value, info); + + is_meta_class = (cro.flags & RO_META) ? true : false; +} + +static void print_class_ro32_t(uint32_t p, struct DisassembleInfo *info, + bool &is_meta_class) { + struct class_ro32_t cro; + const char *r; + uint32_t offset, xoffset, left; + SectionRef S, xS; + const char *name; + + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&cro, '\0', sizeof(struct class_ro32_t)); + if (left < sizeof(struct class_ro32_t)) { + memcpy(&cro, r, left); + outs() << " (class_ro_t entends past the end of the section)\n"; + } else + memcpy(&cro, r, sizeof(struct class_ro32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(cro); + outs() << " flags " << format("0x%" PRIx32, cro.flags); + if (cro.flags & RO_META) + outs() << " RO_META"; + if (cro.flags & RO_ROOT) + outs() << " RO_ROOT"; + if (cro.flags & RO_HAS_CXX_STRUCTORS) + outs() << " RO_HAS_CXX_STRUCTORS"; + outs() << "\n"; + outs() << " instanceStart " << cro.instanceStart << "\n"; + outs() << " instanceSize " << cro.instanceSize << "\n"; + outs() << " ivarLayout " << format("0x%" PRIx32, cro.ivarLayout) + << "\n"; + print_layout_map32(cro.ivarLayout, info); + + outs() << " name " << format("0x%" PRIx32, cro.name); + name = get_pointer_32(cro.name, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << " baseMethods " + << format("0x%" PRIx32, cro.baseMethods) + << " (struct method_list_t *)\n"; + if (cro.baseMethods != 0) + print_method_list32_t(cro.baseMethods, info, ""); + + outs() << " baseProtocols " + << format("0x%" PRIx32, cro.baseProtocols) << "\n"; + if (cro.baseProtocols != 0) + print_protocol_list32_t(cro.baseProtocols, info); + outs() << " ivars " << format("0x%" PRIx32, cro.ivars) + << "\n"; + if (cro.ivars != 0) + print_ivar_list32_t(cro.ivars, info); + outs() << " weakIvarLayout " + << format("0x%" PRIx32, cro.weakIvarLayout) << "\n"; + print_layout_map32(cro.weakIvarLayout, info); + outs() << " baseProperties " + << format("0x%" PRIx32, cro.baseProperties) << "\n"; + if (cro.baseProperties != 0) + print_objc_property_list32(cro.baseProperties, info); + is_meta_class = (cro.flags & RO_META) ? true : false; +} + +static void print_class64_t(uint64_t p, struct DisassembleInfo *info) { + struct class64_t c; + const char *r; + uint32_t offset, left; + SectionRef S; + const char *name; + uint64_t isa_n_value, n_value; + + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr || left < sizeof(struct class64_t)) + return; + memset(&c, '\0', sizeof(struct class64_t)); + if (left < sizeof(struct class64_t)) { + memcpy(&c, r, left); + outs() << " (class_t entends past the end of the section)\n"; + } else + memcpy(&c, r, sizeof(struct class64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(c); + + outs() << " isa " << format("0x%" PRIx64, c.isa); + name = get_symbol_64(offset + offsetof(struct class64_t, isa), S, info, + isa_n_value, c.isa); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + outs() << " superclass " << format("0x%" PRIx64, c.superclass); + name = get_symbol_64(offset + offsetof(struct class64_t, superclass), S, info, + n_value, c.superclass); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + outs() << " cache " << format("0x%" PRIx64, c.cache); + name = get_symbol_64(offset + offsetof(struct class64_t, cache), S, info, + n_value, c.cache); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + outs() << " vtable " << format("0x%" PRIx64, c.vtable); + name = get_symbol_64(offset + offsetof(struct class64_t, vtable), S, info, + n_value, c.vtable); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + name = get_symbol_64(offset + offsetof(struct class64_t, data), S, info, + n_value, c.data); + outs() << " data "; + if (n_value != 0) { + if (info->verbose && name != nullptr) + outs() << name; + else + outs() << format("0x%" PRIx64, n_value); + if (c.data != 0) + outs() << " + " << format("0x%" PRIx64, c.data); + } else + outs() << format("0x%" PRIx64, c.data); + outs() << " (struct class_ro_t *)"; + + // This is a Swift class if some of the low bits of the pointer are set. + if ((c.data + n_value) & 0x7) + outs() << " Swift class"; + outs() << "\n"; + bool is_meta_class; + print_class_ro64_t((c.data + n_value) & ~0x7, info, is_meta_class); + + if (is_meta_class == false) { + outs() << "Meta Class\n"; + print_class64_t(c.isa + isa_n_value, info); + } +} + +static void print_class32_t(uint32_t p, struct DisassembleInfo *info) { + struct class32_t c; + const char *r; + uint32_t offset, left; + SectionRef S; + const char *name; + + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&c, '\0', sizeof(struct class32_t)); + if (left < sizeof(struct class32_t)) { + memcpy(&c, r, left); + outs() << " (class_t entends past the end of the section)\n"; + } else + memcpy(&c, r, sizeof(struct class32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(c); + + outs() << " isa " << format("0x%" PRIx32, c.isa); + name = + get_symbol_32(offset + offsetof(struct class32_t, isa), S, info, c.isa); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + outs() << " superclass " << format("0x%" PRIx32, c.superclass); + name = get_symbol_32(offset + offsetof(struct class32_t, superclass), S, info, + c.superclass); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + outs() << " cache " << format("0x%" PRIx32, c.cache); + name = get_symbol_32(offset + offsetof(struct class32_t, cache), S, info, + c.cache); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + outs() << " vtable " << format("0x%" PRIx32, c.vtable); + name = get_symbol_32(offset + offsetof(struct class32_t, vtable), S, info, + c.vtable); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + name = + get_symbol_32(offset + offsetof(struct class32_t, data), S, info, c.data); + outs() << " data " << format("0x%" PRIx32, c.data) + << " (struct class_ro_t *)"; + + // This is a Swift class if some of the low bits of the pointer are set. + if (c.data & 0x3) + outs() << " Swift class"; + outs() << "\n"; + bool is_meta_class; + print_class_ro32_t(c.data & ~0x3, info, is_meta_class); + + if (is_meta_class == false) { + outs() << "Meta Class\n"; + print_class32_t(c.isa, info); + } +} + +static void print_objc_class_t(struct objc_class_t *objc_class, + struct DisassembleInfo *info) { + uint32_t offset, left, xleft; + const char *name, *p, *ivar_list; + SectionRef S; + int32_t i; + struct objc_ivar_list_t objc_ivar_list; + struct objc_ivar_t ivar; + + outs() << "\t\t isa " << format("0x%08" PRIx32, objc_class->isa); + if (info->verbose && CLS_GETINFO(objc_class, CLS_META)) { + name = get_pointer_32(objc_class->isa, offset, left, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", left, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t super_class " + << format("0x%08" PRIx32, objc_class->super_class); + if (info->verbose) { + name = get_pointer_32(objc_class->super_class, offset, left, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", left, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t\t name " << format("0x%08" PRIx32, objc_class->name); + if (info->verbose) { + name = get_pointer_32(objc_class->name, offset, left, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", left, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t\t version " << format("0x%08" PRIx32, objc_class->version) + << "\n"; + + outs() << "\t\t info " << format("0x%08" PRIx32, objc_class->info); + if (info->verbose) { + if (CLS_GETINFO(objc_class, CLS_CLASS)) + outs() << " CLS_CLASS"; + else if (CLS_GETINFO(objc_class, CLS_META)) + outs() << " CLS_META"; + } + outs() << "\n"; + + outs() << "\t instance_size " + << format("0x%08" PRIx32, objc_class->instance_size) << "\n"; + + p = get_pointer_32(objc_class->ivars, offset, left, S, info, true); + outs() << "\t\t ivars " << format("0x%08" PRIx32, objc_class->ivars); + if (p != nullptr) { + if (left > sizeof(struct objc_ivar_list_t)) { + outs() << "\n"; + memcpy(&objc_ivar_list, p, sizeof(struct objc_ivar_list_t)); + } else { + outs() << " (entends past the end of the section)\n"; + memset(&objc_ivar_list, '\0', sizeof(struct objc_ivar_list_t)); + memcpy(&objc_ivar_list, p, left); + } + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(objc_ivar_list); + outs() << "\t\t ivar_count " << objc_ivar_list.ivar_count << "\n"; + ivar_list = p + sizeof(struct objc_ivar_list_t); + for (i = 0; i < objc_ivar_list.ivar_count; i++) { + if ((i + 1) * sizeof(struct objc_ivar_t) > left) { + outs() << "\t\t remaining ivar's extend past the of the section\n"; + break; + } + memcpy(&ivar, ivar_list + i * sizeof(struct objc_ivar_t), + sizeof(struct objc_ivar_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(ivar); + + outs() << "\t\t\tivar_name " << format("0x%08" PRIx32, ivar.ivar_name); + if (info->verbose) { + name = get_pointer_32(ivar.ivar_name, offset, xleft, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", xleft, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t\t\tivar_type " << format("0x%08" PRIx32, ivar.ivar_type); + if (info->verbose) { + name = get_pointer_32(ivar.ivar_type, offset, xleft, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", xleft, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t\t ivar_offset " + << format("0x%08" PRIx32, ivar.ivar_offset) << "\n"; + } + } else { + outs() << " (not in an __OBJC section)\n"; + } + + outs() << "\t\t methods " << format("0x%08" PRIx32, objc_class->methodLists); + if (print_method_list(objc_class->methodLists, info)) + outs() << " (not in an __OBJC section)\n"; + + outs() << "\t\t cache " << format("0x%08" PRIx32, objc_class->cache) + << "\n"; + + outs() << "\t\tprotocols " << format("0x%08" PRIx32, objc_class->protocols); + if (print_protocol_list(objc_class->protocols, 16, info)) + outs() << " (not in an __OBJC section)\n"; +} + +static void print_objc_objc_category_t(struct objc_category_t *objc_category, + struct DisassembleInfo *info) { + uint32_t offset, left; + const char *name; + SectionRef S; + + outs() << "\t category name " + << format("0x%08" PRIx32, objc_category->category_name); + if (info->verbose) { + name = get_pointer_32(objc_category->category_name, offset, left, S, info, + true); + if (name != nullptr) + outs() << format(" %.*s", left, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t\t class name " + << format("0x%08" PRIx32, objc_category->class_name); + if (info->verbose) { + name = + get_pointer_32(objc_category->class_name, offset, left, S, info, true); + if (name != nullptr) + outs() << format(" %.*s", left, name); + else + outs() << " (not in an __OBJC section)"; + } + outs() << "\n"; + + outs() << "\t instance methods " + << format("0x%08" PRIx32, objc_category->instance_methods); + if (print_method_list(objc_category->instance_methods, info)) + outs() << " (not in an __OBJC section)\n"; + + outs() << "\t class methods " + << format("0x%08" PRIx32, objc_category->class_methods); + if (print_method_list(objc_category->class_methods, info)) + outs() << " (not in an __OBJC section)\n"; +} + +static void print_category64_t(uint64_t p, struct DisassembleInfo *info) { + struct category64_t c; + const char *r; + uint32_t offset, xoffset, left; + SectionRef S, xS; + const char *name, *sym_name; + uint64_t n_value; + + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&c, '\0', sizeof(struct category64_t)); + if (left < sizeof(struct category64_t)) { + memcpy(&c, r, left); + outs() << " (category_t entends past the end of the section)\n"; + } else + memcpy(&c, r, sizeof(struct category64_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(c); + + outs() << " name "; + sym_name = get_symbol_64(offset + offsetof(struct category64_t, name), S, + info, n_value, c.name); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (c.name != 0) + outs() << " + " << format("0x%" PRIx64, c.name); + } else + outs() << format("0x%" PRIx64, c.name); + name = get_pointer_64(c.name + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + outs() << " cls "; + sym_name = get_symbol_64(offset + offsetof(struct category64_t, cls), S, info, + n_value, c.cls); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (c.cls != 0) + outs() << " + " << format("0x%" PRIx64, c.cls); + } else + outs() << format("0x%" PRIx64, c.cls); + outs() << "\n"; + if (c.cls + n_value != 0) + print_class64_t(c.cls + n_value, info); + + outs() << " instanceMethods "; + sym_name = + get_symbol_64(offset + offsetof(struct category64_t, instanceMethods), S, + info, n_value, c.instanceMethods); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (c.instanceMethods != 0) + outs() << " + " << format("0x%" PRIx64, c.instanceMethods); + } else + outs() << format("0x%" PRIx64, c.instanceMethods); + outs() << "\n"; + if (c.instanceMethods + n_value != 0) + print_method_list64_t(c.instanceMethods + n_value, info, ""); + + outs() << " classMethods "; + sym_name = get_symbol_64(offset + offsetof(struct category64_t, classMethods), + S, info, n_value, c.classMethods); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (c.classMethods != 0) + outs() << " + " << format("0x%" PRIx64, c.classMethods); + } else + outs() << format("0x%" PRIx64, c.classMethods); + outs() << "\n"; + if (c.classMethods + n_value != 0) + print_method_list64_t(c.classMethods + n_value, info, ""); + + outs() << " protocols "; + sym_name = get_symbol_64(offset + offsetof(struct category64_t, protocols), S, + info, n_value, c.protocols); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (c.protocols != 0) + outs() << " + " << format("0x%" PRIx64, c.protocols); + } else + outs() << format("0x%" PRIx64, c.protocols); + outs() << "\n"; + if (c.protocols + n_value != 0) + print_protocol_list64_t(c.protocols + n_value, info); + + outs() << "instanceProperties "; + sym_name = + get_symbol_64(offset + offsetof(struct category64_t, instanceProperties), + S, info, n_value, c.instanceProperties); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (c.instanceProperties != 0) + outs() << " + " << format("0x%" PRIx64, c.instanceProperties); + } else + outs() << format("0x%" PRIx64, c.instanceProperties); + outs() << "\n"; + if (c.instanceProperties + n_value != 0) + print_objc_property_list64(c.instanceProperties + n_value, info); +} + +static void print_category32_t(uint32_t p, struct DisassembleInfo *info) { + struct category32_t c; + const char *r; + uint32_t offset, left; + SectionRef S, xS; + const char *name; -struct class_ro64_t { - uint32_t flags; - uint32_t instanceStart; - uint32_t instanceSize; - uint32_t reserved; - uint64_t ivarLayout; // const uint8_t * (64-bit pointer) - uint64_t name; // const char * (64-bit pointer) - uint64_t baseMethods; // const method_list_t * (64-bit pointer) - uint64_t baseProtocols; // const protocol_list_t * (64-bit pointer) - uint64_t ivars; // const ivar_list_t * (64-bit pointer) - uint64_t weakIvarLayout; // const uint8_t * (64-bit pointer) - uint64_t baseProperties; // const struct objc_property_list (64-bit pointer) -}; + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&c, '\0', sizeof(struct category32_t)); + if (left < sizeof(struct category32_t)) { + memcpy(&c, r, left); + outs() << " (category_t entends past the end of the section)\n"; + } else + memcpy(&c, r, sizeof(struct category32_t)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(c); -inline void swapStruct(struct cfstring64_t &cfs) { - sys::swapByteOrder(cfs.isa); - sys::swapByteOrder(cfs.flags); - sys::swapByteOrder(cfs.characters); - sys::swapByteOrder(cfs.length); -} + outs() << " name " << format("0x%" PRIx32, c.name); + name = get_symbol_32(offset + offsetof(struct category32_t, name), S, info, + c.name); + if (name != NULL) + outs() << " " << name; + outs() << "\n"; -inline void swapStruct(struct class64_t &c) { - sys::swapByteOrder(c.isa); - sys::swapByteOrder(c.superclass); - sys::swapByteOrder(c.cache); - sys::swapByteOrder(c.vtable); - sys::swapByteOrder(c.data); + outs() << " cls " << format("0x%" PRIx32, c.cls) << "\n"; + if (c.cls != 0) + print_class32_t(c.cls, info); + outs() << " instanceMethods " << format("0x%" PRIx32, c.instanceMethods) + << "\n"; + if (c.instanceMethods != 0) + print_method_list32_t(c.instanceMethods, info, ""); + outs() << " classMethods " << format("0x%" PRIx32, c.classMethods) + << "\n"; + if (c.classMethods != 0) + print_method_list32_t(c.classMethods, info, ""); + outs() << " protocols " << format("0x%" PRIx32, c.protocols) << "\n"; + if (c.protocols != 0) + print_protocol_list32_t(c.protocols, info); + outs() << "instanceProperties " << format("0x%" PRIx32, c.instanceProperties) + << "\n"; + if (c.instanceProperties != 0) + print_objc_property_list32(c.instanceProperties, info); } -inline void swapStruct(struct class_ro64_t &cro) { - sys::swapByteOrder(cro.flags); - sys::swapByteOrder(cro.instanceStart); - sys::swapByteOrder(cro.instanceSize); - sys::swapByteOrder(cro.reserved); - sys::swapByteOrder(cro.ivarLayout); - sys::swapByteOrder(cro.name); - sys::swapByteOrder(cro.baseMethods); - sys::swapByteOrder(cro.baseProtocols); - sys::swapByteOrder(cro.ivars); - sys::swapByteOrder(cro.weakIvarLayout); - sys::swapByteOrder(cro.baseProperties); +static void print_message_refs64(SectionRef S, struct DisassembleInfo *info) { + uint32_t i, left, offset, xoffset; + uint64_t p, n_value; + struct message_ref64 mr; + const char *name, *sym_name; + const char *r; + SectionRef xS; + + if (S == SectionRef()) + return; + + StringRef SectName; + S.getName(SectName); + DataRefImpl Ref = S.getRawDataRefImpl(); + StringRef SegName = info->O->getSectionFinalSegmentName(Ref); + outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; + offset = 0; + for (i = 0; i < S.getSize(); i += sizeof(struct message_ref64)) { + p = S.getAddress() + i; + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&mr, '\0', sizeof(struct message_ref64)); + if (left < sizeof(struct message_ref64)) { + memcpy(&mr, r, left); + outs() << " (message_ref entends past the end of the section)\n"; + } else + memcpy(&mr, r, sizeof(struct message_ref64)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(mr); + + outs() << " imp "; + name = get_symbol_64(offset + offsetof(struct message_ref64, imp), S, info, + n_value, mr.imp); + if (n_value != 0) { + outs() << format("0x%" PRIx64, n_value) << " "; + if (mr.imp != 0) + outs() << "+ " << format("0x%" PRIx64, mr.imp) << " "; + } else + outs() << format("0x%" PRIx64, mr.imp) << " "; + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + outs() << " sel "; + sym_name = get_symbol_64(offset + offsetof(struct message_ref64, sel), S, + info, n_value, mr.sel); + if (n_value != 0) { + if (info->verbose && sym_name != nullptr) + outs() << sym_name; + else + outs() << format("0x%" PRIx64, n_value); + if (mr.sel != 0) + outs() << " + " << format("0x%" PRIx64, mr.sel); + } else + outs() << format("0x%" PRIx64, mr.sel); + name = get_pointer_64(mr.sel + n_value, xoffset, left, xS, info); + if (name != nullptr) + outs() << format(" %.*s", left, name); + outs() << "\n"; + + offset += sizeof(struct message_ref64); + } } -static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue, - struct DisassembleInfo *info); +static void print_message_refs32(SectionRef S, struct DisassembleInfo *info) { + uint32_t i, left, offset, xoffset, p; + struct message_ref32 mr; + const char *name, *r; + SectionRef xS; -// get_objc2_64bit_class_name() is used for disassembly and is passed a pointer -// to an Objective-C class and returns the class name. It is also passed the -// address of the pointer, so when the pointer is zero as it can be in an .o -// file, that is used to look for an external relocation entry with a symbol -// name. -const char *get_objc2_64bit_class_name(uint64_t pointer_value, - uint64_t ReferenceValue, - struct DisassembleInfo *info) { + if (S == SectionRef()) + return; + + StringRef SectName; + S.getName(SectName); + DataRefImpl Ref = S.getRawDataRefImpl(); + StringRef SegName = info->O->getSectionFinalSegmentName(Ref); + outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; + offset = 0; + for (i = 0; i < S.getSize(); i += sizeof(struct message_ref64)) { + p = S.getAddress() + i; + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&mr, '\0', sizeof(struct message_ref32)); + if (left < sizeof(struct message_ref32)) { + memcpy(&mr, r, left); + outs() << " (message_ref entends past the end of the section)\n"; + } else + memcpy(&mr, r, sizeof(struct message_ref32)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(mr); + + outs() << " imp " << format("0x%" PRIx32, mr.imp); + name = get_symbol_32(offset + offsetof(struct message_ref32, imp), S, info, + mr.imp); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + outs() << " sel " << format("0x%" PRIx32, mr.sel); + name = get_pointer_32(mr.sel, xoffset, left, xS, info); + if (name != nullptr) + outs() << " " << name; + outs() << "\n"; + + offset += sizeof(struct message_ref32); + } +} + +static void print_image_info64(SectionRef S, struct DisassembleInfo *info) { + uint32_t left, offset, swift_version; + uint64_t p; + struct objc_image_info64 o; const char *r; - uint32_t offset, left; - SectionRef S; - // The pointer_value can be 0 in an object file and have a relocation - // entry for the class symbol at the ReferenceValue (the address of the - // pointer). - if (pointer_value == 0) { - r = get_pointer_64(ReferenceValue, offset, left, S, info); - if (r == nullptr || left < sizeof(uint64_t)) - return nullptr; - uint64_t n_value; - const char *symbol_name = get_symbol_64(offset, S, info, n_value); - if (symbol_name == nullptr) - return nullptr; - const char *class_name = strrchr(symbol_name, '$'); - if (class_name != nullptr && class_name[1] == '_' && class_name[2] != '\0') - return class_name + 2; + StringRef SectName; + S.getName(SectName); + DataRefImpl Ref = S.getRawDataRefImpl(); + StringRef SegName = info->O->getSectionFinalSegmentName(Ref); + outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; + p = S.getAddress(); + r = get_pointer_64(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&o, '\0', sizeof(struct objc_image_info64)); + if (left < sizeof(struct objc_image_info64)) { + memcpy(&o, r, left); + outs() << " (objc_image_info entends past the end of the section)\n"; + } else + memcpy(&o, r, sizeof(struct objc_image_info64)); + if (info->O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(o); + outs() << " version " << o.version << "\n"; + outs() << " flags " << format("0x%" PRIx32, o.flags); + if (o.flags & OBJC_IMAGE_IS_REPLACEMENT) + outs() << " OBJC_IMAGE_IS_REPLACEMENT"; + if (o.flags & OBJC_IMAGE_SUPPORTS_GC) + outs() << " OBJC_IMAGE_SUPPORTS_GC"; + swift_version = (o.flags >> 8) & 0xff; + if (swift_version != 0) { + if (swift_version == 1) + outs() << " Swift 1.0"; + else if (swift_version == 2) + outs() << " Swift 1.1"; else - return nullptr; + outs() << " unknown future Swift version (" << swift_version << ")"; } + outs() << "\n"; +} - // The case were the pointer_value is non-zero and points to a class defined - // in this Mach-O file. - r = get_pointer_64(pointer_value, offset, left, S, info); - if (r == nullptr || left < sizeof(struct class64_t)) - return nullptr; - struct class64_t c; - memcpy(&c, r, sizeof(struct class64_t)); +static void print_image_info32(SectionRef S, struct DisassembleInfo *info) { + uint32_t left, offset, swift_version, p; + struct objc_image_info32 o; + const char *r; + + StringRef SectName; + S.getName(SectName); + DataRefImpl Ref = S.getRawDataRefImpl(); + StringRef SegName = info->O->getSectionFinalSegmentName(Ref); + outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; + p = S.getAddress(); + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&o, '\0', sizeof(struct objc_image_info32)); + if (left < sizeof(struct objc_image_info32)) { + memcpy(&o, r, left); + outs() << " (objc_image_info entends past the end of the section)\n"; + } else + memcpy(&o, r, sizeof(struct objc_image_info32)); if (info->O->isLittleEndian() != sys::IsLittleEndianHost) - swapStruct(c); - if (c.data == 0) - return nullptr; - r = get_pointer_64(c.data, offset, left, S, info); - if (r == nullptr || left < sizeof(struct class_ro64_t)) - return nullptr; - struct class_ro64_t cro; - memcpy(&cro, r, sizeof(struct class_ro64_t)); + swapStruct(o); + outs() << " version " << o.version << "\n"; + outs() << " flags " << format("0x%" PRIx32, o.flags); + if (o.flags & OBJC_IMAGE_IS_REPLACEMENT) + outs() << " OBJC_IMAGE_IS_REPLACEMENT"; + if (o.flags & OBJC_IMAGE_SUPPORTS_GC) + outs() << " OBJC_IMAGE_SUPPORTS_GC"; + swift_version = (o.flags >> 8) & 0xff; + if (swift_version != 0) { + if (swift_version == 1) + outs() << " Swift 1.0"; + else if (swift_version == 2) + outs() << " Swift 1.1"; + else + outs() << " unknown future Swift version (" << swift_version << ")"; + } + outs() << "\n"; +} + +static void print_image_info(SectionRef S, struct DisassembleInfo *info) { + uint32_t left, offset, p; + struct imageInfo_t o; + const char *r; + + StringRef SectName; + S.getName(SectName); + DataRefImpl Ref = S.getRawDataRefImpl(); + StringRef SegName = info->O->getSectionFinalSegmentName(Ref); + outs() << "Contents of (" << SegName << "," << SectName << ") section\n"; + p = S.getAddress(); + r = get_pointer_32(p, offset, left, S, info); + if (r == nullptr) + return; + memset(&o, '\0', sizeof(struct imageInfo_t)); + if (left < sizeof(struct imageInfo_t)) { + memcpy(&o, r, left); + outs() << " (imageInfo entends past the end of the section)\n"; + } else + memcpy(&o, r, sizeof(struct imageInfo_t)); if (info->O->isLittleEndian() != sys::IsLittleEndianHost) - swapStruct(cro); - if (cro.name == 0) - return nullptr; - const char *name = get_pointer_64(cro.name, offset, left, S, info); - return name; + swapStruct(o); + outs() << " version " << o.version << "\n"; + outs() << " flags " << format("0x%" PRIx32, o.flags); + if (o.flags & 0x1) + outs() << " F&C"; + if (o.flags & 0x2) + outs() << " GC"; + if (o.flags & 0x4) + outs() << " GC-only"; + else + outs() << " RR"; + outs() << "\n"; +} + +static void printObjc2_64bit_MetaData(MachOObjectFile *O, bool verbose) { + SymbolAddressMap AddrMap; + if (verbose) + CreateSymbolAddressMap(O, &AddrMap); + + std::vector Sections; + for (const SectionRef &Section : O->sections()) { + StringRef SectName; + Section.getName(SectName); + Sections.push_back(Section); + } + + struct DisassembleInfo info; + // Set up the block of info used by the Symbolizer call backs. + info.verbose = verbose; + info.O = O; + info.AddrMap = &AddrMap; + info.Sections = &Sections; + info.class_name = nullptr; + info.selector_name = nullptr; + info.method = nullptr; + info.demangled_name = nullptr; + info.bindtable = nullptr; + info.adrp_addr = 0; + info.adrp_inst = 0; + + const SectionRef CL = get_section(O, "__OBJC2", "__class_list"); + if (CL != SectionRef()) { + info.S = CL; + walk_pointer_list_64("class", CL, O, &info, print_class64_t); + } else { + const SectionRef CL = get_section(O, "__DATA", "__objc_classlist"); + info.S = CL; + walk_pointer_list_64("class", CL, O, &info, print_class64_t); + } + + const SectionRef CR = get_section(O, "__OBJC2", "__class_refs"); + if (CR != SectionRef()) { + info.S = CR; + walk_pointer_list_64("class refs", CR, O, &info, nullptr); + } else { + const SectionRef CR = get_section(O, "__DATA", "__objc_classrefs"); + info.S = CR; + walk_pointer_list_64("class refs", CR, O, &info, nullptr); + } + + const SectionRef SR = get_section(O, "__OBJC2", "__super_refs"); + if (SR != SectionRef()) { + info.S = SR; + walk_pointer_list_64("super refs", SR, O, &info, nullptr); + } else { + const SectionRef SR = get_section(O, "__DATA", "__objc_superrefs"); + info.S = SR; + walk_pointer_list_64("super refs", SR, O, &info, nullptr); + } + + const SectionRef CA = get_section(O, "__OBJC2", "__category_list"); + if (CA != SectionRef()) { + info.S = CA; + walk_pointer_list_64("category", CA, O, &info, print_category64_t); + } else { + const SectionRef CA = get_section(O, "__DATA", "__objc_catlist"); + info.S = CA; + walk_pointer_list_64("category", CA, O, &info, print_category64_t); + } + + const SectionRef PL = get_section(O, "__OBJC2", "__protocol_list"); + if (PL != SectionRef()) { + info.S = PL; + walk_pointer_list_64("protocol", PL, O, &info, nullptr); + } else { + const SectionRef PL = get_section(O, "__DATA", "__objc_protolist"); + info.S = PL; + walk_pointer_list_64("protocol", PL, O, &info, nullptr); + } + + const SectionRef MR = get_section(O, "__OBJC2", "__message_refs"); + if (MR != SectionRef()) { + info.S = MR; + print_message_refs64(MR, &info); + } else { + const SectionRef MR = get_section(O, "__DATA", "__objc_msgrefs"); + info.S = MR; + print_message_refs64(MR, &info); + } + + const SectionRef II = get_section(O, "__OBJC2", "__image_info"); + if (II != SectionRef()) { + info.S = II; + print_image_info64(II, &info); + } else { + const SectionRef II = get_section(O, "__DATA", "__objc_imageinfo"); + info.S = II; + print_image_info64(II, &info); + } + + if (info.bindtable != nullptr) + delete info.bindtable; +} + +static void printObjc2_32bit_MetaData(MachOObjectFile *O, bool verbose) { + SymbolAddressMap AddrMap; + if (verbose) + CreateSymbolAddressMap(O, &AddrMap); + + std::vector Sections; + for (const SectionRef &Section : O->sections()) { + StringRef SectName; + Section.getName(SectName); + Sections.push_back(Section); + } + + struct DisassembleInfo info; + // Set up the block of info used by the Symbolizer call backs. + info.verbose = verbose; + info.O = O; + info.AddrMap = &AddrMap; + info.Sections = &Sections; + info.class_name = nullptr; + info.selector_name = nullptr; + info.method = nullptr; + info.demangled_name = nullptr; + info.bindtable = nullptr; + info.adrp_addr = 0; + info.adrp_inst = 0; + + const SectionRef CL = get_section(O, "__OBJC2", "__class_list"); + if (CL != SectionRef()) { + info.S = CL; + walk_pointer_list_32("class", CL, O, &info, print_class32_t); + } else { + const SectionRef CL = get_section(O, "__DATA", "__objc_classlist"); + info.S = CL; + walk_pointer_list_32("class", CL, O, &info, print_class32_t); + } + + const SectionRef CR = get_section(O, "__OBJC2", "__class_refs"); + if (CR != SectionRef()) { + info.S = CR; + walk_pointer_list_32("class refs", CR, O, &info, nullptr); + } else { + const SectionRef CR = get_section(O, "__DATA", "__objc_classrefs"); + info.S = CR; + walk_pointer_list_32("class refs", CR, O, &info, nullptr); + } + + const SectionRef SR = get_section(O, "__OBJC2", "__super_refs"); + if (SR != SectionRef()) { + info.S = SR; + walk_pointer_list_32("super refs", SR, O, &info, nullptr); + } else { + const SectionRef SR = get_section(O, "__DATA", "__objc_superrefs"); + info.S = SR; + walk_pointer_list_32("super refs", SR, O, &info, nullptr); + } + + const SectionRef CA = get_section(O, "__OBJC2", "__category_list"); + if (CA != SectionRef()) { + info.S = CA; + walk_pointer_list_32("category", CA, O, &info, print_category32_t); + } else { + const SectionRef CA = get_section(O, "__DATA", "__objc_catlist"); + info.S = CA; + walk_pointer_list_32("category", CA, O, &info, print_category32_t); + } + + const SectionRef PL = get_section(O, "__OBJC2", "__protocol_list"); + if (PL != SectionRef()) { + info.S = PL; + walk_pointer_list_32("protocol", PL, O, &info, nullptr); + } else { + const SectionRef PL = get_section(O, "__DATA", "__objc_protolist"); + info.S = PL; + walk_pointer_list_32("protocol", PL, O, &info, nullptr); + } + + const SectionRef MR = get_section(O, "__OBJC2", "__message_refs"); + if (MR != SectionRef()) { + info.S = MR; + print_message_refs32(MR, &info); + } else { + const SectionRef MR = get_section(O, "__DATA", "__objc_msgrefs"); + info.S = MR; + print_message_refs32(MR, &info); + } + + const SectionRef II = get_section(O, "__OBJC2", "__image_info"); + if (II != SectionRef()) { + info.S = II; + print_image_info32(II, &info); + } else { + const SectionRef II = get_section(O, "__DATA", "__objc_imageinfo"); + info.S = II; + print_image_info32(II, &info); + } +} + +static bool printObjc1_32bit_MetaData(MachOObjectFile *O, bool verbose) { + uint32_t i, j, p, offset, xoffset, left, defs_left, def; + const char *r, *name, *defs; + struct objc_module_t module; + SectionRef S, xS; + struct objc_symtab_t symtab; + struct objc_class_t objc_class; + struct objc_category_t objc_category; + + outs() << "Objective-C segment\n"; + S = get_section(O, "__OBJC", "__module_info"); + if (S == SectionRef()) + return false; + + SymbolAddressMap AddrMap; + if (verbose) + CreateSymbolAddressMap(O, &AddrMap); + + std::vector Sections; + for (const SectionRef &Section : O->sections()) { + StringRef SectName; + Section.getName(SectName); + Sections.push_back(Section); + } + + struct DisassembleInfo info; + // Set up the block of info used by the Symbolizer call backs. + info.verbose = verbose; + info.O = O; + info.AddrMap = &AddrMap; + info.Sections = &Sections; + info.class_name = nullptr; + info.selector_name = nullptr; + info.method = nullptr; + info.demangled_name = nullptr; + info.bindtable = nullptr; + info.adrp_addr = 0; + info.adrp_inst = 0; + + for (i = 0; i < S.getSize(); i += sizeof(struct objc_module_t)) { + p = S.getAddress() + i; + r = get_pointer_32(p, offset, left, S, &info, true); + if (r == nullptr) + return true; + memset(&module, '\0', sizeof(struct objc_module_t)); + if (left < sizeof(struct objc_module_t)) { + memcpy(&module, r, left); + outs() << " (module extends past end of __module_info section)\n"; + } else + memcpy(&module, r, sizeof(struct objc_module_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(module); + + outs() << "Module " << format("0x%" PRIx32, p) << "\n"; + outs() << " version " << module.version << "\n"; + outs() << " size " << module.size << "\n"; + outs() << " name "; + name = get_pointer_32(module.name, xoffset, left, xS, &info, true); + if (name != nullptr) + outs() << format("%.*s", left, name); + else + outs() << format("0x%08" PRIx32, module.name) + << "(not in an __OBJC section)"; + outs() << "\n"; + + r = get_pointer_32(module.symtab, xoffset, left, xS, &info, true); + if (module.symtab == 0 || r == nullptr) { + outs() << " symtab " << format("0x%08" PRIx32, module.symtab) + << " (not in an __OBJC section)\n"; + continue; + } + outs() << " symtab " << format("0x%08" PRIx32, module.symtab) << "\n"; + memset(&symtab, '\0', sizeof(struct objc_symtab_t)); + defs_left = 0; + defs = nullptr; + if (left < sizeof(struct objc_symtab_t)) { + memcpy(&symtab, r, left); + outs() << "\tsymtab extends past end of an __OBJC section)\n"; + } else { + memcpy(&symtab, r, sizeof(struct objc_symtab_t)); + if (left > sizeof(struct objc_symtab_t)) { + defs_left = left - sizeof(struct objc_symtab_t); + defs = r + sizeof(struct objc_symtab_t); + } + } + if (O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(symtab); + + outs() << "\tsel_ref_cnt " << symtab.sel_ref_cnt << "\n"; + r = get_pointer_32(symtab.refs, xoffset, left, xS, &info, true); + outs() << "\trefs " << format("0x%08" PRIx32, symtab.refs); + if (r == nullptr) + outs() << " (not in an __OBJC section)"; + outs() << "\n"; + outs() << "\tcls_def_cnt " << symtab.cls_def_cnt << "\n"; + outs() << "\tcat_def_cnt " << symtab.cat_def_cnt << "\n"; + if (symtab.cls_def_cnt > 0) + outs() << "\tClass Definitions\n"; + for (j = 0; j < symtab.cls_def_cnt; j++) { + if ((j + 1) * sizeof(uint32_t) > defs_left) { + outs() << "\t(remaining class defs entries entends past the end of the " + << "section)\n"; + break; + } + memcpy(&def, defs + j * sizeof(uint32_t), sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(def); + + r = get_pointer_32(def, xoffset, left, xS, &info, true); + outs() << "\tdefs[" << j << "] " << format("0x%08" PRIx32, def); + if (r != nullptr) { + if (left > sizeof(struct objc_class_t)) { + outs() << "\n"; + memcpy(&objc_class, r, sizeof(struct objc_class_t)); + } else { + outs() << " (entends past the end of the section)\n"; + memset(&objc_class, '\0', sizeof(struct objc_class_t)); + memcpy(&objc_class, r, left); + } + if (O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(objc_class); + print_objc_class_t(&objc_class, &info); + } else { + outs() << "(not in an __OBJC section)\n"; + } + + if (CLS_GETINFO(&objc_class, CLS_CLASS)) { + outs() << "\tMeta Class"; + r = get_pointer_32(objc_class.isa, xoffset, left, xS, &info, true); + if (r != nullptr) { + if (left > sizeof(struct objc_class_t)) { + outs() << "\n"; + memcpy(&objc_class, r, sizeof(struct objc_class_t)); + } else { + outs() << " (entends past the end of the section)\n"; + memset(&objc_class, '\0', sizeof(struct objc_class_t)); + memcpy(&objc_class, r, left); + } + if (O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(objc_class); + print_objc_class_t(&objc_class, &info); + } else { + outs() << "(not in an __OBJC section)\n"; + } + } + } + if (symtab.cat_def_cnt > 0) + outs() << "\tCategory Definitions\n"; + for (j = 0; j < symtab.cat_def_cnt; j++) { + if ((j + symtab.cls_def_cnt + 1) * sizeof(uint32_t) > defs_left) { + outs() << "\t(remaining category defs entries entends past the end of " + << "the section)\n"; + break; + } + memcpy(&def, defs + (j + symtab.cls_def_cnt) * sizeof(uint32_t), + sizeof(uint32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(def); + + r = get_pointer_32(def, xoffset, left, xS, &info, true); + outs() << "\tdefs[" << j + symtab.cls_def_cnt << "] " + << format("0x%08" PRIx32, def); + if (r != nullptr) { + if (left > sizeof(struct objc_category_t)) { + outs() << "\n"; + memcpy(&objc_category, r, sizeof(struct objc_category_t)); + } else { + outs() << " (entends past the end of the section)\n"; + memset(&objc_category, '\0', sizeof(struct objc_category_t)); + memcpy(&objc_category, r, left); + } + if (O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(objc_category); + print_objc_objc_category_t(&objc_category, &info); + } else { + outs() << "(not in an __OBJC section)\n"; + } + } + } + const SectionRef II = get_section(O, "__OBJC", "__image_info"); + if (II != SectionRef()) + print_image_info(II, &info); + + return true; } -// get_objc2_64bit_cfstring_name is used for disassembly and is passed a -// pointer to a cfstring and returns its name or nullptr. -const char *get_objc2_64bit_cfstring_name(uint64_t ReferenceValue, - struct DisassembleInfo *info) { - const char *r, *name; - uint32_t offset, left; - SectionRef S; - struct cfstring64_t cfs; - uint64_t cfs_characters; +static void DumpProtocolSection(MachOObjectFile *O, const char *sect, + uint32_t size, uint32_t addr) { + SymbolAddressMap AddrMap; + CreateSymbolAddressMap(O, &AddrMap); - r = get_pointer_64(ReferenceValue, offset, left, S, info); - if (r == nullptr || left < sizeof(struct cfstring64_t)) - return nullptr; - memcpy(&cfs, r, sizeof(struct cfstring64_t)); - if (info->O->isLittleEndian() != sys::IsLittleEndianHost) - swapStruct(cfs); - if (cfs.characters == 0) { - uint64_t n_value; - const char *symbol_name = get_symbol_64( - offset + offsetof(struct cfstring64_t, characters), S, info, n_value); - if (symbol_name == nullptr) - return nullptr; - cfs_characters = n_value; - } else - cfs_characters = cfs.characters; - name = get_pointer_64(cfs_characters, offset, left, S, info); + std::vector Sections; + for (const SectionRef &Section : O->sections()) { + StringRef SectName; + Section.getName(SectName); + Sections.push_back(Section); + } - return name; + struct DisassembleInfo info; + // Set up the block of info used by the Symbolizer call backs. + info.verbose = true; + info.O = O; + info.AddrMap = &AddrMap; + info.Sections = &Sections; + info.class_name = nullptr; + info.selector_name = nullptr; + info.method = nullptr; + info.demangled_name = nullptr; + info.bindtable = nullptr; + info.adrp_addr = 0; + info.adrp_inst = 0; + + const char *p; + struct objc_protocol_t protocol; + uint32_t left, paddr; + for (p = sect; p < sect + size; p += sizeof(struct objc_protocol_t)) { + memset(&protocol, '\0', sizeof(struct objc_protocol_t)); + left = size - (p - sect); + if (left < sizeof(struct objc_protocol_t)) { + outs() << "Protocol extends past end of __protocol section\n"; + memcpy(&protocol, p, left); + } else + memcpy(&protocol, p, sizeof(struct objc_protocol_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + swapStruct(protocol); + paddr = addr + (p - sect); + outs() << "Protocol " << format("0x%" PRIx32, paddr); + if (print_protocol(paddr, 0, &info)) + outs() << "(not in an __OBJC section)\n"; + } } -// get_objc2_64bit_selref() is used for disassembly and is passed a the address -// of a pointer to an Objective-C selector reference when the pointer value is -// zero as in a .o file and is likely to have a external relocation entry with -// who's symbol's n_value is the real pointer to the selector name. If that is -// the case the real pointer to the selector name is returned else 0 is -// returned -uint64_t get_objc2_64bit_selref(uint64_t ReferenceValue, - struct DisassembleInfo *info) { - uint32_t offset, left; - SectionRef S; - - const char *r = get_pointer_64(ReferenceValue, offset, left, S, info); - if (r == nullptr || left < sizeof(uint64_t)) - return 0; - uint64_t n_value; - const char *symbol_name = get_symbol_64(offset, S, info, n_value); - if (symbol_name == nullptr) - return 0; - return n_value; +static void printObjcMetaData(MachOObjectFile *O, bool verbose) { + if (O->is64Bit()) + printObjc2_64bit_MetaData(O, verbose); + else { + MachO::mach_header H; + H = O->getHeader(); + if (H.cputype == MachO::CPU_TYPE_ARM) + printObjc2_32bit_MetaData(O, verbose); + else { + // This is the 32-bit non-arm cputype case. Which is normally + // the first Objective-C ABI. But it may be the case of a + // binary for the iOS simulator which is the second Objective-C + // ABI. In that case printObjc1_32bit_MetaData() will determine that + // and return false. + if (printObjc1_32bit_MetaData(O, verbose) == false) + printObjc2_32bit_MetaData(O, verbose); + } + } } // GuessLiteralPointer returns a string which for the item in the Mach-O file @@ -1184,15 +5583,10 @@ uint64_t get_objc2_64bit_selref(uint64_t ReferenceValue, // // If there is no item in the Mach-O file for the address passed in as // ReferenceValue nullptr is returned and ReferenceType is unchanged. -const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, - uint64_t *ReferenceType, - struct DisassembleInfo *info) { - // TODO: This rouine's code and the routines it calls are only work with - // x86_64 Mach-O files for now. - unsigned int Arch = info->O->getArch(); - if (Arch != Triple::x86_64) - return nullptr; - +static const char *GuessLiteralPointer(uint64_t ReferenceValue, + uint64_t ReferencePC, + uint64_t *ReferenceType, + struct DisassembleInfo *info) { // First see if there is an external relocation entry at the ReferencePC. uint64_t sect_addr = info->S.getAddress(); uint64_t sect_offset = ReferencePC - sect_addr; @@ -1202,8 +5596,7 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, bool isExtern = false; SymbolRef Symbol; for (const RelocationRef &Reloc : info->S.relocations()) { - uint64_t RelocOffset; - Reloc.getOffset(RelocOffset); + uint64_t RelocOffset = Reloc.getOffset(); if (RelocOffset == sect_offset) { Rel = Reloc.getRawDataRefImpl(); RE = info->O->getRelocation(Rel); @@ -1224,7 +5617,7 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, if (info->O->getAnyRelocationPCRel(RE)) { unsigned Type = info->O->getAnyRelocationType(RE); if (Type == MachO::X86_64_RELOC_SIGNED) { - Symbol.getAddress(ReferenceValue); + ReferenceValue = Symbol.getValue(); } } } @@ -1234,7 +5627,7 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, bool classref, selref, msgref, cfstring; uint64_t pointer_value = GuessPointerPointer(ReferenceValue, info, classref, selref, msgref, cfstring); - if (classref == true && pointer_value == 0) { + if (classref && pointer_value == 0) { // Note the ReferenceValue is a pointer into the __objc_classrefs section. // And the pointer_value in that section is typically zero as it will be // set by dyld as part of the "bind information". @@ -1250,7 +5643,7 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, } } - if (classref == true) { + if (classref) { *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref; const char *name = get_objc2_64bit_class_name(pointer_value, ReferenceValue, info); @@ -1261,13 +5654,13 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, return name; } - if (cfstring == true) { + if (cfstring) { *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref; const char *name = get_objc2_64bit_cfstring_name(ReferenceValue, info); return name; } - if (selref == true && pointer_value == 0) + if (selref && pointer_value == 0) pointer_value = get_objc2_64bit_selref(ReferenceValue, info); if (pointer_value != 0) @@ -1275,10 +5668,10 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, const char *name = GuessCstringPointer(ReferenceValue, info); if (name) { - if (pointer_value != 0 && selref == true) { + if (pointer_value != 0 && selref) { *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref; info->selector_name = name; - } else if (pointer_value != 0 && msgref == true) { + } else if (pointer_value != 0 && msgref) { info->class_name = nullptr; *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref; info->selector_name = name; @@ -1326,19 +5719,20 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, // SymbolValue is checked to be an address of literal pointer, symbol pointer, // or an Objective-C meta data reference. If so the output ReferenceType is // set to correspond to that as well as setting the ReferenceName. -const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue, - uint64_t *ReferenceType, - uint64_t ReferencePC, - const char **ReferenceName) { +static const char *SymbolizerSymbolLookUp(void *DisInfo, + uint64_t ReferenceValue, + uint64_t *ReferenceType, + uint64_t ReferencePC, + const char **ReferenceName) { struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo; // If no verbose symbolic information is wanted then just return nullptr. - if (info->verbose == false) { + if (!info->verbose) { *ReferenceName = nullptr; *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; return nullptr; } - const char *SymbolName = GuessSymbolName(ReferenceValue, info); + const char *SymbolName = GuessSymbolName(ReferenceValue, info->AddrMap); if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) { *ReferenceName = GuessIndirectSymbol(ReferenceValue, info); @@ -1369,6 +5763,86 @@ const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue, method_reference(info, ReferenceType, ReferenceName); else *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + // If this is arm64 and the reference is an adrp instruction save the + // instruction, passed in ReferenceValue and the address of the instruction + // for use later if we see and add immediate instruction. + } else if (info->O->getArch() == Triple::aarch64 && + *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { + info->adrp_inst = ReferenceValue; + info->adrp_addr = ReferencePC; + SymbolName = nullptr; + *ReferenceName = nullptr; + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + // If this is arm64 and reference is an add immediate instruction and we + // have + // seen an adrp instruction just before it and the adrp's Xd register + // matches + // this add's Xn register reconstruct the value being referenced and look to + // see if it is a literal pointer. Note the add immediate instruction is + // passed in ReferenceValue. + } else if (info->O->getArch() == Triple::aarch64 && + *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && + ReferencePC - 4 == info->adrp_addr && + (info->adrp_inst & 0x9f000000) == 0x90000000 && + (info->adrp_inst & 0x1f) == ((ReferenceValue >> 5) & 0x1f)) { + uint32_t addxri_inst; + uint64_t adrp_imm, addxri_imm; + + adrp_imm = + ((info->adrp_inst & 0x00ffffe0) >> 3) | ((info->adrp_inst >> 29) & 0x3); + if (info->adrp_inst & 0x0200000) + adrp_imm |= 0xfffffffffc000000LL; + + addxri_inst = ReferenceValue; + addxri_imm = (addxri_inst >> 10) & 0xfff; + if (((addxri_inst >> 22) & 0x3) == 1) + addxri_imm <<= 12; + + ReferenceValue = (info->adrp_addr & 0xfffffffffffff000LL) + + (adrp_imm << 12) + addxri_imm; + + *ReferenceName = + GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info); + if (*ReferenceName == nullptr) + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + // If this is arm64 and the reference is a load register instruction and we + // have seen an adrp instruction just before it and the adrp's Xd register + // matches this add's Xn register reconstruct the value being referenced and + // look to see if it is a literal pointer. Note the load register + // instruction is passed in ReferenceValue. + } else if (info->O->getArch() == Triple::aarch64 && + *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_LDRXui && + ReferencePC - 4 == info->adrp_addr && + (info->adrp_inst & 0x9f000000) == 0x90000000 && + (info->adrp_inst & 0x1f) == ((ReferenceValue >> 5) & 0x1f)) { + uint32_t ldrxui_inst; + uint64_t adrp_imm, ldrxui_imm; + + adrp_imm = + ((info->adrp_inst & 0x00ffffe0) >> 3) | ((info->adrp_inst >> 29) & 0x3); + if (info->adrp_inst & 0x0200000) + adrp_imm |= 0xfffffffffc000000LL; + + ldrxui_inst = ReferenceValue; + ldrxui_imm = (ldrxui_inst >> 10) & 0xfff; + + ReferenceValue = (info->adrp_addr & 0xfffffffffffff000LL) + + (adrp_imm << 12) + (ldrxui_imm << 3); + + *ReferenceName = + GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info); + if (*ReferenceName == nullptr) + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + } + // If this arm64 and is an load register (PC-relative) instruction the + // ReferenceValue is the PC plus the immediate value. + else if (info->O->getArch() == Triple::aarch64 && + (*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_LDRXl || + *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADR)) { + *ReferenceName = + GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info); + if (*ReferenceName == nullptr) + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; } #if HAVE_CXXABI_H else if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) { @@ -1422,8 +5896,8 @@ static void emitComments(raw_svector_ostream &CommentStream, CommentStream.resync(); } -static void DisassembleInputMachO2(StringRef Filename, - MachOObjectFile *MachOOF) { +static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, + StringRef DisSegName, StringRef DisSectName) { const char *McpuDefault = nullptr; const Target *ThumbTarget = nullptr; const Target *TheTarget = GetTarget(MachOOF, &McpuDefault, &ThumbTarget); @@ -1435,15 +5909,9 @@ static void DisassembleInputMachO2(StringRef Filename, MCPU = McpuDefault; std::unique_ptr InstrInfo(TheTarget->createMCInstrInfo()); - std::unique_ptr InstrAnalysis( - TheTarget->createMCInstrAnalysis(InstrInfo.get())); std::unique_ptr ThumbInstrInfo; - std::unique_ptr ThumbInstrAnalysis; - if (ThumbTarget) { + if (ThumbTarget) ThumbInstrInfo.reset(ThumbTarget->createMCInstrInfo()); - ThumbInstrAnalysis.reset( - ThumbTarget->createMCInstrAnalysis(ThumbInstrInfo.get())); - } // Package up features to be passed to target/subtarget std::string FeaturesStr; @@ -1471,20 +5939,25 @@ static void DisassembleInputMachO2(StringRef Filename, if (RelInfo) { Symbolizer.reset(TheTarget->createMCSymbolizer( TripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp, - &SymbolizerInfo, &Ctx, RelInfo.release())); + &SymbolizerInfo, &Ctx, std::move(RelInfo))); DisAsm->setSymbolizer(std::move(Symbolizer)); } int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); std::unique_ptr IP(TheTarget->createMCInstPrinter( - AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI, *STI)); + Triple(TripleName), AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI)); // Set the display preference for hex vs. decimal immediates. IP->setPrintImmHex(PrintImmHex); // Comment stream and backing vector. SmallString<128> CommentsToEmit; raw_svector_ostream CommentStream(CommentsToEmit); - IP->setCommentStream(CommentStream); - - if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) { + // FIXME: Setting the CommentStream in the InstPrinter is problematic in that + // if it is done then arm64 comments for string literals don't get printed + // and some constant get printed instead and not setting it causes intel + // (32-bit and 64-bit) comments printed with different spacing before the + // comment causing different diffs with the 'C' disassembler library API. + // IP->setCommentStream(CommentStream); + + if (!AsmInfo || !STI || !DisAsm || !IP) { errs() << "error: couldn't initialize disassembler for target " << TripleName << '\n'; return; @@ -1514,26 +5987,23 @@ static void DisassembleInputMachO2(StringRef Filename, if (ThumbRelInfo) { ThumbSymbolizer.reset(ThumbTarget->createMCSymbolizer( ThumbTripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp, - &ThumbSymbolizerInfo, PtrThumbCtx, ThumbRelInfo.release())); + &ThumbSymbolizerInfo, PtrThumbCtx, std::move(ThumbRelInfo))); ThumbDisAsm->setSymbolizer(std::move(ThumbSymbolizer)); } int ThumbAsmPrinterVariant = ThumbAsmInfo->getAssemblerDialect(); ThumbIP.reset(ThumbTarget->createMCInstPrinter( - ThumbAsmPrinterVariant, *ThumbAsmInfo, *ThumbInstrInfo, *ThumbMRI, - *ThumbSTI)); + Triple(ThumbTripleName), ThumbAsmPrinterVariant, *ThumbAsmInfo, + *ThumbInstrInfo, *ThumbMRI)); // Set the display preference for hex vs. decimal immediates. ThumbIP->setPrintImmHex(PrintImmHex); } - if (ThumbTarget && (!ThumbInstrAnalysis || !ThumbAsmInfo || !ThumbSTI || - !ThumbDisAsm || !ThumbIP)) { + if (ThumbTarget && (!ThumbAsmInfo || !ThumbSTI || !ThumbDisAsm || !ThumbIP)) { errs() << "error: couldn't initialize disassembler for target " << ThumbTripleName << '\n'; return; } - outs() << '\n' << Filename << ":\n\n"; - MachO::mach_header Header = MachOOF->getHeader(); // FIXME: Using the -cfg command line option, this code used to be able to @@ -1545,7 +6015,7 @@ static void DisassembleInputMachO2(StringRef Filename, SmallVector FoundFns; uint64_t BaseSegmentAddress; - getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns, + getSectionsAndSymbols(MachOOF, Sections, Symbols, FoundFns, BaseSegmentAddress); // Sort the symbols by address, just in case they didn't come in that way. @@ -1592,23 +6062,21 @@ static void DisassembleInputMachO2(StringRef Filename, } // Setup the DIContext - diContext.reset(DIContext::getDWARFContext(*DbgObj)); + diContext.reset(new DWARFContextInMemory(*DbgObj)); } - for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { - - bool SectIsText = Sections[SectIdx].isText(); - if (SectIsText == false) - continue; + if (DumpSections.size() == 0) + outs() << "(" << DisSegName << "," << DisSectName << ") section\n"; + for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { StringRef SectName; - if (Sections[SectIdx].getName(SectName) || SectName != "__text") - continue; // Skip non-text sections + if (Sections[SectIdx].getName(SectName) || SectName != DisSectName) + continue; DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl(); StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR); - if (SegmentName != "__TEXT") + if (SegmentName != DisSegName) continue; StringRef BytesStr; @@ -1622,8 +6090,7 @@ static void DisassembleInputMachO2(StringRef Filename, // Parse relocations. std::vector> Relocs; for (const RelocationRef &Reloc : Sections[SectIdx].relocations()) { - uint64_t RelocOffset; - Reloc.getOffset(RelocOffset); + uint64_t RelocOffset = Reloc.getOffset(); uint64_t SectionAddress = Sections[SectIdx].getAddress(); RelocOffset -= SectionAddress; @@ -1636,20 +6103,27 @@ static void DisassembleInputMachO2(StringRef Filename, // Create a map of symbol addresses to symbol names for use by // the SymbolizerSymbolLookUp() routine. SymbolAddressMap AddrMap; + bool DisSymNameFound = false; for (const SymbolRef &Symbol : MachOOF->symbols()) { - SymbolRef::Type ST; - Symbol.getType(ST); + SymbolRef::Type ST = Symbol.getType(); if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data || ST == SymbolRef::ST_Other) { - uint64_t Address; - Symbol.getAddress(Address); - StringRef SymName; - Symbol.getName(SymName); + uint64_t Address = Symbol.getValue(); + ErrorOr SymNameOrErr = Symbol.getName(); + if (std::error_code EC = SymNameOrErr.getError()) + report_fatal_error(EC.message()); + StringRef SymName = *SymNameOrErr; AddrMap[Address] = SymName; + if (!DisSymName.empty() && DisSymName == SymName) + DisSymNameFound = true; } } + if (!DisSymName.empty() && !DisSymNameFound) { + outs() << "Can't find -dis-symname: " << DisSymName << "\n"; + return; + } // Set up the block of info used by the Symbolizer call backs. - SymbolizerInfo.verbose = true; + SymbolizerInfo.verbose = !NoSymbolicOperands; SymbolizerInfo.O = MachOOF; SymbolizerInfo.S = Sections[SectIdx]; SymbolizerInfo.AddrMap = &AddrMap; @@ -1659,8 +6133,10 @@ static void DisassembleInputMachO2(StringRef Filename, SymbolizerInfo.method = nullptr; SymbolizerInfo.demangled_name = nullptr; SymbolizerInfo.bindtable = nullptr; + SymbolizerInfo.adrp_addr = 0; + SymbolizerInfo.adrp_inst = 0; // Same for the ThumbSymbolizer - ThumbSymbolizerInfo.verbose = true; + ThumbSymbolizerInfo.verbose = !NoSymbolicOperands; ThumbSymbolizerInfo.O = MachOOF; ThumbSymbolizerInfo.S = Sections[SectIdx]; ThumbSymbolizerInfo.AddrMap = &AddrMap; @@ -1670,14 +6146,17 @@ static void DisassembleInputMachO2(StringRef Filename, ThumbSymbolizerInfo.method = nullptr; ThumbSymbolizerInfo.demangled_name = nullptr; ThumbSymbolizerInfo.bindtable = nullptr; + ThumbSymbolizerInfo.adrp_addr = 0; + ThumbSymbolizerInfo.adrp_inst = 0; // Disassemble symbol by symbol. for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) { - StringRef SymName; - Symbols[SymIdx].getName(SymName); + ErrorOr SymNameOrErr = Symbols[SymIdx].getName(); + if (std::error_code EC = SymNameOrErr.getError()) + report_fatal_error(EC.message()); + StringRef SymName = *SymNameOrErr; - SymbolRef::Type ST; - Symbols[SymIdx].getType(ST); + SymbolRef::Type ST = Symbols[SymIdx].getType(); if (ST != SymbolRef::ST_Function) continue; @@ -1686,10 +6165,13 @@ static void DisassembleInputMachO2(StringRef Filename, if (!containsSym) continue; + // If we are only disassembling one symbol see if this is that symbol. + if (!DisSymName.empty() && DisSymName != SymName) + continue; + // Start at the address of the symbol relative to the section's address. - uint64_t Start = 0; + uint64_t Start = Symbols[SymIdx].getValue(); uint64_t SectionAddress = Sections[SectIdx].getAddress(); - Symbols[SymIdx].getAddress(Start); Start -= SectionAddress; // Stop disassembling either at the beginning of the next symbol or at @@ -1698,12 +6180,11 @@ static void DisassembleInputMachO2(StringRef Filename, uint64_t NextSym = 0; uint64_t NextSymIdx = SymIdx + 1; while (Symbols.size() > NextSymIdx) { - SymbolRef::Type NextSymType; - Symbols[NextSymIdx].getType(NextSymType); + SymbolRef::Type NextSymType = Symbols[NextSymIdx].getType(); if (NextSymType == SymbolRef::ST_Function) { containsNextSym = Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]); - Symbols[NextSymIdx].getAddress(NextSym); + NextSym = Symbols[NextSymIdx].getValue(); NextSym -= SectionAddress; break; } @@ -1726,13 +6207,15 @@ static void DisassembleInputMachO2(StringRef Filename, MCInst Inst; uint64_t PC = SectAddress + Index; - if (FullLeadingAddr) { - if (MachOOF->is64Bit()) - outs() << format("%016" PRIx64, PC); - else - outs() << format("%08" PRIx64, PC); - } else { - outs() << format("%8" PRIx64 ":", PC); + if (!NoLeadingAddr) { + if (FullLeadingAddr) { + if (MachOOF->is64Bit()) + outs() << format("%016" PRIx64, PC); + else + outs() << format("%08" PRIx64, PC); + } else { + outs() << format("%8" PRIx64 ":", PC); + } } if (!NoShowRawInsn) outs() << "\t"; @@ -1749,9 +6232,7 @@ static void DisassembleInputMachO2(StringRef Filename, DTI->second.getLength(Length); uint16_t Kind; DTI->second.getKind(Kind); - Size = DumpDataInCode(reinterpret_cast(Bytes.data()) + - Index, - Length, Kind); + Size = DumpDataInCode(Bytes.data() + Index, Length, Kind); if ((Kind == MachO::DICE_KIND_JUMP_TABLE8) && (PC == (DTI->first + Length - 1)) && (Length & 1)) Size++; @@ -1770,16 +6251,15 @@ static void DisassembleInputMachO2(StringRef Filename, DebugOut, Annotations); if (gotInst) { if (!NoShowRawInsn) { - DumpBytes(StringRef( - reinterpret_cast(Bytes.data()) + Index, Size)); + dumpBytes(ArrayRef(Bytes.data() + Index, Size), outs()); } formatted_raw_ostream FormattedOS(outs()); Annotations.flush(); StringRef AnnotationsStr = Annotations.str(); if (isThumb) - ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr); + ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr, *ThumbSTI); else - IP->printInst(&Inst, FormattedOS, AnnotationsStr); + IP->printInst(&Inst, FormattedOS, AnnotationsStr, *STI); emitComments(CommentStream, CommentsToEmit, FormattedOS, *AsmInfo); // Print debug info. @@ -1798,6 +6278,13 @@ static void DisassembleInputMachO2(StringRef Filename, outs() << format("\t.byte 0x%02x #bad opcode\n", *(Bytes.data() + Index) & 0xff); Size = 1; // skip exactly one illegible byte and move on. + } else if (Arch == Triple::aarch64) { + uint32_t opcode = (*(Bytes.data() + Index) & 0xff) | + (*(Bytes.data() + Index + 1) & 0xff) << 8 | + (*(Bytes.data() + Index + 2) & 0xff) << 16 | + (*(Bytes.data() + Index + 3) & 0xff) << 24; + outs() << format("\t.long\t0x%08x\n", opcode); + Size = 4; } else { errs() << "llvm-objdump: warning: invalid instruction encoding\n"; if (Size == 0) @@ -1817,21 +6304,21 @@ static void DisassembleInputMachO2(StringRef Filename, uint64_t PC = SectAddress + Index; if (DisAsm->getInstruction(Inst, InstSize, Bytes.slice(Index), PC, DebugOut, nulls())) { - if (FullLeadingAddr) { - if (MachOOF->is64Bit()) - outs() << format("%016" PRIx64, PC); - else - outs() << format("%08" PRIx64, PC); - } else { - outs() << format("%8" PRIx64 ":", PC); + if (!NoLeadingAddr) { + if (FullLeadingAddr) { + if (MachOOF->is64Bit()) + outs() << format("%016" PRIx64, PC); + else + outs() << format("%08" PRIx64, PC); + } else { + outs() << format("%8" PRIx64 ":", PC); + } } if (!NoShowRawInsn) { outs() << "\t"; - DumpBytes( - StringRef(reinterpret_cast(Bytes.data()) + Index, - InstSize)); + dumpBytes(ArrayRef(Bytes.data() + Index, InstSize), outs()); } - IP->printInst(&Inst, outs(), ""); + IP->printInst(&Inst, outs(), "", *STI); outs() << "\n"; } else { unsigned int Arch = MachOOF->getArch(); @@ -1847,6 +6334,11 @@ static void DisassembleInputMachO2(StringRef Filename, } } } + // The TripleName's need to be reset if we are called again for a different + // archtecture. + TripleName = ""; + ThumbTripleName = ""; + if (SymbolizerInfo.method != nullptr) free(SymbolizerInfo.method); if (SymbolizerInfo.demangled_name != nullptr) @@ -1924,13 +6416,16 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj, const RelocationRef &Reloc, uint64_t Addr, StringRef &Name, uint64_t &Addend) { if (Reloc.getSymbol() != Obj->symbol_end()) { - Reloc.getSymbol()->getName(Name); + ErrorOr NameOrErr = Reloc.getSymbol()->getName(); + if (std::error_code EC = NameOrErr.getError()) + report_fatal_error(EC.message()); + Name = *NameOrErr; Addend = Addr; return; } auto RE = Obj->getRelocation(Reloc.getRawDataRefImpl()); - SectionRef RelocSection = Obj->getRelocationSection(RE); + SectionRef RelocSection = Obj->getAnyRelocationSection(RE); uint64_t SectionAddr = RelocSection.getAddress(); @@ -1950,7 +6445,10 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj, Sym->second.getSection(SymSection); if (RelocSection == *SymSection) { // There's a valid symbol in the same section before this reference. - Sym->second.getName(Name); + ErrorOr NameOrErr = Sym->second.getName(); + if (std::error_code EC = NameOrErr.getError()) + report_fatal_error(EC.message()); + Name = *NameOrErr; Addend = Addr - Sym->first; return; } @@ -1967,7 +6465,7 @@ static void printUnwindRelocDest(const MachOObjectFile *Obj, StringRef Name; uint64_t Addend; - if (!Reloc.getObjectFile()) + if (!Reloc.getObject()) return; findUnwindRelocNameAddend(Obj, Symbols, Reloc, Addr, Name, Addend); @@ -2003,8 +6501,7 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj, // Next we need to look at the relocations to find out what objects are // actually being referred to. for (const RelocationRef &Reloc : CompactUnwind.relocations()) { - uint64_t RelocAddress; - Reloc.getOffset(RelocAddress); + uint64_t RelocAddress = Reloc.getOffset(); uint32_t EntryIdx = RelocAddress / EntrySize; uint32_t OffsetInEntry = RelocAddress - EntryIdx * EntrySize; @@ -2040,7 +6537,7 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj, << format("0x%08" PRIx32, Entry.CompactEncoding) << '\n'; // 4. The personality function, if present. - if (Entry.PersonalityReloc.getObjectFile()) { + if (Entry.PersonalityReloc.getObject()) { outs() << " personality function: " << format("0x%" PRIx64, Entry.PersonalityAddr) << ' '; printUnwindRelocDest(Obj, Symbols, Entry.PersonalityReloc, @@ -2049,7 +6546,7 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj, } // 5. This entry's language-specific data area. - if (Entry.LSDAReloc.getObjectFile()) { + if (Entry.LSDAReloc.getObject()) { outs() << " LSDA: " << format("0x%" PRIx64, Entry.LSDAAddr) << ' '; printUnwindRelocDest(Obj, Symbols, Entry.LSDAReloc, Entry.LSDAAddr); @@ -2293,8 +6790,7 @@ void llvm::printMachOUnwindInfo(const MachOObjectFile *Obj) { if (Section == Obj->section_end()) continue; - uint64_t Addr; - SymRef.getAddress(Addr); + uint64_t Addr = SymRef.getValue(); Symbols.insert(std::make_pair(Addr, SymRef)); } @@ -2338,12 +6834,17 @@ static void PrintMachHeader(uint32_t magic, uint32_t cputype, break; case MachO::CPU_TYPE_X86_64: outs() << " X86_64"; - case MachO::CPU_SUBTYPE_X86_64_ALL: - outs() << " ALL"; - break; - case MachO::CPU_SUBTYPE_X86_64_H: - outs() << " Haswell"; - outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); + switch (cpusubtype & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_X86_64_ALL: + outs() << " ALL"; + break; + case MachO::CPU_SUBTYPE_X86_64_H: + outs() << " Haswell"; + break; + default: + outs() << format(" %10d", cpusubtype & ~MachO::CPU_SUBTYPE_MASK); + break; + } break; case MachO::CPU_TYPE_ARM: outs() << " ARM"; @@ -2608,8 +7109,8 @@ static void PrintSegmentCommand(uint32_t cmd, uint32_t cmdsize, outs() << " vmaddr " << format("0x%016" PRIx64, vmaddr) << "\n"; outs() << " vmsize " << format("0x%016" PRIx64, vmsize) << "\n"; } else { - outs() << " vmaddr " << format("0x%08" PRIx32, vmaddr) << "\n"; - outs() << " vmsize " << format("0x%08" PRIx32, vmsize) << "\n"; + outs() << " vmaddr " << format("0x%08" PRIx64, vmaddr) << "\n"; + outs() << " vmsize " << format("0x%08" PRIx64, vmsize) << "\n"; } outs() << " fileoff " << fileoff; if (fileoff > object_size) @@ -2712,8 +7213,8 @@ static void PrintSection(const char *sectname, const char *segname, outs() << " addr " << format("0x%016" PRIx64, addr) << "\n"; outs() << " size " << format("0x%016" PRIx64, size); } else { - outs() << " addr " << format("0x%08" PRIx32, addr) << "\n"; - outs() << " size " << format("0x%08" PRIx32, size); + outs() << " addr " << format("0x%08" PRIx64, addr) << "\n"; + outs() << " size " << format("0x%08" PRIx64, size); } if ((flags & MachO::S_ZEROFILL) != 0 && offset + size > object_size) outs() << " (past end of file)\n"; @@ -3133,6 +7634,21 @@ static void PrintUuidLoadCommand(MachO::uuid_command uuid) { outs() << "\n"; } +static void PrintRpathLoadCommand(MachO::rpath_command rpath, const char *Ptr) { + outs() << " cmd LC_RPATH\n"; + outs() << " cmdsize " << rpath.cmdsize; + if (rpath.cmdsize < sizeof(struct MachO::rpath_command)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + if (rpath.path >= rpath.cmdsize) + outs() << " path ?(bad offset " << rpath.path << ")\n"; + else { + const char *P = (const char *)(Ptr) + rpath.path; + outs() << " path " << P << " (offset " << rpath.path << ")\n"; + } +} + static void PrintVersionMinLoadCommand(MachO::version_min_command vd) { if (vd.cmd == MachO::LC_VERSION_MIN_MACOSX) outs() << " cmd LC_VERSION_MIN_MACOSX\n"; @@ -3151,7 +7667,7 @@ static void PrintVersionMinLoadCommand(MachO::version_min_command vd) { outs() << "." << (vd.version & 0xff); outs() << "\n"; if (vd.sdk == 0) - outs() << " sdk n/a\n"; + outs() << " sdk n/a"; else { outs() << " sdk " << ((vd.sdk >> 16) & 0xffff) << "." << ((vd.sdk >> 8) & 0xff); @@ -3194,6 +7710,525 @@ static void PrintEntryPointCommand(MachO::entry_point_command ep) { outs() << " stacksize " << ep.stacksize << "\n"; } +static void PrintEncryptionInfoCommand(MachO::encryption_info_command ec, + uint32_t object_size) { + outs() << " cmd LC_ENCRYPTION_INFO\n"; + outs() << " cmdsize " << ec.cmdsize; + if (ec.cmdsize != sizeof(struct MachO::encryption_info_command)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + outs() << " cryptoff " << ec.cryptoff; + if (ec.cryptoff > object_size) + outs() << " (past end of file)\n"; + else + outs() << "\n"; + outs() << " cryptsize " << ec.cryptsize; + if (ec.cryptsize > object_size) + outs() << " (past end of file)\n"; + else + outs() << "\n"; + outs() << " cryptid " << ec.cryptid << "\n"; +} + +static void PrintEncryptionInfoCommand64(MachO::encryption_info_command_64 ec, + uint32_t object_size) { + outs() << " cmd LC_ENCRYPTION_INFO_64\n"; + outs() << " cmdsize " << ec.cmdsize; + if (ec.cmdsize != sizeof(struct MachO::encryption_info_command_64)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + outs() << " cryptoff " << ec.cryptoff; + if (ec.cryptoff > object_size) + outs() << " (past end of file)\n"; + else + outs() << "\n"; + outs() << " cryptsize " << ec.cryptsize; + if (ec.cryptsize > object_size) + outs() << " (past end of file)\n"; + else + outs() << "\n"; + outs() << " cryptid " << ec.cryptid << "\n"; + outs() << " pad " << ec.pad << "\n"; +} + +static void PrintLinkerOptionCommand(MachO::linker_option_command lo, + const char *Ptr) { + outs() << " cmd LC_LINKER_OPTION\n"; + outs() << " cmdsize " << lo.cmdsize; + if (lo.cmdsize < sizeof(struct MachO::linker_option_command)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + outs() << " count " << lo.count << "\n"; + const char *string = Ptr + sizeof(struct MachO::linker_option_command); + uint32_t left = lo.cmdsize - sizeof(struct MachO::linker_option_command); + uint32_t i = 0; + while (left > 0) { + while (*string == '\0' && left > 0) { + string++; + left--; + } + if (left > 0) { + i++; + outs() << " string #" << i << " " << format("%.*s\n", left, string); + uint32_t NullPos = StringRef(string, left).find('\0'); + uint32_t len = std::min(NullPos, left) + 1; + string += len; + left -= len; + } + } + if (lo.count != i) + outs() << " count " << lo.count << " does not match number of strings " + << i << "\n"; +} + +static void PrintSubFrameworkCommand(MachO::sub_framework_command sub, + const char *Ptr) { + outs() << " cmd LC_SUB_FRAMEWORK\n"; + outs() << " cmdsize " << sub.cmdsize; + if (sub.cmdsize < sizeof(struct MachO::sub_framework_command)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + if (sub.umbrella < sub.cmdsize) { + const char *P = Ptr + sub.umbrella; + outs() << " umbrella " << P << " (offset " << sub.umbrella << ")\n"; + } else { + outs() << " umbrella ?(bad offset " << sub.umbrella << ")\n"; + } +} + +static void PrintSubUmbrellaCommand(MachO::sub_umbrella_command sub, + const char *Ptr) { + outs() << " cmd LC_SUB_UMBRELLA\n"; + outs() << " cmdsize " << sub.cmdsize; + if (sub.cmdsize < sizeof(struct MachO::sub_umbrella_command)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + if (sub.sub_umbrella < sub.cmdsize) { + const char *P = Ptr + sub.sub_umbrella; + outs() << " sub_umbrella " << P << " (offset " << sub.sub_umbrella << ")\n"; + } else { + outs() << " sub_umbrella ?(bad offset " << sub.sub_umbrella << ")\n"; + } +} + +static void PrintSubLibraryCommand(MachO::sub_library_command sub, + const char *Ptr) { + outs() << " cmd LC_SUB_LIBRARY\n"; + outs() << " cmdsize " << sub.cmdsize; + if (sub.cmdsize < sizeof(struct MachO::sub_library_command)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + if (sub.sub_library < sub.cmdsize) { + const char *P = Ptr + sub.sub_library; + outs() << " sub_library " << P << " (offset " << sub.sub_library << ")\n"; + } else { + outs() << " sub_library ?(bad offset " << sub.sub_library << ")\n"; + } +} + +static void PrintSubClientCommand(MachO::sub_client_command sub, + const char *Ptr) { + outs() << " cmd LC_SUB_CLIENT\n"; + outs() << " cmdsize " << sub.cmdsize; + if (sub.cmdsize < sizeof(struct MachO::sub_client_command)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + if (sub.client < sub.cmdsize) { + const char *P = Ptr + sub.client; + outs() << " client " << P << " (offset " << sub.client << ")\n"; + } else { + outs() << " client ?(bad offset " << sub.client << ")\n"; + } +} + +static void PrintRoutinesCommand(MachO::routines_command r) { + outs() << " cmd LC_ROUTINES\n"; + outs() << " cmdsize " << r.cmdsize; + if (r.cmdsize != sizeof(struct MachO::routines_command)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + outs() << " init_address " << format("0x%08" PRIx32, r.init_address) << "\n"; + outs() << " init_module " << r.init_module << "\n"; + outs() << " reserved1 " << r.reserved1 << "\n"; + outs() << " reserved2 " << r.reserved2 << "\n"; + outs() << " reserved3 " << r.reserved3 << "\n"; + outs() << " reserved4 " << r.reserved4 << "\n"; + outs() << " reserved5 " << r.reserved5 << "\n"; + outs() << " reserved6 " << r.reserved6 << "\n"; +} + +static void PrintRoutinesCommand64(MachO::routines_command_64 r) { + outs() << " cmd LC_ROUTINES_64\n"; + outs() << " cmdsize " << r.cmdsize; + if (r.cmdsize != sizeof(struct MachO::routines_command_64)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + outs() << " init_address " << format("0x%016" PRIx64, r.init_address) << "\n"; + outs() << " init_module " << r.init_module << "\n"; + outs() << " reserved1 " << r.reserved1 << "\n"; + outs() << " reserved2 " << r.reserved2 << "\n"; + outs() << " reserved3 " << r.reserved3 << "\n"; + outs() << " reserved4 " << r.reserved4 << "\n"; + outs() << " reserved5 " << r.reserved5 << "\n"; + outs() << " reserved6 " << r.reserved6 << "\n"; +} + +static void Print_x86_thread_state64_t(MachO::x86_thread_state64_t &cpu64) { + outs() << " rax " << format("0x%016" PRIx64, cpu64.rax); + outs() << " rbx " << format("0x%016" PRIx64, cpu64.rbx); + outs() << " rcx " << format("0x%016" PRIx64, cpu64.rcx) << "\n"; + outs() << " rdx " << format("0x%016" PRIx64, cpu64.rdx); + outs() << " rdi " << format("0x%016" PRIx64, cpu64.rdi); + outs() << " rsi " << format("0x%016" PRIx64, cpu64.rsi) << "\n"; + outs() << " rbp " << format("0x%016" PRIx64, cpu64.rbp); + outs() << " rsp " << format("0x%016" PRIx64, cpu64.rsp); + outs() << " r8 " << format("0x%016" PRIx64, cpu64.r8) << "\n"; + outs() << " r9 " << format("0x%016" PRIx64, cpu64.r9); + outs() << " r10 " << format("0x%016" PRIx64, cpu64.r10); + outs() << " r11 " << format("0x%016" PRIx64, cpu64.r11) << "\n"; + outs() << " r12 " << format("0x%016" PRIx64, cpu64.r12); + outs() << " r13 " << format("0x%016" PRIx64, cpu64.r13); + outs() << " r14 " << format("0x%016" PRIx64, cpu64.r14) << "\n"; + outs() << " r15 " << format("0x%016" PRIx64, cpu64.r15); + outs() << " rip " << format("0x%016" PRIx64, cpu64.rip) << "\n"; + outs() << "rflags " << format("0x%016" PRIx64, cpu64.rflags); + outs() << " cs " << format("0x%016" PRIx64, cpu64.cs); + outs() << " fs " << format("0x%016" PRIx64, cpu64.fs) << "\n"; + outs() << " gs " << format("0x%016" PRIx64, cpu64.gs) << "\n"; +} + +static void Print_mmst_reg(MachO::mmst_reg_t &r) { + uint32_t f; + outs() << "\t mmst_reg "; + for (f = 0; f < 10; f++) + outs() << format("%02" PRIx32, (r.mmst_reg[f] & 0xff)) << " "; + outs() << "\n"; + outs() << "\t mmst_rsrv "; + for (f = 0; f < 6; f++) + outs() << format("%02" PRIx32, (r.mmst_rsrv[f] & 0xff)) << " "; + outs() << "\n"; +} + +static void Print_xmm_reg(MachO::xmm_reg_t &r) { + uint32_t f; + outs() << "\t xmm_reg "; + for (f = 0; f < 16; f++) + outs() << format("%02" PRIx32, (r.xmm_reg[f] & 0xff)) << " "; + outs() << "\n"; +} + +static void Print_x86_float_state_t(MachO::x86_float_state64_t &fpu) { + outs() << "\t fpu_reserved[0] " << fpu.fpu_reserved[0]; + outs() << " fpu_reserved[1] " << fpu.fpu_reserved[1] << "\n"; + outs() << "\t control: invalid " << fpu.fpu_fcw.invalid; + outs() << " denorm " << fpu.fpu_fcw.denorm; + outs() << " zdiv " << fpu.fpu_fcw.zdiv; + outs() << " ovrfl " << fpu.fpu_fcw.ovrfl; + outs() << " undfl " << fpu.fpu_fcw.undfl; + outs() << " precis " << fpu.fpu_fcw.precis << "\n"; + outs() << "\t\t pc "; + if (fpu.fpu_fcw.pc == MachO::x86_FP_PREC_24B) + outs() << "FP_PREC_24B "; + else if (fpu.fpu_fcw.pc == MachO::x86_FP_PREC_53B) + outs() << "FP_PREC_53B "; + else if (fpu.fpu_fcw.pc == MachO::x86_FP_PREC_64B) + outs() << "FP_PREC_64B "; + else + outs() << fpu.fpu_fcw.pc << " "; + outs() << "rc "; + if (fpu.fpu_fcw.rc == MachO::x86_FP_RND_NEAR) + outs() << "FP_RND_NEAR "; + else if (fpu.fpu_fcw.rc == MachO::x86_FP_RND_DOWN) + outs() << "FP_RND_DOWN "; + else if (fpu.fpu_fcw.rc == MachO::x86_FP_RND_UP) + outs() << "FP_RND_UP "; + else if (fpu.fpu_fcw.rc == MachO::x86_FP_CHOP) + outs() << "FP_CHOP "; + outs() << "\n"; + outs() << "\t status: invalid " << fpu.fpu_fsw.invalid; + outs() << " denorm " << fpu.fpu_fsw.denorm; + outs() << " zdiv " << fpu.fpu_fsw.zdiv; + outs() << " ovrfl " << fpu.fpu_fsw.ovrfl; + outs() << " undfl " << fpu.fpu_fsw.undfl; + outs() << " precis " << fpu.fpu_fsw.precis; + outs() << " stkflt " << fpu.fpu_fsw.stkflt << "\n"; + outs() << "\t errsumm " << fpu.fpu_fsw.errsumm; + outs() << " c0 " << fpu.fpu_fsw.c0; + outs() << " c1 " << fpu.fpu_fsw.c1; + outs() << " c2 " << fpu.fpu_fsw.c2; + outs() << " tos " << fpu.fpu_fsw.tos; + outs() << " c3 " << fpu.fpu_fsw.c3; + outs() << " busy " << fpu.fpu_fsw.busy << "\n"; + outs() << "\t fpu_ftw " << format("0x%02" PRIx32, fpu.fpu_ftw); + outs() << " fpu_rsrv1 " << format("0x%02" PRIx32, fpu.fpu_rsrv1); + outs() << " fpu_fop " << format("0x%04" PRIx32, fpu.fpu_fop); + outs() << " fpu_ip " << format("0x%08" PRIx32, fpu.fpu_ip) << "\n"; + outs() << "\t fpu_cs " << format("0x%04" PRIx32, fpu.fpu_cs); + outs() << " fpu_rsrv2 " << format("0x%04" PRIx32, fpu.fpu_rsrv2); + outs() << " fpu_dp " << format("0x%08" PRIx32, fpu.fpu_dp); + outs() << " fpu_ds " << format("0x%04" PRIx32, fpu.fpu_ds) << "\n"; + outs() << "\t fpu_rsrv3 " << format("0x%04" PRIx32, fpu.fpu_rsrv3); + outs() << " fpu_mxcsr " << format("0x%08" PRIx32, fpu.fpu_mxcsr); + outs() << " fpu_mxcsrmask " << format("0x%08" PRIx32, fpu.fpu_mxcsrmask); + outs() << "\n"; + outs() << "\t fpu_stmm0:\n"; + Print_mmst_reg(fpu.fpu_stmm0); + outs() << "\t fpu_stmm1:\n"; + Print_mmst_reg(fpu.fpu_stmm1); + outs() << "\t fpu_stmm2:\n"; + Print_mmst_reg(fpu.fpu_stmm2); + outs() << "\t fpu_stmm3:\n"; + Print_mmst_reg(fpu.fpu_stmm3); + outs() << "\t fpu_stmm4:\n"; + Print_mmst_reg(fpu.fpu_stmm4); + outs() << "\t fpu_stmm5:\n"; + Print_mmst_reg(fpu.fpu_stmm5); + outs() << "\t fpu_stmm6:\n"; + Print_mmst_reg(fpu.fpu_stmm6); + outs() << "\t fpu_stmm7:\n"; + Print_mmst_reg(fpu.fpu_stmm7); + outs() << "\t fpu_xmm0:\n"; + Print_xmm_reg(fpu.fpu_xmm0); + outs() << "\t fpu_xmm1:\n"; + Print_xmm_reg(fpu.fpu_xmm1); + outs() << "\t fpu_xmm2:\n"; + Print_xmm_reg(fpu.fpu_xmm2); + outs() << "\t fpu_xmm3:\n"; + Print_xmm_reg(fpu.fpu_xmm3); + outs() << "\t fpu_xmm4:\n"; + Print_xmm_reg(fpu.fpu_xmm4); + outs() << "\t fpu_xmm5:\n"; + Print_xmm_reg(fpu.fpu_xmm5); + outs() << "\t fpu_xmm6:\n"; + Print_xmm_reg(fpu.fpu_xmm6); + outs() << "\t fpu_xmm7:\n"; + Print_xmm_reg(fpu.fpu_xmm7); + outs() << "\t fpu_xmm8:\n"; + Print_xmm_reg(fpu.fpu_xmm8); + outs() << "\t fpu_xmm9:\n"; + Print_xmm_reg(fpu.fpu_xmm9); + outs() << "\t fpu_xmm10:\n"; + Print_xmm_reg(fpu.fpu_xmm10); + outs() << "\t fpu_xmm11:\n"; + Print_xmm_reg(fpu.fpu_xmm11); + outs() << "\t fpu_xmm12:\n"; + Print_xmm_reg(fpu.fpu_xmm12); + outs() << "\t fpu_xmm13:\n"; + Print_xmm_reg(fpu.fpu_xmm13); + outs() << "\t fpu_xmm14:\n"; + Print_xmm_reg(fpu.fpu_xmm14); + outs() << "\t fpu_xmm15:\n"; + Print_xmm_reg(fpu.fpu_xmm15); + outs() << "\t fpu_rsrv4:\n"; + for (uint32_t f = 0; f < 6; f++) { + outs() << "\t "; + for (uint32_t g = 0; g < 16; g++) + outs() << format("%02" PRIx32, fpu.fpu_rsrv4[f * g]) << " "; + outs() << "\n"; + } + outs() << "\t fpu_reserved1 " << format("0x%08" PRIx32, fpu.fpu_reserved1); + outs() << "\n"; +} + +static void Print_x86_exception_state_t(MachO::x86_exception_state64_t &exc64) { + outs() << "\t trapno " << format("0x%08" PRIx32, exc64.trapno); + outs() << " err " << format("0x%08" PRIx32, exc64.err); + outs() << " faultvaddr " << format("0x%016" PRIx64, exc64.faultvaddr) << "\n"; +} + +static void PrintThreadCommand(MachO::thread_command t, const char *Ptr, + bool isLittleEndian, uint32_t cputype) { + if (t.cmd == MachO::LC_THREAD) + outs() << " cmd LC_THREAD\n"; + else if (t.cmd == MachO::LC_UNIXTHREAD) + outs() << " cmd LC_UNIXTHREAD\n"; + else + outs() << " cmd " << t.cmd << " (unknown)\n"; + outs() << " cmdsize " << t.cmdsize; + if (t.cmdsize < sizeof(struct MachO::thread_command) + 2 * sizeof(uint32_t)) + outs() << " Incorrect size\n"; + else + outs() << "\n"; + + const char *begin = Ptr + sizeof(struct MachO::thread_command); + const char *end = Ptr + t.cmdsize; + uint32_t flavor, count, left; + if (cputype == MachO::CPU_TYPE_X86_64) { + while (begin < end) { + if (end - begin > (ptrdiff_t)sizeof(uint32_t)) { + memcpy((char *)&flavor, begin, sizeof(uint32_t)); + begin += sizeof(uint32_t); + } else { + flavor = 0; + begin = end; + } + if (isLittleEndian != sys::IsLittleEndianHost) + sys::swapByteOrder(flavor); + if (end - begin > (ptrdiff_t)sizeof(uint32_t)) { + memcpy((char *)&count, begin, sizeof(uint32_t)); + begin += sizeof(uint32_t); + } else { + count = 0; + begin = end; + } + if (isLittleEndian != sys::IsLittleEndianHost) + sys::swapByteOrder(count); + if (flavor == MachO::x86_THREAD_STATE64) { + outs() << " flavor x86_THREAD_STATE64\n"; + if (count == MachO::x86_THREAD_STATE64_COUNT) + outs() << " count x86_THREAD_STATE64_COUNT\n"; + else + outs() << " count " << count + << " (not x86_THREAD_STATE64_COUNT)\n"; + MachO::x86_thread_state64_t cpu64; + left = end - begin; + if (left >= sizeof(MachO::x86_thread_state64_t)) { + memcpy(&cpu64, begin, sizeof(MachO::x86_thread_state64_t)); + begin += sizeof(MachO::x86_thread_state64_t); + } else { + memset(&cpu64, '\0', sizeof(MachO::x86_thread_state64_t)); + memcpy(&cpu64, begin, left); + begin += left; + } + if (isLittleEndian != sys::IsLittleEndianHost) + swapStruct(cpu64); + Print_x86_thread_state64_t(cpu64); + } else if (flavor == MachO::x86_THREAD_STATE) { + outs() << " flavor x86_THREAD_STATE\n"; + if (count == MachO::x86_THREAD_STATE_COUNT) + outs() << " count x86_THREAD_STATE_COUNT\n"; + else + outs() << " count " << count + << " (not x86_THREAD_STATE_COUNT)\n"; + struct MachO::x86_thread_state_t ts; + left = end - begin; + if (left >= sizeof(MachO::x86_thread_state_t)) { + memcpy(&ts, begin, sizeof(MachO::x86_thread_state_t)); + begin += sizeof(MachO::x86_thread_state_t); + } else { + memset(&ts, '\0', sizeof(MachO::x86_thread_state_t)); + memcpy(&ts, begin, left); + begin += left; + } + if (isLittleEndian != sys::IsLittleEndianHost) + swapStruct(ts); + if (ts.tsh.flavor == MachO::x86_THREAD_STATE64) { + outs() << "\t tsh.flavor x86_THREAD_STATE64 "; + if (ts.tsh.count == MachO::x86_THREAD_STATE64_COUNT) + outs() << "tsh.count x86_THREAD_STATE64_COUNT\n"; + else + outs() << "tsh.count " << ts.tsh.count + << " (not x86_THREAD_STATE64_COUNT\n"; + Print_x86_thread_state64_t(ts.uts.ts64); + } else { + outs() << "\t tsh.flavor " << ts.tsh.flavor << " tsh.count " + << ts.tsh.count << "\n"; + } + } else if (flavor == MachO::x86_FLOAT_STATE) { + outs() << " flavor x86_FLOAT_STATE\n"; + if (count == MachO::x86_FLOAT_STATE_COUNT) + outs() << " count x86_FLOAT_STATE_COUNT\n"; + else + outs() << " count " << count << " (not x86_FLOAT_STATE_COUNT)\n"; + struct MachO::x86_float_state_t fs; + left = end - begin; + if (left >= sizeof(MachO::x86_float_state_t)) { + memcpy(&fs, begin, sizeof(MachO::x86_float_state_t)); + begin += sizeof(MachO::x86_float_state_t); + } else { + memset(&fs, '\0', sizeof(MachO::x86_float_state_t)); + memcpy(&fs, begin, left); + begin += left; + } + if (isLittleEndian != sys::IsLittleEndianHost) + swapStruct(fs); + if (fs.fsh.flavor == MachO::x86_FLOAT_STATE64) { + outs() << "\t fsh.flavor x86_FLOAT_STATE64 "; + if (fs.fsh.count == MachO::x86_FLOAT_STATE64_COUNT) + outs() << "fsh.count x86_FLOAT_STATE64_COUNT\n"; + else + outs() << "fsh.count " << fs.fsh.count + << " (not x86_FLOAT_STATE64_COUNT\n"; + Print_x86_float_state_t(fs.ufs.fs64); + } else { + outs() << "\t fsh.flavor " << fs.fsh.flavor << " fsh.count " + << fs.fsh.count << "\n"; + } + } else if (flavor == MachO::x86_EXCEPTION_STATE) { + outs() << " flavor x86_EXCEPTION_STATE\n"; + if (count == MachO::x86_EXCEPTION_STATE_COUNT) + outs() << " count x86_EXCEPTION_STATE_COUNT\n"; + else + outs() << " count " << count + << " (not x86_EXCEPTION_STATE_COUNT)\n"; + struct MachO::x86_exception_state_t es; + left = end - begin; + if (left >= sizeof(MachO::x86_exception_state_t)) { + memcpy(&es, begin, sizeof(MachO::x86_exception_state_t)); + begin += sizeof(MachO::x86_exception_state_t); + } else { + memset(&es, '\0', sizeof(MachO::x86_exception_state_t)); + memcpy(&es, begin, left); + begin += left; + } + if (isLittleEndian != sys::IsLittleEndianHost) + swapStruct(es); + if (es.esh.flavor == MachO::x86_EXCEPTION_STATE64) { + outs() << "\t esh.flavor x86_EXCEPTION_STATE64\n"; + if (es.esh.count == MachO::x86_EXCEPTION_STATE64_COUNT) + outs() << "\t esh.count x86_EXCEPTION_STATE64_COUNT\n"; + else + outs() << "\t esh.count " << es.esh.count + << " (not x86_EXCEPTION_STATE64_COUNT\n"; + Print_x86_exception_state_t(es.ues.es64); + } else { + outs() << "\t esh.flavor " << es.esh.flavor << " esh.count " + << es.esh.count << "\n"; + } + } else { + outs() << " flavor " << flavor << " (unknown)\n"; + outs() << " count " << count << "\n"; + outs() << " state (unknown)\n"; + begin += count * sizeof(uint32_t); + } + } + } else { + while (begin < end) { + if (end - begin > (ptrdiff_t)sizeof(uint32_t)) { + memcpy((char *)&flavor, begin, sizeof(uint32_t)); + begin += sizeof(uint32_t); + } else { + flavor = 0; + begin = end; + } + if (isLittleEndian != sys::IsLittleEndianHost) + sys::swapByteOrder(flavor); + if (end - begin > (ptrdiff_t)sizeof(uint32_t)) { + memcpy((char *)&count, begin, sizeof(uint32_t)); + begin += sizeof(uint32_t); + } else { + count = 0; + begin = end; + } + if (isLittleEndian != sys::IsLittleEndianHost) + sys::swapByteOrder(count); + outs() << " flavor " << flavor << "\n"; + outs() << " count " << count << "\n"; + outs() << " state (Unknown cputype/cpusubtype)\n"; + begin += count * sizeof(uint32_t); + } + } +} + static void PrintDylibCommand(MachO::dylib_command dl, const char *Ptr) { if (dl.cmd == MachO::LC_ID_DYLIB) outs() << " cmd LC_ID_DYLIB\n"; @@ -3274,13 +8309,12 @@ static void PrintLinkEditDataCommand(MachO::linkedit_data_command ld, outs() << "\n"; } -static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds, - uint32_t filetype, uint32_t cputype, - bool verbose) { +static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype, + uint32_t cputype, bool verbose) { StringRef Buf = Obj->getData(); - MachOObjectFile::LoadCommandInfo Command = Obj->getFirstLoadCommandInfo(); - for (unsigned i = 0;; ++i) { - outs() << "Load command " << i << "\n"; + unsigned Index = 0; + for (const auto &Command : Obj->load_commands()) { + outs() << "Load command " << Index++ << "\n"; if (Command.C.cmd == MachO::LC_SEGMENT) { MachO::segment_command SLC = Obj->getSegmentLoadCommand(Command); const char *sg_segname = SLC.segname; @@ -3289,7 +8323,7 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds, SLC.initprot, SLC.nsects, SLC.flags, Buf.size(), verbose); for (unsigned j = 0; j < SLC.nsects; j++) { - MachO::section_64 S = Obj->getSection64(Command, j); + MachO::section S = Obj->getSection(Command, j); PrintSection(S.sectname, S.segname, S.addr, S.size, S.offset, S.align, S.reloff, S.nreloc, S.flags, S.reserved1, S.reserved2, SLC.cmd, sg_segname, filetype, Buf.size(), verbose); @@ -3328,7 +8362,11 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds, } else if (Command.C.cmd == MachO::LC_UUID) { MachO::uuid_command Uuid = Obj->getUuidCommand(Command); PrintUuidLoadCommand(Uuid); - } else if (Command.C.cmd == MachO::LC_VERSION_MIN_MACOSX) { + } else if (Command.C.cmd == MachO::LC_RPATH) { + MachO::rpath_command Rpath = Obj->getRpathCommand(Command); + PrintRpathLoadCommand(Rpath, Command.Ptr); + } else if (Command.C.cmd == MachO::LC_VERSION_MIN_MACOSX || + Command.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS) { MachO::version_min_command Vd = Obj->getVersionMinLoadCommand(Command); PrintVersionMinLoadCommand(Vd); } else if (Command.C.cmd == MachO::LC_SOURCE_VERSION) { @@ -3337,6 +8375,40 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds, } else if (Command.C.cmd == MachO::LC_MAIN) { MachO::entry_point_command Ep = Obj->getEntryPointCommand(Command); PrintEntryPointCommand(Ep); + } else if (Command.C.cmd == MachO::LC_ENCRYPTION_INFO) { + MachO::encryption_info_command Ei = + Obj->getEncryptionInfoCommand(Command); + PrintEncryptionInfoCommand(Ei, Buf.size()); + } else if (Command.C.cmd == MachO::LC_ENCRYPTION_INFO_64) { + MachO::encryption_info_command_64 Ei = + Obj->getEncryptionInfoCommand64(Command); + PrintEncryptionInfoCommand64(Ei, Buf.size()); + } else if (Command.C.cmd == MachO::LC_LINKER_OPTION) { + MachO::linker_option_command Lo = + Obj->getLinkerOptionLoadCommand(Command); + PrintLinkerOptionCommand(Lo, Command.Ptr); + } else if (Command.C.cmd == MachO::LC_SUB_FRAMEWORK) { + MachO::sub_framework_command Sf = Obj->getSubFrameworkCommand(Command); + PrintSubFrameworkCommand(Sf, Command.Ptr); + } else if (Command.C.cmd == MachO::LC_SUB_UMBRELLA) { + MachO::sub_umbrella_command Sf = Obj->getSubUmbrellaCommand(Command); + PrintSubUmbrellaCommand(Sf, Command.Ptr); + } else if (Command.C.cmd == MachO::LC_SUB_LIBRARY) { + MachO::sub_library_command Sl = Obj->getSubLibraryCommand(Command); + PrintSubLibraryCommand(Sl, Command.Ptr); + } else if (Command.C.cmd == MachO::LC_SUB_CLIENT) { + MachO::sub_client_command Sc = Obj->getSubClientCommand(Command); + PrintSubClientCommand(Sc, Command.Ptr); + } else if (Command.C.cmd == MachO::LC_ROUTINES) { + MachO::routines_command Rc = Obj->getRoutinesCommand(Command); + PrintRoutinesCommand(Rc); + } else if (Command.C.cmd == MachO::LC_ROUTINES_64) { + MachO::routines_command_64 Rc = Obj->getRoutinesCommand64(Command); + PrintRoutinesCommand64(Rc); + } else if (Command.C.cmd == MachO::LC_THREAD || + Command.C.cmd == MachO::LC_UNIXTHREAD) { + MachO::thread_command Tc = Obj->getThreadCommand(Command); + PrintThreadCommand(Tc, Command.Ptr, Obj->isLittleEndian(), cputype); } else if (Command.C.cmd == MachO::LC_LOAD_DYLIB || Command.C.cmd == MachO::LC_ID_DYLIB || Command.C.cmd == MachO::LC_LOAD_WEAK_DYLIB || @@ -3361,14 +8433,10 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds, // TODO: get and print the raw bytes of the load command. } // TODO: print all the other kinds of load commands. - if (i == ncmds - 1) - break; - else - Command = Obj->getNextLoadCommandInfo(Command); } } -static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds, +static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &filetype, uint32_t &cputype, bool verbose) { if (Obj->is64Bit()) { @@ -3376,7 +8444,6 @@ static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds, H_64 = Obj->getHeader64(); PrintMachHeader(H_64.magic, H_64.cputype, H_64.cpusubtype, H_64.filetype, H_64.ncmds, H_64.sizeofcmds, H_64.flags, verbose); - ncmds = H_64.ncmds; filetype = H_64.filetype; cputype = H_64.cputype; } else { @@ -3384,7 +8451,6 @@ static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds, H = Obj->getHeader(); PrintMachHeader(H.magic, H.cputype, H.cpusubtype, H.filetype, H.ncmds, H.sizeofcmds, H.flags, verbose); - ncmds = H.ncmds; filetype = H.filetype; cputype = H.cputype; } @@ -3392,11 +8458,10 @@ static void getAndPrintMachHeader(const MachOObjectFile *Obj, uint32_t &ncmds, void llvm::printMachOFileHeader(const object::ObjectFile *Obj) { const MachOObjectFile *file = dyn_cast(Obj); - uint32_t ncmds = 0; uint32_t filetype = 0; uint32_t cputype = 0; - getAndPrintMachHeader(file, ncmds, filetype, cputype, true); - PrintLoadCommands(file, ncmds, filetype, cputype, true); + getAndPrintMachHeader(file, filetype, cputype, !NonVerbose); + PrintLoadCommands(file, filetype, cputype, !NonVerbose); } //===----------------------------------------------------------------------===//