[lit] Fix Python-3 compatibility, patch by Dan Liew.
[oota-llvm.git] / tools / llvm-objdump / MachODump.cpp
index 821cf82f418ac227808e807544962a446819967d..54aa3f2367ae267480458cb6e311d127f57a634a 100644 (file)
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-objdump.h"
+#include "llvm-c/Disassembler.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
@@ -35,6 +36,7 @@
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/MachO.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
@@ -50,6 +52,14 @@ static cl::opt<bool>
 static cl::opt<std::string>
   DSYMFile("dsym", cl::desc("Use .dSYM file for debug info"));
 
+static cl::opt<bool>
+    FullLeadingAddr("full-leading-addr",
+                    cl::desc("Print full leading address"));
+
+static cl::opt<bool>
+    PrintImmHex("print-imm-hex",
+                cl::desc("Use hex format for immediate values"));
+
 static std::string ThumbTripleName;
 
 static const Target *GetTarget(const MachOObjectFile *MachOObj,
@@ -224,6 +234,469 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
   DisassembleInputMachO2(Filename, MachOOF.get());
 }
 
+typedef DenseMap<uint64_t, StringRef> SymbolAddressMap;
+
+// The block of info used by the Symbolizer call backs.
+struct DisassembleInfo {
+  bool verbose;
+  MachOObjectFile *O;
+  SectionRef S;
+  SymbolAddressMap *AddrMap;
+};
+
+// SymbolizerGetOpInfo() is the operand information call back function.
+// This is called to get the symbolic information for operand(s) of an
+// instruction when it is being done.  This routine does this from
+// the relocation information, symbol table, etc. That block of information
+// is a pointer to the struct DisassembleInfo that was passed when the
+// disassembler context was created and passed to back to here when
+// called back by the disassembler for instruction operands that could have
+// relocation information. The address of the instruction containing operand is
+// at the Pc parameter.  The immediate value the operand has is passed in
+// op_info->Value and is at Offset past the start of the instruction and has a
+// byte Size of 1, 2 or 4. The symbolc information is returned in TagBuf is the
+// LLVMOpInfo1 struct defined in the header "llvm-c/Disassembler.h" as symbol
+// names and addends of the symbolic expression to add for the operand.  The
+// value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic
+// information is returned then this function returns 1 else it returns 0.
+int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
+                        uint64_t Size, int TagType, void *TagBuf) {
+  struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
+  struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf;
+  unsigned int value = op_info->Value;
+
+  // Make sure all fields returned are zero if we don't set them.
+  memset((void *)op_info, '\0', sizeof(struct LLVMOpInfo1));
+  op_info->Value = value;
+
+  // If the TagType is not the value 1 which it code knows about or if no
+  // verbose symbolic information is wanted then just return 0, indicating no
+  // information is being returned.
+  if (TagType != 1 || info->verbose == false)
+    return 0;
+
+  unsigned int Arch = info->O->getArch();
+  if (Arch == Triple::x86) {
+    return 0;
+  } else if (Arch == Triple::x86_64) {
+    if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+      return 0;
+    // First search the section's relocation entries (if any) for an entry
+    // for this section offset.
+    uint64_t sect_addr = info->S.getAddress();
+    uint64_t sect_offset = (Pc + Offset) - sect_addr;
+    bool reloc_found = false;
+    DataRefImpl Rel;
+    MachO::any_relocation_info RE;
+    bool isExtern = false;
+    SymbolRef Symbol;
+    for (const RelocationRef &Reloc : info->S.relocations()) {
+      uint64_t RelocOffset;
+      Reloc.getOffset(RelocOffset);
+      if (RelocOffset == sect_offset) {
+        Rel = Reloc.getRawDataRefImpl();
+        RE = info->O->getRelocation(Rel);
+        // NOTE: Scattered relocations don't exist on x86_64.
+        isExtern = info->O->getPlainRelocationExternal(RE);
+        if (isExtern) {
+          symbol_iterator RelocSym = Reloc.getSymbol();
+          Symbol = *RelocSym;
+        }
+        reloc_found = true;
+        break;
+      }
+    }
+    if (reloc_found && isExtern) {
+      // The Value passed in will be adjusted by the Pc if the instruction
+      // adds the Pc.  But for x86_64 external relocation entries the Value
+      // is the offset from the external symbol.
+      if (info->O->getAnyRelocationPCRel(RE))
+        op_info->Value -= Pc + Offset + Size;
+      StringRef SymName;
+      Symbol.getName(SymName);
+      const char *name = SymName.data();
+      unsigned Type = info->O->getAnyRelocationType(RE);
+      if (Type == MachO::X86_64_RELOC_SUBTRACTOR) {
+        DataRefImpl RelNext = Rel;
+        info->O->moveRelocationNext(RelNext);
+        MachO::any_relocation_info RENext = info->O->getRelocation(RelNext);
+        unsigned TypeNext = info->O->getAnyRelocationType(RENext);
+        bool isExternNext = info->O->getPlainRelocationExternal(RENext);
+        unsigned SymbolNum = info->O->getPlainRelocationSymbolNum(RENext);
+        if (TypeNext == MachO::X86_64_RELOC_UNSIGNED && isExternNext) {
+          op_info->SubtractSymbol.Present = 1;
+          op_info->SubtractSymbol.Name = name;
+          symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum);
+          Symbol = *RelocSymNext;
+          StringRef SymNameNext;
+          Symbol.getName(SymNameNext);
+          name = SymNameNext.data();
+        }
+      }
+      // TODO: add the VariantKinds to op_info->VariantKind for relocation types
+      // like: X86_64_RELOC_TLV, X86_64_RELOC_GOT_LOAD and X86_64_RELOC_GOT.
+      op_info->AddSymbol.Present = 1;
+      op_info->AddSymbol.Name = name;
+      return 1;
+    }
+    // TODO:
+    // Second search the external relocation entries of a fully linked image
+    // (if any) for an entry that matches this segment offset.
+    //uint64_t seg_offset = (Pc + Offset);
+    return 0;
+  } else if (Arch == Triple::arm) {
+    return 0;
+  } else if (Arch == Triple::aarch64) {
+    return 0;
+  } else {
+    return 0;
+  }
+}
+
+// GuessCstringPointer is passed the address of what might be a pointer to a
+// literal string in a cstring section.  If that address is in a cstring section
+// it returns a pointer to that string.  Else it returns nullptr.
+const char *GuessCstringPointer(uint64_t ReferenceValue,
+                                struct DisassembleInfo *info) {
+  uint32_t LoadCommandCount = info->O->getHeader().ncmds;
+  MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
+  for (unsigned I = 0;; ++I) {
+    if (Load.C.cmd == MachO::LC_SEGMENT_64) {
+      MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
+      for (unsigned J = 0; J < Seg.nsects; ++J) {
+        MachO::section_64 Sec = info->O->getSection64(Load, J);
+        uint32_t section_type = Sec.flags & MachO::SECTION_TYPE;
+        if (section_type == MachO::S_CSTRING_LITERALS &&
+            ReferenceValue >= Sec.addr &&
+            ReferenceValue < Sec.addr + Sec.size) {
+          uint64_t sect_offset = ReferenceValue - Sec.addr;
+          uint64_t object_offset = Sec.offset + sect_offset;
+          StringRef MachOContents = info->O->getData();
+          uint64_t object_size = MachOContents.size();
+          const char *object_addr = (const char *)MachOContents.data();
+          if (object_offset < object_size) {
+            const char *name = object_addr + object_offset;
+            return name;
+          } else {
+            return nullptr;
+          }
+        }
+      }
+    } else if (Load.C.cmd == MachO::LC_SEGMENT) {
+      MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load);
+      for (unsigned J = 0; J < Seg.nsects; ++J) {
+        MachO::section Sec = info->O->getSection(Load, J);
+        uint32_t section_type = Sec.flags & MachO::SECTION_TYPE;
+        if (section_type == MachO::S_CSTRING_LITERALS &&
+            ReferenceValue >= Sec.addr &&
+            ReferenceValue < Sec.addr + Sec.size) {
+          uint64_t sect_offset = ReferenceValue - Sec.addr;
+          uint64_t object_offset = Sec.offset + sect_offset;
+          StringRef MachOContents = info->O->getData();
+          uint64_t object_size = MachOContents.size();
+          const char *object_addr = (const char *)MachOContents.data();
+          if (object_offset < object_size) {
+            const char *name = object_addr + object_offset;
+            return name;
+          } else {
+            return nullptr;
+          }
+        }
+      }
+    }
+    if (I == LoadCommandCount - 1)
+      break;
+    else
+      Load = info->O->getNextLoadCommandInfo(Load);
+  }
+  return nullptr;
+}
+
+// GuessIndirectSymbol returns the name of the indirect symbol for the
+// ReferenceValue passed in or nullptr.  This is used when ReferenceValue maybe
+// an address of a symbol stub or a lazy or non-lazy pointer to associate the
+// symbol name being referenced by the stub or pointer.
+static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
+                                       struct DisassembleInfo *info) {
+  uint32_t LoadCommandCount = info->O->getHeader().ncmds;
+  MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
+  MachO::dysymtab_command Dysymtab = info->O->getDysymtabLoadCommand();
+  MachO::symtab_command Symtab = info->O->getSymtabLoadCommand();
+  for (unsigned I = 0;; ++I) {
+    if (Load.C.cmd == MachO::LC_SEGMENT_64) {
+      MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
+      for (unsigned J = 0; J < Seg.nsects; ++J) {
+        MachO::section_64 Sec = info->O->getSection64(Load, J);
+        uint32_t section_type = Sec.flags & MachO::SECTION_TYPE;
+        if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS ||
+             section_type == MachO::S_LAZY_SYMBOL_POINTERS ||
+             section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS ||
+             section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS ||
+             section_type == MachO::S_SYMBOL_STUBS) &&
+            ReferenceValue >= Sec.addr &&
+            ReferenceValue < Sec.addr + Sec.size) {
+          uint32_t stride;
+          if (section_type == MachO::S_SYMBOL_STUBS)
+            stride = Sec.reserved2;
+          else
+            stride = 8;
+          if (stride == 0)
+            return nullptr;
+          uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
+          if (index < Dysymtab.nindirectsyms) {
+            uint32_t indirect_symbol =
+              info->O->getIndirectSymbolTableEntry(Dysymtab, index);
+            if (indirect_symbol < Symtab.nsyms) {
+              symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
+              SymbolRef Symbol = *Sym;
+              StringRef SymName;
+              Symbol.getName(SymName);
+              const char *name = SymName.data();
+              return name;
+            }
+          }
+        }
+      }
+    } else if (Load.C.cmd == MachO::LC_SEGMENT) {
+      MachO::segment_command Seg = info->O->getSegmentLoadCommand(Load);
+      for (unsigned J = 0; J < Seg.nsects; ++J) {
+        MachO::section Sec = info->O->getSection(Load, J);
+        uint32_t section_type = Sec.flags & MachO::SECTION_TYPE;
+        if ((section_type == MachO::S_NON_LAZY_SYMBOL_POINTERS ||
+             section_type == MachO::S_LAZY_SYMBOL_POINTERS ||
+             section_type == MachO::S_LAZY_DYLIB_SYMBOL_POINTERS ||
+             section_type == MachO::S_THREAD_LOCAL_VARIABLE_POINTERS ||
+             section_type == MachO::S_SYMBOL_STUBS) &&
+            ReferenceValue >= Sec.addr &&
+            ReferenceValue < Sec.addr + Sec.size) {
+          uint32_t stride;
+          if (section_type == MachO::S_SYMBOL_STUBS)
+            stride = Sec.reserved2;
+          else
+            stride = 4;
+          if (stride == 0)
+            return nullptr;
+          uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
+          if (index < Dysymtab.nindirectsyms) {
+            uint32_t indirect_symbol =
+              info->O->getIndirectSymbolTableEntry(Dysymtab, index);
+            if (indirect_symbol < Symtab.nsyms) {
+              symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
+              SymbolRef Symbol = *Sym;
+              StringRef SymName;
+              Symbol.getName(SymName);
+              const char *name = SymName.data();
+              return name;
+            }
+          }
+        }
+      }
+    }
+    if (I == LoadCommandCount - 1)
+      break;
+    else
+      Load = info->O->getNextLoadCommandInfo(Load);
+  }
+  return nullptr;
+}
+
+// GuessLiteralPointer returns a string which for the item in the Mach-O file
+// for the address passed in as ReferenceValue for printing as a comment with
+// the instruction and also returns the corresponding type of that item
+// indirectly through ReferenceType.
+//
+// If ReferenceValue is an address of literal cstring then a pointer to the
+// cstring is returned and ReferenceType is set to
+// LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr .
+//
+// TODO: other literals such as Objective-C CFStrings refs, Selector refs,
+// Message refs, Class refs and a Symbol address in a literal pool are yet
+// to be done here.
+const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
+                                uint64_t *ReferenceType,
+                                struct DisassembleInfo *info) {
+  // TODO: This rouine's code is only for an x86_64 Mach-O file for now.
+  unsigned int Arch = info->O->getArch();
+  if (Arch != Triple::x86_64)
+    return nullptr;
+
+  // First see if there is an external relocation entry at the ReferencePC.
+  uint64_t sect_addr = info->S.getAddress();
+  uint64_t sect_offset = ReferencePC - sect_addr;
+  bool reloc_found = false;
+  DataRefImpl Rel;
+  MachO::any_relocation_info RE;
+  bool isExtern = false;
+  SymbolRef Symbol;
+  for (const RelocationRef &Reloc : info->S.relocations()) {
+    uint64_t RelocOffset;
+    Reloc.getOffset(RelocOffset);
+    if (RelocOffset == sect_offset) {
+      Rel = Reloc.getRawDataRefImpl();
+      RE = info->O->getRelocation(Rel);
+      if (info->O->isRelocationScattered(RE))
+        continue;
+      isExtern = info->O->getPlainRelocationExternal(RE);
+      if (isExtern) {
+        symbol_iterator RelocSym = Reloc.getSymbol();
+        Symbol = *RelocSym;
+      }
+      reloc_found = true;
+      break;
+    }
+  }
+  // If there is an external relocation entry for a symbol in a section
+  // then used that symbol's value for the value of the reference.
+  if (reloc_found && isExtern) {
+    if (info->O->getAnyRelocationPCRel(RE)) {
+      unsigned Type = info->O->getAnyRelocationType(RE);
+      if (Type == MachO::X86_64_RELOC_SIGNED) {
+        Symbol.getAddress(ReferenceValue);
+      }
+    }
+  }
+
+  // TODO: the code to look for other literals such as Objective-C CFStrings
+  // refs, Selector refs, Message refs, Class refs will be added here.
+
+  const char *name = GuessCstringPointer(ReferenceValue, info);
+  if (name) {
+    // TODO: note when the code is added above for Selector refs and Message
+    // refs we will need check for that here and set the ReferenceType
+    // accordingly.
+    *ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr;
+    return name;
+  }
+
+  // TODO: look for an indirect symbol with this ReferenceValue which is in
+  // a literal pool.
+
+  return nullptr;
+}
+
+// SymbolizerSymbolLookUp is the symbol lookup function passed when creating
+// the Symbolizer.  It looks up the ReferenceValue using the info passed via the
+// pointer to the struct DisassembleInfo that was passed when MCSymbolizer
+// is created and returns the symbol name that matches the ReferenceValue or
+// nullptr if none.  The ReferenceType is passed in for the IN type of
+// reference the instruction is making from the values in defined in the header
+// "llvm-c/Disassembler.h".  On return the ReferenceType can set to a specific
+// Out type and the ReferenceName will also be set which is added as a comment
+// to the disassembled instruction.
+//
+// If the symbol name is a C++ mangled name then the demangled name is
+// returned through ReferenceName and ReferenceType is set to
+// LLVMDisassembler_ReferenceType_DeMangled_Name .
+//
+// When this is called to get a symbol name for a branch target then the
+// ReferenceType will be LLVMDisassembler_ReferenceType_In_Branch and then
+// SymbolValue will be looked for in the indirect symbol table to determine if
+// it is an address for a symbol stub.  If so then the symbol name for that
+// stub is returned indirectly through ReferenceName and then ReferenceType is
+// set to LLVMDisassembler_ReferenceType_Out_SymbolStub.
+//
+// When this is called with an value loaded via a PC relative load then
+// ReferenceType will be LLVMDisassembler_ReferenceType_In_PCrel_Load then the
+// SymbolValue is checked to be an address of literal pointer, symbol pointer,
+// or an Objective-C meta data reference.  If so the output ReferenceType is
+// set to correspond to that as well as ReferenceName.
+const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue,
+                                   uint64_t *ReferenceType,
+                                   uint64_t ReferencePC,
+                                   const char **ReferenceName) {
+  struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
+  // If no verbose symbolic information is wanted then just return nullptr.
+  if (info->verbose == false) {
+    *ReferenceName = nullptr;
+    *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+    return nullptr;
+  }
+
+  const char *SymbolName = nullptr;
+  StringRef name = info->AddrMap->lookup(ReferenceValue);
+  if (!name.empty())
+    SymbolName = name.data();
+
+  if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) {
+    *ReferenceName = GuessIndirectSymbol(ReferenceValue, info);
+    if (*ReferenceName)
+      *ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
+    else
+      *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+  }
+  else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
+    *ReferenceName = GuessLiteralPointer(ReferenceValue, ReferencePC,
+                                         ReferenceType, info);
+    if (*ReferenceName == nullptr)
+      *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+  // TODO: other types of references to be added.
+  } else {
+    *ReferenceName = nullptr;
+    *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+  }
+
+  return SymbolName;
+}
+
+//
+// This is the memory object used by DisAsm->getInstruction() which has its
+// BasePC.  This then allows the 'address' parameter to getInstruction() to
+// be the actual PC of the instruction.  Then when a branch dispacement is
+// added to the PC of an instruction, the 'ReferenceValue' passed to the
+// SymbolizerSymbolLookUp() routine is the correct target addresses.  As in
+// the case of a fully linked Mach-O file where a section being disassembled
+// generally not linked at address zero.
+//
+class DisasmMemoryObject : public MemoryObject {
+  const uint8_t *Bytes;
+  uint64_t Size;
+  uint64_t BasePC;
+public:
+  DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC) :
+                     Bytes(bytes), Size(size), BasePC(basePC) {}
+
+  uint64_t getBase() const override { return BasePC; }
+  uint64_t getExtent() const override { return Size; }
+
+  int readByte(uint64_t Addr, uint8_t *Byte) const override {
+    if (Addr - BasePC >= Size)
+      return -1;
+    *Byte = Bytes[Addr - BasePC];
+    return 0;
+  }
+};
+
+/// \brief Emits the comments that are stored in the CommentStream.
+/// Each comment in the CommentStream must end with a newline.
+static void emitComments(raw_svector_ostream &CommentStream,
+                         SmallString<128> &CommentsToEmit,
+                         formatted_raw_ostream &FormattedOS,
+                         const MCAsmInfo &MAI) {
+  // Flush the stream before taking its content.
+  CommentStream.flush();
+  StringRef Comments = CommentsToEmit.str();
+  // Get the default information for printing a comment.
+  const char *CommentBegin = MAI.getCommentString();
+  unsigned CommentColumn = MAI.getCommentColumn();
+  bool IsFirst = true;
+  while (!Comments.empty()) {
+    if (!IsFirst)
+      FormattedOS << '\n';
+    // Emit a line of comments.
+    FormattedOS.PadToColumn(CommentColumn);
+    size_t Position = Comments.find('\n');
+    FormattedOS << CommentBegin << ' ' << Comments.substr(0, Position);
+    // Move after the newline character.
+    Comments = Comments.substr(Position + 1);
+    IsFirst = false;
+  }
+  FormattedOS.flush();
+
+  // Tell the comment stream that the vector changed underneath it.
+  CommentsToEmit.clear();
+  CommentStream.resync();
+}
+
 static void DisassembleInputMachO2(StringRef Filename,
                                    MachOObjectFile *MachOOF) {
   const char *McpuDefault = nullptr;
@@ -264,11 +737,27 @@ static void DisassembleInputMachO2(StringRef Filename,
   std::unique_ptr<const MCSubtargetInfo> STI(
       TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
   MCContext Ctx(AsmInfo.get(), MRI.get(), nullptr);
-  std::unique_ptr<const MCDisassembler> DisAsm(
+  std::unique_ptr<MCDisassembler> DisAsm(
       TheTarget->createMCDisassembler(*STI, Ctx));
+  std::unique_ptr<MCSymbolizer> Symbolizer;
+  struct DisassembleInfo SymbolizerInfo;
+  std::unique_ptr<MCRelocationInfo> RelInfo(
+      TheTarget->createMCRelocationInfo(TripleName, Ctx));
+  if (RelInfo) {
+    Symbolizer.reset(TheTarget->createMCSymbolizer(
+        TripleName, SymbolizerGetOpInfo, SymbolizerSymbolLookUp,
+        &SymbolizerInfo, &Ctx, RelInfo.release()));
+    DisAsm->setSymbolizer(std::move(Symbolizer));
+  }
   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
   std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
       AsmPrinterVariant, *AsmInfo, *InstrInfo, *MRI, *STI));
+  // Set the display preference for hex vs. decimal immediates.
+  IP->setPrintImmHex(PrintImmHex);
+  // Comment stream and backing vector.
+  SmallString<128> CommentsToEmit;
+  raw_svector_ostream CommentStream(CommentsToEmit);
+  IP->setCommentStream(CommentStream);
 
   if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
     errs() << "error: couldn't initialize disassembler for target "
@@ -291,10 +780,13 @@ static void DisassembleInputMachO2(StringRef Filename,
         ThumbTarget->createMCSubtargetInfo(ThumbTripleName, MCPU, FeaturesStr));
     ThumbCtx.reset(new MCContext(ThumbAsmInfo.get(), ThumbMRI.get(), nullptr));
     ThumbDisAsm.reset(ThumbTarget->createMCDisassembler(*ThumbSTI, *ThumbCtx));
+    // TODO: add MCSymbolizer here for the ThumbTarget like above for TheTarget.
     int ThumbAsmPrinterVariant = ThumbAsmInfo->getAssemblerDialect();
     ThumbIP.reset(ThumbTarget->createMCInstPrinter(
         ThumbAsmPrinterVariant, *ThumbAsmInfo, *ThumbInstrInfo, *ThumbMRI,
         *ThumbSTI));
+    // Set the display preference for hex vs. decimal immediates.
+    ThumbIP->setPrintImmHex(PrintImmHex);
   }
 
   if (ThumbTarget && (!ThumbInstrAnalysis || !ThumbAsmInfo || !ThumbSTI ||
@@ -326,7 +818,7 @@ static void DisassembleInputMachO2(StringRef Filename,
   // Build a data in code table that is sorted on by the address of each entry.
   uint64_t BaseAddress = 0;
   if (Header.filetype == MachO::MH_OBJECT)
-    Sections[0].getAddress(BaseAddress);
+    BaseAddress = Sections[0].getAddress();
   else
     BaseAddress = BaseSegmentAddress;
   DiceTable Dices;
@@ -369,8 +861,7 @@ static void DisassembleInputMachO2(StringRef Filename,
 
   for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
 
-    bool SectIsText = false;
-    Sections[SectIdx].isText(SectIsText);
+    bool SectIsText = Sections[SectIdx].isText();
     if (SectIsText == false)
       continue;
 
@@ -387,15 +878,17 @@ static void DisassembleInputMachO2(StringRef Filename,
 
     StringRef Bytes;
     Sections[SectIdx].getContents(Bytes);
-    StringRefMemoryObject memoryObject(Bytes);
+    uint64_t SectAddress = Sections[SectIdx].getAddress();
+    DisasmMemoryObject MemoryObject((const uint8_t *)Bytes.data(), Bytes.size(),
+                                    SectAddress);
     bool symbolTableWorked = false;
 
     // Parse relocations.
     std::vector<std::pair<uint64_t, SymbolRef>> Relocs;
     for (const RelocationRef &Reloc : Sections[SectIdx].relocations()) {
-      uint64_t RelocOffset, SectionAddress;
+      uint64_t RelocOffset;
       Reloc.getOffset(RelocOffset);
-      Sections[SectIdx].getAddress(SectionAddress);
+      uint64_t SectionAddress = Sections[SectIdx].getAddress();
       RelocOffset -= SectionAddress;
 
       symbol_iterator RelocSym = Reloc.getSymbol();
@@ -404,6 +897,27 @@ static void DisassembleInputMachO2(StringRef Filename,
     }
     array_pod_sort(Relocs.begin(), Relocs.end());
 
+    // Create a map of symbol addresses to symbol names for use by
+    // the SymbolizerSymbolLookUp() routine.
+    SymbolAddressMap AddrMap;
+    for (const SymbolRef &Symbol : MachOOF->symbols()) {
+      SymbolRef::Type ST;
+      Symbol.getType(ST);
+      if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
+          ST == SymbolRef::ST_Other) {
+        uint64_t Address;
+        Symbol.getAddress(Address);
+        StringRef SymName;
+        Symbol.getName(SymName);
+        AddrMap[Address] = SymName;
+      }
+    }
+    // Set up the block of info used by the Symbolizer call backs.
+    SymbolizerInfo.verbose = true;
+    SymbolizerInfo.O = MachOOF;
+    SymbolizerInfo.S = Sections[SectIdx];
+    SymbolizerInfo.AddrMap = &AddrMap;
+
     // Disassemble symbol by symbol.
     for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
       StringRef SymName;
@@ -415,15 +929,13 @@ static void DisassembleInputMachO2(StringRef Filename,
         continue;
 
       // Make sure the symbol is defined in this section.
-      bool containsSym = false;
-      Sections[SectIdx].containsSymbol(Symbols[SymIdx], containsSym);
+      bool containsSym = Sections[SectIdx].containsSymbol(Symbols[SymIdx]);
       if (!containsSym)
         continue;
 
       // Start at the address of the symbol relative to the section's address.
-      uint64_t SectionAddress = 0;
       uint64_t Start = 0;
-      Sections[SectIdx].getAddress(SectionAddress);
+      uint64_t SectionAddress = Sections[SectIdx].getAddress();
       Symbols[SymIdx].getAddress(Start);
       Start -= SectionAddress;
 
@@ -436,8 +948,8 @@ static void DisassembleInputMachO2(StringRef Filename,
         SymbolRef::Type NextSymType;
         Symbols[NextSymIdx].getType(NextSymType);
         if (NextSymType == SymbolRef::ST_Function) {
-          Sections[SectIdx].containsSymbol(Symbols[NextSymIdx],
-                                           containsNextSym);
+          containsNextSym =
+              Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]);
           Symbols[NextSymIdx].getAddress(NextSym);
           NextSym -= SectionAddress;
           break;
@@ -445,8 +957,7 @@ static void DisassembleInputMachO2(StringRef Filename,
         ++NextSymIdx;
       }
 
-      uint64_t SectSize;
-      Sections[SectIdx].getSize(SectSize);
+      uint64_t SectSize = Sections[SectIdx].getSize();
       uint64_t End = containsNextSym ?  NextSym : SectSize;
       uint64_t Size;
 
@@ -461,14 +972,22 @@ static void DisassembleInputMachO2(StringRef Filename,
       for (uint64_t Index = Start; Index < End; Index += Size) {
         MCInst Inst;
 
-        uint64_t SectAddress = 0;
-        Sections[SectIdx].getAddress(SectAddress);
-        outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
+        uint64_t PC = SectAddress + Index;
+        if (FullLeadingAddr) {
+          if (MachOOF->is64Bit())
+            outs() << format("%016" PRIx64, PC);
+           else
+            outs() << format("%08" PRIx64, PC);
+        } else {
+          outs() << format("%8" PRIx64 ":", PC);
+        }
+        if (!NoShowRawInsn)
+          outs() << "\t";
 
         // Check the data in code table here to see if this is data not an
         // instruction to be disassembled.
         DiceTable Dice;
-        Dice.push_back(std::make_pair(SectAddress + Index, DiceRef()));
+        Dice.push_back(std::make_pair(PC, DiceRef()));
         dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
                                               Dice.begin(), Dice.end(),
                                               compareDiceTableEntries);
@@ -482,24 +1001,33 @@ static void DisassembleInputMachO2(StringRef Filename,
           continue;
         }
 
+        SmallVector<char, 64> AnnotationsBytes;
+        raw_svector_ostream Annotations(AnnotationsBytes);
+
         bool gotInst;
         if (isThumb)
-          gotInst = ThumbDisAsm->getInstruction(Inst, Size, memoryObject, Index,
-                                     DebugOut, nulls());
+          gotInst = ThumbDisAsm->getInstruction(Inst, Size, MemoryObject, PC,
+                                                DebugOut, Annotations);
         else
-          gotInst = DisAsm->getInstruction(Inst, Size, memoryObject, Index,
-                                     DebugOut, nulls());
+          gotInst = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
+                                           DebugOut, Annotations);
         if (gotInst) {
-          DumpBytes(StringRef(Bytes.data() + Index, Size));
+          if (!NoShowRawInsn) {
+            DumpBytes(StringRef(Bytes.data() + Index, Size));
+          }
+          formatted_raw_ostream FormattedOS(outs());
+          Annotations.flush();
+          StringRef AnnotationsStr = Annotations.str();
           if (isThumb)
-            ThumbIP->printInst(&Inst, outs(), "");
+            ThumbIP->printInst(&Inst, FormattedOS, AnnotationsStr);
           else
-            IP->printInst(&Inst, outs(), "");
+            IP->printInst(&Inst, FormattedOS, AnnotationsStr);
+          emitComments(CommentStream, CommentsToEmit, FormattedOS, *AsmInfo);
 
           // Print debug info.
           if (diContext) {
             DILineInfo dli =
-              diContext->getLineInfoForAddress(SectAddress + Index);
+              diContext->getLineInfoForAddress(PC);
             // Print valid line info if it changed.
             if (dli != lastLine && dli.Line != 0)
               outs() << "\t## " << dli.FileName << ':' << dli.Line << ':'
@@ -515,19 +1043,28 @@ static void DisassembleInputMachO2(StringRef Filename,
       }
     }
     if (!symbolTableWorked) {
-      // Reading the symbol table didn't work, disassemble the whole section. 
-      uint64_t SectAddress;
-      Sections[SectIdx].getAddress(SectAddress);
-      uint64_t SectSize;
-      Sections[SectIdx].getSize(SectSize);
+      // Reading the symbol table didn't work, disassemble the whole section.
+      uint64_t SectAddress = Sections[SectIdx].getAddress();
+      uint64_t SectSize = Sections[SectIdx].getSize();
       uint64_t InstSize;
       for (uint64_t Index = 0; Index < SectSize; Index += InstSize) {
         MCInst Inst;
 
-        if (DisAsm->getInstruction(Inst, InstSize, memoryObject, Index,
+        uint64_t PC = SectAddress + Index;
+        if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC,
                                    DebugOut, nulls())) {
-          outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
-          DumpBytes(StringRef(Bytes.data() + Index, InstSize));
+          if (FullLeadingAddr) {
+            if (MachOOF->is64Bit())
+              outs() << format("%016" PRIx64, PC);
+             else
+              outs() << format("%08" PRIx64, PC);
+          } else {
+            outs() << format("%8" PRIx64 ":", PC);
+          }
+          if (!NoShowRawInsn) {
+            outs() << "\t";
+            DumpBytes(StringRef(Bytes.data() + Index, InstSize));
+          }
           IP->printInst(&Inst, outs(), "");
           outs() << "\n";
         } else {
@@ -613,8 +1150,7 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
   auto RE = Obj->getRelocation(Reloc.getRawDataRefImpl());
   SectionRef RelocSection = Obj->getRelocationSection(RE);
 
-  uint64_t SectionAddr;
-  RelocSection.getAddress(SectionAddr);
+  uint64_t SectionAddr = RelocSection.getAddress();
 
   auto Sym = Symbols.upper_bound(Addr);
   if (Sym == Symbols.begin()) {
@@ -2030,7 +2566,12 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t ncmds,
     } else if (Command.C.cmd == MachO::LC_MAIN) {
       MachO::entry_point_command Ep = Obj->getEntryPointCommand(Command);
       PrintEntryPointCommand(Ep);
-    } else if (Command.C.cmd == MachO::LC_LOAD_DYLIB) {
+    } else if (Command.C.cmd == MachO::LC_LOAD_DYLIB ||
+               Command.C.cmd == MachO::LC_ID_DYLIB ||
+               Command.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
+               Command.C.cmd == MachO::LC_REEXPORT_DYLIB ||
+               Command.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
+               Command.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
       MachO::dylib_command Dl = Obj->getDylibIDLoadCommand(Command);
       PrintDylibCommand(Dl, Command.Ptr);
     } else if (Command.C.cmd == MachO::LC_CODE_SIGNATURE ||
@@ -2185,10 +2726,8 @@ SegInfo::SegInfo(const object::MachOObjectFile *Obj) {
     SectionInfo Info;
     if (error(Section.getName(Info.SectionName)))
       return;
-    if (error(Section.getAddress(Info.Address)))
-      return;
-    if (error(Section.getSize(Info.Size)))
-      return;
+    Info.Address = Section.getAddress();
+    Info.Size = Section.getSize();
     Info.SegmentName =
         Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl());
     if (!Info.SegmentName.equals(CurSegName)) {
@@ -2264,9 +2803,15 @@ static StringRef ordinalName(const object::MachOObjectFile *Obj, int Ordinal) {
   case MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP:
     return "flat-namespace";
   default:
-    Obj->getLibraryShortNameByIndex(Ordinal-1, DylibName);
-    return DylibName;
+    if (Ordinal > 0) {
+      std::error_code EC = Obj->getLibraryShortNameByIndex(Ordinal-1, 
+                                                           DylibName);
+      if (EC)
+        return "<<bad library ordinal>>";
+      return DylibName;
+    }
   }
+  return "<<unknown special ordinal>>";
 }
 
 //===----------------------------------------------------------------------===//
@@ -2277,8 +2822,8 @@ void llvm::printMachOBindTable(const object::MachOObjectFile *Obj) {
   // Build table of sections so names can used in final output.
   SegInfo sectionTable(Obj);
 
-  outs() << "segment  section            address     type     "
-            "addend   dylib               symbol\n";
+  outs() << "segment  section            address    type       "
+            "addend dylib            symbol\n";
   for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable()) {
     uint32_t SegIndex = Entry.segmentIndex();
     uint64_t OffsetInSeg = Entry.segmentOffset();
@@ -2288,18 +2833,17 @@ void llvm::printMachOBindTable(const object::MachOObjectFile *Obj) {
 
     // Table lines look like:
     //  __DATA  __got  0x00012010    pointer   0 libSystem ___stack_chk_guard
-    outs() << format("%-8s %-18s 0x%08" PRIX64 "  %-8s %-8" PRId64 " %-20s",
-                     SegmentName.str().c_str(),
-                     SectionName.str().c_str(),
-                     Address,
-                     Entry.typeName().str().c_str(),
-                     Entry.addend(),
-                     ordinalName(Obj, Entry.ordinal()))
-           << Entry.symbolName();
+    StringRef Attr;
     if (Entry.flags() & MachO::BIND_SYMBOL_FLAGS_WEAK_IMPORT)
-      outs() << " (weak_import)\n";
-    else
-      outs() << "\n";
+      Attr = " (weak_import)";
+    outs() << left_justify(SegmentName, 8)  << " "
+           << left_justify(SectionName, 18) << " "
+           << format_hex(Address, 10, true) << " "
+           << left_justify(Entry.typeName(), 8) << " "
+           << format_decimal(Entry.addend(), 8)  << " "  
+           << left_justify(ordinalName(Obj, Entry.ordinal()), 16) << " "
+           << Entry.symbolName() 
+           << Attr << "\n";
   }
 }
 
@@ -2311,8 +2855,8 @@ void llvm::printMachOLazyBindTable(const object::MachOObjectFile *Obj) {
   // Build table of sections so names can used in final output.
   SegInfo sectionTable(Obj);
 
-  outs() << "segment  section            address      "
-            "dylib               symbol\n";
+  outs() << "segment  section            address     "
+            "dylib            symbol\n";
   for (const llvm::object::MachOBindEntry &Entry : Obj->lazyBindTable()) {
     uint32_t SegIndex = Entry.segmentIndex();
     uint64_t OffsetInSeg = Entry.segmentOffset();
@@ -2322,11 +2866,10 @@ void llvm::printMachOLazyBindTable(const object::MachOObjectFile *Obj) {
 
     // Table lines look like:
     //  __DATA  __got  0x00012010 libSystem ___stack_chk_guard
-    outs() << format("%-8s %-18s 0x%08" PRIX64 "   %-20s",
-                     SegmentName.str().c_str(),
-                     SectionName.str().c_str(),
-                     Address,
-                     ordinalName(Obj, Entry.ordinal()))
+    outs() << left_justify(SegmentName, 8)  << " "
+           << left_justify(SectionName, 18) << " "
+           << format_hex(Address, 10, true) << " "
+           << left_justify(ordinalName(Obj, Entry.ordinal()), 16) << " "
            << Entry.symbolName() << "\n";
   }
 }
@@ -2340,12 +2883,12 @@ void llvm::printMachOWeakBindTable(const object::MachOObjectFile *Obj) {
   // Build table of sections so names can used in final output.
   SegInfo sectionTable(Obj);
 
-  outs() << "segment  section            address      "
-            "type     addend   symbol\n";
+  outs() << "segment  section            address     "
+            "type       addend   symbol\n";
   for (const llvm::object::MachOBindEntry &Entry : Obj->weakBindTable()) {
     // Strong symbols don't have a location to update.
     if (Entry.flags() & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) {
-      outs() << "                                          strong            "
+      outs() << "                                        strong              "
              << Entry.symbolName() << "\n";
       continue;
     }
@@ -2357,12 +2900,11 @@ void llvm::printMachOWeakBindTable(const object::MachOObjectFile *Obj) {
 
     // Table lines look like:
     // __DATA  __data  0x00001000  pointer    0   _foo
-    outs() << format("%-8s %-18s 0x%08" PRIX64 "   %-8s %-8" PRId64 " ",
-                     SegmentName.str().c_str(),
-                     SectionName.str().c_str(),
-                     Address,
-                     Entry.typeName().str().c_str(),
-                     Entry.addend())
+    outs() << left_justify(SegmentName, 8)  << " "
+           << left_justify(SectionName, 18) << " "
+           << format_hex(Address, 10, true) << " "
+           << left_justify(Entry.typeName(), 8) << " "
+           << format_decimal(Entry.addend(), 8)  << "   "  
            << Entry.symbolName() << "\n";
   }
 }