Update llvm-objdump’s Mach-O symbolizer code to demangle C++ names.
author Kevin Enderby <enderby@apple.com>
Tue, 28 Oct 2014 23:39:46 +0000 (23:39 +0000)
committer Kevin Enderby <enderby@apple.com>
Tue, 28 Oct 2014 23:39:46 +0000 (23:39 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220833 91177308-0d34-0410-b5e6-96231b3b80d8

test/tools/llvm-objdump/X86/Inputs/hello_cpp.exe.macho-x86_64 [new file with mode: 0755]
test/tools/llvm-objdump/X86/macho-symbolized-disassembly.test
tools/llvm-objdump/MachODump.cpp

diff --git a/test/tools/llvm-objdump/X86/Inputs/hello_cpp.exe.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/hello_cpp.exe.macho-x86_64
new file mode 100755 (executable)
index 0000000..6b54b15
Binary files /dev/null and b/test/tools/llvm-objdump/X86/Inputs/hello_cpp.exe.macho-x86_64 differ
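diff --git a/test/tools/llvm-objdump/X86/macho-symbolized-disassembly.test b/test/tools/llvm-objdump/X86/macho-symbolized-disassembly.test
index c8322bc7df667ac3c0a5891b22ec053dd2b47932..e33d3b560733bc27bffdddb9d3a3c0b579a60f4e 100644 (file)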
@@ -2,6 +2,7 @@
 // RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.exe.macho-x86_64 | FileCheck %s -check-prefix=EXE
 // RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.obj.macho-x86_64 | FileCheck %s -check-prefix=ObjC-OBJ
 // RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.exe.macho-x86_64 | FileCheck %s -check-prefix=ObjC-EXE
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello_cpp.exe.macho-x86_64 | FileCheck %s -check-prefix=CXX-EXE
 
 OBJ: 0000000000000008  leaq    L_.str(%rip), %rax      ## literal pool for: "Hello world\n"
 OBJ: 0000000000000026  callq   _printf
@@ -23,3 +24,5 @@ ObjC-EXE: 0000000100000f14    movq    0x125(%rip), %rax       ## Objc class ref: _OBJC
 ObjC-EXE: 0000000100000f1b     movq    0x10e(%rip), %rsi       ## Objc selector ref: date
 ObjC-EXE: 0000000100000f25     callq   0x100000f4a             ## Objc message: +[NSDate date]
 ObjC-EXE: 0000000100000f33     callq   0x100000f44             ## symbol stub for: _NSLog
+
+CXX-EXE: 00000001000014cb      callq   __ZNSt3__116__pad_and_outputIcNS_11char_traitsIcEEEENS_19ostreambuf_iteratorIT_T0_EES6_PKS4_S8_S8_RNS_8ios_baseES4_ ## std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> > std::__1::__pad_and_output<char, std::__1::char_traits<char> >(std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> >, char const*, char const*, char const*, std::__1::ios_base&, char)
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 9c3dfec92e4606f167c96da4152815025efa23d4..7d7eb80569ca52d294101d181222eb253c8fca24 100644 (file)
@@ -16,6 +16,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Config/config.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include <algorithm>
 #include <cstring>
 #include <system_error>
+
+#if HAVE_CXXABI_H
+#include <cxxabi.h>
+#endif
+
 using namespace llvm;
 using namespace object;
 
@@ -249,6 +255,7 @@ struct DisassembleInfo {
   const char *class_name;
   const char *selector_name;
   char *method;
+  char *demangled_name;
   BindTable *bindtable;
 };
 
@@ -1045,9 +1052,11 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
 // Out type and the ReferenceName will also be set which is added as a comment
 // to the disassembled instruction.
 //
-// TODO: If the symbol name is a C++ mangled name then the demangled name is
+#if HAVE_CXXABI_H
+// If the symbol name is a C++ mangled name then the demangled name is
 // returned through ReferenceName and ReferenceType is set to
 // LLVMDisassembler_ReferenceType_DeMangled_Name .
+#endif
 //
 // When this is called to get a symbol name for a branch target then the
 // ReferenceType will be LLVMDisassembler_ReferenceType_In_Branch and then
@@ -1083,13 +1092,25 @@ const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue,
 
   if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) {
     *ReferenceName = GuessIndirectSymbol(ReferenceValue, info);
-    if (*ReferenceName) {
+    if (*ReferenceName != nullptr) {
       method_reference(info, ReferenceType, ReferenceName);
       if (*ReferenceType != LLVMDisassembler_ReferenceType_Out_Objc_Message)
         *ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
     } else
-      // TODO: if SymbolName is not nullptr see if it is a C++ name
-      // and demangle it.
+#if HAVE_CXXABI_H
+    if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) {
+      if (info->demangled_name != nullptr)
+        free(info->demangled_name);
+      int status;
+      info->demangled_name = abi::__cxa_demangle(SymbolName + 1, nullptr,
+                                                 nullptr, &status);
+      if (info->demangled_name != nullptr) {
+        *ReferenceName = info->demangled_name;
+        *ReferenceType = LLVMDisassembler_ReferenceType_DeMangled_Name;
+      } else
+        *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+    } else
+#endif
       *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
   } else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
     *ReferenceName =
@@ -1099,8 +1120,19 @@ const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue,
     else
       *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
   }
-  // TODO: if SymbolName is not nullptr see if it is a C++ name
-  // and demangle it.
+#if HAVE_CXXABI_H
+  else if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) {
+    if (info->demangled_name != nullptr)
+      free(info->demangled_name);
+    int status;
+    info->demangled_name = abi::__cxa_demangle(SymbolName + 1, nullptr, nullptr,
+                                               &status);
+    if (info->demangled_name != nullptr) {
+      *ReferenceName = info->demangled_name;
+      *ReferenceType = LLVMDisassembler_ReferenceType_DeMangled_Name;
+    }
+  }
+#endif
   else {
     *ReferenceName = nullptr;
     *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
@@ -1392,6 +1424,7 @@ static void DisassembleInputMachO2(StringRef Filename,
     SymbolizerInfo.class_name = nullptr;
     SymbolizerInfo.selector_name = nullptr;
     SymbolizerInfo.method = nullptr;
+    SymbolizerInfo.demangled_name = nullptr;
     SymbolizerInfo.bindtable = nullptr;
 
     // Disassemble symbol by symbol.
@@ -1569,6 +1602,8 @@ static void DisassembleInputMachO2(StringRef Filename,
     }
     if (SymbolizerInfo.method != nullptr)
       free(SymbolizerInfo.method);
+    if (SymbolizerInfo.demangled_name != nullptr)
+      free(SymbolizerInfo.demangled_name);
     if (SymbolizerInfo.bindtable != nullptr)
       delete SymbolizerInfo.bindtable;
   }