[llvm-objdump] Use the COFF export table for additional symbols
authorDavid Majnemer <david.majnemer@gmail.com>
Wed, 18 Nov 2015 02:49:19 +0000 (02:49 +0000)
committerDavid Majnemer <david.majnemer@gmail.com>
Wed, 18 Nov 2015 02:49:19 +0000 (02:49 +0000)
Most linked executables do not have a symbol table in COFF.
However, it is pretty typical to have some export entries.  Use those
entries to inform the disassembler about potential function definitions
and call targets.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253429 91177308-0d34-0410-b5e6-96231b3b80d8

test/CodeGen/ARM/inlineasm-switch-mode.ll
test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 [new file with mode: 0755]
test/tools/llvm-objdump/X86/coff-disassemble-export.test [new file with mode: 0644]
tools/llvm-objdump/llvm-objdump.cpp

index 65fea114d7de81ce2d5a9eb6967828c3e73f172b..6035612788d80fdec4439ee0a434619024ae185c 100644 (file)
@@ -15,8 +15,8 @@ define hidden i32 @bah(i8* %start) #0 align 2 {
 ; ARM: $t
 ; ARM-NEXT: 48 1c
 
-; THUMB: $a
+; THUMB: $a{{.*}}:
 ; THUMB-NEXT: 04 70
 ; THUMB-NEXT: 2d e5
-; THUMB: $t
+; THUMB: $t{{.*}}:
 ; THUMB-NEXT: 48 1c   adds    r0, r1, #1
diff --git a/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 b/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386
new file mode 100755 (executable)
index 0000000..c0fbc88
Binary files /dev/null and b/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 differ
diff --git a/test/tools/llvm-objdump/X86/coff-disassemble-export.test b/test/tools/llvm-objdump/X86/coff-disassemble-export.test
new file mode 100644 (file)
index 0000000..2f0b211
--- /dev/null
@@ -0,0 +1,8 @@
+// RUN: llvm-objdump -d %p/Inputs/disassemble.dll.coff-i386 | \
+// RUN:   FileCheck %s
+
+// CHECK-LABEL: g:
+// CHECK: calll 8 <f>
+
+// CHECK-LABEL: f:
+// CHECK: calll -24 <g>
index 27a36c052cc4c3dbb79238ec09c832e0292247e5..df6e2a45862c2d57e42fd80488625b19f8962e97 100644 (file)
@@ -886,27 +886,66 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
   }
 
   // Create a mapping from virtual address to symbol name.  This is used to
-  // pretty print the target of a call.
-  std::vector<std::pair<uint64_t, StringRef>> AllSymbols;
-  if (MIA) {
-    for (const SymbolRef &Symbol : Obj->symbols()) {
-      if (Symbol.getType() != SymbolRef::ST_Function)
-        continue;
+  // pretty print the symbols while disassembling.
+  typedef std::vector<std::pair<uint64_t, StringRef>> SectionSymbolsTy;
+  std::map<SectionRef, SectionSymbolsTy> AllSymbols;
+  for (const SymbolRef &Symbol : Obj->symbols()) {
+    ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
+    error(AddressOrErr.getError());
+    uint64_t Address = *AddressOrErr;
+
+    ErrorOr<StringRef> Name = Symbol.getName();
+    error(Name.getError());
+    if (Name->empty())
+      continue;
+
+    ErrorOr<section_iterator> SectionOrErr = Symbol.getSection();
+    error(SectionOrErr.getError());
+    section_iterator SecI = *SectionOrErr;
+    if (SecI == Obj->section_end())
+      continue;
 
-      ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
-      error(AddressOrErr.getError());
-      uint64_t Address = *AddressOrErr;
+    AllSymbols[*SecI].emplace_back(Address, *Name);
+  }
 
-      ErrorOr<StringRef> Name = Symbol.getName();
-      error(Name.getError());
-      if (Name->empty())
+  // Create a mapping from virtual address to section.
+  std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses;
+  for (SectionRef Sec : Obj->sections())
+    SectionAddresses.emplace_back(Sec.getAddress(), Sec);
+  array_pod_sort(SectionAddresses.begin(), SectionAddresses.end());
+
+  // Linked executables (.exe and .dll files) typically don't include a real
+  // symbol table but they might contain an export table.
+  if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) {
+    for (const auto &ExportEntry : COFFObj->export_directories()) {
+      StringRef Name;
+      error(ExportEntry.getSymbolName(Name));
+      if (Name.empty())
         continue;
-      AllSymbols.push_back(std::make_pair(Address, *Name));
-    }
+      uint32_t RVA;
+      error(ExportEntry.getExportRVA(RVA));
+
+      uint64_t VA = COFFObj->getImageBase() + RVA;
+      auto Sec = std::upper_bound(
+          SectionAddresses.begin(), SectionAddresses.end(), VA,
+          [](uint64_t LHS, const std::pair<uint64_t, SectionRef> &RHS) {
+            return LHS < RHS.first;
+          });
+      if (Sec != SectionAddresses.begin())
+        --Sec;
+      else
+        Sec = SectionAddresses.end();
 
-    array_pod_sort(AllSymbols.begin(), AllSymbols.end());
+      if (Sec != SectionAddresses.end())
+        AllSymbols[Sec->second].emplace_back(VA, Name);
+    }
   }
 
+  // Sort all the symbols, this allows us to use a simple binary search to find
+  // a symbol near an address.
+  for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
+    array_pod_sort(SecSyms.second.begin(), SecSyms.second.end());
+
   for (const SectionRef &Section : ToolSectionFilter(*Obj)) {
     if (!DisassembleAll && (!Section.isText() || Section.isVirtual()))
       continue;
@@ -916,33 +955,21 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
     if (!SectSize)
       continue;
 
-    // Make a list of all the symbols in this section.
-    std::vector<std::pair<uint64_t, StringRef>> Symbols;
+    // Get the list of all the symbols in this section.
+    SectionSymbolsTy &Symbols = AllSymbols[Section];
     std::vector<uint64_t> DataMappingSymsAddr;
     std::vector<uint64_t> TextMappingSymsAddr;
-    for (const SymbolRef &Symbol : Obj->symbols()) {
-      if (Section.containsSymbol(Symbol)) {
-        ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
-        error(AddressOrErr.getError());
-        uint64_t Address = *AddressOrErr;
-        Address -= SectionAddr;
-        if (Address >= SectSize)
-          continue;
-
-        ErrorOr<StringRef> Name = Symbol.getName();
-        error(Name.getError());
-        Symbols.push_back(std::make_pair(Address, *Name));
-        if (Obj->isELF() && Obj->getArch() == Triple::aarch64) {
-          if (Name->startswith("$d"))
-            DataMappingSymsAddr.push_back(Address);
-          if (Name->startswith("$x"))
-            TextMappingSymsAddr.push_back(Address);
-        }
+    if (Obj->isELF() && Obj->getArch() == Triple::aarch64) {
+      for (const auto &Symb : Symbols) {
+        uint64_t Address = Symb.first;
+        StringRef Name = Symb.second;
+        if (Name.startswith("$d"))
+          DataMappingSymsAddr.push_back(Address);
+        if (Name.startswith("$x"))
+          TextMappingSymsAddr.push_back(Address);
       }
     }
 
-    // Sort the symbols by address, just in case they didn't come in that way.
-    array_pod_sort(Symbols.begin(), Symbols.end());
     std::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end());
     std::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end());
 
@@ -991,11 +1018,16 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
     // Disassemble symbol by symbol.
     for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
 
-      uint64_t Start = Symbols[si].first;
-      // The end is either the section end or the beginning of the next symbol.
-      uint64_t End = (si == se - 1) ? SectSize : Symbols[si + 1].first;
+      uint64_t Start = Symbols[si].first - SectionAddr;
+      // The end is either the section end or the beginning of the next
+      // symbol.
+      uint64_t End =
+          (si == se - 1) ? SectSize : Symbols[si + 1].first - SectionAddr;
+      // Don't try to disassemble beyond the end of section contents.
+      if (End > SectSize)
+        End = SectSize;
       // If this symbol has the same address as the next symbol, then skip it.
-      if (Start == End)
+      if (Start >= End)
         continue;
 
       outs() << '\n' << Symbols[si].second << ":\n";
@@ -1056,26 +1088,55 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
                         SectionAddr + Index, outs(), "", *STI);
           outs() << CommentStream.str();
           Comments.clear();
+
+          // Try to resolve the target of a call, tail call, etc. to a specific
+          // symbol.
           if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) ||
                       MIA->isConditionalBranch(Inst))) {
             uint64_t Target;
             if (MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target)) {
-              auto TargetSym = std::upper_bound(
-                  AllSymbols.begin(), AllSymbols.end(), Target,
-                  [](uint64_t LHS, const std::pair<uint64_t, StringRef> &RHS) {
-                    return LHS < RHS.first;
-                  });
-              if (TargetSym != AllSymbols.begin())
-                --TargetSym;
-              else
-                TargetSym = AllSymbols.end();
-
-              if (TargetSym != AllSymbols.end()) {
-                outs() << " <" << TargetSym->second;
-                uint64_t Disp = Target - TargetSym->first;
-                if (Disp)
-                  outs() << '+' << utohexstr(Disp);
-                outs() << '>';
+              // In a relocatable object, the target's section must reside in
+              // the same section as the call instruction or it is accessed
+              // through a relocation.
+              //
+              // In a non-relocatable object, the target may be in any section.
+              //
+              // N.B. We don't walk the relocations in the relocatable case yet.
+              auto *TargetSectionSymbols = &Symbols;
+              if (!Obj->isRelocatableObject()) {
+                auto SectionAddress = std::upper_bound(
+                    SectionAddresses.begin(), SectionAddresses.end(), Target,
+                    [](uint64_t LHS,
+                       const std::pair<uint64_t, SectionRef> &RHS) {
+                      return LHS < RHS.first;
+                    });
+                if (SectionAddress != SectionAddresses.begin()) {
+                  --SectionAddress;
+                  TargetSectionSymbols = &AllSymbols[SectionAddress->second];
+                } else {
+                  TargetSectionSymbols = nullptr;
+                }
+              }
+
+              // Find the first symbol in the section whose offset is less than
+              // or equal to the target.
+              if (TargetSectionSymbols) {
+                auto TargetSym = std::upper_bound(
+                    TargetSectionSymbols->begin(), TargetSectionSymbols->end(),
+                    Target, [](uint64_t LHS,
+                               const std::pair<uint64_t, StringRef> &RHS) {
+                      return LHS < RHS.first;
+                    });
+                if (TargetSym != Symbols.begin()) {
+                  --TargetSym;
+                  uint64_t TargetAddress = std::get<0>(*TargetSym);
+                  StringRef TargetName = std::get<1>(*TargetSym);
+                  outs() << " <" << TargetName;
+                  uint64_t Disp = Target - TargetAddress;
+                  if (Disp)
+                    outs() << '+' << utohexstr(Disp);
+                  outs() << '>';
+                }
               }
             }
           }