From 6ac90f1dbd012ebe97616b1dd45df8e52c119d1e Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Wed, 18 Nov 2015 02:49:19 +0000 Subject: [PATCH] [llvm-objdump] Use the COFF export table for additional symbols Most linked executables do not have a symbol table in COFF. However, it is pretty typical to have some export entries. Use those entries to inform the disassembler about potential function definitions and call targets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253429 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/ARM/inlineasm-switch-mode.ll | 4 +- .../X86/Inputs/disassemble.dll.coff-i386 | Bin 0 -> 1536 bytes .../X86/coff-disassemble-export.test | 8 + tools/llvm-objdump/llvm-objdump.cpp | 175 ++++++++++++------ 4 files changed, 128 insertions(+), 59 deletions(-) create mode 100755 test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 create mode 100644 test/tools/llvm-objdump/X86/coff-disassemble-export.test diff --git a/test/CodeGen/ARM/inlineasm-switch-mode.ll b/test/CodeGen/ARM/inlineasm-switch-mode.ll index 65fea114d7d..6035612788d 100644 --- a/test/CodeGen/ARM/inlineasm-switch-mode.ll +++ b/test/CodeGen/ARM/inlineasm-switch-mode.ll @@ -15,8 +15,8 @@ define hidden i32 @bah(i8* %start) #0 align 2 { ; ARM: $t ; ARM-NEXT: 48 1c -; THUMB: $a +; THUMB: $a{{.*}}: ; THUMB-NEXT: 04 70 ; THUMB-NEXT: 2d e5 -; THUMB: $t +; THUMB: $t{{.*}}: ; THUMB-NEXT: 48 1c adds r0, r1, #1 diff --git a/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 b/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 new file mode 100755 index 0000000000000000000000000000000000000000..c0fbc88036305c35cd29378d1eece2698757a0bf GIT binary patch literal 1536 zcmeZ`n!v!!z`(!)#Q*;@Fzf)*Am9Kd@e?4meDb^a7`AYpRoubob5=1VBePhcpeR4R zC^1(dIWaFUzeFJ^RiUUfPa!i;!NosVAvZrIRgagKO9g7Qcg~A@{Jd?wFkDoU*2@Rv z7nMXo=?E|#l$o3XQUh~QfGY!o4#0AU3n z2C+bFpd1^Ri=Y_{AmSilCI$yquqKdNkOnXfr4$$#+(5zu1oTQ$D@uTNYl7?txeLj! zAR!P?U|>klD@sW$Nrbu;C;(Omiex0 + +// CHECK-LABEL: f: +// CHECK: calll -24 diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 27a36c052cc..df6e2a45862 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -886,27 +886,66 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } // Create a mapping from virtual address to symbol name. This is used to - // pretty print the target of a call. - std::vector> AllSymbols; - if (MIA) { - for (const SymbolRef &Symbol : Obj->symbols()) { - if (Symbol.getType() != SymbolRef::ST_Function) - continue; + // pretty print the symbols while disassembling. + typedef std::vector> SectionSymbolsTy; + std::map AllSymbols; + for (const SymbolRef &Symbol : Obj->symbols()) { + ErrorOr AddressOrErr = Symbol.getAddress(); + error(AddressOrErr.getError()); + uint64_t Address = *AddressOrErr; + + ErrorOr Name = Symbol.getName(); + error(Name.getError()); + if (Name->empty()) + continue; + + ErrorOr SectionOrErr = Symbol.getSection(); + error(SectionOrErr.getError()); + section_iterator SecI = *SectionOrErr; + if (SecI == Obj->section_end()) + continue; - ErrorOr AddressOrErr = Symbol.getAddress(); - error(AddressOrErr.getError()); - uint64_t Address = *AddressOrErr; + AllSymbols[*SecI].emplace_back(Address, *Name); + } - ErrorOr Name = Symbol.getName(); - error(Name.getError()); - if (Name->empty()) + // Create a mapping from virtual address to section. + std::vector> SectionAddresses; + for (SectionRef Sec : Obj->sections()) + SectionAddresses.emplace_back(Sec.getAddress(), Sec); + array_pod_sort(SectionAddresses.begin(), SectionAddresses.end()); + + // Linked executables (.exe and .dll files) typically don't include a real + // symbol table but they might contain an export table. + if (const auto *COFFObj = dyn_cast(Obj)) { + for (const auto &ExportEntry : COFFObj->export_directories()) { + StringRef Name; + error(ExportEntry.getSymbolName(Name)); + if (Name.empty()) continue; - AllSymbols.push_back(std::make_pair(Address, *Name)); - } + uint32_t RVA; + error(ExportEntry.getExportRVA(RVA)); + + uint64_t VA = COFFObj->getImageBase() + RVA; + auto Sec = std::upper_bound( + SectionAddresses.begin(), SectionAddresses.end(), VA, + [](uint64_t LHS, const std::pair &RHS) { + return LHS < RHS.first; + }); + if (Sec != SectionAddresses.begin()) + --Sec; + else + Sec = SectionAddresses.end(); - array_pod_sort(AllSymbols.begin(), AllSymbols.end()); + if (Sec != SectionAddresses.end()) + AllSymbols[Sec->second].emplace_back(VA, Name); + } } + // Sort all the symbols, this allows us to use a simple binary search to find + // a symbol near an address. + for (std::pair &SecSyms : AllSymbols) + array_pod_sort(SecSyms.second.begin(), SecSyms.second.end()); + for (const SectionRef &Section : ToolSectionFilter(*Obj)) { if (!DisassembleAll && (!Section.isText() || Section.isVirtual())) continue; @@ -916,33 +955,21 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (!SectSize) continue; - // Make a list of all the symbols in this section. - std::vector> Symbols; + // Get the list of all the symbols in this section. + SectionSymbolsTy &Symbols = AllSymbols[Section]; std::vector DataMappingSymsAddr; std::vector TextMappingSymsAddr; - for (const SymbolRef &Symbol : Obj->symbols()) { - if (Section.containsSymbol(Symbol)) { - ErrorOr AddressOrErr = Symbol.getAddress(); - error(AddressOrErr.getError()); - uint64_t Address = *AddressOrErr; - Address -= SectionAddr; - if (Address >= SectSize) - continue; - - ErrorOr Name = Symbol.getName(); - error(Name.getError()); - Symbols.push_back(std::make_pair(Address, *Name)); - if (Obj->isELF() && Obj->getArch() == Triple::aarch64) { - if (Name->startswith("$d")) - DataMappingSymsAddr.push_back(Address); - if (Name->startswith("$x")) - TextMappingSymsAddr.push_back(Address); - } + if (Obj->isELF() && Obj->getArch() == Triple::aarch64) { + for (const auto &Symb : Symbols) { + uint64_t Address = Symb.first; + StringRef Name = Symb.second; + if (Name.startswith("$d")) + DataMappingSymsAddr.push_back(Address); + if (Name.startswith("$x")) + TextMappingSymsAddr.push_back(Address); } } - // Sort the symbols by address, just in case they didn't come in that way. - array_pod_sort(Symbols.begin(), Symbols.end()); std::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end()); std::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end()); @@ -991,11 +1018,16 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { // Disassemble symbol by symbol. for (unsigned si = 0, se = Symbols.size(); si != se; ++si) { - uint64_t Start = Symbols[si].first; - // The end is either the section end or the beginning of the next symbol. - uint64_t End = (si == se - 1) ? SectSize : Symbols[si + 1].first; + uint64_t Start = Symbols[si].first - SectionAddr; + // The end is either the section end or the beginning of the next + // symbol. + uint64_t End = + (si == se - 1) ? SectSize : Symbols[si + 1].first - SectionAddr; + // Don't try to disassemble beyond the end of section contents. + if (End > SectSize) + End = SectSize; // If this symbol has the same address as the next symbol, then skip it. - if (Start == End) + if (Start >= End) continue; outs() << '\n' << Symbols[si].second << ":\n"; @@ -1056,26 +1088,55 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { SectionAddr + Index, outs(), "", *STI); outs() << CommentStream.str(); Comments.clear(); + + // Try to resolve the target of a call, tail call, etc. to a specific + // symbol. if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) || MIA->isConditionalBranch(Inst))) { uint64_t Target; if (MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target)) { - auto TargetSym = std::upper_bound( - AllSymbols.begin(), AllSymbols.end(), Target, - [](uint64_t LHS, const std::pair &RHS) { - return LHS < RHS.first; - }); - if (TargetSym != AllSymbols.begin()) - --TargetSym; - else - TargetSym = AllSymbols.end(); - - if (TargetSym != AllSymbols.end()) { - outs() << " <" << TargetSym->second; - uint64_t Disp = Target - TargetSym->first; - if (Disp) - outs() << '+' << utohexstr(Disp); - outs() << '>'; + // In a relocatable object, the target's section must reside in + // the same section as the call instruction or it is accessed + // through a relocation. + // + // In a non-relocatable object, the target may be in any section. + // + // N.B. We don't walk the relocations in the relocatable case yet. + auto *TargetSectionSymbols = &Symbols; + if (!Obj->isRelocatableObject()) { + auto SectionAddress = std::upper_bound( + SectionAddresses.begin(), SectionAddresses.end(), Target, + [](uint64_t LHS, + const std::pair &RHS) { + return LHS < RHS.first; + }); + if (SectionAddress != SectionAddresses.begin()) { + --SectionAddress; + TargetSectionSymbols = &AllSymbols[SectionAddress->second]; + } else { + TargetSectionSymbols = nullptr; + } + } + + // Find the first symbol in the section whose offset is less than + // or equal to the target. + if (TargetSectionSymbols) { + auto TargetSym = std::upper_bound( + TargetSectionSymbols->begin(), TargetSectionSymbols->end(), + Target, [](uint64_t LHS, + const std::pair &RHS) { + return LHS < RHS.first; + }); + if (TargetSym != Symbols.begin()) { + --TargetSym; + uint64_t TargetAddress = std::get<0>(*TargetSym); + StringRef TargetName = std::get<1>(*TargetSym); + outs() << " <" << TargetName; + uint64_t Disp = Target - TargetAddress; + if (Disp) + outs() << '+' << utohexstr(Disp); + outs() << '>'; + } } } } -- 2.34.1