From: Reid Kleckner
Date: Mon, 10 Aug 2015 21:47:11 +0000 (+0000)
Subject: [llvm-symbolizer] Remove underscores and other C mangling on Windows
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=b28578be8a0244f1b268229d15efce35989e0813

[llvm-symbolizer] Remove underscores and other C mangling on Windows

Summary:
This makes it so that reports symbolized after the fact with
llvm-symbolizer are more similar to the ones we generate at runtime with
in-process dbghelp.

Reviewers: samsonov

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D11785

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244512 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp b/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp
index f1f98af4190..e317ed33589 100644
--- a/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp
+++ b/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp
@@ -16,3 +16,10 @@ int main() {
   NS::Foo f;
   f.bar();
 }
+
+extern "C" {
+void __cdecl foo_cdecl() {}
+void __stdcall foo_stdcall() {}
+void __fastcall foo_fastcall() {}
+void __vectorcall foo_vectorcall() {}
+}
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe
index 80fb34bb6dc..a4f148e67c2 100644
Binary files a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe and b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe differ
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
index affda60449b..f8344470150 100644
--- a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
+++ b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
@@ -1,4 +1,8 @@
+0x401000
+0x401010
+0x401070
 0x401030
 0x401040
+0x401050
 0x401060
 0x500000
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb b/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb
index 974e565e87f..d26d33a862d 100644
Binary files a/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb and b/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb differ
diff --git a/test/tools/llvm-symbolizer/pdb/pdb.test b/test/tools/llvm-symbolizer/pdb/pdb.test
index b5d0f15fbcb..958a5a7e1a8 100644
--- a/test/tools/llvm-symbolizer/pdb/pdb.test
+++ b/test/tools/llvm-symbolizer/pdb/pdb.test
@@ -1,18 +1,26 @@
-RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" < "%p/Inputs/test.exe.input" | \
-RUN: FileCheck %s --check-prefix=CHECK
-RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false < \
-RUN: "%p/Inputs/test.exe.input" | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
-
-CHECK: foo(void)
-CHECK-NEXT: test.cpp:10
-CHECK: _main
-CHECK-NEXT: test.cpp:13:0
-CHECK: NS::Foo::bar(void)
-CHECK-NEXT: test.cpp:6:0
-
-CHECK-NO-DEMANGLE: foo
-CHECK-NO-DEMANGLE-NEXT: test.cpp:10
-CHECK-NO-DEMANGLE: _main
-CHECK-LINKAGE-NAME-NEXT: test.cpp:13:0
-CHECK-NO-DEMANGLE: bar
-CHECK-LINKAGE-NAME-NEXT: test.cpp:6:0
+RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" < "%p/Inputs/test.exe.input" | \
+RUN: FileCheck %s --check-prefix=CHECK
+RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false < \
+RUN: "%p/Inputs/test.exe.input" | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
+
+CHECK: foo(void)
+CHECK-NEXT: test.cpp:10
+CHECK: main
+CHECK-NEXT: test.cpp:13:0
+CHECK: NS::Foo::bar(void)
+CHECK-NEXT: test.cpp:6:0
+CHECK: {{^foo_cdecl$}}
+CHECK: {{^foo_stdcall$}}
+CHECK: {{^foo_fastcall$}}
+CHECK: {{^foo_vectorcall$}}
+
+CHECK-NO-DEMANGLE: ?foo@@YAXXZ
+CHECK-NO-DEMANGLE-NEXT: test.cpp:10
+CHECK-NO-DEMANGLE: _main
+CHECK-NO-DEMANGLE-NEXT: test.cpp:13
+CHECK-NO-DEMANGLE: ?bar@Foo@NS@@QAEXXZ
+CHECK-NO-DEMANGLE-NEXT: test.cpp:6
+CHECK-NO-DEMANGLE: _foo_cdecl
+CHECK-NO-DEMANGLE: _foo_stdcall@0
+CHECK-NO-DEMANGLE: @foo_fastcall@0
+CHECK-NO-DEMANGLE: foo_vectorcall@@0
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp
index c57c219b11d..497207ed4ca 100644
--- a/tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/COFF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/DataExtractor.h"
@@ -34,6 +35,11 @@
 #include <Windows.h>
 #include <DbgHelp.h>
 #pragma comment(lib, "dbghelp.lib")
+
+// Windows.h conflicts with our COFF header definitions.
+#ifdef IMAGE_FILE_MACHINE_I386
+#undef IMAGE_FILE_MACHINE_I386
+#endif
 #endif
 
 namespace llvm {
@@ -114,6 +120,12 @@ void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize,
   M.insert(std::make_pair(SD, SymbolName));
 }
 
+// Return true if this is a 32-bit x86 PE COFF module.
+bool ModuleInfo::isWin32Module() const {
+  auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
+  return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
+}
+
 bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
                                         std::string &Name, uint64_t &Addr,
                                         uint64_t &Size) const {
@@ -197,7 +209,7 @@ std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
                                           uint64_t ModuleOffset) {
   ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
   if (!Info)
-    return printDILineInfo(DILineInfo());
+    return printDILineInfo(DILineInfo(), Info);
   if (Opts.PrintInlining) {
     DIInliningInfo InlinedContext =
         Info->symbolizeInlinedCode(ModuleOffset, Opts);
@@ -206,12 +218,12 @@ std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
     std::string Result;
     for (uint32_t i = 0; i < FramesNum; i++) {
       DILineInfo LineInfo = InlinedContext.getFrame(i);
-      Result += printDILineInfo(LineInfo);
+      Result += printDILineInfo(LineInfo, Info);
     }
     return Result;
   }
   DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
-  return printDILineInfo(LineInfo);
+  return printDILineInfo(LineInfo, Info);
 }
 
 std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
@@ -222,7 +234,7 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
   if (Opts.UseSymbolTable) {
     if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
       if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
-        Name = DemangleName(Name);
+        Name = DemangleName(Name, Info);
     }
   }
   std::stringstream ss;
@@ -474,7 +486,8 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
   return Info;
 }
 
-std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
+std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo,
+                                            ModuleInfo *ModInfo) const {
   // By default, DILineInfo contains "<invalid>" for function/filename it
   // cannot fetch. We replace it to "??" to make our output closer to addr2line.
   static const std::string kDILineInfoBadString = "<invalid>";
@@ -484,7 +497,7 @@ std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
     if (FunctionName == kDILineInfoBadString)
       FunctionName = kBadString;
     else if (Opts.Demangle)
-      FunctionName = DemangleName(FunctionName);
+      FunctionName = DemangleName(FunctionName, ModInfo);
     Result << FunctionName << "\n";
   }
   std::string Filename = LineInfo.FileName;
@@ -494,38 +507,75 @@ std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
   return Result.str();
 }
 
+// Undo these various manglings for Win32 extern "C" functions:
+//   cdecl       - _foo
+//   stdcall     - _foo@12
+//   fastcall    - @foo@12
+//   vectorcall  - foo@@12
+// These are all different linkage names for 'foo'.
+static StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
+  // Remove any '_' or '@' prefix.
+  char Front = SymbolName.empty() ? '\0' : SymbolName[0];
+  if (Front == '_' || Front == '@')
+    SymbolName = SymbolName.drop_front();
+
+  // Remove any '@[0-9]+' suffix.
+  if (Front != '?') {
+    size_t AtPos = SymbolName.rfind('@');
+    if (AtPos != StringRef::npos &&
+        std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
+                    [](char C) { return C >= '0' && C <= '9'; })) {
+      SymbolName = SymbolName.substr(0, AtPos);
+    }
+  }
+
+  // Remove any ending '@' for vectorcall.
+  if (SymbolName.endswith("@"))
+    SymbolName = SymbolName.drop_back();
+
+  return SymbolName;
+}
+
 #if !defined(_MSC_VER)
 // Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
 extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
                                 size_t *length, int *status);
 #endif
 
-std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
+std::string LLVMSymbolizer::DemangleName(const std::string &Name,
+                                         ModuleInfo *ModInfo) {
 #if !defined(_MSC_VER)
   // We can spoil names of symbols with C linkage, so use an heuristic
   // approach to check if the name should be demangled.
-  if (Name.substr(0, 2) != "_Z")
-    return Name;
-  int status = 0;
-  char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
-  if (status != 0)
-    return Name;
-  std::string Result = DemangledName;
-  free(DemangledName);
-  return Result;
+  if (Name.substr(0, 2) == "_Z") {
+    int status = 0;
+    char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
+    if (status != 0)
+      return Name;
+    std::string Result = DemangledName;
+    free(DemangledName);
+    return Result;
+  }
 #else
-  char DemangledName[1024] = {0};
-  DWORD result = ::UnDecorateSymbolName(
-      Name.c_str(), DemangledName, 1023,
-      UNDNAME_NO_ACCESS_SPECIFIERS |       // Strip public, private, protected
-      UNDNAME_NO_ALLOCATION_LANGUAGE |     // Strip __thiscall, __stdcall, etc
-      UNDNAME_NO_THROW_SIGNATURES |        // Strip throw() specifications
-      UNDNAME_NO_MEMBER_TYPE |             // Strip virtual, static, etc specifiers
-      UNDNAME_NO_MS_KEYWORDS |             // Strip all MS extension keywords
-      UNDNAME_NO_FUNCTION_RETURNS);        // Strip function return types
-
-  return (result == 0) ? Name : std::string(DemangledName);
+  if (!Name.empty() && Name.front() == '?') {
+    // Only do MSVC C++ demangling on symbols starting with '?'.
+    char DemangledName[1024] = {0};
+    DWORD result = ::UnDecorateSymbolName(
+        Name.c_str(), DemangledName, 1023,
+        UNDNAME_NO_ACCESS_SPECIFIERS |       // Strip public, private, protected
+        UNDNAME_NO_ALLOCATION_LANGUAGE |     // Strip __thiscall, __stdcall, etc
+        UNDNAME_NO_THROW_SIGNATURES |        // Strip throw() specifications
+        UNDNAME_NO_MEMBER_TYPE |             // Strip virtual, static, etc specifiers
+        UNDNAME_NO_MS_KEYWORDS |             // Strip all MS extension keywords
+        UNDNAME_NO_FUNCTION_RETURNS);        // Strip function return types
+    return (result == 0) ? Name : std::string(DemangledName);
+  }
 #endif
+  // ModInfo may be null (symbolizeCode forwards a null Info when the module
+  // cannot be loaded), so check it before querying the module kind.
+  if (ModInfo && ModInfo->isWin32Module())
+    return std::string(demanglePE32ExternCFunc(Name));
+  return Name;
 }
 
 } // namespace symbolize
diff --git a/tools/llvm-symbolizer/LLVMSymbolize.h b/tools/llvm-symbolizer/LLVMSymbolize.h
index be246c3f871..b52c76036e1 100644
--- a/tools/llvm-symbolizer/LLVMSymbolize.h
+++ b/tools/llvm-symbolizer/LLVMSymbolize.h
@@ -63,7 +63,8 @@ public:
   std::string
   symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset);
   void flush();
-  static std::string DemangleName(const std::string &Name);
+  static std::string DemangleName(const std::string &Name, ModuleInfo *ModInfo);
+
 private:
   typedef std::pair<ObjectFile*, ObjectFile*> ObjectPair;
 
@@ -78,7 +79,7 @@ private:
   /// universal binary (or the binary itself if it is an object file).
   ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName);
 
-  std::string printDILineInfo(DILineInfo LineInfo) const;
+  std::string printDILineInfo(DILineInfo LineInfo, ModuleInfo *ModInfo) const;
 
   // Owns all the parsed binaries and object files.
   SmallVector<std::unique_ptr<Binary>, 4> ParsedBinariesAndObjects;
@@ -113,6 +114,9 @@ public:
   bool symbolizeData(uint64_t ModuleOffset, std::string &Name, uint64_t &Start,
                      uint64_t &Size) const;
 
+  // Return true if this is a 32-bit x86 PE COFF module.
+  bool isWin32Module() const;
+
 private:
   bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
                               std::string &Name, uint64_t &Addr,