X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=tools%2Fllvm-symbolizer%2FLLVMSymbolize.cpp;h=51bb965b8df4b0db7c9d4fa5047608ebf54cb345;hb=f890225b786ddf6845bdfbdc9ba4a6f9f00645b1;hp=afb7cc81c824cfd0abe51a9ed18dae16b75a8392;hpb=43afa429082f1a90e2ca22d73456c2219ec8d774;p=oota-llvm.git

Review note: this copy differs from the patch as fetched in two added lines.
  1. addCoffExportSymbols: NextOffset is taken from the following export
     (I + 1). The fetched text tested `I != E` (always true inside the loop)
     and read I->Offset, so every export was recorded with size 0.
  2. DemangleName: ModInfo is checked for null before isWin32Module() is
     called; symbolizeCode passes a null Info through printDILineInfo on its
     `!Info` path.
Both changes replace a single line, so the hunk line counts are unchanged.

diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp
index afb7cc81c82..51bb965b8df 100644
--- a/tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -19,6 +19,8 @@
 #include "llvm/DebugInfo/PDB/PDBContext.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/COFF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/DataExtractor.h"
@@ -32,6 +34,12 @@
 #if defined(_MSC_VER)
 #include <Windows.h>
 #include <DbgHelp.h>
+#pragma comment(lib, "dbghelp.lib")
+
+// Windows.h conflicts with our COFF header definitions.
+#ifdef IMAGE_FILE_MACHINE_I386
+#undef IMAGE_FILE_MACHINE_I386
+#endif
 #endif
 
 namespace llvm {
@@ -71,31 +79,68 @@ ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
       }
     }
   }
-  for (const SymbolRef &Symbol : Module->symbols()) {
-    addSymbol(Symbol, OpdExtractor.get(), OpdAddress);
+  std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
+      computeSymbolSizes(*Module);
+  for (auto &P : Symbols)
+    addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
+
+  // If this is a COFF object and we didn't find any symbols, try the export
+  // table.
+  if (Symbols.empty()) {
+    if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
+      addCoffExportSymbols(CoffObj);
   }
-  bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end());
-  if (NoSymbolTable && Module->isELF()) {
-    // Fallback to dynamic symbol table, if regular symbol table is stripped.
-    std::pair<symbol_iterator, symbol_iterator> IDyn =
-        getELFDynamicSymbolIterators(Module);
-    for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) {
-      addSymbol(*si, OpdExtractor.get(), OpdAddress);
-    }
+}
+
+namespace {
+struct OffsetNamePair {
+  uint32_t Offset;
+  StringRef Name;
+  bool operator<(const OffsetNamePair &R) const {
+    return Offset < R.Offset;
   }
+};
 }
 
-void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
-                           uint64_t OpdAddress) {
-  SymbolRef::Type SymbolType;
-  if (error(Symbol.getType(SymbolType)))
+void ModuleInfo::addCoffExportSymbols(const COFFObjectFile *CoffObj) {
+  // Get all export names and offsets.
+  std::vector<OffsetNamePair> ExportSyms;
+  for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
+    StringRef Name;
+    uint32_t Offset;
+    if (error(Ref.getSymbolName(Name)) || error(Ref.getExportRVA(Offset)))
+      return;
+    ExportSyms.push_back(OffsetNamePair{Offset, Name});
+  }
+  if (ExportSyms.empty())
     return;
+
+  // Sort by ascending offset.
+  array_pod_sort(ExportSyms.begin(), ExportSyms.end());
+
+  // Approximate the symbol sizes by assuming they run to the next symbol.
+  // FIXME: This assumes all exports are functions.
+  uint64_t ImageBase = CoffObj->getImageBase();
+  for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
+    OffsetNamePair &Export = *I;
+    // FIXME: The last export has a one byte size now.
+    uint32_t NextOffset = (I + 1) != E ? (I + 1)->Offset : Export.Offset + 1;
+    uint64_t SymbolStart = ImageBase + Export.Offset;
+    uint64_t SymbolSize = NextOffset - Export.Offset;
+    SymbolDesc SD = {SymbolStart, SymbolSize};
+    Functions.insert(std::make_pair(SD, Export.Name));
+  }
+}
+
+void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize,
+                           DataExtractor *OpdExtractor, uint64_t OpdAddress) {
+  SymbolRef::Type SymbolType = Symbol.getType();
   if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
     return;
-  uint64_t SymbolAddress;
-  if (error(Symbol.getAddress(SymbolAddress)) ||
-      SymbolAddress == UnknownAddressOrSize)
+  ErrorOr<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
+  if (error(SymbolAddressOrErr.getError()))
     return;
+  uint64_t SymbolAddress = *SymbolAddressOrErr;
   if (OpdExtractor) {
     // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
     // function descriptors. The first word of the descriptor is a pointer to
@@ -108,17 +153,10 @@ void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
         OpdExtractor->isValidOffsetForAddress(OpdOffset32))
       SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
   }
-  uint64_t SymbolSize;
-  // Getting symbol size is linear for Mach-O files, so assume that symbol
-  // occupies the memory range up to the following symbol.
-  if (isa<MachOObjectFile>(Module))
-    SymbolSize = 0;
-  else if (error(Symbol.getSize(SymbolSize)) ||
-           SymbolSize == UnknownAddressOrSize)
-    return;
-  StringRef SymbolName;
-  if (error(Symbol.getName(SymbolName)))
+  ErrorOr<StringRef> SymbolNameOrErr = Symbol.getName();
+  if (error(SymbolNameOrErr.getError()))
     return;
+  StringRef SymbolName = *SymbolNameOrErr;
   // Mach-O symbol table names have leading underscore, skip it.
   if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
     SymbolName = SymbolName.drop_front();
@@ -129,6 +167,18 @@ void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
   M.insert(std::make_pair(SD, SymbolName));
 }
 
+// Return true if this is a 32-bit x86 PE COFF module.
+bool ModuleInfo::isWin32Module() const {
+  auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
+  return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+uint64_t ModuleInfo::getModulePreferredBase() const {
+  if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
+    return CoffObject->getImageBase();
+  return 0;
+}
+
 bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
                                         std::string &Name, uint64_t &Addr,
                                         uint64_t &Size) const {
@@ -212,7 +262,13 @@ std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
                                           uint64_t ModuleOffset) {
   ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
   if (!Info)
-    return printDILineInfo(DILineInfo());
+    return printDILineInfo(DILineInfo(), Info);
+
+  // If the user is giving us relative addresses, add the preferred base of the
+  // object to the offset before we do the query. It's what DIContext expects.
+  if (Opts.RelativeAddresses)
+    ModuleOffset += Info->getModulePreferredBase();
+
   if (Opts.PrintInlining) {
     DIInliningInfo InlinedContext =
         Info->symbolizeInlinedCode(ModuleOffset, Opts);
@@ -221,12 +277,12 @@ std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
     std::string Result;
     for (uint32_t i = 0; i < FramesNum; i++) {
       DILineInfo LineInfo = InlinedContext.getFrame(i);
-      Result += printDILineInfo(LineInfo);
+      Result += printDILineInfo(LineInfo, Info);
     }
     return Result;
   }
   DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
-  return printDILineInfo(LineInfo);
+  return printDILineInfo(LineInfo, Info);
 }
 
 std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
@@ -236,8 +292,12 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
   uint64_t Size = 0;
   if (Opts.UseSymbolTable) {
     if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
+      // If the user is giving us relative addresses, add the preferred base of the
+      // object to the offset before we do the query. It's what DIContext expects.
+      if (Opts.RelativeAddresses)
+        ModuleOffset += Info->getModulePreferredBase();
       if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
-        Name = DemangleName(Name);
+        Name = DemangleName(Name, Info);
     }
   }
   std::stringstream ss;
@@ -434,7 +494,7 @@ LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin,
     if (I != ObjectFileForArch.end())
       return I->second;
     ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj =
-        UB->getObjectForArch(Triple(ArchName).getArch());
+        UB->getObjectForArch(ArchName);
     if (ParsedObj) {
       Res = ParsedObj.get().get();
       ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get()));
@@ -477,8 +537,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
       PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA,
                                            Objects.first->getFileName(), Session);
       if (Error == PDB_ErrorCode::Success) {
-        Context = new PDBContext(*CoffObject, std::move(Session),
-                                 Opts.RelativeAddresses);
+        Context = new PDBContext(*CoffObject, std::move(Session));
       }
     }
     if (!Context)
@@ -489,7 +548,8 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
   return Info;
 }
 
-std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
+std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo,
+                                            ModuleInfo *ModInfo) const {
   // By default, DILineInfo contains "<invalid>" for function/filename it
   // cannot fetch. We replace it to "??" to make our output closer to addr2line.
   static const std::string kDILineInfoBadString = "<invalid>";
@@ -499,7 +559,7 @@ std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
     if (FunctionName == kDILineInfoBadString)
       FunctionName = kBadString;
     else if (Opts.Demangle)
-      FunctionName = DemangleName(FunctionName);
+      FunctionName = DemangleName(FunctionName, ModInfo);
     Result << FunctionName << "\n";
   }
   std::string Filename = LineInfo.FileName;
@@ -509,38 +569,73 @@ std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
   return Result.str();
 }
 
+// Undo these various manglings for Win32 extern "C" functions:
+// cdecl - _foo
+// stdcall - _foo@12
+// fastcall - @foo@12
+// vectorcall - foo@@12
+// These are all different linkage names for 'foo'.
+static StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
+  // Remove any '_' or '@' prefix.
+  char Front = SymbolName.empty() ? '\0' : SymbolName[0];
+  if (Front == '_' || Front == '@')
+    SymbolName = SymbolName.drop_front();
+
+  // Remove any '@[0-9]+' suffix.
+  if (Front != '?') {
+    size_t AtPos = SymbolName.rfind('@');
+    if (AtPos != StringRef::npos &&
+        std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
+                    [](char C) { return C >= '0' && C <= '9'; })) {
+      SymbolName = SymbolName.substr(0, AtPos);
+    }
+  }
+
+  // Remove any ending '@' for vectorcall.
+  if (SymbolName.endswith("@"))
+    SymbolName = SymbolName.drop_back();
+
+  return SymbolName;
+}
+
 #if !defined(_MSC_VER)
 // Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
 extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
                                 size_t *length, int *status);
 #endif
 
-std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
+std::string LLVMSymbolizer::DemangleName(const std::string &Name,
+                                         ModuleInfo *ModInfo) {
 #if !defined(_MSC_VER)
   // We can spoil names of symbols with C linkage, so use an heuristic
   // approach to check if the name should be demangled.
-  if (Name.substr(0, 2) != "_Z")
-    return Name;
-  int status = 0;
-  char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
-  if (status != 0)
-    return Name;
-  std::string Result = DemangledName;
-  free(DemangledName);
-  return Result;
+  if (Name.substr(0, 2) == "_Z") {
+    int status = 0;
+    char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
+    if (status != 0)
+      return Name;
+    std::string Result = DemangledName;
+    free(DemangledName);
+    return Result;
+  }
 #else
-  char DemangledName[1024] = {0};
-  DWORD result = ::UnDecorateSymbolName(
-      Name.c_str(), DemangledName, 1023,
-      UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected
-      UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
-      UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications
-      UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
-      UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
-      UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
-
-  return (result == 0) ? Name : std::string(DemangledName);
+  if (!Name.empty() && Name.front() == '?') {
+    // Only do MSVC C++ demangling on symbols starting with '?'.
+    char DemangledName[1024] = {0};
+    DWORD result = ::UnDecorateSymbolName(
+        Name.c_str(), DemangledName, 1023,
+        UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected
+        UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
+        UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications
+        UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
+        UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
+        UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
+    return (result == 0) ? Name : std::string(DemangledName);
+  }
 #endif
+  if (ModInfo && ModInfo->isWin32Module())
+    return std::string(demanglePE32ExternCFunc(Name));
+  return Name;
 }
 
 } // namespace symbolize