X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=tools%2Fllvm-symbolizer%2FLLVMSymbolize.cpp;h=51bb965b8df4b0db7c9d4fa5047608ebf54cb345;hb=f890225b786ddf6845bdfbdc9ba4a6f9f00645b1;hp=71588e171fa9db0266301ee2d5c44e90b0d1ed4d;hpb=22939fa0a4d8e139b14fb0d8c915bb45ce53600e;p=oota-llvm.git diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp index 71588e171fa..51bb965b8df 100644 --- a/tools/llvm-symbolizer/LLVMSymbolize.cpp +++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp @@ -14,97 +14,188 @@ #include "LLVMSymbolize.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/PDB/PDB.h" +#include "llvm/DebugInfo/PDB/PDBContext.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include #include +#if defined(_MSC_VER) +#include +#include +#pragma comment(lib, "dbghelp.lib") + +// Windows.h conflicts with our COFF header definitions. +#ifdef IMAGE_FILE_MACHINE_I386 +#undef IMAGE_FILE_MACHINE_I386 +#endif +#endif + namespace llvm { namespace symbolize { -static bool error(error_code ec) { +static bool error(std::error_code ec) { if (!ec) return false; errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n"; return true; } -static uint32_t -getDILineInfoSpecifierFlags(const LLVMSymbolizer::Options &Opts) { - uint32_t Flags = llvm::DILineInfoSpecifier::FileLineInfo | - llvm::DILineInfoSpecifier::AbsoluteFilePath; - if (Opts.PrintFunctions) - Flags |= llvm::DILineInfoSpecifier::FunctionName; - return Flags; -} - -static void patchFunctionNameInDILineInfo(const std::string &NewFunctionName, - DILineInfo &LineInfo) { - std::string FileName = LineInfo.getFileName(); - LineInfo = DILineInfo(StringRef(FileName), StringRef(NewFunctionName), - LineInfo.getLine(), LineInfo.getColumn()); +static DILineInfoSpecifier +getDILineInfoSpecifier(const LLVMSymbolizer::Options &Opts) { + return DILineInfoSpecifier( + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, + Opts.PrintFunctions); } ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx) : Module(Obj), DebugInfoContext(DICtx) { - error_code ec; - for (symbol_iterator si = Module->begin_symbols(), se = Module->end_symbols(); - si != se; si.increment(ec)) { - if (error(ec)) + std::unique_ptr OpdExtractor; + uint64_t OpdAddress = 0; + // Find the .opd (function descriptor) section if any, for big-endian + // PowerPC64 ELF. + if (Module->getArch() == Triple::ppc64) { + for (section_iterator Section : Module->sections()) { + StringRef Name; + if (!error(Section->getName(Name)) && Name == ".opd") { + StringRef Data; + if (!error(Section->getContents(Data))) { + OpdExtractor.reset(new DataExtractor(Data, Module->isLittleEndian(), + Module->getBytesInAddress())); + OpdAddress = Section->getAddress(); + } + break; + } + } + } + std::vector> Symbols = + computeSymbolSizes(*Module); + for (auto &P : Symbols) + addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress); + + // If this is a COFF object and we didn't find any symbols, try the export + // table. + if (Symbols.empty()) { + if (auto *CoffObj = dyn_cast(Obj)) + addCoffExportSymbols(CoffObj); + } +} + +namespace { +struct OffsetNamePair { + uint32_t Offset; + StringRef Name; + bool operator<(const OffsetNamePair &R) const { + return Offset < R.Offset; + } +}; +} + +void ModuleInfo::addCoffExportSymbols(const COFFObjectFile *CoffObj) { + // Get all export names and offsets. + std::vector ExportSyms; + for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) { + StringRef Name; + uint32_t Offset; + if (error(Ref.getSymbolName(Name)) || error(Ref.getExportRVA(Offset))) return; - SymbolRef::Type SymbolType; - if (error(si->getType(SymbolType))) - continue; - if (SymbolType != SymbolRef::ST_Function && - SymbolType != SymbolRef::ST_Data) - continue; - uint64_t SymbolAddress; - if (error(si->getAddress(SymbolAddress)) || - SymbolAddress == UnknownAddressOrSize) - continue; - uint64_t SymbolSize; - // Getting symbol size is linear for Mach-O files, so assume that symbol - // occupies the memory range up to the following symbol. - if (isa(Obj)) - SymbolSize = 0; - else if (error(si->getSize(SymbolSize)) || - SymbolSize == UnknownAddressOrSize) - continue; - StringRef SymbolName; - if (error(si->getName(SymbolName))) - continue; - // Mach-O symbol table names have leading underscore, skip it. - if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') - SymbolName = SymbolName.drop_front(); - // FIXME: If a function has alias, there are two entries in symbol table - // with same address size. Make sure we choose the correct one. - SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; - SymbolDesc SD = { SymbolAddress, SymbolSize }; - M.insert(std::make_pair(SD, SymbolName)); + ExportSyms.push_back(OffsetNamePair{Offset, Name}); + } + if (ExportSyms.empty()) + return; + + // Sort by ascending offset. + array_pod_sort(ExportSyms.begin(), ExportSyms.end()); + + // Approximate the symbol sizes by assuming they run to the next symbol. + // FIXME: This assumes all exports are functions. + uint64_t ImageBase = CoffObj->getImageBase(); + for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) { + OffsetNamePair &Export = *I; + // FIXME: The last export has a one byte size now. + uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1; + uint64_t SymbolStart = ImageBase + Export.Offset; + uint64_t SymbolSize = NextOffset - Export.Offset; + SymbolDesc SD = {SymbolStart, SymbolSize}; + Functions.insert(std::make_pair(SD, Export.Name)); } } +void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize, + DataExtractor *OpdExtractor, uint64_t OpdAddress) { + SymbolRef::Type SymbolType = Symbol.getType(); + if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) + return; + ErrorOr SymbolAddressOrErr = Symbol.getAddress(); + if (error(SymbolAddressOrErr.getError())) + return; + uint64_t SymbolAddress = *SymbolAddressOrErr; + if (OpdExtractor) { + // For big-endian PowerPC64 ELF, symbols in the .opd section refer to + // function descriptors. The first word of the descriptor is a pointer to + // the function's code. + // For the purposes of symbolization, pretend the symbol's address is that + // of the function's code, not the descriptor. + uint64_t OpdOffset = SymbolAddress - OpdAddress; + uint32_t OpdOffset32 = OpdOffset; + if (OpdOffset == OpdOffset32 && + OpdExtractor->isValidOffsetForAddress(OpdOffset32)) + SymbolAddress = OpdExtractor->getAddress(&OpdOffset32); + } + ErrorOr SymbolNameOrErr = Symbol.getName(); + if (error(SymbolNameOrErr.getError())) + return; + StringRef SymbolName = *SymbolNameOrErr; + // Mach-O symbol table names have leading underscore, skip it. + if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') + SymbolName = SymbolName.drop_front(); + // FIXME: If a function has alias, there are two entries in symbol table + // with same address size. Make sure we choose the correct one. + auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; + SymbolDesc SD = { SymbolAddress, SymbolSize }; + M.insert(std::make_pair(SD, SymbolName)); +} + +// Return true if this is a 32-bit x86 PE COFF module. +bool ModuleInfo::isWin32Module() const { + auto *CoffObject = dyn_cast(Module); + return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386; +} + +uint64_t ModuleInfo::getModulePreferredBase() const { + if (auto *CoffObject = dyn_cast(Module)) + return CoffObject->getImageBase(); + return 0; +} + bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr, uint64_t &Size) const { - const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects; - if (M.empty()) + const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects; + if (SymbolMap.empty()) return false; SymbolDesc SD = { Address, Address }; - SymbolMapTy::const_iterator it = M.upper_bound(SD); - if (it == M.begin()) + auto SymbolIterator = SymbolMap.upper_bound(SD); + if (SymbolIterator == SymbolMap.begin()) return false; - --it; - if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address) + --SymbolIterator; + if (SymbolIterator->first.Size != 0 && + SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address) return false; - Name = it->second.str(); - Addr = it->first.Addr; - Size = it->first.Size; + Name = SymbolIterator->second.str(); + Addr = SymbolIterator->first.Addr; + Size = SymbolIterator->first.Size; return true; } @@ -113,15 +204,15 @@ DILineInfo ModuleInfo::symbolizeCode( DILineInfo LineInfo; if (DebugInfoContext) { LineInfo = DebugInfoContext->getLineInfoForAddress( - ModuleOffset, getDILineInfoSpecifierFlags(Opts)); + ModuleOffset, getDILineInfoSpecifier(Opts)); } // Override function name from symbol table if necessary. - if (Opts.PrintFunctions && Opts.UseSymbolTable) { + if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { std::string FunctionName; uint64_t Start, Size; if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, FunctionName, Start, Size)) { - patchFunctionNameInDILineInfo(FunctionName, LineInfo); + LineInfo.FunctionName = FunctionName; } } return LineInfo; @@ -130,16 +221,17 @@ DILineInfo ModuleInfo::symbolizeCode( DIInliningInfo ModuleInfo::symbolizeInlinedCode( uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { DIInliningInfo InlinedContext; + if (DebugInfoContext) { InlinedContext = DebugInfoContext->getInliningInfoForAddress( - ModuleOffset, getDILineInfoSpecifierFlags(Opts)); + ModuleOffset, getDILineInfoSpecifier(Opts)); } // Make sure there is at least one frame in context. if (InlinedContext.getNumberOfFrames() == 0) { InlinedContext.addFrame(DILineInfo()); } // Override the function name in lower frame with name from symbol table. - if (Opts.PrintFunctions && Opts.UseSymbolTable) { + if (Opts.PrintFunctions != FunctionNameKind::None && Opts.UseSymbolTable) { DIInliningInfo PatchedInlinedContext; for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { DILineInfo LineInfo = InlinedContext.getFrame(i); @@ -148,7 +240,7 @@ DIInliningInfo ModuleInfo::symbolizeInlinedCode( uint64_t Start, Size; if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, FunctionName, Start, Size)) { - patchFunctionNameInDILineInfo(FunctionName, LineInfo); + LineInfo.FunctionName = FunctionName; } } PatchedInlinedContext.addFrame(LineInfo); @@ -169,8 +261,14 @@ const char LLVMSymbolizer::kBadString[] = "??"; std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, uint64_t ModuleOffset) { ModuleInfo *Info = getOrCreateModuleInfo(ModuleName); - if (Info == 0) - return printDILineInfo(DILineInfo()); + if (!Info) + return printDILineInfo(DILineInfo(), Info); + + // If the user is giving us relative addresses, add the preferred base of the + // object to the offset before we do the query. It's what DIContext expects. + if (Opts.RelativeAddresses) + ModuleOffset += Info->getModulePreferredBase(); + if (Opts.PrintInlining) { DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(ModuleOffset, Opts); @@ -179,12 +277,12 @@ std::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, std::string Result; for (uint32_t i = 0; i < FramesNum; i++) { DILineInfo LineInfo = InlinedContext.getFrame(i); - Result += printDILineInfo(LineInfo); + Result += printDILineInfo(LineInfo, Info); } return Result; } DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts); - return printDILineInfo(LineInfo); + return printDILineInfo(LineInfo, Info); } std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, @@ -194,8 +292,12 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, uint64_t Size = 0; if (Opts.UseSymbolTable) { if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { + // If the user is giving us relative addresses, add the preferred base of the + // object to the offset before we do the query. It's what DIContext expects. + if (Opts.RelativeAddresses) + ModuleOffset += Info->getModulePreferredBase(); if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) - Name = DemangleName(Name); + Name = DemangleName(Name, Info); } } std::stringstream ss; @@ -205,25 +307,32 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, void LLVMSymbolizer::flush() { DeleteContainerSeconds(Modules); - DeleteContainerPointers(ParsedBinariesAndObjects); - BinaryForPath.clear(); + ObjectPairForPathArch.clear(); ObjectFileForArch.clear(); } -static std::string getDarwinDWARFResourceForPath(const std::string &Path) { - StringRef Basename = sys::path::filename(Path); - const std::string &DSymDirectory = Path + ".dSYM"; - SmallString<16> ResourceName = StringRef(DSymDirectory); +// For Path="/path/to/foo" and Basename="foo" assume that debug info is in +// /path/to/foo.dSYM/Contents/Resources/DWARF/foo. +// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in +// /path/to/bar.dSYM/Contents/Resources/DWARF/foo. +static +std::string getDarwinDWARFResourceForPath( + const std::string &Path, const std::string &Basename) { + SmallString<16> ResourceName = StringRef(Path); + if (sys::path::extension(Path) != ".dSYM") { + ResourceName += ".dSYM"; + } sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); sys::path::append(ResourceName, Basename); return ResourceName.str(); } static bool checkFileCRC(StringRef Path, uint32_t CRCHash) { - OwningPtr MB; - if (MemoryBuffer::getFileOrSTDIN(Path, MB)) + ErrorOr> MB = + MemoryBuffer::getFileOrSTDIN(Path); + if (!MB) return false; - return !zlib::isAvailable() || CRCHash == zlib::crc32(MB->getBuffer()); + return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer()); } static bool findDebugBinary(const std::string &OrigPath, @@ -231,7 +340,7 @@ static bool findDebugBinary(const std::string &OrigPath, std::string &Result) { std::string OrigRealPath = OrigPath; #if defined(HAVE_REALPATH) - if (char *RP = realpath(OrigPath.c_str(), NULL)) { + if (char *RP = realpath(OrigPath.c_str(), nullptr)) { OrigRealPath = RP; free(RP); } @@ -263,20 +372,17 @@ static bool findDebugBinary(const std::string &OrigPath, return false; } -static bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName, +static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, uint32_t &CRCHash) { - const ObjectFile *Obj = dyn_cast(Bin); if (!Obj) return false; - error_code EC; - for (section_iterator I = Obj->begin_sections(), E = Obj->end_sections(); - I != E; I.increment(EC)) { + for (const SectionRef &Section : Obj->sections()) { StringRef Name; - I->getName(Name); + Section.getName(Name); Name = Name.substr(Name.find_first_not_of("._")); if (Name == "gnu_debuglink") { StringRef Data; - I->getContents(Data); + Section.getContents(Data); DataExtractor DE(Data, Obj->isLittleEndian(), 0); uint32_t Offset = 0; if (const char *DebugNameStr = DE.getCStr(&Offset)) { @@ -294,67 +400,104 @@ static bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName, return false; } -LLVMSymbolizer::BinaryPair -LLVMSymbolizer::getOrCreateBinary(const std::string &Path) { - BinaryMapTy::iterator I = BinaryForPath.find(Path); - if (I != BinaryForPath.end()) +static +bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, + const MachOObjectFile *Obj) { + ArrayRef dbg_uuid = DbgObj->getUuid(); + ArrayRef bin_uuid = Obj->getUuid(); + if (dbg_uuid.empty() || bin_uuid.empty()) + return false; + return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); +} + +ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, + const MachOObjectFile *MachExeObj, const std::string &ArchName) { + // On Darwin we may find DWARF in separate object file in + // resource directory. + std::vector DsymPaths; + StringRef Filename = sys::path::filename(ExePath); + DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); + for (const auto &Path : Opts.DsymHints) { + DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); + } + for (const auto &path : DsymPaths) { + ErrorOr> BinaryOrErr = createBinary(path); + std::error_code EC = BinaryOrErr.getError(); + if (EC != errc::no_such_file_or_directory && !error(EC)) { + OwningBinary B = std::move(BinaryOrErr.get()); + ObjectFile *DbgObj = + getObjectFileFromBinary(B.getBinary(), ArchName); + const MachOObjectFile *MachDbgObj = + dyn_cast(DbgObj); + if (!MachDbgObj) continue; + if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) { + addOwningBinary(std::move(B)); + return DbgObj; + } + } + } + return nullptr; +} + +LLVMSymbolizer::ObjectPair +LLVMSymbolizer::getOrCreateObjects(const std::string &Path, + const std::string &ArchName) { + const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); + if (I != ObjectPairForPathArch.end()) return I->second; - Binary *Bin = 0; - Binary *DbgBin = 0; - ErrorOr BinaryOrErr = createBinary(Path); + ObjectFile *Obj = nullptr; + ObjectFile *DbgObj = nullptr; + ErrorOr> BinaryOrErr = createBinary(Path); if (!error(BinaryOrErr.getError())) { - OwningPtr ParsedBinary(BinaryOrErr.get()); - // Check if it's a universal binary. - Bin = ParsedBinary.take(); - ParsedBinariesAndObjects.push_back(Bin); - if (Bin->isMachO() || Bin->isMachOUniversalBinary()) { - // On Darwin we may find DWARF in separate object file in - // resource directory. - const std::string &ResourcePath = - getDarwinDWARFResourceForPath(Path); - BinaryOrErr = createBinary(ResourcePath); - error_code EC = BinaryOrErr.getError(); - if (EC != errc::no_such_file_or_directory && !error(EC)) { - DbgBin = BinaryOrErr.get(); - ParsedBinariesAndObjects.push_back(DbgBin); - } + OwningBinary &B = BinaryOrErr.get(); + Obj = getObjectFileFromBinary(B.getBinary(), ArchName); + if (!Obj) { + ObjectPair Res = std::make_pair(nullptr, nullptr); + ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res; + return Res; } + addOwningBinary(std::move(B)); + if (auto MachObj = dyn_cast(Obj)) + DbgObj = lookUpDsymFile(Path, MachObj, ArchName); // Try to locate the debug binary using .gnu_debuglink section. - if (DbgBin == 0) { + if (!DbgObj) { std::string DebuglinkName; uint32_t CRCHash; std::string DebugBinaryPath; - if (getGNUDebuglinkContents(Bin, DebuglinkName, CRCHash) && + if (getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash) && findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) { BinaryOrErr = createBinary(DebugBinaryPath); if (!error(BinaryOrErr.getError())) { - DbgBin = BinaryOrErr.get(); - ParsedBinariesAndObjects.push_back(DbgBin); + OwningBinary B = std::move(BinaryOrErr.get()); + DbgObj = getObjectFileFromBinary(B.getBinary(), ArchName); + addOwningBinary(std::move(B)); } } } } - if (DbgBin == 0) - DbgBin = Bin; - BinaryPair Res = std::make_pair(Bin, DbgBin); - BinaryForPath[Path] = Res; + if (!DbgObj) + DbgObj = Obj; + ObjectPair Res = std::make_pair(Obj, DbgObj); + ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res; return Res; } ObjectFile * -LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) { - if (Bin == 0) - return 0; - ObjectFile *Res = 0; +LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, + const std::string &ArchName) { + if (!Bin) + return nullptr; + ObjectFile *Res = nullptr; if (MachOUniversalBinary *UB = dyn_cast(Bin)) { - ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find( + const auto &I = ObjectFileForArch.find( std::make_pair(UB, ArchName)); if (I != ObjectFileForArch.end()) return I->second; - OwningPtr ParsedObj; - if (!UB->getObjectForArch(Triple(ArchName).getArch(), ParsedObj)) { - Res = ParsedObj.take(); - ParsedBinariesAndObjects.push_back(Res); + ErrorOr> ParsedObj = + UB->getObjectForArch(ArchName); + if (ParsedObj) { + Res = ParsedObj.get().get(); + ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get())); } ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; } else if (Bin->isObject()) { @@ -365,7 +508,7 @@ LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName ModuleInfo * LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { - ModuleMapTy::iterator I = Modules.find(ModuleName); + const auto &I = Modules.find(ModuleName); if (I != Modules.end()) return I->second; std::string BinaryName = ModuleName; @@ -379,65 +522,120 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { ArchName = ArchStr; } } - BinaryPair Binaries = getOrCreateBinary(BinaryName); - ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName); - ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName); + ObjectPair Objects = getOrCreateObjects(BinaryName, ArchName); - if (Obj == 0) { + if (!Objects.first) { // Failed to find valid object file. - Modules.insert(make_pair(ModuleName, (ModuleInfo *)0)); - return 0; + Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr)); + return nullptr; } - DIContext *Context = DIContext::getDWARFContext(DbgObj); + DIContext *Context = nullptr; + if (auto CoffObject = dyn_cast(Objects.first)) { + // If this is a COFF object, assume it contains PDB debug information. If + // we don't find any we will fall back to the DWARF case. + std::unique_ptr Session; + PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA, + Objects.first->getFileName(), Session); + if (Error == PDB_ErrorCode::Success) { + Context = new PDBContext(*CoffObject, std::move(Session)); + } + } + if (!Context) + Context = new DWARFContextInMemory(*Objects.second); assert(Context); - ModuleInfo *Info = new ModuleInfo(Obj, Context); + ModuleInfo *Info = new ModuleInfo(Objects.first, Context); Modules.insert(make_pair(ModuleName, Info)); return Info; } -std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const { +std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo, + ModuleInfo *ModInfo) const { // By default, DILineInfo contains "" for function/filename it // cannot fetch. We replace it to "??" to make our output closer to addr2line. static const std::string kDILineInfoBadString = ""; std::stringstream Result; - if (Opts.PrintFunctions) { - std::string FunctionName = LineInfo.getFunctionName(); + if (Opts.PrintFunctions != FunctionNameKind::None) { + std::string FunctionName = LineInfo.FunctionName; if (FunctionName == kDILineInfoBadString) FunctionName = kBadString; else if (Opts.Demangle) - FunctionName = DemangleName(FunctionName); + FunctionName = DemangleName(FunctionName, ModInfo); Result << FunctionName << "\n"; } - std::string Filename = LineInfo.getFileName(); + std::string Filename = LineInfo.FileName; if (Filename == kDILineInfoBadString) Filename = kBadString; - Result << Filename << ":" << LineInfo.getLine() << ":" << LineInfo.getColumn() - << "\n"; + Result << Filename << ":" << LineInfo.Line << ":" << LineInfo.Column << "\n"; return Result.str(); } +// Undo these various manglings for Win32 extern "C" functions: +// cdecl - _foo +// stdcall - _foo@12 +// fastcall - @foo@12 +// vectorcall - foo@@12 +// These are all different linkage names for 'foo'. +static StringRef demanglePE32ExternCFunc(StringRef SymbolName) { + // Remove any '_' or '@' prefix. + char Front = SymbolName.empty() ? '\0' : SymbolName[0]; + if (Front == '_' || Front == '@') + SymbolName = SymbolName.drop_front(); + + // Remove any '@[0-9]+' suffix. + if (Front != '?') { + size_t AtPos = SymbolName.rfind('@'); + if (AtPos != StringRef::npos && + std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(), + [](char C) { return C >= '0' && C <= '9'; })) { + SymbolName = SymbolName.substr(0, AtPos); + } + } + + // Remove any ending '@' for vectorcall. + if (SymbolName.endswith("@")) + SymbolName = SymbolName.drop_back(); + + return SymbolName; +} + #if !defined(_MSC_VER) // Assume that __cxa_demangle is provided by libcxxabi (except for Windows). extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, size_t *length, int *status); #endif -std::string LLVMSymbolizer::DemangleName(const std::string &Name) { +std::string LLVMSymbolizer::DemangleName(const std::string &Name, + ModuleInfo *ModInfo) { #if !defined(_MSC_VER) // We can spoil names of symbols with C linkage, so use an heuristic // approach to check if the name should be demangled. - if (Name.substr(0, 2) != "_Z") - return Name; - int status = 0; - char *DemangledName = __cxa_demangle(Name.c_str(), 0, 0, &status); - if (status != 0) - return Name; - std::string Result = DemangledName; - free(DemangledName); - return Result; + if (Name.substr(0, 2) == "_Z") { + int status = 0; + char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status); + if (status != 0) + return Name; + std::string Result = DemangledName; + free(DemangledName); + return Result; + } #else - return Name; + if (!Name.empty() && Name.front() == '?') { + // Only do MSVC C++ demangling on symbols starting with '?'. + char DemangledName[1024] = {0}; + DWORD result = ::UnDecorateSymbolName( + Name.c_str(), DemangledName, 1023, + UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected + UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc + UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications + UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers + UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords + UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types + return (result == 0) ? Name : std::string(DemangledName); + } #endif + if (ModInfo->isWin32Module()) + return std::string(demanglePE32ExternCFunc(Name)); + return Name; } } // namespace symbolize