#include "llvm/DebugInfo/PDB/PDBContext.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/DataExtractor.h"
#if defined(_MSC_VER)
#include <Windows.h>
#include <DbgHelp.h>
+#pragma comment(lib, "dbghelp.lib")
+
+// Windows.h conflicts with our COFF header definitions.
+#ifdef IMAGE_FILE_MACHINE_I386
+#undef IMAGE_FILE_MACHINE_I386
+#endif
#endif
namespace llvm {
}
}
}
- for (const SymbolRef &Symbol : Module->symbols()) {
- addSymbol(Symbol, OpdExtractor.get(), OpdAddress);
+ std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
+ computeSymbolSizes(*Module);
+ for (auto &P : Symbols)
+ addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
+
+ // If this is a COFF object and we didn't find any symbols, try the export
+ // table.
+ if (Symbols.empty()) {
+ if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
+ addCoffExportSymbols(CoffObj);
}
- bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end());
- if (NoSymbolTable && Module->isELF()) {
- // Fallback to dynamic symbol table, if regular symbol table is stripped.
- std::pair<symbol_iterator, symbol_iterator> IDyn =
- getELFDynamicSymbolIterators(Module);
- for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) {
- addSymbol(*si, OpdExtractor.get(), OpdAddress);
- }
+}
+
+namespace {
+// Pairs an export's name with the 32-bit offset recorded for it. Ordering
+// looks at the offset alone, so sorting a range of these yields ascending
+// offsets regardless of the names.
+struct OffsetNamePair {
+  uint32_t Offset;
+  StringRef Name;
+  bool operator<(const OffsetNamePair &Other) const {
+    return Other.Offset > Offset;
}
+};
}
-void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
- uint64_t OpdAddress) {
- SymbolRef::Type SymbolType;
- if (error(Symbol.getType(SymbolType)))
+// Populate Functions from the export table of CoffObj.
+//
+// Each export is recorded under the address ImageBase + export RVA. The
+// export table carries no sizes, so a symbol is assumed to extend up to the
+// next export in offset order. If any export's name or RVA cannot be read,
+// the function returns without adding anything.
+void ModuleInfo::addCoffExportSymbols(const COFFObjectFile *CoffObj) {
+  // Get all export names and offsets.
+  std::vector<OffsetNamePair> ExportSyms;
+  for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
+    StringRef Name;
+    uint32_t Offset;
+    if (error(Ref.getSymbolName(Name)) || error(Ref.getExportRVA(Offset)))
+      return;
+    ExportSyms.push_back(OffsetNamePair{Offset, Name});
+  }
+  if (ExportSyms.empty())
return;
+
+  // Sort by ascending offset.
+  array_pod_sort(ExportSyms.begin(), ExportSyms.end());
+
+  // Approximate the symbol sizes by assuming they run to the next symbol.
+  // FIXME: This assumes all exports are functions.
+  uint64_t ImageBase = CoffObj->getImageBase();
+  for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
+    OffsetNamePair &Export = *I;
+    // The size is the distance to the *following* export, so peek one past
+    // the current entry. Comparing against I itself would always yield 0.
+    // FIXME: The last export has a one byte size now.
+    auto NextI = I + 1;
+    uint32_t NextOffset = NextI != E ? NextI->Offset : Export.Offset + 1;
+    uint64_t SymbolStart = ImageBase + Export.Offset;
+    uint64_t SymbolSize = NextOffset - Export.Offset;
+    SymbolDesc SD = {SymbolStart, SymbolSize};
+    Functions.insert(std::make_pair(SD, Export.Name));
+  }
+}
+
+void ModuleInfo::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize,
+ DataExtractor *OpdExtractor, uint64_t OpdAddress) {
+ SymbolRef::Type SymbolType = Symbol.getType();
if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
return;
- uint64_t SymbolAddress;
- if (error(Symbol.getAddress(SymbolAddress)) ||
- SymbolAddress == UnknownAddressOrSize)
+ ErrorOr<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
+ if (error(SymbolAddressOrErr.getError()))
return;
+ uint64_t SymbolAddress = *SymbolAddressOrErr;
if (OpdExtractor) {
// For big-endian PowerPC64 ELF, symbols in the .opd section refer to
// function descriptors. The first word of the descriptor is a pointer to
OpdExtractor->isValidOffsetForAddress(OpdOffset32))
SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
}
- uint64_t SymbolSize;
- // Getting symbol size is linear for Mach-O files, so assume that symbol
- // occupies the memory range up to the following symbol.
- if (isa<MachOObjectFile>(Module))
- SymbolSize = 0;
- else if (error(Symbol.getSize(SymbolSize)) ||
- SymbolSize == UnknownAddressOrSize)
- return;
- StringRef SymbolName;
- if (error(Symbol.getName(SymbolName)))
+ ErrorOr<StringRef> SymbolNameOrErr = Symbol.getName();
+ if (error(SymbolNameOrErr.getError()))
return;
+ StringRef SymbolName = *SymbolNameOrErr;
// Mach-O symbol table names have leading underscore, skip it.
if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
SymbolName = SymbolName.drop_front();
M.insert(std::make_pair(SD, SymbolName));
}
+// Tell whether the wrapped module is a COFF object whose machine type is
+// 32-bit x86 (IMAGE_FILE_MACHINE_I386). Any non-COFF module yields false.
+bool ModuleInfo::isWin32Module() const {
+  if (auto *Coff = dyn_cast<COFFObjectFile>(Module))
+    return Coff->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
+  return false;
+}
+
+// Preferred base of the module: the image base for a COFF object, and 0 for
+// every other kind of module.
+uint64_t ModuleInfo::getModulePreferredBase() const {
+  auto *Coff = dyn_cast<COFFObjectFile>(Module);
+  return Coff ? Coff->getImageBase() : 0;
+}
+
bool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
std::string &Name, uint64_t &Addr,
uint64_t &Size) const {
uint64_t ModuleOffset) {
ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
if (!Info)
- return printDILineInfo(DILineInfo());
+ return printDILineInfo(DILineInfo(), Info);
+
+ // If the user is giving us relative addresses, add the preferred base of the
+ // object to the offset before we do the query. It's what DIContext expects.
+ if (Opts.RelativeAddresses)
+ ModuleOffset += Info->getModulePreferredBase();
+
if (Opts.PrintInlining) {
DIInliningInfo InlinedContext =
Info->symbolizeInlinedCode(ModuleOffset, Opts);
std::string Result;
for (uint32_t i = 0; i < FramesNum; i++) {
DILineInfo LineInfo = InlinedContext.getFrame(i);
- Result += printDILineInfo(LineInfo);
+ Result += printDILineInfo(LineInfo, Info);
}
return Result;
}
DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
- return printDILineInfo(LineInfo);
+ return printDILineInfo(LineInfo, Info);
}
std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
uint64_t Size = 0;
if (Opts.UseSymbolTable) {
if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
+ // If the user is giving us relative addresses, add the preferred base of the
+ // object to the offset before we do the query. It's what DIContext expects.
+ if (Opts.RelativeAddresses)
+ ModuleOffset += Info->getModulePreferredBase();
if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
- Name = DemangleName(Name);
+ Name = DemangleName(Name, Info);
}
}
std::stringstream ss;
if (I != ObjectFileForArch.end())
return I->second;
ErrorOr<std::unique_ptr<ObjectFile>> ParsedObj =
- UB->getObjectForArch(Triple(ArchName).getArch());
+ UB->getObjectForArch(ArchName);
if (ParsedObj) {
Res = ParsedObj.get().get();
ParsedBinariesAndObjects.push_back(std::move(ParsedObj.get()));
PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA,
Objects.first->getFileName(), Session);
if (Error == PDB_ErrorCode::Success) {
- Context = new PDBContext(*CoffObject, std::move(Session),
- Opts.RelativeAddresses);
+ Context = new PDBContext(*CoffObject, std::move(Session));
}
}
if (!Context)
return Info;
}
-std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
+std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo,
+ ModuleInfo *ModInfo) const {
// By default, DILineInfo contains "<invalid>" for function/filename it
// cannot fetch. We replace it to "??" to make our output closer to addr2line.
static const std::string kDILineInfoBadString = "<invalid>";
if (FunctionName == kDILineInfoBadString)
FunctionName = kBadString;
else if (Opts.Demangle)
- FunctionName = DemangleName(FunctionName);
+ FunctionName = DemangleName(FunctionName, ModInfo);
Result << FunctionName << "\n";
}
std::string Filename = LineInfo.FileName;
return Result.str();
}
+// Strip the decoration applied to Win32 extern "C" function names, so that
+// each of the following linkage names maps back to plain 'foo':
+//   cdecl      - _foo
+//   stdcall    - _foo@12
+//   fastcall   - @foo@12
+//   vectorcall - foo@@12
+static StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
+  // First character of the incoming name; '\0' stands in for an empty name.
+  const char Front = SymbolName.empty() ? '\0' : SymbolName[0];
+
+  // A leading '_' or '@' is decoration, not part of the name.
+  if (Front == '_' || Front == '@')
+    SymbolName = SymbolName.drop_front();
+
+  // Cut a trailing '@' followed only by decimal digits. Names that began
+  // with '?' are left alone here.
+  if (Front != '?') {
+    const size_t AtPos = SymbolName.rfind('@');
+    if (AtPos != StringRef::npos) {
+      bool OnlyDigits = true;
+      for (size_t Idx = AtPos + 1, End = SymbolName.size(); Idx != End; ++Idx) {
+        const char C = SymbolName[Idx];
+        if (C < '0' || C > '9') {
+          OnlyDigits = false;
+          break;
+        }
+      }
+      if (OnlyDigits)
+        SymbolName = SymbolName.substr(0, AtPos);
+    }
+  }
+
+  // vectorcall leaves one more '@' behind once the suffix is gone.
+  if (!SymbolName.empty() && SymbolName.back() == '@')
+    SymbolName = SymbolName.drop_back();
+
+  return SymbolName;
+}
+
#if !defined(_MSC_VER)
// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
size_t *length, int *status);
#endif
-std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
+/// Turn the linkage name \p Name into a human-readable name.
+///
+/// On non-MSVC hosts, names starting with "_Z" are passed to __cxa_demangle.
+/// On MSVC hosts, names starting with '?' are passed to UnDecorateSymbolName.
+/// A name the host demangler does not claim has its Win32 extern "C"
+/// decoration stripped when \p ModInfo is a 32-bit x86 COFF module.
+///
+/// \param Name    The symbol name to demangle.
+/// \param ModInfo Module the name came from. May be null, in which case no
+///                module-specific handling is applied.
+/// \returns the demangled name, or \p Name unchanged when demangling fails
+///          or does not apply.
+std::string LLVMSymbolizer::DemangleName(const std::string &Name,
+                                         ModuleInfo *ModInfo) {
#if !defined(_MSC_VER)
// We can spoil names of symbols with C linkage, so use an heuristic
// approach to check if the name should be demangled.
- if (Name.substr(0, 2) != "_Z")
- return Name;
- int status = 0;
- char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
- if (status != 0)
- return Name;
- std::string Result = DemangledName;
- free(DemangledName);
- return Result;
+  if (Name.substr(0, 2) == "_Z") {
+    int status = 0;
+    char *DemangledName =
+        __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
+    if (status != 0)
+      return Name;
+    std::string Result = DemangledName;
+    free(DemangledName);
+    return Result;
+  }
#else
- char DemangledName[1024] = {0};
- DWORD result = ::UnDecorateSymbolName(
- Name.c_str(), DemangledName, 1023,
- UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected
- UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
- UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications
- UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
- UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
- UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
-
- return (result == 0) ? Name : std::string(DemangledName);
+  if (!Name.empty() && Name.front() == '?') {
+    // Only do MSVC C++ demangling on symbols starting with '?'.
+    char DemangledName[1024] = {0};
+    DWORD result = ::UnDecorateSymbolName(
+        Name.c_str(), DemangledName, 1023,
+        UNDNAME_NO_ACCESS_SPECIFIERS |       // Strip public, private, protected
+            UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
+            UNDNAME_NO_THROW_SIGNATURES |    // Strip throw() specifications
+            UNDNAME_NO_MEMBER_TYPE |      // Strip virtual, static, etc specifiers
+            UNDNAME_NO_MS_KEYWORDS |      // Strip all MS extension keywords
+            UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
+    return (result == 0) ? Name : std::string(DemangledName);
+  }
#endif
+  // ModInfo is a nullable pointer (callers may have no module to offer), so
+  // check it before asking whether Win32 extern "C" decoration applies.
+  if (ModInfo && ModInfo->isWin32Module())
+    return std::string(demanglePE32ExternCFunc(Name));
+  return Name;
}
} // namespace symbolize