From de2cc5ee0d80fb06c95b7ad8c73c8a910a5a114d Mon Sep 17 00:00:00 2001 From: Peter Griess Date: Fri, 15 Mar 2013 09:12:46 -0700 Subject: [PATCH] Add symbol name resolution and value retrieval Summary: - Add ElfFile::getSymbolByName(), which finds a Symbol object corresponding to the symbol w/ the given name - Add ElfFile::getSymbolValue(), which resolves the Symbol object to a value in the mapped file, following pointers if necessary Test Plan: - Unit tests Reviewed By: simpkins@fb.com FB internal diff: D740183 --- folly/experimental/symbolizer/Elf-inl.h | 28 ++++++ folly/experimental/symbolizer/Elf.cpp | 80 +++++++++++----- folly/experimental/symbolizer/Elf.h | 92 +++++++++++++++++-- .../experimental/symbolizer/test/ElfTests.cpp | 65 +++++++++++++ 4 files changed, 234 insertions(+), 31 deletions(-) create mode 100644 folly/experimental/symbolizer/test/ElfTests.cpp diff --git a/folly/experimental/symbolizer/Elf-inl.h b/folly/experimental/symbolizer/Elf-inl.h index 4bc644d5..cc4f51bd 100644 --- a/folly/experimental/symbolizer/Elf-inl.h +++ b/folly/experimental/symbolizer/Elf-inl.h @@ -59,6 +59,34 @@ const char* ElfFile::iterateStrings(const ElfW(Shdr)& stringTable, Fn fn) return ptr != end ? ptr : nullptr; } +template +const ElfW(Sym)* ElfFile::iterateSymbols(const ElfW(Shdr)& section, Fn fn) + const { + enforce(section.sh_entsize == sizeof(ElfW(Sym)), + "invalid entry size in symbol table"); + + const ElfW(Sym)* sym = &at(section.sh_offset); + const ElfW(Sym)* end = sym + (section.sh_size / section.sh_entsize); + + while (sym < end) { + if (fn(*sym)) { + return sym; + } + + ++sym; + } + + return nullptr; +} + +template +const ElfW(Sym)* ElfFile::iterateSymbolsWithType(const ElfW(Shdr)& section, + uint32_t type, Fn fn) const { + // N.B. 
st_info has the same representation on 32- and 64-bit platforms + return iterateSymbols(section, [&](const ElfW(Sym)& sym) -> bool { + return ELF32_ST_TYPE(sym.st_info) == type && fn(sym); + }); +} } // namespace symbolizer } // namespace folly diff --git a/folly/experimental/symbolizer/Elf.cpp b/folly/experimental/symbolizer/Elf.cpp index d9160018..420b8797 100644 --- a/folly/experimental/symbolizer/Elf.cpp +++ b/folly/experimental/symbolizer/Elf.cpp @@ -234,42 +234,76 @@ const ElfW(Shdr)* ElfFile::getSectionByName(const char* name) const { ElfFile::Symbol ElfFile::getDefinitionByAddress(uintptr_t address) const { Symbol foundSymbol {nullptr, nullptr}; - auto find = [&] (const ElfW(Shdr)& section) { - enforce(section.sh_entsize == sizeof(ElfW(Sym)), - "invalid entry size in symbol table"); - - const ElfW(Sym)* sym = &at(section.sh_offset); - const ElfW(Sym)* end = &at(section.sh_offset + section.sh_size); - for (; sym != end; ++sym) { - // st_info has the same representation on 32- and 64-bit platforms - auto type = ELF32_ST_TYPE(sym->st_info); - - // TODO(tudorb): Handle STT_TLS, but then we'd have to understand - // thread-local relocations. If all we're looking up is functions - // (instruction pointers), it doesn't matter, though. 
- if (type != STT_OBJECT && type != STT_FUNC) { - continue; + auto findSection = [&](const ElfW(Shdr)& section) { + auto findSymbols = [&](const ElfW(Sym)& sym) { + if (sym.st_shndx == SHN_UNDEF) { + return false; // not a definition } - if (sym->st_shndx == SHN_UNDEF) { - continue; // not a definition - } - if (address >= sym->st_value && address < sym->st_value + sym->st_size) { + if (address >= sym.st_value && address < sym.st_value + sym.st_size) { foundSymbol.first = &section; - foundSymbol.second = sym; + foundSymbol.second = &sym; return true; } + + return false; + }; + + return iterateSymbolsWithType(section, STT_OBJECT, findSymbols) || + iterateSymbolsWithType(section, STT_FUNC, findSymbols); + }; + + // Try the .dynsym section first if it exists, it's smaller. + (iterateSectionsWithType(SHT_DYNSYM, findSection) || + iterateSectionsWithType(SHT_SYMTAB, findSection)); + + return foundSymbol; +} + +ElfFile::Symbol ElfFile::getSymbolByName(const char* name) const { + Symbol foundSymbol{nullptr, nullptr}; + + auto findSection = [&](const ElfW(Shdr)& section) -> bool { + // This section has no string table associated w/ its symbols; hence we + // can't get names for them + if (section.sh_link == SHN_UNDEF) { + return false; } - return false; + auto findSymbols = [&](const ElfW(Sym)& sym) -> bool { + if (sym.st_shndx == SHN_UNDEF) { + return false; // not a definition + } + if (sym.st_name == 0) { + return false; // no name for this symbol + } + const char* sym_name = getString( + *getSectionByIndex(section.sh_link), sym.st_name); + if (strcmp(sym_name, name) == 0) { + foundSymbol.first = &section; + foundSymbol.second = &sym; + return true; + } + + return false; + }; + + return iterateSymbolsWithType(section, STT_OBJECT, findSymbols) || + iterateSymbolsWithType(section, STT_FUNC, findSymbols); }; // Try the .dynsym section first if it exists, it's smaller. 
- (iterateSectionsWithType(SHT_DYNSYM, find) || - iterateSectionsWithType(SHT_SYMTAB, find)); + iterateSectionsWithType(SHT_DYNSYM, findSection) || + iterateSectionsWithType(SHT_SYMTAB, findSection); return foundSymbol; } +const ElfW(Shdr)* ElfFile::getSectionContainingAddress(ElfW(Addr) addr) const { + return iterateSections([&](const ElfW(Shdr)& sh) -> bool { + return (addr >= sh.sh_addr) && (addr < (sh.sh_addr + sh.sh_size)); + }); +} + const char* ElfFile::getSymbolName(Symbol symbol) const { if (!symbol.first || !symbol.second) { return nullptr; diff --git a/folly/experimental/symbolizer/Elf.h b/folly/experimental/symbolizer/Elf.h index 91f587c2..9b1ddc49 100644 --- a/folly/experimental/symbolizer/Elf.h +++ b/folly/experimental/symbolizer/Elf.h @@ -33,6 +33,13 @@ namespace folly { namespace symbolizer { +template +inline void enforce(bool v, Args... args) { + if (UNLIKELY(!v)) { + throw std::runtime_error(folly::to(args...)); + } +} + /** * ELF file parser. * @@ -101,6 +108,18 @@ class ElfFile { template const ElfW(Shdr)* iterateSectionsWithType(uint32_t type, Fn fn) const; + /** + * Iterate over all symbols within a given section. + * + * Returns a pointer to the current ("found") symbol when fn returned true, + * or nullptr if fn returned false for all symbols. + */ + template + const ElfW(Sym)* iterateSymbols(const ElfW(Shdr)& section, Fn fn) const; + template + const ElfW(Sym)* iterateSymbolsWithType(const ElfW(Shdr)& section, + uint32_t type, Fn fn) const; + /** * Find symbol definition by address. * Note that this is the file virtual address, so you need to undo @@ -109,11 +128,52 @@ class ElfFile { typedef std::pair Symbol; Symbol getDefinitionByAddress(uintptr_t address) const; + /** + * Find symbol definition by name. + * + * If a symbol with this name cannot be found, a <nullptr, nullptr> Symbol + * will be returned. This is O(N) in the number of symbols in the file. + */ + Symbol getSymbolByName(const char* name) const; + + /** + * Get the value of a symbol. 
+ */ + template + const T& getSymbolValue(const ElfW(Sym)* symbol) const { + const ElfW(Shdr)* section = getSectionByIndex(symbol->st_shndx); + enforce(section, "Symbol's section index is invalid"); + + return valueAt(*section, symbol->st_value); + } + + /** + * Get the value of the object stored at the given address. + * + * This is the function that you want to use in conjunction with + * getSymbolValue() to follow pointers. For example, to get the value of + * a char* symbol, you'd do something like this: + * + * auto sym = getSymbolByName("someGlobalValue"); + * auto addr = getSymbolValue<ElfW(Addr)>(sym.second); + * const char* str = &getAddressValue<const char>(addr); + */ + template + const T& getAddressValue(const ElfW(Addr) addr) const { + const ElfW(Shdr)* section = getSectionContainingAddress(addr); + enforce(section, "Address does not refer to existing section"); + + return valueAt(*section, addr); + } + + /** + * Retrieve symbol name. */ const char* getSymbolName(Symbol symbol) const; + /** Find the section containing the given address */ + const ElfW(Shdr)* getSectionContainingAddress(ElfW(Addr) addr) const; + private: void init(); void destroy(); @@ -124,10 +184,33 @@ class ElfFile { template const typename std::enable_if::value, T>::type& - at(off_t offset) const { + at(ElfW(Off) offset) const { + enforce(offset + sizeof(T) <= length_, + "Offset is not contained within our mmapped file"); + return *reinterpret_cast(file_ + offset); } + template + const T& valueAt(const ElfW(Shdr)& section, const ElfW(Addr) addr) const { + // For executables and shared objects, st_value holds a virtual address + // that refers to the memory owned by sections. Since we didn't map the + // sections into the addresses that they're expecting (sh_addr), but + // instead just mmapped the entire file directly, we need to translate + // between addresses and offsets into the file. + // + // TODO: For other file types, st_value holds a file offset directly. 
Since + // I don't have a use-case for that right now, just assert that + // nobody wants this. We can always add it later. + enforce(elfHeader().e_type == ET_EXEC || elfHeader().e_type == ET_DYN, + "Only exectuables and shared objects are supported"); + enforce(addr >= section.sh_addr && + (addr + sizeof(T)) <= (section.sh_addr + section.sh_size), + "Address is not contained within the provided segment"); + + return at(section.sh_offset + (addr - section.sh_addr)); + } + int fd_; char* file_; // mmap() location size_t length_; // mmap() length @@ -135,13 +218,6 @@ class ElfFile { uintptr_t baseAddress_; }; -template -inline void enforce(bool v, Args... args) { - if (UNLIKELY(!v)) { - throw std::runtime_error(folly::to(args...)); - } -} - } // namespace symbolizer } // namespace folly diff --git a/folly/experimental/symbolizer/test/ElfTests.cpp b/folly/experimental/symbolizer/test/ElfTests.cpp new file mode 100644 index 00000000..3c5043f0 --- /dev/null +++ b/folly/experimental/symbolizer/test/ElfTests.cpp @@ -0,0 +1,65 @@ +/* + * Copyright 2013 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "folly/experimental/symbolizer/Elf.h" + +using folly::symbolizer::ElfFile; + +// Add some symbols for testing. 
Note that we have to be careful with type +// signatures here to prevent name mangling +uint64_t kIntegerValue = 1234567890UL; +const char* kStringValue = "coconuts"; + +class ElfTest : public ::testing::Test { + public: + // Path to the test binary itself; set by main() + static std::string binaryPath; + + ElfTest() : elfFile_(binaryPath.c_str()) { + } + virtual ~ElfTest() { + } + + protected: + ElfFile elfFile_; +}; + +std::string ElfTest::binaryPath; + +TEST_F(ElfTest, IntegerValue) { + auto sym = elfFile_.getSymbolByName("kIntegerValue"); + EXPECT_NE(nullptr, sym.first) << + "Failed to look up symbol kIntegerValue"; + EXPECT_EQ(kIntegerValue, elfFile_.getSymbolValue(sym.second)); +} + +TEST_F(ElfTest, PointerValue) { + auto sym = elfFile_.getSymbolByName("kStringValue"); + EXPECT_NE(nullptr, sym.first) << + "Failed to look up symbol kStringValue"; + ElfW(Addr) addr = elfFile_.getSymbolValue(sym.second); + const char *str = &elfFile_.getAddressValue(addr); + EXPECT_STREQ(kStringValue, str); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + ElfTest::binaryPath = argv[0]; + return RUN_ALL_TESTS(); +} -- 2.34.1