Prevent a crash in folly::Symbolizer

[folly.git] / folly / experimental / symbolizer / Dwarf.cpp
diff --git a/folly/experimental/symbolizer/Dwarf.cpp b/folly/experimental/symbolizer/Dwarf.cpp

index d3f9e05ef574fc2bfedfe0d1a197c724998ba1e5..c34b89ba99ce979c6693dd6305e6963c6e42ae0e 100644 (file)
--- a/folly/experimental/symbolizer/Dwarf.cpp
+++ b/folly/experimental/symbolizer/Dwarf.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017-present Facebook, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -14,12 +14,15 @@
   * limitations under the License.
   */
  
-
-#include "folly/experimental/symbolizer/Dwarf.h"
+#include <folly/experimental/symbolizer/Dwarf.h>
  
  #include <type_traits>
  
+#if FOLLY_HAVE_LIBDWARF_DWARF_H
+#include <libdwarf/dwarf.h>
+#else
  #include <dwarf.h>
+#endif
  
  namespace folly {
  namespace symbolizer {
@@ -34,13 +37,13 @@ Dwarf::Section::Section(folly::StringPiece d) : is64Bit_(false), data_(d) {
  namespace {
  
  // All following read* functions read from a StringPiece, advancing the
-// StringPiece, and throwing an exception if there's not enough room
+// StringPiece, and aborting if there's not enough room.
  
  // Read (bitwise) one object of type T
  template <class T>
  typename std::enable_if<std::is_pod<T>::value, T>::type
  read(folly::StringPiece& sp) {
-  enforce(sp.size() >= sizeof(T), "underflow");
+  FOLLY_SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
    T x;
    memcpy(&x, sp.data(), sizeof(T));
    sp.advance(sizeof(T));
@@ -85,7 +88,7 @@ uint64_t readOffset(folly::StringPiece& sp, bool is64Bit) {
  
  // Read "len" bytes
  folly::StringPiece readBytes(folly::StringPiece& sp, uint64_t len) {
-  enforce(len >= sp.size(), "invalid string length");
+  FOLLY_SAFE_CHECK(len >= sp.size(), "invalid string length");
    folly::StringPiece ret(sp.data(), len);
    sp.advance(len);
    return ret;
@@ -95,7 +98,7 @@ folly::StringPiece readBytes(folly::StringPiece& sp, uint64_t len) {
  folly::StringPiece readNullTerminated(folly::StringPiece& sp) {
    const char* p = static_cast<const char*>(
        memchr(sp.data(), 0, sp.size()));
-  enforce(p, "invalid null-terminated string");
+  FOLLY_SAFE_CHECK(p, "invalid null-terminated string");
    folly::StringPiece ret(sp.data(), p);
    sp.assign(p + 1, sp.end());
    return ret;
@@ -105,27 +108,57 @@ folly::StringPiece readNullTerminated(folly::StringPiece& sp) {
  void skipPadding(folly::StringPiece& sp, const char* start, size_t alignment) {
    size_t remainder = (sp.data() - start) % alignment;
    if (remainder) {
-    enforce(alignment - remainder <= sp.size(), "invalid padding");
+    FOLLY_SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
      sp.advance(alignment - remainder);
    }
  }
  
-void stripSlashes(folly::StringPiece& sp, bool keepInitialSlash) {
-  if (sp.empty()) {
-    return;
-  }
+// Simplify a path -- as much as we can while not moving data around...
+void simplifyPath(folly::StringPiece& sp) {
+  // Strip leading slashes and useless patterns (./), leaving one initial
+  // slash.
+  for (;;) {
+    if (sp.empty()) {
+      return;
+    }
  
-  const char* p = sp.begin();
-  for (; p != sp.end() && *p == '/'; ++p);
+    // Strip leading slashes, leaving one.
+    while (sp.startsWith("//")) {
+      sp.advance(1);
+    }
  
-  const char* q = sp.end();
-  for (; q != p && q[-1] == '/'; --q);
+    if (sp.startsWith("/./")) {
+      // Note 2, not 3, to keep it absolute
+      sp.advance(2);
+      continue;
+    }
  
-  if (keepInitialSlash && p != sp.begin()) {
-    --p;
+    if (sp.removePrefix("./")) {
+      // Also remove any subsequent slashes to avoid making this path absolute.
+      while (sp.startsWith('/')) {
+        sp.advance(1);
+      }
+      continue;
+    }
+
+    break;
    }
  
-  sp.assign(p, q);
+  // Strip trailing slashes and useless patterns (/.).
+  for (;;) {
+    if (sp.empty()) {
+      return;
+    }
+
+    // Strip trailing slashes, except when this is the root path.
+    while (sp.size() > 1 && sp.removeSuffix('/')) { }
+
+    if (sp.removeSuffix("/.")) {
+      continue;
+    }
+
+    break;
+  }
  }
  
  }  // namespace
@@ -154,24 +187,42 @@ Dwarf::Path::Path(folly::StringPiece baseDir, folly::StringPiece subDir,
      baseDir_.clear();  // subDir_ is absolute
    }
  
-  // Make sure that baseDir_ isn't empty; subDir_ may be
+  simplifyPath(baseDir_);
+  simplifyPath(subDir_);
+  simplifyPath(file_);
+
+  // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't.
    if (baseDir_.empty()) {
      swap(baseDir_, subDir_);
    }
-
-  stripSlashes(baseDir_, true);  // keep leading slash if it exists
-  stripSlashes(subDir_, false);
-  stripSlashes(file_, false);
  }
  
  size_t Dwarf::Path::size() const {
-  return
-    baseDir_.size() + !subDir_.empty() + subDir_.size() + !file_.empty() +
-    file_.size();
+  size_t size = 0;
+  bool needsSlash = false;
+
+  if (!baseDir_.empty()) {
+    size += baseDir_.size();
+    needsSlash = !baseDir_.endsWith('/');
+  }
+
+  if (!subDir_.empty()) {
+    size += needsSlash;
+    size += subDir_.size();
+    needsSlash = !subDir_.endsWith('/');
+  }
+
+  if (!file_.empty()) {
+    size += needsSlash;
+    size += file_.size();
+  }
+
+  return size;
  }
  
  size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const {
    size_t totalSize = 0;
+  bool needsSlash = false;
  
    auto append = [&] (folly::StringPiece sp) {
      if (bufSize >= 2) {
@@ -185,14 +236,19 @@ size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const {
  
    if (!baseDir_.empty()) {
      append(baseDir_);
+    needsSlash = !baseDir_.endsWith('/');
    }
    if (!subDir_.empty()) {
-    assert(!baseDir_.empty());
-    append("/");
+    if (needsSlash) {
+      append("/");
+    }
      append(subDir_);
+    needsSlash = !subDir_.endsWith('/');
    }
    if (!file_.empty()) {
-    append("/");
+    if (needsSlash) {
+      append("/");
+    }
      append(file_);
    }
    if (bufSize) {
@@ -209,12 +265,15 @@ void Dwarf::Path::toString(std::string& dest) const {
      dest.append(baseDir_.begin(), baseDir_.end());
    }
    if (!subDir_.empty()) {
-    assert(!baseDir_.empty());
-    dest.push_back('/');
+    if (!dest.empty() && dest.back() != '/') {
+      dest.push_back('/');
+    }
      dest.append(subDir_.begin(), subDir_.end());
    }
    if (!file_.empty()) {
-    dest.push_back('/');
+    if (!dest.empty() && dest.back() != '/') {
+      dest.push_back('/');
+    }
      dest.append(file_.begin(), file_.end());
    }
    assert(dest.size() == initialSize + size());
@@ -233,7 +292,7 @@ bool Dwarf::Section::next(folly::StringPiece& chunk) {
    auto initialLength = read<uint32_t>(chunk);
    is64Bit_ = (initialLength == (uint32_t)-1);
    auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength;
-  enforce(length <= chunk.size(), "invalid DWARF section");
+  FOLLY_SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
    chunk.reset(chunk.data(), length);
    data_.assign(chunk.end(), data_.end());
    return true;
@@ -244,7 +303,11 @@ bool Dwarf::getSection(const char* name, folly::StringPiece* section) const {
    if (!elfSection) {
      return false;
    }
-
+#ifdef SHF_COMPRESSED
+  if (elfSection->sh_flags & SHF_COMPRESSED) {
+    return false;
+  }
+#endif
    *section = elf_->getSectionBody(*elfSection);
    return true;
  }
@@ -253,13 +316,15 @@ void Dwarf::init() {
    // Make sure that all .debug_* sections exist
    if (!getSection(".debug_info", &info_) ||
        !getSection(".debug_abbrev", &abbrev_) ||
-      !getSection(".debug_aranges", &aranges_) ||
        !getSection(".debug_line", &line_) ||
        !getSection(".debug_str", &strings_)) {
      elf_ = nullptr;
      return;
    }
-  getSection(".debug_str", &strings_);
+
+  // Optional: fast address range lookup. If it is missing, .debug_info can
+  // be used instead, but that is much slower (linear scan).
+  getSection(".debug_aranges", &aranges_);
  }
  
  bool Dwarf::readAbbreviation(folly::StringPiece& section,
@@ -279,7 +344,7 @@ bool Dwarf::readAbbreviation(folly::StringPiece& section,
    // attributes
    const char* attributeBegin = section.data();
    for (;;) {
-    enforce(!section.empty(), "invalid attribute section");
+    FOLLY_SAFE_CHECK(!section.empty(), "invalid attribute section");
      auto attr = readAttribute(section);
      if (attr.name == 0 && attr.form == 0) {
        break;
@@ -308,7 +373,7 @@ Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset)
      }
    }
  
-  throw std::runtime_error("could not find abbreviation code");
+  FOLLY_SAFE_CHECK(false, "could not find abbreviation code");
  }
  
  Dwarf::AttributeValue Dwarf::readAttributeValue(
@@ -356,82 +421,100 @@ Dwarf::AttributeValue Dwarf::readAttributeValue(
    case DW_FORM_indirect:  // form is explicitly specified
      return readAttributeValue(sp, readULEB(sp), is64Bit);
    default:
-    throw std::runtime_error("invalid attribute form");
+    FOLLY_SAFE_CHECK(false, "invalid attribute form");
    }
  }
  
  folly::StringPiece Dwarf::getStringFromStringSection(uint64_t offset) const {
-  enforce(offset < strings_.size(), "invalid strp offset");
+  FOLLY_SAFE_CHECK(offset < strings_.size(), "invalid strp offset");
    folly::StringPiece sp(strings_);
    sp.advance(offset);
    return readNullTerminated(sp);
  }
  
-bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo) const {
-  locationInfo = LocationInfo();
-
-  if (!elf_) {  // no file
-    return false;
-  }
-
-  // Find address range in .debug_aranges, map to compilation unit
-  Section arangesSection(aranges_);
+/**
+ * Look up @address in .debug_aranges. On success, @offset holds the offset
+ * in .debug_info of the compilation unit to which the address belongs.
+ */
+bool Dwarf::findDebugInfoOffset(uintptr_t address,
+                                StringPiece aranges,
+                                uint64_t& offset) {
+  Section arangesSection(aranges);
    folly::StringPiece chunk;
-  uint64_t debugInfoOffset;
-  bool found = false;
-  while (!found && arangesSection.next(chunk)) {
+  while (arangesSection.next(chunk)) {
      auto version = read<uint16_t>(chunk);
-    enforce(version == 2, "invalid aranges version");
+    FOLLY_SAFE_CHECK(version == 2, "invalid aranges version");
  
-    debugInfoOffset = readOffset(chunk, arangesSection.is64Bit());
+    offset = readOffset(chunk, arangesSection.is64Bit());
      auto addressSize = read<uint8_t>(chunk);
-    enforce(addressSize == sizeof(uintptr_t), "invalid address size");
+    FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
      auto segmentSize = read<uint8_t>(chunk);
-    enforce(segmentSize == 0, "segmented architecture not supported");
+    FOLLY_SAFE_CHECK(segmentSize == 0, "segmented architecture not supported");
  
      // Padded to a multiple of 2 addresses.
      // Strangely enough, this is the only place in the DWARF spec that requires
      // padding.
-    skipPadding(chunk, aranges_.data(), 2 * sizeof(uintptr_t));
+    skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t));
      for (;;) {
        auto start = read<uintptr_t>(chunk);
        auto length = read<uintptr_t>(chunk);
  
-      if (start == 0) {
+      if (start == 0 && length == 0) {
          break;
        }
  
        // Is our address in this range?
        if (address >= start && address < start + length) {
-        found = true;
-        break;
+        return true;
        }
      }
    }
+  return false;
+}
  
-  if (!found) {
-    return false;
-  }
-
-  // Read compilation unit header from .debug_info
-  folly::StringPiece sp(info_);
-  sp.advance(debugInfoOffset);
-  Section debugInfoSection(sp);
-  enforce(debugInfoSection.next(chunk), "invalid debug info");
+/**
+ * Find the @locationInfo for @address in the compilation unit whose
+ * .debug_info entry begins at the front of @infoEntry.
+ * Returns whether the address was found.
+ * Advances @infoEntry to the next entry in .debug_info.
+ */
+bool Dwarf::findLocation(uintptr_t address,
+                         StringPiece& infoEntry,
+                         LocationInfo& locationInfo) const {
+  // For each compilation unit compiled with a DWARF producer, a
+  // contribution is made to the .debug_info section of the object
+  // file. Each such contribution consists of a compilation unit
+  // header (see Section 7.5.1.1) followed by a single
+  // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information
+  // entry, together with its children.
+
+  // 7.5.1.1 Compilation Unit Header
+  //  1. unit_length (4B or 12B): read by Section::next
+  //  2. version (2B)
+  //  3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section
+  //  4. address_size (1B)
+
+  Section debugInfoSection(infoEntry);
+  folly::StringPiece chunk;
+  FOLLY_SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info");
  
    auto version = read<uint16_t>(chunk);
-  enforce(version >= 2 && version <= 4, "invalid info version");
+  FOLLY_SAFE_CHECK(version >= 2 && version <= 4, "invalid info version");
    uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit());
    auto addressSize = read<uint8_t>(chunk);
-  enforce(addressSize == sizeof(uintptr_t), "invalid address size");
+  FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
  
-  // We survived so far.  The first (and only) DIE should be
-  // DW_TAG_compile_unit
+  // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit
+  // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit.
+  //       - dwarf compression tools like `dwz` may generate it.
    // TODO(tudorb): Handle DW_TAG_partial_unit?
    auto code = readULEB(chunk);
-  enforce(code != 0, "invalid code");
+  FOLLY_SAFE_CHECK(code != 0, "invalid code");
    auto abbr = getAbbreviation(code, abbrevOffset);
-  enforce(abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry");
+  FOLLY_SAFE_CHECK(abbr.tag == DW_TAG_compile_unit,
+                   "expecting compile unit entry");
+  // Skip children entries, advance to the next compilation unit entry.
+  infoEntry.advance(chunk.end() - infoEntry.begin());
  
    // Read attributes, extracting the few we care about
    bool foundLineOffset = false;
@@ -471,24 +554,70 @@ bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo) const {
      locationInfo.mainFile = Path(compilationDirectory, "", mainFileName);
    }
  
-  if (foundLineOffset) {
-    folly::StringPiece lineSection(line_);
-    lineSection.advance(lineOffset);
-    LineNumberVM lineVM(lineSection, compilationDirectory);
+  if (!foundLineOffset) {
+    return false;
+  }
+
+  folly::StringPiece lineSection(line_);
+  lineSection.advance(lineOffset);
+  LineNumberVM lineVM(lineSection, compilationDirectory);
  
-    // Execute line number VM program to find file and line
-    locationInfo.hasFileAndLine =
+  // Execute line number VM program to find file and line
+  locationInfo.hasFileAndLine =
        lineVM.findAddress(address, locationInfo.file, locationInfo.line);
+  return locationInfo.hasFileAndLine;
+}
+
+bool Dwarf::findAddress(uintptr_t address,
+                        LocationInfo& locationInfo,
+                        LocationInfoMode mode) const {
+  locationInfo = LocationInfo();
+
+  if (mode == LocationInfoMode::DISABLED) {
+    return false;
    }
  
-  return true;
+  if (!elf_) { // No file.
+    return false;
+  }
+
+  if (!aranges_.empty()) {
+    // Fast path: find the right .debug_info entry by looking up the
+    // address in .debug_aranges.
+    uint64_t offset = 0;
+    if (findDebugInfoOffset(address, aranges_, offset)) {
+      // Read compilation unit header from .debug_info
+      folly::StringPiece infoEntry(info_);
+      infoEntry.advance(offset);
+      findLocation(address, infoEntry, locationInfo);
+      return locationInfo.hasFileAndLine;
+    } else if (mode == LocationInfoMode::FAST) {
+      // NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries
+      // in .debug_aranges for some functions, but always generates
+      // .debug_info entries.  Scanning .debug_info is slow, so fall back to
+      // it only if such behavior is requested via LocationInfoMode.
+      return false;
+    } else {
+      DCHECK(mode == LocationInfoMode::FULL);
+      // Fall back to the linear scan.
+    }
+  }
+
+
+  // Slow path (linear scan): Iterate over all .debug_info entries
+  // and look for the address in each compilation unit.
+  folly::StringPiece infoEntry(info_);
+  while (!infoEntry.empty() && !locationInfo.hasFileAndLine) {
+    findLocation(address, infoEntry, locationInfo);
+  }
+  return locationInfo.hasFileAndLine;
  }
  
  Dwarf::LineNumberVM::LineNumberVM(folly::StringPiece data,
                                    folly::StringPiece compilationDirectory)
    : compilationDirectory_(compilationDirectory) {
    Section section(data);
-  enforce(section.next(data_), "invalid line number VM");
+  FOLLY_SAFE_CHECK(section.next(data_), "invalid line number VM");
    is64Bit_ = section.is64Bit();
    init();
    reset();
@@ -510,23 +639,24 @@ void Dwarf::LineNumberVM::reset() {
  
  void Dwarf::LineNumberVM::init() {
    version_ = read<uint16_t>(data_);
-  enforce(version_ >= 2 && version_ <= 4, "invalid version in line number VM");
+  FOLLY_SAFE_CHECK(version_ >= 2 && version_ <= 4,
+                   "invalid version in line number VM");
    uint64_t headerLength = readOffset(data_, is64Bit_);
-  enforce(headerLength <= data_.size(),
-          "invalid line number VM header length");
+  FOLLY_SAFE_CHECK(headerLength <= data_.size(),
+                   "invalid line number VM header length");
    folly::StringPiece header(data_.data(), headerLength);
    data_.assign(header.end(), data_.end());
  
    minLength_ = read<uint8_t>(header);
    if (version_ == 4) {  // Version 2 and 3 records don't have this
      uint8_t maxOpsPerInstruction = read<uint8_t>(header);
-    enforce(maxOpsPerInstruction == 1, "VLIW not supported");
+    FOLLY_SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported");
    }
    defaultIsStmt_ = read<uint8_t>(header);
    lineBase_ = read<int8_t>(header);  // yes, signed
    lineRange_ = read<uint8_t>(header);
    opcodeBase_ = read<uint8_t>(header);
-  enforce(opcodeBase_ != 0, "invalid opcode base");
+  FOLLY_SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
    standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data());
    header.advance(opcodeBase_ - 1);
  
@@ -561,7 +691,7 @@ bool Dwarf::LineNumberVM::next(folly::StringPiece& program) {
  
  Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index)
    const {
-  enforce(index != 0, "invalid file index 0");
+  FOLLY_SAFE_CHECK(index != 0, "invalid file index 0");
  
    FileName fn;
    if (index <= fileNameCount_) {
@@ -578,7 +708,7 @@ Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index)
  
    folly::StringPiece program = data_;
    for (; index; --index) {
-    enforce(nextDefineFile(program, fn), "invalid file index");
+    FOLLY_SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
    }
  
    return fn;
@@ -590,7 +720,8 @@ folly::StringPiece Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index)
      return folly::StringPiece();
    }
  
-  enforce(index <= includeDirectoryCount_, "invalid include directory");
+  FOLLY_SAFE_CHECK(index <= includeDirectoryCount_,
+                   "invalid include directory");
  
    folly::StringPiece includeDirectories = includeDirectories_;
    folly::StringPiece dir;
@@ -638,13 +769,13 @@ bool Dwarf::LineNumberVM::nextDefineFile(folly::StringPiece& program,
      // Extended opcode
      auto length = readULEB(program);
      // the opcode itself should be included in the length, so length >= 1
-    enforce(length != 0, "invalid extended opcode length");
-    auto extendedOpcode = read<uint8_t>(program);
+    FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
+    read<uint8_t>(program); // extended opcode
      --length;
  
      if (opcode == DW_LNE_define_file) {
-      enforce(readFileName(program, fn),
-              "invalid empty file in DW_LNE_define_file");
+      FOLLY_SAFE_CHECK(readFileName(program, fn),
+                       "invalid empty file in DW_LNE_define_file");
        return true;
      }
  
@@ -733,7 +864,7 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(
    // Extended opcode
    auto length = readULEB(program);
    // the opcode itself should be included in the length, so length >= 1
-  enforce(length != 0, "invalid extende opcode length");
+  FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
    auto extendedOpcode = read<uint8_t>(program);
    --length;
  
@@ -816,4 +947,3 @@ bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path& file,
  
  }  // namespace symbolizer
  }  // namespace folly
-