folly/experimental/symbolizer/Dwarf.cpp

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17
  18 #include <folly/experimental/symbolizer/Dwarf.h>
  19
  20 #include <type_traits>
  21
  22 #if FOLLY_HAVE_LIBDWARF_DWARF_H
  23 #include <libdwarf/dwarf.h>
  24 #else
  25 #include <dwarf.h>
  26 #endif
  27
  28 namespace folly {
  29 namespace symbolizer {
  30
  31 Dwarf::Dwarf(const ElfFile* elf) : elf_(elf) {
  32   init();
  33 }
  34
  35 Dwarf::Section::Section(folly::StringPiece d) : is64Bit_(false), data_(d) {
  36 }
  37
  38 namespace {
  39
  40 // All following read* functions read from a StringPiece, advancing the
  41 // StringPiece, and aborting if there's not enough room.
  42
  43 // Read (bitwise) one object of type T
  44 template <class T>
  45 typename std::enable_if<std::is_pod<T>::value, T>::type
  46 read(folly::StringPiece& sp) {
  47   FOLLY_SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
  48   T x;
  49   memcpy(&x, sp.data(), sizeof(T));
  50   sp.advance(sizeof(T));
  51   return x;
  52 }
  53
  54 // Read ULEB (unsigned) varint value; algorithm from the DWARF spec
  55 uint64_t readULEB(folly::StringPiece& sp, uint8_t& shift, uint8_t& val) {
  56   uint64_t r = 0;
  57   shift = 0;
  58   do {
  59     val = read<uint8_t>(sp);
  60     r |= ((uint64_t)(val & 0x7f) << shift);
  61     shift += 7;
  62   } while (val & 0x80);
  63   return r;
  64 }
  65
  66 uint64_t readULEB(folly::StringPiece& sp) {
  67   uint8_t shift;
  68   uint8_t val;
  69   return readULEB(sp, shift, val);
  70 }
  71
  72 // Read SLEB (signed) varint value; algorithm from the DWARF spec
  73 int64_t readSLEB(folly::StringPiece& sp) {
  74   uint8_t shift;
  75   uint8_t val;
  76   uint64_t r = readULEB(sp, shift, val);
  77
  78   if (shift < 64 && (val & 0x40)) {
  79     r |= -(1ULL << shift);  // sign extend
  80   }
  81
  82   return r;
  83 }
  84
  85 // Read a value of "section offset" type, which may be 4 or 8 bytes
  86 uint64_t readOffset(folly::StringPiece& sp, bool is64Bit) {
  87   return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp);
  88 }
  89
  90 // Read "len" bytes
  91 folly::StringPiece readBytes(folly::StringPiece& sp, uint64_t len) {
  92   FOLLY_SAFE_CHECK(len >= sp.size(), "invalid string length");
  93   folly::StringPiece ret(sp.data(), len);
  94   sp.advance(len);
  95   return ret;
  96 }
  97
  98 // Read a null-terminated string
  99 folly::StringPiece readNullTerminated(folly::StringPiece& sp) {
 100   const char* p = static_cast<const char*>(
 101       memchr(sp.data(), 0, sp.size()));
 102   FOLLY_SAFE_CHECK(p, "invalid null-terminated string");
 103   folly::StringPiece ret(sp.data(), p);
 104   sp.assign(p + 1, sp.end());
 105   return ret;
 106 }
 107
 108 // Skip over padding until sp.data() - start is a multiple of alignment
 109 void skipPadding(folly::StringPiece& sp, const char* start, size_t alignment) {
 110   size_t remainder = (sp.data() - start) % alignment;
 111   if (remainder) {
 112     FOLLY_SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
 113     sp.advance(alignment - remainder);
 114   }
 115 }
 116
 117 // Simplify a path -- as much as we can while not moving data around...
 118 void simplifyPath(folly::StringPiece& sp) {
 119   // Strip leading slashes and useless patterns (./), leaving one initial
 120   // slash.
 121   for (;;) {
 122     if (sp.empty()) {
 123       return;
 124     }
 125
 126     // Strip leading slashes, leaving one.
 127     while (sp.startsWith("//")) {
 128       sp.advance(1);
 129     }
 130
 131     if (sp.startsWith("/./")) {
 132       // Note 2, not 3, to keep it absolute
 133       sp.advance(2);
 134       continue;
 135     }
 136
 137     if (sp.removePrefix("./")) {
 138       // Also remove any subsequent slashes to avoid making this path absolute.
 139       while (sp.startsWith('/')) {
 140         sp.advance(1);
 141       }
 142       continue;
 143     }
 144
 145     break;
 146   }
 147
 148   // Strip trailing slashes and useless patterns (/.).
 149   for (;;) {
 150     if (sp.empty()) {
 151       return;
 152     }
 153
 154     // Strip trailing slashes, except when this is the root path.
 155     while (sp.size() > 1 && sp.removeSuffix('/')) { }
 156
 157     if (sp.removeSuffix("/.")) {
 158       continue;
 159     }
 160
 161     break;
 162   }
 163 }
 164
 165 }  // namespace
 166
 167 Dwarf::Path::Path(folly::StringPiece baseDir, folly::StringPiece subDir,
 168                   folly::StringPiece file)
 169   : baseDir_(baseDir),
 170     subDir_(subDir),
 171     file_(file) {
 172   using std::swap;
 173
 174   // Normalize
 175   if (file_.empty()) {
 176     baseDir_.clear();
 177     subDir_.clear();
 178     return;
 179   }
 180
 181   if (file_[0] == '/') {
 182     // file_ is absolute
 183     baseDir_.clear();
 184     subDir_.clear();
 185   }
 186
 187   if (!subDir_.empty() && subDir_[0] == '/') {
 188     baseDir_.clear();  // subDir_ is absolute
 189   }
 190
 191   simplifyPath(baseDir_);
 192   simplifyPath(subDir_);
 193   simplifyPath(file_);
 194
 195   // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't.
 196   if (baseDir_.empty()) {
 197     swap(baseDir_, subDir_);
 198   }
 199 }
 200
 201 size_t Dwarf::Path::size() const {
 202   size_t size = 0;
 203   bool needsSlash = false;
 204
 205   if (!baseDir_.empty()) {
 206     size += baseDir_.size();
 207     needsSlash = !baseDir_.endsWith('/');
 208   }
 209
 210   if (!subDir_.empty()) {
 211     size += needsSlash;
 212     size += subDir_.size();
 213     needsSlash = !subDir_.endsWith('/');
 214   }
 215
 216   if (!file_.empty()) {
 217     size += needsSlash;
 218     size += file_.size();
 219   }
 220
 221   return size;
 222 }
 223
 224 size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const {
 225   size_t totalSize = 0;
 226   bool needsSlash = false;
 227
 228   auto append = [&] (folly::StringPiece sp) {
 229     if (bufSize >= 2) {
 230       size_t toCopy = std::min(sp.size(), bufSize - 1);
 231       memcpy(buf, sp.data(), toCopy);
 232       buf += toCopy;
 233       bufSize -= toCopy;
 234     }
 235     totalSize += sp.size();
 236   };
 237
 238   if (!baseDir_.empty()) {
 239     append(baseDir_);
 240     needsSlash = !baseDir_.endsWith('/');
 241   }
 242   if (!subDir_.empty()) {
 243     if (needsSlash) {
 244       append("/");
 245     }
 246     append(subDir_);
 247     needsSlash = !subDir_.endsWith('/');
 248   }
 249   if (!file_.empty()) {
 250     if (needsSlash) {
 251       append("/");
 252     }
 253     append(file_);
 254   }
 255   if (bufSize) {
 256     *buf = '\0';
 257   }
 258   assert(totalSize == size());
 259   return totalSize;
 260 }
 261
 262 void Dwarf::Path::toString(std::string& dest) const {
 263   size_t initialSize = dest.size();
 264   dest.reserve(initialSize + size());
 265   if (!baseDir_.empty()) {
 266     dest.append(baseDir_.begin(), baseDir_.end());
 267   }
 268   if (!subDir_.empty()) {
 269     if (!dest.empty() && dest.back() != '/') {
 270       dest.push_back('/');
 271     }
 272     dest.append(subDir_.begin(), subDir_.end());
 273   }
 274   if (!file_.empty()) {
 275     if (!dest.empty() && dest.back() != '/') {
 276       dest.push_back('/');
 277     }
 278     dest.append(file_.begin(), file_.end());
 279   }
 280   assert(dest.size() == initialSize + size());
 281 }
 282
 283 // Next chunk in section
 284 bool Dwarf::Section::next(folly::StringPiece& chunk) {
 285   chunk = data_;
 286   if (chunk.empty()) {
 287     return false;
 288   }
 289
 290   // Initial length is a uint32_t value for a 32-bit section, and
 291   // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit
 292   // section.
 293   auto initialLength = read<uint32_t>(chunk);
 294   is64Bit_ = (initialLength == (uint32_t)-1);
 295   auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength;
 296   FOLLY_SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
 297   chunk.reset(chunk.data(), length);
 298   data_.assign(chunk.end(), data_.end());
 299   return true;
 300 }
 301
 302 bool Dwarf::getSection(const char* name, folly::StringPiece* section) const {
 303   const ElfW(Shdr)* elfSection = elf_->getSectionByName(name);
 304   if (!elfSection) {
 305     return false;
 306   }
 307
 308   *section = elf_->getSectionBody(*elfSection);
 309   return true;
 310 }
 311
 312 void Dwarf::init() {
 313   // Make sure that all .debug_* sections exist
 314   if (!getSection(".debug_info", &info_) ||
 315       !getSection(".debug_abbrev", &abbrev_) ||
 316       !getSection(".debug_line", &line_) ||
 317       !getSection(".debug_str", &strings_)) {
 318     elf_ = nullptr;
 319     return;
 320   }
 321   getSection(".debug_str", &strings_);
 322
 323   // Optional: fast address range lookup. If missing .debug_info can
 324   // be used - but it's much slower (linear scan).
 325   getSection(".debug_aranges", &aranges_);
 326 }
 327
 328 bool Dwarf::readAbbreviation(folly::StringPiece& section,
 329                              DIEAbbreviation& abbr) {
 330   // abbreviation code
 331   abbr.code = readULEB(section);
 332   if (abbr.code == 0) {
 333     return false;
 334   }
 335
 336   // abbreviation tag
 337   abbr.tag = readULEB(section);
 338
 339   // does this entry have children?
 340   abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no);
 341
 342   // attributes
 343   const char* attributeBegin = section.data();
 344   for (;;) {
 345     FOLLY_SAFE_CHECK(!section.empty(), "invalid attribute section");
 346     auto attr = readAttribute(section);
 347     if (attr.name == 0 && attr.form == 0) {
 348       break;
 349     }
 350   }
 351
 352   abbr.attributes.assign(attributeBegin, section.data());
 353   return true;
 354 }
 355
 356 Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(
 357     folly::StringPiece& sp) {
 358   return { readULEB(sp), readULEB(sp) };
 359 }
 360
 361 Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset)
 362   const {
 363   // Linear search in the .debug_abbrev section, starting at offset
 364   folly::StringPiece section = abbrev_;
 365   section.advance(offset);
 366
 367   Dwarf::DIEAbbreviation abbr;
 368   while (readAbbreviation(section, abbr)) {
 369     if (abbr.code == code) {
 370       return abbr;
 371     }
 372   }
 373
 374   FOLLY_SAFE_CHECK(false, "could not find abbreviation code");
 375 }
 376
 377 Dwarf::AttributeValue Dwarf::readAttributeValue(
 378     folly::StringPiece& sp, uint64_t form, bool is64Bit) const {
 379   switch (form) {
 380   case DW_FORM_addr:
 381     return read<uintptr_t>(sp);
 382   case DW_FORM_block1:
 383     return readBytes(sp, read<uint8_t>(sp));
 384   case DW_FORM_block2:
 385     return readBytes(sp, read<uint16_t>(sp));
 386   case DW_FORM_block4:
 387     return readBytes(sp, read<uint32_t>(sp));
 388   case DW_FORM_block:  // fallthrough
 389   case DW_FORM_exprloc:
 390     return readBytes(sp, readULEB(sp));
 391   case DW_FORM_data1:  // fallthrough
 392   case DW_FORM_ref1:
 393     return read<uint8_t>(sp);
 394   case DW_FORM_data2:  // fallthrough
 395   case DW_FORM_ref2:
 396     return read<uint16_t>(sp);
 397   case DW_FORM_data4:  // fallthrough
 398   case DW_FORM_ref4:
 399     return read<uint32_t>(sp);
 400   case DW_FORM_data8:  // fallthrough
 401   case DW_FORM_ref8:
 402     return read<uint64_t>(sp);
 403   case DW_FORM_sdata:
 404     return readSLEB(sp);
 405   case DW_FORM_udata:  // fallthrough
 406   case DW_FORM_ref_udata:
 407     return readULEB(sp);
 408   case DW_FORM_flag:
 409     return read<uint8_t>(sp);
 410   case DW_FORM_flag_present:
 411     return 1;
 412   case DW_FORM_sec_offset:  // fallthrough
 413   case DW_FORM_ref_addr:
 414     return readOffset(sp, is64Bit);
 415   case DW_FORM_string:
 416     return readNullTerminated(sp);
 417   case DW_FORM_strp:
 418     return getStringFromStringSection(readOffset(sp, is64Bit));
 419   case DW_FORM_indirect:  // form is explicitly specified
 420     return readAttributeValue(sp, readULEB(sp), is64Bit);
 421   default:
 422     FOLLY_SAFE_CHECK(false, "invalid attribute form");
 423   }
 424 }
 425
 426 folly::StringPiece Dwarf::getStringFromStringSection(uint64_t offset) const {
 427   FOLLY_SAFE_CHECK(offset < strings_.size(), "invalid strp offset");
 428   folly::StringPiece sp(strings_);
 429   sp.advance(offset);
 430   return readNullTerminated(sp);
 431 }
 432
 433 /**
 434  * Find @address in .debug_aranges and return the offset in
 435  * .debug_info for compilation unit to which this address belongs.
 436  */
 437 bool Dwarf::findDebugInfoOffset(uintptr_t address,
 438                                 StringPiece aranges,
 439                                 uint64_t& offset) {
 440   Section arangesSection(aranges);
 441   folly::StringPiece chunk;
 442   while (arangesSection.next(chunk)) {
 443     auto version = read<uint16_t>(chunk);
 444     FOLLY_SAFE_CHECK(version == 2, "invalid aranges version");
 445
 446     offset = readOffset(chunk, arangesSection.is64Bit());
 447     auto addressSize = read<uint8_t>(chunk);
 448     FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
 449     auto segmentSize = read<uint8_t>(chunk);
 450     FOLLY_SAFE_CHECK(segmentSize == 0, "segmented architecture not supported");
 451
 452     // Padded to a multiple of 2 addresses.
 453     // Strangely enough, this is the only place in the DWARF spec that requires
 454     // padding.
 455     skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t));
 456     for (;;) {
 457       auto start = read<uintptr_t>(chunk);
 458       auto length = read<uintptr_t>(chunk);
 459
 460       if (start == 0 && length == 0) {
 461         break;
 462       }
 463
 464       // Is our address in this range?
 465       if (address >= start && address < start + length) {
 466         return true;
 467       }
 468     }
 469   }
 470   return false;
 471 }
 472
 473 /**
 474  * Find the @locationInfo for @address in the compilation unit represented
 475  * by the @sp .debug_info entry.
 476  * Returns whether the address was found.
 477  * Advances @sp to the next entry in .debug_info.
 478  */
 479 bool Dwarf::findLocation(uintptr_t address,
 480                          StringPiece& infoEntry,
 481                          LocationInfo& locationInfo) const {
 482   // For each compilation unit compiled with a DWARF producer, a
 483   // contribution is made to the .debug_info section of the object
 484   // file. Each such contribution consists of a compilation unit
 485   // header (see Section 7.5.1.1) followed by a single
 486   // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information
 487   // entry, together with its children.
 488
 489   // 7.5.1.1 Compilation Unit Header
 490   //  1. unit_length (4B or 12B): read by Section::next
 491   //  2. version (2B)
 492   //  3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section
 493   //  4. address_size (1B)
 494
 495   Section debugInfoSection(infoEntry);
 496   folly::StringPiece chunk;
 497   FOLLY_SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info");
 498
 499   auto version = read<uint16_t>(chunk);
 500   FOLLY_SAFE_CHECK(version >= 2 && version <= 4, "invalid info version");
 501   uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit());
 502   auto addressSize = read<uint8_t>(chunk);
 503   FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
 504
 505   // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit
 506   // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit.
 507   //       - dwarf compression tools like `dwz` may generate it.
 508   // TODO(tudorb): Handle DW_TAG_partial_unit?
 509   auto code = readULEB(chunk);
 510   FOLLY_SAFE_CHECK(code != 0, "invalid code");
 511   auto abbr = getAbbreviation(code, abbrevOffset);
 512   FOLLY_SAFE_CHECK(abbr.tag == DW_TAG_compile_unit,
 513                    "expecting compile unit entry");
 514   // Skip children entries, advance to the next compilation unit entry.
 515   infoEntry.advance(chunk.end() - infoEntry.begin());
 516
 517   // Read attributes, extracting the few we care about
 518   bool foundLineOffset = false;
 519   uint64_t lineOffset = 0;
 520   folly::StringPiece compilationDirectory;
 521   folly::StringPiece mainFileName;
 522
 523   DIEAbbreviation::Attribute attr;
 524   folly::StringPiece attributes = abbr.attributes;
 525   for (;;) {
 526     attr = readAttribute(attributes);
 527     if (attr.name == 0 && attr.form == 0) {
 528       break;
 529     }
 530     auto val = readAttributeValue(chunk, attr.form,
 531                                   debugInfoSection.is64Bit());
 532     switch (attr.name) {
 533     case DW_AT_stmt_list:
 534       // Offset in .debug_line for the line number VM program for this
 535       // compilation unit
 536       lineOffset = boost::get<uint64_t>(val);
 537       foundLineOffset = true;
 538       break;
 539     case DW_AT_comp_dir:
 540       // Compilation directory
 541       compilationDirectory = boost::get<folly::StringPiece>(val);
 542       break;
 543     case DW_AT_name:
 544       // File name of main file being compiled
 545       mainFileName = boost::get<folly::StringPiece>(val);
 546       break;
 547     }
 548   }
 549
 550   if (!mainFileName.empty()) {
 551     locationInfo.hasMainFile = true;
 552     locationInfo.mainFile = Path(compilationDirectory, "", mainFileName);
 553   }
 554
 555   if (!foundLineOffset) {
 556     return false;
 557   }
 558
 559   folly::StringPiece lineSection(line_);
 560   lineSection.advance(lineOffset);
 561   LineNumberVM lineVM(lineSection, compilationDirectory);
 562
 563   // Execute line number VM program to find file and line
 564   locationInfo.hasFileAndLine =
 565       lineVM.findAddress(address, locationInfo.file, locationInfo.line);
 566   return locationInfo.hasFileAndLine;
 567 }
 568
 569 bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo) const {
 570   locationInfo = LocationInfo();
 571
 572   if (!elf_) { // no file
 573     return false;
 574   }
 575
 576   if (!aranges_.empty()) {
 577     // Fast path: find the right .debug_info entry by looking up the
 578     // address in .debug_aranges.
 579     uint64_t offset = 0;
 580     if (!findDebugInfoOffset(address, aranges_, offset)) {
 581       // NOTE: clang doesn't generate entries in .debug_aranges for
 582       // some functions, but always generates .debug_info entries.
 583       // We could read them from .debug_info but that's too slow.
 584       // If .debug_aranges is present fast address lookup is assumed.
 585       return false;
 586     }
 587     // Read compilation unit header from .debug_info
 588     folly::StringPiece infoEntry(info_);
 589     infoEntry.advance(offset);
 590     findLocation(address, infoEntry, locationInfo);
 591     return true;
 592   }
 593
 594   // Slow path (linear scan): Iterate over all .debug_info entries
 595   // and look for the address in each compilation unit.
 596   folly::StringPiece infoEntry(info_);
 597   while (!infoEntry.empty() && !locationInfo.hasFileAndLine) {
 598     findLocation(address, infoEntry, locationInfo);
 599   }
 600   return locationInfo.hasFileAndLine;
 601 }
 602
 603 Dwarf::LineNumberVM::LineNumberVM(folly::StringPiece data,
 604                                   folly::StringPiece compilationDirectory)
 605   : compilationDirectory_(compilationDirectory) {
 606   Section section(data);
 607   FOLLY_SAFE_CHECK(section.next(data_), "invalid line number VM");
 608   is64Bit_ = section.is64Bit();
 609   init();
 610   reset();
 611 }
 612
 613 void Dwarf::LineNumberVM::reset() {
 614   address_ = 0;
 615   file_ = 1;
 616   line_ = 1;
 617   column_ = 0;
 618   isStmt_ = defaultIsStmt_;
 619   basicBlock_ = false;
 620   endSequence_ = false;
 621   prologueEnd_ = false;
 622   epilogueBegin_ = false;
 623   isa_ = 0;
 624   discriminator_ = 0;
 625 }
 626
 627 void Dwarf::LineNumberVM::init() {
 628   version_ = read<uint16_t>(data_);
 629   FOLLY_SAFE_CHECK(version_ >= 2 && version_ <= 4,
 630                    "invalid version in line number VM");
 631   uint64_t headerLength = readOffset(data_, is64Bit_);
 632   FOLLY_SAFE_CHECK(headerLength <= data_.size(),
 633                    "invalid line number VM header length");
 634   folly::StringPiece header(data_.data(), headerLength);
 635   data_.assign(header.end(), data_.end());
 636
 637   minLength_ = read<uint8_t>(header);
 638   if (version_ == 4) {  // Version 2 and 3 records don't have this
 639     uint8_t maxOpsPerInstruction = read<uint8_t>(header);
 640     FOLLY_SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported");
 641   }
 642   defaultIsStmt_ = read<uint8_t>(header);
 643   lineBase_ = read<int8_t>(header);  // yes, signed
 644   lineRange_ = read<uint8_t>(header);
 645   opcodeBase_ = read<uint8_t>(header);
 646   FOLLY_SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
 647   standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data());
 648   header.advance(opcodeBase_ - 1);
 649
 650   // We don't want to use heap, so we don't keep an unbounded amount of state.
 651   // We'll just skip over include directories and file names here, and
 652   // we'll loop again when we actually need to retrieve one.
 653   folly::StringPiece sp;
 654   const char* tmp = header.data();
 655   includeDirectoryCount_ = 0;
 656   while (!(sp = readNullTerminated(header)).empty()) {
 657     ++includeDirectoryCount_;
 658   }
 659   includeDirectories_.assign(tmp, header.data());
 660
 661   tmp = header.data();
 662   FileName fn;
 663   fileNameCount_ = 0;
 664   while (readFileName(header, fn)) {
 665     ++fileNameCount_;
 666   }
 667   fileNames_.assign(tmp, header.data());
 668 }
 669
 670 bool Dwarf::LineNumberVM::next(folly::StringPiece& program) {
 671   Dwarf::LineNumberVM::StepResult ret;
 672   do {
 673     ret = step(program);
 674   } while (ret == CONTINUE);
 675
 676   return (ret == COMMIT);
 677 }
 678
 679 Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index)
 680   const {
 681   FOLLY_SAFE_CHECK(index != 0, "invalid file index 0");
 682
 683   FileName fn;
 684   if (index <= fileNameCount_) {
 685     folly::StringPiece fileNames = fileNames_;
 686     for (; index; --index) {
 687       if (!readFileName(fileNames, fn)) {
 688         abort();
 689       }
 690     }
 691     return fn;
 692   }
 693
 694   index -= fileNameCount_;
 695
 696   folly::StringPiece program = data_;
 697   for (; index; --index) {
 698     FOLLY_SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
 699   }
 700
 701   return fn;
 702 }
 703
 704 folly::StringPiece Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index)
 705   const {
 706   if (index == 0) {
 707     return folly::StringPiece();
 708   }
 709
 710   FOLLY_SAFE_CHECK(index <= includeDirectoryCount_,
 711                    "invalid include directory");
 712
 713   folly::StringPiece includeDirectories = includeDirectories_;
 714   folly::StringPiece dir;
 715   for (; index; --index) {
 716     dir = readNullTerminated(includeDirectories);
 717     if (dir.empty()) {
 718       abort();  // BUG
 719     }
 720   }
 721
 722   return dir;
 723 }
 724
 725 bool Dwarf::LineNumberVM::readFileName(folly::StringPiece& program,
 726                                        FileName& fn) {
 727   fn.relativeName = readNullTerminated(program);
 728   if (fn.relativeName.empty()) {
 729     return false;
 730   }
 731   fn.directoryIndex = readULEB(program);
 732   // Skip over file size and last modified time
 733   readULEB(program);
 734   readULEB(program);
 735   return true;
 736 }
 737
 738 bool Dwarf::LineNumberVM::nextDefineFile(folly::StringPiece& program,
 739                                          FileName& fn) const {
 740   while (!program.empty()) {
 741     auto opcode = read<uint8_t>(program);
 742
 743     if (opcode >= opcodeBase_) {  // special opcode
 744       continue;
 745     }
 746
 747     if (opcode != 0) {  // standard opcode
 748       // Skip, slurp the appropriate number of LEB arguments
 749       uint8_t argCount = standardOpcodeLengths_[opcode - 1];
 750       while (argCount--) {
 751         readULEB(program);
 752       }
 753       continue;
 754     }
 755
 756     // Extended opcode
 757     auto length = readULEB(program);
 758     // the opcode itself should be included in the length, so length >= 1
 759     FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
 760     read<uint8_t>(program); // extended opcode
 761     --length;
 762
 763     if (opcode == DW_LNE_define_file) {
 764       FOLLY_SAFE_CHECK(readFileName(program, fn),
 765                        "invalid empty file in DW_LNE_define_file");
 766       return true;
 767     }
 768
 769     program.advance(length);
 770     continue;
 771   }
 772
 773   return false;
 774 }
 775
 776 Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(
 777     folly::StringPiece& program) {
 778   auto opcode = read<uint8_t>(program);
 779
 780   if (opcode >= opcodeBase_) {  // special opcode
 781     uint8_t adjustedOpcode = opcode - opcodeBase_;
 782     uint8_t opAdvance = adjustedOpcode / lineRange_;
 783
 784     address_ += minLength_ * opAdvance;
 785     line_ += lineBase_ + adjustedOpcode % lineRange_;
 786
 787     basicBlock_ = false;
 788     prologueEnd_ = false;
 789     epilogueBegin_ = false;
 790     discriminator_ = 0;
 791     return COMMIT;
 792   }
 793
 794   if (opcode != 0) {  // standard opcode
 795     // Only interpret opcodes that are recognized by the version we're parsing;
 796     // the others are vendor extensions and we should ignore them.
 797     switch (opcode) {
 798     case DW_LNS_copy:
 799       basicBlock_ = false;
 800       prologueEnd_ = false;
 801       epilogueBegin_ = false;
 802       discriminator_ = 0;
 803       return COMMIT;
 804     case DW_LNS_advance_pc:
 805       address_ += minLength_ * readULEB(program);
 806       return CONTINUE;
 807     case DW_LNS_advance_line:
 808       line_ += readSLEB(program);
 809       return CONTINUE;
 810     case DW_LNS_set_file:
 811       file_ = readULEB(program);
 812       return CONTINUE;
 813     case DW_LNS_set_column:
 814       column_ = readULEB(program);
 815       return CONTINUE;
 816     case DW_LNS_negate_stmt:
 817       isStmt_ = !isStmt_;
 818       return CONTINUE;
 819     case DW_LNS_set_basic_block:
 820       basicBlock_ = true;
 821       return CONTINUE;
 822     case DW_LNS_const_add_pc:
 823       address_ += minLength_ * ((255 - opcodeBase_) / lineRange_);
 824       return CONTINUE;
 825     case DW_LNS_fixed_advance_pc:
 826       address_ += read<uint16_t>(program);
 827       return CONTINUE;
 828     case DW_LNS_set_prologue_end:
 829       if (version_ == 2) break;  // not supported in version 2
 830       prologueEnd_ = true;
 831       return CONTINUE;
 832     case DW_LNS_set_epilogue_begin:
 833       if (version_ == 2) break;  // not supported in version 2
 834       epilogueBegin_ = true;
 835       return CONTINUE;
 836     case DW_LNS_set_isa:
 837       if (version_ == 2) break;  // not supported in version 2
 838       isa_ = readULEB(program);
 839       return CONTINUE;
 840     }
 841
 842     // Unrecognized standard opcode, slurp the appropriate number of LEB
 843     // arguments.
 844     uint8_t argCount = standardOpcodeLengths_[opcode - 1];
 845     while (argCount--) {
 846       readULEB(program);
 847     }
 848     return CONTINUE;
 849   }
 850
 851   // Extended opcode
 852   auto length = readULEB(program);
 853   // the opcode itself should be included in the length, so length >= 1
 854   FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
 855   auto extendedOpcode = read<uint8_t>(program);
 856   --length;
 857
 858   switch (extendedOpcode) {
 859   case DW_LNE_end_sequence:
 860     return END;
 861   case DW_LNE_set_address:
 862     address_ = read<uintptr_t>(program);
 863     return CONTINUE;
 864   case DW_LNE_define_file:
 865     // We can't process DW_LNE_define_file here, as it would require us to
 866     // use unbounded amounts of state (ie. use the heap).  We'll do a second
 867     // pass (using nextDefineFile()) if necessary.
 868     break;
 869   case DW_LNE_set_discriminator:
 870     discriminator_ = readULEB(program);
 871     return CONTINUE;
 872   }
 873
 874   // Unrecognized extended opcode
 875   program.advance(length);
 876   return CONTINUE;
 877 }
 878
 879 bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path& file,
 880                                       uint64_t& line) {
 881   folly::StringPiece program = data_;
 882
 883   // Within each sequence of instructions, the address may only increase.
 884   // Unfortunately, within the same compilation unit, sequences may appear
 885   // in any order.  So any sequence is a candidate if it starts at an address
 886   // <= the target address, and we know we've found the target address if
 887   // a candidate crosses the target address.
 888   enum State {
 889     START,
 890     LOW_SEQ,  // candidate
 891     HIGH_SEQ
 892   };
 893   State state = START;
 894   reset();
 895
 896   uint64_t prevFile = 0;
 897   uint64_t prevLine = 0;
 898   while (!program.empty()) {
 899     bool seqEnd = !next(program);
 900
 901     if (state == START) {
 902       if (!seqEnd) {
 903         state = address_ <= target ? LOW_SEQ : HIGH_SEQ;
 904       }
 905     }
 906
 907     if (state == LOW_SEQ) {
 908       if (address_ > target) {
 909         // Found it!  Note that ">" is indeed correct (not ">="), as each
 910         // sequence is guaranteed to have one entry past-the-end (emitted by
 911         // DW_LNE_end_sequence)
 912         if (prevFile == 0) {
 913           return false;
 914         }
 915         auto fn = getFileName(prevFile);
 916         file = Path(compilationDirectory_,
 917                     getIncludeDirectory(fn.directoryIndex),
 918                     fn.relativeName);
 919         line = prevLine;
 920         return true;
 921       }
 922       prevFile = file_;
 923       prevLine = line_;
 924     }
 925
 926     if (seqEnd) {
 927       state = START;
 928       reset();
 929     }
 930   }
 931
 932   return false;
 933 }
 934
 935 }  // namespace symbolizer
 936 }  // namespace folly