folly/experimental/symbolizer/Dwarf.cpp

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17
  18 #include <folly/experimental/symbolizer/Dwarf.h>
  19
  20 #include <type_traits>
  21
  22 #include <dwarf.h>
  23
  24 namespace folly {
  25 namespace symbolizer {
  26
  27 Dwarf::Dwarf(const ElfFile* elf) : elf_(elf) {
  28   init();
  29 }
  30
  31 Dwarf::Section::Section(folly::StringPiece d) : is64Bit_(false), data_(d) {
  32 }
  33
  34 namespace {
  35
  36 // All following read* functions read from a StringPiece, advancing the
  37 // StringPiece, and aborting if there's not enough room.
  38
  39 // Read (bitwise) one object of type T
  40 template <class T>
  41 typename std::enable_if<std::is_pod<T>::value, T>::type
  42 read(folly::StringPiece& sp) {
  43   FOLLY_SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
  44   T x;
  45   memcpy(&x, sp.data(), sizeof(T));
  46   sp.advance(sizeof(T));
  47   return x;
  48 }
  49
  50 // Read ULEB (unsigned) varint value; algorithm from the DWARF spec
  51 uint64_t readULEB(folly::StringPiece& sp, uint8_t& shift, uint8_t& val) {
  52   uint64_t r = 0;
  53   shift = 0;
  54   do {
  55     val = read<uint8_t>(sp);
  56     r |= ((uint64_t)(val & 0x7f) << shift);
  57     shift += 7;
  58   } while (val & 0x80);
  59   return r;
  60 }
  61
  62 uint64_t readULEB(folly::StringPiece& sp) {
  63   uint8_t shift;
  64   uint8_t val;
  65   return readULEB(sp, shift, val);
  66 }
  67
  68 // Read SLEB (signed) varint value; algorithm from the DWARF spec
  69 int64_t readSLEB(folly::StringPiece& sp) {
  70   uint8_t shift;
  71   uint8_t val;
  72   uint64_t r = readULEB(sp, shift, val);
  73
  74   if (shift < 64 && (val & 0x40)) {
  75     r |= -(1ULL << shift);  // sign extend
  76   }
  77
  78   return r;
  79 }
  80
  81 // Read a value of "section offset" type, which may be 4 or 8 bytes
  82 uint64_t readOffset(folly::StringPiece& sp, bool is64Bit) {
  83   return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp);
  84 }
  85
  86 // Read "len" bytes
  87 folly::StringPiece readBytes(folly::StringPiece& sp, uint64_t len) {
  88   FOLLY_SAFE_CHECK(len >= sp.size(), "invalid string length");
  89   folly::StringPiece ret(sp.data(), len);
  90   sp.advance(len);
  91   return ret;
  92 }
  93
  94 // Read a null-terminated string
  95 folly::StringPiece readNullTerminated(folly::StringPiece& sp) {
  96   const char* p = static_cast<const char*>(
  97       memchr(sp.data(), 0, sp.size()));
  98   FOLLY_SAFE_CHECK(p, "invalid null-terminated string");
  99   folly::StringPiece ret(sp.data(), p);
 100   sp.assign(p + 1, sp.end());
 101   return ret;
 102 }
 103
 104 // Skip over padding until sp.data() - start is a multiple of alignment
 105 void skipPadding(folly::StringPiece& sp, const char* start, size_t alignment) {
 106   size_t remainder = (sp.data() - start) % alignment;
 107   if (remainder) {
 108     FOLLY_SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
 109     sp.advance(alignment - remainder);
 110   }
 111 }
 112
 113 // Simplify a path -- as much as we can while not moving data around...
 114 void simplifyPath(folly::StringPiece& sp) {
 115   // Strip leading slashes and useless patterns (./), leaving one initial
 116   // slash.
 117   for (;;) {
 118     if (sp.empty()) {
 119       return;
 120     }
 121
 122     // Strip leading slashes, leaving one.
 123     while (sp.startsWith("//")) {
 124       sp.advance(1);
 125     }
 126
 127     if (sp.startsWith("/./")) {
 128       // Note 2, not 3, to keep it absolute
 129       sp.advance(2);
 130       continue;
 131     }
 132
 133     if (sp.removePrefix("./")) {
 134       // Also remove any subsequent slashes to avoid making this path absolute.
 135       while (sp.startsWith('/')) {
 136         sp.advance(1);
 137       }
 138       continue;
 139     }
 140
 141     break;
 142   }
 143
 144   // Strip trailing slashes and useless patterns (/.).
 145   for (;;) {
 146     if (sp.empty()) {
 147       return;
 148     }
 149
 150     // Strip trailing slashes, except when this is the root path.
 151     while (sp.size() > 1 && sp.removeSuffix('/')) { }
 152
 153     if (sp.removeSuffix("/.")) {
 154       continue;
 155     }
 156
 157     break;
 158   }
 159 }
 160
 161 }  // namespace
 162
 163 Dwarf::Path::Path(folly::StringPiece baseDir, folly::StringPiece subDir,
 164                   folly::StringPiece file)
 165   : baseDir_(baseDir),
 166     subDir_(subDir),
 167     file_(file) {
 168   using std::swap;
 169
 170   // Normalize
 171   if (file_.empty()) {
 172     baseDir_.clear();
 173     subDir_.clear();
 174     return;
 175   }
 176
 177   if (file_[0] == '/') {
 178     // file_ is absolute
 179     baseDir_.clear();
 180     subDir_.clear();
 181   }
 182
 183   if (!subDir_.empty() && subDir_[0] == '/') {
 184     baseDir_.clear();  // subDir_ is absolute
 185   }
 186
 187   simplifyPath(baseDir_);
 188   simplifyPath(subDir_);
 189   simplifyPath(file_);
 190
 191   // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't.
 192   if (baseDir_.empty()) {
 193     swap(baseDir_, subDir_);
 194   }
 195 }
 196
 197 size_t Dwarf::Path::size() const {
 198   size_t size = 0;
 199   bool needsSlash = false;
 200
 201   if (!baseDir_.empty()) {
 202     size += baseDir_.size();
 203     needsSlash = !baseDir_.endsWith('/');
 204   }
 205
 206   if (!subDir_.empty()) {
 207     size += needsSlash;
 208     size += subDir_.size();
 209     needsSlash = !subDir_.endsWith('/');
 210   }
 211
 212   if (!file_.empty()) {
 213     size += needsSlash;
 214     size += file_.size();
 215   }
 216
 217   return size;
 218 }
 219
 220 size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const {
 221   size_t totalSize = 0;
 222   bool needsSlash = false;
 223
 224   auto append = [&] (folly::StringPiece sp) {
 225     if (bufSize >= 2) {
 226       size_t toCopy = std::min(sp.size(), bufSize - 1);
 227       memcpy(buf, sp.data(), toCopy);
 228       buf += toCopy;
 229       bufSize -= toCopy;
 230     }
 231     totalSize += sp.size();
 232   };
 233
 234   if (!baseDir_.empty()) {
 235     append(baseDir_);
 236     needsSlash = !baseDir_.endsWith('/');
 237   }
 238   if (!subDir_.empty()) {
 239     if (needsSlash) {
 240       append("/");
 241     }
 242     append(subDir_);
 243     needsSlash = !subDir_.endsWith('/');
 244   }
 245   if (!file_.empty()) {
 246     if (needsSlash) {
 247       append("/");
 248     }
 249     append(file_);
 250   }
 251   if (bufSize) {
 252     *buf = '\0';
 253   }
 254   assert(totalSize == size());
 255   return totalSize;
 256 }
 257
 258 void Dwarf::Path::toString(std::string& dest) const {
 259   size_t initialSize = dest.size();
 260   dest.reserve(initialSize + size());
 261   if (!baseDir_.empty()) {
 262     dest.append(baseDir_.begin(), baseDir_.end());
 263   }
 264   if (!subDir_.empty()) {
 265     if (!dest.empty() && dest.back() != '/') {
 266       dest.push_back('/');
 267     }
 268     dest.append(subDir_.begin(), subDir_.end());
 269   }
 270   if (!file_.empty()) {
 271     if (!dest.empty() && dest.back() != '/') {
 272       dest.push_back('/');
 273     }
 274     dest.append(file_.begin(), file_.end());
 275   }
 276   assert(dest.size() == initialSize + size());
 277 }
 278
 279 // Next chunk in section
 280 bool Dwarf::Section::next(folly::StringPiece& chunk) {
 281   chunk = data_;
 282   if (chunk.empty()) {
 283     return false;
 284   }
 285
 286   // Initial length is a uint32_t value for a 32-bit section, and
 287   // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit
 288   // section.
 289   auto initialLength = read<uint32_t>(chunk);
 290   is64Bit_ = (initialLength == (uint32_t)-1);
 291   auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength;
 292   FOLLY_SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
 293   chunk.reset(chunk.data(), length);
 294   data_.assign(chunk.end(), data_.end());
 295   return true;
 296 }
 297
 298 bool Dwarf::getSection(const char* name, folly::StringPiece* section) const {
 299   const ElfW(Shdr)* elfSection = elf_->getSectionByName(name);
 300   if (!elfSection) {
 301     return false;
 302   }
 303
 304   *section = elf_->getSectionBody(*elfSection);
 305   return true;
 306 }
 307
 308 void Dwarf::init() {
 309   // Make sure that all .debug_* sections exist
 310   if (!getSection(".debug_info", &info_) ||
 311       !getSection(".debug_abbrev", &abbrev_) ||
 312       !getSection(".debug_line", &line_) ||
 313       !getSection(".debug_str", &strings_)) {
 314     elf_ = nullptr;
 315     return;
 316   }
 317   getSection(".debug_str", &strings_);
 318
 319   // Optional: fast address range lookup. If missing .debug_info can
 320   // be used - but it's much slower (linear scan).
 321   getSection(".debug_aranges", &aranges_);
 322 }
 323
 324 bool Dwarf::readAbbreviation(folly::StringPiece& section,
 325                              DIEAbbreviation& abbr) {
 326   // abbreviation code
 327   abbr.code = readULEB(section);
 328   if (abbr.code == 0) {
 329     return false;
 330   }
 331
 332   // abbreviation tag
 333   abbr.tag = readULEB(section);
 334
 335   // does this entry have children?
 336   abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no);
 337
 338   // attributes
 339   const char* attributeBegin = section.data();
 340   for (;;) {
 341     FOLLY_SAFE_CHECK(!section.empty(), "invalid attribute section");
 342     auto attr = readAttribute(section);
 343     if (attr.name == 0 && attr.form == 0) {
 344       break;
 345     }
 346   }
 347
 348   abbr.attributes.assign(attributeBegin, section.data());
 349   return true;
 350 }
 351
 352 Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(
 353     folly::StringPiece& sp) {
 354   return { readULEB(sp), readULEB(sp) };
 355 }
 356
 357 Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset)
 358   const {
 359   // Linear search in the .debug_abbrev section, starting at offset
 360   folly::StringPiece section = abbrev_;
 361   section.advance(offset);
 362
 363   Dwarf::DIEAbbreviation abbr;
 364   while (readAbbreviation(section, abbr)) {
 365     if (abbr.code == code) {
 366       return abbr;
 367     }
 368   }
 369
 370   FOLLY_SAFE_CHECK(false, "could not find abbreviation code");
 371 }
 372
 373 Dwarf::AttributeValue Dwarf::readAttributeValue(
 374     folly::StringPiece& sp, uint64_t form, bool is64Bit) const {
 375   switch (form) {
 376   case DW_FORM_addr:
 377     return read<uintptr_t>(sp);
 378   case DW_FORM_block1:
 379     return readBytes(sp, read<uint8_t>(sp));
 380   case DW_FORM_block2:
 381     return readBytes(sp, read<uint16_t>(sp));
 382   case DW_FORM_block4:
 383     return readBytes(sp, read<uint32_t>(sp));
 384   case DW_FORM_block:  // fallthrough
 385   case DW_FORM_exprloc:
 386     return readBytes(sp, readULEB(sp));
 387   case DW_FORM_data1:  // fallthrough
 388   case DW_FORM_ref1:
 389     return read<uint8_t>(sp);
 390   case DW_FORM_data2:  // fallthrough
 391   case DW_FORM_ref2:
 392     return read<uint16_t>(sp);
 393   case DW_FORM_data4:  // fallthrough
 394   case DW_FORM_ref4:
 395     return read<uint32_t>(sp);
 396   case DW_FORM_data8:  // fallthrough
 397   case DW_FORM_ref8:
 398     return read<uint64_t>(sp);
 399   case DW_FORM_sdata:
 400     return readSLEB(sp);
 401   case DW_FORM_udata:  // fallthrough
 402   case DW_FORM_ref_udata:
 403     return readULEB(sp);
 404   case DW_FORM_flag:
 405     return read<uint8_t>(sp);
 406   case DW_FORM_flag_present:
 407     return 1;
 408   case DW_FORM_sec_offset:  // fallthrough
 409   case DW_FORM_ref_addr:
 410     return readOffset(sp, is64Bit);
 411   case DW_FORM_string:
 412     return readNullTerminated(sp);
 413   case DW_FORM_strp:
 414     return getStringFromStringSection(readOffset(sp, is64Bit));
 415   case DW_FORM_indirect:  // form is explicitly specified
 416     return readAttributeValue(sp, readULEB(sp), is64Bit);
 417   default:
 418     FOLLY_SAFE_CHECK(false, "invalid attribute form");
 419   }
 420 }
 421
 422 folly::StringPiece Dwarf::getStringFromStringSection(uint64_t offset) const {
 423   FOLLY_SAFE_CHECK(offset < strings_.size(), "invalid strp offset");
 424   folly::StringPiece sp(strings_);
 425   sp.advance(offset);
 426   return readNullTerminated(sp);
 427 }
 428
 429 /**
 430  * Find @address in .debug_aranges and return the offset in
 431  * .debug_info for compilation unit to which this address belongs.
 432  */
 433 bool Dwarf::findDebugInfoOffset(uintptr_t address,
 434                                 StringPiece aranges,
 435                                 uint64_t& offset) {
 436   Section arangesSection(aranges);
 437   folly::StringPiece chunk;
 438   while (arangesSection.next(chunk)) {
 439     auto version = read<uint16_t>(chunk);
 440     FOLLY_SAFE_CHECK(version == 2, "invalid aranges version");
 441
 442     offset = readOffset(chunk, arangesSection.is64Bit());
 443     auto addressSize = read<uint8_t>(chunk);
 444     FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
 445     auto segmentSize = read<uint8_t>(chunk);
 446     FOLLY_SAFE_CHECK(segmentSize == 0, "segmented architecture not supported");
 447
 448     // Padded to a multiple of 2 addresses.
 449     // Strangely enough, this is the only place in the DWARF spec that requires
 450     // padding.
 451     skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t));
 452     for (;;) {
 453       auto start = read<uintptr_t>(chunk);
 454       auto length = read<uintptr_t>(chunk);
 455
 456       if (start == 0) {
 457         break;
 458       }
 459
 460       // Is our address in this range?
 461       if (address >= start && address < start + length) {
 462         return true;
 463       }
 464     }
 465   }
 466   return false;
 467 }
 468
 469 /**
 470  * Find the @locationInfo for @address in the compilation unit represented
 471  * by the @sp .debug_info entry.
 472  * Returns whether the address was found.
 473  * Advances @sp to the next entry in .debug_info.
 474  */
 475 bool Dwarf::findLocation(uintptr_t address,
 476                          StringPiece& infoEntry,
 477                          LocationInfo& locationInfo) const {
 478   // For each compilation unit compiled with a DWARF producer, a
 479   // contribution is made to the .debug_info section of the object
 480   // file. Each such contribution consists of a compilation unit
 481   // header (see Section 7.5.1.1) followed by a single
 482   // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information
 483   // entry, together with its children.
 484
 485   // 7.5.1.1 Compilation Unit Header
 486   //  1. unit_length (4B or 12B): read by Section::next
 487   //  2. version (2B)
 488   //  3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section
 489   //  4. address_size (1B)
 490
 491   Section debugInfoSection(infoEntry);
 492   folly::StringPiece chunk;
 493   FOLLY_SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info");
 494
 495   auto version = read<uint16_t>(chunk);
 496   FOLLY_SAFE_CHECK(version >= 2 && version <= 4, "invalid info version");
 497   uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit());
 498   auto addressSize = read<uint8_t>(chunk);
 499   FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
 500
 501   // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit
 502   // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit.
 503   //       - dwarf compression tools like `dwz` may generate it.
 504   // TODO(tudorb): Handle DW_TAG_partial_unit?
 505   auto code = readULEB(chunk);
 506   FOLLY_SAFE_CHECK(code != 0, "invalid code");
 507   auto abbr = getAbbreviation(code, abbrevOffset);
 508   FOLLY_SAFE_CHECK(abbr.tag == DW_TAG_compile_unit,
 509                    "expecting compile unit entry");
 510   // Skip children entries, advance to the next compilation unit entry.
 511   infoEntry.advance(chunk.end() - infoEntry.begin());
 512
 513   // Read attributes, extracting the few we care about
 514   bool foundLineOffset = false;
 515   uint64_t lineOffset = 0;
 516   folly::StringPiece compilationDirectory;
 517   folly::StringPiece mainFileName;
 518
 519   DIEAbbreviation::Attribute attr;
 520   folly::StringPiece attributes = abbr.attributes;
 521   for (;;) {
 522     attr = readAttribute(attributes);
 523     if (attr.name == 0 && attr.form == 0) {
 524       break;
 525     }
 526     auto val = readAttributeValue(chunk, attr.form,
 527                                   debugInfoSection.is64Bit());
 528     switch (attr.name) {
 529     case DW_AT_stmt_list:
 530       // Offset in .debug_line for the line number VM program for this
 531       // compilation unit
 532       lineOffset = boost::get<uint64_t>(val);
 533       foundLineOffset = true;
 534       break;
 535     case DW_AT_comp_dir:
 536       // Compilation directory
 537       compilationDirectory = boost::get<folly::StringPiece>(val);
 538       break;
 539     case DW_AT_name:
 540       // File name of main file being compiled
 541       mainFileName = boost::get<folly::StringPiece>(val);
 542       break;
 543     }
 544   }
 545
 546   if (!mainFileName.empty()) {
 547     locationInfo.hasMainFile = true;
 548     locationInfo.mainFile = Path(compilationDirectory, "", mainFileName);
 549   }
 550
 551   if (!foundLineOffset) {
 552     return false;
 553   }
 554
 555   folly::StringPiece lineSection(line_);
 556   lineSection.advance(lineOffset);
 557   LineNumberVM lineVM(lineSection, compilationDirectory);
 558
 559   // Execute line number VM program to find file and line
 560   locationInfo.hasFileAndLine =
 561       lineVM.findAddress(address, locationInfo.file, locationInfo.line);
 562   return locationInfo.hasFileAndLine;
 563 }
 564
 565 bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo) const {
 566   locationInfo = LocationInfo();
 567
 568   if (!elf_) { // no file
 569     return false;
 570   }
 571
 572   if (!aranges_.empty()) {
 573     // Fast path: find the right .debug_info entry by looking up the
 574     // address in .debug_aranges.
 575     uint64_t offset = 0;
 576     if (!findDebugInfoOffset(address, aranges_, offset)) {
 577       // NOTE: clang doesn't generate entries in .debug_aranges for
 578       // some functions, but always generates .debug_info entries.
 579       // We could read them from .debug_info but that's too slow.
 580       // If .debug_aranges is present fast address lookup is assumed.
 581       return false;
 582     }
 583     // Read compilation unit header from .debug_info
 584     folly::StringPiece infoEntry(info_);
 585     infoEntry.advance(offset);
 586     findLocation(address, infoEntry, locationInfo);
 587     return true;
 588   }
 589
 590   // Slow path (linear scan): Iterate over all .debug_info entries
 591   // and look for the address in each compilation unit.
 592   folly::StringPiece infoEntry(info_);
 593   while (!infoEntry.empty() && !locationInfo.hasFileAndLine) {
 594     findLocation(address, infoEntry, locationInfo);
 595   }
 596   return locationInfo.hasFileAndLine;
 597 }
 598
 599 Dwarf::LineNumberVM::LineNumberVM(folly::StringPiece data,
 600                                   folly::StringPiece compilationDirectory)
 601   : compilationDirectory_(compilationDirectory) {
 602   Section section(data);
 603   FOLLY_SAFE_CHECK(section.next(data_), "invalid line number VM");
 604   is64Bit_ = section.is64Bit();
 605   init();
 606   reset();
 607 }
 608
 609 void Dwarf::LineNumberVM::reset() {
 610   address_ = 0;
 611   file_ = 1;
 612   line_ = 1;
 613   column_ = 0;
 614   isStmt_ = defaultIsStmt_;
 615   basicBlock_ = false;
 616   endSequence_ = false;
 617   prologueEnd_ = false;
 618   epilogueBegin_ = false;
 619   isa_ = 0;
 620   discriminator_ = 0;
 621 }
 622
 623 void Dwarf::LineNumberVM::init() {
 624   version_ = read<uint16_t>(data_);
 625   FOLLY_SAFE_CHECK(version_ >= 2 && version_ <= 4,
 626                    "invalid version in line number VM");
 627   uint64_t headerLength = readOffset(data_, is64Bit_);
 628   FOLLY_SAFE_CHECK(headerLength <= data_.size(),
 629                    "invalid line number VM header length");
 630   folly::StringPiece header(data_.data(), headerLength);
 631   data_.assign(header.end(), data_.end());
 632
 633   minLength_ = read<uint8_t>(header);
 634   if (version_ == 4) {  // Version 2 and 3 records don't have this
 635     uint8_t maxOpsPerInstruction = read<uint8_t>(header);
 636     FOLLY_SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported");
 637   }
 638   defaultIsStmt_ = read<uint8_t>(header);
 639   lineBase_ = read<int8_t>(header);  // yes, signed
 640   lineRange_ = read<uint8_t>(header);
 641   opcodeBase_ = read<uint8_t>(header);
 642   FOLLY_SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
 643   standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data());
 644   header.advance(opcodeBase_ - 1);
 645
 646   // We don't want to use heap, so we don't keep an unbounded amount of state.
 647   // We'll just skip over include directories and file names here, and
 648   // we'll loop again when we actually need to retrieve one.
 649   folly::StringPiece sp;
 650   const char* tmp = header.data();
 651   includeDirectoryCount_ = 0;
 652   while (!(sp = readNullTerminated(header)).empty()) {
 653     ++includeDirectoryCount_;
 654   }
 655   includeDirectories_.assign(tmp, header.data());
 656
 657   tmp = header.data();
 658   FileName fn;
 659   fileNameCount_ = 0;
 660   while (readFileName(header, fn)) {
 661     ++fileNameCount_;
 662   }
 663   fileNames_.assign(tmp, header.data());
 664 }
 665
 666 bool Dwarf::LineNumberVM::next(folly::StringPiece& program) {
 667   Dwarf::LineNumberVM::StepResult ret;
 668   do {
 669     ret = step(program);
 670   } while (ret == CONTINUE);
 671
 672   return (ret == COMMIT);
 673 }
 674
 675 Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index)
 676   const {
 677   FOLLY_SAFE_CHECK(index != 0, "invalid file index 0");
 678
 679   FileName fn;
 680   if (index <= fileNameCount_) {
 681     folly::StringPiece fileNames = fileNames_;
 682     for (; index; --index) {
 683       if (!readFileName(fileNames, fn)) {
 684         abort();
 685       }
 686     }
 687     return fn;
 688   }
 689
 690   index -= fileNameCount_;
 691
 692   folly::StringPiece program = data_;
 693   for (; index; --index) {
 694     FOLLY_SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
 695   }
 696
 697   return fn;
 698 }
 699
 700 folly::StringPiece Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index)
 701   const {
 702   if (index == 0) {
 703     return folly::StringPiece();
 704   }
 705
 706   FOLLY_SAFE_CHECK(index <= includeDirectoryCount_,
 707                    "invalid include directory");
 708
 709   folly::StringPiece includeDirectories = includeDirectories_;
 710   folly::StringPiece dir;
 711   for (; index; --index) {
 712     dir = readNullTerminated(includeDirectories);
 713     if (dir.empty()) {
 714       abort();  // BUG
 715     }
 716   }
 717
 718   return dir;
 719 }
 720
 721 bool Dwarf::LineNumberVM::readFileName(folly::StringPiece& program,
 722                                        FileName& fn) {
 723   fn.relativeName = readNullTerminated(program);
 724   if (fn.relativeName.empty()) {
 725     return false;
 726   }
 727   fn.directoryIndex = readULEB(program);
 728   // Skip over file size and last modified time
 729   readULEB(program);
 730   readULEB(program);
 731   return true;
 732 }
 733
 734 bool Dwarf::LineNumberVM::nextDefineFile(folly::StringPiece& program,
 735                                          FileName& fn) const {
 736   while (!program.empty()) {
 737     auto opcode = read<uint8_t>(program);
 738
 739     if (opcode >= opcodeBase_) {  // special opcode
 740       continue;
 741     }
 742
 743     if (opcode != 0) {  // standard opcode
 744       // Skip, slurp the appropriate number of LEB arguments
 745       uint8_t argCount = standardOpcodeLengths_[opcode - 1];
 746       while (argCount--) {
 747         readULEB(program);
 748       }
 749       continue;
 750     }
 751
 752     // Extended opcode
 753     auto length = readULEB(program);
 754     // the opcode itself should be included in the length, so length >= 1
 755     FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
 756     read<uint8_t>(program); // extended opcode
 757     --length;
 758
 759     if (opcode == DW_LNE_define_file) {
 760       FOLLY_SAFE_CHECK(readFileName(program, fn),
 761                        "invalid empty file in DW_LNE_define_file");
 762       return true;
 763     }
 764
 765     program.advance(length);
 766     continue;
 767   }
 768
 769   return false;
 770 }
 771
 772 Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(
 773     folly::StringPiece& program) {
 774   auto opcode = read<uint8_t>(program);
 775
 776   if (opcode >= opcodeBase_) {  // special opcode
 777     uint8_t adjustedOpcode = opcode - opcodeBase_;
 778     uint8_t opAdvance = adjustedOpcode / lineRange_;
 779
 780     address_ += minLength_ * opAdvance;
 781     line_ += lineBase_ + adjustedOpcode % lineRange_;
 782
 783     basicBlock_ = false;
 784     prologueEnd_ = false;
 785     epilogueBegin_ = false;
 786     discriminator_ = 0;
 787     return COMMIT;
 788   }
 789
 790   if (opcode != 0) {  // standard opcode
 791     // Only interpret opcodes that are recognized by the version we're parsing;
 792     // the others are vendor extensions and we should ignore them.
 793     switch (opcode) {
 794     case DW_LNS_copy:
 795       basicBlock_ = false;
 796       prologueEnd_ = false;
 797       epilogueBegin_ = false;
 798       discriminator_ = 0;
 799       return COMMIT;
 800     case DW_LNS_advance_pc:
 801       address_ += minLength_ * readULEB(program);
 802       return CONTINUE;
 803     case DW_LNS_advance_line:
 804       line_ += readSLEB(program);
 805       return CONTINUE;
 806     case DW_LNS_set_file:
 807       file_ = readULEB(program);
 808       return CONTINUE;
 809     case DW_LNS_set_column:
 810       column_ = readULEB(program);
 811       return CONTINUE;
 812     case DW_LNS_negate_stmt:
 813       isStmt_ = !isStmt_;
 814       return CONTINUE;
 815     case DW_LNS_set_basic_block:
 816       basicBlock_ = true;
 817       return CONTINUE;
 818     case DW_LNS_const_add_pc:
 819       address_ += minLength_ * ((255 - opcodeBase_) / lineRange_);
 820       return CONTINUE;
 821     case DW_LNS_fixed_advance_pc:
 822       address_ += read<uint16_t>(program);
 823       return CONTINUE;
 824     case DW_LNS_set_prologue_end:
 825       if (version_ == 2) break;  // not supported in version 2
 826       prologueEnd_ = true;
 827       return CONTINUE;
 828     case DW_LNS_set_epilogue_begin:
 829       if (version_ == 2) break;  // not supported in version 2
 830       epilogueBegin_ = true;
 831       return CONTINUE;
 832     case DW_LNS_set_isa:
 833       if (version_ == 2) break;  // not supported in version 2
 834       isa_ = readULEB(program);
 835       return CONTINUE;
 836     }
 837
 838     // Unrecognized standard opcode, slurp the appropriate number of LEB
 839     // arguments.
 840     uint8_t argCount = standardOpcodeLengths_[opcode - 1];
 841     while (argCount--) {
 842       readULEB(program);
 843     }
 844     return CONTINUE;
 845   }
 846
 847   // Extended opcode
 848   auto length = readULEB(program);
 849   // the opcode itself should be included in the length, so length >= 1
 850   FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
 851   auto extendedOpcode = read<uint8_t>(program);
 852   --length;
 853
 854   switch (extendedOpcode) {
 855   case DW_LNE_end_sequence:
 856     return END;
 857   case DW_LNE_set_address:
 858     address_ = read<uintptr_t>(program);
 859     return CONTINUE;
 860   case DW_LNE_define_file:
 861     // We can't process DW_LNE_define_file here, as it would require us to
 862     // use unbounded amounts of state (ie. use the heap).  We'll do a second
 863     // pass (using nextDefineFile()) if necessary.
 864     break;
 865   case DW_LNE_set_discriminator:
 866     discriminator_ = readULEB(program);
 867     return CONTINUE;
 868   }
 869
 870   // Unrecognized extended opcode
 871   program.advance(length);
 872   return CONTINUE;
 873 }
 874
 875 bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path& file,
 876                                       uint64_t& line) {
 877   folly::StringPiece program = data_;
 878
 879   // Within each sequence of instructions, the address may only increase.
 880   // Unfortunately, within the same compilation unit, sequences may appear
 881   // in any order.  So any sequence is a candidate if it starts at an address
 882   // <= the target address, and we know we've found the target address if
 883   // a candidate crosses the target address.
 884   enum State {
 885     START,
 886     LOW_SEQ,  // candidate
 887     HIGH_SEQ
 888   };
 889   State state = START;
 890   reset();
 891
 892   uint64_t prevFile = 0;
 893   uint64_t prevLine = 0;
 894   while (!program.empty()) {
 895     bool seqEnd = !next(program);
 896
 897     if (state == START) {
 898       if (!seqEnd) {
 899         state = address_ <= target ? LOW_SEQ : HIGH_SEQ;
 900       }
 901     }
 902
 903     if (state == LOW_SEQ) {
 904       if (address_ > target) {
 905         // Found it!  Note that ">" is indeed correct (not ">="), as each
 906         // sequence is guaranteed to have one entry past-the-end (emitted by
 907         // DW_LNE_end_sequence)
 908         if (prevFile == 0) {
 909           return false;
 910         }
 911         auto fn = getFileName(prevFile);
 912         file = Path(compilationDirectory_,
 913                     getIncludeDirectory(fn.directoryIndex),
 914                     fn.relativeName);
 915         line = prevLine;
 916         return true;
 917       }
 918       prevFile = file_;
 919       prevLine = line_;
 920     }
 921
 922     if (seqEnd) {
 923       state = START;
 924       reset();
 925     }
 926   }
 927
 928   return false;
 929 }
 930
 931 }  // namespace symbolizer
 932 }  // namespace folly