2 * Copyright 2013 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "folly/experimental/symbolizer/Dwarf.h"
20 #include <type_traits>
25 namespace symbolizer {
27 Dwarf::Dwarf(const ElfFile* elf) : elf_(elf) {
31 Dwarf::Section::Section(folly::StringPiece d) : is64Bit_(false), data_(d) {
36 // All following read* functions read from a StringPiece, advancing the
37 // StringPiece, and aborting if there's not enough room.
39 // Read (bitwise) one object of type T
41 typename std::enable_if<std::is_pod<T>::value, T>::type
42 read(folly::StringPiece& sp) {
43 FOLLY_SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
45 memcpy(&x, sp.data(), sizeof(T));
46 sp.advance(sizeof(T));
50 // Read ULEB (unsigned) varint value; algorithm from the DWARF spec
51 uint64_t readULEB(folly::StringPiece& sp, uint8_t& shift, uint8_t& val) {
55 val = read<uint8_t>(sp);
56 r |= ((uint64_t)(val & 0x7f) << shift);
62 uint64_t readULEB(folly::StringPiece& sp) {
65 return readULEB(sp, shift, val);
68 // Read SLEB (signed) varint value; algorithm from the DWARF spec
69 int64_t readSLEB(folly::StringPiece& sp) {
72 uint64_t r = readULEB(sp, shift, val);
74 if (shift < 64 && (val & 0x40)) {
75 r |= -(1ULL << shift); // sign extend
81 // Read a value of "section offset" type, which may be 4 or 8 bytes
82 uint64_t readOffset(folly::StringPiece& sp, bool is64Bit) {
83 return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp);
87 folly::StringPiece readBytes(folly::StringPiece& sp, uint64_t len) {
88 FOLLY_SAFE_CHECK(len >= sp.size(), "invalid string length");
89 folly::StringPiece ret(sp.data(), len);
94 // Read a null-terminated string
95 folly::StringPiece readNullTerminated(folly::StringPiece& sp) {
96 const char* p = static_cast<const char*>(
97 memchr(sp.data(), 0, sp.size()));
98 FOLLY_SAFE_CHECK(p, "invalid null-terminated string");
99 folly::StringPiece ret(sp.data(), p);
100 sp.assign(p + 1, sp.end());
104 // Skip over padding until sp.data() - start is a multiple of alignment
105 void skipPadding(folly::StringPiece& sp, const char* start, size_t alignment) {
106 size_t remainder = (sp.data() - start) % alignment;
108 FOLLY_SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
109 sp.advance(alignment - remainder);
113 void stripSlashes(folly::StringPiece& sp, bool keepInitialSlash) {
118 const char* p = sp.begin();
119 for (; p != sp.end() && *p == '/'; ++p);
121 const char* q = sp.end();
122 for (; q != p && q[-1] == '/'; --q);
124 if (keepInitialSlash && p != sp.begin()) {
133 Dwarf::Path::Path(folly::StringPiece baseDir, folly::StringPiece subDir,
134 folly::StringPiece file)
147 if (file_[0] == '/') {
153 if (!subDir_.empty() && subDir_[0] == '/') {
154 baseDir_.clear(); // subDir_ is absolute
157 // Make sure that baseDir_ isn't empty; subDir_ may be
158 if (baseDir_.empty()) {
159 swap(baseDir_, subDir_);
162 stripSlashes(baseDir_, true); // keep leading slash if it exists
163 stripSlashes(subDir_, false);
164 stripSlashes(file_, false);
167 size_t Dwarf::Path::size() const {
169 baseDir_.size() + !subDir_.empty() + subDir_.size() + !file_.empty() +
173 size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const {
174 size_t totalSize = 0;
176 auto append = [&] (folly::StringPiece sp) {
178 size_t toCopy = std::min(sp.size(), bufSize - 1);
179 memcpy(buf, sp.data(), toCopy);
183 totalSize += sp.size();
186 if (!baseDir_.empty()) {
189 if (!subDir_.empty()) {
190 assert(!baseDir_.empty());
194 if (!file_.empty()) {
201 assert(totalSize == size());
205 void Dwarf::Path::toString(std::string& dest) const {
206 size_t initialSize = dest.size();
207 dest.reserve(initialSize + size());
208 if (!baseDir_.empty()) {
209 dest.append(baseDir_.begin(), baseDir_.end());
211 if (!subDir_.empty()) {
212 assert(!baseDir_.empty());
214 dest.append(subDir_.begin(), subDir_.end());
216 if (!file_.empty()) {
218 dest.append(file_.begin(), file_.end());
220 assert(dest.size() == initialSize + size());
223 // Next chunk in section
224 bool Dwarf::Section::next(folly::StringPiece& chunk) {
230 // Initial length is a uint32_t value for a 32-bit section, and
231 // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit
233 auto initialLength = read<uint32_t>(chunk);
234 is64Bit_ = (initialLength == (uint32_t)-1);
235 auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength;
236 FOLLY_SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
237 chunk.reset(chunk.data(), length);
238 data_.assign(chunk.end(), data_.end());
242 bool Dwarf::getSection(const char* name, folly::StringPiece* section) const {
243 const ElfW(Shdr)* elfSection = elf_->getSectionByName(name);
248 *section = elf_->getSectionBody(*elfSection);
253 // Make sure that all .debug_* sections exist
254 if (!getSection(".debug_info", &info_) ||
255 !getSection(".debug_abbrev", &abbrev_) ||
256 !getSection(".debug_aranges", &aranges_) ||
257 !getSection(".debug_line", &line_) ||
258 !getSection(".debug_str", &strings_)) {
262 getSection(".debug_str", &strings_);
265 bool Dwarf::readAbbreviation(folly::StringPiece& section,
266 DIEAbbreviation& abbr) {
268 abbr.code = readULEB(section);
269 if (abbr.code == 0) {
274 abbr.tag = readULEB(section);
276 // does this entry have children?
277 abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no);
280 const char* attributeBegin = section.data();
282 FOLLY_SAFE_CHECK(!section.empty(), "invalid attribute section");
283 auto attr = readAttribute(section);
284 if (attr.name == 0 && attr.form == 0) {
289 abbr.attributes.assign(attributeBegin, section.data());
293 Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(
294 folly::StringPiece& sp) {
295 return { readULEB(sp), readULEB(sp) };
298 Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset)
300 // Linear search in the .debug_abbrev section, starting at offset
301 folly::StringPiece section = abbrev_;
302 section.advance(offset);
304 Dwarf::DIEAbbreviation abbr;
305 while (readAbbreviation(section, abbr)) {
306 if (abbr.code == code) {
311 FOLLY_SAFE_CHECK(false, "could not find abbreviation code");
314 Dwarf::AttributeValue Dwarf::readAttributeValue(
315 folly::StringPiece& sp, uint64_t form, bool is64Bit) const {
318 return read<uintptr_t>(sp);
320 return readBytes(sp, read<uint8_t>(sp));
322 return readBytes(sp, read<uint16_t>(sp));
324 return readBytes(sp, read<uint32_t>(sp));
325 case DW_FORM_block: // fallthrough
326 case DW_FORM_exprloc:
327 return readBytes(sp, readULEB(sp));
328 case DW_FORM_data1: // fallthrough
330 return read<uint8_t>(sp);
331 case DW_FORM_data2: // fallthrough
333 return read<uint16_t>(sp);
334 case DW_FORM_data4: // fallthrough
336 return read<uint32_t>(sp);
337 case DW_FORM_data8: // fallthrough
339 return read<uint64_t>(sp);
342 case DW_FORM_udata: // fallthrough
343 case DW_FORM_ref_udata:
346 return read<uint8_t>(sp);
347 case DW_FORM_flag_present:
349 case DW_FORM_sec_offset: // fallthrough
350 case DW_FORM_ref_addr:
351 return readOffset(sp, is64Bit);
353 return readNullTerminated(sp);
355 return getStringFromStringSection(readOffset(sp, is64Bit));
356 case DW_FORM_indirect: // form is explicitly specified
357 return readAttributeValue(sp, readULEB(sp), is64Bit);
359 FOLLY_SAFE_CHECK(false, "invalid attribute form");
363 folly::StringPiece Dwarf::getStringFromStringSection(uint64_t offset) const {
364 FOLLY_SAFE_CHECK(offset < strings_.size(), "invalid strp offset");
365 folly::StringPiece sp(strings_);
367 return readNullTerminated(sp);
370 bool Dwarf::findAddress(uintptr_t address, LocationInfo& locationInfo) const {
371 locationInfo = LocationInfo();
373 if (!elf_) { // no file
377 // Find address range in .debug_aranges, map to compilation unit
378 Section arangesSection(aranges_);
379 folly::StringPiece chunk;
380 uint64_t debugInfoOffset;
382 while (!found && arangesSection.next(chunk)) {
383 auto version = read<uint16_t>(chunk);
384 FOLLY_SAFE_CHECK(version == 2, "invalid aranges version");
386 debugInfoOffset = readOffset(chunk, arangesSection.is64Bit());
387 auto addressSize = read<uint8_t>(chunk);
388 FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
389 auto segmentSize = read<uint8_t>(chunk);
390 FOLLY_SAFE_CHECK(segmentSize == 0, "segmented architecture not supported");
392 // Padded to a multiple of 2 addresses.
393 // Strangely enough, this is the only place in the DWARF spec that requires
395 skipPadding(chunk, aranges_.data(), 2 * sizeof(uintptr_t));
397 auto start = read<uintptr_t>(chunk);
398 auto length = read<uintptr_t>(chunk);
404 // Is our address in this range?
405 if (address >= start && address < start + length) {
416 // Read compilation unit header from .debug_info
417 folly::StringPiece sp(info_);
418 sp.advance(debugInfoOffset);
419 Section debugInfoSection(sp);
420 FOLLY_SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info");
422 auto version = read<uint16_t>(chunk);
423 FOLLY_SAFE_CHECK(version >= 2 && version <= 4, "invalid info version");
424 uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit());
425 auto addressSize = read<uint8_t>(chunk);
426 FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
428 // We survived so far. The first (and only) DIE should be
429 // DW_TAG_compile_unit
430 // TODO(tudorb): Handle DW_TAG_partial_unit?
431 auto code = readULEB(chunk);
432 FOLLY_SAFE_CHECK(code != 0, "invalid code");
433 auto abbr = getAbbreviation(code, abbrevOffset);
434 FOLLY_SAFE_CHECK(abbr.tag == DW_TAG_compile_unit,
435 "expecting compile unit entry");
437 // Read attributes, extracting the few we care about
438 bool foundLineOffset = false;
439 uint64_t lineOffset = 0;
440 folly::StringPiece compilationDirectory;
441 folly::StringPiece mainFileName;
443 DIEAbbreviation::Attribute attr;
444 folly::StringPiece attributes = abbr.attributes;
446 attr = readAttribute(attributes);
447 if (attr.name == 0 && attr.form == 0) {
450 auto val = readAttributeValue(chunk, attr.form,
451 debugInfoSection.is64Bit());
453 case DW_AT_stmt_list:
454 // Offset in .debug_line for the line number VM program for this
456 lineOffset = boost::get<uint64_t>(val);
457 foundLineOffset = true;
460 // Compilation directory
461 compilationDirectory = boost::get<folly::StringPiece>(val);
464 // File name of main file being compiled
465 mainFileName = boost::get<folly::StringPiece>(val);
470 if (!mainFileName.empty()) {
471 locationInfo.hasMainFile = true;
472 locationInfo.mainFile = Path(compilationDirectory, "", mainFileName);
475 if (foundLineOffset) {
476 folly::StringPiece lineSection(line_);
477 lineSection.advance(lineOffset);
478 LineNumberVM lineVM(lineSection, compilationDirectory);
480 // Execute line number VM program to find file and line
481 locationInfo.hasFileAndLine =
482 lineVM.findAddress(address, locationInfo.file, locationInfo.line);
488 Dwarf::LineNumberVM::LineNumberVM(folly::StringPiece data,
489 folly::StringPiece compilationDirectory)
490 : compilationDirectory_(compilationDirectory) {
491 Section section(data);
492 FOLLY_SAFE_CHECK(section.next(data_), "invalid line number VM");
493 is64Bit_ = section.is64Bit();
498 void Dwarf::LineNumberVM::reset() {
503 isStmt_ = defaultIsStmt_;
505 endSequence_ = false;
506 prologueEnd_ = false;
507 epilogueBegin_ = false;
512 void Dwarf::LineNumberVM::init() {
513 version_ = read<uint16_t>(data_);
514 FOLLY_SAFE_CHECK(version_ >= 2 && version_ <= 4,
515 "invalid version in line number VM");
516 uint64_t headerLength = readOffset(data_, is64Bit_);
517 FOLLY_SAFE_CHECK(headerLength <= data_.size(),
518 "invalid line number VM header length");
519 folly::StringPiece header(data_.data(), headerLength);
520 data_.assign(header.end(), data_.end());
522 minLength_ = read<uint8_t>(header);
523 if (version_ == 4) { // Version 2 and 3 records don't have this
524 uint8_t maxOpsPerInstruction = read<uint8_t>(header);
525 FOLLY_SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported");
527 defaultIsStmt_ = read<uint8_t>(header);
528 lineBase_ = read<int8_t>(header); // yes, signed
529 lineRange_ = read<uint8_t>(header);
530 opcodeBase_ = read<uint8_t>(header);
531 FOLLY_SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
532 standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data());
533 header.advance(opcodeBase_ - 1);
535 // We don't want to use heap, so we don't keep an unbounded amount of state.
536 // We'll just skip over include directories and file names here, and
537 // we'll loop again when we actually need to retrieve one.
538 folly::StringPiece sp;
539 const char* tmp = header.data();
540 includeDirectoryCount_ = 0;
541 while (!(sp = readNullTerminated(header)).empty()) {
542 ++includeDirectoryCount_;
544 includeDirectories_.assign(tmp, header.data());
549 while (readFileName(header, fn)) {
552 fileNames_.assign(tmp, header.data());
555 bool Dwarf::LineNumberVM::next(folly::StringPiece& program) {
556 Dwarf::LineNumberVM::StepResult ret;
559 } while (ret == CONTINUE);
561 return (ret == COMMIT);
564 Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index)
566 FOLLY_SAFE_CHECK(index != 0, "invalid file index 0");
569 if (index <= fileNameCount_) {
570 folly::StringPiece fileNames = fileNames_;
571 for (; index; --index) {
572 if (!readFileName(fileNames, fn)) {
579 index -= fileNameCount_;
581 folly::StringPiece program = data_;
582 for (; index; --index) {
583 FOLLY_SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
589 folly::StringPiece Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index)
592 return folly::StringPiece();
595 FOLLY_SAFE_CHECK(index <= includeDirectoryCount_,
596 "invalid include directory");
598 folly::StringPiece includeDirectories = includeDirectories_;
599 folly::StringPiece dir;
600 for (; index; --index) {
601 dir = readNullTerminated(includeDirectories);
610 bool Dwarf::LineNumberVM::readFileName(folly::StringPiece& program,
612 fn.relativeName = readNullTerminated(program);
613 if (fn.relativeName.empty()) {
616 fn.directoryIndex = readULEB(program);
617 // Skip over file size and last modified time
623 bool Dwarf::LineNumberVM::nextDefineFile(folly::StringPiece& program,
624 FileName& fn) const {
625 while (!program.empty()) {
626 auto opcode = read<uint8_t>(program);
628 if (opcode >= opcodeBase_) { // special opcode
632 if (opcode != 0) { // standard opcode
633 // Skip, slurp the appropriate number of LEB arguments
634 uint8_t argCount = standardOpcodeLengths_[opcode - 1];
642 auto length = readULEB(program);
643 // the opcode itself should be included in the length, so length >= 1
644 FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
645 read<uint8_t>(program); // extended opcode
648 if (opcode == DW_LNE_define_file) {
649 FOLLY_SAFE_CHECK(readFileName(program, fn),
650 "invalid empty file in DW_LNE_define_file");
654 program.advance(length);
661 Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(
662 folly::StringPiece& program) {
663 auto opcode = read<uint8_t>(program);
665 if (opcode >= opcodeBase_) { // special opcode
666 uint8_t adjustedOpcode = opcode - opcodeBase_;
667 uint8_t opAdvance = adjustedOpcode / lineRange_;
669 address_ += minLength_ * opAdvance;
670 line_ += lineBase_ + adjustedOpcode % lineRange_;
673 prologueEnd_ = false;
674 epilogueBegin_ = false;
679 if (opcode != 0) { // standard opcode
680 // Only interpret opcodes that are recognized by the version we're parsing;
681 // the others are vendor extensions and we should ignore them.
685 prologueEnd_ = false;
686 epilogueBegin_ = false;
689 case DW_LNS_advance_pc:
690 address_ += minLength_ * readULEB(program);
692 case DW_LNS_advance_line:
693 line_ += readSLEB(program);
695 case DW_LNS_set_file:
696 file_ = readULEB(program);
698 case DW_LNS_set_column:
699 column_ = readULEB(program);
701 case DW_LNS_negate_stmt:
704 case DW_LNS_set_basic_block:
707 case DW_LNS_const_add_pc:
708 address_ += minLength_ * ((255 - opcodeBase_) / lineRange_);
710 case DW_LNS_fixed_advance_pc:
711 address_ += read<uint16_t>(program);
713 case DW_LNS_set_prologue_end:
714 if (version_ == 2) break; // not supported in version 2
717 case DW_LNS_set_epilogue_begin:
718 if (version_ == 2) break; // not supported in version 2
719 epilogueBegin_ = true;
722 if (version_ == 2) break; // not supported in version 2
723 isa_ = readULEB(program);
727 // Unrecognized standard opcode, slurp the appropriate number of LEB
729 uint8_t argCount = standardOpcodeLengths_[opcode - 1];
737 auto length = readULEB(program);
738 // the opcode itself should be included in the length, so length >= 1
739 FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
740 auto extendedOpcode = read<uint8_t>(program);
743 switch (extendedOpcode) {
744 case DW_LNE_end_sequence:
746 case DW_LNE_set_address:
747 address_ = read<uintptr_t>(program);
749 case DW_LNE_define_file:
750 // We can't process DW_LNE_define_file here, as it would require us to
751 // use unbounded amounts of state (ie. use the heap). We'll do a second
752 // pass (using nextDefineFile()) if necessary.
754 case DW_LNE_set_discriminator:
755 discriminator_ = readULEB(program);
759 // Unrecognized extended opcode
760 program.advance(length);
764 bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path& file,
766 folly::StringPiece program = data_;
768 // Within each sequence of instructions, the address may only increase.
769 // Unfortunately, within the same compilation unit, sequences may appear
770 // in any order. So any sequence is a candidate if it starts at an address
771 // <= the target address, and we know we've found the target address if
772 // a candidate crosses the target address.
775 LOW_SEQ, // candidate
781 uint64_t prevFile = 0;
782 uint64_t prevLine = 0;
783 while (!program.empty()) {
784 bool seqEnd = !next(program);
786 if (state == START) {
788 state = address_ <= target ? LOW_SEQ : HIGH_SEQ;
792 if (state == LOW_SEQ) {
793 if (address_ > target) {
794 // Found it! Note that ">" is indeed correct (not ">="), as each
795 // sequence is guaranteed to have one entry past-the-end (emitted by
796 // DW_LNE_end_sequence)
800 auto fn = getFileName(prevFile);
801 file = Path(compilationDirectory_,
802 getIncludeDirectory(fn.directoryIndex),
820 } // namespace symbolizer