1 //===-- DWARFDebugLine.cpp ------------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "DWARFDebugLine.h"
11 #include "llvm/Support/Dwarf.h"
12 #include "llvm/Support/Format.h"
13 #include "llvm/Support/Path.h"
14 #include "llvm/Support/raw_ostream.h"
// Makes the names of the dwarf namespace usable unqualified in this file
// (presumably the source of the DW_LNE_*/DW_LNS_* constants and
// LNStandardString used below -- confirm against llvm/Support/Dwarf.h).
17 using namespace dwarf;
// Short local alias for the enum that selects how file names are reported;
// used by getFileNameByIndex and getFileLineInfoForAddress.
18 typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
/// Default constructor of the line table prologue.
// NOTE(review): the constructor body is not visible in this listing.
20 DWARFDebugLine::Prologue::Prologue() {
/// Resets the prologue to an empty state: the scalar header fields named
/// below are zeroed and the standard-opcode-length and include-directory
/// lists are emptied.
// NOTE(review): resets of OpcodeBase and FileNames are not visible in this
// listing -- confirm they happen on the omitted lines.
24 void DWARFDebugLine::Prologue::clear() {
// Chained assignments: every field in each chain receives 0.
25 TotalLength = Version = PrologueLength = 0;
26 MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0;
28 StandardOpcodeLengths.clear();
29 IncludeDirectories.clear();
/// Writes a human-readable rendering of the prologue to \p OS: the scalar
/// header fields first, then the standard opcode lengths, the include
/// directories and finally the file name table.
33 void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
34 OS << "Line table prologue:\n"
35 << format(" total_length: 0x%8.8x\n", TotalLength)
36 << format(" version: %u\n", Version)
37 << format(" prologue_length: 0x%8.8x\n", PrologueLength)
38 << format(" min_inst_length: %u\n", MinInstLength)
// max_ops_per_inst is only printed for version 4 and later; for older
// versions the format string is empty, so this item emits nothing.
39 << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst)
40 << format(" default_is_stmt: %u\n", DefaultIsStmt)
41 << format(" line_base: %i\n", LineBase)
42 << format(" line_range: %u\n", LineRange)
43 << format(" opcode_base: %u\n", OpcodeBase);
// One line per standard opcode. Entry i is labelled with the string that
// LNStandardString returns for opcode number i+1.
45 for (uint32_t i = 0; i < StandardOpcodeLengths.size(); ++i)
46 OS << format("standard_opcode_lengths[%s] = %u\n", LNStandardString(i+1),
47 StandardOpcodeLengths[i]);
// Include directories are listed with a 1-based index.
49 if (!IncludeDirectories.empty())
50 for (uint32_t i = 0; i < IncludeDirectories.size(); ++i)
51 OS << format("include_directories[%3u] = '", i+1)
52 << IncludeDirectories[i] << "'\n";
// The file table, preceded by a column header, is only printed when at least
// one file entry exists; entries are numbered from 1 as well.
54 if (!FileNames.empty()) {
55 OS << " Dir Mod Time File Len File Name\n"
56 << " ---- ---------- ---------- -----------"
58 for (uint32_t i = 0; i < FileNames.size(); ++i) {
59 const FileNameEntry& fileEntry = FileNames[i];
60 OS << format("file_names[%3u] %4" PRIu64 " ", i+1, fileEntry.DirIdx)
61 << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ",
62 fileEntry.ModTime, fileEntry.Length)
63 << fileEntry.Name << '\n';
/// Decodes a line table prologue from \p debug_line_data.
///
/// Reading starts at \p *offset_ptr and the cursor is advanced past every
/// field consumed. The fixed header fields are read first, followed by the
/// standard opcode length table, the include directory strings and the file
/// name entries.
///
/// \param debug_line_data  extractor over the line table bytes.
/// \param offset_ptr       in/out read position.
/// \returns a bool; LineTable::parse treats false as failure.
// NOTE(review): the return statements are not visible in this listing.
68 bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data,
69 uint32_t *offset_ptr) {
// Remember where the prologue started; only used in the warning at the end.
70 const uint32_t prologue_offset = *offset_ptr;
73 TotalLength = debug_line_data.getU32(offset_ptr);
74 Version = debug_line_data.getU16(offset_ptr);
78 PrologueLength = debug_line_data.getU32(offset_ptr);
// PrologueLength is measured from the byte right after the length field
// itself, which is where the cursor is now.
79 const uint32_t end_prologue_offset = PrologueLength + *offset_ptr;
80 MinInstLength = debug_line_data.getU8(offset_ptr);
// NOTE(review): in this listing the read of MaxOpsPerInst looks
// unconditional, while dump() only prints the field for Version >= 4; a
// version guard may sit on an omitted line -- confirm.
82 MaxOpsPerInst = debug_line_data.getU8(offset_ptr);
83 DefaultIsStmt = debug_line_data.getU8(offset_ptr);
// NOTE(review): LineBase is read as an unsigned byte but printed with %i in
// dump(); presumably the member is a signed 8-bit type so the conversion
// yields negative bases -- confirm in the header.
84 LineBase = debug_line_data.getU8(offset_ptr);
85 LineRange = debug_line_data.getU8(offset_ptr);
86 OpcodeBase = debug_line_data.getU8(offset_ptr);
// One length byte is read for each opcode number in [1, OpcodeBase).
// NOTE(review): an OpcodeBase of 0 is not guarded against in the visible
// lines; OpcodeBase - 1 would then presumably be negative before it is
// converted to the reserve() size -- confirm how malformed input is handled.
88 StandardOpcodeLengths.reserve(OpcodeBase - 1);
89 for (uint32_t i = 1; i < OpcodeBase; ++i) {
90 uint8_t op_len = debug_line_data.getU8(offset_ptr);
91 StandardOpcodeLengths.push_back(op_len);
// Include directories: a run of C strings inside the prologue.
// NOTE(review): the check that ends this loop early (presumably on an empty
// string) is not visible in this listing.
94 while (*offset_ptr < end_prologue_offset) {
95 const char *s = debug_line_data.getCStr(offset_ptr);
97 IncludeDirectories.push_back(s);
// File entries: a C string name followed by three ULEB128 values (directory
// index, modification time, length). Only entries with a non-empty name are
// decoded and stored.
102 while (*offset_ptr < end_prologue_offset) {
103 const char *name = debug_line_data.getCStr(offset_ptr);
104 if (name && name[0]) {
105 FileNameEntry fileEntry;
106 fileEntry.Name = name;
107 fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
108 fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
109 fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
110 FileNames.push_back(fileEntry);
// Sanity check: the cursor should sit exactly at the end announced by
// PrologueLength. A mismatch is reported on stderr.
116 if (*offset_ptr != end_prologue_offset) {
117 fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
118 " have ended at 0x%8.8x but it ended at 0x%8.8x\n",
119 prologue_offset, end_prologue_offset, *offset_ptr);
/// Constructs a row by delegating to reset() with \p default_is_stmt.
125 DWARFDebugLine::Row::Row(bool default_is_stmt) {
126 reset(default_is_stmt);
/// Adjusts the row's flags once it has been appended to the matrix.
// NOTE(review): only the clearing of EpilogueBegin is visible in this
// listing; other flags are presumably cleared on the omitted lines.
129 void DWARFDebugLine::Row::postAppend() {
132 EpilogueBegin = false;
/// Puts the row registers back to their initial values.
///
/// \param default_is_stmt  initial value of the IsStmt register.
// NOTE(review): only the IsStmt and EpilogueBegin assignments are visible in
// this listing; the remaining registers are presumably reset on the omitted
// lines.
135 void DWARFDebugLine::Row::reset(bool default_is_stmt) {
142 IsStmt = default_is_stmt;
146 EpilogueBegin = false;
/// Prints this row to \p OS: address, line, column, file, ISA and
/// discriminator as fixed-width columns, followed by the name of every flag
/// that is currently set.
149 void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
150 OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column)
151 << format(" %6u %3u %13u ", File, Isa, Discriminator)
// Each flag contributes its name when set and an empty string otherwise.
152 << (IsStmt ? " is_stmt" : "")
153 << (BasicBlock ? " basic_block" : "")
154 << (PrologueEnd ? " prologue_end" : "")
155 << (EpilogueBegin ? " epilogue_begin" : "")
156 << (EndSequence ? " end_sequence" : "")
/// Default constructor of an instruction sequence descriptor.
// NOTE(review): the constructor body is not visible in this listing.
160 DWARFDebugLine::Sequence::Sequence() {
/// Resets the sequence descriptor.
// NOTE(review): the body is not visible in this listing. Elsewhere in the
// file a sequence exposes LowPC, HighPC, FirstRowIndex, LastRowIndex and
// Empty; presumably those are what gets reset -- confirm.
164 void DWARFDebugLine::Sequence::reset() {
/// Default constructor of a line table.
// NOTE(review): the constructor body is not visible in this listing.
172 DWARFDebugLine::LineTable::LineTable() {
/// Prints the line table to \p OS: a column header followed by the rows.
// NOTE(review): the statements before the header and the loop body are not
// visible in this listing; presumably the prologue is dumped first and each
// row is printed through Row::dump -- confirm.
176 void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const {
181 OS << "Address Line Column File ISA Discriminator Flags\n"
182 << "------------------ ------ ------ ------ --- ------------- "
184 for (const Row &R : Rows) {
/// Empties the line table.
// NOTE(review): the body is not visible in this listing; presumably it
// clears Prologue, Rows and Sequences -- confirm.
190 void DWARFDebugLine::LineTable::clear() {
/// Creates the state-machine context used while executing a line number
/// program.
///
/// \param LT  line table that receives the rows and sequences; it is also the
///            source of the prologue consulted by resetRowAndSequence().
196 DWARFDebugLine::ParsingState::ParsingState(struct LineTable *LT)
197 : LineTable(LT), RowNumber(0) {
198 resetRowAndSequence();
/// Resets the current row so that IsStmt starts from the prologue's
/// DefaultIsStmt value.
// NOTE(review): the matching reset of Sequence is not visible in this
// listing -- confirm it happens on the omitted line.
201 void DWARFDebugLine::ParsingState::resetRowAndSequence() {
202 Row.reset(LineTable->Prologue.DefaultIsStmt);
/// Appends the current row to the line table and keeps the bookkeeping of
/// the sequence being built up to date.
///
/// \param offset  position in the line program; it is not used by any of the
///                lines visible here.
// NOTE(review): the statement that advances RowNumber is not visible in this
// listing. Since the lookup routines treat LastRowIndex as one past the final
// row, it presumably sits between appendRow() and the EndSequence check --
// confirm.
206 void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) {
207 if (Sequence.Empty) {
208 // Record the beginning of instruction sequence.
209 Sequence.Empty = false;
210 Sequence.LowPC = Row.Address;
211 Sequence.FirstRowIndex = RowNumber;
214 LineTable->appendRow(Row);
215 if (Row.EndSequence) {
216 // Record the end of instruction sequence.
217 Sequence.HighPC = Row.Address;
218 Sequence.LastRowIndex = RowNumber;
// Only sequences that pass isValid() are added to the table.
219 if (Sequence.isValid())
220 LineTable->appendSequence(Sequence);
/// Looks up an already parsed line table by its offset.
///
/// \param offset  key into LineTableMap.
// NOTE(review): both return statements are omitted from this listing;
// presumably the found table is returned, and a null pointer otherwise.
226 const DWARFDebugLine::LineTable *
227 DWARFDebugLine::getLineTable(uint32_t offset) const {
228 LineTableConstIter pos = LineTableMap.find(offset);
229 if (pos != LineTableMap.end())
/// Returns the line table stored for an offset, creating and parsing it if
/// necessary.
// NOTE(review): the remaining parameter line, the check of pos.second and the
// return statements are omitted from this listing. Presumably parsing only
// runs when the insert actually created a new entry -- confirm.
234 const DWARFDebugLine::LineTable *
235 DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data,
// insert() leaves an existing entry untouched, so the empty LineTable built
// here is only stored when the offset has not been seen before.
237 std::pair<LineTableIter, bool> pos =
238 LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable()));
239 LineTable *LT = &pos.first->second;
241 if (!LT->parse(debug_line_data, RelocMap, &offset))
/// Parses one line table from \p debug_line_data starting at \p *offset_ptr:
/// first the prologue, then the line number program, whose opcodes are
/// executed against a ParsingState to build the row matrix and the list of
/// sequences.
///
/// \param debug_line_data  extractor over the line table bytes.
/// \param RMap             relocation map consulted for the operand of
///                         DW_LNE_set_address.
/// \param offset_ptr       in/out read position; restored to its entry value
///                         when the prologue cannot be parsed.
/// \returns a bool; the callers visible in this file treat false as failure.
// NOTE(review): several control-flow lines (the test that selects the
// extended-opcode branch, switch/case/default labels, break and return
// statements) are omitted from this listing.
247 bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
248 const RelocAddrMap *RMap,
249 uint32_t *offset_ptr) {
250 const uint32_t debug_line_offset = *offset_ptr;
254 if (!Prologue.parse(debug_line_data, offset_ptr)) {
255 // Restore our offset and return false to indicate failure!
256 *offset_ptr = debug_line_offset;
// TotalLength does not count its own field, hence the added sizeof.
260 const uint32_t end_offset = debug_line_offset + Prologue.TotalLength +
261 sizeof(Prologue.TotalLength);
263 ParsingState State(this);
// Execute the line number program one opcode at a time.
265 while (*offset_ptr < end_offset) {
266 uint8_t opcode = debug_line_data.getU8(offset_ptr);
269 // Extended Opcodes always start with a zero opcode followed by
270 // a uleb128 length so you can skip ones you don't know about
271 uint32_t ext_offset = *offset_ptr;
272 uint64_t len = debug_line_data.getULEB128(offset_ptr);
// NOTE(review): this subtracts the number of bytes the ULEB128 length itself
// occupied, whereas the comment at the skip further down says the length
// excludes the length field and includes the sub_opcode byte. The two agree
// only when the length is encoded in a single byte -- confirm.
273 uint32_t arg_size = len - (*offset_ptr - ext_offset);
275 uint8_t sub_opcode = debug_line_data.getU8(offset_ptr);
276 switch (sub_opcode) {
277 case DW_LNE_end_sequence:
278 // Set the end_sequence register of the state machine to true and
279 // append a row to the matrix using the current values of the
280 // state-machine registers. Then reset the registers to the initial
281 // values specified above. Every statement program sequence must end
282 // with a DW_LNE_end_sequence instruction which creates a row whose
283 // address is that of the byte after the last target machine instruction
285 State.Row.EndSequence = true;
286 State.appendRowToMatrix(*offset_ptr);
287 State.resetRowAndSequence();
290 case DW_LNE_set_address:
291 // Takes a single relocatable address as an operand. The size of the
292 // operand is the size appropriate to hold an address on the target
293 // machine. Set the address register to the value given by the
294 // relocatable address. All of the other statement program opcodes
295 // that affect the address register add a delta to it. This instruction
296 // stores a relocatable value into it instead.
298 // If this address is in our relocation map, apply the relocation.
// NOTE(review): RMap is dereferenced without a null check in the visible
// lines -- confirm callers never pass a null map.
299 RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr);
300 if (AI != RMap->end()) {
301 const std::pair<uint8_t, int64_t> &R = AI->second;
// Relocated case: the second member of the map entry is added to the address
// read from the data.
303 debug_line_data.getAddress(offset_ptr) + R.second;
// No relocation recorded for this offset: use the address as encoded.
305 State.Row.Address = debug_line_data.getAddress(offset_ptr);
309 case DW_LNE_define_file:
310 // Takes 4 arguments. The first is a null terminated string containing
311 // a source file name. The second is an unsigned LEB128 number
312 // representing the directory index of the directory in which the file
313 // was found. The third is an unsigned LEB128 number representing the
314 // time of last modification of the file. The fourth is an unsigned
315 // LEB128 number representing the length in bytes of the file. The time
316 // and length fields may contain LEB128(0) if the information is not
319 // The directory index represents an entry in the include_directories
320 // section of the statement program prologue. The index is LEB128(0)
321 // if the file was found in the current directory of the compilation,
322 // LEB128(1) if it was found in the first directory in the
323 // include_directories section, and so on. The directory index is
324 // ignored for file names that represent full path names.
326 // The files are numbered, starting at 1, in the order in which they
327 // appear; the names in the prologue come before names defined by
328 // the DW_LNE_define_file instruction. These numbers are used in the
329 // the file register of the state machine.
331 FileNameEntry fileEntry;
332 fileEntry.Name = debug_line_data.getCStr(offset_ptr);
333 fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
334 fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
335 fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
336 Prologue.FileNames.push_back(fileEntry);
340 case DW_LNE_set_discriminator:
// Takes a single unsigned LEB128 operand and stores it in the discriminator
// register of the state machine.
341 State.Row.Discriminator = debug_line_data.getULEB128(offset_ptr);
// NOTE(review): the label introducing the statement below is omitted from
// this listing; presumably it is the default case that skips unknown
// extended opcodes -- confirm.
345 // Length doesn't include the zero opcode byte or the length itself, but
346 // it does include the sub_opcode, so we have to adjust for that below
347 (*offset_ptr) += arg_size;
// Standard opcodes: every opcode number below OpcodeBase.
350 } else if (opcode < Prologue.OpcodeBase) {
// NOTE(review): the case label for the next action is omitted from this
// listing; the behaviour described matches DW_LNS_copy -- confirm.
354 // Takes no arguments. Append a row to the matrix using the
355 // current values of the state-machine registers. Then set
356 // the basic_block register to false.
357 State.appendRowToMatrix(*offset_ptr);
360 case DW_LNS_advance_pc:
361 // Takes a single unsigned LEB128 operand, multiplies it by the
362 // min_inst_length field of the prologue, and adds the
363 // result to the address register of the state machine.
365 debug_line_data.getULEB128(offset_ptr) * Prologue.MinInstLength;
368 case DW_LNS_advance_line:
369 // Takes a single signed LEB128 operand and adds that value to
370 // the line register of the state machine.
371 State.Row.Line += debug_line_data.getSLEB128(offset_ptr);
374 case DW_LNS_set_file:
375 // Takes a single unsigned LEB128 operand and stores it in the file
376 // register of the state machine.
377 State.Row.File = debug_line_data.getULEB128(offset_ptr);
380 case DW_LNS_set_column:
381 // Takes a single unsigned LEB128 operand and stores it in the
382 // column register of the state machine.
383 State.Row.Column = debug_line_data.getULEB128(offset_ptr);
386 case DW_LNS_negate_stmt:
387 // Takes no arguments. Set the is_stmt register of the state
388 // machine to the logical negation of its current value.
389 State.Row.IsStmt = !State.Row.IsStmt;
392 case DW_LNS_set_basic_block:
393 // Takes no arguments. Set the basic_block register of the
394 // state machine to true
395 State.Row.BasicBlock = true;
398 case DW_LNS_const_add_pc:
399 // Takes no arguments. Add to the address register of the state
400 // machine the address increment value corresponding to special
401 // opcode 255. The motivation for DW_LNS_const_add_pc is this:
402 // when the statement program needs to advance the address by a
403 // small amount, it can use a single special opcode, which occupies
404 // a single byte. When it needs to advance the address by up to
405 // twice the range of the last special opcode, it can use
406 // DW_LNS_const_add_pc followed by a special opcode, for a total
407 // of two bytes. Only if it needs to advance the address by more
408 // than twice that range will it need to use both DW_LNS_advance_pc
409 // and a special opcode, requiring three or more bytes.
411 uint8_t adjust_opcode = 255 - Prologue.OpcodeBase;
// NOTE(review): LineRange comes straight from the prologue and is not
// checked against zero in the visible lines; a zero value would make this
// division undefined -- confirm how malformed tables are rejected.
412 uint64_t addr_offset =
413 (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength;
414 State.Row.Address += addr_offset;
418 case DW_LNS_fixed_advance_pc:
419 // Takes a single uhalf operand. Add to the address register of
420 // the state machine the value of the (unencoded) operand. This
421 // is the only standard opcode that takes an argument that is not
422 // a variable length number. The motivation for DW_LNS_fixed_advance_pc
423 // is this: existing assemblers cannot emit DW_LNS_advance_pc or
424 // special opcodes because they cannot encode LEB128 numbers or
425 // judge when the computation of a special opcode overflows and
426 // requires the use of DW_LNS_advance_pc. Such assemblers, however,
427 // can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
428 State.Row.Address += debug_line_data.getU16(offset_ptr);
431 case DW_LNS_set_prologue_end:
432 // Takes no arguments. Set the prologue_end register of the
433 // state machine to true
434 State.Row.PrologueEnd = true;
437 case DW_LNS_set_epilogue_begin:
438 // Takes no arguments. Set the epilogue_begin register of the
439 // state machine to true
440 State.Row.EpilogueBegin = true;
// NOTE(review): the case label for the next action is omitted from this
// listing; the assignment below targets the Isa register.
444 // Takes a single unsigned LEB128 operand and stores it in the
445 // isa register of the state machine.
446 State.Row.Isa = debug_line_data.getULEB128(offset_ptr);
450 // Handle any unknown standard opcodes here. We know the lengths
451 // of such opcodes because they are specified in the prologue
452 // as a multiple of LEB128 operands for each opcode.
// StandardOpcodeLengths is indexed by opcode number minus one. The operands
// are decoded only to move the cursor; their values are discarded.
454 assert(opcode - 1U < Prologue.StandardOpcodeLengths.size());
455 uint8_t opcode_length = Prologue.StandardOpcodeLengths[opcode - 1];
456 for (uint8_t i = 0; i < opcode_length; ++i)
457 debug_line_data.getULEB128(offset_ptr);
// Special opcodes (presumably the remaining else branch, whose opening line
// is omitted from this listing).
464 // A special opcode value is chosen based on the amount that needs
465 // to be added to the line and address registers. The maximum line
466 // increment for a special opcode is the value of the line_base
467 // field in the header, plus the value of the line_range field,
468 // minus 1 (line base + line range - 1). If the desired line
469 // increment is greater than the maximum line increment, a standard
470 // opcode must be used instead of a special opcode. The "address
471 // advance" is calculated by dividing the desired address increment
472 // by the minimum_instruction_length field from the header. The
473 // special opcode is then calculated using the following formula:
475 // opcode = (desired line increment - line_base) +
476 // (line_range * address advance) + opcode_base
478 // If the resulting opcode is greater than 255, a standard opcode
479 // must be used instead.
481 // To decode a special opcode, subtract the opcode_base from the
482 // opcode itself to give the adjusted opcode. The amount to
483 // increment the address register is the result of the adjusted
484 // opcode divided by the line_range multiplied by the
485 // minimum_instruction_length field from the header. That is:
487 // address increment = (adjusted opcode / line_range) *
488 // minimum_instruction_length
490 // The amount to increment the line register is the line_base plus
491 // the result of the adjusted opcode modulo the line_range. That is:
493 // line increment = line_base + (adjusted opcode % line_range)
495 uint8_t adjust_opcode = opcode - Prologue.OpcodeBase;
// NOTE(review): as for DW_LNS_const_add_pc, a LineRange of zero is not
// guarded against in the visible lines (division and modulo below).
496 uint64_t addr_offset =
497 (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength;
498 int32_t line_offset =
499 Prologue.LineBase + (adjust_opcode % Prologue.LineRange);
500 State.Row.Line += line_offset;
501 State.Row.Address += addr_offset;
502 State.appendRowToMatrix(*offset_ptr);
// A sequence that was started but never closed by DW_LNE_end_sequence is
// reported on stderr.
506 if (!State.Sequence.Empty) {
507 fprintf(stderr, "warning: last sequence in debug line table is not"
511 // Sort all sequences so that address lookup will work faster.
512 if (!Sequences.empty()) {
513 std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC);
514 // Note: actually, instruction address ranges of sequences should not
515 // overlap (in shared objects and executables). If they do, the address
516 // lookup would still work, though, but result would be ambiguous.
517 // We don't report warning in this case. For example,
518 // sometimes .so compiled from multiple object files contains a few
519 // rudimentary sequences for address ranges [0x0, 0xsomething).
/// Finds the row that describes \p address.
///
/// \returns the index of that row in Rows, or UINT32_MAX when there are no
///          sequences, no sequence contains the address, or the address lies
///          before the first row of the matching sequence.
// NOTE(review): the tail of the function (presumably stepping the index back
// by one when the found row lies beyond the address, and the final return) is
// omitted from this listing.
525 uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
526 uint32_t unknown_index = UINT32_MAX;
527 if (Sequences.empty())
528 return unknown_index;
529 // First, find an instruction sequence containing the given address.
// A probe sequence carrying only LowPC serves as the search key.
530 DWARFDebugLine::Sequence sequence;
531 sequence.LowPC = address;
532 SequenceIter first_seq = Sequences.begin();
533 SequenceIter last_seq = Sequences.end();
// Binary search over Sequences, which parse() sorted with the same
// comparator.
534 SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence,
535 DWARFDebugLine::Sequence::orderByLowPC);
536 DWARFDebugLine::Sequence found_seq;
// Three outcomes: the search ran off the end (only the last sequence can
// still contain the address), it hit a sequence starting exactly at the
// address, or it stopped at a later sequence, in which case the candidate is
// the preceding one, if any.
537 if (seq_pos == last_seq) {
538 found_seq = Sequences.back();
539 } else if (seq_pos->LowPC == address) {
540 found_seq = *seq_pos;
542 if (seq_pos == first_seq)
543 return unknown_index;
544 found_seq = *(seq_pos - 1);
// The candidate was picked from its start address only; make sure the
// address really lies inside it.
546 if (!found_seq.containsPC(address))
547 return unknown_index;
548 // Search for instruction address in the rows describing the sequence.
549 // Rows are stored in a vector, so we may use arithmetical operations with
551 DWARFDebugLine::Row row;
552 row.Address = address;
553 RowIter first_row = Rows.begin() + found_seq.FirstRowIndex;
554 RowIter last_row = Rows.begin() + found_seq.LastRowIndex;
555 RowIter row_pos = std::lower_bound(first_row, last_row, row,
556 DWARFDebugLine::Row::orderByAddress);
// Nothing in the searched range satisfied the bound: answer with the row at
// LastRowIndex - 1 (LastRowIndex itself is treated as one past the end, see
// lookupAddressRange).
557 if (row_pos == last_row) {
558 return found_seq.LastRowIndex - 1;
// Turn the iterator back into an index into Rows.
560 uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row);
561 if (row_pos->Address > address) {
562 if (row_pos == first_row)
563 return unknown_index;
/// Collects the indices of the rows covering the address range that starts at
/// \p address and spans \p size bytes.
///
/// \param address  first address of the range.
/// \param size     length of the range in bytes.
/// \param result   receives the row indices.
/// \returns a bool.
// NOTE(review): the return statements, the adjustment of seq_pos/
// first_row_index on the fall-back paths and the body of the final loop are
// omitted from this listing; presumably false means no containing sequence
// was found and the loop appends each index to result -- confirm.
570 bool DWARFDebugLine::LineTable::lookupAddressRange(
571 uint64_t address, uint64_t size, std::vector<uint32_t> &result) const {
572 if (Sequences.empty())
// NOTE(review): address + size is not checked for wrap-around.
574 uint64_t end_addr = address + size;
575 // First, find an instruction sequence containing the given address.
576 DWARFDebugLine::Sequence sequence;
577 sequence.LowPC = address;
578 SequenceIter first_seq = Sequences.begin();
579 SequenceIter last_seq = Sequences.end();
580 SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence,
581 DWARFDebugLine::Sequence::orderByLowPC);
// No sequence starts exactly at the address: the containing sequence, if
// there is one, must be an earlier one, so there is nothing to find when the
// search stopped at the very first sequence.
582 if (seq_pos == last_seq || seq_pos->LowPC != address) {
583 if (seq_pos == first_seq)
587 if (!seq_pos->containsPC(address))
// Remember which sequence the range starts in; it gets special treatment in
// the loop below.
590 SequenceIter start_pos = seq_pos;
592 // Add the rows from the first sequence to the vector, starting with the
593 // index we just calculated
// Visit every sequence that begins before the end of the range.
595 while (seq_pos != last_seq && seq_pos->LowPC < end_addr) {
596 DWARFDebugLine::Sequence cur_seq = *seq_pos;
597 uint32_t first_row_index;
598 uint32_t last_row_index;
599 if (seq_pos == start_pos) {
600 // For the first sequence, we need to find which row in the sequence is the
601 // first in our range. Rows are stored in a vector, so we may use
602 // arithmetical operations with iterators.
603 DWARFDebugLine::Row row;
604 row.Address = address;
605 RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex;
606 RowIter last_row = Rows.begin() + cur_seq.LastRowIndex;
607 RowIter row_pos = std::upper_bound(first_row, last_row, row,
608 DWARFDebugLine::Row::orderByAddress);
609 // The 'row_pos' iterator references the first row that is greater than
610 // our start address. Unless that's the first row, we want to start at
611 // the row before that.
612 first_row_index = cur_seq.FirstRowIndex + (row_pos - first_row);
613 if (row_pos != first_row)
// Every later sequence contributes from its first row.
616 first_row_index = cur_seq.FirstRowIndex;
618 // For the last sequence in our range, we need to figure out the last row in
619 // range. For all other sequences we can go to the end of the sequence.
620 if (cur_seq.HighPC > end_addr) {
621 DWARFDebugLine::Row row;
622 row.Address = end_addr;
623 RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex;
624 RowIter last_row = Rows.begin() + cur_seq.LastRowIndex;
625 RowIter row_pos = std::upper_bound(first_row, last_row, row,
626 DWARFDebugLine::Row::orderByAddress);
627 // The 'row_pos' iterator references the first row that is greater than
628 // our end address. The row before that is the last row we want.
629 last_row_index = cur_seq.FirstRowIndex + (row_pos - first_row) - 1;
631 // Contrary to what you might expect, DWARFDebugLine::SequenceLastRowIndex
632 // isn't a valid index within the current sequence. It's that plus one.
633 last_row_index = cur_seq.LastRowIndex - 1;
// Both bounds are inclusive.
635 for (uint32_t i = first_row_index; i <= last_row_index; ++i) {
/// Resolves the name of the file with 1-based index \p FileIndex from the
/// prologue's file table and stores it in \p Result.
///
/// \param FileIndex  1-based index into Prologue.FileNames.
/// \param Kind       how the name should be reported; only
///                   AbsoluteFilePath triggers the path composition below.
/// \param Result     receives the file name or path.
// NOTE(review): the return type line, the CompDir parameter line, the return
// statements and the body of the "name is good as it is" branch are omitted
// from this listing. getFileLineInfoForAddress tests the result with '!', so
// presumably a bool is returned, false on the rejection path -- confirm.
646 DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
648 FileLineInfoKind Kind,
649 std::string &Result) const {
// Rejected: index 0, an index past the end of the table, or no file
// information requested.
650 if (FileIndex == 0 || FileIndex > Prologue.FileNames.size() ||
651 Kind == FileLineInfoKind::None)
653 const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
654 const char *FileName = Entry.Name;
// No composition is needed when an absolute path was not asked for or the
// stored name is already absolute.
655 if (Kind != FileLineInfoKind::AbsoluteFilePath ||
656 sys::path::is_absolute(FileName)) {
661 SmallString<16> FilePath;
662 uint64_t IncludeDirIndex = Entry.DirIdx;
663 const char *IncludeDir = "";
664 // Be defensive about the contents of Entry.
// Directory indices are 1-based; 0 or an out-of-range value leaves
// IncludeDir empty.
665 if (IncludeDirIndex > 0 &&
666 IncludeDirIndex <= Prologue.IncludeDirectories.size())
667 IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1];
669 // We may still need to append compilation directory of compile unit.
670 // We know that FileName is not absolute, the only way to have an
671 // absolute path at this point would be if IncludeDir is absolute.
672 if (CompDir && Kind == FileLineInfoKind::AbsoluteFilePath &&
673 sys::path::is_relative(IncludeDir))
674 sys::path::append(FilePath, CompDir);
676 // sys::path::append skips empty strings.
677 sys::path::append(FilePath, IncludeDir, FileName);
678 Result = FilePath.str();
/// Fills \p Result with the file name, line and column of the row that
/// describes \p Address.
///
/// \param Address  address to look up.
/// \param Kind     forwarded to getFileNameByIndex.
/// \param Result   receives FileName, Line and Column.
// NOTE(review): the return type line, the CompDir parameter line, the handling
// of a failed lookup (lookupAddress returns UINT32_MAX in that case) and the
// return statements are omitted from this listing.
683 DWARFDebugLine::LineTable::getFileLineInfoForAddress(uint64_t Address,
685 FileLineInfoKind Kind,
686 DILineInfo &Result) const {
687 // Get the index of row we're looking for in the line table.
688 uint32_t RowIndex = lookupAddress(Address);
691 // Take file number and line/column from the row.
692 const auto &Row = Rows[RowIndex];
// The row's File register is the 1-based file index getFileNameByIndex
// expects.
693 if (!getFileNameByIndex(Row.File, CompDir, Kind, Result.FileName))
695 Result.Line = Row.Line;
696 Result.Column = Row.Column;