Fix the representation of debug line table in DebugInfo LLVM library,
author Alexey Samsonov <samsonov@google.com>
Tue, 7 Aug 2012 11:46:57 +0000 (11:46 +0000)
committer Alexey Samsonov <samsonov@google.com>
Tue, 7 Aug 2012 11:46:57 +0000 (11:46 +0000)
and "instruction address -> file/line" lookup.

Instead of a plain collection of rows, the debug line table for a compilation unit is now
treated as a number of row ranges describing sequences (series of contiguous machine
instructions). The sequences are not always listed in order of increasing
address, so the previously used std::lower_bound() sometimes produced wrong results.
Now the instruction address lookup consists of two stages: finding the correct
sequence, and searching for the address in the range of rows for this sequence.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161414 91177308-0d34-0410-b5e6-96231b3b80d8

lib/DebugInfo/DWARFContext.cpp
lib/DebugInfo/DWARFDebugLine.cpp
lib/DebugInfo/DWARFDebugLine.h
test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 [new file with mode: 0755]
test/DebugInfo/dwarfdump-test.test

index a4e0d8eae43c49b34ec3bcc24089c30ceed3a859..797662b083f1b904727fbec8c51031158e3f3d24 100644 (file)
@@ -167,9 +167,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address,
     const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu);
     if (lineTable) {
       // Get the index of the row we're looking for in the line table.
-      uint64_t hiPC = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(
-          cu, DW_AT_high_pc, -1ULL);
-      uint32_t rowIndex = lineTable->lookupAddress(address, hiPC);
+      uint32_t rowIndex = lineTable->lookupAddress(address);
       if (rowIndex != -1U) {
         const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex];
         // Take file/line info from the line table.
index 117fa31aa86f6e1218178d906f2d054e384f6b18..d99575d80033327cbf35cd711868643565fb4090 100644 (file)
@@ -95,14 +95,46 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const {
 DWARFDebugLine::State::~State() {}
 
 void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) {
+  if (Sequence::Empty) {
+    // Record the beginning of instruction sequence.
+    Sequence::Empty = false;
+    Sequence::LowPC = Address;
+    Sequence::FirstRowIndex = row;
+  }
   ++row;  // Increase the row number.
   LineTable::appendRow(*this);
+  if (EndSequence) {
+    // Record the end of instruction sequence.
+    Sequence::HighPC = Address;
+    Sequence::LastRowIndex = row;
+    if (Sequence::isValid())
+      LineTable::appendSequence(*this);
+    Sequence::reset();
+  }
   Row::postAppend();
 }
 
+void DWARFDebugLine::State::finalize() {
+  row = DoneParsingLineTable;
+  if (!Sequence::Empty) {
+    // A sequence was started but no end_sequence row closed it.
+    fprintf(stderr, "warning: last sequence in debug line table is not"
+                    " terminated!\n");
+  }
+  // Sort sequences by LowPC: lookupAddress() binary-searches this vector.
+  if (!Sequences.empty()) {
+    std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC);
+    // Note: instruction address ranges of sequences should not overlap (in
+    // shared objects and executables). If they do, the address lookup still
+    // works, but the result is ambiguous. We don't report a warning in this
+    // case: sometimes a .so compiled from multiple object files contains a
+    // few rudimentary sequences for address ranges [0x0, 0xsomething).
+  }
+}
+
 DWARFDebugLine::DumpingState::~DumpingState() {}
 
-void DWARFDebugLine::DumpingState::finalize(uint32_t offset) {
+void DWARFDebugLine::DumpingState::finalize() {
   LineTable::dump(OS);
 }
 
@@ -180,8 +212,9 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data,
     fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
                     " have ended at 0x%8.8x but it ended ad 0x%8.8x\n",
             prologue_offset, end_prologue_offset, *offset_ptr);
+    return false;
   }
-  return end_prologue_offset;
+  return true;
 }
 
 bool
@@ -430,47 +463,53 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
     }
   }
 
-  state.finalize(*offset_ptr);
+  state.finalize();
 
   return end_offset;
 }
 
-static bool findMatchingAddress(const DWARFDebugLine::Row& row1,
-                                const DWARFDebugLine::Row& row2) {
-  return row1.Address < row2.Address;
-}
-
 uint32_t
-DWARFDebugLine::LineTable::lookupAddress(uint64_t address,
-                                         uint64_t cu_high_pc) const {
-  uint32_t index = UINT32_MAX;
-  if (!Rows.empty()) {
-    // Use the lower_bound algorithm to perform a binary search since we know
-    // that our line table data is ordered by address.
-    DWARFDebugLine::Row row;
-    row.Address = address;
-    typedef std::vector<Row>::const_iterator iterator;
-    iterator begin_pos = Rows.begin();
-    iterator end_pos = Rows.end();
-    iterator pos = std::lower_bound(begin_pos, end_pos, row,
-                                    findMatchingAddress);
-    if (pos == end_pos) {
-      if (address < cu_high_pc)
-        return Rows.size()-1;
-    } else {
-      // Rely on fact that we are using a std::vector and we can do
-      // pointer arithmetic to find the row index (which will be one less
-      // that what we found since it will find the first position after
-      // the current address) since std::vector iterators are just
-      // pointers to the container type.
-      index = pos - begin_pos;
-      if (pos->Address > address) {
-        if (index > 0)
-          --index;
-        else
-          index = UINT32_MAX;
-      }
-    }
+DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
+  uint32_t unknown_index = UINT32_MAX;
+  if (Sequences.empty())
+    return unknown_index;
+  // First, find an instruction sequence containing the given address.
+  DWARFDebugLine::Sequence sequence;
+  sequence.LowPC = address;
+  SequenceIter first_seq = Sequences.begin();
+  SequenceIter last_seq = Sequences.end();
+  SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence,
+      DWARFDebugLine::Sequence::orderByLowPC);
+  DWARFDebugLine::Sequence found_seq;
+  if (seq_pos == last_seq) {
+    found_seq = Sequences.back();
+  } else if (seq_pos->LowPC == address) {
+    found_seq = *seq_pos;
+  } else {
+    if (seq_pos == first_seq)
+      return unknown_index;
+    found_seq = *(seq_pos - 1);
+  }
+  if (!found_seq.containsPC(address))
+    return unknown_index;
+  // Search for instruction address in the rows describing the sequence.
+  // Rows are stored in a vector, so we may use arithmetical operations with
+  // iterators.
+  DWARFDebugLine::Row row;
+  row.Address = address;
+  RowIter first_row = Rows.begin() + found_seq.FirstRowIndex;
+  RowIter last_row = Rows.begin() + found_seq.LastRowIndex;
+  RowIter row_pos = std::lower_bound(first_row, last_row, row,
+      DWARFDebugLine::Row::orderByAddress);
+  if (row_pos == last_row) {
+    return found_seq.LastRowIndex - 1;
+  }
+  uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row);
+  if (row_pos->Address > address) {
+    if (row_pos == first_row)
+      return unknown_index;
+    else
+      index--;
   }
-  return index; // Failed to find address.
+  return index;
 }
index a8c0669b738b7055d1954294a6d8ac825dec561d..6382b45a93abd0c53387208fddadb7381dad9a4e 100644 (file)
@@ -88,6 +88,10 @@ public:
     void reset(bool default_is_stmt);
     void dump(raw_ostream &OS) const;
 
+    static bool orderByAddress(const Row& LHS, const Row& RHS) {
+      return LHS.Address < RHS.Address;
+    }
+
     // The program-counter value corresponding to a machine instruction
     // generated by the compiler.
     uint64_t Address;
@@ -125,21 +129,63 @@ public:
             EpilogueBegin:1;
   };
 
+  // A series of contiguous machine instructions. The line table of a
+  // compilation unit may consist of multiple sequences, which are not
+  // guaranteed to be listed in order of ascending instruction address.
+  struct Sequence {
+    // Covers addresses [LowPC, HighPC) via rows [FirstRowIndex, LastRowIndex).
+    // Empty stays true until the first row of the sequence is recorded.
+    uint64_t LowPC;
+    uint64_t HighPC;
+    unsigned FirstRowIndex;
+    unsigned LastRowIndex;
+    bool Empty;
+
+    Sequence() { reset(); }
+    void reset() {
+      LowPC = 0;
+      HighPC = 0;
+      FirstRowIndex = 0;
+      LastRowIndex = 0;
+      Empty = true;
+    }
+    static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) {
+      return LHS.LowPC < RHS.LowPC;
+    }
+    bool isValid() const {
+      return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex);
+    }
+    bool containsPC(uint64_t pc) const {
+      return (LowPC <= pc && pc < HighPC);
+    }
+  };
+
   struct LineTable {
     void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); }
+    void appendSequence(const DWARFDebugLine::Sequence &sequence) {
+      Sequences.push_back(sequence);
+    }
     void clear() {
       Prologue.clear();
       Rows.clear();
+      Sequences.clear();
     }
 
-    uint32_t lookupAddress(uint64_t address, uint64_t cu_high_pc) const;
+    // Returns the index of the row with file/line info for a given address,
+    // or -1 if there is no such row.
+    uint32_t lookupAddress(uint64_t address) const;
     void dump(raw_ostream &OS) const;
 
     struct Prologue Prologue;
-    std::vector<Row> Rows;
+    typedef std::vector<Row> RowVector;
+    typedef RowVector::const_iterator RowIter;
+    typedef std::vector<Sequence> SequenceVector;
+    typedef SequenceVector::const_iterator SequenceIter;
+    RowVector Rows;
+    SequenceVector Sequences;
   };
 
-  struct State : public Row, public LineTable {
+  struct State : public Row, public Sequence, public LineTable {
     // Special row codes.
     enum {
       StartParsingLineTable = 0,
@@ -150,8 +196,11 @@ public:
     virtual ~State();
 
     virtual void appendRowToMatrix(uint32_t offset);
-    virtual void finalize(uint32_t offset) { row = DoneParsingLineTable; }
-    virtual void reset() { Row::reset(Prologue.DefaultIsStmt); }
+    virtual void finalize();
+    virtual void reset() {
+      Row::reset(Prologue.DefaultIsStmt);
+      Sequence::reset();
+    }
 
     // The row number that starts at zero for the prologue, and increases for
     // each row added to the matrix.
@@ -161,7 +210,7 @@ public:
   struct DumpingState : public State {
     DumpingState(raw_ostream &OS) : OS(OS) {}
     virtual ~DumpingState();
-    virtual void finalize(uint32_t offset);
+    virtual void finalize();
   private:
     raw_ostream &OS;
   };
diff --git a/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64
new file mode 100755 (executable)
index 0000000..8848708
Binary files /dev/null and b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 differ
index faef03ba622ec1ef63a4c1a453ef5e530bcdf9f1..de23dcd9c2786490b4d4d1baa730f8b8f794032b 100644 (file)
@@ -14,6 +14,9 @@ RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
 RUN:   --address=0x573 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \
 RUN:   --address=0x56d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \
+RUN:   --address=0x55c --functions \
+RUN:   | FileCheck %s -check-prefix MANY_SEQ_IN_LINE_TABLE
 
 MAIN: main
 MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16:10
@@ -38,3 +41,6 @@ INCLUDE_TEST_1-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}decl2.h:1:0
 
 INCLUDE_TEST_2: _Z3do1v
 INCLUDE_TEST_2-NEXT: /tmp/include{{[/\\]}}decl.h:5:0
+
+MANY_SEQ_IN_LINE_TABLE: _Z1cv
+MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo/sequences{{[/\\]}}c.cc:2:0