[CMake] Add option LLVM_EXTERNALIZE_DEBUGINFO

[oota-llvm.git] / lib / ProfileData / SampleProfReader.cpp
diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp

index c620d4ca84a5d3b7f02b333a939f8fa0de991ee9..e71d0bae07bdbc367dfdc99f5cb3cf12f85b869d 100644 (file)
--- a/lib/ProfileData/SampleProfReader.cpp
+++ b/lib/ProfileData/SampleProfReader.cpp
@@ -8,166 +8,37 @@
  //===----------------------------------------------------------------------===//
  //
  // This file implements the class that reads LLVM sample profiles. It
-// supports two file formats: text and binary. The textual representation
-// is useful for debugging and testing purposes. The binary representation
-// is more compact, resulting in smaller file sizes. However, they can
-// both be used interchangeably.
+// supports three file formats: text, binary and gcov.
  //
-// NOTE: If you are making changes to the file format, please remember
-//       to document them in the Clang documentation at
-//       tools/clang/docs/UsersManual.rst.
+// The textual representation is useful for debugging and testing purposes. The
+// binary representation is more compact, resulting in smaller file sizes.
  //
-// Text format
-// -----------
+// The gcov encoding is the one generated by GCC's AutoFDO profile creation
+// tool (https://github.com/google/autofdo).
  //
-// Sample profiles are written as ASCII text. The file is divided into
-// sections, which correspond to each of the functions executed at runtime.
-// Each section has the following format
+// All three encodings can be used interchangeably as an input sample profile.
  //
-//     function1:total_samples:total_head_samples
-//      offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
-//      offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
-//      ...
-//      offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
-//      offsetA[.discriminator]: fnA:num_of_total_samples
-//       offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
-//       ...
-//
-// This is a nested tree in which the identation represent the nest level
-// of the inline stack. There is no blank line in the file. And the spacing
-// within a single line is fixed. Additional spaces will result in an error
-// while reading the file.
-//
-// Inline stack is a stack of source locations in which the top of the stack
-// represents the leaf function, and the bottom of the stack represents the
-// actual symbol in which the instruction belongs.
-//
-// Function names must be mangled in order for the profile loader to
-// match them in the current translation unit. The two numbers in the
-// function header specify how many total samples were accumulated in the
-// function (first number), and the total number of samples accumulated
-// in the prologue of the function (second number). This head sample
-// count provides an indicator of how frequently the function is invoked.
-//
-// There are two types of lines in the function body.
-//
-// * Sampled line represents the profile information of a source location.
-// * Callsite line represents the profile inofrmation of a callsite.
-//
-// Each sampled line may contain several items. Some are optional (marked
-// below):
-//
-// a. Source line offset. This number represents the line number
-//    in the function where the sample was collected. The line number is
-//    always relative to the line where symbol of the function is
-//    defined. So, if the function has its header at line 280, the offset
-//    13 is at line 293 in the file.
-//
-//    Note that this offset should never be a negative number. This could
-//    happen in cases like macros. The debug machinery will register the
-//    line number at the point of macro expansion. So, if the macro was
-//    expanded in a line before the start of the function, the profile
-//    converter should emit a 0 as the offset (this means that the optimizers
-//    will not be able to associate a meaningful weight to the instructions
-//    in the macro).
-//
-// b. [OPTIONAL] Discriminator. This is used if the sampled program
-//    was compiled with DWARF discriminator support
-//    (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
-//    DWARF discriminators are unsigned integer values that allow the
-//    compiler to distinguish between multiple execution paths on the
-//    same source line location.
-//
-//    For example, consider the line of code ``if (cond) foo(); else bar();``.
-//    If the predicate ``cond`` is true 80% of the time, then the edge
-//    into function ``foo`` should be considered to be taken most of the
-//    time. But both calls to ``foo`` and ``bar`` are at the same source
-//    line, so a sample count at that line is not sufficient. The
-//    compiler needs to know which part of that line is taken more
-//    frequently.
-//
-//    This is what discriminators provide. In this case, the calls to
-//    ``foo`` and ``bar`` will be at the same line, but will have
-//    different discriminator values. This allows the compiler to correctly
-//    set edge weights into ``foo`` and ``bar``.
-//
-// c. Number of samples. This is an integer quantity representing the
-//    number of samples collected by the profiler at this source
-//    location.
-//
-// d. [OPTIONAL] Potential call targets and samples. If present, this
-//    line contains a call instruction. This models both direct and
-//    number of samples. For example,
-//
-//      130: 7  foo:3  bar:2  baz:7
-//
-//    The above means that at relative line offset 130 there is a call
-//    instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
-//    with ``baz()`` being the relatively more frequently called target.
-//
-// Each callsite line may contain several items. Some are optional.
-//
-// a. Source line offset. This number represents the line number of the
-//    callsite that is inlined in the profiled binary.
-//
-// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
-//
-// c. Number of samples. This is an integer quantity representing the
-//    total number of samples collected for the inlined instance at this
-//    callsite
  //===----------------------------------------------------------------------===//
  
  #include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorOr.h"
  #include "llvm/Support/LEB128.h"
  #include "llvm/Support/LineIterator.h"
  #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
  
  using namespace llvm::sampleprof;
  using namespace llvm;
  
-/// \brief Print the samples collected for a function on stream \p OS.
-///
-/// \param OS Stream to emit the output to.
-void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
-  OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
-     << " sampled lines\n";
-  for (const auto &SI : BodySamples) {
-    LineLocation Loc = SI.first;
-    const SampleRecord &Sample = SI.second;
-    OS.indent(Indent);
-    OS << "line offset: " << Loc.LineOffset
-       << ", discriminator: " << Loc.Discriminator
-       << ", number of samples: " << Sample.getSamples();
-    if (Sample.hasCalls()) {
-      OS << ", calls:";
-      for (const auto &I : Sample.getCallTargets())
-        OS << " " << I.first() << ":" << I.second;
-    }
-    OS << "\n";
-  }
-  for (const auto &CS : CallsiteSamples) {
-    CallsiteLocation Loc = CS.first;
-    const FunctionSamples &CalleeSamples = CS.second;
-    OS.indent(Indent);
-    OS << "line offset: " << Loc.LineOffset
-       << ", discriminator: " << Loc.Discriminator
-       << ", inlined callee: " << Loc.CalleeName << ": ";
-    CalleeSamples.print(OS, Indent + 2);
-  }
-}
-
  /// \brief Dump the function profile for \p FName.
  ///
  /// \param FName Name of the function to print.
  /// \param OS Stream to emit the output to.
  void SampleProfileReader::dumpFunctionProfile(StringRef FName,
                                                raw_ostream &OS) {
-  OS << "Function: " << FName << ": ";
-  Profiles[FName].print(OS);
+  OS << "Function: " << FName << ": " << Profiles[FName];
  }
  
  /// \brief Dump all the function profiles found on stream \p OS.
@@ -184,7 +55,7 @@ void SampleProfileReader::dump(raw_ostream &OS) {
  ///
  /// \returns true if parsing is successful.
  static bool ParseHead(const StringRef &Input, StringRef &FName,
-                      unsigned &NumSamples, unsigned &NumHeadSamples) {
+                      uint64_t &NumSamples, uint64_t &NumHeadSamples) {
    if (Input[0] == ' ')
      return false;
    size_t n2 = Input.rfind(':');
@@ -197,6 +68,12 @@ static bool ParseHead(const StringRef &Input, StringRef &FName,
    return true;
  }
  
+/// \brief Returns true if line offset \p L is legal (fits in 16 bits).
+/// \p L is 64 bits wide so offsets read as uint64_t are not truncated first.
+static bool isOffsetLegal(uint64_t L) {
+  return L <= 0xffff;
+}
+
  /// \brief Parse \p Input as line sample.
  ///
  /// \param Input input line.
@@ -208,10 +85,10 @@ static bool ParseHead(const StringRef &Input, StringRef &FName,
  /// \param TargetCountMap map from indirect call target to count.
  ///
  /// returns true if parsing is successful.
-static bool ParseLine(const StringRef &Input, bool &IsCallsite, unsigned &Depth,
-                      unsigned &NumSamples, unsigned &LineOffset,
-                      unsigned &Discriminator, StringRef &CalleeName,
-                      DenseMap<StringRef, unsigned> &TargetCountMap) {
+static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
+                      uint64_t &NumSamples, uint32_t &LineOffset,
+                      uint32_t &Discriminator, StringRef &CalleeName,
+                      DenseMap<StringRef, uint64_t> &TargetCountMap) {
    for (Depth = 0; Input[Depth] == ' '; Depth++)
      ;
    if (Depth == 0)
@@ -221,7 +98,7 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, unsigned &Depth,
    StringRef Loc = Input.substr(Depth, n1 - Depth);
    size_t n2 = Loc.find('.');
    if (n2 == StringRef::npos) {
-    if (Loc.getAsInteger(10, LineOffset))
+    if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
        return false;
      Discriminator = 0;
    } else {
@@ -250,15 +127,15 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, unsigned &Depth,
        if (n3 != StringRef::npos) {
          pair = Rest.substr(0, n3);
        }
-      int n4 = pair.find(':');
-      unsigned count;
+      size_t n4 = pair.find(':');
+      uint64_t count;
        if (pair.substr(n4 + 1).getAsInteger(10, count))
          return false;
        TargetCountMap[pair.substr(0, n4)] = count;
      }
    } else {
      IsCallsite = true;
-    int n3 = Rest.find_last_of(':');
+    size_t n3 = Rest.find_last_of(':');
      CalleeName = Rest.substr(0, n3);
      if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
        return false;
@@ -293,7 +170,7 @@ std::error_code SampleProfileReaderText::read() {
      // The only requirement we place on the identifier, then, is that it
      // should not begin with a number.
      if ((*LineIt)[0] != ' ') {
-      unsigned NumSamples, NumHeadSamples;
+      uint64_t NumSamples, NumHeadSamples;
        StringRef FName;
        if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
          reportError(LineIt.line_number(),
@@ -307,11 +184,11 @@ std::error_code SampleProfileReaderText::read() {
        InlineStack.clear();
        InlineStack.push_back(&FProfile);
      } else {
-      unsigned NumSamples;
+      uint64_t NumSamples;
        StringRef FName;
-      DenseMap<StringRef, unsigned> TargetCountMap;
+      DenseMap<StringRef, uint64_t> TargetCountMap;
        bool IsCallsite;
-      unsigned Depth, LineOffset, Discriminator;
+      uint32_t Depth, LineOffset, Discriminator;
        if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset,
                       Discriminator, FName, TargetCountMap)) {
          reportError(LineIt.line_number(),
@@ -344,6 +221,22 @@ std::error_code SampleProfileReaderText::read() {
    return sampleprof_error::success;
  }
  
+/// \brief Return true if \p Buffer looks like a text sample profile.
+///
+/// Only the first non-blank, non-comment ('#') line is examined: it must not
+/// start with a space and must parse as a function header via ParseHead.
+bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
+  line_iterator FirstLine(Buffer, /*SkipBlanks=*/true, '#');
+
+  // No lines at all, or an indented first line, cannot be a function header.
+  if (FirstLine.is_at_eof() || (*FirstLine)[0] == ' ')
+    return false;
+
+  uint64_t TotalSamples, HeadSamples;
+  StringRef Name;
+  return ParseHead(*FirstLine, Name, TotalSamples, HeadSamples);
+}
+
  template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
    unsigned NumBytesRead = 0;
    std::error_code EC;
@@ -378,28 +271,37 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
    return Str;
  }
  
-std::error_code
-SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
-  auto Val = readNumber<unsigned>();
-  if (std::error_code EC = Val.getError())
+ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
+  // The stream holds an index into NameTable, not the string itself.
+  auto Idx = readNumber<uint32_t>();
+  if (std::error_code EC = Idx.getError())
      return EC;
-  FProfile.addTotalSamples(*Val);
+  if (*Idx >= NameTable.size())
+    return sampleprof_error::truncated_name_table;
+  return NameTable[*Idx];
+}
  
-  Val = readNumber<unsigned>();
-  if (std::error_code EC = Val.getError())
+std::error_code
+SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
+  auto NumSamples = readNumber<uint64_t>();
+  if (std::error_code EC = NumSamples.getError())
      return EC;
-  FProfile.addHeadSamples(*Val);
+  FProfile.addTotalSamples(*NumSamples);
  
    // Read the samples in the body.
-  auto NumRecords = readNumber<unsigned>();
+  auto NumRecords = readNumber<uint32_t>();
    if (std::error_code EC = NumRecords.getError())
      return EC;
  
-  for (unsigned I = 0; I < *NumRecords; ++I) {
+  for (uint32_t I = 0; I < *NumRecords; ++I) {
      auto LineOffset = readNumber<uint64_t>();
      if (std::error_code EC = LineOffset.getError())
        return EC;
  
+    if (!isOffsetLegal(*LineOffset)) {
+      return std::error_code();
+    }
+
      auto Discriminator = readNumber<uint64_t>();
      if (std::error_code EC = Discriminator.getError())
        return EC;
@@ -408,12 +310,12 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
      if (std::error_code EC = NumSamples.getError())
        return EC;
  
-    auto NumCalls = readNumber<unsigned>();
+    auto NumCalls = readNumber<uint32_t>();
      if (std::error_code EC = NumCalls.getError())
        return EC;
  
-    for (unsigned J = 0; J < *NumCalls; ++J) {
-      auto CalledFunction(readString());
+    for (uint32_t J = 0; J < *NumCalls; ++J) {
+      auto CalledFunction(readStringFromTable());
        if (std::error_code EC = CalledFunction.getError())
          return EC;
  
@@ -429,11 +331,11 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
    }
  
    // Read all the samples for inlined function calls.
-  auto NumCallsites = readNumber<unsigned>();
+  auto NumCallsites = readNumber<uint32_t>();
    if (std::error_code EC = NumCallsites.getError())
      return EC;
  
-  for (unsigned J = 0; J < *NumCallsites; ++J) {
+  for (uint32_t J = 0; J < *NumCallsites; ++J) {
      auto LineOffset = readNumber<uint64_t>();
      if (std::error_code EC = LineOffset.getError())
        return EC;
@@ -442,7 +344,7 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
      if (std::error_code EC = Discriminator.getError())
        return EC;
  
-    auto FName(readString());
+    auto FName(readStringFromTable());
      if (std::error_code EC = FName.getError())
        return EC;
  
@@ -457,13 +359,19 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
  
  std::error_code SampleProfileReaderBinary::read() {
    while (!at_eof()) {
-    auto FName(readString());
+    auto NumHeadSamples = readNumber<uint64_t>();
+    if (std::error_code EC = NumHeadSamples.getError())
+      return EC;
+
+    auto FName(readStringFromTable());
      if (std::error_code EC = FName.getError())
        return EC;
  
      Profiles[*FName] = FunctionSamples();
      FunctionSamples &FProfile = Profiles[*FName];
  
+    FProfile.addHeadSamples(*NumHeadSamples);
+
      if (std::error_code EC = readProfile(FProfile))
        return EC;
    }
@@ -489,6 +397,18 @@ std::error_code SampleProfileReaderBinary::readHeader() {
    else if (*Version != SPVersion())
      return sampleprof_error::unsupported_version;
  
+  // Read the name table.
+  auto Size = readNumber<uint32_t>();
+  if (std::error_code EC = Size.getError())
+    return EC;
+  NameTable.reserve(*Size);
+  for (uint32_t I = 0; I < *Size; ++I) {
+    auto Name(readString());
+    if (std::error_code EC = Name.getError())
+      return EC;
+    NameTable.push_back(*Name);
+  }
+
    return sampleprof_error::success;
  }
  
@@ -499,16 +419,6 @@ bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) {
    return Magic == SPMagic();
  }
  
-bool SourceInfo::operator<(const SourceInfo &P) const {
-  if (Line != P.Line)
-    return Line < P.Line;
-  if (StartLine != P.StartLine)
-    return StartLine < P.StartLine;
-  if (Discriminator != P.Discriminator)
-    return Discriminator < P.Discriminator;
-  return FuncName < P.FuncName;
-}
-
  std::error_code SampleProfileReaderGCC::skipNextWord() {
    uint32_t dummy;
    if (!GcovBuffer.readInt(dummy))
@@ -763,7 +673,7 @@ setupMemoryBuffer(std::string Filename) {
    auto Buffer = std::move(BufferOrErr.get());
  
    // Sanity check the file.
-  if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
+  if (Buffer->getBufferSize() > std::numeric_limits<uint32_t>::max())
      return sampleprof_error::too_large;
  
    return std::move(Buffer);
@@ -790,8 +700,10 @@ SampleProfileReader::create(StringRef Filename, LLVMContext &C) {
      Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C));
    else if (SampleProfileReaderGCC::hasFormat(*Buffer))
      Reader.reset(new SampleProfileReaderGCC(std::move(Buffer), C));
-  else
+  else if (SampleProfileReaderText::hasFormat(*Buffer))
      Reader.reset(new SampleProfileReaderText(std::move(Buffer), C));
+  else
+    return sampleprof_error::unrecognized_format;
  
    if (std::error_code EC = Reader->readHeader())
      return EC;