Profile: Add a library for the instrumentation based profiling format
author Justin Bogner <mail@justinbogner.com>
Wed, 12 Mar 2014 20:14:05 +0000 (20:14 +0000)
committer Justin Bogner <mail@justinbogner.com>
Wed, 12 Mar 2014 20:14:05 +0000 (20:14 +0000)
This provides a library to work with the instrumentation based
profiling format that is used by clang's -fprofile-instr-* options and
by the llvm-profdata tool. This is a binary format, rather than the
textual one that's currently in use.

The tests are in the subsequent commits that use this.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203703 91177308-0d34-0410-b5e6-96231b3b80d8

12 files changed:
include/llvm/Profile/ProfileData.h [new file with mode: 0644]
include/llvm/Profile/ProfileDataReader.h [new file with mode: 0644]
include/llvm/Profile/ProfileDataWriter.h [new file with mode: 0644]
lib/CMakeLists.txt
lib/LLVMBuild.txt
lib/Makefile
lib/Profile/CMakeLists.txt [new file with mode: 0644]
lib/Profile/LLVMBuild.txt [new file with mode: 0644]
lib/Profile/Makefile [new file with mode: 0644]
lib/Profile/ProfileData.cpp [new file with mode: 0644]
lib/Profile/ProfileDataReader.cpp [new file with mode: 0644]
lib/Profile/ProfileDataWriter.cpp [new file with mode: 0644]

diff --git a/include/llvm/Profile/ProfileData.h b/include/llvm/Profile/ProfileData.h
new file mode 100644 (file)
index 0000000..b25e82c
--- /dev/null
@@ -0,0 +1,55 @@
+//=-- ProfileData.h - Instrumented profiling format support -------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for instrumentation based PGO and coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILE_PROFILEDATA_H__
+#define LLVM_PROFILE_PROFILEDATA_H__
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/system_error.h"
+
+#include <vector>
+
+namespace llvm {
+
+/// Four magic bytes ("LPRF") that identify a profile data file.
+const char PROFILEDATA_MAGIC[4] = {'L', 'P', 'R', 'F'};
+/// Version number of the profile data format described by this header.
+const uint32_t PROFILEDATA_VERSION = 1;
+
+/// Returns the error category that make_error_code() pairs with
+/// profiledata_error values.
+const error_category &profiledata_category();
+
+struct profiledata_error {
+  enum ErrorType {
+    success = 0,
+    bad_magic,
+    unsupported_version,
+    too_large,
+    truncated,
+    malformed,
+    unknown_function
+  };
+  ErrorType V;
+
+  profiledata_error(ErrorType V) : V(V) {}
+  operator ErrorType() const { return V; }
+};
+
+/// Wraps a profiledata_error in an error_code tagged with
+/// profiledata_category().
+inline error_code make_error_code(profiledata_error E) {
+  const int RawValue = static_cast<int>(E);
+  return error_code(RawValue, profiledata_category());
+}
+
+// Mark both the wrapper struct and its nested enum as error code enums, so
+// either spelling can be returned directly from a function whose return type
+// is error_code.
+template <> struct is_error_code_enum<profiledata_error> : std::true_type {};
+template <> struct is_error_code_enum<profiledata_error::ErrorType>
+  : std::true_type {};
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILE_PROFILEDATA_H__
diff --git a/include/llvm/Profile/ProfileDataReader.h b/include/llvm/Profile/ProfileDataReader.h
new file mode 100644 (file)
index 0000000..1f8ebea
--- /dev/null
@@ -0,0 +1,93 @@
+//=-- ProfileDataReader.h - Instrumented profiling reader ---------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading profiling data for instrumentation
+// based PGO and coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILE_PROFILEDATA_READER_H__
+#define LLVM_PROFILE_PROFILEDATA_READER_H__
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+
+namespace llvm {
+
+class ProfileDataCursor;
+
+/// Reader for the profile data that is used for instrumentation based PGO.
+class ProfileDataReader {
+private:
+  /// The profile data file contents.
+  std::unique_ptr<MemoryBuffer> DataBuffer;
+  /// Offsets into DataBuffer for each function's counters.
+  StringMap<uint32_t> DataOffsets;
+  /// The maximal execution count among all functions.
+  uint64_t MaxFunctionCount;
+
+  ProfileDataReader(const ProfileDataReader &) LLVM_DELETED_FUNCTION;
+  ProfileDataReader &operator=(const ProfileDataReader &) LLVM_DELETED_FUNCTION;
+protected:
+  ProfileDataReader(std::unique_ptr<MemoryBuffer> &DataBuffer)
+      : DataBuffer(DataBuffer.release()) {}
+
+  /// Populate internal state using the profile data's index
+  error_code readIndex();
+public:
+
+  class name_iterator {
+    typedef StringMap<unsigned>::const_iterator IterTy;
+    IterTy Ix;
+  public:
+    explicit name_iterator(const IterTy &Ix) : Ix(Ix) {}
+
+    StringRef operator*() const { return Ix->getKey(); }
+
+    bool operator==(const name_iterator &RHS) const { return Ix == RHS.Ix; }
+    bool operator!=(const name_iterator &RHS) const { return Ix != RHS.Ix; }
+
+    inline name_iterator& operator++() { ++Ix; return *this; }
+  };
+
+  /// Iterators over the names of indexed items
+  name_iterator begin() const {
+    return name_iterator(DataOffsets.begin());
+  }
+  name_iterator end() const {
+    return name_iterator(DataOffsets.end());
+  }
+
+private:
+  error_code findFunctionCounts(StringRef FuncName, uint64_t &FunctionHash,
+                                ProfileDataCursor &Cursor);
+public:
+  /// The number of profiled functions
+  size_t numProfiledFunctions() { return DataOffsets.size(); }
+  /// Fill Counts with the profile data for the given function name.
+  error_code getFunctionCounts(StringRef FuncName, uint64_t &FunctionHash,
+                               std::vector<uint64_t> &Counts);
+  /// Get the frequency with which a function is called relative to the function
+  /// that is called most often in the program.
+  error_code getCallFrequency(StringRef FuncName, uint64_t &FunctionHash,
+                              double &F);
+
+  static error_code create(std::string Path,
+                           std::unique_ptr<ProfileDataReader> &Result);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILE_PROFILEDATA_READER_H__
diff --git a/include/llvm/Profile/ProfileDataWriter.h b/include/llvm/Profile/ProfileDataWriter.h
new file mode 100644 (file)
index 0000000..7e7802a
--- /dev/null
@@ -0,0 +1,54 @@
+//=-- ProfileDataWriter.h - Instrumented profiling writer ---------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing profiling data for instrumentation
+// based PGO and coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILE_PROFILEDATA_WRITER_H__
+#define LLVM_PROFILE_PROFILEDATA_WRITER_H__
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+
+namespace llvm {
+
+/// Fixed-size header found at the start of a profile data file.
+///
+/// Each field sits at its natural alignment (offsets 0, 4, 8, 12 and 16), so
+/// the struct contains no implicit padding and does not need a
+/// compiler-specific packing attribute. The explicit Padding word keeps
+/// MaxFunctionCount 8-byte aligned.
+struct ProfileDataHeader {
+  char     Magic[4];
+  uint32_t Version;
+  uint32_t DataStart;
+  uint32_t Padding;
+  uint64_t MaxFunctionCount;
+};
+// sizeof(ProfileDataHeader) is used as the on-disk header size, so fail the
+// build if a compiler ever lays the struct out differently.
+static_assert(sizeof(ProfileDataHeader) == 24,
+              "ProfileDataHeader must match the 24-byte on-disk header");
+
+/// Writer for instrumentation based profile data
+///
+/// Function records are accumulated with addFunctionCounts() and the complete
+/// file (header, name index, padding, counter data) is emitted by write().
+class ProfileDataWriter {
+  /// Byte offset of each function's record within the counter data.
+  StringMap<size_t> FunctionOffsets;
+  /// The counter data: for each function its hash, its number of counters,
+  /// and then the counters themselves.
+  std::vector<uint64_t> FunctionData;
+  /// Running size of the header plus the name index.
+  uint32_t DataStart;
+  /// Largest first counter passed to addFunctionCounts() so far.
+  uint64_t MaxFunctionCount;
+
+public:
+  ProfileDataWriter()
+      : DataStart(sizeof(ProfileDataHeader)), MaxFunctionCount(0) {}
+
+  /// Record \p NumCounters counters for \p FuncName. \p Counters must point
+  /// to at least one value; the first one is treated as the function's
+  /// execution count.
+  void addFunctionCounts(StringRef FuncName, uint64_t FunctionHash,
+                         uint64_t NumCounters, const uint64_t *Counters);
+  /// Emit everything recorded so far to \p OS in the binary format.
+  void write(raw_ostream &OS);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILE_PROFILEDATA_WRITER_H__
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 9367f553134208550f1a80153decdf4b83bf9d55..a4496f9e58b256e47d3edeba0f1c5dc4c64c77ad 100644 (file)
@@ -16,3 +16,4 @@ add_subdirectory(ExecutionEngine)
 add_subdirectory(Target)
 add_subdirectory(AsmParser)
 add_subdirectory(LineEditor)
+add_subdirectory(Profile)
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index a0984d410c699d7fb3092746d4d233c557cfbd90..c75ca4e90300f5246b6036908f508ea06cb4e016 100644 (file)
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = Analysis AsmParser Bitcode CodeGen DebugInfo ExecutionEngine LineEditor Linker IR IRReader LTO MC Object Option Support TableGen Target Transforms
+subdirectories = Analysis AsmParser Bitcode CodeGen DebugInfo ExecutionEngine LineEditor Linker IR IRReader LTO MC Object Option Profile Support TableGen Target Transforms
 
 [component_0]
 type = Group
diff --git a/lib/Makefile b/lib/Makefile
index a97f71aded08062e1e247e03e06d26e6aff83304..1f55dd7f96069d954261318e3531602efe3d15eb 100644 (file)
@@ -12,6 +12,6 @@ include $(LEVEL)/Makefile.config
 
 PARALLEL_DIRS := IR AsmParser Bitcode Analysis Transforms CodeGen Target \
                  ExecutionEngine Linker LTO MC Object Option DebugInfo   \
-                 IRReader LineEditor
+                 IRReader LineEditor Profile
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Profile/CMakeLists.txt b/lib/Profile/CMakeLists.txt
new file mode 100644 (file)
index 0000000..e0a4f0a
--- /dev/null
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMProfile
+  ProfileData.cpp
+  ProfileDataReader.cpp
+  ProfileDataWriter.cpp
+  )
diff --git a/lib/Profile/LLVMBuild.txt b/lib/Profile/LLVMBuild.txt
new file mode 100644 (file)
index 0000000..ae1fdd6
--- /dev/null
@@ -0,0 +1,21 @@
+;===- ./lib/Profile/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+; The Profile library uses MemoryBuffer, raw_ostream and error_code from
+; Support, so that dependency is declared for the build system.
+[component_0]
+type = Library
+name = Profile
+parent = Libraries
+required_libraries = Support
diff --git a/lib/Profile/Makefile b/lib/Profile/Makefile
new file mode 100644 (file)
index 0000000..fb80a12
--- /dev/null
@@ -0,0 +1,14 @@
+##===- lib/Profile/Makefile --------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMProfile
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Profile/ProfileData.cpp b/lib/Profile/ProfileData.cpp
new file mode 100644 (file)
index 0000000..65dca9d
--- /dev/null
@@ -0,0 +1,54 @@
+//=-- ProfileData.cpp - Instrumented profiling format support ---------------=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for clang's instrumentation based PGO and
+// coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Profile/ProfileData.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class ProfileDataErrorCategoryType : public _do_message {
+  const char *name() const override { return "llvm.profiledata"; }
+  std::string message(int IE) const {
+    profiledata_error::ErrorType E =
+        static_cast<profiledata_error::ErrorType>(IE);
+    switch (E) {
+    case profiledata_error::success: return "Success";
+    case profiledata_error::bad_magic:
+      return "Invalid file format (bad magic)";
+    case profiledata_error::unsupported_version:
+      return "Unsupported format version";
+    case profiledata_error::too_large:
+      return "Too much profile data";
+    case profiledata_error::truncated:
+      return "Truncated profile data";
+    case profiledata_error::malformed:
+      return "Malformed profile data";
+    case profiledata_error::unknown_function:
+      return "No profile data available for function";
+    }
+    llvm_unreachable("A value of profiledata_error has no message.");
+  }
+  error_condition default_error_condition(int EV) const {
+    if (EV == profiledata_error::success)
+      return errc::success;
+    return errc::invalid_argument;
+  }
+};
+}
+
+/// Returns the single category object shared by all profiledata_error codes.
+const error_category &llvm::profiledata_category() {
+  // Function-local static: constructed on first call, same object afterwards.
+  static ProfileDataErrorCategoryType Category;
+  return Category;
+}
diff --git a/lib/Profile/ProfileDataReader.cpp b/lib/Profile/ProfileDataReader.cpp
new file mode 100644 (file)
index 0000000..68929c3
--- /dev/null
@@ -0,0 +1,183 @@
+//=-- ProfileDataReader.cpp - Instrumented profiling reader -----------------=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading profiling data for clang's
+// instrumentation based PGO and coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Profile/ProfileDataReader.h"
+#include "llvm/Profile/ProfileData.h"
+#include "llvm/Support/Endian.h"
+
+#include <cassert>
+
+using namespace llvm;
+
+/// Open the profile data at \p Path (or stdin), check that it is small enough
+/// for 32-bit offsets, and parse its index.
+///
+/// \p Result is only assigned once the index has been read successfully, so a
+/// caller never receives a reader whose index failed to load.
+error_code ProfileDataReader::create(
+    std::string Path, std::unique_ptr<ProfileDataReader> &Result) {
+  std::unique_ptr<MemoryBuffer> Buffer;
+  if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
+    return EC;
+
+  // Offsets inside the file are stored as 32-bit values.
+  if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
+    return profiledata_error::too_large;
+
+  std::unique_ptr<ProfileDataReader> Reader(new ProfileDataReader(Buffer));
+  if (error_code EC = Reader->readIndex())
+    return EC;
+  Result.reset(Reader.release());
+  return profiledata_error::success;
+}
+
+class llvm::ProfileDataCursor {
+  const char *Start;
+  const char *Next;
+  const char *End;
+
+  error_code skip(unsigned bytes) {
+    if (Next + bytes > End)
+      return profiledata_error::malformed;
+    Next += bytes;
+    return profiledata_error::success;
+  }
+
+  template <typename T>
+  error_code read(T &Result) {
+    typedef support::detail::packed_endian_specific_integral
+        <T, support::little, support::unaligned> Endian_t;
+    const char *Prev = Next;
+    if (error_code EC = skip(sizeof(T)))
+      return EC;
+    Result = *reinterpret_cast<const Endian_t*>(Prev);
+    return profiledata_error::success;
+  }
+public:
+  ProfileDataCursor(const MemoryBuffer *Buf)
+      : Start(Buf->getBufferStart()), Next(Start), End(Buf->getBufferEnd()) {}
+  bool offsetReached(size_t Offset) { return Start + Offset <= Next; }
+  bool offsetInBounds(size_t Offset) { return Start + Offset < End; }
+
+  error_code skipToOffset(size_t Offset) {
+    if (!offsetInBounds(Offset))
+      return profiledata_error::malformed;
+    Next = Start + Offset;
+    return profiledata_error::success;
+  }
+
+  error_code skip32() { return skip(4); }
+  error_code skip64() { return skip(8); }
+  error_code read32(uint32_t &Result) { return read<uint32_t>(Result); }
+  error_code read64(uint64_t &Result) { return read<uint64_t>(Result); }
+
+  error_code readChars(StringRef &Result, uint32_t Len) {
+    error_code EC;
+    const char *Prev = Next;
+    if (error_code EC = skip(Len))
+      return EC;
+    Result = StringRef(Prev, Len);
+    return profiledata_error::success;
+  }
+  error_code readString(StringRef &Result) {
+    uint32_t Len;
+    if (error_code EC = read32(Len))
+      return EC;
+    return readChars(Result, Len);
+  }
+};
+
+error_code ProfileDataReader::readIndex() {
+  ProfileDataCursor Cursor(DataBuffer.get());
+  error_code EC;
+  StringRef Magic;
+  uint32_t Version, IndexEnd, DataStart;
+
+  if ((EC = Cursor.readChars(Magic, 4)))
+    return EC;
+  if (StringRef(PROFILEDATA_MAGIC, 4) != Magic)
+    return profiledata_error::bad_magic;
+  if ((EC = Cursor.read32(Version)))
+    return EC;
+  if (Version != PROFILEDATA_VERSION)
+    return profiledata_error::unsupported_version;
+  if ((EC = Cursor.read32(IndexEnd)))
+    return EC;
+  if ((EC = Cursor.skip32()))
+    return EC;
+  if ((EC = Cursor.read64(MaxFunctionCount)))
+    return EC;
+
+  DataStart = IndexEnd + (sizeof(uint64_t) - IndexEnd % sizeof(uint64_t));
+  while (!Cursor.offsetReached(IndexEnd)) {
+    StringRef FuncName;
+    uint32_t Offset, TotalOffset;
+    if ((EC = Cursor.readString(FuncName)))
+      return EC;
+    if ((EC = Cursor.read32(Offset)))
+      return EC;
+    TotalOffset = DataStart + Offset;
+    if (!Cursor.offsetInBounds(TotalOffset))
+      return profiledata_error::truncated;
+    DataOffsets[FuncName] = TotalOffset;
+  }
+
+  return profiledata_error::success;
+}
+
+error_code ProfileDataReader::findFunctionCounts(StringRef FuncName,
+                                                 uint64_t &FunctionHash,
+                                                 ProfileDataCursor &Cursor) {
+  error_code EC;
+  // Find the relevant section of the pgo-data file.
+  const auto &OffsetIter = DataOffsets.find(FuncName);
+  if (OffsetIter == DataOffsets.end())
+    return profiledata_error::unknown_function;
+  // Go there and read the function data
+  if ((EC = Cursor.skipToOffset(OffsetIter->getValue())))
+    return EC;
+  if ((EC = Cursor.read64(FunctionHash)))
+    return EC;
+  return profiledata_error::success;
+}
+
+error_code ProfileDataReader::getFunctionCounts(StringRef FuncName,
+                                                uint64_t &FunctionHash,
+                                                std::vector<uint64_t> &Counts) {
+  ProfileDataCursor Cursor(DataBuffer.get());
+  error_code EC;
+  if ((EC = findFunctionCounts(FuncName, FunctionHash, Cursor)))
+    return EC;
+
+  uint64_t NumCounters;
+  if ((EC = Cursor.read64(NumCounters)))
+    return EC;
+  for (uint64_t I = 0; I < NumCounters; ++I) {
+    uint64_t Count;
+    if ((EC = Cursor.read64(Count)))
+      return EC;
+    Counts.push_back(Count);
+  }
+
+  return profiledata_error::success;
+}
+
+error_code ProfileDataReader::getCallFrequency(StringRef FuncName,
+                                               uint64_t &FunctionHash,
+                                               double &Frequency) {
+  ProfileDataCursor Cursor(DataBuffer.get());
+  error_code EC;
+  if ((EC = findFunctionCounts(FuncName, FunctionHash, Cursor)))
+    return EC;
+  if ((EC = Cursor.skip64()))
+    return EC;
+  uint64_t CallCount;
+  if ((EC = Cursor.read64(CallCount)))
+    return EC;
+  Frequency = CallCount / (double)MaxFunctionCount;
+  return profiledata_error::success;
+}
diff --git a/lib/Profile/ProfileDataWriter.cpp b/lib/Profile/ProfileDataWriter.cpp
new file mode 100644 (file)
index 0000000..b5993dd
--- /dev/null
@@ -0,0 +1,71 @@
+//=-- ProfileDataWriter.cpp - Instrumented profiling writer -----------------=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing profiling data for clang's
+// instrumentation based PGO and coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Profile/ProfileDataWriter.h"
+#include "llvm/Profile/ProfileData.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+
+/// Stream adaptor that prints an integer as sizeof(T) raw bytes, least
+/// significant byte first, regardless of host endianness.
+///
+/// The value is stored by copy. Every use in this file constructs LEBytes from
+/// a temporary, so holding a reference would dangle as soon as an LEBytes
+/// object outlived the expression that created it.
+template <typename T>
+struct LEBytes {
+  const T Data;
+  LEBytes(const T &Data) : Data(Data) {}
+  void print(raw_ostream &OS) const {
+    for (uint32_t Byte = 0; Byte < sizeof(Data); ++Byte)
+      OS << (char)((Data >> (8 * Byte)) & 0xFF);
+  }
+};
+/// Lets an LEBytes value be chained into a raw_ostream with operator<<.
+template <typename T>
+static raw_ostream &operator<<(raw_ostream &OS, const LEBytes<T> &Bytes) {
+  Bytes.print(OS);
+  return OS;
+}
+
+void ProfileDataWriter::addFunctionCounts(StringRef FuncName,
+                                          uint64_t FunctionHash,
+                                          uint64_t NumCounters,
+                                          const uint64_t *Counters) {
+  DataStart += 2 * sizeof(uint32_t) + FuncName.size();
+  FunctionOffsets[FuncName] = FunctionData.size() * sizeof(uint64_t);
+  FunctionData.push_back(FunctionHash);
+  FunctionData.push_back(NumCounters);
+  assert(NumCounters > 0 && "Function call counter missing!");
+  if (Counters[0] > MaxFunctionCount)
+    MaxFunctionCount = Counters[0];
+  for (uint64_t I = 0; I < NumCounters; ++I)
+    FunctionData.push_back(Counters[I]);
+}
+
+void ProfileDataWriter::write(raw_ostream &OS) {
+  for (char C : PROFILEDATA_MAGIC)
+    OS << C;
+  OS << LEBytes<uint32_t>(PROFILEDATA_VERSION);
+  OS << LEBytes<uint32_t>(DataStart);
+  OS << LEBytes<uint32_t>(0);
+  OS << LEBytes<uint64_t>(MaxFunctionCount);
+
+  for (const auto &I : FunctionOffsets) {
+    StringRef Name = I.getKey();
+    OS << LEBytes<uint32_t>(Name.size());
+    OS << Name;
+    OS << LEBytes<uint32_t>(I.getValue());
+  }
+
+  for (unsigned I = 0; I < sizeof(uint64_t) - DataStart % sizeof(uint64_t); ++I)
+    OS << '\0';
+
+  for (uint64_t Value : FunctionData)
+    OS << LEBytes<uint64_t>(Value);
+}