PGO: llvm-profdata: tool for merging profiles
author Duncan P. N. Exon Smith <dexonsmith@apple.com>
Mon, 17 Feb 2014 23:22:49 +0000 (23:22 +0000)
committer Duncan P. N. Exon Smith <dexonsmith@apple.com>
Mon, 17 Feb 2014 23:22:49 +0000 (23:22 +0000)
Introducing llvm-profdata, a tool for merging profile data generated by
PGO instrumentation in clang.

- The name indicates a file extension of <name>.profdata.  Eventually
  profile data output by clang should be changed to that extension.

- llvm-profdata merges two profiles.  However, the name is more general,
  since it will likely pick up more tasks (such as summarizing a single
  profile).

- llvm-profdata parses the current text-based format, but will be
  updated once we settle on a binary format.

<rdar://problem/15949645>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201535 91177308-0d34-0410-b5e6-96231b3b80d8

26 files changed:
docs/CommandGuide/index.rst
docs/CommandGuide/llvm-profdata.rst [new file with mode: 0644]
test/CMakeLists.txt
test/lit.cfg
test/tools/llvm-profdata/Inputs/bad-function-count.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/bar3-1.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/empty.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/extra-word.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/foo3-1.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/foo3-2.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/foo3bar3-1.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/foo3bar3-2.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/foo4-1.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/foo4-2.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/invalid-count-later.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/overflow.profdata [new file with mode: 0644]
test/tools/llvm-profdata/Inputs/three-words-long.profdata [new file with mode: 0644]
test/tools/llvm-profdata/errors.test [new file with mode: 0644]
test/tools/llvm-profdata/simple.test [new file with mode: 0644]
tools/CMakeLists.txt
tools/LLVMBuild.txt
tools/Makefile
tools/llvm-profdata/CMakeLists.txt [new file with mode: 0644]
tools/llvm-profdata/LLVMBuild.txt [new file with mode: 0644]
tools/llvm-profdata/Makefile [new file with mode: 0644]
tools/llvm-profdata/llvm-profdata.cpp [new file with mode: 0644]

index d799941aeaeaa8558ccf9e05ce64831dcebebf5c..ab4788aee7fef3f386bbc436f83e44ee0b1f0ca4 100644 (file)
@@ -25,6 +25,7 @@ Basic Commands
    llvm-config
    llvm-diff
    llvm-cov
+   llvm-profdata
    llvm-stress
    llvm-symbolizer
 
diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst
new file mode 100644 (file)
index 0000000..6b8e4d7
--- /dev/null
@@ -0,0 +1,29 @@
+llvm-profdata - work with profile data
+======================================
+
+SYNOPSIS
+--------
+
+:program:`llvm-profdata` [-output=output] file1 file2
+
+DESCRIPTION
+-----------
+
+The experimental :program:`llvm-profdata` tool reads two profile data files
+generated by PGO instrumentation and generates a file with merged data.
+
+The profile data format itself is currently textual.
+
+OPTIONS
+-------
+
+.. option:: -output=output
+
+ This option selects the output filename.  If not specified, output is to
+ stdout.
+
+EXIT STATUS
+-----------
+
+:program:`llvm-profdata` returns 1 if it cannot read its inputs or open its
+output, if the data is malformed or mismatched, or if a counter sum overflows.
index 835579c8f6b823cb9657d479c6007d0024570898..d26d0f3a1ab07363ebe77a300f1a46d7e803d14e 100644 (file)
@@ -37,6 +37,7 @@ set(LLVM_TEST_DEPENDS
           llvm-mcmarkup
           llvm-nm
           llvm-objdump
+          llvm-profdata
           llvm-readobj
           llvm-rtdyld
           llvm-symbolizer
index 962728d0cb4caa63ec8d3bdd5227e1e7a2c5b7fe..ac8d4497f09110ec3002dfe4a754195263406126 100644 (file)
@@ -234,6 +234,7 @@ for pattern in [r"\bbugpoint\b(?!-)",
                 r"\bllvm-mcmarkup\b",
                 r"\bllvm-nm\b",
                 r"\bllvm-objdump\b",
+                r"\bllvm-profdata\b",
                 r"\bllvm-ranlib\b",
                 r"\bllvm-readobj\b",
                 r"\bllvm-rtdyld\b",
diff --git a/test/tools/llvm-profdata/Inputs/bad-function-count.profdata b/test/tools/llvm-profdata/Inputs/bad-function-count.profdata
new file mode 100644 (file)
index 0000000..7d24762
--- /dev/null
@@ -0,0 +1,2 @@
+function_count_not 1count
+1
diff --git a/test/tools/llvm-profdata/Inputs/bar3-1.profdata b/test/tools/llvm-profdata/Inputs/bar3-1.profdata
new file mode 100644 (file)
index 0000000..cb8b409
--- /dev/null
@@ -0,0 +1,4 @@
+bar 3
+1
+2
+3
diff --git a/test/tools/llvm-profdata/Inputs/empty.profdata b/test/tools/llvm-profdata/Inputs/empty.profdata
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/test/tools/llvm-profdata/Inputs/extra-word.profdata b/test/tools/llvm-profdata/Inputs/extra-word.profdata
new file mode 100644 (file)
index 0000000..67a6629
--- /dev/null
@@ -0,0 +1,2 @@
+extra 1 word
+1
diff --git a/test/tools/llvm-profdata/Inputs/foo3-1.profdata b/test/tools/llvm-profdata/Inputs/foo3-1.profdata
new file mode 100644 (file)
index 0000000..d6f9f64
--- /dev/null
@@ -0,0 +1,4 @@
+foo 3
+1
+2
+3
diff --git a/test/tools/llvm-profdata/Inputs/foo3-2.profdata b/test/tools/llvm-profdata/Inputs/foo3-2.profdata
new file mode 100644 (file)
index 0000000..94fd034
--- /dev/null
@@ -0,0 +1,4 @@
+foo 3
+7
+5
+3
diff --git a/test/tools/llvm-profdata/Inputs/foo3bar3-1.profdata b/test/tools/llvm-profdata/Inputs/foo3bar3-1.profdata
new file mode 100644 (file)
index 0000000..85b702d
--- /dev/null
@@ -0,0 +1,9 @@
+foo 3
+2
+3
+5
+
+bar 3
+7
+11
+13
diff --git a/test/tools/llvm-profdata/Inputs/foo3bar3-2.profdata b/test/tools/llvm-profdata/Inputs/foo3bar3-2.profdata
new file mode 100644 (file)
index 0000000..d652781
--- /dev/null
@@ -0,0 +1,9 @@
+foo 3
+17
+19
+23
+
+bar 3
+29
+31
+37
diff --git a/test/tools/llvm-profdata/Inputs/foo4-1.profdata b/test/tools/llvm-profdata/Inputs/foo4-1.profdata
new file mode 100644 (file)
index 0000000..4d69408
--- /dev/null
@@ -0,0 +1,5 @@
+foo 4
+11
+22
+33
+44
diff --git a/test/tools/llvm-profdata/Inputs/foo4-2.profdata b/test/tools/llvm-profdata/Inputs/foo4-2.profdata
new file mode 100644 (file)
index 0000000..8d91d8b
--- /dev/null
@@ -0,0 +1,5 @@
+foo 4
+7
+6
+5
+4
diff --git a/test/tools/llvm-profdata/Inputs/invalid-count-later.profdata b/test/tools/llvm-profdata/Inputs/invalid-count-later.profdata
new file mode 100644 (file)
index 0000000..5575df3
--- /dev/null
@@ -0,0 +1,2 @@
+invalid_count 1
+1later
diff --git a/test/tools/llvm-profdata/Inputs/overflow.profdata b/test/tools/llvm-profdata/Inputs/overflow.profdata
new file mode 100644 (file)
index 0000000..bfb9a52
--- /dev/null
@@ -0,0 +1,2 @@
+overflow 1
+9223372036854775808
diff --git a/test/tools/llvm-profdata/Inputs/three-words-long.profdata b/test/tools/llvm-profdata/Inputs/three-words-long.profdata
new file mode 100644 (file)
index 0000000..a4d45fb
--- /dev/null
@@ -0,0 +1 @@
+three words long
diff --git a/test/tools/llvm-profdata/errors.test b/test/tools/llvm-profdata/errors.test
new file mode 100644 (file)
index 0000000..6335ea9
--- /dev/null
@@ -0,0 +1,22 @@
+RUN: not llvm-profdata %p/Inputs/empty.profdata %p/Inputs/foo3-1.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
+RUN: not llvm-profdata %p/Inputs/foo3-1.profdata %p/Inputs/foo3bar3-1.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
+RUN: not llvm-profdata %p/Inputs/foo4-1.profdata %p/Inputs/empty.profdata 2>&1 | FileCheck %s --check-prefix=LENGTH
+LENGTH: error: {{.*}}: truncated file
+
+RUN: not llvm-profdata %p/Inputs/foo3-1.profdata %p/Inputs/bar3-1.profdata 2>&1 | FileCheck %s --check-prefix=NAME
+NAME: error: {{.*}}: function name mismatch
+
+RUN: not llvm-profdata %p/Inputs/foo3-1.profdata %p/Inputs/foo4-1.profdata 2>&1 | FileCheck %s --check-prefix=COUNT
+COUNT: error: {{.*}}: function count mismatch
+
+RUN: not llvm-profdata %p/Inputs/overflow.profdata %p/Inputs/overflow.profdata 2>&1 | FileCheck %s --check-prefix=OVERFLOW
+OVERFLOW: error: {{.*}}: counter overflow
+
+RUN: not llvm-profdata %p/Inputs/invalid-count-later.profdata %p/Inputs/invalid-count-later.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
+INVALID-COUNT-LATER: error: {{.*}}: invalid counter
+
+RUN: not llvm-profdata %p/Inputs/bad-function-count.profdata %p/Inputs/bad-function-count.profdata 2>&1 | FileCheck %s --check-prefix=BAD-FUNCTION-COUNT
+BAD-FUNCTION-COUNT: error: {{.*}}: bad function count
+
+RUN: not llvm-profdata %p/Inputs/three-words-long.profdata %p/Inputs/three-words-long.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-DATA
+INVALID-DATA: error: {{.*}}: invalid data
diff --git a/test/tools/llvm-profdata/simple.test b/test/tools/llvm-profdata/simple.test
new file mode 100644 (file)
index 0000000..87073fa
--- /dev/null
@@ -0,0 +1,25 @@
+RUN: llvm-profdata %p/Inputs/foo3-1.profdata %p/Inputs/foo3-2.profdata 2>&1 | FileCheck %s --check-prefix=FOO3
+RUN: llvm-profdata %p/Inputs/foo3-2.profdata %p/Inputs/foo3-1.profdata 2>&1 | FileCheck %s --check-prefix=FOO3
+FOO3:      {{^foo 3$}}
+FOO3-NEXT: {{^8$}}
+FOO3-NEXT: {{^7$}}
+FOO3-NEXT: {{^6$}}
+
+RUN: llvm-profdata %p/Inputs/foo4-1.profdata %p/Inputs/foo4-2.profdata 2>&1 | FileCheck %s --check-prefix=FOO4
+RUN: llvm-profdata %p/Inputs/foo4-2.profdata %p/Inputs/foo4-1.profdata 2>&1 | FileCheck %s --check-prefix=FOO4
+FOO4:      {{^foo 4$}}
+FOO4-NEXT: {{^18$}}
+FOO4-NEXT: {{^28$}}
+FOO4-NEXT: {{^38$}}
+FOO4-NEXT: {{^48$}}
+
+RUN: llvm-profdata %p/Inputs/foo3bar3-1.profdata %p/Inputs/foo3bar3-2.profdata 2>&1 | FileCheck %s --check-prefix=FOO3BAR3
+RUN: llvm-profdata %p/Inputs/foo3bar3-2.profdata %p/Inputs/foo3bar3-1.profdata 2>&1 | FileCheck %s --check-prefix=FOO3BAR3
+FOO3BAR3:      {{^foo 3$}}
+FOO3BAR3-NEXT: {{^19$}}
+FOO3BAR3-NEXT: {{^22$}}
+FOO3BAR3-NEXT: {{^28$}}
+FOO3BAR3:      {{^bar 3$}}
+FOO3BAR3-NEXT: {{^36$}}
+FOO3BAR3-NEXT: {{^42$}}
+FOO3BAR3-NEXT: {{^50$}}
index 12e10fd0bbd1cbfe9cb6aac262a1532e001d8562..5f006594c52a5544da9add4189a2a9e8054d9346 100644 (file)
@@ -15,6 +15,7 @@ add_llvm_tool_subdirectory(llvm-nm)
 add_llvm_tool_subdirectory(llvm-size)
 
 add_llvm_tool_subdirectory(llvm-cov)
+add_llvm_tool_subdirectory(llvm-profdata)
 add_llvm_tool_subdirectory(llvm-link)
 add_llvm_tool_subdirectory(lli)
 
index 93b8d98dcba4f81042356c10b6ac8cf1e4dc9be3..1b537a372da38ee0a0792c36a7016ae0eb8381d3 100644 (file)
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-jitlistener llvm-link llvm-lto llvm-mc llvm-nm llvm-objdump llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup
+subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-jitlistener llvm-link llvm-lto llvm-mc llvm-nm llvm-objdump llvm-profdata llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup
 
 [component_0]
 type = Group
index be872548e313522437111cf2b3760f03bcc7b2f5..2b8c32ee32a72486f1e2d39dc264d0a2d67c91e1 100644 (file)
@@ -31,7 +31,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis llc llvm-ar llvm-nm llvm-link \
                  lli llvm-extract llvm-mc bugpoint llvm-bcanalyzer llvm-diff \
                  macho-dump llvm-objdump llvm-readobj llvm-rtdyld \
                  llvm-dwarfdump llvm-cov llvm-size llvm-stress llvm-mcmarkup \
-                 llvm-symbolizer obj2yaml yaml2obj llvm-c-test
+                 llvm-profdata llvm-symbolizer obj2yaml yaml2obj llvm-c-test
 
 # If Intel JIT Events support is configured, build an extra tool to test it.
 ifeq ($(USE_INTEL_JITEVENTS), 1)
diff --git a/tools/llvm-profdata/CMakeLists.txt b/tools/llvm-profdata/CMakeLists.txt
new file mode 100644 (file)
index 0000000..4b1357d
--- /dev/null
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core support )
+
+add_llvm_tool(llvm-profdata
+  llvm-profdata.cpp
+  )
diff --git a/tools/llvm-profdata/LLVMBuild.txt b/tools/llvm-profdata/LLVMBuild.txt
new file mode 100644 (file)
index 0000000..fc9e469
--- /dev/null
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-profdata/LLVMBuild.txt ----------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-profdata
+parent = Tools
+required_libraries = Support
diff --git a/tools/llvm-profdata/Makefile b/tools/llvm-profdata/Makefile
new file mode 100644 (file)
index 0000000..9d7ad52
--- /dev/null
@@ -0,0 +1,17 @@
+##===- tools/llvm-profdata/Makefile ------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := llvm-profdata
+LINK_COMPONENTS := core support
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-profdata/llvm-profdata.cpp b/tools/llvm-profdata/llvm-profdata.cpp
new file mode 100644 (file)
index 0000000..df10356
--- /dev/null
@@ -0,0 +1,178 @@
+//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// llvm-profdata merges .profdata files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// The two profiles to merge; both positional arguments are required.
+static cl::opt<std::string> Filename1(cl::Positional, cl::Required,
+                                      cl::desc("file1"));
+static cl::opt<std::string> Filename2(cl::Positional, cl::Required,
+                                      cl::desc("file2"));
+
+// Name of the file that receives the merged profile ("-" unless overridden).
+static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+                                           cl::init("-"),
+                                           cl::desc("Output file"));
+// -o is registered as an alias of -output.
+static cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
+                                 cl::aliasopt(OutputFilename));
+
+static bool readLine(const char *&Start, const char *End, StringRef &S) {
+  if (Start == End)
+    return false;
+
+  for (const char *I = Start; I != End; ++I) {
+    assert(*I && "unexpected binary data");
+    if (*I == '\n') {
+      S = StringRef(Start, I - Start);
+      Start = I + 1;
+      return true;
+    }
+  }
+
+  S = StringRef(Start, End - Start);
+  Start = End;
+  return true;
+}
+
+/// Return the text from \p Start up to the next ' ' (or \p End), and advance
+/// \p Start past that space, or to \p End when no space remains.
+static StringRef getWord(const char *&Start, const char *End) {
+  const char *WordBegin = Start;
+  const char *Cursor = WordBegin;
+  while (Cursor != End && *Cursor != ' ')
+    ++Cursor;
+
+  // Consume the separator when the word did not run to the end.
+  Start = (Cursor == End) ? End : Cursor + 1;
+  return StringRef(WordBegin, Cursor - WordBegin);
+}
+
+/// Replace the contents of \p Words with the space-separated pieces of
+/// \p Line, as produced by repeated calls to getWord(), and return how many
+/// pieces there are.
+static size_t splitWords(const StringRef &Line, std::vector<StringRef> &Words) {
+  Words.clear();
+  const char *Cursor = Line.data();
+  for (const char *Limit = Cursor + Line.size(); Cursor != Limit;)
+    Words.push_back(getWord(Cursor, Limit));
+  return Words.size();
+}
+
+/// Parse \p S as an unsigned decimal integer and store it in \p N.
+///
+/// Returns true only when \p S is a non-empty run of the digits '0'-'9'
+/// whose value fits in 64 bits.  When false is returned \p N holds an
+/// unspecified partial value and must not be used.
+static bool getNumber(const StringRef &S, uint64_t &N) {
+  N = 0;
+  // A word with no digits at all is not a number; do not report it as zero.
+  if (S.empty())
+    return false;
+
+  const uint64_t Max = ~static_cast<uint64_t>(0);
+  for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I) {
+    if (*I < '0' || *I > '9')
+      return false;
+
+    // Refuse to wrap around: unsigned overflow would otherwise turn an
+    // oversized literal into a small, wrong count instead of an error.
+    const uint64_t Digit = *I - '0';
+    if (N > (Max - Digit) / 10)
+      return false;
+    N = N * 10 + Digit;
+  }
+
+  return true;
+}
+
+static void exitWithError(const std::string &Message,
+                          const std::string &Filename, int64_t Line = -1) {
+  errs() << "error: " << Filename;
+  if (Line >= 0)
+    errs() << ":" << Line;
+  errs() << ": " << Message << "\n";
+  ::exit(1);
+}
+
+//===----------------------------------------------------------------------===//
+int main(int argc, char **argv) {
+  // Print a stack trace if we signal out.
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
+
+  OwningPtr<MemoryBuffer> File1;
+  OwningPtr<MemoryBuffer> File2;
+  if (error_code ec = MemoryBuffer::getFile(Filename1, File1))
+    exitWithError(ec.message(), Filename1);
+  if (error_code ec = MemoryBuffer::getFile(Filename2, File2))
+    exitWithError(ec.message(), Filename2);
+
+  if (OutputFilename.empty())
+    OutputFilename = "-";
+
+  std::string ErrorInfo;
+  raw_fd_ostream Output(OutputFilename.data(), ErrorInfo);
+  if (!ErrorInfo.empty())
+    exitWithError(ErrorInfo, OutputFilename);
+
+  const char *Start1 = File1->getBufferStart();
+  const char *Start2 = File2->getBufferStart();
+  const char *End1 = File1->getBufferEnd();
+  const char *End2 = File2->getBufferEnd();
+  const char *P1 = Start1;
+  const char *P2 = Start2;
+
+  StringRef Line1, Line2;
+  int64_t Num = 0;
+  while (readLine(P1, End1, Line1)) {
+    ++Num;
+    if (!readLine(P2, End2, Line2))
+      exitWithError("truncated file", Filename2, Num);
+
+    std::vector<StringRef> Words1, Words2;
+    if (splitWords(Line1, Words1) != splitWords(Line2, Words2))
+      exitWithError("data mismatch", Filename2, Num);
+
+    if (Words1.size() > 2)
+      exitWithError("invalid data", Filename1, Num);
+
+    if (Words1.empty()) {
+      Output << "\n";
+      continue;
+    }
+
+    if (Words1.size() == 2) {
+      if (Words1[0] != Words2[0])
+        exitWithError("function name mismatch", Filename2, Num);
+
+      uint64_t N1, N2;
+      if (!getNumber(Words1[1], N1))
+        exitWithError("bad function count", Filename1, Num);
+      if (!getNumber(Words2[1], N2))
+        exitWithError("bad function count", Filename2, Num);
+
+      if (N1 != N2)
+        exitWithError("function count mismatch", Filename2, Num);
+
+      Output << Line1 << "\n";
+      continue;
+    }
+
+    uint64_t N1, N2;
+    if (!getNumber(Words1[0], N1))
+      exitWithError("invalid counter", Filename1, Num);
+    if (!getNumber(Words2[0], N2))
+      exitWithError("invalid counter", Filename2, Num);
+
+    uint64_t Sum = N1 + N2;
+    if (Sum < N1)
+      exitWithError("counter overflow", Filename2, Num);
+
+    Output << N1 + N2 << "\n";
+  }
+  if (readLine(P2, End2, Line2))
+    exitWithError("truncated file", Filename1, Num + 1);
+
+  return 0;
+}