[llvm-profdata] Add check for text profile formats and improve error reporting (2nd...
[oota-llvm.git] / tools / llvm-profdata / llvm-profdata.cpp
index fcc54042f404599a3f5faece3e7777a5a0b30875..1cd47dd5e84c248cb883474f6c7c2866e1fe7d57 100644 (file)
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Profile/ProfileDataReader.h"
-#include "llvm/Profile/ProfileDataWriter.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/ProfileData/SampleProfWriter.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
-static void exitWithError(const std::string &Message,
-                          const std::string &Filename, int64_t Line = -1) {
-  errs() << "error: " << Filename;
-  if (Line >= 0)
-    errs() << ":" << Line;
-  errs() << ": " << Message << "\n";
+/// Print a diagnostic to stderr and terminate the process with exit status 1.
+///
+/// The output is "error: ", then "<Whence>: " when \p Whence is non-empty,
+/// then \p Message and a newline. A non-empty \p Hint is printed afterwards
+/// on a line of its own.
+///
+/// \param Message  text of the diagnostic.
+/// \param Whence   optional origin of the error; callers in this file pass a
+///                 file name.
+/// \param Hint     optional extra line shown after the diagnostic.
+static void exitWithError(const Twine &Message,
+                          StringRef Whence = "",
+                          StringRef Hint = "") {
+  errs() << "error: ";
+  if (!Whence.empty())
+    errs() << Whence << ": ";
+  errs() << Message << "\n";
+  if (!Hint.empty())
+    errs() << Hint << "\n";
   ::exit(1);
 }
 
-int merge_main(int argc, const char *argv[]) {
-  cl::opt<std::string> Filename1(cl::Positional, cl::Required,
-                                 cl::desc("file1"));
-  cl::opt<std::string> Filename2(cl::Positional, cl::Required,
-                                 cl::desc("file2"));
-
-  cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
-                                      cl::init("-"),
-                                      cl::desc("Output file"));
-  cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
-                            cl::aliasopt(OutputFilename));
-
-  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
-
-  std::unique_ptr<ProfileDataReader> Reader1, Reader2;
-  if (error_code EC = ProfileDataReader::create(Filename1, Reader1))
-    exitWithError(EC.message(), Filename1);
-  if (error_code EC = ProfileDataReader::create(Filename2, Reader2))
-    exitWithError(EC.message(), Filename2);
-
-  if (OutputFilename.empty())
-    OutputFilename = "-";
-
-  std::string ErrorInfo;
-  raw_fd_ostream Output(OutputFilename.data(), ErrorInfo, sys::fs::F_Text);
-  if (!ErrorInfo.empty())
-    exitWithError(ErrorInfo, OutputFilename);
-
-  if (Output.is_displayed())
-    exitWithError("Refusing to write a binary file to stdout", OutputFilename);
-
-  StringRef Name1, Name2;
-  std::vector<uint64_t> Counts1, Counts2, NewCounts;
-  uint64_t Hash1, Hash2;
-  ProfileDataWriter Writer;
-  ProfileDataReader::name_iterator I1 = Reader1->begin(),
-                                   E1 = Reader1->end(),
-                                   I2 = Reader2->begin(),
-                                   E2 = Reader2->end();
-  for (; I1 != E1 && I2 != E2; ++I1, ++I2) {
-    Name1 = *I1;
-    Name2 = *I2;
-    if (Name1 != Name2)
-      exitWithError("Function name mismatch", Filename2); // ???
-
-    if (error_code EC = Reader1->getFunctionCounts(Name1, Hash1, Counts1))
-      exitWithError(EC.message(), Filename1);
-    if (error_code EC = Reader2->getFunctionCounts(Name2, Hash2, Counts2))
-      exitWithError(EC.message(), Filename2);
-
-    if (Counts1.size() != Counts2.size())
-      exitWithError("Function count mismatch", Filename2); // ???
-    if (Hash1 != Hash2)
-      exitWithError("Function hash mismatch", Filename2); // ???
-
-    for (size_t II = 0, EE = Counts1.size(); II < EE; ++II) {
-      uint64_t Sum = Counts1[II] + Counts2[II];
-      if (Sum < Counts1[II])
-        exitWithError("Counter overflow", Filename2); // ???
-      NewCounts.push_back(Sum);
+/// Report \p Error through exitWithError, which terminates the process.
+///
+/// When \p Error belongs to the instrprof category and is
+/// instrprof_error::unrecognized_format, a hint suggesting the -sample option
+/// is passed along with the message. Every other error is reported with its
+/// message and \p Whence only.
+static void exitWithErrorCode(const std::error_code &Error, StringRef Whence = "") {
+  if (Error.category() == instrprof_category()) {
+    instrprof_error instrError = static_cast<instrprof_error>(Error.value());
+    if (instrError == instrprof_error::unrecognized_format) {
+      // Hint for common error of forgetting -sample for sample profiles.
+      exitWithError(Error.message(), Whence,
+                    "Perhaps you forgot to use the -sample option?");
     }
-
-    Writer.addFunctionCounts(Name1, Hash1, NewCounts.size(), NewCounts.data());
-
-    Counts1.clear();
-    Counts2.clear();
-    NewCounts.clear();
   }
-  if (I1 != E1 || I2 != E2)
-    exitWithError("Truncated file", Filename2);
-
-  Writer.write(Output);
+  exitWithError(Error.message(), Whence);
+}
 
-  return 0;
+// Kind of profile a subcommand operates on. Used as the value type of the
+// ProfileKind command-line option in merge_main and show_main, where the
+// enumerators are registered with clEnumVal.
+namespace {
+    enum ProfileKinds { instr, sample };
 }
 
-struct HashPrinter {
-  uint64_t Hash;
-  HashPrinter(uint64_t Hash) : Hash(Hash) {}
-  void print(raw_ostream &OS) const {
-    char Buf[18], *Cur = Buf;
-    *Cur++ = '0'; *Cur++ = 'x';
-    for (unsigned I = 16; I;) {
-      char Digit = 0xF & (Hash >> (--I * 4));
-      *Cur++ = (Digit < 10 ? '0' + Digit : 'A' + Digit - 10);
-    }
-    OS.write(Buf, 18);
+static void mergeInstrProfile(const cl::list<std::string> &Inputs,
+                              StringRef OutputFilename) {
+  if (OutputFilename.compare("-") == 0)
+    exitWithError("Cannot write indexed profdata format to stdout.");
+
+  std::error_code EC;
+  raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None);
+  if (EC)
+    exitWithErrorCode(EC, OutputFilename);
+
+  InstrProfWriter Writer;
+  for (const auto &Filename : Inputs) {
+    auto ReaderOrErr = InstrProfReader::create(Filename);
+    if (std::error_code ec = ReaderOrErr.getError())
+      exitWithErrorCode(ec, Filename);
+
+    auto Reader = std::move(ReaderOrErr.get());
+    for (auto &I : *Reader)
+      if (std::error_code EC = Writer.addRecord(std::move(I)))
+        errs() << Filename << ": " << I.Name << ": " << EC.message() << "\n";
+    if (Reader->hasError())
+      exitWithErrorCode(Reader->getError(), Filename);
   }
-};
-static raw_ostream &operator<<(raw_ostream &OS, const HashPrinter &Hash) {
-  Hash.print(OS);
-  return OS;
+  Writer.write(Output);
 }
 
-struct FreqPrinter {
-  double Freq;
-  FreqPrinter(double Freq) : Freq(Freq) {}
-  void print(raw_ostream &OS) const {
-    OS << (unsigned)(Freq * 100) << "." << ((unsigned)(Freq * 1000) % 10)
-       << ((unsigned)(Freq * 10000) % 10) << "%";
+static void mergeSampleProfile(const cl::list<std::string> &Inputs,
+                               StringRef OutputFilename,
+                               sampleprof::SampleProfileFormat OutputFormat) {
+  using namespace sampleprof;
+  auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
+  if (std::error_code EC = WriterOrErr.getError())
+    exitWithErrorCode(EC, OutputFilename);
+
+  auto Writer = std::move(WriterOrErr.get());
+  StringMap<FunctionSamples> ProfileMap;
+  SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
+  for (const auto &Filename : Inputs) {
+    auto ReaderOrErr =
+        SampleProfileReader::create(Filename, getGlobalContext());
+    if (std::error_code EC = ReaderOrErr.getError())
+      exitWithErrorCode(EC, Filename);
+
+    // We need to keep the readers around until after all the files are
+    // read so that we do not lose the function names stored in each
+    // reader's memory. The function names are needed to write out the
+    // merged profile map.
+    Readers.push_back(std::move(ReaderOrErr.get()));
+    const auto Reader = Readers.back().get();
+    if (std::error_code EC = Reader->read())
+      exitWithErrorCode(EC, Filename);
+
+    StringMap<FunctionSamples> &Profiles = Reader->getProfiles();
+    for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
+                                              E = Profiles.end();
+         I != E; ++I) {
+      StringRef FName = I->first();
+      FunctionSamples &Samples = I->second;
+      ProfileMap[FName].merge(Samples);
+    }
   }
-};
-static raw_ostream &operator<<(raw_ostream &OS, const FreqPrinter &Freq) {
-  Freq.print(OS);
-  return OS;
+  Writer->write(ProfileMap);
 }
 
-int show_main(int argc, const char *argv[]) {
-  cl::opt<std::string> Filename(cl::Positional, cl::Required,
-                                cl::desc("<profdata-file>"));
-
-  cl::opt<bool> ShowCounts("counts", cl::init(false));
-  cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false));
-  cl::opt<std::string> ShowFunction("function");
+/// Entry point of the "merge" subcommand.
+///
+/// Declares the command-line options, parses \p argc / \p argv, and hands the
+/// input list and output name to mergeInstrProfile or mergeSampleProfile
+/// depending on the selected profile kind. Returns 0; failures inside the
+/// merge helpers terminate the process instead of returning.
+static int merge_main(int argc, const char *argv[]) {
+  // One or more positional input file names.
+  cl::list<std::string> Inputs(cl::Positional, cl::Required, cl::OneOrMore,
+                               cl::desc("<filenames...>"));
 
   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
-                                      cl::init("-"),
+                                      // NOTE(review): the option is marked
+                                      // cl::Required yet still carries a
+                                      // default of "-", which
+                                      // mergeInstrProfile rejects.
+                                      cl::init("-"), cl::Required,
                                       cl::desc("Output file"));
   cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
                             cl::aliasopt(OutputFilename));
+  // Selects which merge helper runs; defaults to instr.
+  cl::opt<ProfileKinds> ProfileKind(
+      cl::desc("Profile kind:"), cl::init(instr),
+      cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
+                 clEnumVal(sample, "Sample profile"), clEnumValEnd));
+
+  // Only forwarded to mergeSampleProfile; defaults to the binary encoding.
+  cl::opt<sampleprof::SampleProfileFormat> OutputFormat(
+      cl::desc("Format of output profile (only meaningful with --sample)"),
+      cl::init(sampleprof::SPF_Binary),
+      cl::values(clEnumValN(sampleprof::SPF_Binary, "binary",
+                            "Binary encoding (default)"),
+                 clEnumValN(sampleprof::SPF_Text, "text", "Text encoding"),
+                 clEnumValN(sampleprof::SPF_GCC, "gcc", "GCC encoding"),
+                 clEnumValEnd));
 
-  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
-
-  std::unique_ptr<ProfileDataReader> Reader;
-  if (error_code EC = ProfileDataReader::create(Filename, Reader))
-    exitWithError(EC.message(), Filename);
+  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
 
-  if (OutputFilename.empty())
-    OutputFilename = "-";
+  if (ProfileKind == instr)
+    mergeInstrProfile(Inputs, OutputFilename);
+  else
+    mergeSampleProfile(Inputs, OutputFilename, OutputFormat);
 
-  std::string ErrorInfo;
-  raw_fd_ostream OS(OutputFilename.data(), ErrorInfo, sys::fs::F_Text);
-  if (!ErrorInfo.empty())
-    exitWithError(ErrorInfo, OutputFilename);
+  return 0;
+}
 
-  if (ShowAllFunctions && !ShowFunction.empty())
-    errs() << "warning: -function argument ignored: showing all functions\n";
+static int showInstrProfile(std::string Filename, bool ShowCounts,
+                            bool ShowIndirectCallTargets, bool ShowAllFunctions,
+                            std::string ShowFunction, raw_fd_ostream &OS) {
+  auto ReaderOrErr = InstrProfReader::create(Filename);
+  if (std::error_code EC = ReaderOrErr.getError())
+    exitWithErrorCode(EC, Filename);
+
+  auto Reader = std::move(ReaderOrErr.get());
+  uint64_t MaxFunctionCount = 0, MaxBlockCount = 0;
+  size_t ShownFunctions = 0, TotalFunctions = 0;
+  for (const auto &Func : *Reader) {
+    bool Show =
+        ShowAllFunctions || (!ShowFunction.empty() &&
+                             Func.Name.find(ShowFunction) != Func.Name.npos);
+
+    ++TotalFunctions;
+    assert(Func.Counts.size() > 0 && "function missing entry counter");
+    if (Func.Counts[0] > MaxFunctionCount)
+      MaxFunctionCount = Func.Counts[0];
 
-  uint64_t MaxBlockCount = 0, MaxFunctionCount = 0;
-  uint64_t Hash;
-  double CallFreq;
-  size_t ShownFunctions = false;
-  std::vector<uint64_t> Counts;
-  for (const auto &Name : *Reader) {
-    bool Show = ShowAllFunctions || Name.find(ShowFunction) != Name.npos;
-    if (error_code EC = Reader->getFunctionCounts(Name, Hash, Counts))
-      exitWithError(EC.message(), Filename);
-    if (error_code EC = Reader->getCallFrequency(Name, Hash, CallFreq))
-      exitWithError(EC.message(), Filename);
     if (Show) {
       if (!ShownFunctions)
         OS << "Counters:\n";
       ++ShownFunctions;
-      OS << "  " << Name << ":\n"
-         << "    Hash: " << HashPrinter(Hash) << "\n"
-         << "    Relative call frequency: " << FreqPrinter(CallFreq) << "\n"
-         << "    Counters: " << Counts.size() << "\n"
-         << "    Function count: " << Counts[0] << "\n";
-    }
 
-    if (Counts[0] > MaxFunctionCount)
-      MaxFunctionCount = Counts[0];
+      OS << "  " << Func.Name << ":\n"
+         << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
+         << "    Counters: " << Func.Counts.size() << "\n"
+         << "    Function count: " << Func.Counts[0] << "\n";
+      if (ShowIndirectCallTargets)
+        OS << "    Indirect Call Site Count: "
+           << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
+    }
 
     if (Show && ShowCounts)
       OS << "    Block counts: [";
-    for (size_t I = 1, E = Counts.size(); I < E; ++I) {
-      if (Counts[I] > MaxBlockCount)
-        MaxBlockCount = Counts[I];
+    for (size_t I = 1, E = Func.Counts.size(); I < E; ++I) {
+      if (Func.Counts[I] > MaxBlockCount)
+        MaxBlockCount = Func.Counts[I];
       if (Show && ShowCounts)
-        OS << (I == 1 ? "" : ", ") << Counts[I];
+        OS << (I == 1 ? "" : ", ") << Func.Counts[I];
     }
     if (Show && ShowCounts)
       OS << "]\n";
 
-    Counts.clear();
+    if (Show && ShowIndirectCallTargets) {
+      uint32_t NS = Func.getNumValueSites(IPVK_IndirectCallTarget);
+      OS << "    Indirect Target Results: \n";
+      for (size_t I = 0; I < NS; ++I) {
+        uint32_t NV = Func.getNumValueDataForSite(IPVK_IndirectCallTarget, I);
+        std::unique_ptr<InstrProfValueData[]> VD =
+            Func.getValueForSite(IPVK_IndirectCallTarget, I);
+        for (uint32_t V = 0; V < NV; V++) {
+          OS << "\t[ " << I << ", ";
+          OS << (const char *)VD[V].Value << ", " << VD[V].Count << " ]\n";
+        }
+      }
+    }
   }
+  if (Reader->hasError())
+    exitWithErrorCode(Reader->getError(), Filename);
 
   if (ShowAllFunctions || !ShowFunction.empty())
     OS << "Functions shown: " << ShownFunctions << "\n";
-  OS << "Total functions: " << Reader->numProfiledFunctions() << "\n";
+  OS << "Total functions: " << TotalFunctions << "\n";
   OS << "Maximum function count: " << MaxFunctionCount << "\n";
   OS << "Maximum internal block count: " << MaxBlockCount << "\n";
   return 0;
 }
 
-int generate_main(int argc, const char *argv[]) {
-  cl::opt<std::string> InputName(cl::Positional, cl::Required,
-                                 cl::desc("<input-file>"));
+static int showSampleProfile(std::string Filename, bool ShowCounts,
+                             bool ShowAllFunctions, std::string ShowFunction,
+                             raw_fd_ostream &OS) {
+  using namespace sampleprof;
+  auto ReaderOrErr = SampleProfileReader::create(Filename, getGlobalContext());
+  if (std::error_code EC = ReaderOrErr.getError())
+    exitWithErrorCode(EC, Filename);
+
+  auto Reader = std::move(ReaderOrErr.get());
+  if (std::error_code EC = Reader->read())
+    exitWithErrorCode(EC, Filename);
+
+  if (ShowAllFunctions || ShowFunction.empty())
+    Reader->dump(OS);
+  else
+    Reader->dumpFunctionProfile(ShowFunction, OS);
+
+  return 0;
+}
+
+/// Entry point of the "show" subcommand.
+///
+/// Declares the command-line options, parses \p argc / \p argv, opens the
+/// output stream, and forwards to showInstrProfile or showSampleProfile
+/// depending on the selected profile kind. Returns the value of the chosen
+/// helper; a failure to open the output terminates the process through
+/// exitWithErrorCode.
+static int show_main(int argc, const char *argv[]) {
   cl::opt<std::string> Filename(cl::Positional, cl::Required,
                                 cl::desc("<profdata-file>"));
+
+  cl::opt<bool> ShowCounts("counts", cl::init(false),
+                           cl::desc("Show counter values for shown functions"));
+  cl::opt<bool> ShowIndirectCallTargets(
+      "ic-targets", cl::init(false),
+      cl::desc("Show indirect call site target values for shown functions"));
+  cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
+                                 cl::desc("Details for every function"));
+  cl::opt<std::string> ShowFunction("function",
+                                    cl::desc("Details for matching functions"));
 
   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
-                                      cl::init("-"),
-                                      cl::desc("Output file"));
+                                      cl::init("-"), cl::desc("Output file"));
   cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
                             cl::aliasopt(OutputFilename));
+  // Selects which show helper runs; defaults to instr.
+  cl::opt<ProfileKinds> ProfileKind(
+      cl::desc("Profile kind:"), cl::init(instr),
+      cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
+                 clEnumVal(sample, "Sample profile"), clEnumValEnd));
 
-  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data generator\n");
+  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
 
   if (OutputFilename.empty())
     OutputFilename = "-";
 
-  std::string ErrorInfo;
-  raw_fd_ostream Output(OutputFilename.data(), ErrorInfo, sys::fs::F_Text);
-  if (!ErrorInfo.empty())
-    exitWithError(ErrorInfo, OutputFilename);
-
-  if (Output.is_displayed())
-    exitWithError("Refusing to write a binary file to stdout", OutputFilename);
-
-  std::unique_ptr<MemoryBuffer> Buffer;
-  if (error_code EC = MemoryBuffer::getFile(InputName, Buffer))
-    exitWithError(EC.message(), InputName);
-
-  ProfileDataWriter Writer;
-  StringRef Name;
-  uint64_t Hash, NumCounters;
-  std::vector<uint64_t> Counters;
-  for (line_iterator I(*Buffer, '#'); !I.is_at_end(); ++I) {
-    if (I->empty())
-      continue;
-    Name = *I;
-    if ((++I).is_at_end())
-      exitWithError("Truncated file", InputName, I.line_number());
-    if (I->getAsInteger(10, Hash))
-      exitWithError("Failed to read hash", InputName, I.line_number());
-    if ((++I).is_at_end())
-      exitWithError("Truncated file", InputName, I.line_number());
-    if (I->getAsInteger(10, NumCounters))
-      exitWithError("Failed to read num counters", InputName, I.line_number());
-    for (uint64_t CurCounter = 0; CurCounter < NumCounters; ++CurCounter) {
-      uint64_t Counter;
-      if ((++I).is_at_end())
-        exitWithError("Truncated file", InputName, I.line_number());
-      if (I->getAsInteger(10, Counter))
-        exitWithError("Failed to read counter", InputName, I.line_number());
-      Counters.push_back(Counter);
-    }
-    Writer.addFunctionCounts(Name, Hash, NumCounters, Counters.data());
-    Counters.clear();
-  }
+  // The summary is opened with the text-mode flag (sys::fs::F_Text).
+  std::error_code EC;
+  raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
+  if (EC)
+      exitWithErrorCode(EC, OutputFilename);
 
-  Writer.write(Output);
+  // -all-functions wins over -function; the user is only warned.
+  if (ShowAllFunctions && !ShowFunction.empty())
+    errs() << "warning: -function argument ignored: showing all functions\n";
 
-  return 0;
+  if (ProfileKind == instr)
+    return showInstrProfile(Filename, ShowCounts, ShowIndirectCallTargets,
+                            ShowAllFunctions, ShowFunction, OS);
+  else
+    return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
+                             ShowFunction, OS);
 }
 
 int main(int argc, const char *argv[]) {
@@ -286,14 +296,12 @@ int main(int argc, const char *argv[]) {
 
   StringRef ProgName(sys::path::filename(argv[0]));
   if (argc > 1) {
-    int (*func)(int, const char *[]) = 0;
+    int (*func)(int, const char *[]) = nullptr;
 
     if (strcmp(argv[1], "merge") == 0)
       func = merge_main;
     else if (strcmp(argv[1], "show") == 0)
       func = show_main;
-    else if (strcmp(argv[1], "generate") == 0)
-      func = generate_main;
 
     if (func) {
       std::string Invocation(ProgName.str() + " " + argv[1]);
@@ -308,7 +316,7 @@ int main(int argc, const char *argv[]) {
       errs() << "OVERVIEW: LLVM profile data tools\n\n"
              << "USAGE: " << ProgName << " <command> [args...]\n"
              << "USAGE: " << ProgName << " <command> -help\n\n"
-             << "Available commands: merge, show, generate\n";
+             << "Available commands: merge, show\n";
       return 0;
     }
   }
@@ -318,6 +326,6 @@ int main(int argc, const char *argv[]) {
   else
     errs() << ProgName << ": Unknown command!\n";
 
-  errs() << "USAGE: " << ProgName << " <merge|show|generate> [args...]\n";
+  errs() << "USAGE: " << ProgName << " <merge|show> [args...]\n";
   return 1;
 }