[PGO] Fix a bug in InstProfWriter addRecord
[oota-llvm.git] / unittests / ProfileData / InstrProfTest.cpp
index 051bf391b9531fa76fb0e0299f2167eb3e94939a..a4fadd3d65d93256005720e84c662525401109ca 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/Support/Compression.h"
 #include "gtest/gtest.h"
 
 #include <cstdarg>
@@ -163,6 +164,62 @@ TEST_F(InstrProfTest, get_icall_data_read_write) {
             [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
               return VD1.Count > VD2.Count;
             });
+
+  ASSERT_EQ(3U, VD[0].Count);
+  ASSERT_EQ(2U, VD[1].Count);
+  ASSERT_EQ(1U, VD[2].Count);
+
+  ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
+  ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
+  ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
+}
+
+TEST_F(InstrProfTest, get_icall_data_read_write_with_weight) {
+  InstrProfRecord Record1("caller", 0x1234, {1, 2});
+  InstrProfRecord Record2("callee1", 0x1235, {3, 4});
+  InstrProfRecord Record3("callee2", 0x1235, {3, 4});
+  InstrProfRecord Record4("callee3", 0x1235, {3, 4});
+
+  // 4 value sites.
+  Record1.reserveSites(IPVK_IndirectCallTarget, 4);
+  InstrProfValueData VD0[] = {{(uint64_t) "callee1", 1},
+                              {(uint64_t) "callee2", 2},
+                              {(uint64_t) "callee3", 3}};
+  Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr);
+  // No value profile data at the second site.
+  Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
+  InstrProfValueData VD2[] = {{(uint64_t) "callee1", 1},
+                              {(uint64_t) "callee2", 2}};
+  Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr);
+  InstrProfValueData VD3[] = {{(uint64_t) "callee1", 1}};
+  Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr);
+
+  Writer.addRecord(std::move(Record1), 10);
+  Writer.addRecord(std::move(Record2));
+  Writer.addRecord(std::move(Record3));
+  Writer.addRecord(std::move(Record4));
+  auto Profile = Writer.writeBuffer();
+  readProfile(std::move(Profile));
+
+  ErrorOr<InstrProfRecord> R = Reader->getInstrProfRecord("caller", 0x1234);
+  ASSERT_TRUE(NoError(R.getError()));
+  ASSERT_EQ(4U, R.get().getNumValueSites(IPVK_IndirectCallTarget));
+  ASSERT_EQ(3U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
+  ASSERT_EQ(0U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 1));
+  ASSERT_EQ(2U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 2));
+  ASSERT_EQ(1U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
+
+  std::unique_ptr<InstrProfValueData[]> VD =
+      R.get().getValueForSite(IPVK_IndirectCallTarget, 0);
+  // Now sort the targets according to frequency.
+  std::sort(&VD[0], &VD[3],
+            [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
+              return VD1.Count > VD2.Count;
+            });
+  ASSERT_EQ(30U, VD[0].Count);
+  ASSERT_EQ(20U, VD[1].Count);
+  ASSERT_EQ(10U, VD[2].Count);
+
   ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
   ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
   ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
@@ -226,57 +283,63 @@ TEST_F(InstrProfTest, get_icall_data_read_write_big_endian) {
 }
 
 TEST_F(InstrProfTest, get_icall_data_merge1) {
-  InstrProfRecord Record11("caller", 0x1234, {1, 2});
-  InstrProfRecord Record12("caller", 0x1234, {1, 2});
-  InstrProfRecord Record2("callee1", 0x1235, {3, 4});
-  InstrProfRecord Record3("callee2", 0x1235, {3, 4});
-  InstrProfRecord Record4("callee3", 0x1235, {3, 4});
-  InstrProfRecord Record5("callee3", 0x1235, {3, 4});
-  InstrProfRecord Record6("callee4", 0x1235, {3, 5});
+  static const char caller[] = "caller";
+  static const char callee1[] = "callee1";
+  static const char callee2[] = "callee2";
+  static const char callee3[] = "callee3";
+  static const char callee4[] = "callee4";
+
+  InstrProfRecord Record11(caller, 0x1234, {1, 2});
+  InstrProfRecord Record12(caller, 0x1234, {1, 2});
+  InstrProfRecord Record2(callee1, 0x1235, {3, 4});
+  InstrProfRecord Record3(callee2, 0x1235, {3, 4});
+  InstrProfRecord Record4(callee3, 0x1235, {3, 4});
+  InstrProfRecord Record5(callee3, 0x1235, {3, 4});
+  InstrProfRecord Record6(callee4, 0x1235, {3, 5});
 
   // 5 value sites.
   Record11.reserveSites(IPVK_IndirectCallTarget, 5);
-  InstrProfValueData VD0[] = {{(uint64_t) "callee1", 1},
-                              {(uint64_t) "callee2", 2},
-                              {(uint64_t) "callee3", 3},
-                              {(uint64_t) "callee4", 4}};
+  InstrProfValueData VD0[] = {{uint64_t(callee1), 1},
+                              {uint64_t(callee2), 2},
+                              {uint64_t(callee3), 3},
+                              {uint64_t(callee4), 4}};
   Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr);
 
   // No value profile data at the second site.
   Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
 
-  InstrProfValueData VD2[] = {{(uint64_t) "callee1", 1},
-                              {(uint64_t) "callee2", 2},
-                              {(uint64_t) "callee3", 3}};
+  InstrProfValueData VD2[] = {{uint64_t(callee1), 1},
+                              {uint64_t(callee2), 2},
+                              {uint64_t(callee3), 3}};
   Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
 
-  InstrProfValueData VD3[] = {{(uint64_t) "callee1", 1}};
+  InstrProfValueData VD3[] = {{uint64_t(callee1), 1}};
   Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr);
 
-  InstrProfValueData VD4[] = {{(uint64_t) "callee1", 1},
-                              {(uint64_t) "callee2", 2},
-                              {(uint64_t) "callee3", 3}};
+  InstrProfValueData VD4[] = {{uint64_t(callee1), 1},
+                              {uint64_t(callee2), 2},
+                              {uint64_t(callee3), 3}};
   Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr);
 
   // A different record for the same caller.
   Record12.reserveSites(IPVK_IndirectCallTarget, 5);
-  InstrProfValueData VD02[] = {{(uint64_t) "callee2", 5},
-                               {(uint64_t) "callee3", 3}};
+  InstrProfValueData VD02[] = {{uint64_t(callee2), 5},
+                               {uint64_t(callee3), 3}};
   Record12.addValueData(IPVK_IndirectCallTarget, 0, VD02, 2, nullptr);
 
   // No value profile data at the second site.
   Record12.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
 
-  InstrProfValueData VD22[] = {{(uint64_t) "callee2", 1},
-                               {(uint64_t) "callee3", 3},
-                               {(uint64_t) "callee4", 4}};
+  InstrProfValueData VD22[] = {{uint64_t(callee2), 1},
+                               {uint64_t(callee3), 3},
+                               {uint64_t(callee4), 4}};
   Record12.addValueData(IPVK_IndirectCallTarget, 2, VD22, 3, nullptr);
 
   Record12.addValueData(IPVK_IndirectCallTarget, 3, nullptr, 0, nullptr);
 
-  InstrProfValueData VD42[] = {{(uint64_t) "callee1", 1},
-                               {(uint64_t) "callee2", 2},
-                               {(uint64_t) "callee3", 3}};
+  InstrProfValueData VD42[] = {{uint64_t(callee1), 1},
+                               {uint64_t(callee2), 2},
+                               {uint64_t(callee3), 3}};
   Record12.addValueData(IPVK_IndirectCallTarget, 4, VD42, 3, nullptr);
 
   Writer.addRecord(std::move(Record11));
@@ -351,34 +414,53 @@ TEST_F(InstrProfTest, get_icall_data_merge1) {
 }
 
 TEST_F(InstrProfTest, get_icall_data_merge1_saturation) {
-  const uint64_t Max = std::numeric_limits<uint64_t>::max();
-
-  InstrProfRecord Record1("caller", 0x1234, {1});
-  InstrProfRecord Record2("caller", 0x1234, {1});
-  InstrProfRecord Record3("callee1", 0x1235, {3, 4});
-
-  Record1.reserveSites(IPVK_IndirectCallTarget, 1);
-  InstrProfValueData VD1[] = {{(uint64_t) "callee1", 1}};
-  Record1.addValueData(IPVK_IndirectCallTarget, 0, VD1, 1, nullptr);
+  static const char bar[] = "bar";
 
-  Record2.reserveSites(IPVK_IndirectCallTarget, 1);
-  InstrProfValueData VD2[] = {{(uint64_t) "callee1", Max}};
-  Record2.addValueData(IPVK_IndirectCallTarget, 0, VD2, 1, nullptr);
+  const uint64_t Max = std::numeric_limits<uint64_t>::max();
 
-  Writer.addRecord(std::move(Record1));
-  Writer.addRecord(std::move(Record2));
-  Writer.addRecord(std::move(Record3));
+  InstrProfRecord Record1("foo", 0x1234, {1});
+  auto Result1 = Writer.addRecord(std::move(Record1));
+  ASSERT_EQ(Result1, instrprof_error::success);
+
+  // Verify counter overflow.
+  InstrProfRecord Record2("foo", 0x1234, {Max});
+  auto Result2 = Writer.addRecord(std::move(Record2));
+  ASSERT_EQ(Result2, instrprof_error::counter_overflow);
+
+  InstrProfRecord Record3(bar, 0x9012, {8});
+  auto Result3 = Writer.addRecord(std::move(Record3));
+  ASSERT_EQ(Result3, instrprof_error::success);
+
+  InstrProfRecord Record4("baz", 0x5678, {3, 4});
+  Record4.reserveSites(IPVK_IndirectCallTarget, 1);
+  InstrProfValueData VD4[] = {{uint64_t(bar), 1}};
+  Record4.addValueData(IPVK_IndirectCallTarget, 0, VD4, 1, nullptr);
+  auto Result4 = Writer.addRecord(std::move(Record4));
+  ASSERT_EQ(Result4, instrprof_error::success);
+
+  // Verify value data counter overflow.
+  InstrProfRecord Record5("baz", 0x5678, {5, 6});
+  Record5.reserveSites(IPVK_IndirectCallTarget, 1);
+  InstrProfValueData VD5[] = {{uint64_t(bar), Max}};
+  Record5.addValueData(IPVK_IndirectCallTarget, 0, VD5, 1, nullptr);
+  auto Result5 = Writer.addRecord(std::move(Record5));
+  ASSERT_EQ(Result5, instrprof_error::counter_overflow);
 
   auto Profile = Writer.writeBuffer();
   readProfile(std::move(Profile));
 
   // Verify saturation of counts.
-  ErrorOr<InstrProfRecord> R = Reader->getInstrProfRecord("caller", 0x1234);
-  ASSERT_TRUE(NoError(R.getError()));
-  ASSERT_EQ(1U, R.get().getNumValueSites(IPVK_IndirectCallTarget));
+  ErrorOr<InstrProfRecord> ReadRecord1 =
+      Reader->getInstrProfRecord("foo", 0x1234);
+  ASSERT_TRUE(NoError(ReadRecord1.getError()));
+  ASSERT_EQ(Max, ReadRecord1.get().Counts[0]);
+
+  ErrorOr<InstrProfRecord> ReadRecord2 =
+      Reader->getInstrProfRecord("baz", 0x5678);
+  ASSERT_EQ(1U, ReadRecord2.get().getNumValueSites(IPVK_IndirectCallTarget));
   std::unique_ptr<InstrProfValueData[]> VD =
-          R.get().getValueForSite(IPVK_IndirectCallTarget, 0);
-  ASSERT_EQ(StringRef("callee1"), StringRef((const char *)VD[0].Value, 7));
+      ReadRecord2.get().getValueForSite(IPVK_IndirectCallTarget, 0);
+  ASSERT_EQ(StringRef("bar"), StringRef((const char *)VD[0].Value, 3));
   ASSERT_EQ(Max, VD[0].Count);
 }
 
@@ -409,7 +491,7 @@ TEST_F(InstrProfTest, runtime_value_prof_data_read_write) {
   initializeValueProfRuntimeRecord(&RTRecord, &NumValueSites[0],
                                    &ValueProfNodes[0]);
 
-  ValueProfData *VPData = serializeValueProfDataFromRT(&RTRecord);
+  ValueProfData *VPData = serializeValueProfDataFromRT(&RTRecord, nullptr);
 
   InstrProfRecord Record("caller", 0x1234, {1ULL << 31, 2});
 
@@ -487,4 +569,148 @@ TEST_F(InstrProfTest, get_max_function_count) {
   ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount());
 }
 
+// Checks that the counters of a record added with a weight are read back
+// multiplied by that weight.
+TEST_F(InstrProfTest, get_weighted_function_counts) {
+  // Same function name, two different hashes, weights 3 and 5.
+  InstrProfRecord WeightedBy3("foo", 0x1234, {1, 2});
+  Writer.addRecord(std::move(WeightedBy3), 3);
+  InstrProfRecord WeightedBy5("foo", 0x1235, {3, 4});
+  Writer.addRecord(std::move(WeightedBy5), 5);
+  readProfile(Writer.writeBuffer());
+
+  std::vector<uint64_t> Scaled;
+
+  // {1, 2} scaled by 3.
+  ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1234, Scaled)));
+  ASSERT_EQ(2U, Scaled.size());
+  ASSERT_EQ(3U, Scaled[0]);
+  ASSERT_EQ(6U, Scaled[1]);
+
+  // {3, 4} scaled by 5.
+  ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1235, Scaled)));
+  ASSERT_EQ(2U, Scaled.size());
+  ASSERT_EQ(15U, Scaled[0]);
+  ASSERT_EQ(20U, Scaled[1]);
+}
+
+TEST_F(InstrProfTest, instr_prof_symtab_test) {
+  std::vector<StringRef> FuncNames;
+  FuncNames.push_back("func1");
+  FuncNames.push_back("func2");
+  FuncNames.push_back("func3");
+  FuncNames.push_back("bar1");
+  FuncNames.push_back("bar2");
+  FuncNames.push_back("bar3");
+  InstrProfSymtab Symtab;
+  Symtab.create(FuncNames);
+  StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("func1"));
+  ASSERT_EQ(StringRef("func1"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("func2"));
+  ASSERT_EQ(StringRef("func2"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("func3"));
+  ASSERT_EQ(StringRef("func3"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("bar1"));
+  ASSERT_EQ(StringRef("bar1"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("bar2"));
+  ASSERT_EQ(StringRef("bar2"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("bar3"));
+  ASSERT_EQ(StringRef("bar3"), R);
+
+  // Now incrementally update the symtab
+  Symtab.addFuncName("blah_1");
+  Symtab.addFuncName("blah_2");
+  Symtab.addFuncName("blah_3");
+  // Finalize it
+  Symtab.finalizeSymtab();
+
+  // Check again
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("blah_1"));
+  ASSERT_EQ(StringRef("blah_1"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("blah_2"));
+  ASSERT_EQ(StringRef("blah_2"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("blah_3"));
+  ASSERT_EQ(StringRef("blah_3"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("func1"));
+  ASSERT_EQ(StringRef("func1"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("func2"));
+  ASSERT_EQ(StringRef("func2"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("func3"));
+  ASSERT_EQ(StringRef("func3"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("bar1"));
+  ASSERT_EQ(StringRef("bar1"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("bar2"));
+  ASSERT_EQ(StringRef("bar2"), R);
+  R = Symtab.getFuncName(IndexedInstrProf::ComputeHash("bar3"));
+  ASSERT_EQ(StringRef("bar3"), R);
+}
+
+TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
+  std::vector<std::string> FuncNames1;
+  std::vector<std::string> FuncNames2;
+  for (int I = 0; I < 10 * 1024; I++) {
+    std::string str;
+    raw_string_ostream OS(str);
+    OS << "func_" << I;
+    FuncNames1.push_back(OS.str());
+    str.clear();
+    OS << "fooooooooooooooo_" << I;
+    FuncNames1.push_back(OS.str());
+    str.clear();
+    OS << "BAR_" << I;
+    FuncNames2.push_back(OS.str());
+    str.clear();
+    OS << "BlahblahBlahblahBar_" << I;
+    FuncNames2.push_back(OS.str());
+  }
+
+  for (int Padding = 0; Padding < 10; Padding++) {
+    for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
+      // Compressing:
+      std::string FuncNameStrings1;
+      collectPGOFuncNameStrings(FuncNames1,
+                                (DoCompression != 0 && zlib::isAvailable()),
+                                FuncNameStrings1);
+
+      // Compressing:
+      std::string FuncNameStrings2;
+      collectPGOFuncNameStrings(FuncNames2,
+                                (DoCompression != 0 && zlib::isAvailable()),
+                                FuncNameStrings2);
+
+      // Join with paddings:
+      std::string FuncNameStrings = FuncNameStrings1;
+      for (int P = 0; P < Padding; P++) {
+        FuncNameStrings.push_back('\0');
+      }
+      FuncNameStrings += FuncNameStrings2;
+
+      // Now decompress:
+      InstrProfSymtab Symtab;
+      Symtab.create(StringRef(FuncNameStrings));
+
+      // Now do the checks:
+      // First sampling some data points:
+      StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(FuncNames1[0]));
+      ASSERT_EQ(StringRef("func_0"), R);
+      R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(FuncNames1[1]));
+      ASSERT_EQ(StringRef("fooooooooooooooo_0"), R);
+      R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(FuncNames1[998]));
+      ASSERT_EQ(StringRef("func_499"), R);
+      R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(FuncNames1[999]));
+      ASSERT_EQ(StringRef("fooooooooooooooo_499"), R);
+      R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(FuncNames2[100]));
+      ASSERT_EQ(StringRef("BAR_50"), R);
+      R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(FuncNames2[101]));
+      ASSERT_EQ(StringRef("BlahblahBlahblahBar_50"), R);
+      for (int I = 0; I < 10 * 1024; I++) {
+        std::string N[4];
+        N[0] = FuncNames1[2 * I];
+        N[1] = FuncNames1[2 * I + 1];
+        N[2] = FuncNames2[2 * I];
+        N[3] = FuncNames2[2 * I + 1];
+        for (int J = 0; J < 4; J++) {
+          StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(N[J]));
+          ASSERT_EQ(StringRef(N[J]), R);
+        }
+      }
+    }
+  }
+}
+
 } // end anonymous namespace