Sort #include lines
[folly.git] / folly / experimental / test / EliasFanoCodingTest.cpp
index 4af8db9f2cea4a1515a85e49eae2b95b540edd5f..55af37d84d189cace91cbd8d0fe8b020eff99739 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
  */
 
 #include <algorithm>
+#include <limits>
 #include <numeric>
 #include <random>
 #include <vector>
 
 using namespace folly::compression;
 
-#if defined(EF_TEST_NEHALEM)
-#define EF_TEST_ARCH Nehalem
-#elif defined(EF_TEST_HASWELL)
-#define EF_TEST_ARCH Haswell
-#else
+#ifndef EF_TEST_ARCH
 #define EF_TEST_ARCH Default
-#endif
+#endif  // EF_TEST_ARCH
 
-template <size_t kVersion>
-struct TestType {
-  static constexpr size_t Version = kVersion;
-};
+namespace {
+
+uint8_t slowDefaultNumLowerBits(size_t upperBound, size_t size) {
+  if (size == 0 || upperBound < size) {
+    return 0;
+  }
+  // floor(log(upperBound / size));
+  return uint8_t(folly::findLastSet(upperBound / size) - 1);
+}
+
+}  // namespace
+
+TEST(EliasFanoCoding, defaultNumLowerBits) {
+  // Verify that slowDefaultNumLowerBits and optimized
+  // Encoder::defaultNumLowerBits agree.
+  static constexpr size_t kNumIterations = 2500;
+  auto compare = [](size_t upperBound, size_t size) {
+    using Encoder = EliasFanoEncoderV2<size_t>;
+    EXPECT_EQ(int(slowDefaultNumLowerBits(upperBound, size)),
+              int(Encoder::defaultNumLowerBits(upperBound, size)))
+        << upperBound << " " << size;
+  };
+  auto batch = [&compare](size_t initialUpperBound) {
+    for (size_t upperBound = initialUpperBound, i = 0;
+         i < kNumIterations;
+         ++i, --upperBound) {
+      // Test "size" values close to "upperBound".
+      for (size_t size = upperBound, j = 0; j < kNumIterations; ++j, --size) {
+        compare(upperBound, size);
+      }
+      // Sample "size" values between [0, upperBound].
+      for (size_t size = upperBound;
+           size > 1 + upperBound / kNumIterations;
+           size -= 1 + upperBound / kNumIterations) {
+        compare(upperBound, size);
+      }
+      // Test "size" values close to 0.
+      for (size_t size = 0; size < kNumIterations; ++size) {
+        compare(upperBound, size);
+      }
+    }
+  };
+  batch(std::numeric_limits<size_t>::max());
+  batch(kNumIterations + 1312213123);
+  batch(kNumIterations);
+
+  std::mt19937 gen;
+  std::uniform_int_distribution<size_t> distribution;
+  for (size_t i = 0; i < kNumIterations; ++i) {
+    const auto a = distribution(gen);
+    const auto b = distribution(gen);
+    compare(std::max(a, b), std::min(a, b));
+  }
+}
 
-template <class T>
 class EliasFanoCodingTest : public ::testing::Test {
  public:
   void doTestEmpty() {
-    typedef EliasFanoEncoder<uint32_t, size_t, 0, 0, T::Version> Encoder;
+    typedef EliasFanoEncoderV2<uint32_t, size_t> Encoder;
     typedef EliasFanoReader<Encoder> Reader;
     testEmpty<Reader, Encoder>();
   }
 
-  template <size_t kSkipQuantum, size_t kForwardQuantum>
+  template <size_t kSkipQuantum, size_t kForwardQuantum, class SizeType>
   void doTestAll() {
-    typedef EliasFanoEncoder<
-      uint32_t, uint32_t, kSkipQuantum, kForwardQuantum, T::Version> Encoder;
-    typedef EliasFanoReader<Encoder, instructions::EF_TEST_ARCH> Reader;
+    typedef EliasFanoEncoderV2<
+      uint32_t, uint32_t, kSkipQuantum, kForwardQuantum> Encoder;
+    using Reader =
+        EliasFanoReader<Encoder, instructions::EF_TEST_ARCH, false, SizeType>;
+    testAll<Reader, Encoder>({0});
     testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
     testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
   }
 };
 
-typedef ::testing::Types<TestType<0>, TestType<1>> TestTypes;
-TYPED_TEST_CASE(EliasFanoCodingTest, TestTypes);
+TEST_F(EliasFanoCodingTest, Empty) {
+  doTestEmpty();
+}
 
-TYPED_TEST(EliasFanoCodingTest, Empty) {
-  TestFixture::doTestEmpty();
+TEST_F(EliasFanoCodingTest, Simple) {
+  doTestAll<0, 0, uint32_t>();
+  doTestAll<0, 0, size_t>();
 }
 
-TYPED_TEST(EliasFanoCodingTest, Simple) {
-  TestFixture::template doTestAll<0, 0>();
+TEST_F(EliasFanoCodingTest, SkipPointers) {
+  doTestAll<128, 0, uint32_t>();
+  doTestAll<128, 0, size_t>();
 }
 
-TYPED_TEST(EliasFanoCodingTest, SkipPointers) {
-  TestFixture::template doTestAll<128, 0>();
+TEST_F(EliasFanoCodingTest, ForwardPointers) {
+  doTestAll<0, 128, uint32_t>();
+  doTestAll<0, 128, size_t>();
 }
 
-TYPED_TEST(EliasFanoCodingTest, ForwardPointers) {
-  TestFixture::template doTestAll<0, 128>();
+TEST_F(EliasFanoCodingTest, SkipForwardPointers) {
+  doTestAll<128, 128, uint32_t>();
+  doTestAll<128, 128, size_t>();
 }
 
-TYPED_TEST(EliasFanoCodingTest, SkipForwardPointers) {
-  TestFixture::template doTestAll<128, 128>();
+TEST_F(EliasFanoCodingTest, Select64) {
+  typedef instructions::EF_TEST_ARCH instr;
+  constexpr uint64_t kPrime = uint64_t(-59);
+  for (uint64_t x = kPrime, i = 0; i < (1 << 20); x *= kPrime, i += 1) {
+    size_t w = instr::popcount(x);
+    for (size_t k = 0; k < w; ++k) {
+      auto pos = folly::select64<instr>(x, k);
+      CHECK_EQ((x >> pos) & 1, 1);
+      CHECK_EQ(instr::popcount(x & ((uint64_t(1) << pos) - 1)), k);
+    }
+  }
 }
 
-namespace bm {
+TEST_F(EliasFanoCodingTest, BugLargeGapInUpperBits) { // t16274876
+  typedef EliasFanoEncoderV2<uint32_t, uint32_t, 2, 2> Encoder;
+  typedef EliasFanoReader<Encoder, instructions::EF_TEST_ARCH> Reader;
+  constexpr uint32_t kLargeValue = 127;
+
+  // Build a list where the upper bits have a large gap after the
+  // first element, so that we need to reposition in the upper bits
+  // using skips to position the iterator on the second element.
+  std::vector<uint32_t> data = {0, kLargeValue};
+  for (uint32_t i = 0; i < kLargeValue; ++i) {
+    data.push_back(data.back() + 1);
+  }
+  auto list = Encoder::encode(data.begin(), data.end());
 
-constexpr size_t k1M = 1000000;
-constexpr size_t kVersion = 1;
+  {
+    Reader reader(list);
+    ASSERT_TRUE(reader.skipTo(kLargeValue - 1));
+    ASSERT_EQ(kLargeValue, reader.value());
+    ASSERT_EQ(0, reader.previousValue());
+  }
+
+  list.free();
+}
 
-typedef EliasFanoEncoder<uint32_t, uint32_t, 128, 128, kVersion> Encoder;
+namespace bm {
+
+typedef EliasFanoEncoderV2<uint32_t, uint32_t, 128, 128> Encoder;
 typedef EliasFanoReader<Encoder> Reader;
 
 std::vector<uint32_t> data;
@@ -95,13 +178,14 @@ std::vector<size_t> order;
 std::vector<uint32_t> encodeSmallData;
 std::vector<uint32_t> encodeLargeData;
 
-typename Encoder::CompressedList list;
+std::vector<std::pair<size_t, size_t>> numLowerBitsInput;
+
+typename Encoder::MutableCompressedList list;
 
 void init() {
   std::mt19937 gen;
 
   data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
-  //data = loadList("/home/philipp/pl_test_dump.txt");
   list = Encoder::encode(data.begin(), data.end());
 
   order.resize(data.size());
@@ -110,6 +194,13 @@ void init() {
 
   encodeSmallData = generateRandomList(10, 100 * 1000, gen);
   encodeLargeData = generateRandomList(1000 * 1000, 100 * 1000 * 1000, gen);
+
+  std::uniform_int_distribution<size_t> distribution;
+  for (size_t i = 0; i < 10000; ++i) {
+    const auto a = distribution(gen);
+    const auto b = distribution(gen);
+    numLowerBitsInput.emplace_back(std::max(a, b), std::min(a, b));
+  }
 }
 
 void free() {
@@ -174,44 +265,60 @@ BENCHMARK(Encode) {
 
 BENCHMARK_DRAW_LINE();
 
-BENCHMARK(Select64, iters) {
-  typedef instructions::EF_TEST_ARCH instr;
-  constexpr uint64_t kPrime = uint64_t(-59);
-  for (uint64_t x = kPrime, i = 0; i < iters; x *= kPrime, i += 1) {
-    size_t w = instr::popcount(x);
-    folly::doNotOptimizeAway(folly::select64<instr>(x, w - 1));
+BENCHMARK(defaultNumLowerBits, iters) {
+  using Encoder = EliasFanoEncoderV2<size_t>;
+
+  size_t i = 0;
+  while (iters--) {
+    const auto& p = bm::numLowerBitsInput[i];
+    folly::doNotOptimizeAway(Encoder::defaultNumLowerBits(p.first, p.second));
+    if (++i == bm::numLowerBitsInput.size()) {
+      i = 0;
+    }
+  }
+}
+
+BENCHMARK(slowDefaultNumLowerBits, iters) {
+  size_t i = 0;
+  while (iters--) {
+    const auto& p = bm::numLowerBitsInput[i];
+    folly::doNotOptimizeAway(slowDefaultNumLowerBits(p.first, p.second));
+    if (++i == bm::numLowerBitsInput.size()) {
+      i = 0;
+    }
   }
 }
 
 #if 0
-Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz (turbo off),
-using instructions::Haswell and GCC 4.9 with --bm_min_usec 100000.
+Intel(R) Xeon(R) CPU E5-2678 v3 @ 2.50GHz (turbo on),
+using -DEF_TEST_ARCH Haswell and GCC 4.9 with --bm_min_usec 100000.
 ============================================================================
 folly/experimental/test/EliasFanoCodingTest.cpp relative  time/iter  iters/s
 ============================================================================
-Next                                                         2.52ns  397.28M
-Skip_ForwardQ128(1)                                          3.92ns  255.28M
-Skip_ForwardQ128(2)                                          5.08ns  197.04M
-Skip_ForwardQ128(4_pm_1)                                     7.04ns  142.02M
-Skip_ForwardQ128(16_pm_4)                                   19.68ns   50.82M
-Skip_ForwardQ128(64_pm_16)                                  27.58ns   36.26M
-Skip_ForwardQ128(256_pm_64)                                 32.49ns   30.78M
-Skip_ForwardQ128(1024_pm_256)                               33.39ns   29.95M
-Jump_ForwardQ128                                            34.05ns   29.37M
+Next                                                         2.31ns  433.77M
+Skip_ForwardQ128(1)                                          3.73ns  267.93M
+Skip_ForwardQ128(2)                                          4.89ns  204.34M
+Skip_ForwardQ128(4_pm_1)                                     6.86ns  145.79M
+Skip_ForwardQ128(16_pm_4)                                   18.92ns   52.85M
+Skip_ForwardQ128(64_pm_16)                                  26.56ns   37.66M
+Skip_ForwardQ128(256_pm_64)                                 30.12ns   33.20M
+Skip_ForwardQ128(1024_pm_256)                               30.74ns   32.53M
+Jump_ForwardQ128                                            30.49ns   32.80M
 ----------------------------------------------------------------------------
-SkipTo_SkipQ128(1)                                           4.42ns  226.49M
-SkipTo_SkipQ128(2)                                           8.58ns  116.55M
-SkipTo_SkipQ128(4_pm_1)                                     11.43ns   87.50M
-SkipTo_SkipQ128(16_pm_4)                                    31.19ns   32.06M
-SkipTo_SkipQ128(64_pm_16)                                   43.88ns   22.79M
-SkipTo_SkipQ128(256_pm_64)                                  49.08ns   20.37M
-SkipTo_SkipQ128(1024_pm_256)                                52.24ns   19.14M
-JumpTo_SkipQ128                                             54.61ns   18.31M
+SkipTo_SkipQ128(1)                                           3.86ns  258.96M
+SkipTo_SkipQ128(2)                                           7.73ns  129.36M
+SkipTo_SkipQ128(4_pm_1)                                     10.29ns   97.18M
+SkipTo_SkipQ128(16_pm_4)                                    28.69ns   34.86M
+SkipTo_SkipQ128(64_pm_16)                                   39.73ns   25.17M
+SkipTo_SkipQ128(256_pm_64)                                  43.45ns   23.01M
+SkipTo_SkipQ128(1024_pm_256)                                44.66ns   22.39M
+JumpTo_SkipQ128                                             47.98ns   20.84M
 ----------------------------------------------------------------------------
-Encode_10                                                  117.24ns    8.53M
-Encode                                                       5.64ms   177.15
+Encode_10                                                   77.92ns   12.83M
+Encode                                                       4.73ms   211.41
 ----------------------------------------------------------------------------
-Select64                                                     8.04ns  124.35M
+defaultNumLowerBits                                          2.20ns  455.01M
+slowDefaultNumLowerBits                                      7.90ns  126.59M
 ============================================================================
 #endif