EliasFanoReader::{jump,jumpTo}
authorPhilip Pronin <philipp@fb.com>
Sat, 2 Aug 2014 08:48:46 +0000 (01:48 -0700)
committerSara Golemon <sgolemon@fb.com>
Thu, 14 Aug 2014 18:49:04 +0000 (11:49 -0700)
Summary:
Renamed `goTo` to `jump`, introduced `jumpTo`.

FBCode: Updated CSS tree benchmark to compare with EF-based search that
is using `jumpTo` (P14517259).

Test Plan: fbconfig -r folly/experimental/test:eliasfano_test && fbmake runtests_opt -j32

@override-unit-failures

Reviewed By: lucian@fb.com

Subscribers: fbcode-common-diffs@

FB internal diff: D1474431

Tasks: 4536072

folly/experimental/EliasFanoCoding.h
folly/experimental/test/CodingTestUtils.h
folly/experimental/test/EliasFanoCodingTest.cpp

index c7406fd00bc84365a31daa5efde9e093d99cf918..2a1e21a9fdfc8568d8de2072b3fe36836be253ee 100644 (file)
@@ -460,15 +460,24 @@ class UpperBitsReader {
     return value_;
   }
 
-  ValueType goTo(size_t n) {
+  ValueType jump(size_t n) {
     if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) {
       reset();
     } else {
-      position_ = -1;  // Avoid reading the head, skip() will call reposition().
+      position_ = -1;  // Avoid reading the head, skip() will reposition.
     }
     return skip(n);
   }
 
+  ValueType jumpToNext(ValueType v) {
+    if (Encoder::skipQuantum == 0 || v < Encoder::skipQuantum) {
+      reset();
+    } else {
+      value_ = 0;  // Avoid reading the head, skipToNext() will reposition.
+    }
+    return skipToNext(v);
+  }
+
  private:
   ValueType setValue() {
     value_ = static_cast<ValueType>(8 * outer_ + inner_ - position_);
@@ -520,10 +529,11 @@ class EliasFanoReader : private boost::noncopyable {
                  (lastUpperValue << list_.numLowerBits);
   }
 
-  size_t size() const { return list_.size; }
-
-  size_t position() const { return progress_ - 1; }
-  ValueType value() const { return value_; }
+  void reset() {
+    upper_.reset();
+    progress_ = 0;
+    value_ = 0;
+  }
 
   bool next() {
     if (UNLIKELY(progress_ == list_.size)) {
@@ -555,36 +565,24 @@ class EliasFanoReader : private boost::noncopyable {
     DCHECK_GE(value, value_);
     if (value <= value_) {
       return true;
-    }
-    if (value > lastValue_) {
+    } else if (value > lastValue_) {
       progress_ = list_.size;
       value_ = std::numeric_limits<ValueType>::max();
       return false;
     }
 
     upper_.skipToNext(value >> list_.numLowerBits);
-    progress_ = upper_.position();
-    value_ = readLowerPart(progress_) |
-             (upper_.value() << list_.numLowerBits);
-    ++progress_;
-    while (value_ < value) {
-      value_ = readLowerPart(progress_) |
-               (upper_.next() << list_.numLowerBits);
-      ++progress_;
-    }
-
+    iterateTo(value);
     return true;
   }
 
-  bool goTo(size_t n) {
+  bool jump(size_t n) {
     if (LIKELY(n - 1 < list_.size)) {  // n > 0 && n <= list_.size
       progress_ = n;
-      value_ = readLowerPart(n - 1) | (upper_.goTo(n) << list_.numLowerBits);
+      value_ = readLowerPart(n - 1) | (upper_.jump(n) << list_.numLowerBits);
       return true;
     } else if (n == 0) {
-      upper_.reset();
-      progress_ = 0;
-      value_ = 0;
+      reset();
       return true;
     }
     progress_ = list_.size;
@@ -592,6 +590,26 @@ class EliasFanoReader : private boost::noncopyable {
     return false;
   }
 
+  ValueType jumpTo(ValueType value) {
+    if (value <= 0) {
+      reset();
+      return true;
+    } else if (value > lastValue_) {
+      progress_ = list_.size;
+      value_ = std::numeric_limits<ValueType>::max();
+      return false;
+    }
+
+    upper_.jumpToNext(value >> list_.numLowerBits);
+    iterateTo(value);
+    return true;
+  }
+
+  size_t size() const { return list_.size; }
+
+  size_t position() const { return progress_ - 1; }
+  ValueType value() const { return value_; }
+
  private:
   ValueType readLowerPart(size_t i) const {
     DCHECK_LT(i, list_.size);
@@ -601,6 +619,16 @@ class EliasFanoReader : private boost::noncopyable {
     return lowerMask_ & (ptrv >> (pos % 8));
   }
 
+  void iterateTo(ValueType value) {
+    progress_ = upper_.position();
+    value_ = readLowerPart(progress_) | (upper_.value() << list_.numLowerBits);
+    ++progress_;
+    while (value_ < value) {
+      value_ = readLowerPart(progress_) | (upper_.next() << list_.numLowerBits);
+      ++progress_;
+    }
+  }
+
   const EliasFanoCompressedList list_;
   const ValueType lowerMask_;
   detail::UpperBitsReader<Encoder, Instructions> upper_;
index e1ed84a6357a9e818e12b75dccb9cc8b2d46d457..e8b81b0fdd24d120dcb62b13bbc9f4374a15d4cd 100644 (file)
@@ -153,7 +153,7 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list) {
 }
 
 template <class Reader, class List>
-void testGoTo(const std::vector<uint32_t>& data, const List& list) {
+void testJump(const std::vector<uint32_t>& data, const List& list) {
   std::mt19937 gen;
   std::vector<size_t> is(data.size());
   for (size_t i = 0; i < data.size(); ++i) {
@@ -165,13 +165,45 @@ void testGoTo(const std::vector<uint32_t>& data, const List& list) {
   }
 
   Reader reader(list);
-  EXPECT_TRUE(reader.goTo(0));
+  EXPECT_TRUE(reader.jump(0));
   EXPECT_EQ(reader.value(), 0);
   for (auto i : is) {
-    EXPECT_TRUE(reader.goTo(i + 1));
+    EXPECT_TRUE(reader.jump(i + 1));
     EXPECT_EQ(reader.value(), data[i]);
   }
-  EXPECT_FALSE(reader.goTo(data.size() + 1));
+  EXPECT_FALSE(reader.jump(data.size() + 1));
+  EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
+}
+
+template <class Reader, class List>
+void testJumpTo(const std::vector<uint32_t>& data, const List& list) {
+  CHECK(!data.empty());
+
+  Reader reader(list);
+
+  std::mt19937 gen;
+  std::uniform_int_distribution<> values(0, data.back());
+  const size_t iters = Reader::EncoderType::skipQuantum == 0 ? 100 : 10000;
+  for (size_t i = 0; i < iters; ++i) {
+    const uint32_t value = values(gen);
+    auto it = std::lower_bound(data.begin(), data.end(), value);
+    CHECK(it != data.end());
+    EXPECT_TRUE(reader.jumpTo(value));
+    EXPECT_EQ(reader.value(), *it);
+  }
+
+  EXPECT_TRUE(reader.jumpTo(0));
+  EXPECT_EQ(reader.value(), 0);
+
+  if (data.front() > 0) {
+    EXPECT_TRUE(reader.jumpTo(1));
+    EXPECT_EQ(reader.value(), data.front());
+  }
+
+  EXPECT_TRUE(reader.jumpTo(data.back()));
+  EXPECT_EQ(reader.value(), data.back());
+
+  EXPECT_FALSE(reader.jumpTo(data.back() + 1));
   EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
 }
 
@@ -188,10 +220,13 @@ void testEmpty() {
     Reader reader(list);
     EXPECT_FALSE(reader.skip(1));
     EXPECT_FALSE(reader.skip(10));
+    EXPECT_FALSE(reader.jump(1));
+    EXPECT_FALSE(reader.jump(10));
   }
   {
     Reader reader(list);
     EXPECT_FALSE(reader.skipTo(1));
+    EXPECT_FALSE(reader.jumpTo(1));
   }
 }
 
@@ -201,7 +236,8 @@ void testAll(const std::vector<uint32_t>& data) {
   testNext<Reader>(data, list);
   testSkip<Reader>(data, list);
   testSkipTo<Reader>(data, list);
-  testGoTo<Reader>(data, list);
+  testJump<Reader>(data, list);
+  testJumpTo<Reader>(data, list);
   list.free();
 }
 
@@ -214,8 +250,7 @@ void bmNext(const List& list, const std::vector<uint32_t>& data,
   for (size_t i = 0, j; i < iters; ) {
     Reader reader(list);
     for (j = 0; reader.next(); ++j, ++i) {
-      const uint32_t value = reader.value();
-      CHECK_EQ(value, data[j]) << j;
+      CHECK_EQ(reader.value(), data[j]) << j;
     }
   }
 }
@@ -230,8 +265,7 @@ void bmSkip(const List& list, const std::vector<uint32_t>& data,
     Reader reader(list);
     for (j = skip - 1; j < data.size(); j += skip, ++i) {
       reader.skip(skip);
-      const uint32_t value = reader.value();
-      CHECK_EQ(value, data[j]);
+      CHECK_EQ(reader.value(), data[j]);
     }
   }
 }
@@ -246,14 +280,13 @@ void bmSkipTo(const List& list, const std::vector<uint32_t>& data,
     Reader reader(list);
     for (j = 0; j < data.size(); j += skip, ++i) {
       reader.skipTo(data[j]);
-      const uint32_t value = reader.value();
-      CHECK_EQ(value, data[j]);
+      CHECK_EQ(reader.value(), data[j]);
     }
   }
 }
 
 template <class Reader, class List>
-void bmGoTo(const List& list, const std::vector<uint32_t>& data,
+void bmJump(const List& list, const std::vector<uint32_t>& data,
             const std::vector<size_t>& order, size_t iters) {
   CHECK(!data.empty());
   CHECK_EQ(data.size(), order.size());
@@ -261,9 +294,24 @@ void bmGoTo(const List& list, const std::vector<uint32_t>& data,
   Reader reader(list);
   for (size_t i = 0; i < iters; ) {
     for (size_t j : order) {
-      reader.goTo(j + 1);
-      const uint32_t value = reader.value();
-      CHECK_EQ(value, data[j]);
+      reader.jump(j + 1);
+      CHECK_EQ(reader.value(), data[j]);
+      ++i;
+    }
+  }
+}
+
+template <class Reader, class List>
+void bmJumpTo(const List& list, const std::vector<uint32_t>& data,
+              const std::vector<size_t>& order, size_t iters) {
+  CHECK(!data.empty());
+  CHECK_EQ(data.size(), order.size());
+
+  Reader reader(list);
+  for (size_t i = 0; i < iters; ) {
+    for (size_t j : order) {
+      reader.jumpTo(data[j]);
+      CHECK_EQ(reader.value(), data[j]);
       ++i;
     }
   }
index 91da77875fd078af8b9db409bab4154e0940c26e..547cd30936c3f202c3aa5a258072074de02bc7ad 100644 (file)
@@ -131,16 +131,16 @@ BENCHMARK(Skip1000_ForwardQ128_1M) {
   bmSkip<bm::Reader>(bm::list, bm::data, 1000, bm::k1M);
 }
 
-BENCHMARK(GoTo_ForwardQ128_1M) {
-  bmGoTo<bm::Reader>(bm::list, bm::data, bm::order, bm::k1M);
+BENCHMARK(Jump_ForwardQ128_1M) {
+  bmJump<bm::Reader>(bm::list, bm::data, bm::order, bm::k1M);
 }
 
+BENCHMARK_DRAW_LINE();
+
 BENCHMARK(SkipTo1_SkipQ128_1M) {
   bmSkipTo<bm::Reader>(bm::list, bm::data, 1, bm::k1M);
 }
 
-BENCHMARK_DRAW_LINE();
-
 BENCHMARK(SkipTo10_SkipQ128_1M) {
   bmSkipTo<bm::Reader>(bm::list, bm::data, 10, bm::k1M);
 }
@@ -153,6 +153,10 @@ BENCHMARK(SkipTo1000_SkipQ128_1M) {
   bmSkipTo<bm::Reader>(bm::list, bm::data, 1000, bm::k1M);
 }
 
+BENCHMARK(JumpTo_SkipQ128_1M) {
+  bmJumpTo<bm::Reader>(bm::list, bm::data, bm::order, bm::k1M);
+}
+
 BENCHMARK_DRAW_LINE();
 
 BENCHMARK(Encode_10) {
@@ -173,20 +177,21 @@ Intel(R) Xeon(R) CPU E5-2660 @ 2.7GHz (turbo on), using instructions::Fast.
 ============================================================================
 folly/experimental/test/EliasFanoCodingTest.cpp relative  time/iter  iters/s
 ============================================================================
-Next_1M                                                      4.86ms   205.97
-Skip1_ForwarQ128_1M                                          5.17ms   193.36
-Skip10_ForwarQ128_1M                                        13.69ms    73.03
-Skip100_ForwardQ128_1M                                      26.76ms    37.37
-Skip1000_ForwardQ128_1M                                     20.66ms    48.40
-GoTo_ForwardQ128_1M                                         43.75ms    22.86
+Next_1M                                                      4.61ms   216.70
+Skip1_ForwarQ128_1M                                          5.33ms   187.71
+Skip10_ForwarQ128_1M                                        14.23ms    70.26
+Skip100_ForwardQ128_1M                                      29.10ms    34.37
+Skip1000_ForwardQ128_1M                                     21.15ms    47.28
+Jump_ForwardQ128_1M                                         46.30ms    21.60
 ----------------------------------------------------------------------------
-SkipTo1_SkipQ128_1M                                          9.74ms   102.70
-SkipTo10_SkipQ128_1M                                        30.62ms    32.66
-SkipTo100_SkipQ128_1M                                       37.70ms    26.53
-SkipTo1000_SkipQ128_1M                                      31.14ms    32.11
+SkipTo1_SkipQ128_1M                                         12.03ms    83.15
+SkipTo10_SkipQ128_1M                                        36.11ms    27.69
+SkipTo100_SkipQ128_1M                                       42.91ms    23.30
+SkipTo1000_SkipQ128_1M                                      36.92ms    27.08
+JumpTo_SkipQ128_1M                                          78.51ms    12.74
 ----------------------------------------------------------------------------
-Encode_10                                                  208.16ns    4.80M
-Encode_1M                                                    8.90ms   112.37
+Encode_10                                                  199.19ns    5.02M
+Encode_1M                                                    8.82ms   113.37
 ============================================================================
 #endif