From 297d5cee109992e5101fbf353bbd04e2a5704c39 Mon Sep 17 00:00:00 2001 From: Philip Pronin Date: Sat, 2 Aug 2014 01:48:46 -0700 Subject: [PATCH] EliasFanoReader::{jump,jumpTo} Summary: Renamed `goTo` to `jump`, introduced `jumpTo`. FBCode: Updated CSS tree benchmark to compare with EF-based search that is using `jumpTo` (P14517259). Test Plan: fbconfig -r folly/experimental/test:eliasfano_test && fbmake runtests_opt -j32 @override-unit-failures Reviewed By: lucian@fb.com Subscribers: fbcode-common-diffs@ FB internal diff: D1474431 Tasks: 4536072 --- folly/experimental/EliasFanoCoding.h | 74 ++++++++++++------ folly/experimental/test/CodingTestUtils.h | 78 +++++++++++++++---- .../experimental/test/EliasFanoCodingTest.cpp | 37 +++++---- 3 files changed, 135 insertions(+), 54 deletions(-) diff --git a/folly/experimental/EliasFanoCoding.h b/folly/experimental/EliasFanoCoding.h index c7406fd0..2a1e21a9 100644 --- a/folly/experimental/EliasFanoCoding.h +++ b/folly/experimental/EliasFanoCoding.h @@ -460,15 +460,24 @@ class UpperBitsReader { return value_; } - ValueType goTo(size_t n) { + ValueType jump(size_t n) { if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) { reset(); } else { - position_ = -1; // Avoid reading the head, skip() will call reposition(). + position_ = -1; // Avoid reading the head, skip() will reposition. } return skip(n); } + ValueType jumpToNext(ValueType v) { + if (Encoder::skipQuantum == 0 || v < Encoder::skipQuantum) { + reset(); + } else { + value_ = 0; // Avoid reading the head, skipToNext() will reposition. + } + return skipToNext(v); + } + private: ValueType setValue() { value_ = static_cast(8 * outer_ + inner_ - position_); @@ -520,10 +529,11 @@ class EliasFanoReader : private boost::noncopyable { (lastUpperValue << list_.numLowerBits); } - size_t size() const { return list_.size; } - - size_t position() const { return progress_ - 1; } - ValueType value() const { return value_; } + void reset() { + upper_.reset(); + progress_ = 0; + value_ = 0; + } bool next() { if (UNLIKELY(progress_ == list_.size)) { @@ -555,36 +565,24 @@ class EliasFanoReader : private boost::noncopyable { DCHECK_GE(value, value_); if (value <= value_) { return true; - } - if (value > lastValue_) { + } else if (value > lastValue_) { progress_ = list_.size; value_ = std::numeric_limits::max(); return false; } upper_.skipToNext(value >> list_.numLowerBits); - progress_ = upper_.position(); - value_ = readLowerPart(progress_) | - (upper_.value() << list_.numLowerBits); - ++progress_; - while (value_ < value) { - value_ = readLowerPart(progress_) | - (upper_.next() << list_.numLowerBits); - ++progress_; - } - + iterateTo(value); return true; } - bool goTo(size_t n) { + bool jump(size_t n) { if (LIKELY(n - 1 < list_.size)) { // n > 0 && n <= list_.size progress_ = n; - value_ = readLowerPart(n - 1) | (upper_.goTo(n) << list_.numLowerBits); + value_ = readLowerPart(n - 1) | (upper_.jump(n) << list_.numLowerBits); return true; } else if (n == 0) { - upper_.reset(); - progress_ = 0; - value_ = 0; + reset(); return true; } progress_ = list_.size; @@ -592,6 +590,26 @@ class EliasFanoReader : private boost::noncopyable { return false; } + ValueType jumpTo(ValueType value) { + if (value <= 0) { + reset(); + return true; + } else if (value > lastValue_) { + progress_ = list_.size; + value_ = std::numeric_limits::max(); + return false; + } + + upper_.jumpToNext(value >> list_.numLowerBits); + iterateTo(value); + return true; + } + + size_t size() const { return list_.size; } + + size_t position() const { return progress_ - 1; } + ValueType value() const { return value_; } + private: ValueType readLowerPart(size_t i) const { DCHECK_LT(i, list_.size); @@ -601,6 +619,16 @@ class EliasFanoReader : private boost::noncopyable { return lowerMask_ & (ptrv >> (pos % 8)); } + void iterateTo(ValueType value) { + progress_ = upper_.position(); + value_ = readLowerPart(progress_) | (upper_.value() << list_.numLowerBits); + ++progress_; + while (value_ < value) { + value_ = readLowerPart(progress_) | (upper_.next() << list_.numLowerBits); + ++progress_; + } + } + const EliasFanoCompressedList list_; const ValueType lowerMask_; detail::UpperBitsReader upper_; diff --git a/folly/experimental/test/CodingTestUtils.h b/folly/experimental/test/CodingTestUtils.h index e1ed84a6..e8b81b0f 100644 --- a/folly/experimental/test/CodingTestUtils.h +++ b/folly/experimental/test/CodingTestUtils.h @@ -153,7 +153,7 @@ void testSkipTo(const std::vector& data, const List& list) { } template -void testGoTo(const std::vector& data, const List& list) { +void testJump(const std::vector& data, const List& list) { std::mt19937 gen; std::vector is(data.size()); for (size_t i = 0; i < data.size(); ++i) { @@ -165,13 +165,45 @@ void testGoTo(const std::vector& data, const List& list) { } Reader reader(list); - EXPECT_TRUE(reader.goTo(0)); + EXPECT_TRUE(reader.jump(0)); EXPECT_EQ(reader.value(), 0); for (auto i : is) { - EXPECT_TRUE(reader.goTo(i + 1)); + EXPECT_TRUE(reader.jump(i + 1)); EXPECT_EQ(reader.value(), data[i]); } - EXPECT_FALSE(reader.goTo(data.size() + 1)); + EXPECT_FALSE(reader.jump(data.size() + 1)); + EXPECT_EQ(reader.value(), std::numeric_limits::max()); +} + +template +void testJumpTo(const std::vector& data, const List& list) { + CHECK(!data.empty()); + + Reader reader(list); + + std::mt19937 gen; + std::uniform_int_distribution<> values(0, data.back()); + const size_t iters = Reader::EncoderType::skipQuantum == 0 ? 100 : 10000; + for (size_t i = 0; i < iters; ++i) { + const uint32_t value = values(gen); + auto it = std::lower_bound(data.begin(), data.end(), value); + CHECK(it != data.end()); + EXPECT_TRUE(reader.jumpTo(value)); + EXPECT_EQ(reader.value(), *it); + } + + EXPECT_TRUE(reader.jumpTo(0)); + EXPECT_EQ(reader.value(), 0); + + if (data.front() > 0) { + EXPECT_TRUE(reader.jumpTo(1)); + EXPECT_EQ(reader.value(), data.front()); + } + + EXPECT_TRUE(reader.jumpTo(data.back())); + EXPECT_EQ(reader.value(), data.back()); + + EXPECT_FALSE(reader.jumpTo(data.back() + 1)); EXPECT_EQ(reader.value(), std::numeric_limits::max()); } @@ -188,10 +220,13 @@ void testEmpty() { Reader reader(list); EXPECT_FALSE(reader.skip(1)); EXPECT_FALSE(reader.skip(10)); + EXPECT_FALSE(reader.jump(1)); + EXPECT_FALSE(reader.jump(10)); } { Reader reader(list); EXPECT_FALSE(reader.skipTo(1)); + EXPECT_FALSE(reader.jumpTo(1)); } } @@ -201,7 +236,8 @@ void testAll(const std::vector& data) { testNext(data, list); testSkip(data, list); testSkipTo(data, list); - testGoTo(data, list); + testJump(data, list); + testJumpTo(data, list); list.free(); } @@ -214,8 +250,7 @@ void bmNext(const List& list, const std::vector& data, for (size_t i = 0, j; i < iters; ) { Reader reader(list); for (j = 0; reader.next(); ++j, ++i) { - const uint32_t value = reader.value(); - CHECK_EQ(value, data[j]) << j; + CHECK_EQ(reader.value(), data[j]) << j; } } } @@ -230,8 +265,7 @@ void bmSkip(const List& list, const std::vector& data, Reader reader(list); for (j = skip - 1; j < data.size(); j += skip, ++i) { reader.skip(skip); - const uint32_t value = reader.value(); - CHECK_EQ(value, data[j]); + CHECK_EQ(reader.value(), data[j]); } } } @@ -246,14 +280,13 @@ void bmSkipTo(const List& list, const std::vector& data, Reader reader(list); for (j = 0; j < data.size(); j += skip, ++i) { reader.skipTo(data[j]); - const uint32_t value = reader.value(); - CHECK_EQ(value, data[j]); + CHECK_EQ(reader.value(), data[j]); } } } template -void bmGoTo(const List& list, const std::vector& data, +void bmJump(const List& list, const std::vector& data, const std::vector& order, size_t iters) { CHECK(!data.empty()); CHECK_EQ(data.size(), order.size()); @@ -261,9 +294,24 @@ void bmGoTo(const List& list, const std::vector& data, Reader reader(list); for (size_t i = 0; i < iters; ) { for (size_t j : order) { - reader.goTo(j + 1); - const uint32_t value = reader.value(); - CHECK_EQ(value, data[j]); + reader.jump(j + 1); + CHECK_EQ(reader.value(), data[j]); + ++i; + } + } +} + +template +void bmJumpTo(const List& list, const std::vector& data, + const std::vector& order, size_t iters) { + CHECK(!data.empty()); + CHECK_EQ(data.size(), order.size()); + + Reader reader(list); + for (size_t i = 0; i < iters; ) { + for (size_t j : order) { + reader.jumpTo(data[j]); + CHECK_EQ(reader.value(), data[j]); ++i; } } diff --git a/folly/experimental/test/EliasFanoCodingTest.cpp b/folly/experimental/test/EliasFanoCodingTest.cpp index 91da7787..547cd309 100644 --- a/folly/experimental/test/EliasFanoCodingTest.cpp +++ b/folly/experimental/test/EliasFanoCodingTest.cpp @@ -131,16 +131,16 @@ BENCHMARK(Skip1000_ForwardQ128_1M) { bmSkip(bm::list, bm::data, 1000, bm::k1M); } -BENCHMARK(GoTo_ForwardQ128_1M) { - bmGoTo(bm::list, bm::data, bm::order, bm::k1M); +BENCHMARK(Jump_ForwardQ128_1M) { + bmJump(bm::list, bm::data, bm::order, bm::k1M); } +BENCHMARK_DRAW_LINE(); + BENCHMARK(SkipTo1_SkipQ128_1M) { bmSkipTo(bm::list, bm::data, 1, bm::k1M); } -BENCHMARK_DRAW_LINE(); - BENCHMARK(SkipTo10_SkipQ128_1M) { bmSkipTo(bm::list, bm::data, 10, bm::k1M); } @@ -153,6 +153,10 @@ BENCHMARK(SkipTo1000_SkipQ128_1M) { bmSkipTo(bm::list, bm::data, 1000, bm::k1M); } +BENCHMARK(JumpTo_SkipQ128_1M) { + bmJumpTo(bm::list, bm::data, bm::order, bm::k1M); +} + BENCHMARK_DRAW_LINE(); BENCHMARK(Encode_10) { @@ -173,20 +177,21 @@ Intel(R) Xeon(R) CPU E5-2660 @ 2.7GHz (turbo on), using instructions::Fast. ============================================================================ folly/experimental/test/EliasFanoCodingTest.cpp relative time/iter iters/s ============================================================================ -Next_1M 4.86ms 205.97 -Skip1_ForwarQ128_1M 5.17ms 193.36 -Skip10_ForwarQ128_1M 13.69ms 73.03 -Skip100_ForwardQ128_1M 26.76ms 37.37 -Skip1000_ForwardQ128_1M 20.66ms 48.40 -GoTo_ForwardQ128_1M 43.75ms 22.86 +Next_1M 4.61ms 216.70 +Skip1_ForwarQ128_1M 5.33ms 187.71 +Skip10_ForwarQ128_1M 14.23ms 70.26 +Skip100_ForwardQ128_1M 29.10ms 34.37 +Skip1000_ForwardQ128_1M 21.15ms 47.28 +Jump_ForwardQ128_1M 46.30ms 21.60 ---------------------------------------------------------------------------- -SkipTo1_SkipQ128_1M 9.74ms 102.70 -SkipTo10_SkipQ128_1M 30.62ms 32.66 -SkipTo100_SkipQ128_1M 37.70ms 26.53 -SkipTo1000_SkipQ128_1M 31.14ms 32.11 +SkipTo1_SkipQ128_1M 12.03ms 83.15 +SkipTo10_SkipQ128_1M 36.11ms 27.69 +SkipTo100_SkipQ128_1M 42.91ms 23.30 +SkipTo1000_SkipQ128_1M 36.92ms 27.08 +JumpTo_SkipQ128_1M 78.51ms 12.74 ---------------------------------------------------------------------------- -Encode_10 208.16ns 4.80M -Encode_1M 8.90ms 112.37 +Encode_10 199.19ns 5.02M +Encode_1M 8.82ms 113.37 ============================================================================ #endif -- 2.34.1