From 2baf3f69a2d82034e8afc1bbbd612a82bb98ea35 Mon Sep 17 00:00:00 2001 From: Giuseppe Ottaviano Date: Thu, 10 Sep 2015 08:26:36 -0700 Subject: [PATCH] Make EliasFanoReader and BitVectorReader API more consistent Summary: This diff introduces a few breaking API changes to both EliasFanoReader and BitVectorReader in order to fix some inconsistencies: - As initalized, `EliasFanoReader` and `BitVectorReader` held a value of `0`, thus if `0` is present in the list `skipTo(0)` would not update the position to `0`, as it happens with any other `skipTo()`. This fixes `jumpTo` accordingly. - `jump(i + 1)` would go at position `i`. Now `reader.jump(i)`'s postcondition is `reader.position() == i`. - It is now illegal to retrieve `value()` from a reader in an out-of-bounds position (before-the-begin or past-the-end). Validity of a reader can be checked with `valid()`. Reviewed By: @philippv Differential Revision: D2420023 --- folly/experimental/BitVectorCoding.h | 42 +++++++++----- folly/experimental/EliasFanoCoding.h | 41 +++++++------ folly/experimental/test/CodingTestUtils.h | 58 +++++++++++-------- .../experimental/test/EliasFanoCodingTest.cpp | 1 + 4 files changed, 87 insertions(+), 55 deletions(-) diff --git a/folly/experimental/BitVectorCoding.h b/folly/experimental/BitVectorCoding.h index 4f660378..814b5818 100644 --- a/folly/experimental/BitVectorCoding.h +++ b/folly/experimental/BitVectorCoding.h @@ -121,7 +121,11 @@ struct BitVectorEncoder { Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {} void add(ValueType value) { - CHECK_GE(value, lastValue_); + CHECK_LT(value, std::numeric_limits::max()); + // Also works when lastValue_ == -1. + CHECK_GT(value + 1, lastValue_ + 1) + << "BitVectorCoding only supports stricly monotone lists"; + auto block = bits_ + (value / 64) * sizeof(uint64_t); size_t inner = value % 64; folly::Bits>::set( @@ -160,7 +164,7 @@ struct BitVectorEncoder { uint8_t* const skipPointers_ = nullptr; uint8_t* const forwardPointers_ = nullptr; - ValueType lastValue_ = 0; + ValueType lastValue_ = -1; size_t size_ = 0; size_t skipPointersSize_ = 0; @@ -265,7 +269,7 @@ class BitVectorReader { outer_ = 0; inner_ = -1; position_ = -1; - value_ = 0; + value_ = kInvalidValue; } bool next() { @@ -332,11 +336,13 @@ class BitVectorReader { } bool skipTo(ValueType v) { - DCHECK_GE(v, value_); - if (v <= value_) { - return true; - } else if (!kUnchecked && v > upperBound_) { + // Also works when value_ == kInvalidValue. + if (v != kInvalidValue) { DCHECK_GE(v + 1, value_ + 1); } + + if (!kUnchecked && v > upperBound_) { return setDone(); + } else if (v == value_) { + return true; } // Small skip optimization. @@ -384,16 +390,19 @@ class BitVectorReader { size_t size() const { return size_; } + bool valid() const { + return position() < size(); // Also checks that position() != -1. + } + size_t position() const { return position_; } - ValueType value() const { return value_; } + ValueType value() const { + DCHECK(valid()); + return value_; + } bool jump(size_t n) { reset(); - if (n > 0) { - return skip(n); - } else { - return true; - } + return skip(n + 1); } bool jumpTo(ValueType v) { @@ -402,12 +411,15 @@ class BitVectorReader { } bool setDone() { - value_ = std::numeric_limits::max(); + value_ = kInvalidValue; position_ = size_; return false; } private: + constexpr static ValueType kInvalidValue = + std::numeric_limits::max(); // Must hold kInvalidValue + 1 == 0. + bool setValue() { value_ = static_cast(8 * outer_ + inner_); return true; @@ -426,7 +438,7 @@ class BitVectorReader { size_t inner_; size_t position_; uint64_t block_; - ValueType value_ = 0; + ValueType value_; size_t size_; ValueType upperBound_; diff --git a/folly/experimental/EliasFanoCoding.h b/folly/experimental/EliasFanoCoding.h index 5d1dcdb3..b428d543 100644 --- a/folly/experimental/EliasFanoCoding.h +++ b/folly/experimental/EliasFanoCoding.h @@ -149,6 +149,7 @@ struct EliasFanoEncoderV2 { Layout::fromUpperBoundAndSize(upperBound, size).allocList()) { } void add(ValueType value) { + CHECK_LT(value, std::numeric_limits::max()); CHECK_GE(value, lastValue_); const auto numLowerBits = result_.numLowerBits; @@ -551,7 +552,7 @@ class EliasFanoReader { void reset() { upper_.reset(); - value_ = 0; + value_ = kInvalidValue; } bool next() { @@ -582,11 +583,13 @@ class EliasFanoReader { } bool skipTo(ValueType value) { - DCHECK_GE(value, value_); - if (value <= value_) { - return true; - } else if (!kUnchecked && value > lastValue_) { + // Also works when value_ == kInvalidValue. + if (value != kInvalidValue) { DCHECK_GE(value + 1, value_ + 1); } + + if (!kUnchecked && value > lastValue_) { return setDone(); + } else if (value == value_) { + return true; } size_t upperValue = (value >> numLowerBits_); @@ -606,21 +609,15 @@ class EliasFanoReader { } bool jump(size_t n) { - if (LIKELY(n - 1 < size_)) { // n > 0 && n <= size_ - value_ = readLowerPart(n - 1) | (upper_.jump(n) << numLowerBits_); - return true; - } else if (n == 0) { - reset(); + if (LIKELY(n < size_)) { // Also checks that n != -1. + value_ = readLowerPart(n) | (upper_.jump(n + 1) << numLowerBits_); return true; } return setDone(); } bool jumpTo(ValueType value) { - if (value <= 0) { - reset(); - return true; - } else if (!kUnchecked && value > lastValue_) { + if (!kUnchecked && value > lastValue_) { return setDone(); } @@ -638,12 +635,22 @@ class EliasFanoReader { size_t size() const { return size_; } + bool valid() const { + return position() < size(); // Also checks that position() != -1. + } + size_t position() const { return upper_.position(); } - ValueType value() const { return value_; } + ValueType value() const { + DCHECK(valid()); + return value_; + } private: + constexpr static ValueType kInvalidValue = + std::numeric_limits::max(); // Must hold kInvalidValue + 1 == 0. + bool setDone() { - value_ = std::numeric_limits::max(); + value_ = kInvalidValue; upper_.setDone(size_); return false; } @@ -671,7 +678,7 @@ class EliasFanoReader { const uint8_t* lower_; detail::UpperBitsReader upper_; const ValueType lowerMask_; - ValueType value_ = 0; + ValueType value_ = kInvalidValue; ValueType lastValue_; uint8_t numLowerBits_; }; diff --git a/folly/experimental/test/CodingTestUtils.h b/folly/experimental/test/CodingTestUtils.h index f44d382d..bb912c42 100644 --- a/folly/experimental/test/CodingTestUtils.h +++ b/folly/experimental/test/CodingTestUtils.h @@ -91,15 +91,17 @@ auto maybeTestPreviousValue(const Vector& data, Reader& reader, Index i) template void testNext(const std::vector& data, const List& list) { Reader reader(list); - EXPECT_EQ(reader.value(), 0); + EXPECT_FALSE(reader.valid()); + for (size_t i = 0; i < data.size(); ++i) { EXPECT_TRUE(reader.next()); + EXPECT_TRUE(reader.valid()); EXPECT_EQ(reader.value(), data[i]); EXPECT_EQ(reader.position(), i); maybeTestPreviousValue(data, reader, i); } EXPECT_FALSE(reader.next()); - EXPECT_EQ(reader.value(), std::numeric_limits::max()); + EXPECT_FALSE(reader.valid()); EXPECT_EQ(reader.position(), reader.size()); } @@ -108,15 +110,16 @@ void testSkip(const std::vector& data, const List& list, size_t skipStep) { CHECK_GT(skipStep, 0); Reader reader(list); - EXPECT_EQ(reader.value(), 0); + for (size_t i = skipStep - 1; i < data.size(); i += skipStep) { EXPECT_TRUE(reader.skip(skipStep)); + EXPECT_TRUE(reader.valid()); EXPECT_EQ(reader.value(), data[i]); EXPECT_EQ(reader.position(), i); maybeTestPreviousValue(data, reader, i); } EXPECT_FALSE(reader.skip(skipStep)); - EXPECT_EQ(reader.value(), std::numeric_limits::max()); + EXPECT_FALSE(reader.valid()); EXPECT_EQ(reader.position(), reader.size()); EXPECT_FALSE(reader.next()); } @@ -135,9 +138,7 @@ template void testSkipTo(const std::vector& data, const List& list, size_t skipToStep) { CHECK_GT(skipToStep, 0); - Reader reader(list); - EXPECT_EQ(reader.value(), 0); const uint32_t delta = std::max(1, data.back() / skipToStep); uint32_t value = delta; @@ -149,11 +150,13 @@ void testSkipTo(const std::vector& data, const List& list, break; } EXPECT_TRUE(reader.skipTo(value)); + EXPECT_TRUE(reader.valid()); EXPECT_EQ(reader.value(), *it); + EXPECT_EQ(reader.position(), std::distance(data.begin(), it)); value = reader.value() + delta; maybeTestPreviousValue(data, reader, std::distance(data.begin(), it)); } - EXPECT_EQ(reader.value(), std::numeric_limits::max()); + EXPECT_FALSE(reader.valid()); EXPECT_EQ(reader.position(), reader.size()); EXPECT_FALSE(reader.next()); } @@ -168,9 +171,26 @@ void testSkipTo(const std::vector& data, const List& list) { } testSkipTo(data, list, std::numeric_limits::max()); { + // Skip to the first element. + Reader reader(list); + EXPECT_TRUE(reader.skipTo(data[0])); + EXPECT_EQ(reader.value(), data[0]); + EXPECT_EQ(reader.position(), 0); + } + { + // Skip past the last element. Reader reader(list); EXPECT_FALSE(reader.skipTo(data.back() + 1)); - EXPECT_EQ(reader.value(), std::numeric_limits::max()); + EXPECT_FALSE(reader.valid()); + EXPECT_EQ(reader.position(), reader.size()); + EXPECT_FALSE(reader.next()); + } + { + // Skip to maximum integer. + Reader reader(list); + using ValueType = typename Reader::ValueType; + EXPECT_FALSE(reader.skipTo(std::numeric_limits::max())); + EXPECT_FALSE(reader.valid()); EXPECT_EQ(reader.position(), reader.size()); EXPECT_FALSE(reader.next()); } @@ -189,16 +209,14 @@ void testJump(const std::vector& data, const List& list) { } Reader reader(list); - EXPECT_TRUE(reader.jump(0)); - EXPECT_EQ(reader.value(), 0); for (auto i : is) { - EXPECT_TRUE(reader.jump(i + 1)); + EXPECT_TRUE(reader.jump(i)); EXPECT_EQ(reader.value(), data[i]); EXPECT_EQ(reader.position(), i); maybeTestPreviousValue(data, reader, i); } - EXPECT_FALSE(reader.jump(data.size() + 1)); - EXPECT_EQ(reader.value(), std::numeric_limits::max()); + EXPECT_FALSE(reader.jump(data.size())); + EXPECT_FALSE(reader.valid()); EXPECT_EQ(reader.position(), reader.size()); } @@ -221,21 +239,15 @@ void testJumpTo(const std::vector& data, const List& list) { } EXPECT_TRUE(reader.jumpTo(0)); - EXPECT_EQ(reader.value(), 0); - EXPECT_EQ(reader.position(), -1); - - if (data.front() > 0) { - EXPECT_TRUE(reader.jumpTo(1)); - EXPECT_EQ(reader.value(), data.front()); - EXPECT_EQ(reader.position(), 0); - } + EXPECT_EQ(reader.value(), data[0]); + EXPECT_EQ(reader.position(), 0); EXPECT_TRUE(reader.jumpTo(data.back())); EXPECT_EQ(reader.value(), data.back()); EXPECT_EQ(reader.position(), reader.size() - 1); EXPECT_FALSE(reader.jumpTo(data.back() + 1)); - EXPECT_EQ(reader.value(), std::numeric_limits::max()); + EXPECT_FALSE(reader.valid()); EXPECT_EQ(reader.position(), reader.size()); } @@ -252,7 +264,7 @@ void testEmpty() { Reader reader(list); EXPECT_FALSE(reader.skip(1)); EXPECT_FALSE(reader.skip(10)); - EXPECT_FALSE(reader.jump(1)); + EXPECT_FALSE(reader.jump(0)); EXPECT_FALSE(reader.jump(10)); } { diff --git a/folly/experimental/test/EliasFanoCodingTest.cpp b/folly/experimental/test/EliasFanoCodingTest.cpp index 3a5b83d4..e02adca8 100644 --- a/folly/experimental/test/EliasFanoCodingTest.cpp +++ b/folly/experimental/test/EliasFanoCodingTest.cpp @@ -43,6 +43,7 @@ class EliasFanoCodingTest : public ::testing::Test { typedef EliasFanoEncoderV2< uint32_t, uint32_t, kSkipQuantum, kForwardQuantum> Encoder; typedef EliasFanoReader Reader; + testAll({0}); testAll(generateRandomList(100 * 1000, 10 * 1000 * 1000)); testAll(generateSeqList(1, 100000, 100)); } -- 2.34.1