X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2Fexperimental%2FEliasFanoCoding.h;h=04a0f15881a850216f2f7167ff24a53c9cb0bffb;hb=b26334e5f85126a4ebe1c514d4790b98f2c2bbe1;hp=15d3d5d6b48f01133914dfc04662ad3237615411;hpb=d4aacd244f21e76dce685365acc281a9015897c1;p=folly.git diff --git a/folly/experimental/EliasFanoCoding.h b/folly/experimental/EliasFanoCoding.h index 15d3d5d6..04a0f158 100644 --- a/folly/experimental/EliasFanoCoding.h +++ b/folly/experimental/EliasFanoCoding.h @@ -28,7 +28,6 @@ #include #include -#include #include #include #include @@ -36,6 +35,7 @@ #include #include #include +#include #include #if !FOLLY_X64 @@ -46,6 +46,8 @@ namespace folly { namespace compression { static_assert(kIsLittleEndian, "EliasFanoCoding.h requires little endianness"); +constexpr size_t kCacheLineSize = 64; + template struct EliasFanoCompressedListBase { EliasFanoCompressedListBase() = default; @@ -450,6 +452,38 @@ class UpperBitsReader : ForwardPointers, return value_; } + /** + * Prepare to skip to `value`. This is a constant-time operation that will + * prefetch memory required for a `skipTo(value)` call. + * + * @return position of reader + */ + SizeType prepareSkipTo(ValueType v) const { + auto position = position_; + + if (Encoder::skipQuantum > 0 && v >= value_ + Encoder::skipQuantum) { + auto outer = outer_; + const size_t steps = v / Encoder::skipQuantum; + const size_t dest = folly::loadUnaligned( + this->skipPointers_ + (steps - 1) * sizeof(SkipValueType)); + + position = dest - 1; + outer = (dest + Encoder::skipQuantum * steps) / 8; + + // Prefetch up to the beginning of where we linear search. After that, + // hardware prefetching will outperform our own. In addition, this + // simplifies calculating what to prefetch as we don't have to calculate + // the entire destination address. Two cache lines are prefetched because + // this results in fewer cycles used (based on practical results) than + // one. However, three cache lines does not have any additional effect. + const auto addr = start_ + outer; + __builtin_prefetch(addr); + __builtin_prefetch(addr + kCacheLineSize); + } + + return position; + } + ValueType jump(size_t n) { if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) { reset(); @@ -572,8 +606,9 @@ class EliasFanoReader { if (kUnchecked || LIKELY(position() + n < size_)) { if (LIKELY(n < kLinearScanThreshold)) { - for (SizeType i = 0; i < n; ++i) + for (SizeType i = 0; i < n; ++i) { upper_.next(); + } } else { upper_.skip(n); } @@ -611,6 +646,29 @@ class EliasFanoReader { return true; } + /** + * Prepare to skip to `value` by prefetching appropriate memory in both the + * upper and lower bits. + */ + void prepareSkipTo(ValueType value) const { + // Also works when value_ == kInvalidValue. + if (value != kInvalidValue) { + DCHECK_GE(value + 1, value_ + 1); + } + + if ((!kUnchecked && value > lastValue_) || (value == value_)) { + return; + } + + // Do minimal computation required to prefetch address used in + // `readLowerPart()`. + ValueType upperValue = (value >> numLowerBits_); + const auto upperPosition = upper_.prepareSkipTo(upperValue); + const auto addr = lower_ + (upperPosition * numLowerBits_ / 8); + __builtin_prefetch(addr); + __builtin_prefetch(addr + kCacheLineSize); + } + bool jump(SizeType n) { if (LIKELY(n < size_)) { // Also checks that n != -1. value_ = readLowerPart(n) | (upper_.jump(n + 1) << numLowerBits_); @@ -678,7 +736,9 @@ class EliasFanoReader { while (true) { value_ = readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_); - if (LIKELY(value_ >= value)) break; + if (LIKELY(value_ >= value)) { + break; + } upper_.next(); } } @@ -693,4 +753,5 @@ class EliasFanoReader { uint8_t numLowerBits_; }; -}} // namespaces +} // namespace compression +} // namespace folly