#include <limits>
#include <type_traits>
-#include <folly/Assume.h>
#include <folly/Bits.h>
#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/experimental/CodingDetail.h>
#include <folly/experimental/Instructions.h>
#include <folly/experimental/Select64.h>
+#include <folly/lang/Assume.h>
#include <glog/logging.h>
#if !FOLLY_X64
static_assert(kIsLittleEndian, "EliasFanoCoding.h requires little endianness");
+constexpr size_t kCacheLineSize = 64;
+
template <class Pointer>
struct EliasFanoCompressedListBase {
EliasFanoCompressedListBase() = default;
return value_;
}
+ /**
+ * Prepare to skip to `value`. This is a constant-time operation that will
+ * prefetch memory required for a `skipTo(value)` call.
+ *
+ * @return position of reader
+ */
+ SizeType prepareSkipTo(ValueType v) const {
+ auto position = position_;
+
+ // Only consult the skip table when it exists (skipQuantum > 0) and the
+ // target is at least one quantum ahead of the current value; otherwise
+ // the eventual skipTo() would linear-scan from here anyway.
+ if (Encoder::skipQuantum > 0 && v >= value_ + Encoder::skipQuantum) {
+ auto outer = outer_;
+ // Number of whole skip quanta below `v`; entry (steps - 1) of the skip
+ // table presumably records the reader position at that quantum boundary
+ // — TODO(review): confirm against the encoder's skip-table layout.
+ const size_t steps = v / Encoder::skipQuantum;
+ const size_t dest = folly::loadUnaligned<SkipValueType>(
+ this->skipPointers_ + (steps - 1) * sizeof(SkipValueType));
+
+ position = dest - 1;
+ // Bit index (dest set bits plus skipQuantum * steps positions) converted
+ // to a byte offset into the upper-bits stream.
+ outer = (dest + Encoder::skipQuantum * steps) / 8;
+
+ // Prefetch up to the beginning of where we linear search. After that,
+ // hardware prefetching will outperform our own. In addition, this
+ // simplifies calculating what to prefetch as we don't have to calculate
+ // the entire destination address. Two cache lines are prefetched because
+ // this results in fewer cycles used (based on practical results) than
+ // one. However, three cache lines does not have any additional effect.
+ const auto addr = start_ + outer;
+ __builtin_prefetch(addr);
+ __builtin_prefetch(addr + kCacheLineSize);
+ }
+
+ return position;
+ }
+
ValueType jump(size_t n) {
if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) {
reset();
if (kUnchecked || LIKELY(position() + n < size_)) {
if (LIKELY(n < kLinearScanThreshold)) {
- for (SizeType i = 0; i < n; ++i)
+ for (SizeType i = 0; i < n; ++i) {
upper_.next();
+ }
} else {
upper_.skip(n);
}
return true;
}
+ /**
+ * Prepare to skip to `value` by prefetching appropriate memory in both the
+ * upper and lower bits.
+ */
+ void prepareSkipTo(ValueType value) const {
+ // Also works when value_ == kInvalidValue. (The +1 on both sides
+ // presumably wraps kInvalidValue to 0 so the comparison still holds —
+ // TODO(review): confirm kInvalidValue is the all-ones sentinel.)
+ if (value != kInvalidValue) {
+ DCHECK_GE(value + 1, value_ + 1);
+ }
+
+ // Nothing useful to prefetch: the target is past the end of the list
+ // (checked readers only) or we are already positioned at it.
+ if ((!kUnchecked && value > lastValue_) || (value == value_)) {
+ return;
+ }
+
+ // Do minimal computation required to prefetch address used in
+ // `readLowerPart()`.
+ ValueType upperValue = (value >> numLowerBits_);
+ const auto upperPosition = upper_.prepareSkipTo(upperValue);
+ // Lower bits are stored numLowerBits_ per element, packed; / 8 converts
+ // the bit offset of element `upperPosition` to a byte address.
+ const auto addr = lower_ + (upperPosition * numLowerBits_ / 8);
+ __builtin_prefetch(addr);
+ __builtin_prefetch(addr + kCacheLineSize);
+ }
+
bool jump(SizeType n) {
if (LIKELY(n < size_)) { // Also checks that n != -1.
value_ = readLowerPart(n) | (upper_.jump(n + 1) << numLowerBits_);
while (true) {
value_ = readLowerPart(upper_.position()) |
(upper_.value() << numLowerBits_);
- if (LIKELY(value_ >= value)) break;
+ if (LIKELY(value_ >= value)) {
+ break;
+ }
upper_.next();
}
}
uint8_t numLowerBits_;
};
-}} // namespaces
+} // namespace compression
+} // namespace folly