X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2Fexperimental%2FEliasFanoCoding.h;h=faf19d74b70fc9b3aa8579326dc505db7e3ea16a;hb=f9eb4bb48bd6036beae37196882b32801595c45d;hp=7f83b0e8ab40d047a039e8749f1d6e9da9f8cc10;hpb=c7c65f54afa6e0927c1a9a5ee818a79de337151a;p=folly.git diff --git a/folly/experimental/EliasFanoCoding.h b/folly/experimental/EliasFanoCoding.h index 7f83b0e8..faf19d74 100644 --- a/folly/experimental/EliasFanoCoding.h +++ b/folly/experimental/EliasFanoCoding.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Facebook, Inc. + * Copyright 2015 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,40 +50,51 @@ namespace folly { namespace compression { -struct EliasFanoCompressedList { - EliasFanoCompressedList() { } +template +struct EliasFanoCompressedListBase { + EliasFanoCompressedListBase() = default; + + template + EliasFanoCompressedListBase( + const EliasFanoCompressedListBase& other) + : size(other.size), + numLowerBits(other.numLowerBits), + data(other.data), + skipPointers(reinterpret_cast(other.skipPointers)), + forwardPointers(reinterpret_cast(other.forwardPointers)), + lower(reinterpret_cast(other.lower)), + upper(reinterpret_cast(other.upper)) { + } void free() { - ::free(const_cast(lower.data())); - ::free(const_cast(upper.data())); - ::free(const_cast(skipPointers.data())); - ::free(const_cast(forwardPointers.data())); + ::free(const_cast(data.data())); + } + + size_t upperSize() const { + return data.end() - upper; } size_t size = 0; uint8_t numLowerBits = 0; - // WARNING: EliasFanoCompressedList has no ownership of - // lower, upper, skipPointers and forwardPointers. - // The 7 bytes following the last byte of lower and upper - // sequences should be readable. - folly::ByteRange lower; - folly::ByteRange upper; + // WARNING: EliasFanoCompressedList has no ownership of data. The 7 + // bytes following the last byte should be readable. + folly::Range data; - folly::ByteRange skipPointers; - folly::ByteRange forwardPointers; + Pointer skipPointers = nullptr; + Pointer forwardPointers = nullptr; + Pointer lower = nullptr; + Pointer upper = nullptr; }; -// Version history: -// In version 1 skip / forward pointers encoding has been changed, -// so SkipValue = uint32_t is able to address up to ~4B elements, -// instead of only ~2B. +typedef EliasFanoCompressedListBase EliasFanoCompressedList; +typedef EliasFanoCompressedListBase MutableEliasFanoCompressedList; + template -struct EliasFanoEncoder { + size_t kForwardQuantum = 0> // 0 = disabled +struct EliasFanoEncoderV2 { static_assert(std::is_integral::value && std::is_unsigned::value, "Value should be unsigned integral"); @@ -92,10 +103,10 @@ struct EliasFanoEncoder { typedef Value ValueType; typedef SkipValue SkipValueType; + struct Layout; static constexpr size_t skipQuantum = kSkipQuantum; static constexpr size_t forwardQuantum = kForwardQuantum; - static constexpr size_t version = kVersion; static uint8_t defaultNumLowerBits(size_t upperBound, size_t size) { if (size == 0 || upperBound < size) { @@ -116,97 +127,26 @@ struct EliasFanoEncoder { if (begin == end) { return EliasFanoCompressedList(); } - EliasFanoEncoder encoder(end - begin, *(end - 1)); + EliasFanoEncoderV2 encoder(end - begin, *(end - 1)); for (; begin != end; ++begin) { encoder.add(*begin); } return encoder.finish(); } - EliasFanoEncoder(size_t size, ValueType upperBound) { - if (size == 0) { - return; - } - - uint8_t numLowerBits = defaultNumLowerBits(upperBound, size); - - // This is detail::writeBits56 limitation. - numLowerBits = std::min(numLowerBits, 56); - CHECK_LT(numLowerBits, 8 * sizeof(Value)); // As we shift by numLowerBits. - - // WARNING: Current read/write logic assumes that the 7 bytes - // following the last byte of lower and upper sequences are - // readable (stored value doesn't matter and won't be changed), - // so we allocate additional 7B, but do not include them in size - // of returned value. - - // *** Lower bits. - const size_t lowerSize = (numLowerBits * size + 7) / 8; - if (lowerSize > 0) { // numLowerBits != 0 - lower_ = static_cast(calloc(lowerSize + 7, 1)); - } - - // *** Upper bits. - // Upper bits are stored using unary delta encoding. - // For example, (3 5 5 9) will be encoded as 1000011001000_2. - const size_t upperSizeBits = - (upperBound >> numLowerBits) + // Number of 0-bits to be stored. - size; // 1-bits. - const size_t upperSize = (upperSizeBits + 7) / 8; - upper_ = static_cast(calloc(upperSize + 7, 1)); - - // *** Skip pointers. - // Store (1-indexed) position of every skipQuantum-th - // 0-bit in upper bits sequence. - size_t numSkipPointers = 0; - /* static */ if (skipQuantum != 0) { - /* static */ if (kVersion > 0) { - CHECK_LT(size, std::numeric_limits::max()); - } else { - CHECK_LT(upperSizeBits, std::numeric_limits::max()); - } - // 8 * upperSize is used here instead of upperSizeBits, as that is - // more serialization-friendly way (upperSizeBits isn't known outside of - // this function, unlike upperSize; thus numSkipPointers could easily be - // deduced from upperSize). - numSkipPointers = (8 * upperSize - size) / (skipQuantum ?: 1); - skipPointers_ = static_cast( - numSkipPointers == 0 - ? nullptr - : calloc(numSkipPointers, sizeof(SkipValueType))); - } - - // *** Forward pointers. - // Store (1-indexed) position of every forwardQuantum-th - // 1-bit in upper bits sequence. - size_t numForwardPointers = 0; - /* static */ if (forwardQuantum != 0) { - /* static */ if (kVersion > 0) { - CHECK_LT(upperBound >> numLowerBits, - std::numeric_limits::max()); - } else { - CHECK_LT(upperSizeBits, std::numeric_limits::max()); - } - - // '?: 1' is a workaround for false 'division by zero' compile-time error. - numForwardPointers = size / (forwardQuantum ?: 1); - forwardPointers_ = static_cast( - numForwardPointers == 0 - ? nullptr - : malloc(numForwardPointers * sizeof(SkipValueType))); - } + explicit EliasFanoEncoderV2(const MutableEliasFanoCompressedList& result) + : lower_(result.lower), + upper_(result.upper), + skipPointers_(reinterpret_cast( + result.skipPointers)), + forwardPointers_(reinterpret_cast( + result.forwardPointers)), + result_(result) { + } - // *** Result. - result_.size = size; - result_.numLowerBits = numLowerBits; - result_.lower.reset(lower_, lowerSize); - result_.upper.reset(upper_, upperSize); - result_.skipPointers.reset( - reinterpret_cast(skipPointers_), - numSkipPointers * sizeof(SkipValueType)); - result_.forwardPointers.reset( - reinterpret_cast(forwardPointers_), - numForwardPointers * sizeof(SkipValueType)); + EliasFanoEncoderV2(size_t size, ValueType upperBound) + : EliasFanoEncoderV2( + Layout::fromUpperBoundAndSize(upperBound, size).allocList()) { } void add(ValueType value) { @@ -226,26 +166,16 @@ struct EliasFanoEncoder { /* static */ if (skipQuantum != 0) { while ((skipPointersSize_ + 1) * skipQuantum <= upperBits) { - /* static */ if (kVersion > 0) { - // Since version 1, just the number of preceding 1-bits is stored. - skipPointers_[skipPointersSize_] = size_; - } else { - skipPointers_[skipPointersSize_] = - size_ + (skipPointersSize_ + 1) * skipQuantum; - } - ++skipPointersSize_; + // Store the number of preceding 1-bits. + skipPointers_[skipPointersSize_++] = size_; } } /* static */ if (forwardQuantum != 0) { if ((size_ + 1) % forwardQuantum == 0) { const auto pos = size_ / forwardQuantum; - /* static */ if (kVersion > 0) { - // Since version 1, just the number of preceding 0-bits is stored. - forwardPointers_[pos] = upperBits; - } else { - forwardPointers_[pos] = upperBits + size_ + 1; - } + // Store the number of preceding 0-bits. + forwardPointers_[pos] = upperBits; } } @@ -282,6 +212,122 @@ struct EliasFanoEncoder { EliasFanoCompressedList result_; }; +template +struct EliasFanoEncoderV2::Layout { + static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) { + // numLowerBits can be at most 56 because of detail::writeBits56. + const uint8_t numLowerBits = std::min(defaultNumLowerBits(upperBound, + size), + uint8_t(56)); + // *** Upper bits. + // Upper bits are stored using unary delta encoding. + // For example, (3 5 5 9) will be encoded as 1000011001000_2. + const size_t upperSizeBits = + (upperBound >> numLowerBits) + // Number of 0-bits to be stored. + size; // 1-bits. + const size_t upper = (upperSizeBits + 7) / 8; + + // *** Validity checks. + // Shift by numLowerBits must be valid. + CHECK_LT(numLowerBits, 8 * sizeof(Value)); + CHECK_LT(size, std::numeric_limits::max()); + CHECK_LT(upperBound >> numLowerBits, + std::numeric_limits::max()); + + return fromInternalSizes(numLowerBits, upper, size); + } + + static Layout fromInternalSizes(uint8_t numLowerBits, + size_t upper, + size_t size) { + Layout layout; + layout.size = size; + layout.numLowerBits = numLowerBits; + + layout.lower = (numLowerBits * size + 7) / 8; + layout.upper = upper; + + // *** Skip pointers. + // Store (1-indexed) position of every skipQuantum-th + // 0-bit in upper bits sequence. + /* static */ if (skipQuantum != 0) { + // 8 * upper is used here instead of upperSizeBits, as that is + // more serialization-friendly way (upperSizeBits doesn't need + // to be known by this function, unlike upper). + + // '?: 1' is a workaround for false 'division by zero' + // compile-time error. + size_t numSkipPointers = (8 * upper - size) / (skipQuantum ?: 1); + layout.skipPointers = numSkipPointers * sizeof(SkipValueType); + } + + // *** Forward pointers. + // Store (1-indexed) position of every forwardQuantum-th + // 1-bit in upper bits sequence. + /* static */ if (forwardQuantum != 0) { + size_t numForwardPointers = size / (forwardQuantum ?: 1); + layout.forwardPointers = numForwardPointers * sizeof(SkipValueType); + } + + return layout; + } + + size_t bytes() const { + return lower + upper + skipPointers + forwardPointers; + } + + template + EliasFanoCompressedListBase + openList(Range& buf) const { + EliasFanoCompressedListBase result; + result.size = size; + result.numLowerBits = numLowerBits; + result.data = buf.subpiece(0, bytes()); + + auto advance = [&] (size_t n) { + auto begin = buf.data(); + buf.advance(n); + return begin; + }; + + result.skipPointers = advance(skipPointers); + result.forwardPointers = advance(forwardPointers); + result.lower = advance(lower); + result.upper = advance(upper); + + return result; + } + + MutableEliasFanoCompressedList allocList() const { + uint8_t* buf = nullptr; + // WARNING: Current read/write logic assumes that the 7 bytes + // following the last byte of lower and upper sequences are + // readable (stored value doesn't matter and won't be changed), + // so we allocate additional 7B, but do not include them in size + // of returned value. + if (size > 0) { + buf = static_cast(calloc(bytes() + 7, 1)); + } + folly::MutableByteRange bufRange(buf, bytes()); + return openList(bufRange); + } + + size_t size = 0; + uint8_t numLowerBits = 0; + + // Sizes in bytes. + size_t lower = 0; + size_t upper = 0; + size_t skipPointers = 0; + size_t forwardPointers = 0; +}; + // NOTE: It's recommended to compile EF coding with -msse4.2, starting // with Nehalem, Intel CPUs support POPCNT instruction and gcc will emit // it for __builtin_popcountll intrinsic. @@ -343,9 +389,9 @@ class UpperBitsReader { typedef typename Encoder::ValueType ValueType; explicit UpperBitsReader(const EliasFanoCompressedList& list) - : forwardPointers_(list.forwardPointers.data()), - skipPointers_(list.skipPointers.data()), - start_(list.upper.data()) { + : forwardPointers_(list.forwardPointers), + skipPointers_(list.skipPointers), + start_(list.upper) { reset(); } @@ -389,13 +435,9 @@ class UpperBitsReader { folly::loadUnaligned( forwardPointers_ + (steps - 1) * sizeof(SkipValueType)); - /* static */ if (Encoder::version > 0) { - reposition(dest + steps * q); - } else { - reposition(dest); - } + reposition(dest + steps * q); n = position_ + 1 - steps * q; // n is > 0. - // correct inner_ will be set at the end. + // Correct inner_ will be set at the end. } size_t cnt; @@ -429,13 +471,9 @@ class UpperBitsReader { folly::loadUnaligned( skipPointers_ + (steps - 1) * sizeof(SkipValueType)); - /* static */ if (Encoder::version > 0) { - reposition(dest + q * steps); - position_ = dest - 1; - } else { - reposition(dest); - position_ = dest - q * steps - 1; - } + reposition(dest + q * steps); + position_ = dest - 1; + // Correct inner_ and value_ will be set during the next() // call at the end. @@ -527,8 +565,8 @@ class EliasFanoReader : private boost::noncopyable { lastValue_ = 0; return; } - ValueType lastUpperValue = 8 * list_.upper.size() - list_.size; - auto it = list_.upper.end() - 1; + ValueType lastUpperValue = 8 * list_.upperSize() - list_.size; + auto it = list_.upper + list_.upperSize() - 1; DCHECK_NE(*it, 0); lastUpperValue -= 8 - folly::findLastSet(*it); lastValue_ = readLowerPart(list_.size - 1) | @@ -633,7 +671,7 @@ class EliasFanoReader : private boost::noncopyable { ValueType readLowerPart(size_t i) const { DCHECK_LT(i, list_.size); const size_t pos = i * list_.numLowerBits; - const unsigned char* ptr = list_.lower.data() + (pos / 8); + const unsigned char* ptr = list_.lower + (pos / 8); const uint64_t ptrv = folly::loadUnaligned(ptr); return lowerMask_ & (ptrv >> (pos % 8)); }