2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
18 #define FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
22 #include <type_traits>
24 #include <folly/Bits.h>
25 #include <folly/Likely.h>
26 #include <folly/Portability.h>
27 #include <folly/Range.h>
28 #include <folly/experimental/Bits.h>
29 #include <folly/experimental/Instructions.h>
30 #include <folly/experimental/Select64.h>
31 #include <glog/logging.h>
34 #error BitVectorCoding.h requires GCC
38 #error BitVectorCoding.h requires x86_64
41 #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
42 #error BitVectorCoding.h requires little endianness
45 namespace folly { namespace compression {
// Descriptor of an encoded bit-vector list: the backing byte range plus
// pointers to its sections (bit vector, skip pointers, forward pointers).
// Pointer is either `uint8_t*` (mutable) or `const uint8_t*` (read-only);
// the converting constructor allows mutable -> const conversion.
// NOTE(review): this excerpt is missing several original lines here
// (e.g. a `size` member and the start of the init-list) — comments below
// describe only what is visible.
47 template <class Pointer>
48 struct BitVectorCompressedListBase {
49 BitVectorCompressedListBase() = default;
// Converting copy: reinterprets the other list's section pointers as this
// instantiation's Pointer type (used for MutableCompressedList ->
// CompressedList).
51 template <class OtherPointer>
52 BitVectorCompressedListBase(
53 const BitVectorCompressedListBase<OtherPointer>& other)
55 upperBound(other.upperBound),
57 bits(reinterpret_cast<Pointer>(other.bits)),
58 skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
59 forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)) {}
// Releases the malloc'd buffer. SFINAE (`::free(T(nullptr))`) makes this
// available only when Pointer is non-const, i.e. only owning mutable lists
// can be freed.
61 template <class T = Pointer>
62 auto free() -> decltype(::free(T(nullptr))) {
63 return ::free(data.data());
// Largest value storable in the list (defines the bit-vector length).
67 size_t upperBound = 0;
// Whole backing buffer; the section pointers below point into it.
69 folly::Range<Pointer> data;
71 Pointer bits = nullptr;
72 Pointer skipPointers = nullptr;
73 Pointer forwardPointers = nullptr;
// Read-only and mutable aliases of the list descriptor.
76 typedef BitVectorCompressedListBase<const uint8_t*> BitVectorCompressedList;
77 typedef BitVectorCompressedListBase<uint8_t*> MutableBitVectorCompressedList;
// Encodes a strictly monotone list of unsigned integers as a bit vector:
// bit `v` of the vector is set iff value `v` is present. Optional skip
// pointers (every kSkipQuantum values) and forward pointers (every
// kForwardQuantum positions) accelerate the reader's skipTo()/skip().
// NOTE(review): several original lines are missing from this excerpt
// (loop bodies, init-list heads, closing braces); only visible code is
// documented, and the only code change is the "stricly" -> "strictly"
// typo fix in the CHECK message below.
79 template <class Value,
81 size_t kSkipQuantum = 0,
82 size_t kForwardQuantum = 0>
83 struct BitVectorEncoder {
84 static_assert(std::is_integral<Value>::value &&
85 std::is_unsigned<Value>::value,
86 "Value should be unsigned integral");
88 typedef BitVectorCompressedList CompressedList;
89 typedef MutableBitVectorCompressedList MutableCompressedList;
91 typedef Value ValueType;
92 typedef SkipValue SkipValueType;
95 static constexpr size_t skipQuantum = kSkipQuantum;
96 static constexpr size_t forwardQuantum = kForwardQuantum;
// One-shot convenience: encodes [begin, end) and returns the finished
// list. Uses *(end - 1) as the upper bound, so the input must be
// non-empty and strictly increasing at that point.
98 template <class RandomAccessIterator>
99 static MutableCompressedList encode(RandomAccessIterator begin,
100 RandomAccessIterator end) {
102 return MutableCompressedList();
104 BitVectorEncoder encoder(end - begin, *(end - 1));
105 for (; begin != end; ++begin) {
108 return encoder.finish();
// Wraps a pre-allocated list; zeroes the buffer so add() only ever needs
// to set bits.
111 explicit BitVectorEncoder(const MutableCompressedList& result)
112 : bits_(result.bits),
113 skipPointers_(result.skipPointers),
114 forwardPointers_(result.forwardPointers),
116 memset(result.data.data(), 0, result.data.size());
// Allocates a buffer sized from the layout for (upperBound, size) and
// delegates to the wrapping constructor.
119 BitVectorEncoder(size_t size, ValueType upperBound)
121 Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
// Appends one value. Values must be strictly increasing and below
// ValueType's max (the max is reserved — see kInvalidValue in the reader).
123 void add(ValueType value) {
124 CHECK_LT(value, std::numeric_limits<ValueType>::max());
125 // Also works when lastValue_ == -1.
126 CHECK_GT(value + 1, lastValue_ + 1)
127 << "BitVectorCoding only supports strictly monotone lists";
// Set bit `value` in the vector (unaligned 64-bit word access).
129 auto block = bits_ + (value / 64) * sizeof(uint64_t);
130 size_t inner = value % 64;
131 folly::Bits<folly::Unaligned<uint64_t>>::set(
132 reinterpret_cast<folly::Unaligned<uint64_t>*>(block), inner);
// Record skip pointers: for each crossed skipQuantum boundary, store the
// current position. `?: 1` avoids a division-by-zero compile error when
// the quantum is 0 (branch is dead then, but still compiled).
134 if (skipQuantum != 0) {
135 size_t nextSkipPointerSize = value / (skipQuantum ?: 1);
136 while (skipPointersSize_ < nextSkipPointerSize) {
137 auto pos = skipPointersSize_++;
138 folly::storeUnaligned<SkipValueType>(
139 skipPointers_ + pos * sizeof(SkipValueType), size_);
// Record forward pointers: every forwardQuantum-th added value is stored
// so the reader can jump by position.
143 if (forwardQuantum != 0) {
144 if (size_ != 0 && (size_ % (forwardQuantum ?: 1) == 0)) {
145 const auto pos = size_ / (forwardQuantum ?: 1) - 1;
146 folly::storeUnaligned<SkipValueType>(
147 forwardPointers_ + pos * sizeof(SkipValueType), value);
// Validates that exactly `size` values were added and the last one equals
// the declared upper bound, then hands back the finished list.
155 const MutableCompressedList& finish() const {
156 CHECK_EQ(size_, result_.size);
157 // TODO(ott): Relax this assumption.
158 CHECK_EQ(result_.upperBound, lastValue_);
163 uint8_t* const bits_ = nullptr;
164 uint8_t* const skipPointers_ = nullptr;
165 uint8_t* const forwardPointers_ = nullptr;
// -1 wraps to ValueType's max; paired with the `value + 1 > lastValue_ + 1`
// check so the first add() always passes.
167 ValueType lastValue_ = -1;
169 size_t skipPointersSize_ = 0;
171 MutableCompressedList result_;
// Computes byte sizes/offsets of each section of an encoded list and
// performs allocation. Sections are laid out contiguously in the order:
// bits, skipPointers, forwardPointers.
// NOTE(review): this excerpt is missing lines (e.g. the local `layout`
// declaration, `return` statements, closing braces, and the `bits` size
// member declaration); comments describe only visible code.
174 template <class Value,
177 size_t kForwardQuantum>
178 struct BitVectorEncoder<Value, SkipValue, kSkipQuantum, kForwardQuantum>::
// Derives section sizes from the largest value and the element count.
180 static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
183 layout.upperBound = upperBound;
// Bit vector must cover bit index `upperBound`, hence the +1 byte.
185 size_t bitVectorSizeInBytes = (upperBound / 8) + 1;
186 layout.bits = bitVectorSizeInBytes;
// `?: 1` suppresses a division-by-zero compile error when the quantum
// template argument is 0 (the enclosing `if` is dead then).
188 if (skipQuantum != 0) {
189 size_t numSkipPointers = upperBound / (skipQuantum ?: 1);
190 layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
192 if (forwardQuantum != 0) {
193 size_t numForwardPointers = size / (forwardQuantum ?: 1);
194 layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
// Positions are stored as SkipValueType, so the element count must fit.
197 CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
// Total bytes across all sections.
202 size_t bytes() const { return bits + skipPointers + forwardPointers; }
// Carves a caller-supplied buffer into a list descriptor whose section
// pointers follow this layout.
204 template <class Range>
205 BitVectorCompressedListBase<typename Range::iterator> openList(
207 BitVectorCompressedListBase<typename Range::iterator> result;
209 result.upperBound = upperBound;
210 result.data = buf.subpiece(0, bytes());
// advance(n): return the current buffer head, then consume n bytes.
211 auto advance = [&](size_t n) {
212 auto begin = buf.data();
217 result.bits = advance(bits);
218 result.skipPointers = advance(skipPointers);
219 result.forwardPointers = advance(forwardPointers);
220 CHECK_EQ(buf.data() - result.data.data(), bytes());
// Heap-allocates a buffer for this layout and opens it as a mutable list.
// The +7 slack presumably allows the reader's unaligned 64-bit word loads
// to run past the last byte safely — TODO(review): confirm.
225 MutableCompressedList allocList() const {
226 uint8_t* buf = nullptr;
228 buf = static_cast<uint8_t*>(malloc(bytes() + 7));
230 folly::MutableByteRange bufRange(buf, bytes());
231 return openList(bufRange);
235 size_t upperBound = 0;
// Per-section byte sizes (the `bits` member declaration is missing from
// this excerpt but is used by bytes() above).
239 size_t skipPointers = 0;
240 size_t forwardPointers = 0;
// Sequential/seekable reader over a BitVectorEncoder-produced list.
// Iterates set bits of the bit vector; skip()/skipTo() use the optional
// forward/skip pointers to avoid linear scans. With kUnchecked = true,
// bounds checks against size_/upperBound_ are compiled out.
// NOTE(review): this excerpt is missing many lines (method headers such
// as next()/previousValue, several members like position_/outer_/inner_,
// return statements, closing braces); comments describe only visible code.
243 template <class Encoder,
244 class Instructions = instructions::Default,
245 bool kUnchecked = false>
246 class BitVectorReader {
248 typedef Encoder EncoderType;
249 typedef typename Encoder::ValueType ValueType;
250 typedef typename Encoder::SkipValueType SkipValueType;
252 explicit BitVectorReader(const typename Encoder::CompressedList& list)
255 skipPointers_(list.skipPointers),
256 forwardPointers_(list.forwardPointers) {
259 if (kUnchecked || UNLIKELY(list.size == 0)) {
264 upperBound_ = list.upperBound;
// Prime the first 64-bit word; empty lists may have a null bits pointer.
268 block_ = (bits_ != nullptr) ? folly::loadUnaligned<uint64_t>(bits_) : 0;
272 value_ = kInvalidValue;
276 if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
// Advance to the next non-empty 64-bit word of the bit vector.
280 while (block_ == 0) {
281 outer_ += sizeof(uint64_t);
282 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
// Lowest set bit is the current value's offset; clear it for next time.
286 inner_ = Instructions::ctz(block_);
287 block_ = Instructions::blsr(block_);
// Advances by n positions (n-th next set bit).
292 bool skip(size_t n) {
295 if (!kUnchecked && position() + n >= size_) {
298 // Small skip optimization.
299 if (LIKELY(n < kLinearScanThreshold)) {
300 for (size_t i = 0; i < n; ++i) {
308 // Use forward pointer.
309 if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
310 // Workaround to avoid 'division by zero' compile-time error.
311 constexpr size_t q = Encoder::forwardQuantum ?: 1;
313 const size_t steps = position_ / q;
314 const size_t dest = folly::loadUnaligned<SkipValueType>(
315 forwardPointers_ + (steps - 1) * sizeof(SkipValueType));
// Remaining distance after landing on the forward-pointer target.
318 n = position_ + 1 - steps * q;
319 // Correct inner_ will be set at the end.
// Consume whole words until the word containing the n-th set bit.
323 // Find necessary block.
324 while ((cnt = Instructions::popcount(block_)) < n) {
326 outer_ += sizeof(uint64_t);
327 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
330 // Skip to the n-th one in the block.
332 inner_ = select64<Instructions>(block_, n - 1);
// Clear bit inner_ and everything below it (double shift avoids UB of
// shifting by 64 when inner_ == 63).
333 block_ &= (uint64_t(-1) << inner_) << 1;
// Advances to the first value >= v.
338 bool skipTo(ValueType v) {
339 // Also works when value_ == kInvalidValue.
340 if (v != kInvalidValue) { DCHECK_GE(v + 1, value_ + 1); }
342 if (!kUnchecked && v > upperBound_) {
344 } else if (v == value_) {
348 // Small skip optimization.
349 if (v - value_ < kLinearScanThreshold) {
352 } while (value() < v);
// Use skip pointer to land just before the quantum boundary <= v.
357 if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) {
358 size_t q = v / Encoder::skipQuantum;
359 position_ = folly::loadUnaligned<SkipValueType>(
360 skipPointers_ + (q - 1) * sizeof(SkipValueType)) - 1;
362 reposition(q * Encoder::skipQuantum);
// Walk word-by-word up to the word containing bit v, counting set bits
// to maintain position_.
366 size_t outer = v / 64 * 8;
368 while (outer_ < outer) {
369 position_ += Instructions::popcount(block_);
370 outer_ += sizeof(uint64_t);
371 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
374 DCHECK_EQ(outer_, outer);
// mask keeps bits >= v % 64; count the set bits strictly below v.
375 uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1);
376 position_ += Instructions::popcount(block_ & ~mask) + 1;
379 while (block_ == 0) {
380 outer_ += sizeof(uint64_t);
381 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
384 inner_ = Instructions::ctz(block_);
385 block_ = Instructions::blsr(block_);
391 size_t size() const { return size_; }
394 return position() < size(); // Also checks that position() != -1.
397 size_t position() const { return position_; }
398 ValueType value() const {
// Absolute jump to the n-th element (by position).
403 bool jump(size_t n) {
// Absolute jump to the first value >= v.
408 bool jumpTo(ValueType v) {
414 value_ = kInvalidValue;
420 constexpr static ValueType kInvalidValue =
421 std::numeric_limits<ValueType>::max(); // Must hold kInvalidValue + 1 == 0.
// Reconstruct the current value from word offset (bytes) and bit index.
424 value_ = static_cast<ValueType>(8 * outer_ + inner_);
// Re-seat the word cursor at absolute bit index `dest`, masking off bits
// below dest so iteration resumes from there.
428 void reposition(size_t dest) {
429 outer_ = dest / 64 * 8;
430 // We maintain the invariant that outer_ is divisible by 8.
431 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
432 block_ &= ~((uint64_t(1) << (dest % 64)) - 1);
// Skips shorter than this use a plain next() loop instead of pointers.
435 constexpr static size_t kLinearScanThreshold = 4;
444 ValueType upperBound_;
445 const uint8_t* const bits_;
446 const uint8_t* const skipPointers_;
447 const uint8_t* const forwardPointers_;
452 #endif // FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H