/*
 * Copyright 2015 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
#define FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H

#include <cstdlib>
#include <cstring>
#include <limits>
#include <type_traits>

#include <folly/Bits.h>
#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/experimental/Bits.h>
#include <folly/experimental/Instructions.h>
#include <folly/experimental/Select64.h>
#include <glog/logging.h>

#ifndef __GNUC__
#error BitVectorCoding.h requires GCC
#endif

#if !FOLLY_X64
#error BitVectorCoding.h requires x86_64
#endif

#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#error BitVectorCoding.h requires little endianness
#endif

namespace folly { namespace compression {

template <class Pointer>
struct BitVectorCompressedListBase {
  BitVectorCompressedListBase() = default;

  template <class OtherPointer>
  BitVectorCompressedListBase(
      const BitVectorCompressedListBase<OtherPointer>& other)
      : size(other.size),
        upperBound(other.upperBound),
        data(other.data),
        bits(reinterpret_cast<Pointer>(other.bits)),
        skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
        forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)) {}

  void free() { ::free(const_cast<unsigned char*>(data.data())); }

  size_t getUpperBound() const { return upperBound; }

  size_t size = 0;
  size_t upperBound = 0;

  folly::Range<Pointer> data;

  Pointer bits = nullptr;
  Pointer skipPointers = nullptr;
  Pointer forwardPointers = nullptr;
};

typedef BitVectorCompressedListBase<const uint8_t*> BitVectorCompressedList;
typedef BitVectorCompressedListBase<uint8_t*> MutableBitVectorCompressedList;
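
// Illustrative usage sketch (not part of the original header; the value
// type, skip value type and quantum choices below are arbitrary):
//
//   std::vector<uint32_t> data = {1, 5, 42, 1000};
//   typedef BitVectorEncoder<uint32_t, uint32_t, 128, 128> Encoder;
//   auto list = Encoder::encode(data.begin(), data.end());
//   BitVectorReader<Encoder> reader(list);
//   while (reader.next()) {
//     // reader.value() yields 1, 5, 42, 1000 in ascending order.
//   }
//   list.free();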

template <class Value,
          class SkipValue,
          size_t kSkipQuantum = 0,     // 0 = disabled
          size_t kForwardQuantum = 0>  // 0 = disabled
struct BitVectorEncoder {
  static_assert(std::is_integral<Value>::value &&
                    std::is_unsigned<Value>::value,
                "Value should be unsigned integral");

  typedef BitVectorCompressedList CompressedList;

  typedef Value ValueType;
  typedef SkipValue SkipValueType;
  struct Layout;

  static constexpr size_t skipQuantum = kSkipQuantum;
  static constexpr size_t forwardQuantum = kForwardQuantum;
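
  // Semantics of the quanta (inferred from add() below): with
  // skipQuantum > 0, a skip pointer is stored for every skipQuantum
  // positions of the universe [0, upperBound], recording how many values
  // precede that position; with forwardQuantum > 0, a forward pointer is
  // stored for every forwardQuantum-th value added, recording the value
  // itself. A quantum of 0 disables the corresponding index.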

  template <class RandomAccessIterator>
  static BitVectorCompressedList encode(RandomAccessIterator begin,
                                        RandomAccessIterator end) {
    if (begin == end) {
      return BitVectorCompressedList();
    }
    BitVectorEncoder encoder(end - begin, *(end - 1));
    for (; begin != end; ++begin) {
      encoder.add(*begin);
    }
    return encoder.finish();
  }

  explicit BitVectorEncoder(const MutableBitVectorCompressedList& result)
      : bits_(result.bits),
        skipPointers_(result.skipPointers),
        forwardPointers_(result.forwardPointers),
        result_(result) {
    memset(result.data.data(), 0, result.data.size());
  }

  BitVectorEncoder(size_t size, ValueType upperBound)
      : BitVectorEncoder(
            Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}

  void add(ValueType value) {
    CHECK_GE(value, lastValue_);

    // Set the bit corresponding to value.
    auto block = bits_ + (value / 64) * sizeof(uint64_t);
    size_t inner = value % 64;
    folly::Bits<folly::Unaligned<uint64_t>>::set(
        reinterpret_cast<folly::Unaligned<uint64_t>*>(block), inner);

    if (skipQuantum != 0) {
      size_t nextSkipPointerSize = value / (skipQuantum ?: 1);
      while (skipPointersSize_ < nextSkipPointerSize) {
        auto pos = skipPointersSize_++;
        folly::storeUnaligned<SkipValueType>(
            skipPointers_ + pos * sizeof(SkipValueType), size_);
      }
    }

    if (forwardQuantum != 0) {
      if (size_ != 0 && (size_ % (forwardQuantum ?: 1) == 0)) {
        const auto pos = size_ / (forwardQuantum ?: 1) - 1;
        folly::storeUnaligned<SkipValueType>(
            forwardPointers_ + pos * sizeof(SkipValueType), value);
      }
    }

    lastValue_ = value;
    ++size_;
  }

  const BitVectorCompressedList& finish() const {
    CHECK_EQ(size_, result_.size);
    // TODO(ott): Relax this assumption.
    CHECK_EQ(result_.getUpperBound(), lastValue_);
    return result_;
  }

 private:
  uint8_t* const bits_ = nullptr;
  uint8_t* const skipPointers_ = nullptr;
  uint8_t* const forwardPointers_ = nullptr;

  ValueType lastValue_ = 0;
  size_t size_ = 0;
  size_t skipPointersSize_ = 0;

  BitVectorCompressedList result_;
};
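
// Layout computes where each section lives inside a single contiguous
// buffer: the plain bit vector first, then skip pointers, then forward
// pointers (ordering per openList() below).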

template <class Value,
          class SkipValue,
          size_t kSkipQuantum,
          size_t kForwardQuantum>
struct BitVectorEncoder<Value, SkipValue, kSkipQuantum, kForwardQuantum>::
    Layout {
  static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
    Layout layout;
    layout.size = size;
    layout.upperBound = upperBound;

    size_t bitVectorSizeInBytes = (upperBound / 8) + 1;
    layout.bits = bitVectorSizeInBytes;

    if (skipQuantum != 0) {
      size_t numSkipPointers = upperBound / (skipQuantum ?: 1);
      layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
    }
    if (forwardQuantum != 0) {
      size_t numForwardPointers = size / (forwardQuantum ?: 1);
      layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
    }

    CHECK_LT(size, std::numeric_limits<SkipValueType>::max());

    return layout;
  }
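
  // Worked example (illustrative; assumes SkipValueType is uint32_t):
  // for upperBound = 1000000, size = 50000 and both quanta set to 128,
  //   bits            = 1000000 / 8 + 1     = 125001 bytes,
  //   skipPointers    = (1000000 / 128) * 4 =  31248 bytes,
  //   forwardPointers = (50000 / 128) * 4   =   1560 bytes,
  // so bytes() below returns 157809.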

  size_t bytes() const { return bits + skipPointers + forwardPointers; }

  template <typename Range>
  BitVectorCompressedListBase<typename Range::iterator> openList(
      Range& buf) const {
    BitVectorCompressedListBase<typename Range::iterator> result;
    result.size = size;
    result.upperBound = upperBound;
    result.data = buf.subpiece(0, bytes());
    auto advance = [&](size_t n) {
      auto begin = buf.data();
      buf.advance(n);
      return begin;
    };

    result.bits = advance(bits);
    result.skipPointers = advance(skipPointers);
    result.forwardPointers = advance(forwardPointers);
    CHECK_EQ(buf.data() - result.data.data(), bytes());

    return result;
  }

  MutableBitVectorCompressedList allocList() const {
    uint8_t* buf = nullptr;
    if (size > 0) {
      // Extra 7 bytes of padding keep unaligned 64-bit loads near the end
      // of the bit vector in bounds.
      buf = static_cast<uint8_t*>(malloc(bytes() + 7));
    }
    folly::MutableByteRange bufRange(buf, bytes());
    return openList(bufRange);
  }

  size_t size = 0;
  size_t upperBound = 0;

  size_t bits = 0;
  size_t skipPointers = 0;
  size_t forwardPointers = 0;
};

template <class Encoder,
          class Instructions = instructions::Default,
          bool kUnchecked = false>
class BitVectorReader {
 public:
  typedef Encoder EncoderType;
  typedef typename Encoder::ValueType ValueType;
  typedef typename Encoder::SkipValueType SkipValueType;

  explicit BitVectorReader(const BitVectorCompressedList& list)
      : size_(list.size),
        bits_(list.bits),
        skipPointers_(list.skipPointers),
        forwardPointers_(list.forwardPointers) {
    reset();

    if (kUnchecked || UNLIKELY(list.size == 0)) {
      upperBound_ = 0;
      return;
    }

    upperBound_ = list.getUpperBound();
  }
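
  // Cursor convention (inferred from setValue() and reposition() below):
  // outer_ is the byte offset of the current 64-bit word within bits_,
  // inner_ is the bit index of the current value within that word, and
  // block_ caches the current word with all bits up to and including
  // inner_ cleared, so the lowest set bit of block_ is always the next
  // value to be returned.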

  void reset() {
    block_ = (bits_ != nullptr) ? folly::loadUnaligned<uint64_t>(bits_) : 0;
    outer_ = 0;
    inner_ = -1;
    position_ = -1;
    value_ = 0;
  }

  bool next() {
    if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
      return setDone();
    }

    // Find the next word with a set bit.
    while (block_ == 0) {
      outer_ += sizeof(uint64_t);
      block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    }

    ++position_;
    inner_ = Instructions::ctz(block_);
    block_ = Instructions::blsr(block_);

    return setValue();
  }

  bool skip(size_t n) {
    CHECK_GT(n, 0);

    if (!kUnchecked && position() + n >= size_) {
      return setDone();
    }
    // Small skip optimization.
    if (LIKELY(n < kLinearScanThreshold)) {
      for (size_t i = 0; i < n; ++i) {
        next();
      }
      return true;
    }

    position_ += n;

    // Use forward pointer.
    if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
      // Workaround to avoid 'division by zero' compile-time error.
      constexpr size_t q = Encoder::forwardQuantum ?: 1;

      const size_t steps = position_ / q;
      const size_t dest = folly::loadUnaligned<SkipValueType>(
          forwardPointers_ + (steps - 1) * sizeof(SkipValueType));

      reposition(dest);
      n = position_ + 1 - steps * q;
      // Correct inner_ will be set at the end.
    }

    size_t cnt;
    // Find necessary block.
    while ((cnt = Instructions::popcount(block_)) < n) {
      n -= cnt;
      outer_ += sizeof(uint64_t);
      block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    }

    // Skip to the n-th one in the block.
    DCHECK_GT(n, 0);
    inner_ = select64<Instructions>(block_, n - 1);
    block_ &= (uint64_t(-1) << inner_) << 1;

    return setValue();
  }

  bool skipTo(ValueType v) {
    DCHECK_GE(v, value_);
    if (v <= value_) {
      return true;
    } else if (!kUnchecked && v > upperBound_) {
      return setDone();
    }

    // Small skip optimization.
    if (v - value_ < kLinearScanThreshold) {
      do {
        next();
      } while (value() < v);

      return true;
    }

    // Use skip pointer.
    if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) {
      size_t q = v / Encoder::skipQuantum;
      position_ = folly::loadUnaligned<SkipValueType>(
                      skipPointers_ + (q - 1) * sizeof(SkipValueType)) - 1;

      reposition(q * Encoder::skipQuantum);
    }

    // Scan up to the word containing v.
    size_t outer = v / 64 * 8;

    while (outer_ < outer) {
      position_ += Instructions::popcount(block_);
      outer_ += sizeof(uint64_t);
      block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    }

    DCHECK_EQ(outer_, outer);
    uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1);
    position_ += Instructions::popcount(block_ & ~mask) + 1;
    block_ &= mask;

    // Find the first set bit at or after v.
    while (block_ == 0) {
      outer_ += sizeof(uint64_t);
      block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    }

    inner_ = Instructions::ctz(block_);
    block_ = Instructions::blsr(block_);

    return setValue();
  }

  size_t size() const { return size_; }

  size_t position() const { return position_; }

  ValueType value() const { return value_; }

  bool jump(size_t n) {
    reset();
    return skip(n + 1);
  }

  bool jumpTo(ValueType v) {
    reset();
    return skipTo(v);
  }

  bool setDone() {
    value_ = std::numeric_limits<ValueType>::max();
    position_ = size_;
    return false;
  }

 private:
  bool setValue() {
    value_ = static_cast<ValueType>(8 * outer_ + inner_);
    return true;
  }

  void reposition(size_t dest) {
    outer_ = dest / 64 * 8;
    // We maintain the invariant that outer_ is divisible by 8.
    block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
    block_ &= ~((uint64_t(1) << (dest % 64)) - 1);
  }
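
  // Skips shorter than this are cheaper as a linear next() scan than as
  // a pointer-chasing skip; the exact value is presumably an empirical
  // tuning choice.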
  constexpr static size_t kLinearScanThreshold = 4;

  size_t outer_;
  size_t inner_;
  size_t position_;
  uint64_t block_;
  ValueType value_ = 0;

  size_t size_;
  ValueType upperBound_;
  const uint8_t* const bits_;
  const uint8_t* const skipPointers_;
  const uint8_t* const forwardPointers_;
};

}}  // namespaces

#endif  // FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H