2 * Copyright 2012 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_GROUPVARINT_H_
18 #define FOLLY_GROUPVARINT_H_
21 #error GroupVarint.h requires GCC
24 #if !defined(__x86_64__) && !defined(__i386__)
25 #error GroupVarint.h requires x86_64 or i386
30 #include "folly/detail/GroupVarintDetail.h"
31 #include "folly/Range.h"
32 #include <glog/logging.h>
35 #include <x86intrin.h>
38 extern const __m128i groupVarintSSEMasks[];
45 extern const uint8_t groupVarintLengths[];
55 * GroupVarint encoding for 32-bit values.
57 * Encodes 4 32-bit integers at once, each using 1-4 bytes depending on size.
58 * There is one byte of overhead. (The first byte contains the lengths of
59 * the four integers encoded as two bits each; 00=1 byte .. 11=4 bytes)
61 * This implementation assumes little-endian and does unaligned 32-bit
62 * accesses, so it's basically not portable outside of the x86[_64] world.
65 class GroupVarint<uint32_t> : public detail::GroupVarintBase<uint32_t> {
69 * Return the number of bytes used to encode these four values.
71 static size_t size(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
72 return kHeaderSize + kGroupSize + key(a) + key(b) + key(c) + key(d);
76 * Return the number of bytes used to encode four uint32_t values stored
77 * at consecutive positions in an array.
79 static size_t size(const uint32_t* p) {
80 return size(p[0], p[1], p[2], p[3]);
84 * Return the number of bytes used to encode count (<= 4) values.
85 * If you clip a buffer after these many bytes, you can still decode
86 * the first "count" values correctly (if the remaining size() -
87 * partialSize() bytes are filled with garbage).
89 static size_t partialSize(const type* p, size_t count) {
90 DCHECK_LE(count, kGroupSize);
91 size_t s = kHeaderSize + count;
92 for (; count; --count, ++p) {
99 * Return the number of values from *p that are valid from an encoded
100 * buffer of size bytes.
102 static size_t partialCount(const char* p, size_t size) {
104 size_t s = kHeaderSize;
106 if (s > size) return 0;
108 if (s > size) return 1;
110 if (s > size) return 2;
112 if (s > size) return 3;
117 * Given a pointer to the beginning of an GroupVarint32-encoded block,
118 * return the number of bytes used by the encoding.
120 static size_t encodedSize(const char* p) {
121 return (kHeaderSize + kGroupSize +
122 b0key(*p) + b1key(*p) + b2key(*p) + b3key(*p));
126 * Encode four uint32_t values into the buffer pointed-to by p, and return
127 * the next position in the buffer (that is, one character past the last
128 * encoded byte). p needs to have at least size()+4 bytes available.
130 static char* encode(char* p, uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
131 uint8_t b0key = key(a);
132 uint8_t b1key = key(b);
133 uint8_t b2key = key(c);
134 uint8_t b3key = key(d);
135 *p++ = (b3key << 6) | (b2key << 4) | (b1key << 2) | b0key;
136 *reinterpret_cast<uint32_t*>(p) = a;
138 *reinterpret_cast<uint32_t*>(p) = b;
140 *reinterpret_cast<uint32_t*>(p) = c;
142 *reinterpret_cast<uint32_t*>(p) = d;
148 * Encode four uint32_t values from the array pointed-to by src into the
149 * buffer pointed-to by p, similar to encode(p,a,b,c,d) above.
151 static char* encode(char* p, const uint32_t* src) {
152 return encode(p, src[0], src[1], src[2], src[3]);
156 * Decode four uint32_t values from a buffer, and return the next position
157 * in the buffer (that is, one character past the last encoded byte).
158 * The buffer needs to have at least 3 extra bytes available (they
159 * may be read but ignored).
161 static const char* decode_simple(const char* p, uint32_t* a, uint32_t* b,
162 uint32_t* c, uint32_t* d) {
163 size_t k = *reinterpret_cast<const uint8_t*>(p);
164 const char* end = p + detail::groupVarintLengths[k];
166 size_t k0 = b0key(k);
167 *a = *reinterpret_cast<const uint32_t*>(p) & kMask[k0];
169 size_t k1 = b1key(k);
170 *b = *reinterpret_cast<const uint32_t*>(p) & kMask[k1];
172 size_t k2 = b2key(k);
173 *c = *reinterpret_cast<const uint32_t*>(p) & kMask[k2];
175 size_t k3 = b3key(k);
176 *d = *reinterpret_cast<const uint32_t*>(p) & kMask[k3];
182 * Decode four uint32_t values from a buffer and store them in the array
183 * pointed-to by dest, similar to decode(p,a,b,c,d) above.
185 static const char* decode_simple(const char* p, uint32_t* dest) {
186 return decode_simple(p, dest, dest+1, dest+2, dest+3);
190 static const char* decode(const char* p, uint32_t* dest) {
192 __m128i val = _mm_loadu_si128((const __m128i*)(p+1));
193 __m128i mask = detail::groupVarintSSEMasks[key];
194 __m128i r = _mm_shuffle_epi8(val, mask);
195 _mm_storeu_si128((__m128i*)dest, r);
196 return p + detail::groupVarintLengths[key];
199 static const char* decode(const char* p, uint32_t* a, uint32_t* b,
200 uint32_t* c, uint32_t* d) {
202 __m128i val = _mm_loadu_si128((const __m128i*)(p+1));
203 __m128i mask = detail::groupVarintSSEMasks[key];
204 __m128i r = _mm_shuffle_epi8(val, mask);
206 // Extracting 32 bits at a time out of an XMM register is a SSE4 feature
208 *a = _mm_extract_epi32(r, 0);
209 *b = _mm_extract_epi32(r, 1);
210 *c = _mm_extract_epi32(r, 2);
211 *d = _mm_extract_epi32(r, 3);
212 #else /* !__SSE4__ */
213 *a = _mm_extract_epi16(r, 0) + (_mm_extract_epi16(r, 1) << 16);
214 *b = _mm_extract_epi16(r, 2) + (_mm_extract_epi16(r, 3) << 16);
215 *c = _mm_extract_epi16(r, 4) + (_mm_extract_epi16(r, 5) << 16);
216 *d = _mm_extract_epi16(r, 6) + (_mm_extract_epi16(r, 7) << 16);
217 #endif /* __SSE4__ */
219 return p + detail::groupVarintLengths[key];
222 #else /* !__SSSE3__ */
223 static const char* decode(const char* p, uint32_t* a, uint32_t* b,
224 uint32_t* c, uint32_t* d) {
225 return decode_simple(p, a, b, c, d);
228 static const char* decode(const char* p, uint32_t* dest) {
229 return decode_simple(p, dest);
231 #endif /* __SSSE3__ */
234 static uint8_t key(uint32_t x) {
235 // __builtin_clz is undefined for the x==0 case
236 return 3 - (__builtin_clz(x|1) / 8);
238 static size_t b0key(size_t x) { return x & 3; }
239 static size_t b1key(size_t x) { return (x >> 2) & 3; }
240 static size_t b2key(size_t x) { return (x >> 4) & 3; }
241 static size_t b3key(size_t x) { return (x >> 6) & 3; }
243 static const uint32_t kMask[];
248 * GroupVarint encoding for 64-bit values.
250 * Encodes 5 64-bit integers at once, each using 1-8 bytes depending on size.
251 * There are two bytes of overhead. (The first two bytes contain the lengths
252 * of the five integers encoded as three bits each; 000=1 byte .. 111 = 8 bytes)
254 * This implementation assumes little-endian and does unaligned 64-bit
255 * accesses, so it's basically not portable outside of the x86[_64] world.
258 class GroupVarint<uint64_t> : public detail::GroupVarintBase<uint64_t> {
261 * Return the number of bytes used to encode these five values.
263 static size_t size(uint64_t a, uint64_t b, uint64_t c, uint64_t d,
265 return (kHeaderSize + kGroupSize +
266 key(a) + key(b) + key(c) + key(d) + key(e));
270 * Return the number of bytes used to encode five uint64_t values stored
271 * at consecutive positions in an array.
273 static size_t size(const uint64_t* p) {
274 return size(p[0], p[1], p[2], p[3], p[4]);
278 * Return the number of bytes used to encode count (<= 4) values.
279 * If you clip a buffer after these many bytes, you can still decode
280 * the first "count" values correctly (if the remaining size() -
281 * partialSize() bytes are filled with garbage).
283 static size_t partialSize(const type* p, size_t count) {
284 DCHECK_LE(count, kGroupSize);
285 size_t s = kHeaderSize + count;
286 for (; count; --count, ++p) {
293 * Return the number of values from *p that are valid from an encoded
294 * buffer of size bytes.
296 static size_t partialCount(const char* p, size_t size) {
297 uint16_t v = *reinterpret_cast<const uint16_t*>(p);
298 size_t s = kHeaderSize;
300 if (s > size) return 0;
302 if (s > size) return 1;
304 if (s > size) return 2;
306 if (s > size) return 3;
308 if (s > size) return 4;
313 * Given a pointer to the beginning of an GroupVarint64-encoded block,
314 * return the number of bytes used by the encoding.
316 static size_t encodedSize(const char* p) {
317 uint16_t n = *reinterpret_cast<const uint16_t*>(p);
318 return (kHeaderSize + kGroupSize +
319 b0key(n) + b1key(n) + b2key(n) + b3key(n) + b4key(n));
323 * Encode five uint64_t values into the buffer pointed-to by p, and return
324 * the next position in the buffer (that is, one character past the last
325 * encoded byte). p needs to have at least size()+8 bytes available.
327 static char* encode(char* p, uint64_t a, uint64_t b, uint64_t c,
328 uint64_t d, uint64_t e) {
329 uint8_t b0key = key(a);
330 uint8_t b1key = key(b);
331 uint8_t b2key = key(c);
332 uint8_t b3key = key(d);
333 uint8_t b4key = key(e);
334 *reinterpret_cast<uint16_t*>(p) =
335 (b4key << 12) | (b3key << 9) | (b2key << 6) | (b1key << 3) | b0key;
337 *reinterpret_cast<uint64_t*>(p) = a;
339 *reinterpret_cast<uint64_t*>(p) = b;
341 *reinterpret_cast<uint64_t*>(p) = c;
343 *reinterpret_cast<uint64_t*>(p) = d;
345 *reinterpret_cast<uint64_t*>(p) = e;
351 * Encode five uint64_t values from the array pointed-to by src into the
352 * buffer pointed-to by p, similar to encode(p,a,b,c,d,e) above.
354 static char* encode(char* p, const uint64_t* src) {
355 return encode(p, src[0], src[1], src[2], src[3], src[4]);
359 * Decode five uint64_t values from a buffer, and return the next position
360 * in the buffer (that is, one character past the last encoded byte).
361 * The buffer needs to have at least 7 bytes available (they may be read
364 static const char* decode(const char* p, uint64_t* a, uint64_t* b,
365 uint64_t* c, uint64_t* d, uint64_t* e) {
366 uint16_t k = *reinterpret_cast<const uint16_t*>(p);
368 uint8_t k0 = b0key(k);
369 *a = *reinterpret_cast<const uint64_t*>(p) & kMask[k0];
371 uint8_t k1 = b1key(k);
372 *b = *reinterpret_cast<const uint64_t*>(p) & kMask[k1];
374 uint8_t k2 = b2key(k);
375 *c = *reinterpret_cast<const uint64_t*>(p) & kMask[k2];
377 uint8_t k3 = b3key(k);
378 *d = *reinterpret_cast<const uint64_t*>(p) & kMask[k3];
380 uint8_t k4 = b4key(k);
381 *e = *reinterpret_cast<const uint64_t*>(p) & kMask[k4];
387 * Decode five uint64_t values from a buffer and store them in the array
388 * pointed-to by dest, similar to decode(p,a,b,c,d,e) above.
390 static const char* decode(const char* p, uint64_t* dest) {
391 return decode(p, dest, dest+1, dest+2, dest+3, dest+4);
395 enum { kHeaderBytes = 2 };
397 static uint8_t key(uint64_t x) {
398 // __builtin_clzll is undefined for the x==0 case
399 return 7 - (__builtin_clzll(x|1) / 8);
402 static uint8_t b0key(uint16_t x) { return x & 7; }
403 static uint8_t b1key(uint16_t x) { return (x >> 3) & 7; }
404 static uint8_t b2key(uint16_t x) { return (x >> 6) & 7; }
405 static uint8_t b3key(uint16_t x) { return (x >> 9) & 7; }
406 static uint8_t b4key(uint16_t x) { return (x >> 12) & 7; }
408 static const uint64_t kMask[];
411 typedef GroupVarint<uint32_t> GroupVarint32;
412 typedef GroupVarint<uint64_t> GroupVarint64;
415 * Simplify use of GroupVarint* for the case where data is available one
416 * entry at a time (instead of one group at a time). Handles buffering
417 * and an incomplete last chunk.
419 * Output is a function object that accepts character ranges:
420 * out(StringPiece) appends the given character range to the output.
422 template <class T, class Output>
423 class GroupVarintEncoder {
425 typedef GroupVarint<T> Base;
428 explicit GroupVarintEncoder(Output out)
433 ~GroupVarintEncoder() {
438 * Add a value to the encoder.
441 buf_[count_++] = val;
442 if (count_ == Base::kGroupSize) {
443 char* p = Base::encode(tmp_, buf_);
444 out_(StringPiece(tmp_, p));
450 * Finish encoding, flushing any buffered values if necessary.
451 * After finish(), the encoder is immediately ready to encode more data
452 * to the same output.
456 // This is not strictly necessary, but it makes testing easy;
457 // uninitialized bytes are guaranteed to be recorded as taking one byte
459 for (size_t i = count_; i < Base::kGroupSize; i++) {
462 Base::encode(tmp_, buf_);
463 out_(StringPiece(tmp_, Base::partialSize(buf_, count_)));
469 * Return the appender that was used.
474 const Output& output() const {
479 * Reset the encoder, disregarding any state (except what was already
480 * flushed to the output, of course).
488 char tmp_[Base::kMaxSize];
489 type buf_[Base::kGroupSize];
494 * Simplify use of GroupVarint* for the case where the last group in the
495 * input may be incomplete (but the exact size of the input is known).
496 * Allows for extracting values one at a time.
498 template <typename T>
499 class GroupVarintDecoder {
501 typedef GroupVarint<T> Base;
504 GroupVarintDecoder() { }
506 explicit GroupVarintDecoder(StringPiece data,
507 size_t maxCount = (size_t)-1)
509 end_(data.data() + data.size()),
512 remaining_(maxCount) {
515 void reset(StringPiece data, size_t maxCount=(size_t)-1) {
517 end_ = data.data() + data.size();
520 remaining_ = maxCount;
524 * Read and return the next value.
526 bool next(type* val) {
527 if (pos_ == count_) {
529 size_t rem = end_ - p_;
530 if (rem == 0 || remaining_ == 0) {
533 // next() attempts to read one full group at a time, and so we must have
534 // at least enough bytes readable after its end to handle the case if the
535 // last group is full.
537 // The best way to ensure this is to ensure that data has at least
538 // Base::kMaxSize - 1 bytes readable *after* the end, otherwise we'll copy
539 // into a temporary buffer.
540 if (rem < Base::kMaxSize) {
541 memcpy(tmp_, p_, rem);
546 const char* n = Base::decode(p_, buf_);
548 // Full group could be decoded
549 if (remaining_ >= Base::kGroupSize) {
550 remaining_ -= Base::kGroupSize;
551 count_ = Base::kGroupSize;
556 p_ += Base::partialSize(buf_, count_);
559 // Can't decode a full group
560 count_ = Base::partialCount(p_, end_ - p_);
561 if (remaining_ >= count_) {
562 remaining_ -= count_;
567 p_ += Base::partialSize(buf_, count_);
578 StringPiece rest() const {
579 // This is only valid after next() returned false
580 CHECK(pos_ == count_ && (p_ == end_ || remaining_ == 0));
581 return StringPiece(p_, end_ - p_);
587 char tmp_[Base::kMaxSize];
588 type buf_[Base::kGroupSize];
594 typedef GroupVarintDecoder<uint32_t> GroupVarint32Decoder;
595 typedef GroupVarintDecoder<uint64_t> GroupVarint64Decoder;
599 #endif /* FOLLY_GROUPVARINT_H_ */