folly/Varint.h

   1 /*
   2  * Copyright 2017 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <type_traits>
  20
  21 #include <folly/Conv.h>
  22 #include <folly/Expected.h>
  23 #include <folly/Likely.h>
  24 #include <folly/Range.h>
  25
  26 namespace folly {
  27
  28 /**
  29  * Variable-length integer encoding, using a little-endian, base-128
  30  * representation.
  31  *
  32  * The MSb is set on all bytes except the last.
  33  *
  34  * Details:
  35  * https://developers.google.com/protocol-buffers/docs/encoding#varints
  36  *
  37  * If you want to encode multiple values, GroupVarint (in GroupVarint.h)
  38  * is faster and likely smaller.
  39  */
  40
  41 /**
  42  * Maximum length (in bytes) of the varint encoding of a 32-bit value.
  43  */
  44 constexpr size_t kMaxVarintLength32 = 5;
  45
  46 /**
  47  * Maximum length (in bytes) of the varint encoding of a 64-bit value.
  48  */
  49 constexpr size_t kMaxVarintLength64 = 10;
  50
  51 /**
  52  * Encode a value in the given buffer, returning the number of bytes used
  53  * for encoding.
  54  * buf must have enough space to represent the value (at least
  55  * kMaxVarintLength64 bytes to encode arbitrary 64-bit values)
  56  */
  57 size_t encodeVarint(uint64_t val, uint8_t* buf);
  58
  59 /**
  60  * Decode a value from a given buffer, advances data past the returned value.
  61  * Throws on error.
  62  */
  63 template <class T>
  64 uint64_t decodeVarint(Range<T*>& data);
  65
  66 enum class DecodeVarintError {
  67   TooManyBytes = 0,
  68   TooFewBytes = 1,
  69 };
  70
  71 /**
  72  * A variant of decodeVarint() that does not throw on error. Useful in contexts
  73  * where only part of a serialized varint may be attempted to be decoded, e.g.,
  74  * when a serialized varint arrives on the boundary of a network packet.
  75  */
  76 template <class T>
  77 Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data);
  78
  79 /**
  80  * ZigZag encoding that maps signed integers with a small absolute value
  81  * to unsigned integers with a small (positive) values. Without this,
  82  * encoding negative values using Varint would use up 9 or 10 bytes.
  83  *
  84  * if x >= 0, encodeZigZag(x) == 2*x
  85  * if x <  0, encodeZigZag(x) == -2*x + 1
  86  */
  87
  88 inline uint64_t encodeZigZag(int64_t val) {
  89   // Bit-twiddling magic stolen from the Google protocol buffer document;
  90   // val >> 63 is an arithmetic shift because val is signed
  91   auto uval = static_cast<uint64_t>(val);
  92   return static_cast<uint64_t>((uval << 1) ^ (val >> 63));
  93 }
  94
  95 inline int64_t decodeZigZag(uint64_t val) {
  96   return static_cast<int64_t>((val >> 1) ^ -(val & 1));
  97 }
  98
  99 // Implementation below
 100
 101 inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
 102   uint8_t* p = buf;
 103   while (val >= 128) {
 104     *p++ = 0x80 | (val & 0x7f);
 105     val >>= 7;
 106   }
 107   *p++ = uint8_t(val);
 108   return size_t(p - buf);
 109 }
 110
 111 template <class T>
 112 inline uint64_t decodeVarint(Range<T*>& data) {
 113   auto expected = tryDecodeVarint(data);
 114   if (!expected) {
 115     throw std::invalid_argument(
 116         expected.error() == DecodeVarintError::TooManyBytes
 117             ? "Invalid varint value: too many bytes."
 118             : "Invalid varint value: too few bytes.");
 119   }
 120   return *expected;
 121 }
 122
 123 template <class T>
 124 inline Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data) {
 125   static_assert(
 126       std::is_same<typename std::remove_cv<T>::type, char>::value ||
 127           std::is_same<typename std::remove_cv<T>::type, unsigned char>::value,
 128       "Only character ranges are supported");
 129
 130   const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
 131   const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
 132   const int8_t* p = begin;
 133   uint64_t val = 0;
 134
 135   // end is always greater than or equal to begin, so this subtraction is safe
 136   if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) {  // fast path
 137     int64_t b;
 138     do {
 139       b = *p++; val  = (b & 0x7f)      ; if (b >= 0) break;
 140       b = *p++; val |= (b & 0x7f) <<  7; if (b >= 0) break;
 141       b = *p++; val |= (b & 0x7f) << 14; if (b >= 0) break;
 142       b = *p++; val |= (b & 0x7f) << 21; if (b >= 0) break;
 143       b = *p++; val |= (b & 0x7f) << 28; if (b >= 0) break;
 144       b = *p++; val |= (b & 0x7f) << 35; if (b >= 0) break;
 145       b = *p++; val |= (b & 0x7f) << 42; if (b >= 0) break;
 146       b = *p++; val |= (b & 0x7f) << 49; if (b >= 0) break;
 147       b = *p++; val |= (b & 0x7f) << 56; if (b >= 0) break;
 148       b = *p++; val |= (b & 0x7f) << 63; if (b >= 0) break;
 149       return makeUnexpected(DecodeVarintError::TooManyBytes);
 150     } while (false);
 151   } else {
 152     int shift = 0;
 153     while (p != end && *p < 0) {
 154       val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
 155       shift += 7;
 156     }
 157     if (p == end) {
 158       return makeUnexpected(DecodeVarintError::TooFewBytes);
 159     }
 160     val |= static_cast<uint64_t>(*p++) << shift;
 161   }
 162
 163   data.uncheckedAdvance(p - begin);
 164   return val;
 165 }
 166
 167 } // folly