folly/Bits.h

   1 /*
   2  * Copyright 2017 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * Various low-level, bit-manipulation routines.
  19  *
  20  * findFirstSet(x)  [constexpr]
  21  *    find first (least significant) bit set in a value of an integral type,
  22  *    1-based (like ffs()).  0 = no bits are set (x == 0)
  23  *
  24  * findLastSet(x)  [constexpr]
  25  *    find last (most significant) bit set in a value of an integral type,
  26  *    1-based.  0 = no bits are set (x == 0)
  27  *    for x != 0, findLastSet(x) == 1 + floor(log2(x))
  28  *
  29  * nextPowTwo(x)  [constexpr]
  30  *    Finds the next power of two >= x.
  31  *
  32  * isPowTwo(x)  [constexpr]
  33  *    return true iff x is a power of two
  34  *
  35  * popcount(x)
  36  *    return the number of 1 bits in x
  37  *
  38  * Endian
  39  *    convert between native, big, and little endian representation
  40  *    Endian::big(x)      big <-> native
  41  *    Endian::little(x)   little <-> native
  42  *    Endian::swap(x)     big <-> little
  43  *
  44  * BitIterator
  45  *    Wrapper around an iterator over an integral type that iterates
  46  *    over its underlying bits in MSb to LSb order
  47  *
  48  * findFirstSet(BitIterator begin, BitIterator end)
  49  *    return a BitIterator pointing to the first 1 bit in [begin, end), or
  50  *    end if all bits in [begin, end) are 0
  51  *
  52  * @author Tudor Bosman (tudorb@fb.com)
  53  */
  54
  55 #pragma once
  56
  57 // MSVC does not support intrinsics constexpr
  58 #if defined(_MSC_VER)
  59 #define FOLLY_INTRINSIC_CONSTEXPR const
  60 #else
  61 #define FOLLY_INTRINSIC_CONSTEXPR constexpr
  62 #endif
  63
  64 #include <cassert>
  65 #include <cinttypes>
  66 #include <cstdint>
  67 #include <cstring>
  68 #include <iterator>
  69 #include <limits>
  70 #include <type_traits>
  71
  72 #include <boost/iterator/iterator_adaptor.hpp>
  73
  74 #include <folly/Assume.h>
  75 #include <folly/Likely.h>
  76 #include <folly/Portability.h>
  77 #include <folly/detail/BitIteratorDetail.h>
  78 #include <folly/portability/Builtins.h>
  79
  80 namespace folly {
  81
  82 // Generate overloads for findFirstSet as wrappers around
  83 // appropriate ffs, ffsl, ffsll gcc builtins
  84 template <class T>
  85 inline FOLLY_INTRINSIC_CONSTEXPR
  86 typename std::enable_if<
  87   (std::is_integral<T>::value &&
  88    std::is_unsigned<T>::value &&
  89    sizeof(T) <= sizeof(unsigned int)),
  90   unsigned int>::type
  91   findFirstSet(T x) {
  92   return static_cast<unsigned int>(__builtin_ffs(static_cast<int>(x)));
  93 }
  94
  95 template <class T>
  96 inline FOLLY_INTRINSIC_CONSTEXPR
  97 typename std::enable_if<
  98   (std::is_integral<T>::value &&
  99    std::is_unsigned<T>::value &&
 100    sizeof(T) > sizeof(unsigned int) &&
 101    sizeof(T) <= sizeof(unsigned long)),
 102   unsigned int>::type
 103   findFirstSet(T x) {
 104   return static_cast<unsigned int>(__builtin_ffsl(static_cast<long>(x)));
 105 }
 106
 107 template <class T>
 108 inline FOLLY_INTRINSIC_CONSTEXPR
 109 typename std::enable_if<
 110   (std::is_integral<T>::value &&
 111    std::is_unsigned<T>::value &&
 112    sizeof(T) > sizeof(unsigned long) &&
 113    sizeof(T) <= sizeof(unsigned long long)),
 114   unsigned int>::type
 115   findFirstSet(T x) {
 116   return static_cast<unsigned int>(__builtin_ffsll(static_cast<long long>(x)));
 117 }
 118
 119 template <class T>
 120 inline FOLLY_INTRINSIC_CONSTEXPR
 121 typename std::enable_if<
 122   (std::is_integral<T>::value && std::is_signed<T>::value),
 123   unsigned int>::type
 124   findFirstSet(T x) {
 125   // Note that conversion from a signed type to the corresponding unsigned
 126   // type is technically implementation-defined, but will likely work
 127   // on any impementation that uses two's complement.
 128   return findFirstSet(static_cast<typename std::make_unsigned<T>::type>(x));
 129 }
 130
 131 // findLastSet: return the 1-based index of the highest bit set
 132 // for x > 0, findLastSet(x) == 1 + floor(log2(x))
 133 template <class T>
 134 inline FOLLY_INTRINSIC_CONSTEXPR
 135 typename std::enable_if<
 136   (std::is_integral<T>::value &&
 137    std::is_unsigned<T>::value &&
 138    sizeof(T) <= sizeof(unsigned int)),
 139   unsigned int>::type
 140   findLastSet(T x) {
 141   // If X is a power of two X - Y = ((X - 1) ^ Y) + 1. Doing this transformation
 142   // allows GCC to remove its own xor that it adds to implement clz using bsr
 143   return x ? ((8 * sizeof(unsigned int) - 1) ^ __builtin_clz(x)) + 1 : 0;
 144 }
 145
 146 template <class T>
 147 inline FOLLY_INTRINSIC_CONSTEXPR
 148 typename std::enable_if<
 149   (std::is_integral<T>::value &&
 150    std::is_unsigned<T>::value &&
 151    sizeof(T) > sizeof(unsigned int) &&
 152    sizeof(T) <= sizeof(unsigned long)),
 153   unsigned int>::type
 154   findLastSet(T x) {
 155   return x ? ((8 * sizeof(unsigned long) - 1) ^ __builtin_clzl(x)) + 1 : 0;
 156 }
 157
 158 template <class T>
 159 inline FOLLY_INTRINSIC_CONSTEXPR
 160 typename std::enable_if<
 161   (std::is_integral<T>::value &&
 162    std::is_unsigned<T>::value &&
 163    sizeof(T) > sizeof(unsigned long) &&
 164    sizeof(T) <= sizeof(unsigned long long)),
 165   unsigned int>::type
 166   findLastSet(T x) {
 167   return x ? ((8 * sizeof(unsigned long long) - 1) ^ __builtin_clzll(x)) + 1
 168            : 0;
 169 }
 170
 171 template <class T>
 172 inline FOLLY_INTRINSIC_CONSTEXPR
 173 typename std::enable_if<
 174   (std::is_integral<T>::value &&
 175    std::is_signed<T>::value),
 176   unsigned int>::type
 177   findLastSet(T x) {
 178   return findLastSet(static_cast<typename std::make_unsigned<T>::type>(x));
 179 }
 180
 181 template <class T>
 182 inline FOLLY_INTRINSIC_CONSTEXPR
 183 typename std::enable_if<
 184   std::is_integral<T>::value && std::is_unsigned<T>::value,
 185   T>::type
 186 nextPowTwo(T v) {
 187   return v ? (T(1) << findLastSet(v - 1)) : 1;
 188 }
 189
 190 template <class T>
 191 inline FOLLY_INTRINSIC_CONSTEXPR typename std::
 192     enable_if<std::is_integral<T>::value && std::is_unsigned<T>::value, T>::type
 193     prevPowTwo(T v) {
 194   return v ? (T(1) << (findLastSet(v) - 1)) : 0;
 195 }
 196
 197 template <class T>
 198 inline constexpr typename std::enable_if<
 199     std::is_integral<T>::value && std::is_unsigned<T>::value,
 200     bool>::type
 201 isPowTwo(T v) {
 202   return (v != 0) && !(v & (v - 1));
 203 }
 204
 205 /**
 206  * Population count
 207  */
 208 template <class T>
 209 inline typename std::enable_if<
 210   (std::is_integral<T>::value &&
 211    std::is_unsigned<T>::value &&
 212    sizeof(T) <= sizeof(unsigned int)),
 213   size_t>::type
 214   popcount(T x) {
 215   return size_t(__builtin_popcount(x));
 216 }
 217
 218 template <class T>
 219 inline typename std::enable_if<
 220   (std::is_integral<T>::value &&
 221    std::is_unsigned<T>::value &&
 222    sizeof(T) > sizeof(unsigned int) &&
 223    sizeof(T) <= sizeof(unsigned long long)),
 224   size_t>::type
 225   popcount(T x) {
 226   return size_t(__builtin_popcountll(x));
 227 }
 228
 229 /**
 230  * Endianness detection and manipulation primitives.
 231  */
 232 namespace detail {
 233
 234 template <size_t Size>
 235 struct uint_types_by_size;
 236
 237 #define FB_GEN(sz, fn)                                      \
 238   static inline uint##sz##_t byteswap_gen(uint##sz##_t v) { \
 239     return fn(v);                                           \
 240   }                                                         \
 241   template <>                                               \
 242   struct uint_types_by_size<sz / 8> {                       \
 243     using type = uint##sz##_t;                              \
 244   };
 245
 246 FB_GEN(8, uint8_t)
 247 #ifdef _MSC_VER
 248 FB_GEN(64, _byteswap_uint64)
 249 FB_GEN(32, _byteswap_ulong)
 250 FB_GEN(16, _byteswap_ushort)
 251 #else
 252 FB_GEN(64, __builtin_bswap64)
 253 FB_GEN(32, __builtin_bswap32)
 254 FB_GEN(16, __builtin_bswap16)
 255 #endif
 256
 257 #undef FB_GEN
 258
 259 template <class T>
 260 struct EndianInt {
 261   static_assert(
 262       (std::is_integral<T>::value && !std::is_same<T, bool>::value) ||
 263           std::is_floating_point<T>::value,
 264       "template type parameter must be non-bool integral or floating point");
 265   static T swap(T x) {
 266     // we implement this with memcpy because that is defined behavior in C++
 267     // we rely on compilers to optimize away the memcpy calls
 268     constexpr auto s = sizeof(T);
 269     using B = typename uint_types_by_size<s>::type;
 270     B b;
 271     std::memcpy(&b, &x, s);
 272     b = byteswap_gen(b);
 273     std::memcpy(&x, &b, s);
 274     return x;
 275   }
 276   static T big(T x) {
 277     return kIsLittleEndian ? EndianInt::swap(x) : x;
 278   }
 279   static T little(T x) {
 280     return kIsBigEndian ? EndianInt::swap(x) : x;
 281   }
 282 };
 283
 284 } // namespace detail
 285
 286 // big* convert between native and big-endian representations
 287 // little* convert between native and little-endian representations
 288 // swap* convert between big-endian and little-endian representations
 289 //
 290 // ntohs, htons == big16
 291 // ntohl, htonl == big32
 292 #define FB_GEN1(fn, t, sz) \
 293   static t fn##sz(t x) { return fn<t>(x); } \
 294
 295 #define FB_GEN2(t, sz) \
 296   FB_GEN1(swap, t, sz) \
 297   FB_GEN1(big, t, sz) \
 298   FB_GEN1(little, t, sz)
 299
 300 #define FB_GEN(sz) \
 301   FB_GEN2(uint##sz##_t, sz) \
 302   FB_GEN2(int##sz##_t, sz)
 303
 304 class Endian {
 305  public:
 306   enum class Order : uint8_t {
 307     LITTLE,
 308     BIG
 309   };
 310
 311   static constexpr Order order = kIsLittleEndian ? Order::LITTLE : Order::BIG;
 312
 313   template <class T> static T swap(T x) {
 314     return folly::detail::EndianInt<T>::swap(x);
 315   }
 316   template <class T> static T big(T x) {
 317     return folly::detail::EndianInt<T>::big(x);
 318   }
 319   template <class T> static T little(T x) {
 320     return folly::detail::EndianInt<T>::little(x);
 321   }
 322
 323 #if !defined(__ANDROID__)
 324   FB_GEN(64)
 325   FB_GEN(32)
 326   FB_GEN(16)
 327   FB_GEN(8)
 328 #endif
 329 };
 330
 331 #undef FB_GEN
 332 #undef FB_GEN2
 333 #undef FB_GEN1
 334
 335 /**
 336  * Fast bit iteration facility.
 337  */
 338
 339
 340 template <class BaseIter> class BitIterator;
 341 template <class BaseIter>
 342 BitIterator<BaseIter> findFirstSet(BitIterator<BaseIter>,
 343                                    BitIterator<BaseIter>);
 344 /**
 345  * Wrapper around an iterator over an integer type that iterates
 346  * over its underlying bits in LSb to MSb order.
 347  *
 348  * BitIterator models the same iterator concepts as the base iterator.
 349  */
 350 template <class BaseIter>
 351 class BitIterator
 352   : public bititerator_detail::BitIteratorBase<BaseIter>::type {
 353  public:
 354   /**
 355    * Return the number of bits in an element of the underlying iterator.
 356    */
 357   static unsigned int bitsPerBlock() {
 358     return std::numeric_limits<
 359       typename std::make_unsigned<
 360         typename std::iterator_traits<BaseIter>::value_type
 361       >::type
 362     >::digits;
 363   }
 364
 365   /**
 366    * Construct a BitIterator that points at a given bit offset (default 0)
 367    * in iter.
 368    */
 369   explicit BitIterator(const BaseIter& iter, size_t bitOff=0)
 370     : bititerator_detail::BitIteratorBase<BaseIter>::type(iter),
 371       bitOffset_(bitOff) {
 372     assert(bitOffset_ < bitsPerBlock());
 373   }
 374
 375   size_t bitOffset() const {
 376     return bitOffset_;
 377   }
 378
 379   void advanceToNextBlock() {
 380     bitOffset_ = 0;
 381     ++this->base_reference();
 382   }
 383
 384   BitIterator& operator=(const BaseIter& other) {
 385     this->~BitIterator();
 386     new (this) BitIterator(other);
 387     return *this;
 388   }
 389
 390  private:
 391   friend class boost::iterator_core_access;
 392   friend BitIterator findFirstSet<>(BitIterator, BitIterator);
 393
 394   typedef bititerator_detail::BitReference<
 395       typename std::iterator_traits<BaseIter>::reference,
 396       typename std::iterator_traits<BaseIter>::value_type
 397     > BitRef;
 398
 399   void advanceInBlock(size_t n) {
 400     bitOffset_ += n;
 401     assert(bitOffset_ < bitsPerBlock());
 402   }
 403
 404   BitRef dereference() const {
 405     return BitRef(*this->base_reference(), bitOffset_);
 406   }
 407
 408   void advance(ssize_t n) {
 409     size_t bpb = bitsPerBlock();
 410     ssize_t blocks = n / ssize_t(bpb);
 411     bitOffset_ += n % bpb;
 412     if (bitOffset_ >= bpb) {
 413       bitOffset_ -= bpb;
 414       ++blocks;
 415     }
 416     this->base_reference() += blocks;
 417   }
 418
 419   void increment() {
 420     if (++bitOffset_ == bitsPerBlock()) {
 421       advanceToNextBlock();
 422     }
 423   }
 424
 425   void decrement() {
 426     if (bitOffset_-- == 0) {
 427       bitOffset_ = bitsPerBlock() - 1;
 428       --this->base_reference();
 429     }
 430   }
 431
 432   bool equal(const BitIterator& other) const {
 433     return (bitOffset_ == other.bitOffset_ &&
 434             this->base_reference() == other.base_reference());
 435   }
 436
 437   ssize_t distance_to(const BitIterator& other) const {
 438     return ssize_t(
 439         (other.base_reference() - this->base_reference()) * bitsPerBlock() +
 440         other.bitOffset_ - bitOffset_);
 441   }
 442
 443   size_t bitOffset_;
 444 };
 445
 446 /**
 447  * Helper function, so you can write
 448  * auto bi = makeBitIterator(container.begin());
 449  */
 450 template <class BaseIter>
 451 BitIterator<BaseIter> makeBitIterator(const BaseIter& iter) {
 452   return BitIterator<BaseIter>(iter);
 453 }
 454
 455
 456 /**
 457  * Find first bit set in a range of bit iterators.
 458  * 4.5x faster than the obvious std::find(begin, end, true);
 459  */
 460 template <class BaseIter>
 461 BitIterator<BaseIter> findFirstSet(BitIterator<BaseIter> begin,
 462                                    BitIterator<BaseIter> end) {
 463   // shortcut to avoid ugly static_cast<>
 464   static const typename BaseIter::value_type one = 1;
 465
 466   while (begin.base() != end.base()) {
 467     typename BaseIter::value_type v = *begin.base();
 468     // mask out the bits that don't matter (< begin.bitOffset)
 469     v &= ~((one << begin.bitOffset()) - 1);
 470     size_t firstSet = findFirstSet(v);
 471     if (firstSet) {
 472       --firstSet;  // now it's 0-based
 473       assert(firstSet >= begin.bitOffset());
 474       begin.advanceInBlock(firstSet - begin.bitOffset());
 475       return begin;
 476     }
 477     begin.advanceToNextBlock();
 478   }
 479
 480   // now begin points to the same block as end
 481   if (end.bitOffset() != 0) {  // assume end is dereferenceable
 482     typename BaseIter::value_type v = *begin.base();
 483     // mask out the bits that don't matter (< begin.bitOffset)
 484     v &= ~((one << begin.bitOffset()) - 1);
 485     // mask out the bits that don't matter (>= end.bitOffset)
 486     v &= (one << end.bitOffset()) - 1;
 487     size_t firstSet = findFirstSet(v);
 488     if (firstSet) {
 489       --firstSet;  // now it's 0-based
 490       assert(firstSet >= begin.bitOffset());
 491       begin.advanceInBlock(firstSet - begin.bitOffset());
 492       return begin;
 493     }
 494   }
 495
 496   return end;
 497 }
 498
 499
 500 template <class T, class Enable=void> struct Unaligned;
 501
 502 /**
 503  * Representation of an unaligned value of a POD type.
 504  */
 505 FOLLY_PACK_PUSH
 506 template <class T>
 507 struct Unaligned<
 508     T,
 509     typename std::enable_if<std::is_pod<T>::value>::type> {
 510   Unaligned() = default;  // uninitialized
 511   /* implicit */ Unaligned(T v) : value(v) { }
 512   T value;
 513 } FOLLY_PACK_ATTR;
 514 FOLLY_PACK_POP
 515
 516 /**
 517  * Read an unaligned value of type T and return it.
 518  */
 519 template <class T>
 520 inline T loadUnaligned(const void* p) {
 521   static_assert(sizeof(Unaligned<T>) == sizeof(T), "Invalid unaligned size");
 522   static_assert(alignof(Unaligned<T>) == 1, "Invalid alignment");
 523   if (kHasUnalignedAccess) {
 524     return static_cast<const Unaligned<T>*>(p)->value;
 525   } else {
 526     T value;
 527     memcpy(&value, p, sizeof(T));
 528     return value;
 529   }
 530 }
 531
 532 /**
 533  * Write an unaligned value of type T.
 534  */
 535 template <class T>
 536 inline void storeUnaligned(void* p, T value) {
 537   static_assert(sizeof(Unaligned<T>) == sizeof(T), "Invalid unaligned size");
 538   static_assert(alignof(Unaligned<T>) == 1, "Invalid alignment");
 539   if (kHasUnalignedAccess) {
 540     // Prior to C++14, the spec says that a placement new like this
 541     // is required to check that p is not nullptr, and to do nothing
 542     // if p is a nullptr. By assuming it's not a nullptr, we get a
 543     // nice loud segfault in optimized builds if p is nullptr, rather
 544     // than just silently doing nothing.
 545     folly::assume(p != nullptr);
 546     new (p) Unaligned<T>(value);
 547   } else {
 548     memcpy(p, &value, sizeof(T));
 549   }
 550 }
 551
 552 } // namespace folly