X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FBits.h;h=889b3a0b0c2d6852cef8a0c9c623b6806a123ee8;hb=171c36527455c2709be57f2ccc41218d45689411;hp=61a7407d10ddd26c97d495c1d44451b7259b3953;hpb=d7a2b20b4df53ee1e622ebc7cb58d1b1f0959ec7;p=folly.git diff --git a/folly/Bits.h b/folly/Bits.h index 61a7407d..889b3a0b 100644 --- a/folly/Bits.h +++ b/folly/Bits.h @@ -1,5 +1,5 @@ /* - * Copyright 2016 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,45 +41,28 @@ * Endian::little(x) little <-> native * Endian::swap(x) big <-> little * - * BitIterator - * Wrapper around an iterator over an integral type that iterates - * over its underlying bits in MSb to LSb order - * - * findFirstSet(BitIterator begin, BitIterator end) - * return a BitIterator pointing to the first 1 bit in [begin, end), or - * end if all bits in [begin, end) are 0 - * * @author Tudor Bosman (tudorb@fb.com) */ -#ifndef FOLLY_BITS_H_ -#define FOLLY_BITS_H_ +#pragma once -#if !defined(__clang__) && !(defined(_MSC_VER) && (_MSC_VER < 1900)) -#define FOLLY_INTRINSIC_CONSTEXPR constexpr -#else -// GCC and MSVC 2015+ are the only compilers with -// intrinsics constexpr. +// MSVC does not support intrinsics constexpr +#if defined(_MSC_VER) #define FOLLY_INTRINSIC_CONSTEXPR const -#endif - -#include - -#include -#include -#include - -#if FOLLY_HAVE_BYTESWAP_H -# include +#else +#define FOLLY_INTRINSIC_CONSTEXPR constexpr #endif #include #include -#include +#include +#include #include #include -#include -#include + +#include +#include +#include namespace folly { @@ -93,7 +76,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), unsigned int>::type findFirstSet(T x) { - return __builtin_ffs(x); + return static_cast(__builtin_ffs(static_cast(x))); } template @@ -105,7 +88,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long)), unsigned int>::type findFirstSet(T x) { - return __builtin_ffsl(x); + return static_cast(__builtin_ffsl(static_cast(x))); } template @@ -117,7 +100,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), unsigned int>::type findFirstSet(T x) { - return __builtin_ffsll(x); + return static_cast(__builtin_ffsll(static_cast(x))); } template @@ -142,7 +125,9 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), unsigned int>::type findLastSet(T x) { - return x ? 8 * sizeof(unsigned int) - __builtin_clz(x) : 0; + // If X is a power of two X - Y = ((X - 1) ^ Y) + 1. Doing this transformation + // allows GCC to remove its own xor that it adds to implement clz using bsr + return x ? ((8 * sizeof(unsigned int) - 1) ^ __builtin_clz(x)) + 1 : 0; } template @@ -154,7 +139,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long)), unsigned int>::type findLastSet(T x) { - return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0; + return x ? ((8 * sizeof(unsigned long) - 1) ^ __builtin_clzl(x)) + 1 : 0; } template @@ -166,7 +151,8 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), unsigned int>::type findLastSet(T x) { - return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0; + return x ? ((8 * sizeof(unsigned long long) - 1) ^ __builtin_clzll(x)) + 1 + : 0; } template @@ -185,14 +171,20 @@ typename std::enable_if< std::is_integral::value && std::is_unsigned::value, T>::type nextPowTwo(T v) { - return v ? (1ul << findLastSet(v - 1)) : 1; + return v ? 
(T(1) << findLastSet(v - 1)) : 1; } template -inline constexpr -typename std::enable_if< - std::is_integral::value && std::is_unsigned::value, - bool>::type +inline FOLLY_INTRINSIC_CONSTEXPR typename std:: + enable_if::value && std::is_unsigned::value, T>::type + prevPowTwo(T v) { + return v ? (T(1) << (findLastSet(v) - 1)) : 0; +} + +template +inline constexpr typename std::enable_if< + std::is_integral::value && std::is_unsigned::value, + bool>::type isPowTwo(T v) { return (v != 0) && !(v & (v - 1)); } @@ -207,7 +199,7 @@ inline typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), size_t>::type popcount(T x) { - return detail::popcount(x); + return size_t(__builtin_popcount(x)); } template @@ -218,7 +210,7 @@ inline typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), size_t>::type popcount(T x) { - return detail::popcountll(x); + return size_t(__builtin_popcountll(x)); } /** @@ -226,82 +218,57 @@ inline typename std::enable_if< */ namespace detail { -template -struct EndianIntBase { - public: - static T swap(T x); -}; +template +struct uint_types_by_size; -#ifndef _MSC_VER - -/** - * If we have the bswap_16 macro from byteswap.h, use it; otherwise, provide our - * own definition. - */ -#ifdef bswap_16 -# define our_bswap16 bswap_16 -#else - -template -inline constexpr typename std::enable_if< - sizeof(Int16) == 2, - Int16>::type -our_bswap16(Int16 x) { - return ((x >> 8) & 0xff) | ((x & 0xff) << 8); -} -#endif - -#endif - -#define FB_GEN(t, fn) \ -template<> inline t EndianIntBase::swap(t x) { return fn(x); } +#define FB_GEN(sz, fn) \ + static inline uint##sz##_t byteswap_gen(uint##sz##_t v) { \ + return fn(v); \ + } \ + template <> \ + struct uint_types_by_size { \ + using type = uint##sz##_t; \ + }; -// fn(x) expands to (x) if the second argument is empty, which is exactly -// what we want for [u]int8_t. Also, gcc 4.7 on Intel doesn't have -// __builtin_bswap16 for some reason, so we have to provide our own. 
-FB_GEN( int8_t,) -FB_GEN(uint8_t,) +FB_GEN(8, uint8_t) #ifdef _MSC_VER -FB_GEN( int64_t, _byteswap_uint64) -FB_GEN(uint64_t, _byteswap_uint64) -FB_GEN( int32_t, _byteswap_ulong) -FB_GEN(uint32_t, _byteswap_ulong) -FB_GEN( int16_t, _byteswap_ushort) -FB_GEN(uint16_t, _byteswap_ushort) +FB_GEN(64, _byteswap_uint64) +FB_GEN(32, _byteswap_ulong) +FB_GEN(16, _byteswap_ushort) #else -FB_GEN( int64_t, __builtin_bswap64) -FB_GEN(uint64_t, __builtin_bswap64) -FB_GEN( int32_t, __builtin_bswap32) -FB_GEN(uint32_t, __builtin_bswap32) -FB_GEN( int16_t, our_bswap16) -FB_GEN(uint16_t, our_bswap16) +FB_GEN(64, __builtin_bswap64) +FB_GEN(32, __builtin_bswap32) +FB_GEN(16, __builtin_bswap16) #endif #undef FB_GEN -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - -template -struct EndianInt : public detail::EndianIntBase { - public: - static T big(T x) { return EndianInt::swap(x); } - static T little(T x) { return x; } -}; - -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - template -struct EndianInt : public detail::EndianIntBase { - public: - static T big(T x) { return x; } - static T little(T x) { return EndianInt::swap(x); } +struct EndianInt { + static_assert( + (std::is_integral::value && !std::is_same::value) || + std::is_floating_point::value, + "template type parameter must be non-bool integral or floating point"); + static T swap(T x) { + // we implement this with memcpy because that is defined behavior in C++ + // we rely on compilers to optimize away the memcpy calls + constexpr auto s = sizeof(T); + using B = typename uint_types_by_size::type; + B b; + std::memcpy(&b, &x, s); + b = byteswap_gen(b); + std::memcpy(&x, &b, s); + return x; + } + static T big(T x) { + return kIsLittleEndian ? EndianInt::swap(x) : x; + } + static T little(T x) { + return kIsBigEndian ? EndianInt::swap(x) : x; + } }; -#else -# error Your machine uses a weird endianness! -#endif /* __BYTE_ORDER__ */ - -} // namespace detail +} // namespace detail // big* convert between native and big-endian representations // little* convert between native and little-endian representations @@ -328,23 +295,16 @@ class Endian { BIG }; - static constexpr Order order = -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - Order::LITTLE; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - Order::BIG; -#else -# error Your machine uses a weird endianness! -#endif /* __BYTE_ORDER__ */ + static constexpr Order order = kIsLittleEndian ? Order::LITTLE : Order::BIG; template static T swap(T x) { - return detail::EndianInt::swap(x); + return folly::detail::EndianInt::swap(x); } template static T big(T x) { - return detail::EndianInt::big(x); + return folly::detail::EndianInt::big(x); } template static T little(T x) { - return detail::EndianInt::little(x); + return folly::detail::EndianInt::little(x); } #if !defined(__ANDROID__) @@ -359,173 +319,6 @@ class Endian { #undef FB_GEN2 #undef FB_GEN1 -/** - * Fast bit iteration facility. - */ - - -template class BitIterator; -template -BitIterator findFirstSet(BitIterator, - BitIterator); -/** - * Wrapper around an iterator over an integer type that iterates - * over its underlying bits in LSb to MSb order. - * - * BitIterator models the same iterator concepts as the base iterator. - */ -template -class BitIterator - : public bititerator_detail::BitIteratorBase::type { - public: - /** - * Return the number of bits in an element of the underlying iterator. 
- */ - static unsigned int bitsPerBlock() { - return std::numeric_limits< - typename std::make_unsigned< - typename std::iterator_traits::value_type - >::type - >::digits; - } - - /** - * Construct a BitIterator that points at a given bit offset (default 0) - * in iter. - */ - #pragma GCC diagnostic push // bitOffset shadows a member - #pragma GCC diagnostic ignored "-Wshadow" - explicit BitIterator(const BaseIter& iter, size_t bitOffset=0) - : bititerator_detail::BitIteratorBase::type(iter), - bitOffset_(bitOffset) { - assert(bitOffset_ < bitsPerBlock()); - } - #pragma GCC diagnostic pop - - size_t bitOffset() const { - return bitOffset_; - } - - void advanceToNextBlock() { - bitOffset_ = 0; - ++this->base_reference(); - } - - BitIterator& operator=(const BaseIter& other) { - this->~BitIterator(); - new (this) BitIterator(other); - return *this; - } - - private: - friend class boost::iterator_core_access; - friend BitIterator findFirstSet<>(BitIterator, BitIterator); - - typedef bititerator_detail::BitReference< - typename std::iterator_traits::reference, - typename std::iterator_traits::value_type - > BitRef; - - void advanceInBlock(size_t n) { - bitOffset_ += n; - assert(bitOffset_ < bitsPerBlock()); - } - - BitRef dereference() const { - return BitRef(*this->base_reference(), bitOffset_); - } - - void advance(ssize_t n) { - size_t bpb = bitsPerBlock(); - ssize_t blocks = n / bpb; - bitOffset_ += n % bpb; - if (bitOffset_ >= bpb) { - bitOffset_ -= bpb; - ++blocks; - } - this->base_reference() += blocks; - } - - void increment() { - if (++bitOffset_ == bitsPerBlock()) { - advanceToNextBlock(); - } - } - - void decrement() { - if (bitOffset_-- == 0) { - bitOffset_ = bitsPerBlock() - 1; - --this->base_reference(); - } - } - - bool equal(const BitIterator& other) const { - return (bitOffset_ == other.bitOffset_ && - this->base_reference() == other.base_reference()); - } - - ssize_t distance_to(const BitIterator& other) const { - return - (other.base_reference() - this->base_reference()) * bitsPerBlock() + - other.bitOffset_ - bitOffset_; - } - - unsigned int bitOffset_; -}; - -/** - * Helper function, so you can write - * auto bi = makeBitIterator(container.begin()); - */ -template -BitIterator makeBitIterator(const BaseIter& iter) { - return BitIterator(iter); -} - - -/** - * Find first bit set in a range of bit iterators. 
- * 4.5x faster than the obvious std::find(begin, end, true); - */ -template -BitIterator findFirstSet(BitIterator begin, - BitIterator end) { - // shortcut to avoid ugly static_cast<> - static const typename BaseIter::value_type one = 1; - - while (begin.base() != end.base()) { - typename BaseIter::value_type v = *begin.base(); - // mask out the bits that don't matter (< begin.bitOffset) - v &= ~((one << begin.bitOffset()) - 1); - size_t firstSet = findFirstSet(v); - if (firstSet) { - --firstSet; // now it's 0-based - assert(firstSet >= begin.bitOffset()); - begin.advanceInBlock(firstSet - begin.bitOffset()); - return begin; - } - begin.advanceToNextBlock(); - } - - // now begin points to the same block as end - if (end.bitOffset() != 0) { // assume end is dereferenceable - typename BaseIter::value_type v = *begin.base(); - // mask out the bits that don't matter (< begin.bitOffset) - v &= ~((one << begin.bitOffset()) - 1); - // mask out the bits that don't matter (>= end.bitOffset) - v &= (one << end.bitOffset()) - 1; - size_t firstSet = findFirstSet(v); - if (firstSet) { - --firstSet; // now it's 0-based - assert(firstSet >= begin.bitOffset()); - begin.advanceInBlock(firstSet - begin.bitOffset()); - return begin; - } - } - - return end; -} - template struct Unaligned; @@ -567,12 +360,25 @@ inline void storeUnaligned(void* p, T value) { static_assert(sizeof(Unaligned) == sizeof(T), "Invalid unaligned size"); static_assert(alignof(Unaligned) == 1, "Invalid alignment"); if (kHasUnalignedAccess) { + // Prior to C++14, the spec says that a placement new like this + // is required to check that p is not nullptr, and to do nothing + // if p is a nullptr. By assuming it's not a nullptr, we get a + // nice loud segfault in optimized builds if p is nullptr, rather + // than just silently doing nothing. + folly::assume(p != nullptr); new (p) Unaligned(value); } else { memcpy(p, &value, sizeof(T)); } } -} // namespace folly +template +T bitReverse(T n) { + auto m = static_cast::type>(n); + m = ((m & 0xAAAAAAAAAAAAAAAA) >> 1) | ((m & 0x5555555555555555) << 1); + m = ((m & 0xCCCCCCCCCCCCCCCC) >> 2) | ((m & 0x3333333333333333) << 2); + m = ((m & 0xF0F0F0F0F0F0F0F0) >> 4) | ((m & 0x0F0F0F0F0F0F0F0F) << 4); + return static_cast(Endian::swap(m)); +} -#endif /* FOLLY_BITS_H_ */ +} // namespace folly
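
Note on the findLastSet change in the patch above: a short worked check of the identity
the new comment relies on, not part of the patch itself. For a power of two X and
0 <= Y < X, X - Y = ((X - 1) ^ Y) + 1, because X - 1 has all bits below X set, so
(X - 1) ^ Y is simply (X - 1) - Y. For 32-bit unsigned and x = 0x10, __builtin_clz(x)
is 27, so the old form gives 32 - 27 = 5 and the new form gives (31 ^ 27) + 1 = 4 + 1 = 5.
A minimal usage sketch of the bit helpers as they are defined after this patch; the
expected values are derived by hand from the definitions above, and the function name
is only for illustration:

    #include <folly/Bits.h>
    #include <cassert>

    void bitHelpersSketch() {
      assert(folly::findFirstSet(12u) == 3);  // 0b1100: lowest set bit, 1-based
      assert(folly::findLastSet(5u) == 3);    // 0b101: highest set bit, 1-based
      assert(folly::nextPowTwo(5u) == 8);     // smallest power of two >= v
      assert(folly::prevPowTwo(5u) == 4);     // largest power of two <= v (new in this patch)
      assert(folly::isPowTwo(8u));
      assert(folly::popcount(0xF0u) == 4);
    }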
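
Note on the EndianInt rewrite above: swap() now memcpy()s the value into the unsigned
integer of the same width, byte-swaps that, and memcpy()s it back, which keeps the
conversion well-defined for floating-point types as well as integers. A minimal sketch,
not part of the patch, of how the Endian helpers combine with the unaligned-access
helpers in this header when writing a fixed-layout field; writeBe and readBe are
hypothetical names introduced only for this example (loadUnaligned is the matching
reader in Bits.h, not shown in the hunk above):

    #include <folly/Bits.h>
    #include <cstdint>

    // Store a value into an arbitrarily aligned buffer in big-endian order.
    template <class T>
    void writeBe(void* out, T value) {
      folly::storeUnaligned(out, folly::Endian::big(value));
    }

    // Read it back, converting from big-endian to the host byte order
    // (Endian::big is its own inverse; it is a no-op on big-endian hosts).
    template <class T>
    T readBe(const void* in) {
      return folly::Endian::big(folly::loadUnaligned<T>(in));
    }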
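
Note on the new bitReverse above: the three mask-and-shift (SWAR) steps reverse the
bits within each byte (adjacent bits, then 2-bit pairs, then nibbles), and Endian::swap
then reverses the byte order, so the whole value ends up bit-reversed. A small sketch of
the expected behavior, worked out by hand from the definition rather than measured:

    #include <folly/Bits.h>
    #include <cassert>
    #include <cstdint>

    void bitReverseSketch() {
      assert(folly::bitReverse<uint8_t>(0x01) == 0x80);
      assert(folly::bitReverse<uint16_t>(0x0F00) == 0x00F0);
      assert(folly::bitReverse<uint32_t>(0x00000001) == 0x80000000);
    }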