X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FBits.h;h=529073b2823124bb591519a90fce81bf7d8a3e5c;hp=c915111e1dc3949b367081d8c6a844a540c104b8;hb=96ac8f8ff0c9bc9446eede119fdfee0fe7feaba3;hpb=5ed20b59910fcb9e7f7f866c5279b0075f5887eb diff --git a/folly/Bits.h b/folly/Bits.h index c915111e..529073b2 100644 --- a/folly/Bits.h +++ b/folly/Bits.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,54 +41,28 @@ * Endian::little(x) little <-> native * Endian::swap(x) big <-> little * - * BitIterator - * Wrapper around an iterator over an integral type that iterates - * over its underlying bits in MSb to LSb order - * - * findFirstSet(BitIterator begin, BitIterator end) - * return a BitIterator pointing to the first 1 bit in [begin, end), or - * end if all bits in [begin, end) are 0 - * * @author Tudor Bosman (tudorb@fb.com) */ -#ifndef FOLLY_BITS_H_ -#define FOLLY_BITS_H_ +#pragma once -#include - -#if !defined(__clang__) && !defined(_MSC_VER) -#define FOLLY_INTRINSIC_CONSTEXPR constexpr -#else -// GCC is the only compiler with intrinsics constexpr. +// MSVC does not support intrinsics constexpr +#if defined(_MSC_VER) #define FOLLY_INTRINSIC_CONSTEXPR const -#endif - -#include - -#include -#include -#include - -#if FOLLY_HAVE_BYTESWAP_H -# include -#endif - -#ifdef _MSC_VER -# include -# pragma intrinsic(_BitScanForward) -# pragma intrinsic(_BitScanForward64) -# pragma intrinsic(_BitScanReverse) -# pragma intrinsic(_BitScanReverse64) +#else +#define FOLLY_INTRINSIC_CONSTEXPR constexpr #endif #include #include -#include +#include +#include #include #include -#include -#include + +#include +#include +#include namespace folly { @@ -102,12 +76,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), unsigned int>::type findFirstSet(T x) { -#ifdef _MSC_VER - unsigned long index; - return _BitScanForward(&index, x) ? index : 0; -#else - return __builtin_ffs(x); -#endif + return static_cast(__builtin_ffs(static_cast(x))); } template @@ -119,12 +88,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long)), unsigned int>::type findFirstSet(T x) { -#ifdef _MSC_VER - unsigned long index; - return _BitScanForward(&index, x) ? index : 0; -#else - return __builtin_ffsl(x); -#endif + return static_cast(__builtin_ffsl(static_cast(x))); } template @@ -136,12 +100,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), unsigned int>::type findFirstSet(T x) { -#ifdef _MSC_VER - unsigned long index; - return _BitScanForward64(&index, x) ? index : 0; -#else - return __builtin_ffsll(x); -#endif + return static_cast(__builtin_ffsll(static_cast(x))); } template @@ -166,18 +125,9 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), unsigned int>::type findLastSet(T x) { -#ifdef _MSC_VER - unsigned long index; - int clz; - if (_BitScanReverse(&index, x)) { - clz = static_cast(31 - index); - } else { - clz = 32; - } - return x ? 8 * sizeof(unsigned int) - clz : 0; -#else - return x ? 8 * sizeof(unsigned int) - __builtin_clz(x) : 0; -#endif + // If X is a power of two X - Y = ((X - 1) ^ Y) + 1. Doing this transformation + // allows GCC to remove its own xor that it adds to implement clz using bsr + return x ? ((8 * sizeof(unsigned int) - 1) ^ __builtin_clz(x)) + 1 : 0; } template @@ -189,18 +139,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long)), unsigned int>::type findLastSet(T x) { -#ifdef _MSC_VER - unsigned long index; - int clz; - if (_BitScanReverse(&index, x)) { - clz = static_cast(31 - index); - } else { - clz = 32; - } - return x ? 8 * sizeof(unsigned int) - clz : 0; -#else - return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0; -#endif + return x ? ((8 * sizeof(unsigned long) - 1) ^ __builtin_clzl(x)) + 1 : 0; } template @@ -212,18 +151,8 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), unsigned int>::type findLastSet(T x) { -#ifdef _MSC_VER - unsigned long index; - unsigned long long clz; - if (_BitScanReverse(&index, x)) { - clz = static_cast(63 - index); - } else { - clz = 64; - } - return x ? 8 * sizeof(unsigned long long) - clz : 0; -#else - return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0; -#endif + return x ? ((8 * sizeof(unsigned long long) - 1) ^ __builtin_clzll(x)) + 1 + : 0; } template @@ -242,14 +171,20 @@ typename std::enable_if< std::is_integral::value && std::is_unsigned::value, T>::type nextPowTwo(T v) { - return v ? (1ul << findLastSet(v - 1)) : 1; + return v ? (T(1) << findLastSet(v - 1)) : 1; } template -inline constexpr -typename std::enable_if< - std::is_integral::value && std::is_unsigned::value, - bool>::type +inline FOLLY_INTRINSIC_CONSTEXPR typename std:: + enable_if::value && std::is_unsigned::value, T>::type + prevPowTwo(T v) { + return v ? (T(1) << (findLastSet(v) - 1)) : 0; +} + +template +inline constexpr typename std::enable_if< + std::is_integral::value && std::is_unsigned::value, + bool>::type isPowTwo(T v) { return (v != 0) && !(v & (v - 1)); } @@ -264,7 +199,7 @@ inline typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), size_t>::type popcount(T x) { - return detail::popcount(x); + return size_t(__builtin_popcount(x)); } template @@ -275,7 +210,7 @@ inline typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), size_t>::type popcount(T x) { - return detail::popcountll(x); + return size_t(__builtin_popcountll(x)); } /** @@ -283,82 +218,57 @@ inline typename std::enable_if< */ namespace detail { -template -struct EndianIntBase { - public: - static T swap(T x); -}; - -#ifndef _MSC_VER - -/** - * If we have the bswap_16 macro from byteswap.h, use it; otherwise, provide our - * own definition. - */ -#ifdef bswap_16 -# define our_bswap16 bswap_16 -#else - -template -inline constexpr typename std::enable_if< - sizeof(Int16) == 2, - Int16>::type -our_bswap16(Int16 x) { - return ((x >> 8) & 0xff) | ((x & 0xff) << 8); -} -#endif - -#endif +template +struct uint_types_by_size; -#define FB_GEN(t, fn) \ -template<> inline t EndianIntBase::swap(t x) { return fn(x); } +#define FB_GEN(sz, fn) \ + static inline uint##sz##_t byteswap_gen(uint##sz##_t v) { \ + return fn(v); \ + } \ + template <> \ + struct uint_types_by_size { \ + using type = uint##sz##_t; \ + }; -// fn(x) expands to (x) if the second argument is empty, which is exactly -// what we want for [u]int8_t. Also, gcc 4.7 on Intel doesn't have -// __builtin_bswap16 for some reason, so we have to provide our own. -FB_GEN( int8_t,) -FB_GEN(uint8_t,) +FB_GEN(8, uint8_t) #ifdef _MSC_VER -FB_GEN( int64_t, _byteswap_uint64) -FB_GEN(uint64_t, _byteswap_uint64) -FB_GEN( int32_t, _byteswap_ulong) -FB_GEN(uint32_t, _byteswap_ulong) -FB_GEN( int16_t, _byteswap_ushort) -FB_GEN(uint16_t, _byteswap_ushort) +FB_GEN(64, _byteswap_uint64) +FB_GEN(32, _byteswap_ulong) +FB_GEN(16, _byteswap_ushort) #else -FB_GEN( int64_t, __builtin_bswap64) -FB_GEN(uint64_t, __builtin_bswap64) -FB_GEN( int32_t, __builtin_bswap32) -FB_GEN(uint32_t, __builtin_bswap32) -FB_GEN( int16_t, our_bswap16) -FB_GEN(uint16_t, our_bswap16) +FB_GEN(64, __builtin_bswap64) +FB_GEN(32, __builtin_bswap32) +FB_GEN(16, __builtin_bswap16) #endif #undef FB_GEN -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - template -struct EndianInt : public detail::EndianIntBase { - public: - static T big(T x) { return EndianInt::swap(x); } - static T little(T x) { return x; } -}; - -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - -template -struct EndianInt : public detail::EndianIntBase { - public: - static T big(T x) { return x; } - static T little(T x) { return EndianInt::swap(x); } +struct EndianInt { + static_assert( + (std::is_integral::value && !std::is_same::value) || + std::is_floating_point::value, + "template type parameter must be non-bool integral or floating point"); + static T swap(T x) { + // we implement this with memcpy because that is defined behavior in C++ + // we rely on compilers to optimize away the memcpy calls + constexpr auto s = sizeof(T); + using B = typename uint_types_by_size::type; + B b; + std::memcpy(&b, &x, s); + b = byteswap_gen(b); + std::memcpy(&x, &b, s); + return x; + } + static T big(T x) { + return kIsLittleEndian ? EndianInt::swap(x) : x; + } + static T little(T x) { + return kIsBigEndian ? EndianInt::swap(x) : x; + } }; -#else -# error Your machine uses a weird endianness! -#endif /* __BYTE_ORDER__ */ - -} // namespace detail +} // namespace detail // big* convert between native and big-endian representations // little* convert between native and little-endian representations @@ -385,202 +295,30 @@ class Endian { BIG }; - static constexpr Order order = -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - Order::LITTLE; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - Order::BIG; -#else -# error Your machine uses a weird endianness! -#endif /* __BYTE_ORDER__ */ + static constexpr Order order = kIsLittleEndian ? Order::LITTLE : Order::BIG; template static T swap(T x) { - return detail::EndianInt::swap(x); + return folly::detail::EndianInt::swap(x); } template static T big(T x) { - return detail::EndianInt::big(x); + return folly::detail::EndianInt::big(x); } template static T little(T x) { - return detail::EndianInt::little(x); + return folly::detail::EndianInt::little(x); } +#if !defined(__ANDROID__) FB_GEN(64) FB_GEN(32) FB_GEN(16) FB_GEN(8) +#endif }; #undef FB_GEN #undef FB_GEN2 #undef FB_GEN1 -/** - * Fast bit iteration facility. - */ - - -template class BitIterator; -template -BitIterator findFirstSet(BitIterator, - BitIterator); -/** - * Wrapper around an iterator over an integer type that iterates - * over its underlying bits in LSb to MSb order. - * - * BitIterator models the same iterator concepts as the base iterator. - */ -template -class BitIterator - : public bititerator_detail::BitIteratorBase::type { - public: - /** - * Return the number of bits in an element of the underlying iterator. - */ - static size_t bitsPerBlock() { - return std::numeric_limits< - typename std::make_unsigned< - typename std::iterator_traits::value_type - >::type - >::digits; - } - - /** - * Construct a BitIterator that points at a given bit offset (default 0) - * in iter. - */ - #pragma GCC diagnostic push // bitOffset shadows a member - #pragma GCC diagnostic ignored "-Wshadow" - explicit BitIterator(const BaseIter& iter, size_t bitOffset=0) - : bititerator_detail::BitIteratorBase::type(iter), - bitOffset_(bitOffset) { - assert(bitOffset_ < bitsPerBlock()); - } - #pragma GCC diagnostic pop - - size_t bitOffset() const { - return bitOffset_; - } - - void advanceToNextBlock() { - bitOffset_ = 0; - ++this->base_reference(); - } - - BitIterator& operator=(const BaseIter& other) { - this->~BitIterator(); - new (this) BitIterator(other); - return *this; - } - - private: - friend class boost::iterator_core_access; - friend BitIterator findFirstSet<>(BitIterator, BitIterator); - - typedef bititerator_detail::BitReference< - typename std::iterator_traits::reference, - typename std::iterator_traits::value_type - > BitRef; - - void advanceInBlock(size_t n) { - bitOffset_ += n; - assert(bitOffset_ < bitsPerBlock()); - } - - BitRef dereference() const { - return BitRef(*this->base_reference(), bitOffset_); - } - - void advance(ssize_t n) { - size_t bpb = bitsPerBlock(); - ssize_t blocks = n / bpb; - bitOffset_ += n % bpb; - if (bitOffset_ >= bpb) { - bitOffset_ -= bpb; - ++blocks; - } - this->base_reference() += blocks; - } - - void increment() { - if (++bitOffset_ == bitsPerBlock()) { - advanceToNextBlock(); - } - } - - void decrement() { - if (bitOffset_-- == 0) { - bitOffset_ = bitsPerBlock() - 1; - --this->base_reference(); - } - } - - bool equal(const BitIterator& other) const { - return (bitOffset_ == other.bitOffset_ && - this->base_reference() == other.base_reference()); - } - - ssize_t distance_to(const BitIterator& other) const { - return - (other.base_reference() - this->base_reference()) * bitsPerBlock() + - (other.bitOffset_ - bitOffset_); - } - - ssize_t bitOffset_; -}; - -/** - * Helper function, so you can write - * auto bi = makeBitIterator(container.begin()); - */ -template -BitIterator makeBitIterator(const BaseIter& iter) { - return BitIterator(iter); -} - - -/** - * Find first bit set in a range of bit iterators. - * 4.5x faster than the obvious std::find(begin, end, true); - */ -template -BitIterator findFirstSet(BitIterator begin, - BitIterator end) { - // shortcut to avoid ugly static_cast<> - static const typename BaseIter::value_type one = 1; - - while (begin.base() != end.base()) { - typename BaseIter::value_type v = *begin.base(); - // mask out the bits that don't matter (< begin.bitOffset) - v &= ~((one << begin.bitOffset()) - 1); - size_t firstSet = findFirstSet(v); - if (firstSet) { - --firstSet; // now it's 0-based - assert(firstSet >= begin.bitOffset()); - begin.advanceInBlock(firstSet - begin.bitOffset()); - return begin; - } - begin.advanceToNextBlock(); - } - - // now begin points to the same block as end - if (end.bitOffset() != 0) { // assume end is dereferenceable - typename BaseIter::value_type v = *begin.base(); - // mask out the bits that don't matter (< begin.bitOffset) - v &= ~((one << begin.bitOffset()) - 1); - // mask out the bits that don't matter (>= end.bitOffset) - v &= (one << end.bitOffset()) - 1; - size_t firstSet = findFirstSet(v); - if (firstSet) { - --firstSet; // now it's 0-based - assert(firstSet >= begin.bitOffset()); - begin.advanceInBlock(firstSet - begin.bitOffset()); - return begin; - } - } - - return end; -} - template struct Unaligned; @@ -605,7 +343,13 @@ template inline T loadUnaligned(const void* p) { static_assert(sizeof(Unaligned) == sizeof(T), "Invalid unaligned size"); static_assert(alignof(Unaligned) == 1, "Invalid alignment"); - return static_cast*>(p)->value; + if (kHasUnalignedAccess) { + return static_cast*>(p)->value; + } else { + T value; + memcpy(&value, p, sizeof(T)); + return value; + } } /** @@ -615,9 +359,17 @@ template inline void storeUnaligned(void* p, T value) { static_assert(sizeof(Unaligned) == sizeof(T), "Invalid unaligned size"); static_assert(alignof(Unaligned) == 1, "Invalid alignment"); - new (p) Unaligned(value); + if (kHasUnalignedAccess) { + // Prior to C++14, the spec says that a placement new like this + // is required to check that p is not nullptr, and to do nothing + // if p is a nullptr. By assuming it's not a nullptr, we get a + // nice loud segfault in optimized builds if p is nullptr, rather + // than just silently doing nothing. + folly::assume(p != nullptr); + new (p) Unaligned(value); + } else { + memcpy(p, &value, sizeof(T)); + } } -} // namespace folly - -#endif /* FOLLY_BITS_H_ */ +} // namespace folly