X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FBits.h;h=b818655e2a347c6d301a7723fa0bf823c0ed1970;hp=2136cfadb58da0b97fee2164a0ed459d3ff9fda7;hb=314c43c4f95dc51f2c97d00216be062eebbd5454;hpb=98c4cba4a6bc766a361680664176901bb13d8697 diff --git a/folly/Bits.h b/folly/Bits.h index 2136cfad..b818655e 100644 --- a/folly/Bits.h +++ b/folly/Bits.h @@ -1,5 +1,5 @@ /* - * Copyright 2014 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,35 +52,26 @@ * @author Tudor Bosman (tudorb@fb.com) */ -#ifndef FOLLY_BITS_H_ -#define FOLLY_BITS_H_ +#pragma once -#if !defined(__clang__) && !defined(_MSC_VER) +#if !defined(__clang__) && !(defined(_MSC_VER) && (_MSC_VER < 1900)) #define FOLLY_INTRINSIC_CONSTEXPR constexpr #else -// GCC is the only compiler with intrinsics constexpr. +// GCC and MSVC 2015+ are the only compilers with +// intrinsics constexpr. #define FOLLY_INTRINSIC_CONSTEXPR const #endif #include +#include +#include #include #include #include -#if FOLLY_HAVE_BYTESWAP_H -# include -#endif - -#ifdef _MSC_VER -# include -# pragma intrinsic(_BitScanForward) -# pragma intrinsic(_BitScanForward64) -# pragma intrinsic(_BitScanReverse) -# pragma intrinsic(_BitScanReverse64) -#endif - #include +#include #include #include #include @@ -100,12 +91,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), unsigned int>::type findFirstSet(T x) { -#ifdef _MSC_VER - unsigned long index; - return _BitScanForward(&index, x) ? index : 0; -#else - return __builtin_ffs(x); -#endif + return static_cast(__builtin_ffs(static_cast(x))); } template @@ -117,12 +103,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long)), unsigned int>::type findFirstSet(T x) { -#ifdef _MSC_VER - unsigned long index; - return _BitScanForward(&index, x) ? index : 0; -#else - return __builtin_ffsl(x); -#endif + return static_cast(__builtin_ffsl(static_cast(x))); } template @@ -134,12 +115,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), unsigned int>::type findFirstSet(T x) { -#ifdef _MSC_VER - unsigned long index; - return _BitScanForward64(&index, x) ? index : 0; -#else - return __builtin_ffsll(x); -#endif + return static_cast(__builtin_ffsll(static_cast(x))); } template @@ -164,18 +140,9 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), unsigned int>::type findLastSet(T x) { -#ifdef _MSC_VER - unsigned long index; - int clz; - if (_BitScanReverse(&index, x)) { - clz = static_cast(31 - index); - } else { - clz = 32; - } - return x ? 8 * sizeof(unsigned int) - clz : 0; -#else - return x ? 8 * sizeof(unsigned int) - __builtin_clz(x) : 0; -#endif + // If X is a power of two X - Y = ((X - 1) ^ Y) + 1. Doing this transformation + // allows GCC to remove its own xor that it adds to implement clz using bsr + return x ? ((8 * sizeof(unsigned int) - 1) ^ __builtin_clz(x)) + 1 : 0; } template @@ -187,18 +154,7 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long)), unsigned int>::type findLastSet(T x) { -#ifdef _MSC_VER - unsigned long index; - int clz; - if (_BitScanReverse(&index, x)) { - clz = static_cast(31 - index); - } else { - clz = 32; - } - return x ? 8 * sizeof(unsigned int) - clz : 0; -#else - return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0; -#endif + return x ? ((8 * sizeof(unsigned long) - 1) ^ __builtin_clzl(x)) + 1 : 0; } template @@ -210,18 +166,8 @@ typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), unsigned int>::type findLastSet(T x) { -#ifdef _MSC_VER - unsigned long index; - unsigned long long clz; - if (_BitScanReverse(&index, x)) { - clz = static_cast(63 - index); - } else { - clz = 64; - } - return x ? 8 * sizeof(unsigned long long) - clz : 0; -#else - return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0; -#endif + return x ? ((8 * sizeof(unsigned long long) - 1) ^ __builtin_clzll(x)) + 1 + : 0; } template @@ -240,14 +186,20 @@ typename std::enable_if< std::is_integral::value && std::is_unsigned::value, T>::type nextPowTwo(T v) { - return v ? (1ul << findLastSet(v - 1)) : 1; + return v ? (T(1) << findLastSet(v - 1)) : 1; } template -inline constexpr -typename std::enable_if< - std::is_integral::value && std::is_unsigned::value, - bool>::type +inline FOLLY_INTRINSIC_CONSTEXPR typename std:: + enable_if::value && std::is_unsigned::value, T>::type + prevPowTwo(T v) { + return v ? (T(1) << (findLastSet(v) - 1)) : 0; +} + +template +inline constexpr typename std::enable_if< + std::is_integral::value && std::is_unsigned::value, + bool>::type isPowTwo(T v) { return (v != 0) && !(v & (v - 1)); } @@ -262,7 +214,7 @@ inline typename std::enable_if< sizeof(T) <= sizeof(unsigned int)), size_t>::type popcount(T x) { - return detail::popcount(x); + return size_t(detail::popcount(x)); } template @@ -273,7 +225,7 @@ inline typename std::enable_if< sizeof(T) <= sizeof(unsigned long long)), size_t>::type popcount(T x) { - return detail::popcountll(x); + return size_t(detail::popcountll(x)); } /** @@ -281,81 +233,56 @@ inline typename std::enable_if< */ namespace detail { -template -struct EndianIntBase { - public: - static T swap(T x); -}; - -#ifndef _MSC_VER - -/** - * If we have the bswap_16 macro from byteswap.h, use it; otherwise, provide our - * own definition. - */ -#ifdef bswap_16 -# define our_bswap16 bswap_16 -#else +template +struct uint_types_by_size; -template -inline constexpr typename std::enable_if< - sizeof(Int16) == 2, - Int16>::type -our_bswap16(Int16 x) { - return ((x >> 8) & 0xff) | ((x & 0xff) << 8); -} -#endif - -#endif - -#define FB_GEN(t, fn) \ -template<> inline t EndianIntBase::swap(t x) { return fn(x); } +#define FB_GEN(sz, fn) \ + static inline uint##sz##_t byteswap_gen(uint##sz##_t v) { \ + return fn(v); \ + } \ + template <> \ + struct uint_types_by_size { \ + using type = uint##sz##_t; \ + }; -// fn(x) expands to (x) if the second argument is empty, which is exactly -// what we want for [u]int8_t. Also, gcc 4.7 on Intel doesn't have -// __builtin_bswap16 for some reason, so we have to provide our own. -FB_GEN( int8_t,) -FB_GEN(uint8_t,) +FB_GEN(8, uint8_t) #ifdef _MSC_VER -FB_GEN( int64_t, _byteswap_uint64) -FB_GEN(uint64_t, _byteswap_uint64) -FB_GEN( int32_t, _byteswap_ulong) -FB_GEN(uint32_t, _byteswap_ulong) -FB_GEN( int16_t, _byteswap_ushort) -FB_GEN(uint16_t, _byteswap_ushort) +FB_GEN(64, _byteswap_uint64) +FB_GEN(32, _byteswap_ulong) +FB_GEN(16, _byteswap_ushort) #else -FB_GEN( int64_t, __builtin_bswap64) -FB_GEN(uint64_t, __builtin_bswap64) -FB_GEN( int32_t, __builtin_bswap32) -FB_GEN(uint32_t, __builtin_bswap32) -FB_GEN( int16_t, our_bswap16) -FB_GEN(uint16_t, our_bswap16) +FB_GEN(64, __builtin_bswap64) +FB_GEN(32, __builtin_bswap32) +FB_GEN(16, __builtin_bswap16) #endif #undef FB_GEN -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - template -struct EndianInt : public detail::EndianIntBase { - public: - static T big(T x) { return EndianInt::swap(x); } - static T little(T x) { return x; } -}; - -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - -template -struct EndianInt : public detail::EndianIntBase { - public: - static T big(T x) { return x; } - static T little(T x) { return EndianInt::swap(x); } +struct EndianInt { + static_assert( + (std::is_integral::value && !std::is_same::value) || + std::is_floating_point::value, + "template type parameter must be non-bool integral or floating point"); + static T swap(T x) { + // we implement this with memcpy because that is defined behavior in C++ + // we rely on compilers to optimize away the memcpy calls + constexpr auto s = sizeof(T); + using B = typename uint_types_by_size::type; + B b; + std::memcpy(&b, &x, s); + b = byteswap_gen(b); + std::memcpy(&x, &b, s); + return x; + } + static T big(T x) { + return kIsLittleEndian ? EndianInt::swap(x) : x; + } + static T little(T x) { + return kIsBigEndian ? EndianInt::swap(x) : x; + } }; -#else -# error Your machine uses a weird endianness! -#endif /* __BYTE_ORDER__ */ - } // namespace detail // big* convert between native and big-endian representations @@ -383,23 +310,16 @@ class Endian { BIG }; - static constexpr Order order = -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - Order::LITTLE; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - Order::BIG; -#else -# error Your machine uses a weird endianness! -#endif /* __BYTE_ORDER__ */ + static constexpr Order order = kIsLittleEndian ? Order::LITTLE : Order::BIG; template static T swap(T x) { - return detail::EndianInt::swap(x); + return folly::detail::EndianInt::swap(x); } template static T big(T x) { - return detail::EndianInt::big(x); + return folly::detail::EndianInt::big(x); } template static T little(T x) { - return detail::EndianInt::little(x); + return folly::detail::EndianInt::little(x); } #if !defined(__ANDROID__) @@ -448,14 +368,11 @@ class BitIterator * Construct a BitIterator that points at a given bit offset (default 0) * in iter. */ - #pragma GCC diagnostic push // bitOffset shadows a member - #pragma GCC diagnostic ignored "-Wshadow" - explicit BitIterator(const BaseIter& iter, size_t bitOffset=0) + explicit BitIterator(const BaseIter& iter, size_t bitOff=0) : bititerator_detail::BitIteratorBase::type(iter), - bitOffset_(bitOffset) { + bitOffset_(bitOff) { assert(bitOffset_ < bitsPerBlock()); } - #pragma GCC diagnostic pop size_t bitOffset() const { return bitOffset_; @@ -492,7 +409,7 @@ class BitIterator void advance(ssize_t n) { size_t bpb = bitsPerBlock(); - ssize_t blocks = n / bpb; + ssize_t blocks = n / ssize_t(bpb); bitOffset_ += n % bpb; if (bitOffset_ >= bpb) { bitOffset_ -= bpb; @@ -520,12 +437,12 @@ class BitIterator } ssize_t distance_to(const BitIterator& other) const { - return - (other.base_reference() - this->base_reference()) * bitsPerBlock() + - other.bitOffset_ - bitOffset_; + return ssize_t( + (other.base_reference() - this->base_reference()) * bitsPerBlock() + + other.bitOffset_ - bitOffset_); } - unsigned int bitOffset_; + size_t bitOffset_; }; /** @@ -605,7 +522,13 @@ template inline T loadUnaligned(const void* p) { static_assert(sizeof(Unaligned) == sizeof(T), "Invalid unaligned size"); static_assert(alignof(Unaligned) == 1, "Invalid alignment"); - return static_cast*>(p)->value; + if (kHasUnalignedAccess) { + return static_cast*>(p)->value; + } else { + T value; + memcpy(&value, p, sizeof(T)); + return value; + } } /** @@ -615,9 +538,17 @@ template inline void storeUnaligned(void* p, T value) { static_assert(sizeof(Unaligned) == sizeof(T), "Invalid unaligned size"); static_assert(alignof(Unaligned) == 1, "Invalid alignment"); - new (p) Unaligned(value); + if (kHasUnalignedAccess) { + // Prior to C++14, the spec says that a placement new like this + // is required to check that p is not nullptr, and to do nothing + // if p is a nullptr. By assuming it's not a nullptr, we get a + // nice loud segfault in optimized builds if p is nullptr, rather + // than just silently doing nothing. + folly::assume(p != nullptr); + new (p) Unaligned(value); + } else { + memcpy(p, &value, sizeof(T)); + } } } // namespace folly - -#endif /* FOLLY_BITS_H_ */