From: Christopher Dykes Date: Fri, 6 May 2016 16:32:30 +0000 (-0700) Subject: Use intrinsics rather than inline assembly where possible X-Git-Tag: 2016.07.26~269 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=ceaca5b0cc47ab731984f268d2403fca131ff04f;p=folly.git Use intrinsics rather than inline assembly where possible Summary: I would switch these to just use the intrinsic functions, but GCC 4.8 doesn't support them. MSVC supports the intrinsics, which is the primary reason for the switch. Reviewed By: yfeldblum Differential Revision: D3265572 fb-gh-sync-id: f12d16b8f64cf48adcb97958d7ae88d56689a96f fbshipit-source-id: f12d16b8f64cf48adcb97958d7ae88d56689a96f --- diff --git a/folly/Portability.h b/folly/Portability.h index c9616ea7..cbf1b71c 100644 --- a/folly/Portability.h +++ b/folly/Portability.h @@ -92,6 +92,13 @@ constexpr bool kHasUnalignedAccess = false; # define FOLLY_ALWAYS_INLINE inline #endif +// target +#ifdef _MSC_VER +# define FOLLY_TARGET_ATTRIBUTE(target) +#else +# define FOLLY_TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#endif + // detection for 64 bit #if defined(__x86_64__) || defined(_M_X64) # define FOLLY_X64 1 diff --git a/folly/experimental/Instructions.h b/folly/experimental/Instructions.h index 97386ff1..6a1d6ed6 100644 --- a/folly/experimental/Instructions.h +++ b/folly/experimental/Instructions.h @@ -17,8 +17,10 @@ #pragma once #include +#include #include +#include namespace folly { namespace compression { namespace instructions { @@ -52,11 +54,17 @@ struct Nehalem : public Default { static bool supported(const folly::CpuId& cpuId = {}) { return cpuId.popcnt(); } - static inline uint64_t popcount(uint64_t value) { + static inline uint64_t popcount(uint64_t value) + FOLLY_TARGET_ATTRIBUTE("popcnt") { // POPCNT is supported starting with Intel Nehalem, AMD K10. +#if defined(__GNUC__) && !__GNUC_PREREQ(4, 9) + // GCC 4.8 doesn't support the intrinsics. uint64_t result; asm ("popcntq %1, %0" : "=r" (result) : "r" (value)); return result; +#else + return _mm_popcnt_u64(value); +#endif } }; @@ -64,12 +72,17 @@ struct Haswell : public Nehalem { static bool supported(const folly::CpuId& cpuId = {}) { return Nehalem::supported(cpuId) && cpuId.bmi1(); } - static inline uint64_t blsr(uint64_t value) { + static inline uint64_t blsr(uint64_t value) FOLLY_TARGET_ATTRIBUTE("bmi") { // BMI1 is supported starting with Intel Haswell, AMD Piledriver. // BLSR combines two instuctions into one and reduces register pressure. +#if defined(__GNUC__) && !__GNUC_PREREQ(4, 9) + // GCC 4.8 doesn't support the intrinsics. uint64_t result; asm ("blsrq %1, %0" : "=r" (result) : "r" (value)); return result; +#else + return _blsr_u64(value); +#endif } }; diff --git a/folly/experimental/Select64.h b/folly/experimental/Select64.h index 9c2e03f9..5d0fcaa5 100644 --- a/folly/experimental/Select64.h +++ b/folly/experimental/Select64.h @@ -62,9 +62,15 @@ inline uint64_t select64(uint64_t x, uint64_t k) { return place + detail::kSelectInByte[((x >> place) & 0xFF) | (byteRank << 8)]; } +template <> +uint64_t select64(uint64_t x, uint64_t k) + FOLLY_TARGET_ATTRIBUTE("bmi,bmi2"); + template <> inline uint64_t select64(uint64_t x, uint64_t k) { +#if defined(__GNUC__) && !__GNUC_PREREQ(4, 9) + // GCC 4.8 doesn't support the intrinsics. uint64_t result = uint64_t(1) << k; asm("pdep %1, %0, %0\n\t" @@ -73,6 +79,9 @@ inline uint64_t select64(uint64_t x, : "r"(x)); return result; +#else + return _tzcnt_u64(_pdep_u64(x, 1ULL << k)); +#endif } } // namespace folly