From 1228a37d8cf34c689cbea7d00252a8dccf669de6 Mon Sep 17 00:00:00 2001 From: Christopher Dykes Date: Thu, 9 Jun 2016 16:01:25 -0700 Subject: [PATCH] Use intrinsics rather than inline assembly where possible Summary: I would switch these to just use the intrinsic functions, but GCC 4.8 doesn't support them. MSVC supports the intrinsics, which is the primary reason for the switch. Reviewed By: philippv Differential Revision: D3278901 fbshipit-source-id: 60103ac7cf7ddfb529f65f4aadc687dbdaf423a1 --- folly/Portability.h | 7 +++++++ folly/experimental/Instructions.h | 20 ++++++++++++++++++++ folly/experimental/Select64.h | 6 ++++++ 3 files changed, 33 insertions(+) diff --git a/folly/Portability.h b/folly/Portability.h index 61c69846..2d3ec417 100644 --- a/folly/Portability.h +++ b/folly/Portability.h @@ -92,6 +92,13 @@ constexpr bool kHasUnalignedAccess = false; # define FOLLY_ALWAYS_INLINE inline #endif +// Per-function instruction-set target attribute; expands to nothing on MSVC +#ifdef _MSC_VER +# define FOLLY_TARGET_ATTRIBUTE(target) +#else +# define FOLLY_TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#endif + // detection for 64 bit #if defined(__x86_64__) || defined(_M_X64) # define FOLLY_X64 1 diff --git a/folly/experimental/Instructions.h b/folly/experimental/Instructions.h index 97386ff1..97e6e78c 100644 --- a/folly/experimental/Instructions.h +++ b/folly/experimental/Instructions.h @@ -17,8 +17,14 @@ #pragma once #include +#include +#ifdef __clang__ +// Clang defines the intrinsics in weird places. +#include +#endif #include +#include namespace folly { namespace compression { namespace instructions { @@ -52,11 +58,18 @@ struct Nehalem : public Default { static bool supported(const folly::CpuId& cpuId = {}) { return cpuId.popcnt(); } + + FOLLY_TARGET_ATTRIBUTE("popcnt") static inline uint64_t popcount(uint64_t value) { // POPCNT is supported starting with Intel Nehalem, AMD K10. +#if defined(__GNUC__) && !defined(__clang__) && !__GNUC_PREREQ(4, 9) + // GCC 4.8 doesn't support the intrinsics. 
uint64_t result; asm ("popcntq %1, %0" : "=r" (result) : "r" (value)); return result; +#else + return _mm_popcnt_u64(value); +#endif } }; @@ -64,12 +77,19 @@ struct Haswell : public Nehalem { static bool supported(const folly::CpuId& cpuId = {}) { return Nehalem::supported(cpuId) && cpuId.bmi1(); } + + FOLLY_TARGET_ATTRIBUTE("bmi") static inline uint64_t blsr(uint64_t value) { // BMI1 is supported starting with Intel Haswell, AMD Piledriver. // BLSR combines two instuctions into one and reduces register pressure. +#if defined(__GNUC__) && !defined(__clang__) && !__GNUC_PREREQ(4, 9) + // GCC 4.8 doesn't support the intrinsics. uint64_t result; asm ("blsrq %1, %0" : "=r" (result) : "r" (value)); return result; +#else + return _blsr_u64(value); +#endif } }; diff --git a/folly/experimental/Select64.h b/folly/experimental/Select64.h index 9c2e03f9..039cfa85 100644 --- a/folly/experimental/Select64.h +++ b/folly/experimental/Select64.h @@ -63,8 +63,11 @@ inline uint64_t select64(uint64_t x, uint64_t k) { } template <> +FOLLY_TARGET_ATTRIBUTE("bmi,bmi2") inline uint64_t select64(uint64_t x, uint64_t k) { +#if defined(__GNUC__) && !defined(__clang__) && !__GNUC_PREREQ(4, 9) + // GCC 4.8 doesn't support the intrinsics. uint64_t result = uint64_t(1) << k; asm("pdep %1, %0, %0\n\t" @@ -73,6 +76,9 @@ inline uint64_t select64(uint64_t x, : "r"(x)); return result; +#else + return _tzcnt_u64(_pdep_u64(1ULL << k, x)); +#endif } } // namespace folly -- 2.34.1