X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2Fexperimental%2FInstructions.h;h=a98de9694a97dcbad3b72036e4827701e0d1b01b;hb=d08cf1621ac39b4a23e9f4508222700ce68e9d4a;hp=ab2db160d9ec180316bd293ef30e8e87983914a4;hpb=ed8c80a0e0988e4ce687f51ca832a00e4a6b7930;p=folly.git diff --git a/folly/experimental/Instructions.h b/folly/experimental/Instructions.h index ab2db160..a98de969 100644 --- a/folly/experimental/Instructions.h +++ b/folly/experimental/Instructions.h @@ -43,7 +43,7 @@ struct Default { return true; } static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value) { - return __builtin_popcountll(value); + return uint64_t(__builtin_popcountll(value)); } static FOLLY_ALWAYS_INLINE int ctz(uint64_t value) { DCHECK_GT(value, 0u); @@ -72,6 +72,14 @@ struct Default { return (value >> start) & ((length == 64) ? (~0ULL) : ((1ULL << length) - 1ULL)); } + + // Clear high bits starting at position index. + static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index) { + if (index > 63) { + return 0; + } + return value & ((uint64_t(1) << index) - 1); + } }; struct Nehalem : public Default { @@ -94,12 +102,12 @@ struct Nehalem : public Default { struct Haswell : public Nehalem { static bool supported(const folly::CpuId& cpuId = {}) { - return Nehalem::supported(cpuId) && cpuId.bmi1(); + return Nehalem::supported(cpuId) && cpuId.bmi1() && cpuId.bmi2(); } static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) { // BMI1 is supported starting with Intel Haswell, AMD Piledriver. -// BLSR combines two instuctions into one and reduces register pressure. +// BLSR combines two instructions into one and reduces register pressure. #if defined(__GNUC__) || defined(__clang__) // GCC and Clang won't inline the intrinsics. uint64_t result; @@ -124,9 +132,21 @@ struct Haswell : public Nehalem { return result; #else return _bextr_u64(value, start, length); +#endif + } + + static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index) { +#if defined(__GNUC__) || defined(__clang__) + // GCC and Clang won't inline the intrinsics. + const uint64_t index64 = index; + uint64_t result; + asm("bzhiq %2, %1, %0" : "=r"(result) : "r"(value), "r"(index64)); + return result; +#else + return _bzhi_u64(value, index); #endif } }; -} -} -} // namespaces +} // namespace instructions +} // namespace compression +} // namespace folly