2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <glog/logging.h>
20 #include <immintrin.h>
22 // Clang defines the intrinsics in weird places.
23 #include <popcntintrin.h>
26 #include <folly/CpuId.h>
27 #include <folly/portability/Builtins.h>
29 namespace folly { namespace compression { namespace instructions {
31 // NOTE: It's recommended to compile EF coding with -msse4.2. Starting
32 // with Nehalem, Intel CPUs support the POPCNT instruction, and gcc will
33 // emit it for the __builtin_popcountll intrinsic.
34 // We also provide an alternative for client code: it can switch to
35 // the appropriate version of EliasFanoReader<> at runtime (the client must
36 // implement this switching logic itself) by specifying the instruction set to
37 // use explicitly.
// Availability check for this instruction set. Always returns true; the
// CpuId argument is accepted (and defaulted) but never inspected.
40 static bool supported(const folly::CpuId& /* cpuId */ = {}) { return true; }
// Returns the number of set (1) bits in value by delegating to the
// compiler builtin __builtin_popcountll.
41 static inline uint64_t popcount(uint64_t value) {
42 return __builtin_popcountll(value);
// Returns the number of trailing zero bits in value (i.e. the index of the
// lowest set bit) via __builtin_ctzll.
// NOTE: the builtin's result is undefined when value == 0, so callers must
// pass a non-zero value.
44 static inline int ctz(uint64_t value) {
46 return __builtin_ctzll(value);
// Returns the number of leading zero bits in value, counted from the most
// significant bit, via __builtin_clzll.
// NOTE: the builtin's result is undefined when value == 0, so callers must
// pass a non-zero value.
48 static inline int clz(uint64_t value) {
50 return __builtin_clzll(value);
// Returns value with its lowest set bit cleared.
// Subtracting 1 flips the lowest set bit and every bit below it, so AND-ing
// with the original keeps only the higher bits. Yields 0 when value is 0.
52 static inline uint64_t blsr(uint64_t value) {
53 return value & (value - 1);
// Instruction set that derives from Default and replaces popcount() with a
// version based on the POPCNT instruction.
57 struct Nehalem : public Default {
// Returns the result of cpuId.popcnt() for the given (or default-constructed)
// CpuId -- presumably the CPU's POPCNT feature flag; confirm against
// folly::CpuId.
58 static bool supported(const folly::CpuId& cpuId = {}) {
59 return cpuId.popcnt();
// Returns the number of set bits in value using POPCNT.
// FOLLY_TARGET_ATTRIBUTE("popcnt") presumably enables the POPCNT target
// feature for this function only -- confirm against the macro's definition.
62 FOLLY_TARGET_ATTRIBUTE("popcnt")
63 static inline uint64_t popcount(uint64_t value) {
64 // POPCNT is supported starting with Intel Nehalem, AMD K10.
// The condition below selects GCC proper (not clang) older than 4.9.
65 #if defined(__GNUC__) && !defined(__clang__) && !__GNUC_PREREQ(4, 9)
66 // GCC 4.8 doesn't support the intrinsics.
// Inline-assembly form: %0 is the write-only register output bound to
// `result`, %1 is the register input bound to `value`.
68 asm ("popcntq %1, %0" : "=r" (result) : "r" (value));
// Intrinsic form of the same operation.
71 return _mm_popcnt_u64(value);
// Instruction set that derives from Nehalem and replaces blsr() with a
// version based on the BMI1 BLSR instruction.
76 struct Haswell : public Nehalem {
// True only when Nehalem::supported(cpuId) holds and cpuId.bmi1() is also
// true (presumably the CPU's BMI1 feature flag; confirm against
// folly::CpuId).
77 static bool supported(const folly::CpuId& cpuId = {}) {
78 return Nehalem::supported(cpuId) && cpuId.bmi1();
// Returns value with its lowest set bit cleared, using BLSR.
// FOLLY_TARGET_ATTRIBUTE("bmi") presumably enables the BMI target feature
// for this function only -- confirm against the macro's definition.
81 FOLLY_TARGET_ATTRIBUTE("bmi")
82 static inline uint64_t blsr(uint64_t value) {
83 // BMI1 is supported starting with Intel Haswell, AMD Piledriver.
84 // BLSR combines two instructions into one and reduces register pressure.
// The condition below selects GCC proper (not clang) older than 4.9.
85 #if defined(__GNUC__) && !defined(__clang__) && !__GNUC_PREREQ(4, 9)
86 // GCC 4.8 doesn't support the intrinsics.
// Inline-assembly form: %0 is the write-only register output bound to
// `result`, %1 is the register input bound to `value`.
88 asm ("blsrq %1, %0" : "=r" (result) : "r" (value));
// Intrinsic form of the same operation.
91 return _blsr_u64(value);