/*
 * Copyright 2017 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 #include <glog/logging.h>
22 #include <immintrin.h>
25 #include <folly/CpuId.h>
26 #include <folly/Portability.h>
27 #include <folly/portability/Builtins.h>
30 namespace compression {
31 namespace instructions {
// NOTE: It's recommended to compile EF coding with -msse4.2: starting with
// Nehalem, Intel CPUs support the POPCNT instruction, and gcc will emit it
// for the __builtin_popcountll intrinsic.
// As an alternative, client code can switch to the appropriate version of
// EliasFanoReader<> at runtime (the client must implement this switching
// logic itself) by specifying the instruction set to use explicitly.
42 static bool supported(const folly::CpuId& /* cpuId */ = {}) {
45 static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value) {
46 return __builtin_popcountll(value);
48 static FOLLY_ALWAYS_INLINE int ctz(uint64_t value) {
50 return __builtin_ctzll(value);
52 static FOLLY_ALWAYS_INLINE int clz(uint64_t value) {
54 return __builtin_clzll(value);
56 static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) {
57 return value & (value - 1);
60 // Extract `length` bits starting from `start` from value. Only bits [0:63]
61 // will be extracted. All higher order bits in the
62 // result will be zeroed. If no bits are extracted, return 0.
63 static FOLLY_ALWAYS_INLINE uint64_t
64 bextr(uint64_t value, uint32_t start, uint32_t length) {
68 if (start + length > 64) {
72 return (value >> start) &
73 ((length == 64) ? (~0ULL) : ((1ULL << length) - 1ULL));
77 struct Nehalem : public Default {
78 static bool supported(const folly::CpuId& cpuId = {}) {
79 return cpuId.popcnt();
82 static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value) {
83 // POPCNT is supported starting with Intel Nehalem, AMD K10.
84 #if defined(__GNUC__) || defined(__clang__)
85 // GCC and Clang won't inline the intrinsics.
87 asm("popcntq %1, %0" : "=r"(result) : "r"(value));
90 return uint64_t(_mm_popcnt_u64(value));
95 struct Haswell : public Nehalem {
96 static bool supported(const folly::CpuId& cpuId = {}) {
97 return Nehalem::supported(cpuId) && cpuId.bmi1();
100 static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) {
101 // BMI1 is supported starting with Intel Haswell, AMD Piledriver.
102 // BLSR combines two instuctions into one and reduces register pressure.
103 #if defined(__GNUC__) || defined(__clang__)
104 // GCC and Clang won't inline the intrinsics.
106 asm("blsrq %1, %0" : "=r"(result) : "r"(value));
109 return _blsr_u64(value);
113 static FOLLY_ALWAYS_INLINE uint64_t
114 bextr(uint64_t value, uint32_t start, uint32_t length) {
115 #if defined(__GNUC__) || defined(__clang__)
116 // GCC and Clang won't inline the intrinsics.
117 // Encode parameters in `pattern` where `pattern[0:7]` is `start` and
118 // `pattern[8:15]` is `length`.
119 // Ref: Intel Advanced Vector Extensions Programming Reference
120 uint64_t pattern = start & 0xFF;
121 pattern = pattern | ((length & 0xFF) << 8);
123 asm("bextrq %2, %1, %0" : "=r"(result) : "r"(value), "r"(pattern));
126 return _bextr_u64(value, start, length);