--- /dev/null
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/Bits.h"
+
+#include "folly/CpuId.h"
+
+// None of this is necessary if we're compiling for a target that supports
+// popcnt
+#ifndef __POPCNT__
+
+namespace {
+
+int popcount_inst(unsigned int x) {
+ asm ("popcntl %0, %0" : "=r" (x) : "0" (x));
+ return x;
+}
+
+int popcount_builtin(unsigned int x) {
+ return __builtin_popcount(x);
+}
+
+int popcountll_inst(unsigned long long x) {
+ asm ("popcntq %0, %0" : "=r" (x) : "0" (x));
+ return x;
+}
+
+int popcountll_builtin(unsigned long long x) {
+ return __builtin_popcountll(x);
+}
+
+typedef decltype(popcount_builtin) Type_popcount;
+typedef decltype(popcountll_builtin) Type_popcountll;
+
+} // namespace
+
+// This function is called on startup to resolve folly::detail::popcount
+extern "C" Type_popcount* folly_popcount_ifunc() {
+ return folly::CpuId().popcnt() ? popcount_inst : popcount_builtin;
+}
+
+// This function is called on startup to resolve folly::detail::popcountll
+extern "C" Type_popcountll* folly_popcountll_ifunc() {
+ return folly::CpuId().popcnt() ? popcountll_inst : popcountll_builtin;
+}
+
+namespace folly {
+namespace detail {
+
+// Call folly_popcount_ifunc on startup to resolve to either popcount_inst
+// or popcount_builtin
+int popcount(unsigned int x)
+ __attribute__((ifunc("folly_popcount_ifunc")));
+
+// Call folly_popcount_ifunc on startup to resolve to either popcountll_inst
+// or popcountll_builtin
+int popcountll(unsigned long long x)
+ __attribute__((ifunc("folly_popcountll_ifunc")));
+
+} // namespace detail
+} // namespace folly
+
+#endif /* !__POPCNT__ */
+
* 1-based. 0 = no bits are set (x == 0)
* for x != 0, findLastSet(x) == 1 + floor(log2(x))
*
+ * popcount(x)
+ * return the number of 1 bits in x
+ *
* nextPowTwo(x)
* Finds the next power of two >= x.
*
#define _GNU_SOURCE 1
#endif
+#ifndef __GNUC__
+#error GCC required
+#endif
+
+#include "folly/detail/BitsDetail.h"
#include "folly/detail/BitIteratorDetail.h"
#include "folly/Likely.h"
return findFirstSet(static_cast<typename std::make_signed<T>::type>(x));
}
-namespace detail {
-
-// Portable, but likely slow...
-inline unsigned int findLastSetPortable(uint64_t x) {
- unsigned int r = (x != 0); // 1-based index, except for x==0
- while (x >>= 1) {
- ++r;
- }
- return r;
-}
-
-} // namespace detail
-
-#ifdef __GNUC__
-
// findLastSet: return the 1-based index of the highest bit set
// for x > 0, findLastSet(x) == 1 + floor(log2(x))
template <class T>
return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0;
}
-#else /* !__GNUC__ */
-
-template <class T>
-typename std::enable_if<
- (std::is_integral<T>::value &&
- std::is_unsigned<T>::value),
- unsigned int>::type
- findLastSet(T x) {
- return detail:findLastSetPortable(x);
-}
-
-#endif
-
template <class T>
typename std::enable_if<
(std::is_integral<T>::value &&
return findLastSet(static_cast<typename std::make_unsigned<T>::type>(x));
}
-namespace detail {
-
-template <class T>
-inline
-typename std::enable_if<
- std::is_integral<T>::value && std::is_unsigned<T>::value,
- T>::type
-nextPowTwoPortable(T v) {
- if (UNLIKELY(v == 0)) {
- return 1;
- }
-
- --v;
- for (uint32_t i = 1; i < sizeof(T) * 8; i <<= 8) {
- v |= (v >> i);
- v |= (v >> (i << 1));
- v |= (v >> (i << 2));
- v |= (v >> (i << 3));
- v |= (v >> (i << 4));
- v |= (v >> (i << 5));
- v |= (v >> (i << 6));
- v |= (v >> (i << 7));
- }
- return v + 1;
-}
-
-} // namespace detail
-
-#ifdef __GNUC__
-
template <class T>
inline
typename std::enable_if<
return 1ul << findLastSet(v - 1);
}
-#else /* __GNUC__ */
-
+/**
+ * Population count
+ */
template <class T>
-inline
-typename std::enable_if<
- std::is_integral<T>::value && std::is_unsigned<T>::value,
- T>::type
-nextPowTwo(T v) {
- return detail::nextPowTwoPortable(v);
+inline typename std::enable_if<
+ (std::is_integral<T>::value &&
+ std::is_unsigned<T>::value &&
+ sizeof(T) <= sizeof(unsigned int)),
+ size_t>::type
+ popcount(T x) {
+ return detail::popcount(x);
}
-#endif /* __GNUC__ */
-
-
+template <class T>
+inline typename std::enable_if<
+ (std::is_integral<T>::value &&
+ std::is_unsigned<T>::value &&
+ sizeof(T) > sizeof(unsigned int) &&
+ sizeof(T) <= sizeof(unsigned long long)),
+ size_t>::type
+ popcount(T x) {
+ return detail::popcountll(x);
+}
/**
* Endianness detection and manipulation primitives.
--- /dev/null
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_CPUID_H_
+#define FOLLY_CPUID_H_
+
+#include <cstdint>
+
+namespace folly {
+
+/**
+ * Identification of an Intel CPU.
+ * Supports CPUID (EAX=1) feature flags.
+ * Values from http://www.intel.com/content/www/us/en/processors/processor-identification-cpuid-instruction-note.html
+ */
+class CpuId {
+ public:
+ CpuId() {
+ __asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx");
+ }
+#define X(name, r, bit) bool name() const { return r & (1U << bit); }
+#define C(name, bit) X(name, c_, bit)
+#define D(name, bit) X(name, d_, bit)
+ C(sse3, 0)
+ C(pclmuldq, 1)
+ C(dtes64, 2)
+ C(monitor, 3)
+ C(dscpl, 4)
+ C(vmx, 5)
+ C(smx, 6)
+ C(eist, 7)
+ C(tm2, 8)
+ C(ssse3, 9)
+ C(cnxtid, 10)
+ // 11 is reserved
+ C(fma, 12)
+ C(cx16, 13)
+ C(xtpr, 14)
+ C(pdcm, 15)
+ // 16 is reserved
+ C(pcid, 17)
+ C(dca, 18)
+ C(sse41, 19)
+ C(sse42, 20)
+ C(x2apic, 21)
+ C(movbe, 22)
+ C(popcnt, 23)
+ C(tscdeadline, 24)
+ C(aes, 25)
+ C(xsave, 26)
+ C(osxsave, 27)
+ C(avx, 28)
+ C(f16c, 29)
+ C(rdrand, 30)
+ // 31 is not used
+ D(fpu, 0)
+ D(vme, 1)
+ D(de, 2)
+ D(pse, 3)
+ D(tsc, 4)
+ D(msr, 5)
+ D(pae, 6)
+ D(mce, 7)
+ D(cx8, 8)
+ D(apic, 9)
+ // 10 is reserved
+ D(sep, 11)
+ D(mtrr, 12)
+ D(pge, 13)
+ D(mca, 14)
+ D(cmov, 15)
+ D(pat, 16)
+ D(pse36, 17)
+ D(psn, 18)
+ D(clfsh, 19)
+ // 20 is reserved
+ D(ds, 21)
+ D(acpi, 22)
+ D(mmx, 23)
+ D(fxsr, 24)
+ D(sse, 25)
+ D(sse2, 26)
+ D(ss, 27)
+ D(htt, 28)
+ D(tm, 29)
+ // 30 is reserved
+ D(pbe, 31)
+#undef D
+#undef C
+#undef X
+ private:
+ uint32_t c_; // ECX
+ uint32_t d_; // EDX
+};
+
+} // namespace folly
+
+#endif /* FOLLY_CPUID_H_ */
+
--- /dev/null
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_DETAIL_BITSDETAIL_H_
+#define FOLLY_DETAIL_BITSDETAIL_H_
+
+namespace folly {
+namespace detail {
+
+// If we're targeting an architecture with popcnt support, use
+// __builtin_popcount directly, as it's presumably inlined.
+// If not, use runtime detection using __attribute__((ifunc))
+// (see Bits.cpp)
+#ifdef __POPCNT__
+
+inline int popcount(unsigned int x) {
+ return __builtin_popcount(x);
+}
+inline int popcountll(unsigned long long x) {
+ return __builtin_popcountll(x);
+}
+
+#else /* !__POPCNT__ */
+
+int popcount(unsigned int x);
+int popcountll(unsigned long long x);
+
+#endif /* !__POPCNT__ */
+
+} // namespace detail
+} // namespace folly
+
+#endif /* FOLLY_DETAIL_BITSDETAIL_H_ */
+
// right-shift is arithmetic for signed values, and that can lead to
// unpleasant bugs.
-/**
- * Population count (number of bits set), using __builtin_popcount or
- * __builtin_popcountll, depending on size.
- */
-template <class T>
-inline typename std::enable_if<
- (std::is_integral<T>::value &&
- std::is_unsigned<T>::value &&
- sizeof(T) <= sizeof(unsigned int)),
- size_t>::type
- popcount(T x) {
- return __builtin_popcount(x);
-}
-
-template <class T>
-inline typename std::enable_if<
- (std::is_integral<T>::value &&
- std::is_unsigned<T>::value &&
- sizeof(T) > sizeof(unsigned int) &&
- sizeof(T) <= sizeof(unsigned long long)),
- size_t>::type
- popcount(T x) {
- return __builtin_popcountll(x);
-}
-
namespace detail {
/**
}
}
-template <class INT>
-unsigned int findLastSetPortable(INT x) {
- return detail::findLastSetPortable(
- static_cast<typename std::make_unsigned<INT>::type>(x));
-}
-
template <class INT>
void testFLS() {
typedef typename std::make_unsigned<INT>::type UINT;
for (size_t i = 0; i < bits; i++) {
INT v1 = static_cast<UINT>(1) << i;
EXPECT_EQ(i + 1, findLastSet(v1));
- EXPECT_EQ(i + 1, findLastSetPortable(v1));
INT v2 = (static_cast<UINT>(1) << i) - 1;
EXPECT_EQ(i, findLastSet(v2));
- EXPECT_EQ(i, findLastSetPortable(v2));
}
}
}
-#ifdef __GNUC__
-
TEST(Bits, nextPowTwoClz) {
testPowTwo(nextPowTwo);
}
x = folly::nextPowTwo(iters);
}
-#endif
-
-TEST(Bits, nextPowTwoPortable) {
- testPowTwo(detail::nextPowTwoPortable);
-}
-
-BENCHMARK(nextPowTwoPortable, iters) {
- x = detail::nextPowTwoPortable(iters);
+TEST(Bits, popcount) {
+ EXPECT_EQ(0, popcount(0U));
+ EXPECT_EQ(1, popcount(1U));
+ EXPECT_EQ(32, popcount(uint32_t(-1)));
+ EXPECT_EQ(64, popcount(uint64_t(-1)));
}
-BENCHMARK_DRAW_LINE();
-
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
google::ParseCommandLineFlags(&argc, &argv, true);
Benchmark Iters Total t t/iter iter/sec
------------------------------------------------------------------------------
* nextPowTwoClz 1000000 1.659 ms 1.659 ns 574.8 M
- +66.8% nextPowTwoPortable 1000000 2.767 ms 2.767 ns 344.7 M
*/
--- /dev/null
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/CpuId.h"
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+using namespace folly;
+
+TEST(CpuId, Simple) {
+ // All CPUs should support MMX
+ CpuId id;
+ EXPECT_TRUE(id.mmx());
+}
+