/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_CPUID_H_
-#define FOLLY_CPUID_H_
+#pragma once
#include <cstdint>
+#include <folly/Portability.h>
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
namespace folly {
/**
* Identification of an Intel CPU.
- * Supports CPUID (EAX=1) feature flags.
+ * Supports CPUID feature flags (EAX=1) and extended features (EAX=7, ECX=0).
* Values from http://www.intel.com/content/www/us/en/processors/processor-identification-cpuid-instruction-note.html
*/
class CpuId {
public:
- CpuId() {
- __asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx");
+ // Always inline in order for this to be usable from a __ifunc__.
+ // In shared library mode, a __ifunc__ runs at relocation time, while the
+ // PLT hasn't been fully populated yet; thus, ifuncs cannot use symbols
+ // with potentially external linkage. (This issue is less likely in opt
+ // mode since inlining is more likely there, and it doesn't happen for
+ // statically linked binaries which don't depend on the PLT)
+ //
+ // The constructor executes cpuid(0) to learn the highest supported basic
+ // leaf, then reads leaf 1 (ECX/EDX) and leaf 7, sub-leaf 0 (EBX/ECX) only
+ // when that leaf is reported as available. Any field that is not read
+ // keeps its in-class default of 0, so the matching accessors return false.
+ // When none of the preprocessor branches below applies, no cpuid is
+ // executed at all and every accessor returns false.
+ FOLLY_ALWAYS_INLINE CpuId() {
+#ifdef _MSC_VER
+ // __cpuid/__cpuidex fill reg[] in the order EAX, EBX, ECX, EDX.
+ int reg[4];
+ __cpuid(static_cast<int*>(reg), 0);
+ const int n = reg[0];
+ if (n >= 1) {
+ __cpuid(static_cast<int*>(reg), 1);
+ f1c_ = uint32_t(reg[2]);
+ f1d_ = uint32_t(reg[3]);
+ }
+ if (n >= 7) {
+ __cpuidex(static_cast<int*>(reg), 7, 0);
+ f7b_ = uint32_t(reg[1]);
+ f7c_ = uint32_t(reg[2]);
+ }
+#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && \
+ defined(__GNUC__)
+ // The following block is like the normal cpuid branch below, but gcc
+ // reserves ebx for use as its PIC register, so we must explicitly
+ // save and restore it around cpuid to avoid clobbering the register.
+ uint32_t n;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "ecx", "edx");
+ if (n >= 1) {
+ uint32_t f1a;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(f1a), "=c"(f1c_), "=d"(f1d_)
+ : "a"(1)
+ :);
+ }
+ if (n >= 7) {
+ // EBX cannot be named as an output here, so its value is copied into
+ // EAX before the saved EBX is restored.
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %%eax\n\t"
+ "popl %%ebx"
+ : "=a"(f7b_), "=c"(f7c_)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#elif FOLLY_X64 || defined(__i386__)
+ uint32_t n;
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+ if (n >= 1) {
+ uint32_t f1a;
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c_), "=d"(f1d_) : "a"(1) : "ebx");
+ }
+ if (n >= 7) {
+ uint32_t f7a;
+ __asm__("cpuid"
+ : "=a"(f7a), "=b"(f7b_), "=c"(f7c_)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#endif
+ }
+
+// X(name, r, bit) defines a const accessor `name()` that returns true when
+// bit number `bit` is set in the stored register value `r`.
+#define X(name, r, bit) \
+ FOLLY_ALWAYS_INLINE bool name() const { \
+ return ((r) & (1U << (bit))) != 0; \
}
-#define X(name, r, bit) bool name() const { return r & (1U << bit); }
-#define C(name, bit) X(name, c_, bit)
-#define D(name, bit) X(name, d_, bit)
+
+// cpuid(1): Processor Info and Feature Bits.
+// C(...) accessors test bits of ECX; D(...) accessors test bits of EDX.
+#define C(name, bit) X(name, f1c_, bit)
C(sse3, 0)
C(pclmuldq, 1)
C(dtes64, 2)
C(tm2, 8)
C(ssse3, 9)
C(cnxtid, 10)
- // 11 is reserved
C(fma, 12)
C(cx16, 13)
C(xtpr, 14)
C(pdcm, 15)
- // 16 is reserved
C(pcid, 17)
C(dca, 18)
C(sse41, 19)
C(avx, 28)
C(f16c, 29)
C(rdrand, 30)
- // 31 is not used
+#undef C
+#define D(name, bit) X(name, f1d_, bit)
D(fpu, 0)
D(vme, 1)
D(de, 2)
D(mce, 7)
D(cx8, 8)
D(apic, 9)
- // 10 is reserved
D(sep, 11)
D(mtrr, 12)
D(pge, 13)
D(pse36, 17)
D(psn, 18)
D(clfsh, 19)
- // 20 is reserved
D(ds, 21)
D(acpi, 22)
D(mmx, 23)
D(ss, 27)
D(htt, 28)
D(tm, 29)
- // 30 is reserved
D(pbe, 31)
#undef D
+
+ // cpuid(7): Extended Features.
+ // B(...) accessors test bits of EBX; C(...) accessors test bits of ECX.
+#define B(name, bit) X(name, f7b_, bit)
+ B(bmi1, 3)
+ B(hle, 4)
+ B(avx2, 5)
+ B(smep, 7)
+ B(bmi2, 8)
+ B(erms, 9)
+ B(invpcid, 10)
+ B(rtm, 11)
+ B(mpx, 14)
+ B(avx512f, 16)
+ B(avx512dq, 17)
+ B(rdseed, 18)
+ B(adx, 19)
+ B(smap, 20)
+ B(avx512ifma, 21)
+ B(pcommit, 22)
+ B(clflushopt, 23)
+ B(clwb, 24)
+ B(avx512pf, 26)
+ B(avx512er, 27)
+ B(avx512cd, 28)
+ B(sha, 29)
+ B(avx512bw, 30)
+ B(avx512vl, 31)
+#undef B
+#define C(name, bit) X(name, f7c_, bit)
+ C(prefetchwt1, 0)
+ C(avx512vbmi, 1)
#undef C
+
#undef X
+
private:
- uint32_t c_; // ECX
- uint32_t d_; // EDX
+ // Raw register values captured by the constructor; each stays 0 when the
+ // corresponding cpuid leaf was not read.
+ uint32_t f1c_ = 0; // cpuid(1) ECX
+ uint32_t f1d_ = 0; // cpuid(1) EDX
+ uint32_t f7b_ = 0; // cpuid(7, 0) EBX
+ uint32_t f7c_ = 0; // cpuid(7, 0) ECX
};
-} // namespace folly
-
-#endif /* FOLLY_CPUID_H_ */
-
+} // namespace folly