#include <folly/Format.h>
#include <folly/ScopeGuard.h>
-namespace folly { namespace detail {
+DECLARE_ACCESS_SPREADER_TYPE(std::atomic)
+
+namespace folly {
+namespace detail {
///////////// CacheLocality
template <>
const CacheLocality& CacheLocality::system<std::atomic>() {
- static CacheLocality cache(getSystemLocalityInfo());
- return cache;
+ static auto* cache = new CacheLocality(getSystemLocalityInfo());
+ return *cache;
}
// Each level of cache has sharing sets, which are the set of cpus
/// '\n', or eos.
static size_t parseLeadingNumber(const std::string& line) {
auto raw = line.c_str();
- char *end;
+ char* end;
unsigned long val = strtoul(raw, &end, 10);
if (end == raw || (*end != ',' && *end != '-' && *end != '\n' && *end != 0)) {
- throw std::runtime_error(to<std::string>(
- "error parsing list '", line, "'").c_str());
+ throw std::runtime_error(
+ to<std::string>("error parsing list '", line, "'").c_str());
}
return val;
}
while (true) {
auto cpu = cpus.size();
std::vector<size_t> levels;
- for (size_t index = 0; ; ++index) {
- auto dir = format("/sys/devices/system/cpu/cpu{}/cache/index{}/",
- cpu, index).str();
+ for (size_t index = 0;; ++index) {
+ auto dir =
+ sformat("/sys/devices/system/cpu/cpu{}/cache/index{}/", cpu, index);
auto cacheType = mapping(dir + "type");
auto equivStr = mapping(dir + "shared_cpu_list");
if (cacheType.size() == 0 || equivStr.size() == 0) {
throw std::runtime_error("unable to load cache sharing info");
}
- std::sort(cpus.begin(), cpus.end(), [&](size_t lhs, size_t rhs) -> bool {
- // sort first by equiv class of cache with highest index, direction
- // doesn't matter. If different cpus have different numbers of
- // caches then this code might produce a sub-optimal ordering, but
- // it won't crash
- auto& lhsEquiv = equivClassesByCpu[lhs];
- auto& rhsEquiv = equivClassesByCpu[rhs];
- for (int i = std::min(lhsEquiv.size(), rhsEquiv.size()) - 1; i >= 0; --i) {
- if (lhsEquiv[i] != rhsEquiv[i]) {
- return lhsEquiv[i] < rhsEquiv[i];
- }
- }
-
- // break ties deterministically by cpu
- return lhs < rhs;
- });
+ std::sort(cpus.begin(),
+ cpus.end(),
+ [&](size_t lhs, size_t rhs) -> bool {
+ // sort first by equiv class of cache with highest index,
+ // direction doesn't matter. If different cpus have
+ // different numbers of caches then this code might produce
+ // a sub-optimal ordering, but it won't crash
+ auto& lhsEquiv = equivClassesByCpu[lhs];
+ auto& rhsEquiv = equivClassesByCpu[rhs];
+ for (int i = std::min(lhsEquiv.size(), rhsEquiv.size()) - 1;
+ i >= 0;
+ --i) {
+ if (lhsEquiv[i] != rhsEquiv[i]) {
+ return lhsEquiv[i] < rhsEquiv[i];
+ }
+ }
+
+ // break ties deterministically by cpu
+ return lhs < rhs;
+ });
// the cpus are now sorted by locality, with neighboring entries closer
// to each other than entries that are far away. For striping we want
}
return CacheLocality{
- cpus.size(), std::move(numCachesByLevel), std::move(indexes) };
+ cpus.size(), std::move(numCachesByLevel), std::move(indexes)};
}
CacheLocality CacheLocality::readFromSysfs() {
});
}
-
CacheLocality CacheLocality::uniform(size_t numCpus) {
CacheLocality rv;
////////////// Getcpu
-/// Resolves the dynamically loaded symbol __vdso_getcpu, returning null
-/// on failure
-static Getcpu::Func loadVdsoGetcpu() {
-#if defined(_MSC_VER) || defined(__BIONIC__)
+Getcpu::Func Getcpu::resolveVdsoFunc() {
+#if !FOLLY_HAVE_LINUX_VDSO
return nullptr;
#else
void* h = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
#endif
}
-Getcpu::Func Getcpu::vdsoFunc() {
- static Func func = loadVdsoGetcpu();
- return func;
-}
-
#ifdef FOLLY_TLS
/////////////// SequentialThreadId
-template<>
+template <>
std::atomic<size_t> SequentialThreadId<std::atomic>::prevId(0);
-template<>
+template <>
FOLLY_TLS size_t SequentialThreadId<std::atomic>::currentId(0);
#endif
/////////////// AccessSpreader
-template<>
-const AccessSpreader<std::atomic>
-AccessSpreader<std::atomic>::stripeByCore(
- CacheLocality::system<>().numCachesByLevel.front());
-
-template<>
-const AccessSpreader<std::atomic>
-AccessSpreader<std::atomic>::stripeByChip(
- CacheLocality::system<>().numCachesByLevel.back());
-
-template<>
-AccessSpreaderArray<std::atomic,128>
-AccessSpreaderArray<std::atomic,128>::sharedInstance = {};
-
-/// Always claims to be on CPU zero, node zero
-static int degenerateGetcpu(unsigned* cpu, unsigned* node, void* /* unused */) {
- if (cpu != nullptr) {
- *cpu = 0;
- }
- if (node != nullptr) {
- *node = 0;
- }
- return 0;
-}
-
-template<>
-Getcpu::Func AccessSpreader<std::atomic>::pickGetcpuFunc(size_t numStripes) {
- if (numStripes == 1) {
- // there's no need to call getcpu if there is only one stripe.
- // This should not be common, so we don't want to waste a test and
- // branch in the main code path, but we might as well use a faster
- // function pointer
-    return &degenerateGetcpu;
- } else {
- auto best = Getcpu::vdsoFunc();
- return best ? best : &FallbackGetcpuType::getcpu;
- }
+template <>
+Getcpu::Func AccessSpreader<std::atomic>::pickGetcpuFunc() {
+ auto best = Getcpu::resolveVdsoFunc();
+ return best ? best : &FallbackGetcpuType::getcpu;
}
-} } // namespace folly::detail
+} // namespace detail
+} // namespace folly