X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2Fdetail%2FCacheLocality.cpp;h=09da2871592999375cfa38f3b29227347678da4e;hb=03ce292a000fb616c12e2aca2a38b7b26e6ef050;hp=8667b34f8e52ead17c53e3efefab4bdc6a0df2d0;hpb=321542683a01c3f334047531e9b487f047129775;p=folly.git diff --git a/folly/detail/CacheLocality.cpp b/folly/detail/CacheLocality.cpp index 8667b34f..09da2871 100644 --- a/folly/detail/CacheLocality.cpp +++ b/folly/detail/CacheLocality.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2016 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,7 +28,8 @@ #include #include -namespace folly { namespace detail { +namespace folly { +namespace detail { ///////////// CacheLocality @@ -54,13 +55,13 @@ static CacheLocality getSystemLocalityInfo() { // wiggle room numCpus = 32; } - return CacheLocality::uniform(numCpus); + return CacheLocality::uniform(size_t(numCpus)); } template <> const CacheLocality& CacheLocality::system() { - static CacheLocality cache(getSystemLocalityInfo()); - return cache; + static auto* cache = new CacheLocality(getSystemLocalityInfo()); + return *cache; } // Each level of cache has sharing sets, which are the set of cpus @@ -84,11 +85,11 @@ const CacheLocality& CacheLocality::system() { /// '\n', or eos. static size_t parseLeadingNumber(const std::string& line) { auto raw = line.c_str(); - char *end; + char* end; unsigned long val = strtoul(raw, &end, 10); if (end == raw || (*end != ',' && *end != '-' && *end != '\n' && *end != 0)) { - throw std::runtime_error(to( - "error parsing list '", line, "'").c_str()); + throw std::runtime_error( + to("error parsing list '", line, "'").c_str()); } return val; } @@ -107,9 +108,9 @@ CacheLocality CacheLocality::readFromSysfsTree( while (true) { auto cpu = cpus.size(); std::vector levels; - for (size_t index = 0; ; ++index) { - auto dir = format("/sys/devices/system/cpu/cpu{}/cache/index{}/", - cpu, index).str(); + for (size_t index = 0;; ++index) { + auto dir = + sformat("/sys/devices/system/cpu/cpu{}/cache/index{}/", cpu, index); auto cacheType = mapping(dir + "type"); auto equivStr = mapping(dir + "shared_cpu_list"); if (cacheType.size() == 0 || equivStr.size() == 0) { @@ -146,22 +147,27 @@ CacheLocality CacheLocality::readFromSysfsTree( throw std::runtime_error("unable to load cache sharing info"); } - std::sort(cpus.begin(), cpus.end(), [&](size_t lhs, size_t rhs) -> bool { - // sort first by equiv class of cache with highest index, direction - // doesn't matter. If different cpus have different numbers of - // caches then this code might produce a sub-optimal ordering, but - // it won't crash - auto& lhsEquiv = equivClassesByCpu[lhs]; - auto& rhsEquiv = equivClassesByCpu[rhs]; - for (int i = std::min(lhsEquiv.size(), rhsEquiv.size()) - 1; i >= 0; --i) { - if (lhsEquiv[i] != rhsEquiv[i]) { - return lhsEquiv[i] < rhsEquiv[i]; - } - } - - // break ties deterministically by cpu - return lhs < rhs; - }); + std::sort(cpus.begin(), + cpus.end(), + [&](size_t lhs, size_t rhs) -> bool { + // sort first by equiv class of cache with highest index, + // direction doesn't matter. If different cpus have + // different numbers of caches then this code might produce + // a sub-optimal ordering, but it won't crash + auto& lhsEquiv = equivClassesByCpu[lhs]; + auto& rhsEquiv = equivClassesByCpu[rhs]; + for (ssize_t i = ssize_t(std::min(lhsEquiv.size(), rhsEquiv.size())) - 1; + i >= 0; + --i) { + auto idx = size_t(i); + if (lhsEquiv[idx] != rhsEquiv[idx]) { + return lhsEquiv[idx] < rhsEquiv[idx]; + } + } + + // break ties deterministically by cpu + return lhs < rhs; + }); // the cpus are now sorted by locality, with neighboring entries closer // to each other than entries that are far away. For striping we want @@ -172,7 +178,7 @@ CacheLocality CacheLocality::readFromSysfsTree( } return CacheLocality{ - cpus.size(), std::move(numCachesByLevel), std::move(indexes) }; + cpus.size(), std::move(numCachesByLevel), std::move(indexes)}; } CacheLocality CacheLocality::readFromSysfs() { @@ -184,7 +190,6 @@ CacheLocality CacheLocality::readFromSysfs() { }); } - CacheLocality CacheLocality::uniform(size_t numCpus) { CacheLocality rv; @@ -203,10 +208,8 @@ CacheLocality CacheLocality::uniform(size_t numCpus) { ////////////// Getcpu -/// Resolves the dynamically loaded symbol __vdso_getcpu, returning null -/// on failure -static Getcpu::Func loadVdsoGetcpu() { -#if defined(_MSC_VER) || defined(__BIONIC__) +Getcpu::Func Getcpu::resolveVdsoFunc() { +#if !FOLLY_HAVE_LINUX_VDSO return nullptr; #else void* h = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); @@ -227,60 +230,13 @@ static Getcpu::Func loadVdsoGetcpu() { #endif } -Getcpu::Func Getcpu::vdsoFunc() { - static Func func = loadVdsoGetcpu(); - return func; -} - #ifdef FOLLY_TLS /////////////// SequentialThreadId - -template<> -std::atomic SequentialThreadId::prevId(0); - -template<> -FOLLY_TLS size_t SequentialThreadId::currentId(0); +template struct SequentialThreadId; #endif /////////////// AccessSpreader +template struct AccessSpreader; -template<> -const AccessSpreader -AccessSpreader::stripeByCore( - CacheLocality::system<>().numCachesByLevel.front()); - -template<> -const AccessSpreader -AccessSpreader::stripeByChip( - CacheLocality::system<>().numCachesByLevel.back()); - -template<> -AccessSpreaderArray -AccessSpreaderArray::sharedInstance = {}; - -/// Always claims to be on CPU zero, node zero -static int degenerateGetcpu(unsigned* cpu, unsigned* node, void* /* unused */) { - if (cpu != nullptr) { - *cpu = 0; - } - if (node != nullptr) { - *node = 0; - } - return 0; -} - -template<> -Getcpu::Func AccessSpreader::pickGetcpuFunc(size_t numStripes) { - if (numStripes == 1) { - // there's no need to call getcpu if there is only one stripe. - // This should not be common, so we don't want to waste a test and - // branch in the main code path, but we might as well use a faster - // function pointer - return °enerateGetcpu; - } else { - auto best = Getcpu::vdsoFunc(); - return best ? best : &FallbackGetcpuType::getcpu; - } -} - -} } // namespace folly::detail +} // namespace detail +} // namespace folly