Make folly::detail::CacheLocality portable on apple

[folly.git] / folly / detail / CacheLocality.cpp
diff --git a/folly/detail/CacheLocality.cpp b/folly/detail/CacheLocality.cpp

index ef562fa99f584e34ee98a7021df6ef8bba696cc5..a6abec2ae1aabb6f454e601b34e22b1d1050dccc 100644 (file)
--- a/folly/detail/CacheLocality.cpp
+++ b/folly/detail/CacheLocality.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2015 Facebook, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
@@ -16,10 +16,11 @@
  
  #include <folly/detail/CacheLocality.h>
  
+#ifndef _MSC_VER
  #define _GNU_SOURCE 1 // for RTLD_NOLOAD
  #include <dlfcn.h>
+#endif
  #include <fstream>
-#include <mutex>
  
  #include <folly/Conv.h>
  #include <folly/Exception.h>
@@ -37,7 +38,7 @@ static CacheLocality getSystemLocalityInfo() {
    try {
      return CacheLocality::readFromSysfs();
    } catch (...) {
-    // fall through to below if something goes wrong
+    // keep trying
    }
  #endif
  
@@ -85,7 +86,7 @@ static size_t parseLeadingNumber(const std::string& line) {
    auto raw = line.c_str();
    char *end;
    unsigned long val = strtoul(raw, &end, 10);
-  if (end == raw || (*end != ',' && *end != '-' && *end != '\n')) {
+  if (end == raw || (*end != ',' && *end != '-' && *end != '\n' && *end != 0)) {
      throw std::runtime_error(to<std::string>(
          "error parsing list '", line, "'").c_str());
    }
@@ -202,89 +203,36 @@ CacheLocality CacheLocality::uniform(size_t numCpus) {
  
  ////////////// Getcpu
  
-#ifdef CLOCK_REALTIME_COARSE
-
-static std::once_flag gVdsoInitOnce;
-static Getcpu::Func gVdsoGetcpuFunc;
-static int64_t (*gVdsoGettimeNsFunc)(clockid_t);
-
-static int cachingVdsoGetcpu(unsigned* cpu, unsigned* unused_node,
-                             void* unused_tcache) {
-  static __thread unsigned tls_cpu;
-  static __thread int64_t tls_lastContextSwitchNanos;
-
-  auto lastContextSwitchNanos = gVdsoGettimeNsFunc(CLOCK_REALTIME_COARSE);
-  if (tls_lastContextSwitchNanos != lastContextSwitchNanos) {
-    int rv = gVdsoGetcpuFunc(&tls_cpu, nullptr, nullptr);
-    if (rv != 0) {
-      return rv;
-    }
-    tls_lastContextSwitchNanos = lastContextSwitchNanos;
+/// Resolves the dynamically loaded symbol __vdso_getcpu, returning null
+/// on failure
+static Getcpu::Func loadVdsoGetcpu() {
+#ifdef _MSC_VER
+  return nullptr;
+#else
+  void* h = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+  if (h == nullptr) {
+    return nullptr;
    }
-  *cpu = tls_cpu;
-  return 0;
-}
-#endif
-
-/// Resolves the dynamically loaded symbol __vdso_getcpu and
-/// __vdso_clock_gettime_ns, returning a pair of nulls on failure.  Does a
-/// little bit of probing to make sure that the __vdso_clock_gettime_ns
-/// function isn't using the slow fallback path.
-Getcpu::Func Getcpu::vdsoFunc() {
-#ifdef CLOCK_REALTIME_COARSE
-  std::call_once(gVdsoInitOnce, []{
-    void* h = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
-
-    typedef int64_t (*GettimeNsFunc)(clockid_t);
-
-    auto getcpuFunc = Getcpu::Func(
-        !h ? nullptr : dlsym(h, "__vdso_getcpu"));
-    auto gettimeNsFunc = GettimeNsFunc(
-        !h ? nullptr : dlsym(h, "__vdso_clock_gettime_ns"));
-
-    bool coarseGettimeDetected = false;
-    if (gettimeNsFunc != nullptr) {
-      // The TLS cache of getcpu results only is an optimization if the
-      // __vdso_clock_gettime_ns implementation is fast and actually
-      // coarse.  The slow fallback implementation is not coarse, so if
-      // we detect a coarse clock we are set.  If CLOCK_REALTIME_COARSE
-      // has the right properties, then so long as there is no context
-      // switch between two calls the returned time will be identical.
-      // Dynamically verify this.  An unlikely context switch while we're
-      // testing can lead to a false negative, but not a false positive,
-      // so we just run the test multiple times.  This ensures that we
-      // will get two calls to gettimeNsFunc in a row with no intervening
-      // context switch.
-      auto prev = gettimeNsFunc(CLOCK_REALTIME_COARSE);
-      for (int i = 0; i < 10 && !coarseGettimeDetected; ++i) {
-        auto next = gettimeNsFunc(CLOCK_REALTIME_COARSE);
-        coarseGettimeDetected = next == prev;
-        prev = next;
-      }
-    }
-
-    if (getcpuFunc == nullptr || !coarseGettimeDetected) {
-      // technically a null getcpuFunc could either be a failure or
-      // a successful lookup of a symbol with the null value, but the
-      // second can't actually happen for this symbol.  No point holding
-      // the handle forever if we don't need the code
-      if (h) {
-        dlclose(h);
-      }
-    } else {
-      gVdsoGetcpuFunc = getcpuFunc;
-      gVdsoGettimeNsFunc = gettimeNsFunc;
-    }
-  });
  
-  if (gVdsoGetcpuFunc != nullptr) {
-    return cachingVdsoGetcpu;
+  auto func = Getcpu::Func(dlsym(h, "__vdso_getcpu"));
+  if (func == nullptr) {
+    // technically a null result could either be a failure or a successful
+    // lookup of a symbol with the null value, but the second can't actually
+    // happen for this symbol.  No point holding the handle forever if
+    // we don't need the code
+    dlclose(h);
    }
+
+  return func;
  #endif
+}
  
-  return nullptr;
+Getcpu::Func Getcpu::vdsoFunc() {
+  static Func func = loadVdsoGetcpu();
+  return func;
  }
  
+#ifdef FOLLY_TLS
  /////////////// SequentialThreadId
  
  template<>
@@ -292,6 +240,7 @@ std::atomic<size_t> SequentialThreadId<std::atomic>::prevId(0);
  
  template<>
  FOLLY_TLS size_t SequentialThreadId<std::atomic>::currentId(0);
+#endif
  
  /////////////// AccessSpreader
  
@@ -330,7 +279,7 @@ Getcpu::Func AccessSpreader<std::atomic>::pickGetcpuFunc(size_t numStripes) {
      return &degenerateGetcpu;
    } else {
      auto best = Getcpu::vdsoFunc();
-    return best ? best : &SequentialThreadId<std::atomic>::getcpu;
+    return best ? best : &FallbackGetcpuType::getcpu;
    }
  }