folly/detail/MemoryIdler.cpp

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <folly/detail/MemoryIdler.h>
  18 #include <folly/Logging.h>
  19 #include <folly/Malloc.h>
  20 #include <folly/ScopeGuard.h>
  21 #include <folly/detail/CacheLocality.h>
  22 #include <limits.h>
  23 #include <pthread.h>
  24 #include <stdio.h>
  25 #include <string.h>
  26 #include <unistd.h>
  27 #include <sys/mman.h>
  28 #include <utility>
  29
  30
  31 namespace folly { namespace detail {
  32
  33 AtomicStruct<std::chrono::steady_clock::duration>
  34 MemoryIdler::defaultIdleTimeout(std::chrono::seconds(5));
  35
  36
  37 // Calls mallctl, optionally reading a value of type <T> if out is
  38 // non-null.  Logs on error.
  39 template <typename T>
  40 static int mallctlRead(const char* cmd, T* out) {
  41   size_t outLen = sizeof(T);
  42   int err = mallctl(cmd,
  43                     out, out ? &outLen : nullptr,
  44                     nullptr, 0);
  45   if (err != 0) {
  46     FB_LOG_EVERY_MS(WARNING, 10000)
  47       << "mallctl " << cmd << ": " << strerror(err) << " (" << err << ")";
  48   }
  49   return err;
  50 }
  51
  52 static int mallctlCall(const char* cmd) {
  53   // Use <unsigned> rather than <void> to avoid sizeof(void).
  54   return mallctlRead<unsigned>(cmd, nullptr);
  55 }
  56
  57 void MemoryIdler::flushLocalMallocCaches() {
  58   if (usingJEMalloc()) {
  59     if (!mallctl || !mallctlnametomib || !mallctlbymib) {
  60       FB_LOG_EVERY_MS(ERROR, 10000) << "mallctl* weak link failed";
  61       return;
  62     }
  63
  64     // "tcache.flush" was renamed to "thread.tcache.flush" in jemalloc 3
  65     mallctlCall("thread.tcache.flush");
  66
  67     // By default jemalloc has 4 arenas per cpu, and then assigns each
  68     // thread to one of those arenas.  This means that in any service
  69     // that doesn't perform a lot of context switching, the chances that
  70     // another thread will be using the current thread's arena (and hence
  71     // doing the appropriate dirty-page purging) are low.  Some good
  72     // tuned configurations (such as that used by hhvm) use fewer arenas
  73     // and then pin threads to avoid contended access.  In that case,
  74     // purging the arenas is counter-productive.  We use the heuristic
  75     // that if narenas <= 2 * num_cpus then we shouldn't do anything here,
  76     // which detects when the narenas has been reduced from the default
  77     unsigned narenas;
  78     unsigned arenaForCurrent;
  79     size_t mib[3];
  80     size_t miblen = 3;
  81     if (mallctlRead<unsigned>("opt.narenas", &narenas) == 0 &&
  82         narenas > 2 * CacheLocality::system().numCpus &&
  83         mallctlRead<unsigned>("thread.arena", &arenaForCurrent) == 0 &&
  84         mallctlnametomib("arena.0.purge", mib, &miblen) == 0) {
  85       mib[1] = size_t(arenaForCurrent);
  86       mallctlbymib(mib, miblen, nullptr, nullptr, nullptr, 0);
  87     }
  88   }
  89 }
  90
  91
  92 // Stack madvise isn't Linux or glibc specific, but the system calls
  93 // and arithmetic (and bug compatibility) are not portable.  The set of
  94 // platforms could be increased if it was useful.
  95 #if (FOLLY_X64 || FOLLY_PPC64 ) && defined(_GNU_SOURCE) && defined(__linux__)
  96
  97 static FOLLY_TLS uintptr_t tls_stackLimit;
  98 static FOLLY_TLS size_t tls_stackSize;
  99
 100 static size_t pageSize() {
 101   static const size_t s_pageSize = sysconf(_SC_PAGESIZE);
 102   return s_pageSize;
 103 }
 104
 105 static void fetchStackLimits() {
 106   pthread_attr_t attr;
 107   pthread_getattr_np(pthread_self(), &attr);
 108   SCOPE_EXIT { pthread_attr_destroy(&attr); };
 109
 110   void* addr;
 111   size_t rawSize;
 112   int err;
 113   if ((err = pthread_attr_getstack(&attr, &addr, &rawSize))) {
 114     // unexpected, but it is better to continue in prod than do nothing
 115     FB_LOG_EVERY_MS(ERROR, 10000) << "pthread_attr_getstack error " << err;
 116     assert(false);
 117     tls_stackSize = 1;
 118     return;
 119   }
 120   assert(addr != nullptr);
 121   assert(rawSize >= PTHREAD_STACK_MIN);
 122
 123   // glibc subtracts guard page from stack size, even though pthread docs
 124   // seem to imply the opposite
 125   size_t guardSize;
 126   if (pthread_attr_getguardsize(&attr, &guardSize) != 0) {
 127     guardSize = 0;
 128   }
 129   assert(rawSize > guardSize);
 130
 131   // stack goes down, so guard page adds to the base addr
 132   tls_stackLimit = uintptr_t(addr) + guardSize;
 133   tls_stackSize = rawSize - guardSize;
 134
 135   assert((tls_stackLimit & (pageSize() - 1)) == 0);
 136 }
 137
 138 FOLLY_NOINLINE static uintptr_t getStackPtr() {
 139   char marker;
 140   auto rv = uintptr_t(&marker);
 141   return rv;
 142 }
 143
 144 void MemoryIdler::unmapUnusedStack(size_t retain) {
 145   if (tls_stackSize == 0) {
 146     fetchStackLimits();
 147   }
 148   if (tls_stackSize <= std::max(size_t(1), retain)) {
 149     // covers both missing stack info, and impossibly large retain
 150     return;
 151   }
 152
 153   auto sp = getStackPtr();
 154   assert(sp >= tls_stackLimit);
 155   assert(sp - tls_stackLimit < tls_stackSize);
 156
 157   auto end = (sp - retain) & ~(pageSize() - 1);
 158   if (end <= tls_stackLimit) {
 159     // no pages are eligible for unmapping
 160     return;
 161   }
 162
 163   size_t len = end - tls_stackLimit;
 164   assert((len & (pageSize() - 1)) == 0);
 165   if (madvise((void*)tls_stackLimit, len, MADV_DONTNEED) != 0) {
 166     // It is likely that the stack vma hasn't been fully grown.  In this
 167     // case madvise will apply dontneed to the present vmas, then return
 168     // errno of ENOMEM.  We can also get an EAGAIN, theoretically.
 169     // EINVAL means either an invalid alignment or length, or that some
 170     // of the pages are locked or shared.  Neither should occur.
 171     assert(errno == EAGAIN || errno == ENOMEM);
 172   }
 173 }
 174
 175 #else
 176
 177 void MemoryIdler::unmapUnusedStack(size_t retain) {
 178 }
 179
 180 #endif
 181
 182 }}