From: Jason Evans Date: Wed, 3 Sep 2014 23:08:22 +0000 (-0700) Subject: Convert from jemalloc's obsolete *allocm() to *allocx(). X-Git-Tag: v0.22.0~373 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=8470cb6ed9524328bffa359522dba32b5f12e746;p=folly.git Convert from jemalloc's obsolete *allocm() to *allocx(). Summary: Convert from jemalloc's obsolete *allocm() to *allocx(). Strengthen goodMallocSize() to always return a jemalloc size class, so that xallocx() success/failure detection is simple. @override-unit-failures Test Plan: Folly, unicorn, and HHVM tests. Reviewed By: andrei.alexandrescu@fb.com Subscribers: trunkagent, hphp-diffs@, ps, chaoyc, search-fbcode-diffs@, unicorn-diffs@, ptc, njormrod FB internal diff: D1535841 Tasks: 4996808 --- diff --git a/folly/FBVector.h b/folly/FBVector.h index 1a04eebc..6abc4529 100644 --- a/folly/FBVector.h +++ b/folly/FBVector.h @@ -98,16 +98,16 @@ private: // constructors Impl() : Allocator(), b_(nullptr), e_(nullptr), z_(nullptr) {} - Impl(const Allocator& a) + /* implicit */ Impl(const Allocator& a) : Allocator(a), b_(nullptr), e_(nullptr), z_(nullptr) {} - Impl(Allocator&& a) + /* implicit */ Impl(Allocator&& a) : Allocator(std::move(a)), b_(nullptr), e_(nullptr), z_(nullptr) {} - Impl(size_type n, const Allocator& a = Allocator()) + /* implicit */ Impl(size_type n, const Allocator& a = Allocator()) : Allocator(a) { init(n); } - Impl(Impl&& other) + Impl(Impl&& other) noexcept : Allocator(std::move(other)), b_(other.b_), e_(other.e_), z_(other.z_) { other.b_ = other.e_ = other.z_ = nullptr; } @@ -716,7 +716,7 @@ public: fbvector(const fbvector& other, const Allocator& a) : fbvector(other.begin(), other.end(), a) {} - fbvector(fbvector&& other, const Allocator& a) : impl_(a) { + /* may throw */ fbvector(fbvector&& other, const Allocator& a) : impl_(a) { if (impl_ == other.impl_) { impl_.swapData(other.impl_); } else { @@ -978,10 +978,11 @@ public: if (newCap >= oldCap) return; void* p = impl_.b_; - if 
((rallocm && usingStdAllocator::value) && + // xallocx() will shrink to precisely newCapacityBytes (which was generated + // by goodMallocSize()) if it successfully shrinks in place. + if ((usingJEMalloc() && usingStdAllocator::value) && newCapacityBytes >= folly::jemallocMinInPlaceExpandable && - rallocm(&p, nullptr, newCapacityBytes, 0, ALLOCM_NO_MOVE) - == ALLOCM_SUCCESS) { + xallocx(p, newCapacityBytes, 0, 0) == newCapacityBytes) { impl_.z_ += newCap - oldCap; } else { T* newB; // intentionally uninitialized @@ -1007,7 +1008,7 @@ public: private: bool reserve_in_place(size_type n) { - if (!usingStdAllocator::value || !rallocm) return false; + if (!usingStdAllocator::value || !usingJEMalloc()) return false; // jemalloc can never grow in place blocks smaller than 4096 bytes. if ((impl_.z_ - impl_.b_) * sizeof(T) < @@ -1015,8 +1016,7 @@ private: auto const newCapacityBytes = folly::goodMallocSize(n * sizeof(T)); void* p = impl_.b_; - if (rallocm(&p, nullptr, newCapacityBytes, 0, ALLOCM_NO_MOVE) - == ALLOCM_SUCCESS) { + if (xallocx(p, newCapacityBytes, 0, 0) == newCapacityBytes) { impl_.z_ = impl_.b_ + newCapacityBytes / sizeof(T); return true; } @@ -1515,17 +1515,17 @@ void fbvector::emplace_back_aux(Args&&... args) { size_type byte_sz = folly::goodMallocSize( computePushBackCapacity() * sizeof(T)); if (usingStdAllocator::value - && rallocm + && usingJEMalloc() && ((impl_.z_ - impl_.b_) * sizeof(T) >= folly::jemallocMinInPlaceExpandable)) { // Try to reserve in place. - // Ask rallocm to allocate in place at least size()+1 and at most sz space. - // rallocm will allocate as much as possible within that range, which + // Ask xallocx to allocate in place at least size()+1 and at most sz space. + // xallocx will allocate as much as possible within that range, which // is the best possible outcome: if sz space is available, take it all, // otherwise take as much as possible. If nothing is available, then fail. 
// In this fashion, we never relocate if there is a possibility of - // expanding in place, and we never relocate by less than the desired - // amount unless we cannot expand further. Hence we will not relocate + // expanding in place, and we never reallocate by less than the desired + // amount unless we cannot expand further. Hence we will not reallocate // sub-optimally twice in a row (modulo the blocking memory being freed). size_type lower = folly::goodMallocSize(sizeof(T) + size() * sizeof(T)); size_type upper = byte_sz; @@ -1534,8 +1534,7 @@ void fbvector::emplace_back_aux(Args&&... args) { void* p = impl_.b_; size_t actual; - if (rallocm(&p, &actual, lower, extra, ALLOCM_NO_MOVE) - == ALLOCM_SUCCESS) { + if ((actual = xallocx(p, lower, extra, 0)) >= lower) { impl_.z_ = impl_.b_ + actual / sizeof(T); M_construct(impl_.e_, std::forward<Args>(args)...); ++impl_.e_; diff --git a/folly/Malloc.cpp b/folly/Malloc.cpp index d81c30d2..c19427fd 100644 --- a/folly/Malloc.cpp +++ b/folly/Malloc.cpp @@ -27,9 +27,11 @@ namespace folly { // with --enable-stats. bool usingJEMallocSlow() { // Some platforms (*cough* OSX *cough*) require weak symbol checks to be - // in the form if (mallctl != NULL). Not if (mallctl) or if (!mallctl) (!!). - // http://goo.gl/xpmctm - if (allocm == nullptr || rallocm == nullptr || mallctl == nullptr) { + // in the form if (mallctl != nullptr). Not if (mallctl) or if (!mallctl) + // (!!). http://goo.gl/xpmctm + if (mallocx == nullptr || rallocx == nullptr || xallocx == nullptr + || sallocx == nullptr || dallocx == nullptr || nallocx == nullptr + || mallctl == nullptr) { return false; } diff --git a/folly/Malloc.h b/folly/Malloc.h index a3b6bccc..cd426c3d 100644 --- a/folly/Malloc.h +++ b/folly/Malloc.h @@ -20,6 +20,18 @@ #ifndef FOLLY_MALLOC_H_ #define FOLLY_MALLOC_H_ +/** + * Define various MALLOCX_* macros normally provided by jemalloc.
We define + * them so that we don't have to include jemalloc.h, in case the program is + * built without jemalloc support. + */ +#ifndef MALLOCX_LG_ALIGN +#define MALLOCX_LG_ALIGN(la) (la) +#endif +#ifndef MALLOCX_ZERO +#define MALLOCX_ZERO (static_cast<int>(0x40)) +#endif + // If using fbstring from libstdc++, then just define stub code // here to typedef the fbstring type into the folly namespace. // This provides backwards compatibility for code that explicitly @@ -46,13 +58,21 @@ namespace folly { #pragma GCC system_header /** - * Declare rallocm(), allocm(), and mallctl() as weak symbols. These will be - * provided by jemalloc if we are using jemalloc, or will be NULL if we are - * using another malloc implementation. + * Declare *allocx() and mallctl() as weak symbols. These will be provided by + * jemalloc if we are using jemalloc, or will be NULL if we are using another + * malloc implementation. */ -extern "C" int rallocm(void**, size_t*, size_t, size_t, int) +extern "C" void* mallocx(size_t, int) +__attribute__((__weak__)); +extern "C" void* rallocx(void*, size_t, int) +__attribute__((__weak__)); +extern "C" size_t xallocx(void*, size_t, size_t, int) __attribute__((__weak__)); -extern "C" int allocm(void**, size_t*, size_t, int) +extern "C" size_t sallocx(const void*, int) +__attribute__((__weak__)); +extern "C" void dallocx(void*, int) +__attribute__((__weak__)); +extern "C" size_t nallocx(size_t, int) __attribute__((__weak__)); extern "C" int mallctl(const char*, void*, size_t*, void*, size_t) __attribute__((__weak__)); @@ -60,10 +80,12 @@ __attribute__((__weak__)); #include #define FOLLY_HAVE_MALLOC_H 1 #else -#include +#include /* nolint */ #include #endif +#include + // for malloc_usable_size // NOTE: FreeBSD 9 doesn't have malloc.h. It's defitions // are found in stdlib.h. @@ -80,29 +102,6 @@ __attribute__((__weak__)); #include -/** - * Define various ALLOCM_* macros normally provided by jemalloc.
We define - * them so that we don't have to include jemalloc.h, in case the program is - * built without jemalloc support. - */ -#ifndef ALLOCM_SUCCESS - -#define ALLOCM_SUCCESS 0 -#define ALLOCM_ERR_OOM 1 -#define ALLOCM_ERR_NOT_MOVED 2 - -#define ALLOCM_ZERO 64 -#define ALLOCM_NO_MOVE 128 - -#define ALLOCM_LG_ALIGN(la) (la) - -#if defined(JEMALLOC_MANGLE) && defined(JEMALLOC_EXPERIMENTAL) -#define rallocm je_rallocm -#define allocm je_allocm -#endif - -#endif /* ALLOCM_SUCCESS */ - #ifdef _LIBSTDCXX_FBSTRING namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION @@ -116,11 +115,11 @@ bool usingJEMallocSlow(); * Determine if we are using jemalloc or not. */ inline bool usingJEMalloc() { - // Checking for rallocm != NULL is not sufficient; we may be in a dlopen()ed - // module that depends on libjemalloc, so rallocm is resolved, but the main + // Checking for rallocx != NULL is not sufficient; we may be in a dlopen()ed + // module that depends on libjemalloc, so rallocx is resolved, but the main // program might be using a different memory allocator. Look at the // implementation of usingJEMallocSlow() for the (hacky) details. - static bool result = usingJEMallocSlow(); + static const bool result = usingJEMallocSlow(); return result; } @@ -128,33 +127,37 @@ inline bool usingJEMalloc() { * For jemalloc's size classes, see * http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html */ -inline size_t goodMallocSize(size_t minSize) { +inline size_t goodMallocSize(size_t minSize) noexcept { if (!usingJEMalloc()) { // Not using jemalloc - no smarts return minSize; } + size_t goodSize; + SCOPE_EXIT { assert(nallocx(goodSize, 0) == goodSize); }; if (minSize <= 64) { // Choose smallest allocation to be 64 bytes - no tripping over // cache line boundaries, and small string optimization takes care // of short strings anyway. 
- return 64; + return goodSize = 64; } if (minSize <= 512) { // Round up to the next multiple of 64; we don't want to trip over // cache line boundaries. - return (minSize + 63) & ~size_t(63); + return goodSize = (minSize + 63) & ~size_t(63); } - if (minSize <= 3840) { - // Round up to the next multiple of 256 - return (minSize + 255) & ~size_t(255); + if (minSize <= 3584) { + // Round up to the next multiple of 256. For some size classes jemalloc + // will additionally round up to the nearest multiple of 512, hence the + // nallocx() call. + return goodSize = nallocx((minSize + 255) & ~size_t(255), 0); } if (minSize <= 4072 * 1024) { // Round up to the next multiple of 4KB - return (minSize + 4095) & ~size_t(4095); + return goodSize = (minSize + 4095) & ~size_t(4095); } // Holy Moly // Round up to the next multiple of 4MB - return (minSize + 4194303) & ~size_t(4194303); + return goodSize = (minSize + 4194303) & ~size_t(4194303); } // We always request "good" sizes for allocation, so jemalloc can @@ -208,8 +211,14 @@ inline void* smartRealloc(void* p, if (usingJEMalloc()) { // using jemalloc's API. Don't forget that jemalloc can never grow // in place blocks smaller than 4096 bytes. + // + // NB: newCapacity may not be precisely equal to a jemalloc size class, + // i.e. newCapacity is not guaranteed to be the result of a + // goodMallocSize() call, therefore xallocx() may return more than + // newCapacity bytes of space. Use >= rather than == to check whether + // xallocx() successfully expanded in place. 
if (currentCapacity >= jemallocMinInPlaceExpandable && - rallocm(&p, nullptr, newCapacity, 0, ALLOCM_NO_MOVE) == ALLOCM_SUCCESS) { + xallocx(p, newCapacity, 0, 0) >= newCapacity) { // Managed to expand in place return p; } diff --git a/folly/configure.ac b/folly/configure.ac index 988fc228..8b8ddcb3 100644 --- a/folly/configure.ac +++ b/folly/configure.ac @@ -78,7 +78,7 @@ AC_CHECK_LIB([double-conversion],[ceil],[],[AC_MSG_ERROR( AC_CHECK_LIB([event], [event_set], [], [AC_MSG_ERROR([Unable to find libevent])]) -AC_CHECK_LIB([jemalloc], [rallocm]) +AC_CHECK_LIB([jemalloc], [xallocx]) # Checks for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL @@ -269,7 +269,6 @@ AC_CHECK_FUNCS([getdelim \ pow \ strerror \ pthread_yield \ - rallocm \ malloc_size \ malloc_usable_size \ memrchr \ diff --git a/folly/detail/Malloc.h b/folly/detail/Malloc.h index 075c9efc..99c8dabe 100644 --- a/folly/detail/Malloc.h +++ b/folly/detail/Malloc.h @@ -24,12 +24,20 @@ extern "C" { #if FOLLY_HAVE_WEAK_SYMBOLS -int rallocm(void**, size_t*, size_t, size_t, int) __attribute__((__weak__)); -int allocm(void**, size_t*, size_t, int) __attribute__((__weak__)); +void* mallocx(size_t, int) __attribute__((__weak__)); +void* rallocx(void*, size_t, int) __attribute__((__weak__)); +size_t xallocx(void*, size_t, size_t, int) __attribute__((__weak__)); +size_t sallocx(const void*, int) __attribute__((__weak__)); +void dallocx(void*, int) __attribute__((__weak__)); +size_t nallocx(size_t, int) __attribute__((__weak__)); int mallctl(const char*, void*, size_t*, void*, size_t) __attribute__((__weak__)); #else -extern int (*rallocm)(void**, size_t*, size_t, size_t, int); -extern int (*allocm)(void**, size_t*, size_t, int); +extern void* (*mallocx)(size_t, int); +extern void* (*rallocx)(void*, size_t, int); +extern size_t (*xallocx)(void*, size_t, size_t, int); +extern size_t (*sallocx)(const void*, int); +extern void (*dallocx)(void*, int); +extern size_t (*nallocx)(size_t, int); extern int 
(*mallctl)(const char*, void*, size_t*, void*, size_t); #endif diff --git a/folly/detail/MallocImpl.cpp b/folly/detail/MallocImpl.cpp index 8f1bef07..f8aa8db0 100644 --- a/folly/detail/MallocImpl.cpp +++ b/folly/detail/MallocImpl.cpp @@ -19,8 +19,12 @@ extern "C" { #if !FOLLY_HAVE_WEAK_SYMBOLS -int (*rallocm)(void**, size_t*, size_t, size_t, int) = nullptr; -int (*allocm)(void**, size_t*, size_t, int) = nullptr; +void* (*mallocx)(size_t, int) = nullptr; +void* (*rallocx)(void*, size_t, int) = nullptr; +size_t (*xallocx)(void*, size_t, size_t, int) = nullptr; +size_t (*sallocx)(const void*, int) = nullptr; +void (*dallocx)(void*, int) = nullptr; +size_t (*nallocx)(size_t, int) = nullptr; int (*mallctl)(const char*, void*, size_t*, void*, size_t) = nullptr; #endif diff --git a/folly/detail/ThreadLocalDetail.h b/folly/detail/ThreadLocalDetail.h index 29dda6ce..55985ad3 100644 --- a/folly/detail/ThreadLocalDetail.h +++ b/folly/detail/ThreadLocalDetail.h @@ -336,40 +336,30 @@ struct StaticMeta { // under the lock. if (usingJEMalloc()) { bool success = false; - size_t newByteSize = newCapacity * sizeof(ElementWrapper); - size_t realByteSize = 0; + size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0); // Try to grow in place. // - // Note that rallocm(ALLOCM_ZERO) will only zero newly allocated memory, + // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory, // even if a previous allocation allocated more than we requested. - // This is fine; we always use ALLOCM_ZERO with jemalloc and we + // This is fine; we always use MALLOCX_ZERO with jemalloc and we // always expand our allocation to the real size. 
if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) { - success = (rallocm(reinterpret_cast<void**>(&threadEntry->elements), - &realByteSize, - newByteSize, - 0, - ALLOCM_NO_MOVE | ALLOCM_ZERO) == ALLOCM_SUCCESS); - + success = (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO) + == newByteSize); } // In-place growth failed. if (!success) { - // Note that, unlike calloc,allocm(... ALLOCM_ZERO) zeros all - // allocated bytes (*realByteSize) and not just the requested - // bytes (newByteSize) - success = (allocm(reinterpret_cast<void**>(&reallocated), - &realByteSize, - newByteSize, - ALLOCM_ZERO) == ALLOCM_SUCCESS); + success = ((reallocated = static_cast<ElementWrapper*>( + mallocx(newByteSize, MALLOCX_ZERO))) != nullptr); } if (success) { // Expand to real size - assert(realByteSize / sizeof(ElementWrapper) >= newCapacity); - newCapacity = realByteSize / sizeof(ElementWrapper); + assert(newByteSize / sizeof(ElementWrapper) >= newCapacity); + newCapacity = newByteSize / sizeof(ElementWrapper); } else { throw std::bad_alloc(); } diff --git a/folly/io/IOBuf.cpp b/folly/io/IOBuf.cpp index c0410c83..e47c2faa 100644 --- a/folly/io/IOBuf.cpp +++ b/folly/io/IOBuf.cpp @@ -707,14 +707,14 @@ void IOBuf::reserveSlow(uint64_t minHeadroom, uint64_t minTailroom) { uint64_t oldHeadroom = headroom(); // If we have a buffer allocated with malloc and we just need more tailroom, - // try to use realloc()/rallocm() to grow the buffer in place. + // try to use realloc()/xallocx() to grow the buffer in place.
SharedInfo* info = sharedInfo(); if (info && (info->freeFn == nullptr) && length_ != 0 && oldHeadroom >= minHeadroom) { if (usingJEMalloc()) { size_t headSlack = oldHeadroom - minHeadroom; // We assume that tailroom is more useful and more important than - // headroom (not least because realloc / rallocm allow us to grow the + // headroom (not least because realloc / xallocx allow us to grow the // buffer at the tail, but not at the head) So, if we have more headroom // than we need, we consider that "wasted". We arbitrarily define "too // much" headroom to be 25% of the capacity. @@ -722,23 +722,12 @@ void IOBuf::reserveSlow(uint64_t minHeadroom, uint64_t minTailroom) { size_t allocatedCapacity = capacity() + sizeof(SharedInfo); void* p = buf_; if (allocatedCapacity >= jemallocMinInPlaceExpandable) { - // rallocm can write to its 2nd arg even if it returns - // ALLOCM_ERR_NOT_MOVED. So, we pass a temporary to its 2nd arg and - // update newAllocatedCapacity only on success. - size_t allocatedSize; - int r = rallocm(&p, &allocatedSize, newAllocatedCapacity, - 0, ALLOCM_NO_MOVE); - if (r == ALLOCM_SUCCESS) { + if (xallocx(p, newAllocatedCapacity, 0, 0) == newAllocatedCapacity) { newBuffer = static_cast<uint8_t*>(p); newHeadroom = oldHeadroom; - newAllocatedCapacity = allocatedSize; - } else if (r == ALLOCM_ERR_OOM) { - // shouldn't happen as we don't actually allocate new memory - // (due to ALLOCM_NO_MOVE) - throw std::bad_alloc(); + newAllocatedCapacity = newAllocatedCapacity; } - // if ALLOCM_ERR_NOT_MOVED, do nothing, fall back to - // malloc/memcpy/free + // if xallocx failed, do nothing, fall back to malloc/memcpy/free } } } else { // Not using jemalloc diff --git a/folly/io/TypedIOBuf.h b/folly/io/TypedIOBuf.h index 6296a95d..9ccb7bc5 100644 --- a/folly/io/TypedIOBuf.h +++ b/folly/io/TypedIOBuf.h @@ -173,7 +173,7 @@ class TypedIOBuf { void push(IT begin, IT end) { uint32_t n = std::distance(begin, end); if (usingJEMalloc()) { - // Rely on rallocm() and avoid
exponential growth to limit + // Rely on xallocx() and avoid exponential growth to limit // amount of memory wasted. reserve(headroom(), n); } else if (tailroom() < n) {