Summary:
[Folly] Improve `SingletonThreadLocal` performance.
This is done by explicitly controlling inlining behavior.
The constructor is cold by definition - it runs once per process - so it is outlined.
`get` is hot, so it is inlined. The uncached path of `get` is cold - it runs once per thread - so that path is outlined.
Reviewed By: djwatson
Differential Revision:
D6736662
fbshipit-source-id:
4cd77c7772b46e2e3c6b2a3dc071b2b06522979e
// Default constructor: delegates to the factory-taking constructor, passing a
// factory that heap-allocates a default-constructed T.
SingletonThreadLocal() : SingletonThreadLocal([]() { return new T(); }) {}
SingletonThreadLocal() : SingletonThreadLocal([]() { return new T(); }) {}
- explicit SingletonThreadLocal(CreateFunc createFunc)
- : singleton_([createFunc = std::move(createFunc)]() mutable {
- return new ThreadLocalT([createFunc =
- std::move(createFunc)]() mutable {
- return new Wrapper(std::unique_ptr<T>(createFunc()));
+ template <typename Create>
+ FOLLY_NOINLINE explicit SingletonThreadLocal(Create create)
+ : singleton_([create = std::move(create)]() mutable {
+ return new ThreadLocalT([create = std::move(create)]() mutable {
+ return new Wrapper(std::unique_ptr<T>(create()));
+ FOLLY_ALWAYS_INLINE static T& get() {
- if (UNLIKELY(*localPtr() == nullptr)) {
- *localPtr() = &(**SingletonT::get());
- }
-
- return **localPtr();
+ return *localPtr() ? **localPtr() : *(*localPtr() = &getSlow());
#else
return **SingletonT::get();
#endif
}
private:
#else
return **SingletonT::get();
#endif
}
private:
+ FOLLY_NOINLINE static T& getSlow() { // uncached path of get(); marked noinline so it stays out of the inlined fast path
+ return **SingletonT::get(); // double dereference of whatever SingletonT::get() returns, yielding the T
+ }
+
- static T** localPtr() {
+ FOLLY_ALWAYS_INLINE static T** localPtr() { // returns the address of the cache slot that get() reads and fills
static FOLLY_TLS T* localPtr = nullptr; // starts out null; presumably one slot per thread via FOLLY_TLS - TODO confirm
return &localPtr;
}
static FOLLY_TLS T* localPtr = nullptr;
return &localPtr;
}