 * Copyright 2017-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.

#include <folly/experimental/hazptr/hazptr.h>
namespace concurrenthashmap {

// Deleter for hazptr retire() that can use an allocator.
template <typename Allocator>
  template <typename Node>
  void operator()(Node* node) {
    Allocator().deallocate((uint8_t*)node, sizeof(Node));
    typename Enabled = void>
  typedef std::pair<const KeyType, ValueType> value_type;
  explicit ValueHolder(const ValueHolder& other) : item_(other.item_) {}
  template <typename Arg, typename... Args>
  ValueHolder(std::piecewise_construct_t, Arg&& k, Args&&... args)
            std::piecewise_construct,
            std::forward_as_tuple(std::forward<Arg>(k)),
            std::forward_as_tuple(std::forward<Args>(args)...)) {}
  value_type& getItem() {
// If the KeyType or ValueType is not nothrow copy constructible, we can
// instead add an extra indirection. This adds more allocations /
// deallocations and pulls in an extra cacheline.
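// For example (an illustrative sketch, not part of this header; `Alloc`
// stands in for any allocator type):
//
//   ValueHolder<int, int, Alloc>          // both nothrow-copyable: primary
//                                         // template above, pair stored inline
//   ValueHolder<int, std::string, Alloc>  // std::string's copy constructor is
//                                         // not noexcept: this specialization,
//                                         // pair kept behind an allocated pointer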
template <typename KeyType, typename ValueType, typename Allocator>
        !std::is_nothrow_copy_constructible<ValueType>::value ||
        !std::is_nothrow_copy_constructible<KeyType>::value>> {
  typedef std::pair<const KeyType, ValueType> value_type;
  explicit ValueHolder(const ValueHolder& other) {
  template <typename Arg, typename... Args>
  ValueHolder(std::piecewise_construct_t, Arg&& k, Args&&... args) {
    item_ = (value_type*)Allocator().allocate(sizeof(value_type));
    new (item_) value_type(
        std::piecewise_construct,
        std::forward_as_tuple(std::forward<Arg>(k)),
        std::forward_as_tuple(std::forward<Args>(args)...));
      Allocator().deallocate((uint8_t*)item_, sizeof(value_type));
  value_type& getItem() {
  mutable bool owned_{true};
    template <typename> class Atom = std::atomic>
class NodeT : public folly::hazptr::hazptr_obj_base<
                  NodeT<KeyType, ValueType, Allocator, Atom>,
                  concurrenthashmap::HazptrDeleter<Allocator>> {
  typedef std::pair<const KeyType, ValueType> value_type;
  explicit NodeT(NodeT* other) : item_(other->item_) {}
  template <typename Arg, typename... Args>
  NodeT(Arg&& k, Args&&... args)
            std::piecewise_construct,
            std::forward<Arg>(k),
            std::forward<Args>(args)...) {}
  /* Nodes are refcounted: If a node is retired() while a writer is
     traversing the chain, the rest of the chain must remain valid
     until all readers are finished. This includes the shared tail
     portion of the chain, as well as both old/new hash buckets that
     may point to the same portion, and erased nodes may increase the
    DCHECK(refcount_.load() != 0);
    refcount_.fetch_add(1);
    if (refcount_.fetch_sub(1) == 1 /* was previously 1 */) {
          folly::hazptr::default_hazptr_domain(),
          concurrenthashmap::HazptrDeleter<Allocator>());
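  // A minimal writer-side sketch of how the refcount is used when a node is
  // unlinked (`victim` and `prev` are illustrative names; the real logic
  // lives in erase_internal() further below):
  //
  //   auto next = victim->next_.load(std::memory_order_relaxed);
  //   if (next) {
  //     next->acquire(); // balances the release() in victim's destructor
  //   }
  //   // `prev` is the Atom<Node*> link that pointed at victim:
  //   prev->store(next, std::memory_order_release);
  //   victim->release(); // after unlocking; retire()s once the count hits 0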
    auto next = next_.load(std::memory_order_acquire);
  value_type& getItem() {
    return item_.getItem();
  Atom<NodeT*> next_{nullptr};
  ValueHolder<KeyType, ValueType, Allocator> item_;
  Atom<uint8_t> refcount_{1};
} // namespace concurrenthashmap
/* A Segment is a single shard of the ConcurrentHashMap.
 * All writes take the lock, while readers are all wait-free.
 * Readers always proceed in parallel with the single writer.
 *
 * Possible additional optimizations:
 *
 * * insert / erase could be lock / wait free. Would need to be
 *   careful that assign and rehash don't conflict (possibly with a
 *   reader/writer lock, or a microlock per node or per bucket, etc).
 *   Java 8 goes halfway, and does a lock per bucket, except for the
 *   first item, which is inserted with a CAS (which is somewhat
 *   specific to Java having a lock per object).
 *
 * * I tried using trylock() and find() to warm the cache for insert()
 *   and erase(), similar to Java 7, but didn't have much luck.
 *
 * * We could order elements using split ordering, for faster rehash,
 *   and no need to ever copy nodes. Note that a full split ordering,
 *   including dummy nodes, increases the memory usage by 2x, but we
 *   could split the difference and still require a lock to set bucket
 *   pointers.
 *
 * * hazptr acquire/release could be optimized more; in the
 *   single-threaded case, hazptr overhead is ~30% for a hot find() loop.
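 *
 * A minimal usage sketch (the constructor argument order, the chosen value
 * types, and the names below are illustrative assumptions, not part of this
 * header's contract):
 *
 *   // 8 initial buckets, 1.05 load factor, 0 = no max size (assumed order)
 *   ConcurrentHashMapSegment<int, int> seg(8, 1.05f, 0);
 *   ConcurrentHashMapSegment<int, int>::Iterator it;
 *   seg.insert(it, 1, 2);            // DOES_NOT_EXIST: fails if key present
 *   seg.insert_or_assign(it, 1, 3);  // ANY: insert or overwrite
 *   if (seg.find(it, 1)) {
 *     int v = it->second;   // `it` holds hazard pointers keeping *it alive
 *   }
 *   seg.erase(1);
 */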
    uint8_t ShardBits = 0,
    typename HashFn = std::hash<KeyType>,
    typename KeyEqual = std::equal_to<KeyType>,
    typename Allocator = std::allocator<uint8_t>,
    template <typename> class Atom = std::atomic,
    class Mutex = std::mutex>
class alignas(64) ConcurrentHashMapSegment {
  enum class InsertType {
    DOES_NOT_EXIST, // insert/emplace operations. If key exists, return false.
    MUST_EXIST, // assign operations. If key does not exist, return false.
    ANY, // insert_or_assign.
    MATCH, // assign_if_equal (not in std). For concurrent maps, a
           // way to atomically change a value if equal to some other value.
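           // For example (an illustrative call; see assign_if_equal() below):
           //   seg.assign_if_equal(it, key, expected_old_value, new_value)
           // replaces the mapped value only if it still compares equal to
           // expected_old_value at the moment of the update.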
  typedef KeyType key_type;
  typedef ValueType mapped_type;
  typedef std::pair<const KeyType, ValueType> value_type;
  typedef std::size_t size_type;
  using Node = concurrenthashmap::NodeT<KeyType, ValueType, Allocator, Atom>;
  ConcurrentHashMapSegment(
      size_t initial_buckets,
      : load_factor_(load_factor), max_size_(max_size) {
    auto buckets = (Buckets*)Allocator().allocate(sizeof(Buckets));
    initial_buckets = folly::nextPowTwo(initial_buckets);
        (isPowTwo(max_size_) &&
         (folly::popcount(max_size_ - 1) + ShardBits <= 32)));
    new (buckets) Buckets(initial_buckets);
    buckets_.store(buckets, std::memory_order_release);
    load_factor_nodes_ = initial_buckets * load_factor_;
  ~ConcurrentHashMapSegment() {
    auto buckets = buckets_.load(std::memory_order_relaxed);
    // We can delete and not retire() here, since users must have
    // their own synchronization around destruction.
    Allocator().deallocate((uint8_t*)buckets, sizeof(Buckets));
  bool insert(Iterator& it, std::pair<key_type, mapped_type>&& foo) {
    return insert(it, std::move(foo.first), std::move(foo.second));
  template <typename Key, typename Value>
  bool insert(Iterator& it, Key&& k, Value&& v) {
    auto node = (Node*)Allocator().allocate(sizeof(Node));
    new (node) Node(std::forward<Key>(k), std::forward<Value>(v));
    auto res = insert_internal(
        node->getItem().first,
        InsertType::DOES_NOT_EXIST,
        [](const ValueType&) { return false; },
      Allocator().deallocate((uint8_t*)node, sizeof(Node));
  template <typename Key, typename... Args>
  bool try_emplace(Iterator& it, Key&& k, Args&&... args) {
    // Note: first key is only ever compared. Second is moved in to
    // create the node, and the first key is never touched again.
    return insert_internal(
        std::forward<Key>(k),
        InsertType::DOES_NOT_EXIST,
        [](const ValueType&) { return false; },
        std::forward<Key>(k),
        std::forward<Args>(args)...);
  template <typename... Args>
  bool emplace(Iterator& it, const KeyType& k, Node* node) {
    return insert_internal(
        InsertType::DOES_NOT_EXIST,
        [](const ValueType&) { return false; },
  template <typename Key, typename Value>
  bool insert_or_assign(Iterator& it, Key&& k, Value&& v) {
    auto node = (Node*)Allocator().allocate(sizeof(Node));
    new (node) Node(std::forward<Key>(k), std::forward<Value>(v));
    auto res = insert_internal(
        node->getItem().first,
        [](const ValueType&) { return false; },
      Allocator().deallocate((uint8_t*)node, sizeof(Node));
  template <typename Key, typename Value>
  bool assign(Iterator& it, Key&& k, Value&& v) {
    auto node = (Node*)Allocator().allocate(sizeof(Node));
    new (node) Node(std::forward<Key>(k), std::forward<Value>(v));
    auto res = insert_internal(
        node->getItem().first,
        InsertType::MUST_EXIST,
        [](const ValueType&) { return false; },
      Allocator().deallocate((uint8_t*)node, sizeof(Node));
  template <typename Key, typename Value>
  bool assign_if_equal(
      const ValueType& expected,
    auto node = (Node*)Allocator().allocate(sizeof(Node));
    new (node) Node(std::forward<Key>(k), std::forward<Value>(desired));
    auto res = insert_internal(
        node->getItem().first,
        [&expected](const ValueType& v) { return v == expected; },
      Allocator().deallocate((uint8_t*)node, sizeof(Node));
  template <typename MatchFunc, typename... Args>
  bool insert_internal(
    auto h = HashFn()(k);
    std::unique_lock<Mutex> g(m_);
    auto buckets = buckets_.load(std::memory_order_relaxed);
    // Check for rehash needed for DOES_NOT_EXIST
    if (size_ >= load_factor_nodes_ && type == InsertType::DOES_NOT_EXIST) {
      if (max_size_ && size_ << 1 > max_size_) {
        // Would exceed max size.
        throw std::bad_alloc();
      rehash(buckets->bucket_count_ << 1);
      buckets = buckets_.load(std::memory_order_relaxed);
    auto idx = getIdx(buckets, h);
    auto head = &buckets->buckets_[idx];
    auto node = head->load(std::memory_order_relaxed);
    auto headnode = node;
    auto& hazbuckets = it.hazptrs_[0];
    auto& haznode = it.hazptrs_[1];
    hazbuckets.reset(buckets);
      if (KeyEqual()(k, node->getItem().first)) {
        it.setNode(node, buckets, idx);
        if (type == InsertType::MATCH) {
          if (!match(node->getItem().second)) {
        if (type == InsertType::DOES_NOT_EXIST) {
          cur = (Node*)Allocator().allocate(sizeof(Node));
          new (cur) Node(std::forward<Args>(args)...);
          auto next = node->next_.load(std::memory_order_relaxed);
          cur->next_.store(next, std::memory_order_relaxed);
          prev->store(cur, std::memory_order_release);
          // Release not under lock.
      node = node->next_.load(std::memory_order_relaxed);
    if (type != InsertType::DOES_NOT_EXIST && type != InsertType::ANY) {
    // Node not found, check for rehash on ANY
    if (size_ >= load_factor_nodes_ && type == InsertType::ANY) {
      if (max_size_ && size_ << 1 > max_size_) {
        // Would exceed max size.
        throw std::bad_alloc();
      rehash(buckets->bucket_count_ << 1);
      // Reload correct bucket.
      buckets = buckets_.load(std::memory_order_relaxed);
      hazbuckets.reset(buckets);
      idx = getIdx(buckets, h);
      head = &buckets->buckets_[idx];
      headnode = head->load(std::memory_order_relaxed);
    // We found a slot to put the node.
    // OR DOES_NOT_EXIST, but only in the try_emplace case
    DCHECK(type == InsertType::ANY || type == InsertType::DOES_NOT_EXIST);
    cur = (Node*)Allocator().allocate(sizeof(Node));
    new (cur) Node(std::forward<Args>(args)...);
    cur->next_.store(headnode, std::memory_order_relaxed);
    head->store(cur, std::memory_order_release);
    it.setNode(cur, buckets, idx);
  void rehash(size_t bucket_count) {
    auto buckets = buckets_.load(std::memory_order_relaxed);
    auto newbuckets = (Buckets*)Allocator().allocate(sizeof(Buckets));
    new (newbuckets) Buckets(bucket_count);
    load_factor_nodes_ = bucket_count * load_factor_;
    for (size_t i = 0; i < buckets->bucket_count_; i++) {
      auto bucket = &buckets->buckets_[i];
      auto node = bucket->load(std::memory_order_relaxed);
      auto h = HashFn()(node->getItem().first);
      auto idx = getIdx(newbuckets, h);
      // Reuse as long a chain as possible from the end. Since the
      // nodes don't have previous pointers, the longest last chain
      // will be the same for both the previous hashmap and the new one,
      // assuming all the nodes hash to the same bucket.
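      //
      // For example (illustrative): if an old bucket holds A -> B -> C -> D
      // and, under the new bucket count, C and D map to the same new bucket
      // while A and B do not, then C -> D is the "last run": it is linked
      // into its new bucket as-is (with a refcount bump), and only A and B
      // are cloned into their respective new buckets.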
      auto last = node->next_.load(std::memory_order_relaxed);
      for (; last != nullptr;
           last = last->next_.load(std::memory_order_relaxed)) {
        auto k = getIdx(newbuckets, HashFn()(last->getItem().first));
      // Set longest last run in new bucket, incrementing the refcount.
      newbuckets->buckets_[lastidx].store(lastrun, std::memory_order_relaxed);
      // Clone remaining nodes
      for (; node != lastrun;
           node = node->next_.load(std::memory_order_relaxed)) {
        auto newnode = (Node*)Allocator().allocate(sizeof(Node));
        new (newnode) Node(node);
        auto k = getIdx(newbuckets, HashFn()(node->getItem().first));
        auto prevhead = &newbuckets->buckets_[k];
        newnode->next_.store(prevhead->load(std::memory_order_relaxed));
        prevhead->store(newnode, std::memory_order_relaxed);
    auto oldbuckets = buckets_.load(std::memory_order_relaxed);
    buckets_.store(newbuckets, std::memory_order_release);
        folly::hazptr::default_hazptr_domain(),
        concurrenthashmap::HazptrDeleter<Allocator>());
  bool find(Iterator& res, const KeyType& k) {
    auto hazcurr = &res.hazptrs_[1];
    folly::hazptr::hazptr_local<1> hlocal;
    auto haznext = &hlocal[0];
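    // Hand-over-hand traversal: the next node is protected with one hazard
    // pointer before protection of the current node is given up, alternating
    // between res.hazptrs_[1] and the local hazptr in hlocal.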
    auto h = HashFn()(k);
    auto buckets = res.hazptrs_[0].get_protected(buckets_);
    auto idx = getIdx(buckets, h);
    auto prev = &buckets->buckets_[idx];
    auto node = hazcurr->get_protected(*prev);
      if (KeyEqual()(k, node->getItem().first)) {
        // We may be using hlocal, make sure we are using hazptrs_
        res.hazptrs_[1].reset(node);
        res.setNode(node, buckets, idx);
      node = haznext[0].get_protected(node->next_);
      std::swap(hazcurr, haznext);

  // Listed separately because we need a prev pointer.
  size_type erase(const key_type& key) {
    return erase_internal(key, nullptr);
  size_type erase_internal(const key_type& key, Iterator* iter) {
    auto h = HashFn()(key);
      std::lock_guard<Mutex> g(m_);
      auto buckets = buckets_.load(std::memory_order_relaxed);
      auto idx = getIdx(buckets, h);
      auto head = &buckets->buckets_[idx];
      node = head->load(std::memory_order_relaxed);
      Node* prev = nullptr;
        if (KeyEqual()(key, node->getItem().first)) {
          auto next = node->next_.load(std::memory_order_relaxed);
            prev->next_.store(next, std::memory_order_release);
            // Must be head of list.
            head->store(next, std::memory_order_release);
            iter->hazptrs_[0].reset(buckets);
                node->next_.load(std::memory_order_acquire), buckets, idx);
        node = node->next_.load(std::memory_order_relaxed);
    // Delete the node while not under the lock.

  // Unfortunately because we are reusing nodes on rehash, we can't
  // have prev pointers in the bucket chain. We have to start the
  // search from the bucket.
  //
  // This is a small departure from standard stl containers: erase may
  // throw if hash or key_eq functions throw.
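  //
  // For example (illustrative): after
  //   seg.erase(res, pos);
  // the element at pos has been removed and res refers to the element that
  // followed it (or is an end iterator), so iteration can continue from res.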
  void erase(Iterator& res, Iterator& pos) {
    erase_internal(pos->first, &res);
    auto buckets = buckets_.load(std::memory_order_relaxed);
    auto newbuckets = (Buckets*)Allocator().allocate(sizeof(Buckets));
    new (newbuckets) Buckets(buckets->bucket_count_);
      std::lock_guard<Mutex> g(m_);
      buckets_.store(newbuckets, std::memory_order_release);
        folly::hazptr::default_hazptr_domain(),
        concurrenthashmap::HazptrDeleter<Allocator>());
  void max_load_factor(float factor) {
    std::lock_guard<Mutex> g(m_);
    load_factor_ = factor;
    auto buckets = buckets_.load(std::memory_order_relaxed);
    load_factor_nodes_ = buckets->bucket_count_ * load_factor_;
    auto buckets = res.hazptrs_[0].get_protected(buckets_);
    res.setNode(nullptr, buckets, 0);
    return Iterator(nullptr);

  // Could be optimized to avoid an extra pointer dereference by
  // allocating buckets_ at the same time.
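  //
  // A sketch of that optimization (an assumption, not what this class does):
  // allocate one block of sizeof(Buckets) + count * sizeof(Atom<Node*>) bytes,
  // placement-new the Buckets header at the front, and point buckets_ at the
  // trailing bytes, so the header and the bucket array share one allocation.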
  class Buckets : public folly::hazptr::hazptr_obj_base<
                      concurrenthashmap::HazptrDeleter<Allocator>> {
    explicit Buckets(size_t count) : bucket_count_(count) {
          (Atom<Node*>*)Allocator().allocate(sizeof(Atom<Node*>) * count);
      new (buckets_) Atom<Node*>[count];
      for (size_t i = 0; i < count; i++) {
        buckets_[i].store(nullptr, std::memory_order_relaxed);
      for (size_t i = 0; i < bucket_count_; i++) {
        auto elem = buckets_[i].load(std::memory_order_relaxed);
      Allocator().deallocate(
          (uint8_t*)buckets_, sizeof(Atom<Node*>) * bucket_count_);
    size_t bucket_count_;
    Atom<Node*>* buckets_{nullptr};
    FOLLY_ALWAYS_INLINE Iterator() {}
    FOLLY_ALWAYS_INLINE explicit Iterator(std::nullptr_t) : hazptrs_(nullptr) {}
    FOLLY_ALWAYS_INLINE ~Iterator() {}
    void setNode(Node* node, Buckets* buckets, uint64_t idx) {
    const value_type& operator*() const {
      return node_->getItem();
    const value_type* operator->() const {
      return &(node_->getItem());
    const Iterator& operator++() {
      node_ = hazptrs_[1].get_protected(node_->next_);
        if (idx_ >= buckets_->bucket_count_) {
        DCHECK(buckets_->buckets_);
        node_ = hazptrs_[1].get_protected(buckets_->buckets_[idx_]);
    Iterator operator++(int) {
    bool operator==(const Iterator& o) const {
      return node_ == o.node_;
    bool operator!=(const Iterator& o) const {
      return !(*this == o);
    Iterator& operator=(const Iterator& o) {
      hazptrs_[1].reset(node_);
      buckets_ = o.buckets_;
      hazptrs_[0].reset(buckets_);
    /* implicit */ Iterator(const Iterator& o) {
      hazptrs_[1].reset(node_);
      buckets_ = o.buckets_;
      hazptrs_[0].reset(buckets_);
    /* implicit */ Iterator(Iterator&& o) noexcept
        : hazptrs_(std::move(o.hazptrs_)) {
      buckets_ = o.buckets_;
    // These are accessed directly from the functions above
    folly::hazptr::hazptr_array<2> hazptrs_;
    Node* node_{nullptr};
    Buckets* buckets_{nullptr};
  // Shards have already used the low ShardBits of the hash.
  // Shift them off here so the bucket index uses fresh bits.
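  // For example, with ShardBits = 2 and bucket_count_ = 8, the two low bits
  // picked the shard, so (hash >> 2) discards them and the & 7 mask selects
  // one of the 8 buckets from the next three bits.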
  uint64_t getIdx(Buckets* buckets, size_t hash) {
    return (hash >> ShardBits) & (buckets->bucket_count_ - 1);
  size_t load_factor_nodes_;
  size_t const max_size_;
  Atom<Buckets*> buckets_{nullptr};
} // namespace detail