3 #ifndef CDSLIB_INTRUSIVE_IMPL_MULTILEVEL_HASHSET_H
4 #define CDSLIB_INTRUSIVE_IMPL_MULTILEVEL_HASHSET_H
6 #include <cds/intrusive/details/multilevel_hashset_base.h>
7 #include <cds/details/allocator.h>
9 namespace cds { namespace intrusive {
10 /// Intrusive hash set based on multi-level array
11 /** @ingroup cds_intrusive_map
12 @anchor cds_intrusive_MultilevelHashSet_hp
15 - [2013] Steven Feldman, Pierre LaBorde, Damian Dechev "Concurrent Multi-level Arrays:
16 Wait-free Extensible Hash Maps"
18 [From the paper] The hardest problem encountered while developing a parallel hash map is how to perform
19 a global resize, the process of redistributing the elements in a hash map that occurs when adding new
20 buckets. The negative impact of blocking synchronization is multiplied during a global resize, because all
21 threads will be forced to wait on the thread that is performing the involved process of resizing the hash map
22 and redistributing the elements. \p %MultilevelHashSet implementation avoids global resizes through new array
23 allocation. By allowing concurrent expansion this structure is free from the overhead of an explicit resize,
24 which facilitates concurrent operations.
26 The presented design includes dynamic hashing, the use of sub-arrays within the hash map data structure;
27 which, in combination with <b>perfect hashing</b>, means that each element has a unique final, as well as current, position.
28 It is important to note that the perfect hash function required by our hash map is trivial to realize as
29 any hash function that permutes the bits of the key is suitable. This is possible because of our approach
30 to the hash function; we require that it produces hash values that are equal in size to that of the key.
31 We know that if we expand the hash map a fixed number of times there can be no collision as duplicate keys
32 are not provided for in the standard semantics of a hash map.
34 \p %MultiLevelHashSet is a multi-level array whitch has a structure similar to a tree:
35 @image html multilevel_hashset.png
36 The multi-level array differs from a tree in that each position on the tree could hold an array of nodes or a single node.
37 A position that holds a single node is a \p dataNode which holds the hash value of a key and the value that is associated
\r
38 with that key; it is a simple struct holding two variables. A \p dataNode in the multi-level array could be marked.
\r
39 A \p markedDataNode refers to a pointer to a \p dataNode that has been bitmarked at the least significant bit (LSB)
\r
40 of the pointer to the node. This signifies that this \p dataNode is contended. An expansion must occur at this node;
\r
41 any thread that sees this \p markedDataNode will try to replace it with an \p arrayNode; which is a position that holds
\r
42 an array of nodes. The pointer to an \p arrayNode is differentiated from that of a pointer to a \p dataNode by a bitmark
\r
43 on the second-least significant bit.
\r
45 \p %MultiLevelHashSet multi-level array is similar to a tree in that we keep a pointer to the root, which is a memory array
\r
46 called \p head. The length of the \p head memory array is unique, whereas every other \p arrayNode has a uniform length;
\r
47 a normal \p arrayNode has a fixed power-of-two length equal to the binary logarithm of a variable called \p arrayLength.
\r
48 The maximum depth of the tree, \p maxDepth, is the maximum number of pointers that must be followed to reach any node.
\r
49 We define \p currentDepth as the number of memory arrays that we need to traverse to reach the \p arrayNode on which
\r
50 we need to operate; this is initially one, because of \p head.
\r
52 That approach to the structure of the hash map uses an extensible hashing scheme; <b> the hash value is treated as a bit
\r
53 string</b> and rehash incrementally.
\r
55 @note Two important things you should keep in mind when you're using \p %MultiLevelHashSet:
\r
56 - all keys must be fixed-size. It means that you cannot use \p std::string as a key for \p %MultiLevelHashSet.
\r
57 Instead, for the strings you should use well-known hashing algorithms like <a href="https://en.wikipedia.org/wiki/Secure_Hash_Algorithm">SHA1, SHA2</a>,
\r
58 <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>, <a href="https://en.wikipedia.org/wiki/CityHash">CityHash</a>
\r
59 or its successor <a href="https://code.google.com/p/farmhash/">FarmHash</a> and so on, which
\r
60 converts variable-length strings to fixed-length bit-strings, and use that hash as a key in \p %MultiLevelHashSet.
\r
61 - \p %MultiLevelHashSet uses a perfect hashing. It means that if two different keys, for example, of type \p std::string,
\r
62 have identical hash then you cannot insert both that keys in the set. \p %MultiLevelHashSet does not maintain the key,
\r
63 it maintains its fized-size hash value.
\r
66 There are several specializations of \p %MultiLevelHashSet for each \p GC. You should include:
67 - <tt><cds/intrusive/multilevel_hashset_hp.h></tt> for \p gc::HP garbage collector
68 - <tt><cds/intrusive/multilevel_hashset_dhp.h></tt> for \p gc::DHP garbage collector
69 - <tt><cds/intrusive/multilevel_hashset_nogc.h></tt> for \ref cds_intrusive_MultiLevelHashSet_nogc for append-only set
70 - <tt><cds/intrusive/multilevel_hashset_rcu.h></tt> for \ref cds_intrusive_MultiLevelHashSet_rcu "RCU type"
75 #ifdef CDS_DOXYGEN_INVOKED
76 ,typename Traits = multilevel_hashset::traits
81 class MultiLevelHashSet
84 typedef GC gc; ///< Garbage collector
85 typedef T value_type; ///< type of value stored in the set
86 typedef Traits traits; ///< Traits template parameter, see \p multilevel_hashset::traits
88 typedef typename traits::hash_accessor hash_accessor; ///< Hash accessor functor
89 static_assert(!std::is_same< hash_accessor, cds::opt::none >::value, "hash_accessor functor must be specified" );
91 /// Hash type defined as \p hash_accessor return type
92 typedef typename std::decay<
93 typename std::remove_reference<
94 decltype( hash_accessor()( std::declval<T>()) )
97 static_assert( !std::is_pointer<hash_type>, "hash_accessor should return a reference to hash value" );
99 typedef typename traits::disposer disposer; ///< data node disposer
101 # ifdef CDS_DOXYGEN_INVOKED
102 typedef implementation_defined hash_comparator ; ///< hash compare functor based on opt::compare and opt::less option setter
104 typedef typename cds::opt::details::make_comparator_from<
107 multilevel_hashset::bitwise_compare< hash_type >
108 >::type hash_comparator;
111 typedef typename traits::item_counter item_counter; ///< Item counter type
112 typedef typename traits::head_node_allocator head_node_allocator; ///< Head node allocator
113 typedef typename traits::array_node_allocator array_node_allocator; ///< Array node allocator
114 typedef typename traits::memory_model memory_model; ///< Memory model
115 typedef typename traits::back_off back_off; ///< Backoff strategy
116 typedef typename traits::stat stat; ///< Internal statistics type
118 typedef typename gc::template guarded_ptr< value_type > guarded_ptr; ///< Guarded pointer
123 logically_deleted = 1, ///< the cell (data node) is logically deleted
124 array_converting = 2, ///< the cell is converting from data node to an array node
125 array_node = 4 ///< the cell is a pointer to an array node
127 typedef cds::details::marked_ptr< value_type, 7 > node_ptr;
128 typedef atomics::atomic< node_ptr * > atomic_node_ptr;
130 typedef cds::details::Allocator< atomic_node_ptr, head_node_allocator > cxx_head_node_allocator;
131 typedef cds::details::Allocator< atomic_node_ptr, array_node_allocator > cxx_array_node_allocator;
134 size_t head_node_size; // power-of-two
135 size_t head_node_size_log; // log2( head_node_size )
136 size_t array_node_size; // power-of-two
137 size_t array_node_size_log;// log2( array_node_size )
143 metrics const m_Metrics; ///< Metrics
144 atomic_node_ptr * m_Head; ///< Head array
145 item_counter m_ItemCounter; ///< Item counter
146 stat m_Stat; ///< Internal statistics
150 /// Creates empty set
152 @param head_bits: 2<sup>head_bits</sup> specifies the size of head array, minimum is 8.
153 @param array_bits: 2<sup>array_bits</sup> specifies the size of array node, minimum is 2.
155 Equation for \p head_bits and \p array_bits:
157 sizeof(hash_type) * 8 == head_bits + N * array_bits
159 where \p N is multi-level array depth.
161 MultiLevelHashSet( size_t head_bits = 8, size_t array_bits = 4 )
162 : m_Metrics(make_metrics( head_bits, array_bits ))
163 , m_Head( cxx_head_node_allocator().NewArray( m_Metrics.head_node_size, nullptr ))
166 /// Destructs the set and frees all data
170 cxx_head_node_allocator().Delete( m_Head, m_Metrics.head_node_size );
175 The function inserts \p val in the set if it does not contain
176 an item with that hash.
178 Returns \p true if \p val is placed into the set, \p false otherwise.
180 bool insert( value_type& val )
186 This function is intended for derived non-intrusive containers.
188 The function allows to split creating of new item into two part:
189 - create item with key only
190 - insert new item into the set
191 - if inserting is success, calls \p f functor to initialize value-field of \p val.
193 The functor signature is:
195 void func( value_type& val );
197 where \p val is the item inserted.
199 The user-defined functor is called only if the inserting is success.
201 @warning See \ref cds_intrusive_item_creating "insert item troubleshooting".
203 template <typename Func>
204 bool insert( value_type& val, Func f )
210 The operation performs inserting or changing data with lock-free manner.
212 If the item \p val not found in the set, then \p val is inserted into the set iff \p bInsert is \p true.
213 Otherwise, the functor \p func is called with item found, \p bInsert argument is ignored.
215 The functor signature is:
217 void func( bool bNew, value_type& item, value_type& val );
220 - \p bNew - \p true if the item has been inserted, \p false otherwise
221 - \p item - the item in the set (new or existing)
222 - \p val - argument \p val passed into the \p update function
223 If new item has been inserted (i.e. \p bNew is \p true) then \p item and \p val arguments
224 refers to the same thing.
226 The functor may change non-key fields of the \p item.
228 Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successfull,
229 \p second is \p true if new item has been added or \p false if the set contains that hash.
231 @warning See \ref cds_intrusive_item_creating "insert item troubleshooting".
233 template <typename Func>
234 std::pair<bool, bool> update( value_type& val, Func func, bool bInsert = true )
238 /// Unlinks the item \p val from the set
240 The function searches the item \p val in the set and unlink it
241 if it is found and its address is equal to <tt>&val</tt>.
243 The function returns \p true if success and \p false otherwise.
245 bool unlink( value_type& val )
249 /// Deletes the item from the set
251 The function searches \p hash in the set,
252 unlinks the item found, and returns \p true.
253 If that item is not found the function returns \p false.
255 The \ref disposer specified in \p Traits is called by garbage collector \p GC asynchronously.
258 bool erase( hash_type const& hash )
260 if ( bucket( key ).erase( key )) {
267 /// Deletes the item from the set
269 The function searches \p hash in the set,
270 call \p f functor with item found, and unlinks it from the set.
271 The \ref disposer specified in \p Traits is called
272 by garbage collector \p GC asynchronously.
274 The \p Func interface is
277 void operator()( value_type const& item );
281 If \p hash is not found the function returns \p false.
283 template <typename Func>
284 bool erase( hash_type const& hash, Func f )
288 /// Extracts the item with specified \p hash
290 The function searches \p hash in the set,
291 unlinks it from the set, and returns an guarded pointer to the item extracted.
292 If \p hash is not found the function returns an empty guarded pointer.
294 The \p disposer specified in \p Traits class' template parameter is called automatically
295 by garbage collector \p GC when returned \ref guarded_ptr object to be destroyed or released.
296 @note Each \p guarded_ptr object uses the GC's guard that can be limited resource.
300 typedef cds::intrusive::MultiLevelHashSet< your_template_args > my_set;
304 my_set::guarded_ptr gp( theSet.extract( 5 ));
309 // Destructor of gp releases internal HP guard
313 guarded_ptr extract( hash_type const& hash )
317 /// Finds an item by it's \p hash
319 The function searches the item by \p hash and calls the functor \p f for item found.
320 The interface of \p Func functor is:
323 void operator()( value_type& item );
326 where \p item is the item found.
328 The functor may change non-key fields of \p item. Note that the functor is only guarantee
329 that \p item cannot be disposed during the functor is executing.
330 The functor does not serialize simultaneous access to the set's \p item. If such access is
331 possible you must provide your own synchronization schema on item level to prevent unsafe item modifications.
333 The function returns \p true if \p hash is found, \p false otherwise.
335 template <typename Func>
336 bool find( hash_type& hash, Func f )
340 template <typename Func>
341 bool find( hash_type const& hash, Func f )
346 /// Finds an item by it's \p hash
348 The function searches the item by its \p hash
349 and returns \p true if it is found, or \p false otherwise.
351 bool find( hash_type const& hash )
355 /// Finds an item by it's \p hash and returns the item found
357 The function searches the item by its \p hash
358 and returns the guarded pointer to the item found.
359 If \p hash is not found the function returns an empty \p guarded_ptr.
361 @note Each \p guarded_ptr object uses one GC's guard which can be limited resource.
365 typedef cds::intrusive::MultiLevelHashSet< your_template_params > my_set;
369 my_set::guarded_ptr gp( theSet.get( 5 ));
370 if ( theSet.get( 5 )) {
374 // Destructor of guarded_ptr releases internal HP guard
378 guarded_ptr get( hash_type const& hash )
382 /// Clears the set (non-atomic)
384 The function unlink all items from the set.
385 The function is not atomic. It removes all data nodes and then resets the item counter to zero.
386 If there are a thread that performs insertion while \p %clear() is working the result is undefined in general case:
387 \p empty() may return \p true but the set may contain item(s).
388 Therefore, \p %clear() may be used only for debugging purposes.
390 For each item the \p disposer is called after unlinking.
396 /// Checks if the set is empty
398 Emptiness is checked by item counting: if item count is zero then the set is empty.
399 Thus, the correct item counting feature is an important part of the set implementation.
406 /// Returns item count in the set
409 return m_ItemCounter;
412 /// Returns const reference to internal statistics
413 stat const& statistics() const
418 /// Returns the size of head node
419 size_t head_size() const
421 return m_Metrics.head_node_size;
424 /// Returns the size of the array node
425 size_t array_node_size() const
427 return m_Metrics.array_node_size;
432 static metrics make_metrics( size_t head_bits, size_t array_bits )
434 size_t const hash_bits = sizeof( hash_type ) * 8;
436 if ( array_bits < 2 )
440 if ( head_bits > hash_bits )
441 head_bits = hash_bits;
442 if ( (hash_bits - head_bits) % array_bits != 0 )
443 head_bits += (hash_bits - head_bits) % array_bits;
445 assert( (hash_bits - head_bits) % array_bits == 0 );
448 m.head_node_size_log = head_bits;
449 m.head_node_size = 1 << head_bits;
450 m.array_node_size_log = array_bits;
451 m.array_node_size = 1 << array_bits;
455 template <typename T>
456 static bool check_node_alignment( T * p )
458 return (reinterpret_cast<uintptr_t>(p) & node_ptr::bitmask) == 0;
463 // The function is not thread-safe. For use in dtor only
467 //TODO: free all array nodes
471 }} // namespace cds::intrusive
473 #endif // #ifndef CDSLIB_INTRUSIVE_IMPL_MULTILEVEL_HASHSET_H