cds/container/impl/multilevel_hashset.h

   1 //$$CDS-header$$
   2
   3 #ifndef CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHSET_H
   4 #define CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHSET_H
   5
   6 #include <cds/intrusive/impl/multilevel_hashset.h>
   7 #include <cds/container/details/multilevel_hashset_base.h>
   8
   9 namespace cds { namespace container {
  10
  11     /// Hash set based on multi-level array
  12     /** @ingroup cds_nonintrusive_set
  13         @anchor cds_container_MultilevelHashSet_hp
  14
  15         Source:
  16         - [2013] Steven Feldman, Pierre LaBorde, Damian Dechev "Concurrent Multi-level Arrays:
  17                  Wait-free Extensible Hash Maps"
  18
  19         [From the paper] The hardest problem encountered while developing a parallel hash map is how to perform
  20         a global resize, the process of redistributing the elements in a hash map that occurs when adding new
  21         buckets. The negative impact of blocking synchronization is multiplied during a global resize, because all
  22         threads will be forced to wait on the thread that is performing the involved process of resizing the hash map
  23         and redistributing the elements. \p %MultilevelHashSet implementation avoids global resizes through new array
  24         allocation. By allowing concurrent expansion this structure is free from the overhead of an explicit resize,
  25         which facilitates concurrent operations.
  26
  27         The presented design includes dynamic hashing, the use of sub-arrays within the hash map data structure;
  28         which, in combination with <b>perfect hashing</b>, means that each element has a unique final, as well as current, position.
  29         It is important to note that the perfect hash function required by our hash map is trivial to realize as
  30         any hash function that permutes the bits of the key is suitable. This is possible because of our approach
  31         to the hash function; we require that it produces hash values that are equal in size to that of the key.
  32         We know that if we expand the hash map a fixed number of times there can be no collision as duplicate keys
  33         are not provided for in the standard semantics of a hash map.
  34
  35         \p %MultiLevelHashSet is a multi-level array which has an internal structure similar to a tree:
  36         @image html multilevel_hashset.png
  37         The multi-level array differs from a tree in that each position on the tree could hold an array of nodes or a single node.
  38         A position that holds a single node is a \p dataNode which holds the hash value of a key and the value that is associated
  39         with that key; it is a simple struct holding two variables. A \p dataNode in the multi-level array could be marked.
  40         A \p markedDataNode refers to a pointer to a \p dataNode that has been bitmarked at the least significant bit (LSB)
  41         of the pointer to the node. This signifies that this \p dataNode is contended. An expansion must occur at this node;
  42         any thread that sees this \p markedDataNode will try to replace it with an \p arrayNode; which is a position that holds
  43         an array of nodes. The pointer to an \p arrayNode is differentiated from that of a pointer to a \p dataNode by a bitmark
  44         on the second-least significant bit.
  45
  46         \p %MultiLevelHashSet multi-level array is similar to a tree in that we keep a pointer to the root, which is a memory array
  47         called \p head. The length of the \p head memory array is unique, whereas every other \p arrayNode has a uniform length;
  48         a normal \p arrayNode has a fixed power-of-two length equal to the binary logarithm of a variable called \p arrayLength.
  49         The maximum depth of the tree, \p maxDepth, is the maximum number of pointers that must be followed to reach any node.
  50         We define \p currentDepth as the number of memory arrays that we need to traverse to reach the \p arrayNode on which
  51         we need to operate; this is initially one, because of \p head.
  52
  53         That approach to the structure of the hash set uses an extensible hashing scheme; <b> the hash value is treated as a bit
  54         string</b> and rehash incrementally.
  55
  56         @note Two important things you should keep in mind when you're using \p %MultiLevelHashSet:
  57         - all keys must be fixed-size. It means that you cannot use \p std::string as a key for \p %MultiLevelHashSet.
  58           Instead, for the strings you should use well-known hashing algorithms like <a href="https://en.wikipedia.org/wiki/Secure_Hash_Algorithm">SHA1, SHA2</a>,
  59           <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>, <a href="https://en.wikipedia.org/wiki/CityHash">CityHash</a>
  60           or its successor <a href="https://code.google.com/p/farmhash/">FarmHash</a> and so on, which
  61           converts variable-length strings to fixed-length bit-strings, and use that hash as a key in \p %MultiLevelHashSet.
  62         - \p %MultiLevelHashSet uses a perfect hashing. It means that if two different keys, for example, of type \p std::string,
  63           have identical hash then you cannot insert both that keys in the set. \p %MultiLevelHashSet does not maintain the key,
  64           it maintains its fixed-size hash value.
  65
  66         The set supports @ref cds_container_MultilevelHashSet_iterators "bidirectional thread-safe iterators".
  67
  68         Template parameters:
  69         - \p GC - safe memory reclamation schema. Can be \p gc::HP, \p gc::DHP or one of \ref cds_urcu_type "RCU type"
  70         - \p T - a value type to be stored in the set
  71         - \p Traits - type traits, the structure based on \p multilevel_hashset::traits or result of \p multilevel_hashset::make_traits metafunction.
  72             \p Traits is the mandatory argument because it has one mandatory type - an @ref multilevel_hashset::traits::hash_accessor "accessor"
  73             to hash value of \p T. The set algorithm does not calculate that hash value.
  74
  75         There are several specializations of \p %MultiLevelHashSet for each \p GC. You should include:
  76         - <tt><cds/container/multilevel_hashset_hp.h></tt> for \p gc::HP garbage collector
  77         - <tt><cds/container/multilevel_hashset_dhp.h></tt> for \p gc::DHP garbage collector
  78         - <tt><cds/container/multilevel_hashset_rcu.h></tt> for \ref cds_intrusive_MultiLevelHashSet_rcu "RCU type". RCU specialization
  79             has a slightly different interface.
  80     */
  81     template <
  82         class GC
  83         , typename T
  84 #ifdef CDS_DOXYGEN_INVOKED
  85         , class Traits = multilevel_hashset::traits
  86 #else
  87         , class Traits
  88 #endif
  89     >
  90     class MultiLevelHashSet
  91 #ifdef CDS_DOXYGEN_INVOKED
  92         : protected cds::intrusive::MultiLevelHashSet< GC, T, Traits >
  93 #else
  94         : protected cds::container::details::make_multilevel_hashset< GC, T, Traits >::type
  95 #endif
  96     {
  97         //@cond
  98         typedef cds::container::details::make_multilevel_hashset< GC, T, Traits > maker;
  99         typedef typename maker::type base_class;
 100         //@endcond
 101
 102     public:
 103         typedef GC      gc;         ///< Garbage collector
 104         typedef T       value_type; ///< type of value stored in the set
 105         typedef Traits  traits;     ///< Traits template parameter, see \p multilevel_hashset::traits
 106
 107         typedef typename base_class::hash_accessor hash_accessor; ///< Hash accessor functor
 108         typedef typename base_class::hash_type hash_type; ///< Hash type deduced from \p hash_accessor return type
 109         typedef typename base_class::hash_comparator hash_comparator; ///< hash compare functor based on \p opt::compare and \p opt::less option setter
 110
 111         typedef typename traits::item_counter   item_counter;   ///< Item counter type
 112         typedef typename traits::allocator      allocator;      ///< Element allocator
 113         typedef typename traits::node_allocator node_allocator; ///< Array node allocator
 114         typedef typename traits::memory_model   memory_model;   ///< Memory model
 115         typedef typename traits::back_off       back_off;       ///< Backoff strategy
 116         typedef typename traits::stat           stat;           ///< Internal statistics type
 117
 118         typedef typename gc::template guarded_ptr< value_type > guarded_ptr; ///< Guarded pointer
 119
 120         /// Count of hazard pointers required
 121         static CDS_CONSTEXPR size_t const c_nHazardPtrCount = base_class::c_nHazardPtrCount;
 122
 123         typedef typename base_class::iterator               iterator;       ///< @ref cds_container_MultilevelHashSet_iterators "bidirectional iterator" type
 124         typedef typename base_class::const_iterator         const_iterator; ///< @ref cds_container_MultilevelHashSet_iterators "bidirectional const iterator" type
 125         typedef typename base_class::reverse_iterator       reverse_iterator;       ///< @ref cds_container_MultilevelHashSet_iterators "bidirectional reverse iterator" type
 126         typedef typename base_class::const_reverse_iterator const_reverse_iterator; ///< @ref cds_container_MultilevelHashSet_iterators "bidirectional reverse const iterator" type
 127
 128     protected:
 129         //@cond
 130         typedef typename maker::cxx_node_allocator cxx_node_allocator;
 131         typedef std::unique_ptr< value_type, typename maker::node_disposer > scoped_node_ptr;
 132         //@endcond
 133
 134     public:
 135         /// Creates empty set
 136         /**
 137             @param head_bits: 2<sup>head_bits</sup> specifies the size of head array, minimum is 4.
 138             @param array_bits: 2<sup>array_bits</sup> specifies the size of array node, minimum is 2.
 139
 140             Equation for \p head_bits and \p array_bits:
 141             \code
 142             sizeof(hash_type) * 8 == head_bits + N * array_bits
 143             \endcode
 144             where \p N is multi-level array depth.
 145         */
 146         MultiLevelHashSet( size_t head_bits = 8, size_t array_bits = 4 )
 147             : base_class( head_bits, array_bits )
 148         {}
 149
 150         /// Destructs the set and frees all data
 151         ~MultiLevelHashSet()
 152         {}
 153
 154         /// Inserts new element
 155         /**
 156             The function creates an element with copy of \p val value and then inserts it into the set.
 157
 158             The type \p Q should contain as minimum the complete hash for the element.
 159             The object of \ref value_type should be constructible from a value of type \p Q.
 160             In trivial case, \p Q is equal to \ref value_type.
 161
 162             Returns \p true if \p val is inserted into the set, \p false otherwise.
 163         */
 164         template <typename Q>
 165         bool insert( Q const& val )
 166         {
 167             scoped_node_ptr sp( cxx_node_allocator().New( val ));
 168             if ( base_class::insert( *sp )) {
 169                 sp.release();
 170                 return true;
 171             }
 172             return false;
 173         }
 174
 175         /// Inserts new element
 176         /**
 177             The function allows to split creating of new item into two part:
 178             - create item with key only
 179             - insert new item into the set
 180             - if inserting is success, calls \p f functor to initialize value-fields of \p val.
 181
 182             The functor signature is:
 183             \code
 184                 void func( value_type& val );
 185             \endcode
 186             where \p val is the item inserted. User-defined functor \p f should guarantee that during changing
 187             \p val no any other changes could be made on this set's item by concurrent threads.
 188             The user-defined functor is called only if the inserting is success.
 189         */
 190         template <typename Q, typename Func>
 191         bool insert( Q const& val, Func f )
 192         {
 193             scoped_node_ptr sp( cxx_node_allocator().New( val ));
 194             if ( base_class::insert( *sp, f )) {
 195                 sp.release();
 196                 return true;
 197             }
 198             return false;
 199         }
 200
 201         /// Updates the element
 202         /**
 203             The operation performs inserting or replacing with lock-free manner.
 204
 205             If the \p val key not found in the set, then the new item created from \p val
 206             will be inserted into the set iff \p bInsert is \p true.
 207             Otherwise, if \p val is found, it is replaced with new item created from \p val.
 208             In both cases \p func functor is called.
 209
 210             The functor \p Func signature:
 211             \code
 212                 struct my_functor {
 213                     void operator()( value_type& cur, value_type * prev );
 214                 };
 215             \endcode
 216             where:
 217             - \p cur - current element
 218             - \p prev - pointer to previous element with such hash. \p prev is \p nullptr
 219                  if \p cur was just inserted.
 220
 221             The functor may change non-key fields of the \p item; however, \p func must guarantee
 222             that during changing no any other modifications could be made on this item by concurrent threads.
 223
 224             Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successfull,
 225             i.e. the item has been inserted or updated,
 226             \p second is \p true if the new item has been added or \p false if the item with key equal to \p val
 227             already exists.
 228
 229             @warning See \ref cds_intrusive_item_creating "insert item troubleshooting"
 230         */
 231         template <typename Q, typename Func>
 232         std::pair<bool, bool> update( const Q& val, Func func, bool bInsert = true )
 233         {
 234             scoped_node_ptr sp( cxx_node_allocator().New( val ));
 235             std::pair<bool, bool> bRes = base_class::do_update( *sp, func, bInsert );
 236             if ( bRes.first )
 237                 sp.release();
 238             return bRes;
 239         }
 240
 241         /// Inserts data of type \p value_type created in-place from <tt>std::forward<Args>(args)...</tt>
 242         /**
 243             Returns \p true if inserting successful, \p false otherwise.
 244         */
 245         template <typename... Args>
 246         bool emplace( Args&&... args )
 247         {
 248             scoped_node_ptr sp( cxx_node_allocator().New( std::forward<Args>(args)... ));
 249             if ( base_class::insert( *sp )) {
 250                 sp.release();
 251                 return true;
 252             }
 253             return false;
 254         }
 255
 256         /// Deletes the item from the set
 257         /**
 258             The function searches \p hash in the set,
 259             deletes the item found, and returns \p true.
 260             If that item is not found the function returns \p false.
 261         */
 262         bool erase( hash_type const& hash )
 263         {
 264             return base_class::erase( hash );
 265         }
 266
 267         /// Deletes the item from the set
 268         /**
 269             The function searches \p hash in the set,
 270             call \p f functor with item found, and deltes the element from the set.
 271
 272             The \p Func interface is
 273             \code
 274             struct functor {
 275                 void operator()( value_type& item );
 276             };
 277             \endcode
 278
 279             If \p hash is not found the function returns \p false.
 280         */
 281         template <typename Func>
 282         bool erase( hash_type const& hash, Func f )
 283         {
 284             return base_class::erase( hash, f );
 285         }
 286
 287         /// Deletes the item pointed by iterator \p iter
 288         /**
 289             Returns \p true if the operation is successful, \p false otherwise.
 290
 291             The function does not invalidate the iterator, it remains valid and can be used for further traversing.
 292         */
 293         bool erase_at( iterator const& iter )
 294         {
 295             return base_class::erase_at( iter );
 296         }
 297         //@cond
 298         bool erase_at( reverse_iterator const& iter )
 299         {
 300             return base_class::erase_at( iter );
 301         }
 302         //@endcond
 303
 304         /// Extracts the item with specified \p hash
 305         /**
 306             The function searches \p hash in the set,
 307             unlinks it from the set, and returns an guarded pointer to the item extracted.
 308             If \p hash is not found the function returns an empty guarded pointer.
 309
 310             The item returned is reclaimed by garbage collector \p GC
 311             when returned \ref guarded_ptr object to be destroyed or released.
 312             @note Each \p guarded_ptr object uses the GC's guard that can be limited resource.
 313
 314             Usage:
 315             \code
 316             typedef cds::container::MultiLevelHashSet< your_template_args > my_set;
 317             my_set theSet;
 318             // ...
 319             {
 320                 my_set::guarded_ptr gp( theSet.extract( 5 ));
 321                 if ( gp ) {
 322                     // Deal with gp
 323                     // ...
 324                 }
 325                 // Destructor of gp releases internal HP guard
 326             }
 327             \endcode
 328         */
 329         guarded_ptr extract( hash_type const& hash )
 330         {
 331             return base_class::extract( hash );
 332         }
 333
 334         /// Finds an item by it's \p hash
 335         /**
 336             The function searches the item by \p hash and calls the functor \p f for item found.
 337             The interface of \p Func functor is:
 338             \code
 339             struct functor {
 340                 void operator()( value_type& item );
 341             };
 342             \endcode
 343             where \p item is the item found.
 344
 345             The functor may change non-key fields of \p item. Note that the functor is only guarantee
 346             that \p item cannot be disposed during the functor is executing.
 347             The functor does not serialize simultaneous access to the set's \p item. If such access is
 348             possible you must provide your own synchronization schema on item level to prevent unsafe item modifications.
 349
 350             The function returns \p true if \p hash is found, \p false otherwise.
 351         */
 352         template <typename Func>
 353         bool find( hash_type const& hash, Func f )
 354         {
 355             return base_class::find( hash, f );
 356         }
 357
 358         /// Checks whether the set contains \p hash
 359         /**
 360             The function searches the item by its \p hash
 361             and returns \p true if it is found, or \p false otherwise.
 362         */
 363         bool contains( hash_type const& hash )
 364         {
 365             return base_class::contains( hash );
 366         }
 367
 368         /// Finds an item by it's \p hash and returns the item found
 369         /**
 370             The function searches the item by its \p hash
 371             and returns the guarded pointer to the item found.
 372             If \p hash is not found the function returns an empty \p guarded_ptr.
 373
 374             @note Each \p guarded_ptr object uses one GC's guard which can be limited resource.
 375
 376             Usage:
 377             \code
 378             typedef cds::container::MultiLevelHashSet< your_template_params >  my_set;
 379             my_set theSet;
 380             // ...
 381             {
 382                 my_set::guarded_ptr gp( theSet.get( 5 ));
 383                 if ( theSet.get( 5 )) {
 384                     // Deal with gp
 385                     //...
 386                 }
 387                 // Destructor of guarded_ptr releases internal HP guard
 388             }
 389             \endcode
 390         */
 391         guarded_ptr get( hash_type const& hash )
 392         {
 393             return base_class::get( hash );
 394         }
 395
 396         /// Clears the set (non-atomic)
 397         /**
 398             The function unlink all data node from the set.
 399             The function is not atomic but is thread-safe.
 400             After \p %clear() the set may not be empty because another threads may insert items.
 401         */
 402         void clear()
 403         {
 404             base_class::clear();
 405         }
 406
 407         /// Checks if the set is empty
 408         /**
 409             Emptiness is checked by item counting: if item count is zero then the set is empty.
 410             Thus, the correct item counting feature is an important part of the set implementation.
 411         */
 412         bool empty() const
 413         {
 414             return base_class::empty();
 415         }
 416
 417         /// Returns item count in the set
 418         size_t size() const
 419         {
 420             return base_class::size();
 421         }
 422
 423         /// Returns const reference to internal statistics
 424         stat const& statistics() const
 425         {
 426             return base_class::statistics();
 427         }
 428
 429         /// Returns the size of head node
 430         size_t head_size() const
 431         {
 432             return base_class::head_size();
 433         }
 434
 435         /// Returns the size of the array node
 436         size_t array_node_size() const
 437         {
 438             return base_class::array_node_size();
 439         }
 440
 441     public:
 442     ///@name Thread-safe iterators
 443         /** @anchor cds_container_MultilevelHashSet_iterators
 444             The set supports thread-safe iterators: you may iterate over the set in multi-threaded environment.
 445             It is guaranteed that the iterators will remain valid even if another thread deletes the node the iterator points to:
 446             Hazard Pointer embedded into the iterator object protects the node from physical reclamation.
 447
 448             @note Since the iterator object contains hazard pointer that is a thread-local resource,
 449             the iterator should not be passed to another thread.
 450
 451             Each iterator object supports the common interface:
 452             - dereference operators:
 453                 @code
 454                 value_type [const] * operator ->() noexcept
 455                 value_type [const] & operator *() noexcept
 456                 @endcode
 457             - pre-increment and pre-decrement. Post-operators is not supported
 458             - equality operators <tt>==</tt> and <tt>!=</tt>.
 459                 Iterators are equal iff they point to the same cell of the same array node.
 460                 Note that for two iterators \p it1 and \p it2, the conditon <tt> it1 == it2 </tt>
 461                 does not entail <tt> &(*it1) == &(*it2) </tt>
 462             - helper member function \p release() that clears internal hazard pointer.
 463                 After \p release() the iterator points to \p nullptr but it still remain valid: further iterating is possible.
 464         */
 465     ///@{
 466
 467         /// Returns an iterator to the beginning of the set
 468         iterator begin()
 469         {
 470             return base_class::begin();
 471         }
 472
 473         /// Returns an const iterator to the beginning of the set
 474         const_iterator begin() const
 475         {
 476             return base_class::begin();
 477         }
 478
 479         /// Returns an const iterator to the beginning of the set
 480         const_iterator cbegin()
 481         {
 482             return base_class::cbegin();
 483         }
 484
 485         /// Returns an iterator to the element following the last element of the set. This element acts as a placeholder; attempting to access it results in undefined behavior.
 486         iterator end()
 487         {
 488             return base_class::end();
 489         }
 490
 491         /// Returns a const iterator to the element following the last element of the set. This element acts as a placeholder; attempting to access it results in undefined behavior.
 492         const_iterator end() const
 493         {
 494             return base_class::end();
 495         }
 496
 497         /// Returns a const iterator to the element following the last element of the set. This element acts as a placeholder; attempting to access it results in undefined behavior.
 498         const_iterator cend()
 499         {
 500             return base_class::cend();
 501         }
 502
 503         /// Returns a reverse iterator to the first element of the reversed set
 504         reverse_iterator rbegin()
 505         {
 506             return base_class::rbegin();
 507         }
 508
 509         /// Returns a const reverse iterator to the first element of the reversed set
 510         const_reverse_iterator rbegin() const
 511         {
 512             return base_class::rbegin();
 513         }
 514
 515         /// Returns a const reverse iterator to the first element of the reversed set
 516         const_reverse_iterator crbegin()
 517         {
 518             return base_class::crbegin();
 519         }
 520
 521         /// Returns a reverse iterator to the element following the last element of the reversed set
 522         /**
 523             It corresponds to the element preceding the first element of the non-reversed container.
 524             This element acts as a placeholder, attempting to access it results in undefined behavior.
 525         */
 526         reverse_iterator rend()
 527         {
 528             return base_class::rend();
 529         }
 530
 531         /// Returns a const reverse iterator to the element following the last element of the reversed set
 532         /**
 533             It corresponds to the element preceding the first element of the non-reversed container.
 534             This element acts as a placeholder, attempting to access it results in undefined behavior.
 535         */
 536         const_reverse_iterator rend() const
 537         {
 538             return base_class::rend();
 539         }
 540
 541         /// Returns a const reverse iterator to the element following the last element of the reversed set
 542         /**
 543             It corresponds to the element preceding the first element of the non-reversed container.
 544             This element acts as a placeholder, attempting to access it results in undefined behavior.
 545         */
 546         const_reverse_iterator crend()
 547         {
 548             return base_class::crend();
 549         }
 550     ///@}
 551     };
 552
 553 }} // namespace cds::container
 554
 555 #endif // #ifndef CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHSET_H