cds/intrusive/impl/multilevel_hashset.h

   1 //$$CDS-header$$
   2
   3 #ifndef CDSLIB_INTRUSIVE_IMPL_MULTILEVEL_HASHSET_H
   4 #define CDSLIB_INTRUSIVE_IMPL_MULTILEVEL_HASHSET_H
   5
   6 #include <cds/intrusive/details/multilevel_hashset_base.h>
   7 #include <cds/details/allocator.h>
   8
   9 namespace cds { namespace intrusive {
  10     /// Intrusive hash set based on multi-level array
  11     /** @ingroup cds_intrusive_map
  12         @anchor cds_intrusive_MultilevelHashSet_hp
  13
  14         Source:
  15         - [2013] Steven Feldman, Pierre LaBorde, Damian Dechev "Concurrent Multi-level Arrays:
  16                  Wait-free Extensible Hash Maps"
  17
  18         [From the paper] The hardest problem encountered while developing a parallel hash map is how to perform
  19         a global resize, the process of redistributing the elements in a hash map that occurs when adding new
  20         buckets. The negative impact of blocking synchronization is multiplied during a global resize, because all
  21         threads will be forced to wait on the thread that is performing the involved process of resizing the hash map
  22         and redistributing the elements. \p %MultilevelHashSet implementation avoids global resizes through new array
  23         allocation. By allowing concurrent expansion this structure is free from the overhead of an explicit resize,
  24         which facilitates concurrent operations.
  25
  26         The presented design includes dynamic hashing, the use of sub-arrays within the hash map data structure;
  27         which, in combination with <b>perfect hashing</b>, means that each element has a unique final, as well as current, position.
  28         It is important to note that the perfect hash function required by our hash map is trivial to realize as
  29         any hash function that permutes the bits of the key is suitable. This is possible because of our approach
  30         to the hash function; we require that it produces hash values that are equal in size to that of the key.
  31         We know that if we expand the hash map a fixed number of times there can be no collision as duplicate keys
  32         are not provided for in the standard semantics of a hash map.
  33
  34         \p %MultiLevelHashSet is a multi-level array whitch has a structure similar to a tree:
  35         @image html multilevel_hashset.png
  36         The multi-level array differs from a tree in that each position on the tree could hold an array of nodes or a single node.
  37         A position that holds a single node is a \p dataNode which holds the hash value of a key and the value that is associated\r
  38         with that key; it is a simple struct holding two variables. A \p dataNode in the multi-level array could be marked.\r
  39         A \p markedDataNode refers to a pointer to a \p dataNode that has been bitmarked at the least significant bit (LSB)\r
  40         of the pointer to the node. This signifies that this \p dataNode is contended. An expansion must occur at this node;\r
  41         any thread that sees this \p markedDataNode will try to replace it with an \p arrayNode; which is a position that holds\r
  42         an array of nodes. The pointer to an \p arrayNode is differentiated from that of a pointer to a \p dataNode by a bitmark\r
  43         on the second-least significant bit.\r
  44 \r
  45         \p %MultiLevelHashSet multi-level array is similar to a tree in that we keep a pointer to the root, which is a memory array\r
  46         called \p head. The length of the \p head memory array is unique, whereas every other \p arrayNode has a uniform length;\r
  47         a normal \p arrayNode has a fixed power-of-two length equal to the binary logarithm of a variable called \p arrayLength.\r
  48         The maximum depth of the tree, \p maxDepth, is the maximum number of pointers that must be followed to reach any node.\r
  49         We define \p currentDepth as the number of memory arrays that we need to traverse to reach the \p arrayNode on which\r
  50         we need to operate; this is initially one, because of \p head.\r
  51 \r
  52         That approach to the structure of the hash map uses an extensible hashing scheme; <b> the hash value is treated as a bit\r
  53         string</b> and rehash incrementally.\r
  54 \r
  55         @note Two important things you should keep in mind when you're using \p %MultiLevelHashSet:\r
  56         - all keys must be fixed-size. It means that you cannot use \p std::string as a key for \p %MultiLevelHashSet.\r
  57           Instead, for the strings you should use well-known hashing algorithms like <a href="https://en.wikipedia.org/wiki/Secure_Hash_Algorithm">SHA1, SHA2</a>,\r
  58           <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>, <a href="https://en.wikipedia.org/wiki/CityHash">CityHash</a> \r
  59           or its successor <a href="https://code.google.com/p/farmhash/">FarmHash</a> and so on, which\r
  60           converts variable-length strings to fixed-length bit-strings, and use that hash as a key in \p %MultiLevelHashSet.\r
  61         - \p %MultiLevelHashSet uses a perfect hashing. It means that if two different keys, for example, of type \p std::string,\r
  62           have identical hash then you cannot insert both that keys in the set. \p %MultiLevelHashSet does not maintain the key,\r
  63           it maintains its fized-size hash value.\r
  64 \r
  65 \r
  66         There are several specializations of \p %MultiLevelHashSet for each \p GC. You should include:
  67         - <tt><cds/intrusive/multilevel_hashset_hp.h></tt> for \p gc::HP garbage collector
  68         - <tt><cds/intrusive/multilevel_hashset_dhp.h></tt> for \p gc::DHP garbage collector
  69         - <tt><cds/intrusive/multilevel_hashset_nogc.h></tt> for \ref cds_intrusive_MultiLevelHashSet_nogc for append-only set
  70         - <tt><cds/intrusive/multilevel_hashset_rcu.h></tt> for \ref cds_intrusive_MultiLevelHashSet_rcu "RCU type"
  71     */
  72     template <
  73         class GC
  74         ,typename T
  75 #ifdef CDS_DOXYGEN_INVOKED
  76        ,typename Traits = multilevel_hashset::traits
  77 #else
  78        ,typename Traits
  79 #endif
  80     >
  81     class MultiLevelHashSet
  82     {
  83     public:
  84         typedef GC      gc;         ///< Garbage collector
  85         typedef T       value_type; ///< type of value stored in the set
  86         typedef Traits  traits;     ///< Traits template parameter, see \p multilevel_hashset::traits
  87
  88         typedef typename traits::hash_accessor hash_accessor; ///< Hash accessor functor
  89         static_assert(!std::is_same< hash_accessor, cds::opt::none >::value, "hash_accessor functor must be specified" );
  90
  91         /// Hash type defined as \p hash_accessor return type
  92         typedef typename std::decay<
  93             typename std::remove_reference<
  94                 decltype( hash_accessor()( std::declval<T>()) )
  95             >::type
  96         >::type hash_type;
  97         static_assert( !std::is_pointer<hash_type>, "hash_accessor should return a reference to hash value" );
  98
  99         typedef typename traits::disposer disposer; ///< data node disposer
 100
 101 #   ifdef CDS_DOXYGEN_INVOKED
 102         typedef implementation_defined hash_comparator  ;    ///< hash compare functor based on opt::compare and opt::less option setter
 103 #   else
 104         typedef typename cds::opt::details::make_comparator_from<
 105             hash_type,
 106             traits,
 107             multilevel_hashset::bitwise_compare< hash_type >
 108         >::type hash_comparator;
 109 #   endif
 110
 111         typedef typename traits::item_counter         item_counter;         ///< Item counter type
 112         typedef typename traits::head_node_allocator  head_node_allocator;  ///< Head node allocator
 113         typedef typename traits::array_node_allocator array_node_allocator; ///< Array node allocator
 114         typedef typename traits::memory_model         memory_model;         ///< Memory model
 115         typedef typename traits::back_off             back_off;             ///< Backoff strategy
 116         typedef typename traits::stat                 stat;                 ///< Internal statistics type
 117
 118         typedef typename gc::template guarded_ptr< value_type > guarded_ptr; ///< Guarded pointer
 119
 120     protected:
 121         //@cond
 122         enum cell_flags {
 123             logically_deleted = 1,  ///< the cell (data node) is logically deleted
 124             array_converting = 2,   ///< the cell is converting from data node to an array node
 125             array_node = 4          ///< the cell is a pointer to an array node
 126         };
 127         typedef cds::details::marked_ptr< value_type, 7 > node_ptr;
 128         typedef atomics::atomic< node_ptr * >  atomic_node_ptr;
 129
 130         typedef cds::details::Allocator< atomic_node_ptr, head_node_allocator > cxx_head_node_allocator;
 131         typedef cds::details::Allocator< atomic_node_ptr, array_node_allocator > cxx_array_node_allocator;
 132
 133         struct metrics {
 134             size_t  head_node_size;     // power-of-two
 135             size_t  head_node_size_log; // log2( head_node_size )
 136             size_t  array_node_size;    // power-of-two
 137             size_t  array_node_size_log;// log2( array_node_size )
 138         };
 139         //@endcond
 140
 141     private:
 142         //@cond
 143         metrics const     m_Metrics;     ///< Metrics
 144         atomic_node_ptr * m_Head;        ///< Head array
 145         item_counter      m_ItemCounter; ///< Item counter
 146         stat              m_Stat;        ///< Internal statistics
 147         //@endcond
 148
 149     public:
 150         /// Creates empty set
 151         /**
 152             @param head_bits: 2<sup>head_bits</sup> specifies the size of head array, minimum is 8.
 153             @param array_bits: 2<sup>array_bits</sup> specifies the size of array node, minimum is 2.
 154
 155             Equation for \p head_bits and \p array_bits:
 156             \code
 157             sizeof(hash_type) * 8 == head_bits + N * array_bits
 158             \endcode
 159             where \p N is multi-level array depth.
 160         */
 161         MultiLevelHashSet( size_t head_bits = 8, size_t array_bits = 4 )
 162             : m_Metrics(make_metrics( head_bits, array_bits ))
 163             , m_Head( cxx_head_node_allocator().NewArray( m_Metrics.head_node_size, nullptr ))
 164         {}
 165
 166         /// Destructs the set and frees all data
 167         ~MultiLevelHashSet()
 168         {
 169             destroy_tree();
 170             cxx_head_node_allocator().Delete( m_Head, m_Metrics.head_node_size );
 171         }
 172
 173         /// Inserts new node
 174         /**
 175             The function inserts \p val in the set if it does not contain
 176             an item with that hash.
 177
 178             Returns \p true if \p val is placed into the set, \p false otherwise.
 179         */
 180         bool insert( value_type& val )
 181         {
 182         }
 183
 184         /// Inserts new node
 185         /**
 186             This function is intended for derived non-intrusive containers.
 187
 188             The function allows to split creating of new item into two part:
 189             - create item with key only
 190             - insert new item into the set
 191             - if inserting is success, calls  \p f functor to initialize value-field of \p val.
 192
 193             The functor signature is:
 194             \code
 195                 void func( value_type& val );
 196             \endcode
 197             where \p val is the item inserted.
 198
 199             The user-defined functor is called only if the inserting is success.
 200
 201             @warning See \ref cds_intrusive_item_creating "insert item troubleshooting".
 202         */
 203         template <typename Func>
 204         bool insert( value_type& val, Func f )
 205         {
 206         }
 207
 208         /// Updates the node
 209         /**
 210             The operation performs inserting or changing data with lock-free manner.
 211
 212             If the item \p val not found in the set, then \p val is inserted into the set iff \p bInsert is \p true.
 213             Otherwise, the functor \p func is called with item found, \p bInsert argument is ignored.
 214
 215             The functor signature is:
 216             \code
 217                 void func( bool bNew, value_type& item, value_type& val );
 218             \endcode
 219             with arguments:
 220             - \p bNew - \p true if the item has been inserted, \p false otherwise
 221             - \p item - the item in the set (new or existing)
 222             - \p val - argument \p val passed into the \p update function
 223             If new item has been inserted (i.e. \p bNew is \p true) then \p item and \p val arguments
 224             refers to the same thing.
 225
 226             The functor may change non-key fields of the \p item.
 227
 228             Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successfull,
 229             \p second is \p true if new item has been added or \p false if the set contains that hash.
 230
 231             @warning See \ref cds_intrusive_item_creating "insert item troubleshooting".
 232         */
 233         template <typename Func>
 234         std::pair<bool, bool> update( value_type& val, Func func, bool bInsert = true )
 235         {
 236         }
 237
 238         /// Unlinks the item \p val from the set
 239         /**
 240             The function searches the item \p val in the set and unlink it
 241             if it is found and its address is equal to <tt>&val</tt>.
 242
 243             The function returns \p true if success and \p false otherwise.
 244         */
 245         bool unlink( value_type& val )
 246         {
 247         }
 248
 249         /// Deletes the item from the set
 250         /**
 251             The function searches \p hash in the set,
 252             unlinks the item found, and returns \p true.
 253             If that item is not found the function returns \p false.
 254
 255             The \ref disposer specified in \p Traits is called by garbage collector \p GC asynchronously.
 256
 257         */
 258         bool erase( hash_type const& hash )
 259         {
 260             if ( bucket( key ).erase( key )) {
 261                 --m_ItemCounter;
 262                 return true;
 263             }
 264             return false;
 265         }
 266
 267         /// Deletes the item from the set
 268         /**
 269             The function searches \p hash in the set,
 270             call \p f functor with item found, and unlinks it from the set.
 271             The \ref disposer specified in \p Traits is called
 272             by garbage collector \p GC asynchronously.
 273
 274             The \p Func interface is
 275             \code
 276             struct functor {
 277                 void operator()( value_type const& item );
 278             };
 279             \endcode
 280
 281             If \p hash is not found the function returns \p false.
 282         */
 283         template <typename Func>
 284         bool erase( hash_type const& hash, Func f )
 285         {
 286         }
 287
 288         /// Extracts the item with specified \p hash
 289         /**
 290             The function searches \p hash in the set,
 291             unlinks it from the set, and returns an guarded pointer to the item extracted.
 292             If \p hash is not found the function returns an empty guarded pointer.
 293
 294             The \p disposer specified in \p Traits class' template parameter is called automatically
 295             by garbage collector \p GC when returned \ref guarded_ptr object to be destroyed or released.
 296             @note Each \p guarded_ptr object uses the GC's guard that can be limited resource.
 297
 298             Usage:
 299             \code
 300             typedef cds::intrusive::MultiLevelHashSet< your_template_args > my_set;
 301             my_set theSet;
 302             // ...
 303             {
 304                 my_set::guarded_ptr gp( theSet.extract( 5 ));
 305                 if ( gp ) {
 306                     // Deal with gp
 307                     // ...
 308                 }
 309                 // Destructor of gp releases internal HP guard
 310             }
 311             \endcode
 312         */
 313         guarded_ptr extract( hash_type const& hash )
 314         {
 315         }
 316
 317         /// Finds an item by it's \p hash
 318         /**
 319             The function searches the item by \p hash and calls the functor \p f for item found.
 320             The interface of \p Func functor is:
 321             \code
 322             struct functor {
 323                 void operator()( value_type& item );
 324             };
 325             \endcode
 326             where \p item is the item found.
 327
 328             The functor may change non-key fields of \p item. Note that the functor is only guarantee
 329             that \p item cannot be disposed during the functor is executing.
 330             The functor does not serialize simultaneous access to the set's \p item. If such access is
 331             possible you must provide your own synchronization schema on item level to prevent unsafe item modifications.
 332
 333             The function returns \p true if \p hash is found, \p false otherwise.
 334         */
 335         template <typename Func>
 336         bool find( hash_type& hash, Func f )
 337         {
 338         }
 339         //@cond
 340         template <typename Func>
 341         bool find( hash_type const& hash, Func f )
 342         {
 343         }
 344         //@endcond
 345
 346         /// Finds an item by it's \p hash
 347         /**
 348             The function searches the item by its \p hash
 349             and returns \p true if it is found, or \p false otherwise.
 350         */
 351         bool find( hash_type const& hash )
 352         {
 353         }
 354
 355         /// Finds an item by it's \p hash and returns the item found
 356         /**
 357             The function searches the item by its \p hash
 358             and returns the guarded pointer to the item found.
 359             If \p hash is not found the function returns an empty \p guarded_ptr.
 360
 361             @note Each \p guarded_ptr object uses one GC's guard which can be limited resource.
 362
 363             Usage:
 364             \code
 365             typedef cds::intrusive::MultiLevelHashSet< your_template_params >  my_set;
 366             my_set theSet;
 367             // ...
 368             {
 369                 my_set::guarded_ptr gp( theSet.get( 5 ));
 370                 if ( theSet.get( 5 )) {
 371                     // Deal with gp
 372                     //...
 373                 }
 374                 // Destructor of guarded_ptr releases internal HP guard
 375             }
 376             \endcode
 377         */
 378         guarded_ptr get( hash_type const& hash )
 379         {
 380         }
 381
 382         /// Clears the set (non-atomic)
 383         /**
 384             The function unlink all items from the set.
 385             The function is not atomic. It removes all data nodes and then resets the item counter to zero.
 386             If there are a thread that performs insertion while \p %clear() is working the result is undefined in general case:
 387             \p empty() may return \p true but the set may contain item(s).
 388             Therefore, \p %clear() may be used only for debugging purposes.
 389
 390             For each item the \p disposer is called after unlinking.
 391         */
 392         void clear()
 393         {
 394         }
 395
 396         /// Checks if the set is empty
 397         /**
 398             Emptiness is checked by item counting: if item count is zero then the set is empty.
 399             Thus, the correct item counting feature is an important part of the set implementation.
 400         */
 401         bool empty() const
 402         {
 403             return size() == 0;
 404         }
 405
 406         /// Returns item count in the set
 407         size_t size() const
 408         {
 409             return m_ItemCounter;
 410         }
 411
 412         /// Returns const reference to internal statistics
 413         stat const& statistics() const
 414         {
 415             return m_Stat;
 416         }
 417
 418         /// Returns the size of head node
 419         size_t head_size() const
 420         {
 421             return m_Metrics.head_node_size;
 422         }
 423
 424         /// Returns the size of the array node
 425         size_t array_node_size() const
 426         {
 427             return m_Metrics.array_node_size;
 428         }
 429
 430     private:
 431         //@cond
 432         static metrics make_metrics( size_t head_bits, size_t array_bits )
 433         {
 434             size_t const hash_bits = sizeof( hash_type ) * 8;
 435
 436             if ( array_bits < 2 )
 437                 array_bits = 2;
 438             if ( head_bits < 8 )
 439                 head_bits = 8;
 440             if ( head_bits > hash_bits )
 441                 head_bits = hash_bits;
 442             if ( (hash_bits - head_bits) % array_bits != 0 )
 443                 head_bits += (hash_bits - head_bits) % array_bits;
 444
 445             assert( (hash_bits - head_bits) % array_bits == 0 );
 446
 447             metrics m;
 448             m.head_node_size_log = head_bits;
 449             m.head_node_size = 1 << head_bits;
 450             m.array_node_size_log = array_bits;
 451             m.array_node_size = 1 << array_bits;
 452             return m;
 453         }
 454
 455         template <typename T>
 456         static bool check_node_alignment( T * p )
 457         {
 458             return (reinterpret_cast<uintptr_t>(p) & node_ptr::bitmask) == 0;
 459         }
 460
 461         void destroy_tree()
 462         {
 463             // The function is not thread-safe. For use in dtor only
 464             // Remove data node
 465             clear();
 466
 467             //TODO: free all array nodes
 468         }
 469         //@endcond
 470     };
 471 }} // namespace cds::intrusive
 472
 473 #endif // #ifndef CDSLIB_INTRUSIVE_IMPL_MULTILEVEL_HASHSET_H