cds/container/impl/multilevel_hashmap.h

   1 //$$CDS-header$$
   2
   3 #ifndef CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHMAP_H
   4 #define CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHMAP_H
   5
   6 #include <cds/intrusive/impl/multilevel_hashset.h>
   7 #include <cds/container/details/multilevel_hashmap_base.h>
   8
   9 namespace cds { namespace container {
  10
  11     /// Hash map based on multi-level array
  12     /** @ingroup cds_nonintrusive_map
  13         @anchor cds_container_MultilevelHashMap_hp
  14
  15         Source:
  16         - [2013] Steven Feldman, Pierre LaBorde, Damian Dechev "Concurrent Multi-level Arrays:
  17                  Wait-free Extensible Hash Maps"
  18
  19         [From the paper] The hardest problem encountered while developing a parallel hash map is how to perform
  20         a global resize, the process of redistributing the elements in a hash map that occurs when adding new
  21         buckets. The negative impact of blocking synchronization is multiplied during a global resize, because all
  22         threads will be forced to wait on the thread that is performing the involved process of resizing the hash map
  23         and redistributing the elements. \p %MultilevelHashSet implementation avoids global resizes through new array
  24         allocation. By allowing concurrent expansion this structure is free from the overhead of an explicit resize,
  25         which facilitates concurrent operations.
  26
  27         The presented design includes dynamic hashing, the use of sub-arrays within the hash map data structure;
  28         which, in combination with <b>perfect hashing</b>, means that each element has a unique final, as well as current, position.
  29         It is important to note that the perfect hash function required by our hash map is trivial to realize as
  30         any hash function that permutes the bits of the key is suitable. This is possible because of our approach
  31         to the hash function; we require that it produces hash values that are equal in size to that of the key.
  32         We know that if we expand the hash map a fixed number of times there can be no collision as duplicate keys
  33         are not provided for in the standard semantics of a hash map.
  34
  35         \p %MultiLevelHashMap is a multi-level array which has an internal structure similar to a tree:
  36         @image html multilevel_hashset.png
  37         The multi-level array differs from a tree in that each position on the tree could hold an array of nodes or a single node.
  38         A position that holds a single node is a \p dataNode which holds the hash value of a key and the value that is associated\r
  39         with that key; it is a simple struct holding two variables. A \p dataNode in the multi-level array could be marked.\r
  40         A \p markedDataNode refers to a pointer to a \p dataNode that has been bitmarked at the least significant bit (LSB)\r
  41         of the pointer to the node. This signifies that this \p dataNode is contended. An expansion must occur at this node;\r
  42         any thread that sees this \p markedDataNode will try to replace it with an \p arrayNode; which is a position that holds\r
  43         an array of nodes. The pointer to an \p arrayNode is differentiated from that of a pointer to a \p dataNode by a bitmark\r
  44         on the second-least significant bit.\r
  45 \r
  46         \p %MultiLevelHashMap multi-level array is similar to a tree in that we keep a pointer to the root, which is a memory array\r
  47         called \p head. The length of the \p head memory array is unique, whereas every other \p arrayNode has a uniform length;\r
  48         a normal \p arrayNode has a fixed power-of-two length equal to the binary logarithm of a variable called \p arrayLength.\r
  49         The maximum depth of the tree, \p maxDepth, is the maximum number of pointers that must be followed to reach any node.\r
  50         We define \p currentDepth as the number of memory arrays that we need to traverse to reach the \p arrayNode on which\r
  51         we need to operate; this is initially one, because of \p head.\r
  52 \r
  53         That approach to the structure of the hash set uses an extensible hashing scheme; <b> the hash value is treated as a bit\r
  54         string</b> and rehash incrementally.\r
  55 \r
  56         @note Two important things you should keep in mind when you're using \p %MultiLevelHashMap:\r
  57         - all keys is converted to fixed-size bit-string by hash functor provided. \r
  58           You can use variable-length keys, for example, \p std::string as a key for \p %MultiLevelHashMap,\r
  59           but real key in the map will be fixed-ize hash values of your keys.\r
  60           For the strings you may use well-known hashing algorithms like <a href="https://en.wikipedia.org/wiki/Secure_Hash_Algorithm">SHA1, SHA2</a>,\r
  61           <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>, <a href="https://en.wikipedia.org/wiki/CityHash">CityHash</a> \r
  62           or its successor <a href="https://code.google.com/p/farmhash/">FarmHash</a> and so on, which\r
  63           converts variable-length strings to fixed-length bit-strings, and such hash values will be the keys in \p %MultiLevelHashMap.\r
  64         - \p %MultiLevelHashMap uses a perfect hashing. It means that if two different keys, for example, of type \p std::string,\r
  65           have identical hash then you cannot insert both that keys in the set. \p %MultiLevelHashMap does not maintain the key,\r
  66           it maintains its fixed-size hash value.\r
  67 \r
  68         The set supports @ref cds_container_MultilevelHashMap_iterators "bidirectional thread-safe iterators".\r
  69 \r
  70         Template parameters:\r
  71         - \p GC - safe memory reclamation schema. Can be \p gc::HP, \p gc::DHP or one of \ref cds_urcu_type "RCU type"\r
  72         - \p Key - a key type to be stored in the map\r
  73         - \p T - a value type to be stored in the map\r
  74         - \p Traits - type traits, the structure based on \p multilevel_hashmap::traits or result of \p multilevel_hashmap::make_traits metafunction.\r
  75 \r
  76         There are several specializations of \p %MultiLevelHashMap for each \p GC. You should include:
  77         - <tt><cds/container/multilevel_hashmap_hp.h></tt> for \p gc::HP garbage collector
  78         - <tt><cds/container/multilevel_hashmap_dhp.h></tt> for \p gc::DHP garbage collector
  79         - <tt><cds/container/multilevel_hashmap_rcu.h></tt> for \ref cds_intrusive_MultiLevelHashSet_rcu "RCU type". RCU specialization
  80             has a slightly different interface.
  81     */
  82     template <
  83         class GC
  84         ,typename Key
  85         ,typename T
  86 #ifdef CDS_DOXYGEN_INVOKED
  87         ,class Traits = multilevel_hashmap::traits
  88 #else
  89         ,class Traits
  90 #endif
  91     >
  92     class MultiLevelHashMap
  93 #ifdef CDS_DOXYGEN_INVOKED
  94         : protected cds::intrusive::MultiLevelHashSet< GC, std::pair<Key const, T>, Traits >
  95 #else
  96         : protected cds::container::details::make_multilevel_hashmap< GC, Key, T, Hasher, Traits >::type
  97 #endif
  98     {
  99         //@cond
 100         typedef cds::container::details::make_multilevel_hashmap< GC, Key, T, Hasher, Traits > maker;
 101         typedef typename maker::type base_class;
 102         //@endcond
 103     public:
 104         typedef GC      gc;          ///< Garbage collector
 105         typedef Key     key_type;    ///< Key type
 106         typedef T       mapped_type; ///< Mapped type
 107         typedef std::pair< key_type const, mapped_type> value_type;   ///< Key-value pair to be stored in the map
 108         typedef Traits  traits;      ///< Map traits
 109 #ifdef CDS_DOXYGEN_INVOKED
 110         typedef typename traits::hash hasher; ///< Hash functor, see \p multilevel_hashmap::traits::hash
 111 #else
 112         typedef typename maker::hasher hasher;
 113 #endif
 114
 115         typedef typename maker::hash_type hash_type; ///< Hash type deduced from \p hasher return type
 116         typedef typename base_class::hash_comparator hash_comparator; ///< hash compare functor based on \p Traits::compare and \p Traits::less
 117
 118         typedef typename traits::item_counter   item_counter;   ///< Item counter type
 119         typedef typename traits::allocator      allocator;      ///< Element allocator
 120         typedef typename traits::node_allocator node_allocator; ///< Array node allocator
 121         typedef typename traits::memory_model   memory_model;   ///< Memory model
 122         typedef typename traits::back_off       back_off;       ///< Backoff strategy
 123         typedef typename traits::stat           stat;           ///< Internal statistics type
 124
 125         typedef typename gc::template guarded_ptr< value_type > guarded_ptr; ///< Guarded pointer
 126
 127         /// Count of hazard pointers required
 128         static CDS_CONSTEXPR size_t const c_nHazardPtrCount = base_class::c_nHazardPtrCount;
 129
 130     protected:
 131         //@cond
 132         typedef typename maker::node_type node_type;
 133         typedef typename maker::cxx_node_allocator cxx_node_allocator;
 134         typedef std::unique_ptr< node_type, typename maker::node_disposer > scoped_node_ptr;
 135         //@endcond
 136
 137     protected:
 138         //@cond
 139         hasher  m_Hasher;
 140         //@endcond
 141
 142     public:
 143         /// Creates empty map
 144         /**
 145             @param head_bits: 2<sup>head_bits</sup> specifies the size of head array, minimum is 4.
 146             @param array_bits: 2<sup>array_bits</sup> specifies the size of array node, minimum is 2.
 147
 148             Equation for \p head_bits and \p array_bits:
 149             \code
 150             sizeof(hash_type) * 8 == head_bits + N * array_bits
 151             \endcode
 152             where \p N is multi-level array depth.
 153         */
 154         MultiLevelHashMap( size_t head_bits = 8, size_t array_bits = 4 )
 155             : base_class( head_bits, array_bits )
 156         {}
 157
 158         /// Destructs the map and frees all data
 159         ~MultiLevelHashMap()
 160         {}
 161
 162         /// Inserts new element with key and default value
 163         /**
 164             The function creates an element with \p key and default value, and then inserts the node created into the map.
 165
 166             Preconditions:
 167             - The \p key_type should be constructible from a value of type \p K.
 168                 In trivial case, \p K is equal to \p key_type.
 169             - The \p mapped_type should be default-constructible.
 170
 171             Returns \p true if inserting successful, \p false otherwise.
 172         */
 173         template <typename K>
 174         bool insert( K const& key )
 175         {
 176             scoped_node_ptr sp( cxx_node_allocator().New( m_Hasher, key ));
 177             if ( base_class::insert( *sp )) {
 178                 sp.release();
 179                 return true;
 180             }
 181             return false;
 182         }
 183
 184         /// Inserts new element
 185         /**
 186             The function creates a node with copy of \p val value
 187             and then inserts the node created into the map.
 188
 189             Preconditions:
 190             - The \p key_type should be constructible from \p key of type \p K.
 191             - The \p value_type should be constructible from \p val of type \p V.
 192
 193             Returns \p true if \p val is inserted into the set, \p false otherwise.
 194         */
 195         template <typename K, typename V>
 196         bool insert( K const& key, V const& val )
 197         {
 198             scoped_node_ptr sp( cxx_node_allocator().New( m_Hasher, key, val ));
 199             if ( base_class::insert( *sp )) {
 200                 sp.release();
 201                 return true;
 202             }
 203             return false;
 204         }
 205
 206         /// Inserts new element and initialize it by a functor
 207         /**
 208             This function inserts new element with key \p key and if inserting is successful then it calls
 209             \p func functor with signature
 210             \code
 211                 struct functor {
 212                     void operator()( value_type& item );
 213                 };
 214             \endcode
 215
 216             The argument \p item of user-defined functor \p func is the reference
 217             to the map's item inserted:
 218                 - <tt>item.first</tt> is a const reference to item's key that cannot be changed.
 219                 - <tt>item.second</tt> is a reference to item's value that may be changed.
 220
 221             \p key_type should be constructible from value of type \p K.
 222
 223             The function allows to split creating of new item into two part:
 224             - create item from \p key;
 225             - insert new item into the map;
 226             - if inserting is successful, initialize the value of item by calling \p func functor
 227
 228             This can be useful if complete initialization of object of \p value_type is heavyweight and
 229             it is preferable that the initialization should be completed only if inserting is successful.
 230         */
 231         template <typename K, typename Func>
 232         bool insert_with( K const& key, Func func )
 233         {
 234             scoped_node_ptr sp( cxx_node_allocator().New( m_Hasher, key ));
 235             if ( base_class::insert( *sp, [&func]( node_type& item ) { func( item.m_Value ); } )) {
 236                 sp.release();
 237                 return true;
 238             }
 239             return false;
 240         }
 241
 242         /// For key \p key inserts data of type \p value_type created in-place from <tt>std::forward<Args>(args)...</tt>
 243         /**
 244             Returns \p true if inserting successful, \p false otherwise.
 245         */
 246         template <typename K, typename... Args>
 247         bool emplace( K&& key, Args&&... args )
 248         {
 249             scoped_node_ptr sp( cxx_node_allocator().New( m_Hasher, std::forward<K>(key), std::forward<Args>(args)... ));
 250             if ( base_class::insert( *sp )) {
 251                 sp.release();
 252                 return true;
 253             }
 254             return false;
 255         }
 256
 257         /// Updates data by \p key
 258         /**
 259             The operation performs inserting or changing data with lock-free manner.
 260
 261             If the \p key not found in the map, then the new item created from \p key
 262             will be inserted into the map iff \p bInsert is \p true
 263             (note that in this case the \ref key_type should be constructible from type \p K).
 264             Otherwise, if \p key is found, the functor \p func is called with item found.
 265             The functor \p Func signature:
 266             \code
 267                 struct my_functor {
 268                     void operator()( bool bNew, value_type& item );
 269                 };
 270             \endcode
 271             where:
 272             - \p bNew - \p true if the item has been inserted, \p false otherwise
 273             - \p item - item of the map
 274
 275             The functor may change any fields of the \p item.second that is \ref value_type.
 276
 277             Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successfull,
 278             \p second is \p true if new item has been added or \p false if \p key already exists.
 279
 280             @warning See \ref cds_intrusive_item_creating "insert item troubleshooting"
 281         */
 282         template <typename K, typename Func>
 283         std::pair<bool, bool> update( K const& key, Func func, bool bInsert = true )
 284         {
 285             scoped_node_ptr sp( cxx_node_allocator().New( m_Hasher, key ));
 286             std::pair<bool, bool> result = base_class::do_update( *sp, [&func]( bool bNew, node_type& node ) { func( bNew, node.m_Value );}, bInsert );
 287             if ( !result.first )
 288                 sp.release();
 289             return result;
 290         }
 291
 292         /// Delete \p key from the map
 293         /**
 294             \p key_type must be constructible from value of type \p K.
 295
 296             Return \p true if \p key is found and deleted, \p false otherwise.
 297         */
 298         template <typename K>
 299         bool erase( K const& key )
 300         {
 301             hash_type h = m_Hasher( key_type( key ));
 302             return base_class::erase( h );
 303         }
 304
 305         /// Delete \p key from the map
 306         /**
 307             The function searches an item with key \p key, calls \p f functor
 308             and deletes the item. If \p key is not found, the functor is not called.
 309
 310             The functor \p Func interface:
 311             \code
 312             struct extractor {
 313                 void operator()(value_type& item) { ... }
 314             };
 315             \endcode
 316             \p key_type must be constructible from value of type \p K.
 317
 318             Return \p true if key is found and deleted, \p false otherwise
 319         */
 320         template <typename K, typename Func>
 321         bool erase( K const& key, Func f )
 322         {
 323             hash_type h = m_Hasher( key_type( key ));
 324             return base_class::erase( h, [&f]( node_type& node) { f( node.m_Value ); } );
 325         }
 326
 327         /// Deletes the element pointed by iterator \p iter
 328         /**
 329             Returns \p true if the operation is successful, \p false otherwise.
 330
 331             The function does not invalidate the iterator, it remains valid and can be used for further traversing.
 332         */
 333         bool erase_at( iterator const& iter )
 334         {
 335             //TODO
 336         }
 337
 338
 339     };
 340
 341 }} // namespace cds::container
 342
 343 #endif // #ifndef CDSLIB_CONTAINER_IMPL_MULTILEVEL_HASHMAP_H