formatting
[libcds.git] / cds / container / impl / bronson_avltree_map_rcu.h
index 0387befca8a1cdd1eeabed9ff43834f2b1f927a0..f21253b5728529262d7bf252b7290dc1d3a53f49 100644 (file)
@@ -1,4 +1,32 @@
-//$$CDS-header$$
+/*
+    This file is a part of libcds - Concurrent Data Structures library
+
+    (C) Copyright Maxim Khizhinsky (libcds.dev@gmail.com) 2006-2017
+
+    Source code repo: http://github.com/khizmax/libcds/
+    Download: http://sourceforge.net/projects/libcds/files/
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this
+      list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 
 #ifndef CDSLIB_CONTAINER_IMPL_BRONSON_AVLTREE_MAP_RCU_H
 #define CDSLIB_CONTAINER_IMPL_BRONSON_AVLTREE_MAP_RCU_H
@@ -10,7 +38,7 @@
 
 namespace cds { namespace container {
 
-    /// Bronson et al AVL-tree (RCU specialization for storing pointer to values)
+    /// Bronson et al AVL-tree (RCU specialization for pointers)
     /** @ingroup cds_nonintrusive_map
         @ingroup cds_nonintrusive_tree
         @headerfile cds/container/bronson_avltree_map_rcu.h
@@ -142,6 +170,8 @@ namespace cds { namespace container {
         static void free_node( node_type * pNode )
         {
             // Free node without disposer
+            assert( !pNode->is_valued( memory_model::memory_order_relaxed ));
+            assert( pNode->m_SyncMonitorInjection.check_free());
             cxx_allocator().Delete( pNode );
         }
 
@@ -152,15 +182,15 @@ namespace cds { namespace container {
 
         static node_type * child( node_type * pNode, int nDir, atomics::memory_order order )
         {
-            return pNode->child( nDir ).load( order );
+            return pNode->child( nDir, order );
         }
 
         static node_type * parent( node_type * pNode, atomics::memory_order order )
         {
-            return pNode->m_pParent.load( order );
+            return pNode->parent( order );
         }
 
-        // RCU safe disposer 
+        // RCU safe disposer
         class rcu_disposer
         {
             node_type *     m_pRetiredList;     ///< head of retired node list
@@ -183,13 +213,13 @@ namespace cds { namespace container {
                 pNode->m_pNextRemoved = m_pRetiredList;
                 m_pRetiredList = pNode;
             }
-            
+
             void dispose_value( mapped_type pVal )
             {
                 assert( m_pRetiredValue == nullptr );
                 m_pRetiredValue = pVal;
             }
-            
+
         private:
             struct internal_disposer
             {
@@ -201,8 +231,8 @@ namespace cds { namespace container {
 
             void clean()
             {
-                assert( !gc::is_locked() );
-                
+                assert( !gc::is_locked());
+
                 // TODO: use RCU::batch_retire
 
                 // Dispose nodes
@@ -259,7 +289,7 @@ namespace cds { namespace container {
                     assert( pNode->m_pValue.load( memory_model::memory_order_relaxed ) == nullptr );
                     CDS_UNUSED( pNode );
                     return pVal;
-                }, 
+                },
                 update_flags::allow_insert
             ) == update_flags::result_inserted;
         }
@@ -276,7 +306,7 @@ namespace cds { namespace container {
 
             RCU \p synchronize() method can be called. RCU should not be locked.
 
-            Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successfull,
+            Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successful,
             \p second is \p true if new node has been added or \p false if the node with \p key
             already exists.
         */
@@ -284,22 +314,15 @@ namespace cds { namespace container {
         std::pair<bool, bool> update( K const& key, mapped_type pVal, bool bInsert = true )
         {
             int result = do_update( key, key_comparator(),
-                [pVal]( node_type * ) -> mapped_type 
+                [pVal]( node_type * ) -> mapped_type
                 {
                     return pVal;
                 },
-                update_flags::allow_update | (bInsert ? update_flags::allow_insert : 0) 
+                update_flags::allow_update | (bInsert ? update_flags::allow_insert : 0)
             );
             return std::make_pair( result != 0, (result & update_flags::result_inserted) != 0 );
         }
 
-        //@cond
-        template <typename K>
-        std::pair<bool, bool> ensure( K const& key, mapped_type pVal )
-        {
-            return update( key, pVal, true );
-        }
-
         //@endcond
 
         /// Delete \p key from the map
@@ -329,8 +352,8 @@ namespace cds { namespace container {
         bool erase_with( K const& key, Less pred )
         {
             CDS_UNUSED( pred );
-            return do_remove( 
-                key, 
+            return do_remove(
+                key,
                 cds::opt::details::make_comparator_from_less<Less>(),
                 []( key_type const&, mapped_type pVal, rcu_disposer& disp ) -> bool { disp.dispose_value( pVal ); return true;  }
             );
@@ -343,7 +366,7 @@ namespace cds { namespace container {
 
             The functor \p Func interface:
             \code
-            struct extractor {
+            struct functor {
                 void operator()( key_type const& key, std::remove_pointer<mapped_type>::type& val) { ... }
             };
             \endcode
@@ -355,13 +378,13 @@ namespace cds { namespace container {
         template <typename K, typename Func>
         bool erase( K const& key, Func f )
         {
-            return do_remove( 
-                key, 
-                key_comparator(), 
-                [&f]( key_type const& key, mapped_type pVal, rcu_disposer& disp ) -> bool { 
+            return do_remove(
+                key,
+                key_comparator(),
+                [&f]( key_type const& key, mapped_type pVal, rcu_disposer& disp ) -> bool {
                     assert( pVal );
-                    f( key, *pVal ); 
-                    disp.dispose_value(pVal); 
+                    f( key, *pVal );
+                    disp.dispose_value(pVal);
                     return true;
                 }
             );
@@ -378,13 +401,13 @@ namespace cds { namespace container {
         bool erase_with( K const& key, Less pred, Func f )
         {
             CDS_UNUSED( pred );
-            return do_remove( 
-                key, 
+            return do_remove(
+                key,
                 cds::opt::details::make_comparator_from_less<Less>(),
-                [&f]( key_type const& key, mapped_type pVal, rcu_disposer& disp ) -> bool { 
+                [&f]( key_type const& key, mapped_type pVal, rcu_disposer& disp ) -> bool {
                     assert( pVal );
-                    f( key, *pVal ); 
-                    disp.dispose_value(pVal); 
+                    f( key, *pVal );
+                    disp.dispose_value(pVal);
                     return true;
                 }
             );
@@ -413,7 +436,7 @@ namespace cds { namespace container {
             return exempt_ptr(do_extract_min( []( key_type const& ) {}));
         }
 
-        /// Extracts minimal key key and corresponding value
+        /// Extracts minimal key and corresponding value
         /**
             Returns \p exempt_ptr to the leftmost item.
             If the tree is empty, returns empty \p exempt_ptr.
@@ -426,7 +449,7 @@ namespace cds { namespace container {
             };
             \endcode
             If the tree is empty, \p f is not called.
-            Otherwise, is it called with minimal key, the pointer to corresponding value is returned
+            Otherwise, it is called with minimal key, the pointer to corresponding value is returned
             as \p exempt_ptr.
 
             @note Due the concurrent nature of the map, the function extracts <i>nearly</i> minimum key.
@@ -445,13 +468,13 @@ namespace cds { namespace container {
             return exempt_ptr(do_extract_min( [&f]( key_type const& key ) { f(key); }));
         }
 
-        /// Extracts minimal key key and corresponding value
+        /// Extracts minimal key and corresponding value
         /**
             This function is a shortcut for the following call:
             \code
             key_type key;
             exempt_ptr xp = theTree.extract_min( [&key]( key_type const& k ) { key = k; } );
-            \endode
+            \endcode
             \p key_type should be copy-assignable. The copy of minimal key
             is returned in \p min_key argument.
         */
@@ -471,7 +494,7 @@ namespace cds { namespace container {
 
             @note Due the concurrent nature of the map, the function extracts <i>nearly</i> maximal key.
             It means that the function gets rightmost leaf of the tree and tries to unlink it.
-            During unlinking, a concurrent thread may insert an item with key great than leftmost item's key.
+            During unlinking, a concurrent thread may insert an item with key greater than rightmost item's key.
             So, the function returns the item with maximum key at the moment of tree traversing.
 
             RCU \p synchronize method can be called. RCU should NOT be locked.
@@ -497,12 +520,12 @@ namespace cds { namespace container {
                 };
             \endcode
             If the tree is empty, \p f is not called.
-            Otherwise, is it called with maximal key, the pointer to corresponding value is returned
+            Otherwise, it is called with maximal key, the pointer to corresponding value is returned
             as \p exempt_ptr.
 
             @note Due the concurrent nature of the map, the function extracts <i>nearly</i> maximal key.
             It means that the function gets rightmost leaf of the tree and tries to unlink it.
-            During unlinking, a concurrent thread may insert an item with key great than leftmost item's key.
+            During unlinking, a concurrent thread may insert an item with key greater than rightmost item's key.
             So, the function returns the item with maximum key at the moment of tree traversing.
 
             RCU \p synchronize method can be called. RCU should NOT be locked.
@@ -522,7 +545,7 @@ namespace cds { namespace container {
             \code
                 key_type key;
                 exempt_ptr xp = theTree.extract_max( [&key]( key_type const& k ) { key = k; } );
-            \endode
+            \endcode
             \p key_type should be copy-assignable. The copy of maximal key
             is returned in \p max_key argument.
         */
@@ -569,7 +592,7 @@ namespace cds { namespace container {
             The interface of \p Func functor is:
             \code
             struct functor {
-                void operator()( key_type const& key, mapped_type& item );
+                void operator()( key_type const& key, std::remove_pointer< mapped_type )::type& item );
             };
             \endcode
             where \p item is the item found.
@@ -582,7 +605,7 @@ namespace cds { namespace container {
         template <typename K, typename Func>
         bool find( K const& key, Func f )
         {
-            return do_find( key, key_comparator(), 
+            return do_find( key, key_comparator(),
                 [&f]( node_type * pNode ) -> bool {
                     assert( pNode != nullptr );
                     mapped_type pVal = pNode->m_pValue.load( memory_model::memory_order_relaxed );
@@ -606,7 +629,7 @@ namespace cds { namespace container {
         bool find_with( K const& key, Less pred, Func f )
         {
             CDS_UNUSED( pred );
-            return do_find( key, cds::opt::details::make_comparator_from_less<Less>(), 
+            return do_find( key, cds::opt::details::make_comparator_from_less<Less>(),
                 [&f]( node_type * pNode ) -> bool {
                     assert( pNode != nullptr );
                     mapped_type pVal = pNode->m_pValue.load( memory_model::memory_order_relaxed );
@@ -615,11 +638,11 @@ namespace cds { namespace container {
                         return true;
                     }
                     return false;
-                } 
+                }
             );
         }
 
-        /// Find the key \p key
+        /// Checks whether the map contains \p key
         /**
             The function searches the item with key equal to \p key
             and returns \p true if it is found, and \p false otherwise.
@@ -627,20 +650,19 @@ namespace cds { namespace container {
             The function applies RCU lock internally.
         */
         template <typename K>
-        bool find( K const& key )
+        bool contains( K const& key )
         {
             return do_find( key, key_comparator(), []( node_type * ) -> bool { return true; });
         }
 
-        /// Finds the key \p val using \p pred predicate for searching
+        /// Checks whether the map contains \p key using \p pred predicate for searching
         /**
-            The function is an analog of \p find(K const&)
-            but \p pred is used for key comparing.
+            The function is similar to <tt>contains( key )</tt> but \p pred is used for key comparing.
             \p Less functor has the interface like \p std::less.
-            \p Less must imply the same element order as the comparator used for building the map.
+            \p Less must imply the same element order as the comparator used for building the set.
         */
         template <typename K, typename Less>
-        bool find_with( K const& key, Less pred )
+        bool contains( K const& key, Less pred )
         {
             CDS_UNUSED( pred );
             return do_find( key, cds::opt::details::make_comparator_from_less<Less>(), []( node_type * ) -> bool { return true; } );
@@ -653,7 +675,7 @@ namespace cds { namespace container {
             this sequence
             \code
             set.clear();
-            assert( set.empty() );
+            assert( set.empty());
             \endcode
             the assertion could be raised.
 
@@ -663,7 +685,7 @@ namespace cds { namespace container {
         */
         void clear()
         {
-            while ( extract_min() );
+            while ( extract_min());
         }
 
         /// Clears the tree (not thread safe)
@@ -735,7 +757,7 @@ namespace cds { namespace container {
                 void operator()( size_t nLevel, size_t hLeft, size_t hRight );
             };
             \endcode
-            where 
+            where
             - \p nLevel - the level where the violation is found
             - \p hLeft - the height of left subtree
             - \p hRight - the height of right subtree
@@ -745,7 +767,7 @@ namespace cds { namespace container {
         template <typename Func>
         bool check_consistency( Func f ) const
         {
-            node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_relaxed );
+            node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_acquire );
             if ( pChild ) {
                 size_t nErrors = 0;
                 do_check_consistency( pChild, 1, f, nErrors );
@@ -760,8 +782,16 @@ namespace cds { namespace container {
         size_t do_check_consistency( node_type * pNode, size_t nLevel, Func f, size_t& nErrors ) const
         {
             if ( pNode ) {
-                size_t hLeft = do_check_consistency( child( pNode, left_child, memory_model::memory_order_relaxed ), nLevel + 1, f, nErrors );
-                size_t hRight = do_check_consistency( child( pNode, right_child, memory_model::memory_order_relaxed ), nLevel + 1, f, nErrors );
+                key_comparator cmp;
+                node_type * pLeft = child( pNode, left_child, memory_model::memory_order_acquire );
+                node_type * pRight = child( pNode, right_child, memory_model::memory_order_acquire );
+                if ( pLeft && cmp( pLeft->m_key, pNode->m_key ) > 0 )
+                    ++nErrors;
+                if (  pRight && cmp( pNode->m_key, pRight->m_key ) > 0 )
+                    ++nErrors;
+
+                size_t hLeft = do_check_consistency( pLeft, nLevel + 1, f, nErrors );
+                size_t hRight = do_check_consistency( pRight, nLevel + 1, f, nErrors );
 
                 if ( hLeft >= hRight ) {
                     if ( hLeft - hRight > 1 ) {
@@ -787,7 +817,7 @@ namespace cds { namespace container {
             find_result result;
             {
                 rcu_lock l;
-                result = try_find( key, cmp, f, m_pRoot, 1, 0 );
+                result = try_find( key, cmp, f, m_pRoot, right_child, 0 );
             }
             assert( result != find_result::retry );
             return result == find_result::found;
@@ -851,22 +881,34 @@ namespace cds { namespace container {
             {
                 rcu_lock l;
 
-                int result = update_flags::failed;
-                do {
+                while ( true ) {
+                    int result;
+
                     // get right child of root
                     node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_acquire );
                     if ( pChild ) {
-                        version_type nChildVersion = pChild->version( memory_model::memory_order_relaxed );
+                        version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
                         if ( nChildVersion & node_type::shrinking ) {
                             m_stat.onRemoveRootWaitShrinking();
-                            pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
+                            pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
                             result = update_flags::retry;
                         }
                         else if ( pChild == child( m_pRoot, right_child, memory_model::memory_order_acquire )) {
                             result = try_extract_minmax( nDir, func, m_pRoot, pChild, nChildVersion, removed_list );
                         }
+                        else
+                            result = update_flags::retry;
                     }
-                } while ( result == update_flags::retry );
+                    else
+                        return;
+
+                    if ( result == update_flags::retry )
+                        m_stat.onRemoveRetry();
+                    else {
+                        m_stat.onExtract( result == update_flags::result_removed );
+                        return;
+                    }
+                }
             }
         }
 
@@ -879,6 +921,7 @@ namespace cds { namespace container {
                 key_comparator(),
                 [&pExtracted]( key_type const&, mapped_type pVal, rcu_disposer& ) -> bool { pExtracted = pVal; return false; }
             );
+            m_stat.onExtract( pExtracted != nullptr );
             return pExtracted;
         }
 
@@ -892,92 +935,149 @@ namespace cds { namespace container {
                 cds::opt::details::make_comparator_from_less<Less>(),
                 [&pExtracted]( key_type const&, mapped_type pVal, rcu_disposer& ) -> bool { pExtracted = pVal; return false; }
             );
+            m_stat.onExtract( pExtracted != nullptr );
             return pExtracted;
         }
-
         //@endcond
 
     private:
         //@cond
+        static int height( node_type * pNode, atomics::memory_order order )
+        {
+            assert( pNode );
+            return pNode->m_nHeight.load( order );
+        }
+        static void set_height( node_type * pNode, int h, atomics::memory_order order )
+        {
+            assert( pNode );
+            pNode->m_nHeight.store( h, order );
+        }
+        static int height_null( node_type * pNode, atomics::memory_order order )
+        {
+            return pNode ? height( pNode, order ) : 0;
+        }
+
+        static CDS_CONSTEXPR int const c_stackSize = 64;
+
         template <typename Q, typename Compare, typename Func>
         find_result try_find( Q const& key, Compare cmp, Func f, node_type * pNode, int nDir, version_type nVersion ) const
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
             assert( pNode );
 
-            while ( true ) {
-                node_type * pChild = child( pNode, nDir, memory_model::memory_order_relaxed );
-                if ( !pChild ) {
-                    if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
-                        m_stat.onFindRetry();
-                        return find_result::retry;
-                    }
+            struct stack_record
+            {
+                node_type * pNode;
+                version_type nVersion;
+                int nDir;
+            };
 
-                    m_stat.onFindFailed();
-                    return find_result::not_found;
-                }
+            stack_record stack[c_stackSize];
+            int pos = 0;
+            stack[0].pNode = pNode;
+            stack[0].nVersion = nVersion;
+            stack[0].nDir = nDir;
+
+            while ( pos >= 0 ) {
+                pNode = stack[pos].pNode;
+                nVersion = stack[pos].nVersion;
+                nDir = stack[pos].nDir;
+
+                while ( true ) {
+                    node_type * pChild = child( pNode, nDir, memory_model::memory_order_acquire );
+                    if ( !pChild ) {
+                        if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                            --pos;
+                            m_stat.onFindRetry();
+                            break; // retry
+                        }
+                        m_stat.onFindFailed();
+                        return find_result::not_found;
+                    }
 
-                int nCmp = cmp( key, pChild->m_key );
-                if ( nCmp == 0 ) {
-                    if ( pChild->is_valued( memory_model::memory_order_relaxed ) ) {
-                        // key found
-                        node_scoped_lock l( m_Monitor, *pChild );
-                        if ( pChild->is_valued( memory_model::memory_order_relaxed )) {
-                            if ( f( pChild ) ) {
-                                m_stat.onFindSuccess();
-                                return find_result::found;
+                    int nCmp = cmp( key, pChild->m_key );
+                    if ( nCmp == 0 ) {
+                        if ( pChild->is_valued( memory_model::memory_order_acquire )) {
+                            // key found
+                            node_scoped_lock l( m_Monitor, *pChild );
+                            if ( child(pNode, nDir, memory_model::memory_order_acquire) == pChild ) {
+                                if ( pChild->is_valued( memory_model::memory_order_relaxed )) {
+                                    if ( f( pChild )) {
+                                        m_stat.onFindSuccess();
+                                        return find_result::found;
+                                    }
+                                }
+                            }
+                            else {
+                                m_stat.onFindRetry();
+                                continue;
                             }
                         }
+                        m_stat.onFindFailed();
+                        return find_result::not_found;
                     }
+                    else {
+                        version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
+                        if ( nChildVersion & node_type::shrinking ) {
+                            m_stat.onFindWaitShrinking();
+                            pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
 
-                    m_stat.onFindFailed();
-                    return find_result::not_found;
-                }
-
-                version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
-                if ( nChildVersion & node_type::shrinking ) {
-                    m_stat.onFindWaitShrinking();
-                    pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
+                            if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                                --pos;
+                                m_stat.onFindRetry();
+                                break; // retry
+                            }
+                        }
+                        else if ( nChildVersion != node_type::unlinked && child( pNode, nDir, memory_model::memory_order_acquire ) == pChild )
+                        {
+                            if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                                --pos;
+                                m_stat.onFindRetry();
+                                break; // retry
+                            }
 
-                    if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
-                        m_stat.onFindRetry();
-                        return find_result::retry;
-                    }
-                }
-                else if ( nChildVersion != node_type::unlinked ) {
+                            ++pos;
+                            assert(pos < c_stackSize);
+                            stack[pos].pNode = pChild;
+                            stack[pos].nVersion = nChildVersion;
+                            stack[pos].nDir = nCmp;
+                            break; // child iteration
+                        }
 
-                    if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
-                        m_stat.onFindRetry();
-                        return find_result::retry;
+                        if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                            --pos;
+                            m_stat.onFindRetry();
+                            break; // retry
+                        }
                     }
-
-                    find_result found = try_find( key, cmp, f, pChild, nCmp, nChildVersion );
-                    if ( found != find_result::retry )
-                        return found;
+                    m_stat.onFindRetry();
                 }
             }
+            return find_result::retry;
         }
 
         template <typename K, typename Compare, typename Func>
         int try_update_root( K const& key, Compare cmp, int nFlags, Func funcUpdate, rcu_disposer& disp )
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
+
+            while ( true ) {
+                int result;
 
-            int result;
-            do {
                 // get right child of root
                 node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_acquire );
                 if ( pChild ) {
-                    version_type nChildVersion = pChild->version( memory_model::memory_order_relaxed );
+                    version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
                     if ( nChildVersion & node_type::shrinking ) {
                         m_stat.onUpdateRootWaitShrinking();
-                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
+                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
                         result = update_flags::retry;
                     }
-                    else if ( pChild == child( m_pRoot, right_child, memory_model::memory_order_acquire )) {
-                        result = try_update( key, cmp, nFlags, funcUpdate, m_pRoot, pChild, nChildVersion, disp );
-                    }
-                } 
+                    else if ( pChild == child( m_pRoot, right_child, memory_model::memory_order_acquire ))
+                        result = try_update( key, cmp, nFlags, funcUpdate, pChild, nChildVersion, disp );
+                    else
+                        result = update_flags::retry;
+                }
                 else {
                     // the tree is empty
                     if ( nFlags & update_flags::allow_insert ) {
@@ -985,7 +1085,7 @@ namespace cds { namespace container {
                         {
                             node_scoped_lock l( m_Monitor, *m_pRoot );
                             if ( child( m_pRoot, right_child, memory_model::memory_order_acquire ) != nullptr ) {
-                                result = result == update_flags::retry;
+                                result = update_flags::retry;
                                 continue;
                             }
 
@@ -994,8 +1094,8 @@ namespace cds { namespace container {
                             assert( pVal != nullptr );
                             pNew->m_pValue.store( pVal, memory_model::memory_order_release );
 
-                            m_pRoot->child( pNew, right_child, memory_model::memory_order_relaxed );
-                            m_pRoot->height( 2, memory_model::memory_order_relaxed );
+                            m_pRoot->child( pNew, right_child, memory_model::memory_order_release);
+                            set_height( m_pRoot, 2, memory_model::memory_order_release );
                         }
 
                         ++m_ItemCounter;
@@ -1005,134 +1105,195 @@ namespace cds { namespace container {
 
                     return update_flags::failed;
                 }
-            } while ( result == update_flags::retry );
-            return result;
+
+                if ( result != update_flags::retry )
+                    return result;
+            }
         }
 
         template <typename K, typename Compare, typename Func>
         bool try_remove_root( K const& key, Compare cmp, Func func, rcu_disposer& disp )
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
+
+            while ( true ) {
+                int result;
 
-            int result;
-            do {
                 // get right child of root
                 node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_acquire );
                 if ( pChild ) {
-                    version_type nChildVersion = pChild->version( memory_model::memory_order_relaxed );
+                    version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
                     if ( nChildVersion & node_type::shrinking ) {
                         m_stat.onRemoveRootWaitShrinking();
-                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
+                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
                         result = update_flags::retry;
                     }
                     else if ( pChild == child( m_pRoot, right_child, memory_model::memory_order_acquire )) {
                         result = try_remove( key, cmp, func, m_pRoot, pChild, nChildVersion, disp );
                     }
+                    else
+                        result = update_flags::retry;
                 }
                 else
                     return false;
-            } while ( result == update_flags::retry );
 
-            return result == update_flags::result_removed;
+                if ( result == update_flags::retry )
+                    m_stat.onRemoveRetry();
+                else {
+                    m_stat.onRemove( result == update_flags::result_removed );
+                    return result == update_flags::result_removed;
+                }
+            }
         }
 
         template <typename K, typename Compare, typename Func>
-        int try_update( K const& key, Compare cmp, int nFlags, Func funcUpdate, node_type * pParent, node_type * pNode, version_type nVersion, rcu_disposer& disp )
+        int try_update( K const& key, Compare cmp, int nFlags, Func funcUpdate, node_type * pNode, version_type nVersion, rcu_disposer& disp )
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
             assert( nVersion != node_type::unlinked );
-            CDS_UNUSED( pParent );
 
-            int nCmp = cmp( key, pNode->m_key );
-            if ( nCmp == 0 ) {
-                if ( nFlags & update_flags::allow_update ) {
-                    return try_update_node( funcUpdate, pNode, disp );
-                }
-                return update_flags::failed;
-            }
+            struct stack_record
+            {
+                node_type * pNode;
+                version_type nVersion;
+            };
+
+            stack_record stack[c_stackSize];
+            int pos = 0;
+            stack[0].pNode = pNode;
+            stack[0].nVersion = nVersion;
 
-            int result;
-            do {
-                node_type * pChild = child( pNode, nCmp, memory_model::memory_order_relaxed );
-                if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+            while ( pos >= 0 ) {
+                pNode = stack[pos].pNode;
+                nVersion = stack[pos].nVersion;
+
+                int nCmp = cmp( key, pNode->m_key );
+                if ( nCmp == 0 ) {
+                    int result = try_update_node( nFlags, funcUpdate, pNode, nVersion, disp );
+                    if ( result != update_flags::retry )
+                        return result;
+                    --pos;
                     m_stat.onUpdateRetry();
-                    return update_flags::retry;
+                    continue;
                 }
 
-                if ( pChild == nullptr ) {
-                    // insert new node
-                    if ( nFlags & update_flags::allow_insert )
-                        result = try_insert_node( key, funcUpdate, pNode, nCmp, nVersion, disp );
-                    else
-                        result = update_flags::failed;
-                }
-                else {
-                    // update child
-                    result = update_flags::retry;
-                    version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
-                    if ( nChildVersion & node_type::shrinking ) {
-                        m_stat.onUpdateWaitShrinking();
-                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
-                        // retry
+                while ( true ) {
+                    node_type * pChild = child( pNode, nCmp, memory_model::memory_order_acquire );
+                    if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                        --pos;
+                        m_stat.onUpdateRetry();
+                        break;
                     }
-                    else if ( pChild == child( pNode, nCmp, memory_model::memory_order_relaxed )) {
-                        // this second read is important, because it is protected by nChildVersion
 
-                        // validate the read that our caller took to get to node
-                        if ( pNode->version( memory_model::memory_order_relaxed ) != nVersion ) {
+                    if ( pChild == nullptr ) {
+                        // insert new node
+                        if ( nFlags & update_flags::allow_insert ) {
+                            int result = try_insert_node( key, funcUpdate, pNode, nCmp, nVersion, disp );
+                            if ( result != update_flags::retry )
+                                return result;
+                            --pos;
                             m_stat.onUpdateRetry();
-                            return update_flags::retry;
+                            break;
                         }
-
-                        // At this point we know that the traversal our parent took to get to node is still valid.
-                        // The recursive implementation will validate the traversal from node to
-                        // child, so just prior to the node nVersion validation both traversals were definitely okay.
-                        // This means that we are no longer vulnerable to node shrinks, and we don't need
-                        // to validate node version any more.
-                        result = try_update( key, cmp, nFlags, funcUpdate, pNode, pChild, nChildVersion, disp );
+                        else
+                            return update_flags::failed;
+                    }
+                    else {
+                        // update child
+                        version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
+                        if ( nChildVersion & node_type::shrinking ) {
+                            m_stat.onUpdateWaitShrinking();
+                            pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
+                            // retry
+                        }
+                        else if ( pChild == child( pNode, nCmp, memory_model::memory_order_acquire )) {
+                            // this second read is important, because it is protected by nChildVersion
+
+                            // validate the read that our caller took to get to node
+                            if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
+                                --pos;
+                                m_stat.onUpdateRetry();
+                                break; // retry
+                            }
+                            // At this point we know that the traversal our parent took to get to node is still valid.
+                            // The recursive implementation will validate the traversal from node to
+                            // child, so just prior to the node nVersion validation both traversals were definitely okay.
+                            // This means that we are no longer vulnerable to node shrinks, and we don't need
+                            // to validate node version any more.
+                            ++pos;
+                            assert( pos < c_stackSize );
+                            stack[pos].pNode = pChild;
+                            stack[pos].nVersion = nChildVersion;
+                            assert( nChildVersion != node_type::unlinked );
+                            break; // child iteration
+                        }
+                        m_stat.onUpdateRetry();
                     }
                 }
-            } while ( result == update_flags::retry );
-            return result;
+            }
+            return update_flags::retry;
         }
 
         template <typename K, typename Compare, typename Func>
         int try_remove( K const& key, Compare cmp, Func func, node_type * pParent, node_type * pNode, version_type nVersion, rcu_disposer& disp )
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
             assert( nVersion != node_type::unlinked );
 
-            int nCmp = cmp( key, pNode->m_key );
-            if ( nCmp == 0 )
-                return try_remove_node( pParent, pNode, nVersion, func, disp );
+            struct stack_record
+            {
+                node_type * pParent;
+                node_type * pNode;
+                version_type nVersion;
+            };
+
+            stack_record stack[c_stackSize];
+            int pos = 0;
+            stack[0].pParent = pParent;
+            stack[0].pNode = pNode;
+            stack[0].nVersion = nVersion;
+
+            while ( pos >= 0 ) {
+                pParent = stack[pos].pParent;
+                pNode = stack[pos].pNode;
+                nVersion = stack[pos].nVersion;
 
-            int result;
-            do {
-                node_type * pChild = child( pNode, nCmp, memory_model::memory_order_relaxed );
-                if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                int nCmp = cmp( key, pNode->m_key );
+                if ( nCmp == 0 ) {
+                    int result = try_remove_node( pParent, pNode, nVersion, func, disp );
+                    if ( result != update_flags::retry )
+                        return result;
+                    --pos;
                     m_stat.onRemoveRetry();
-                    return update_flags::retry;
+                    continue;
                 }
 
-                if ( pChild == nullptr ) {
-                    return update_flags::failed;
-                }
-                else {
+                while ( true ) {
+                    node_type * pChild = child( pNode, nCmp, memory_model::memory_order_acquire );
+                    if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                        --pos;
+                        m_stat.onRemoveRetry();
+                        break;
+                    }
+
+                    if ( pChild == nullptr )
+                        return update_flags::failed;
+
                     // update child
-                    result = update_flags::retry;
                     version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
                     if ( nChildVersion & node_type::shrinking ) {
                         m_stat.onRemoveWaitShrinking();
-                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
+                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
                         // retry
                     }
-                    else if ( pChild == child( pNode, nCmp, memory_model::memory_order_relaxed )) {
+                    else if ( pChild == child( pNode, nCmp, memory_model::memory_order_acquire )) {
                         // this second read is important, because it is protected by nChildVersion
 
                         // validate the read that our caller took to get to node
-                        if ( pNode->version( memory_model::memory_order_relaxed ) != nVersion ) {
+                        if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                            --pos;
                             m_stat.onRemoveRetry();
-                            return update_flags::retry;
+                            break;
                         }
 
                         // At this point we know that the traversal our parent took to get to node is still valid.
@@ -1140,46 +1301,90 @@ namespace cds { namespace container {
                         // child, so just prior to the node nVersion validation both traversals were definitely okay.
                         // This means that we are no longer vulnerable to node shrinks, and we don't need
                         // to validate node version any more.
-                        result = try_remove( key, cmp, func, pNode, pChild, nChildVersion, disp );
+                        ++pos;
+                        assert( pos < c_stackSize );
+                        stack[pos].pParent = pNode;
+                        stack[pos].pNode = pChild;
+                        stack[pos].nVersion = nChildVersion;
+                        break; // child iteration
                     }
+                    m_stat.onRemoveRetry();
                 }
-            } while ( result == update_flags::retry );
-            return result;
+            }
+            return update_flags::retry;
         }
 
         template <typename Func>
         int try_extract_minmax( int nDir, Func func, node_type * pParent, node_type * pNode, version_type nVersion, rcu_disposer& disp )
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
             assert( nVersion != node_type::unlinked );
 
-            int result;
-            do {
-                node_type * pChild = child( pNode, nDir, memory_model::memory_order_relaxed );
-                if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
-                    m_stat.onRemoveRetry();
-                    return update_flags::retry;
-                }
+            struct stack_record
+            {
+                node_type * pParent;
+                node_type * pNode;
+                version_type nVersion;
+            };
+
+            stack_record stack[c_stackSize];
+            int pos = 0;
+            stack[0].pParent = pParent;
+            stack[0].pNode = pNode;
+            stack[0].nVersion = nVersion;
+
+            while ( pos >= 0 ) {
+                pParent = stack[pos].pParent;
+                pNode = stack[pos].pNode;
+                nVersion = stack[pos].nVersion;
+
+                while ( true ) {
+                    int iterDir = nDir;
+                    node_type * pChild = child( pNode, iterDir, memory_model::memory_order_acquire );
+                    if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                        --pos;
+                        m_stat.onRemoveRetry();
+                        break;
+                    }
+
+                    if ( !pChild ) {
+                        // Found min/max
+                        if ( pNode->is_valued( memory_model::memory_order_acquire )) {
+                            int result = try_remove_node( pParent, pNode, nVersion, func, disp );
+
+                            if ( result == update_flags::result_removed )
+                                return result;
+
+                            --pos;
+                            m_stat.onRemoveRetry();
+                            break;
+                        }
+                        else {
+                            // check right (for min) or left (for max) child node
+                            iterDir = -iterDir;
+                            pChild = child( pNode, iterDir, memory_model::memory_order_acquire );
+                            if ( !pChild ) {
+                                --pos;
+                                m_stat.onRemoveRetry();
+                                break;
+                            }
+                        }
+                    }
 
-                if ( pChild == nullptr ) {
-                    // Found min/max
-                    return try_remove_node( pParent, pNode, nVersion, func, disp );
-                }
-                else {
-                    result = update_flags::retry;
                     version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
                     if ( nChildVersion & node_type::shrinking ) {
                         m_stat.onRemoveWaitShrinking();
-                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
+                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
                         // retry
                     }
-                    else if ( pChild == child( pNode, nDir, memory_model::memory_order_relaxed )) {
+                    else if ( pChild == child( pNode, iterDir, memory_model::memory_order_acquire )) {
                         // this second read is important, because it is protected by nChildVersion
 
                         // validate the read that our caller took to get to node
-                        if ( pNode->version( memory_model::memory_order_relaxed ) != nVersion ) {
+                        if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
+                            --pos;
                             m_stat.onRemoveRetry();
-                            return update_flags::retry;
+                            break;
                         }
 
                         // At this point we know that the traversal our parent took to get to node is still valid.
@@ -1187,11 +1392,17 @@ namespace cds { namespace container {
                         // child, so just prior to the node nVersion validation both traversals were definitely okay.
                         // This means that we are no longer vulnerable to node shrinks, and we don't need
                         // to validate node version any more.
-                        result = try_extract_minmax( nDir, func, pNode, pChild, nChildVersion, disp );
+                        ++pos;
+                        assert( pos < c_stackSize );
+                        stack[pos].pParent = pNode;
+                        stack[pos].pNode = pChild;
+                        stack[pos].nVersion = nChildVersion;
+                        break; // child iteration
                     }
+                    m_stat.onRemoveRetry();
                 }
-            } while ( result == update_flags::retry );
-            return result;
+            }
+            return update_flags::retry;
         }
 
         template <typename K, typename Func>
@@ -1202,12 +1413,12 @@ namespace cds { namespace container {
             auto fnCreateNode = [&funcUpdate]( node_type * pNew ) {
                 mapped_type pVal = funcUpdate( pNew );
                 assert( pVal != nullptr );
-                pNew->m_pValue.store( pVal, memory_model::memory_order_relaxed );
+                pNew->m_pValue.store( pVal, memory_model::memory_order_release );
             };
 
-            if ( c_bRelaxedInsert ) {
+            static_if ( c_bRelaxedInsert ) {
                 if ( pNode->version( memory_model::memory_order_acquire ) != nVersion
-                     || child( pNode, nDir, memory_model::memory_order_relaxed ) != nullptr ) 
+                     || child( pNode, nDir, memory_model::memory_order_acquire ) != nullptr )
                 {
                     m_stat.onInsertRetry();
                     return update_flags::retry;
@@ -1221,10 +1432,13 @@ namespace cds { namespace container {
                 assert( pNode != nullptr );
                 node_scoped_lock l( m_Monitor, *pNode );
 
-                if ( pNode->version( memory_model::memory_order_relaxed ) != nVersion
-                     || child( pNode, nDir, memory_model::memory_order_relaxed ) != nullptr ) 
+                if ( pNode->version( memory_model::memory_order_acquire ) != nVersion
+                     || child( pNode, nDir, memory_model::memory_order_acquire ) != nullptr )
                 {
-                    if ( c_bRelaxedInsert ) {
+                    static_if ( c_bRelaxedInsert ) {
+                        mapped_type pVal = pNew->m_pValue.load( memory_model::memory_order_relaxed );
+                        pNew->m_pValue.store( nullptr, memory_model::memory_order_relaxed );
+                        free_value( pVal );
                         free_node( pNew );
                         m_stat.onRelaxedInsertFailed();
                     }
@@ -1233,41 +1447,54 @@ namespace cds { namespace container {
                     return update_flags::retry;
                 }
 
-                if ( !c_bRelaxedInsert )
+                static_if ( !c_bRelaxedInsert )
                     fnCreateNode( pNew = alloc_node( key, 1, 0, pNode, nullptr, nullptr ));
 
-                pNode->child( pNew, nDir, memory_model::memory_order_relaxed );
+                pNode->child( pNew, nDir, memory_model::memory_order_release );
                 pDamaged = fix_height_locked( pNode );
             }
 
             ++m_ItemCounter;
             m_stat.onInsertSuccess();
 
-            fix_height_and_rebalance( pDamaged, disp );
+            if ( pDamaged ) {
+                fix_height_and_rebalance( pDamaged, disp );
+                m_stat.onInsertRebalanceRequired();
+            }
 
             return update_flags::result_inserted;
         }
 
         template <typename Func>
-        int try_update_node( Func funcUpdate, node_type * pNode, rcu_disposer& disp )
+        int try_update_node( int nFlags, Func funcUpdate, node_type * pNode, version_type nVersion, rcu_disposer& disp )
         {
             mapped_type pOld;
+            bool bInserted;
             assert( pNode != nullptr );
             {
                 node_scoped_lock l( m_Monitor, *pNode );
 
-                if ( pNode->is_unlinked( memory_model::memory_order_relaxed )) {
+                if ( pNode->version(memory_model::memory_order_acquire) != nVersion )
+                    return update_flags::retry;
+
+                if ( pNode->is_unlinked( memory_model::memory_order_acquire )) {
                     m_stat.onUpdateUnlinked();
                     return update_flags::retry;
                 }
 
+                if ( pNode->is_valued( memory_model::memory_order_relaxed ) && !(nFlags & update_flags::allow_update)) {
+                    m_stat.onInsertFailed();
+                    return update_flags::failed;
+                }
+
                 pOld = pNode->value( memory_model::memory_order_relaxed );
+                bInserted = pOld == nullptr;
                 mapped_type pVal = funcUpdate( pNode );
                 if ( pVal == pOld )
                     pOld = nullptr;
                 else {
                     assert( pVal != nullptr );
-                    pNode->m_pValue.store( pVal, memory_model::memory_order_relaxed );
+                    pNode->m_pValue.store( pVal, memory_model::memory_order_release );
                 }
             }
 
@@ -1276,6 +1503,12 @@ namespace cds { namespace container {
                 m_stat.onDisposeValue();
             }
 
+            if ( bInserted ) {
+                ++m_ItemCounter;
+                m_stat.onInsertSuccess();
+                return update_flags::result_inserted;
+            }
+
             m_stat.onUpdateSuccess();
             return update_flags::result_updated;
         }
@@ -1286,28 +1519,32 @@ namespace cds { namespace container {
             assert( pParent != nullptr );
             assert( pNode != nullptr );
 
-            if ( !pNode->is_valued( atomics::memory_order_relaxed ) )
+            if ( !pNode->is_valued( memory_model::memory_order_acquire ))
                 return update_flags::failed;
 
-            if ( child( pNode, left_child, memory_model::memory_order_relaxed ) == nullptr 
-              || child( pNode, right_child, memory_model::memory_order_relaxed ) == nullptr )
-            { 
+            if ( child( pNode, left_child, memory_model::memory_order_acquire ) == nullptr
+              || child( pNode, right_child, memory_model::memory_order_acquire ) == nullptr )
+            {
+                // pNode can be replaced with its child
+
                 node_type * pDamaged;
                 mapped_type pOld;
                 {
                     node_scoped_lock lp( m_Monitor, *pParent );
-                    if ( pParent->is_unlinked( atomics::memory_order_relaxed ) || parent( pNode, memory_model::memory_order_relaxed ) != pParent )
+                    if ( pParent->is_unlinked( memory_model::memory_order_acquire ) || parent( pNode, memory_model::memory_order_acquire ) != pParent )
                         return update_flags::retry;
 
                     {
                         node_scoped_lock ln( m_Monitor, *pNode );
+                        if ( pNode->version( memory_model::memory_order_acquire ) != nVersion )
+                            return update_flags::retry;
+
                         pOld = pNode->value( memory_model::memory_order_relaxed );
-                        if ( !( pNode->version( memory_model::memory_order_relaxed ) == nVersion
-                          && pOld 
-                          && try_unlink_locked( pParent, pNode, disp )))
-                        {
+                        if ( !pOld )
+                            return update_flags::failed;
+
+                        if ( !try_unlink_locked( pParent, pNode, disp ))
                             return update_flags::retry;
-                        }
                     }
                     pDamaged = fix_height_locked( pParent );
                 }
@@ -1318,37 +1555,40 @@ namespace cds { namespace container {
                 else
                     m_stat.onExtractValue();
 
-                fix_height_and_rebalance( pDamaged, disp );
-                return update_flags::result_removed;
+                if ( pDamaged ) {
+                    fix_height_and_rebalance( pDamaged, disp );
+                    m_stat.onRemoveRebalanceRequired();
+                }
             }
             else {
-                int result = update_flags::retry;
+                // pNode is an internal with two children
+
                 mapped_type pOld;
                 {
                     node_scoped_lock ln( m_Monitor, *pNode );
-                    pOld = pNode->value( atomics::memory_order_relaxed );
-                    if ( pNode->version( atomics::memory_order_relaxed ) == nVersion && pOld ) {
-                        pNode->m_pValue.store( nullptr, atomics::memory_order_relaxed );
-                        result = update_flags::result_removed;
-                    }
-                }
+                    pOld = pNode->value( memory_model::memory_order_relaxed );
+                    if ( pNode->version( memory_model::memory_order_relaxed ) != nVersion )
+                        return update_flags::retry;
+                    if ( !pOld )
+                        return update_flags::failed;
 
-                if ( result == update_flags::result_removed ) {
-                    --m_ItemCounter;
-                    if ( func( pNode->m_key, pOld, disp ))  // calls pOld disposer inside
-                        m_stat.onDisposeValue();
-                    else
-                        m_stat.onExtractValue();
+                    pNode->m_pValue.store( nullptr, memory_model::memory_order_release );
+                    m_stat.onMakeRoutingNode();
                 }
 
-                return result;
+                --m_ItemCounter;
+                if ( func( pNode->m_key, pOld, disp ))  // calls pOld disposer inside
+                    m_stat.onDisposeValue();
+                else
+                    m_stat.onExtractValue();
             }
+            return update_flags::result_removed;
         }
 
         bool try_unlink_locked( node_type * pParent, node_type * pNode, rcu_disposer& disp )
         {
             // pParent and pNode must be locked
-            assert( !pParent->is_unlinked(memory_model::memory_order_relaxed) );
+            assert( !pParent->is_unlinked(memory_model::memory_order_relaxed));
 
             node_type * pParentLeft = child( pParent, left_child, memory_model::memory_order_relaxed );
             node_type * pParentRight = child( pParent, right_child, memory_model::memory_order_relaxed );
@@ -1357,8 +1597,8 @@ namespace cds { namespace container {
                 return false;
             }
 
-            assert( !pNode->is_unlinked( memory_model::memory_order_relaxed ) );
-            assert( pParent == parent( pNode, memory_model::memory_order_relaxed));
+            assert( !pNode->is_unlinked( memory_model::memory_order_relaxed ));
+            assert( pParent == parent( pNode, memory_model::memory_order_relaxed ));
 
             node_type * pLeft = child( pNode, left_child, memory_model::memory_order_relaxed );
             node_type * pRight = child( pNode, right_child, memory_model::memory_order_relaxed );
@@ -1369,18 +1609,18 @@ namespace cds { namespace container {
             node_type * pSplice = pLeft ? pLeft : pRight;
 
             if ( pParentLeft == pNode )
-                pParent->m_pLeft.store( pSplice, memory_model::memory_order_relaxed );
+                pParent->m_pLeft.store( pSplice, memory_model::memory_order_release );
             else
-                pParent->m_pRight.store( pSplice, memory_model::memory_order_relaxed );
+                pParent->m_pRight.store( pSplice, memory_model::memory_order_release );
 
             if ( pSplice )
-                pSplice->m_pParent.store( pParent, memory_model::memory_order_release );
+                pSplice->parent( pParent, memory_model::memory_order_release );
 
             // Mark the node as unlinked
             pNode->version( node_type::unlinked, memory_model::memory_order_release );
 
             // The value will be disposed by calling function
-            pNode->m_pValue.store( nullptr, memory_model::memory_order_relaxed );
+            pNode->m_pValue.store( nullptr, memory_model::memory_order_release );
 
             disp.dispose( pNode );
             m_stat.onDisposeNode();
@@ -1392,17 +1632,22 @@ namespace cds { namespace container {
 
     private: // rotations
         //@cond
+        int check_node_ordering( node_type* pParent, node_type* pChild )
+        {
+            return key_comparator()( pParent->m_key, pChild->m_key );
+        }
+
         int estimate_node_condition( node_type * pNode )
         {
-            node_type * pLeft = child( pNode, left_child, memory_model::memory_order_relaxed );
-            node_type * pRight = child( pNode, right_child, memory_model::memory_order_relaxed );
+            node_type * pLeft = child( pNode, left_child, memory_model::memory_order_acquire );
+            node_type * pRight = child( pNode, right_child, memory_model::memory_order_acquire );
 
-            if ( (pLeft == nullptr || pRight == nullptr) && !pNode->is_valued( memory_model::memory_order_relaxed ))
+            if ( (pLeft == nullptr || pRight == nullptr) && !pNode->is_valued( memory_model::memory_order_acquire ))
                 return unlink_required;
 
-            int h = pNode->height( memory_model::memory_order_relaxed );
-            int hL = pLeft ? pLeft->height( memory_model::memory_order_relaxed ) : 0;
-            int hR = pRight ? pRight->height( memory_model::memory_order_relaxed ) : 0;
+            int h = height( pNode, memory_model::memory_order_acquire );
+            int hL = height_null( pLeft, memory_model::memory_order_acquire );
+            int hR = height_null( pRight, memory_model::memory_order_acquire );
 
             int hNew = 1 + std::max( hL, hR );
             int nBalance = hL - hR;
@@ -1431,28 +1676,26 @@ namespace cds { namespace container {
                 case nothing_required:
                     return nullptr;
                 default:
-                    pNode->height( h, memory_model::memory_order_relaxed );
+                    set_height( pNode, h, memory_model::memory_order_release );
                     return parent( pNode, memory_model::memory_order_relaxed );
             }
         }
 
         void fix_height_and_rebalance( node_type * pNode, rcu_disposer& disp )
         {
-            while ( pNode && parent( pNode, memory_model::memory_order_relaxed )) {
+            while ( pNode && parent( pNode, memory_model::memory_order_acquire )) {
                 int nCond = estimate_node_condition( pNode );
-                if ( nCond == nothing_required || pNode->is_unlinked( memory_model::memory_order_relaxed ) )
+                if ( nCond == nothing_required || pNode->is_unlinked( memory_model::memory_order_acquire ))
                     return;
 
                 if ( nCond != unlink_required && nCond != rebalance_required )
                     pNode = fix_height( pNode );
                 else {
-                    node_type * pParent = parent( pNode, memory_model::memory_order_relaxed );
+                    node_type * pParent = parent( pNode, memory_model::memory_order_acquire );
                     assert( pParent != nullptr );
                     {
                         node_scoped_lock lp( m_Monitor, *pParent );
-                        if ( !pParent->is_unlinked( memory_model::memory_order_relaxed )
-                             && parent( pNode, memory_model::memory_order_relaxed ) == pParent ) 
-                        {
+                        if ( !pParent->is_unlinked( memory_model::memory_order_relaxed ) && parent( pNode, memory_model::memory_order_acquire ) == pParent ) {
                             node_scoped_lock ln( m_Monitor, *pNode );
                             pNode = rebalance_locked( pParent, pNode, disp );
                         }
@@ -1466,8 +1709,6 @@ namespace cds { namespace container {
             // pParent and pNode should be locked.
             // Returns a damaged node, or nullptr if no more rebalancing is necessary
             assert( parent( pNode, memory_model::memory_order_relaxed ) == pParent );
-            assert( child( pParent, left_child, memory_model::memory_order_relaxed ) == pNode
-                 || child( pParent, right_child, memory_model::memory_order_relaxed ) == pNode );
 
             node_type * pLeft = child( pNode, left_child, memory_model::memory_order_relaxed );
             node_type * pRight = child( pNode, right_child, memory_model::memory_order_relaxed );
@@ -1481,9 +1722,12 @@ namespace cds { namespace container {
                 }
             }
 
-            int h = pNode->height( memory_model::memory_order_relaxed );
-            int hL = pLeft ? pLeft->height( memory_model::memory_order_relaxed ) : 0;
-            int hR = pRight ? pRight->height( memory_model::memory_order_relaxed ) : 0;
+            assert( child( pParent, left_child, memory_model::memory_order_relaxed ) == pNode
+                 || child( pParent, right_child, memory_model::memory_order_relaxed ) == pNode );
+
+            int h = height( pNode, memory_model::memory_order_acquire );
+            int hL = height_null( pLeft, memory_model::memory_order_acquire );
+            int hR = height_null( pRight, memory_model::memory_order_acquire );
             int hNew = 1 + std::max( hL, hR );
             int balance = hL - hR;
 
@@ -1492,7 +1736,7 @@ namespace cds { namespace container {
             else if ( balance < -1 )
                 return rebalance_to_left_locked( pParent, pNode, pRight, hL );
             else if ( hNew != h ) {
-                pNode->height( hNew, memory_model::memory_order_relaxed );
+                set_height( pNode, hNew, memory_model::memory_order_release );
 
                 // pParent is already locked
                 return fix_height_locked( pParent );
@@ -1511,49 +1755,51 @@ namespace cds { namespace container {
             // pNode->pLeft is too large, we will rotate-right.
             // If pLeft->pRight is taller than pLeft->pLeft, then we will first rotate-left pLeft.
 
-            {
-                assert( pLeft != nullptr );
-                node_scoped_lock l( m_Monitor, *pLeft );
-                if ( pNode->m_pLeft.load( memory_model::memory_order_relaxed ) != pLeft )
-                    return pNode; // retry for pNode
-
-                int hL = pLeft->height( memory_model::memory_order_relaxed );
-                if ( hL - hR <= 1 )
-                    return pNode; // retry
-
-                node_type * pLRight = child( pLeft, right_child, memory_model::memory_order_relaxed );
-                int hLR = pLRight ? pLRight->height( memory_model::memory_order_relaxed ) : 0;
-                node_type * pLLeft = child( pLeft, left_child, memory_model::memory_order_relaxed );
-                int hLL = pLLeft ? pLLeft->height( memory_model::memory_order_relaxed ) : 0;
-
-                if ( hLL > hLR ) {
-                    // rotate right
-                    return rotate_right_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLR );
-                }
-                else {
-                    assert( pLRight != nullptr );
-                    {
-                        node_scoped_lock lr( m_Monitor, *pLRight );
-                        if ( pLeft->m_pRight.load( memory_model::memory_order_relaxed ) != pLRight )
-                            return pNode; // retry
-
-                        hLR = pLRight->height( memory_model::memory_order_relaxed );
-                        if ( hLL > hLR )
-                            return rotate_right_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLR );
-
-                        node_type * pLRLeft = child( pLRight, left_child, memory_model::memory_order_relaxed );
-                        int hLRL = pLRLeft ? pLRLeft->height( memory_model::memory_order_relaxed  ) : 0;
-                        int balance = hLL - hLRL;
-                        if ( balance >= -1 && balance <= 1 && !((hLL == 0 || hLRL == 0) && !pLeft->is_valued(memory_model::memory_order_relaxed))) {
-                            // nParent.child.left won't be damaged after a double rotation
-                            return rotate_right_over_left_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLRL );
-                        }
-                    }
+            assert( pLeft != nullptr );
+            node_scoped_lock l( m_Monitor, *pLeft );
+            if ( pNode->m_pLeft.load( memory_model::memory_order_relaxed ) != pLeft )
+                return pNode; // retry for pNode
+
+            assert( check_node_ordering( pNode, pLeft ) > 0 );
+
+            int hL = height( pLeft, memory_model::memory_order_acquire );
+            if ( hL - hR <= 1 )
+                return pNode; // retry
+
+            node_type * pLRight = child( pLeft, right_child, memory_model::memory_order_relaxed );
+            int hLR = height_null( pLRight, memory_model::memory_order_acquire );
+            node_type * pLLeft = child( pLeft, left_child, memory_model::memory_order_relaxed );
+            int hLL = height_null( pLLeft, memory_model::memory_order_acquire );
+
+            if ( pLRight ) {
+                {
+                    node_scoped_lock lr( m_Monitor, *pLRight );
+                    if ( pLeft->m_pRight.load( memory_model::memory_order_acquire ) != pLRight )
+                        return pNode; // retry
+
+                    assert( check_node_ordering( pLeft, pLRight ) < 0 );
 
-                    // focus on pLeft, if necessary pNode will be balanced later
-                    return rebalance_to_left_locked( pNode, pLeft, pLRight, hLL );
+                    hLR = height( pLRight, memory_model::memory_order_acquire );
+                    if ( hLL > hLR )
+                        return rotate_right_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLR );
+
+                    int hLRL = height_null( child( pLRight, left_child, memory_model::memory_order_relaxed ), memory_model::memory_order_acquire );
+                    int balance = hLL - hLRL;
+                    if ( balance >= -1 && balance <= 1 && !( ( hLL == 0 || hLRL == 0 ) && !pLeft->is_valued( memory_model::memory_order_relaxed ))) {
+                        // nParent.child.left won't be damaged after a double rotation
+                        return rotate_right_over_left_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLRL );
+                    }
                 }
+
+                // focus on pLeft, if necessary pNode will be balanced later
+                return rebalance_to_left_locked( pNode, pLeft, pLRight, hLL );
+            }
+            else if ( hLL > hLR ) {
+                // rotate right
+                return rotate_right_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLR );
             }
+
+            return pNode; // retry
         }
 
         node_type * rebalance_to_left_locked( node_type * pParent, node_type * pNode, node_type * pRight, int hL )
@@ -1563,46 +1809,54 @@ namespace cds { namespace container {
                  || child( pParent, right_child, memory_model::memory_order_relaxed ) == pNode );
 
             // pParent and pNode is locked yet
-            {
-                assert( pRight != nullptr );
-                node_scoped_lock l( m_Monitor, *pRight );
-                if ( pNode->m_pRight.load( memory_model::memory_order_relaxed ) != pRight )
-                    return pNode; // retry for pNode
-
-                int hR = pRight->height( memory_model::memory_order_relaxed );
-                if ( hL - hR >= -1 )
-                    return pNode; // retry
-
-                node_type * pRLeft = child( pRight, left_child, memory_model::memory_order_relaxed );
-                int hRL = pRLeft ? pRLeft->height( memory_model::memory_order_relaxed ) : 0;
-                node_type * pRRight = child( pRight, right_child, memory_model::memory_order_relaxed );
-                int hRR = pRRight ? pRRight->height( memory_model::memory_order_relaxed ) : 0;
-                if ( hRR > hRL )
-                    return rotate_left_locked( pParent, pNode, hL, pRight, pRLeft, hRL, hRR );
+            assert( pRight != nullptr );
+            node_scoped_lock l( m_Monitor, *pRight );
+            if ( pNode->m_pRight.load( memory_model::memory_order_relaxed ) != pRight )
+                return pNode; // retry for pNode
+
+            assert( check_node_ordering( pNode, pRight ) < 0 );
+
+            int hR = height( pRight, memory_model::memory_order_acquire );
+            if ( hL - hR >= -1 )
+                return pNode; // retry
 
+            node_type * pRLeft = child( pRight, left_child, memory_model::memory_order_relaxed );
+            int hRL = height_null( pRLeft, memory_model::memory_order_acquire );
+            node_type * pRRight = child( pRight, right_child, memory_model::memory_order_relaxed );
+            int hRR = height_null( pRRight, memory_model::memory_order_acquire );
+
+            if ( pRLeft ) {
                 {
-                    assert( pRLeft != nullptr );
                     node_scoped_lock lrl( m_Monitor, *pRLeft );
-                    if ( pRight->m_pLeft.load( memory_model::memory_order_relaxed ) != pRLeft )
+                    if ( pRight->m_pLeft.load( memory_model::memory_order_acquire ) != pRLeft )
                         return pNode; // retry
 
-                    hRL = pRLeft->height( memory_model::memory_order_relaxed );
+                    assert( check_node_ordering( pRight, pRLeft ) > 0 );
+
+                    hRL = height( pRLeft, memory_model::memory_order_acquire );
                     if ( hRR >= hRL )
                         return rotate_left_locked( pParent, pNode, hL, pRight, pRLeft, hRL, hRR );
 
                     node_type * pRLRight = child( pRLeft, right_child, memory_model::memory_order_relaxed );
-                    int hRLR = pRLRight ? pRLRight->height( memory_model::memory_order_relaxed ) : 0;
+                    int hRLR = height_null( pRLRight, memory_model::memory_order_acquire );
                     int balance = hRR - hRLR;
-                    if ( balance >= -1 && balance <= 1 && !((hRR == 0 || hRLR == 0) && !pRight->is_valued( memory_model::memory_order_relaxed )))
-                         return rotate_left_over_right_locked( pParent, pNode, hL, pRight, pRLeft, hRR, hRLR );
+                    if ( balance >= -1 && balance <= 1 && !( ( hRR == 0 || hRLR == 0 ) && !pRight->is_valued( memory_model::memory_order_relaxed )))
+                        return rotate_left_over_right_locked( pParent, pNode, hL, pRight, pRLeft, hRR, hRLR );
                 }
+
                 return rebalance_to_right_locked( pNode, pRight, pRLeft, hRR );
             }
+            else if ( hRR > hRL )
+                return rotate_left_locked( pParent, pNode, hL, pRight, pRLeft, hRL, hRR );
+
+            return pNode;   // retry
         }
 
         static void begin_change( node_type * pNode, version_type version )
         {
-            pNode->version( version | node_type::shrinking, memory_model::memory_order_release );
+            assert(pNode->version(memory_model::memory_order_acquire) == version );
+            assert( (version & node_type::shrinking) == 0 );
+            pNode->exchange_version( version | node_type::shrinking, memory_model::memory_order_acquire );
         }
         static void end_change( node_type * pNode, version_type version )
         {
@@ -1617,25 +1871,46 @@ namespace cds { namespace container {
 
             begin_change( pNode, nodeVersion );
 
-            pNode->m_pLeft.store( pLRight, memory_model::memory_order_relaxed );
-            if ( pLRight != nullptr )
-                pLRight->m_pParent.store( pNode, memory_model::memory_order_relaxed  );
+            pNode->m_pLeft.store( pLRight, memory_model::memory_order_release );
 
+            if ( pLRight != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRight->m_pParent );
+                pLRight->parent( pNode, memory_model::memory_order_relaxed );
+                assert( check_node_ordering( pNode, pLRight ) > 0 );
+            }
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLeft->m_pRight );
             pLeft->m_pRight.store( pNode, memory_model::memory_order_relaxed );
-            pNode->m_pParent.store( pLeft, memory_model::memory_order_relaxed );
 
-            if ( pParentLeft == pNode )
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pNode->m_pParent );
+            pNode->parent( pLeft, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pLeft, pNode ) < 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            if ( pParentLeft == pNode ) {
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pLeft );
                 pParent->m_pLeft.store( pLeft, memory_model::memory_order_relaxed );
+            }
             else {
                 assert( pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pRight );
                 pParent->m_pRight.store( pLeft, memory_model::memory_order_relaxed );
             }
-            pLeft->m_pParent.store( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLeft->m_pParent );
+            pLeft->parent( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
 
             // fix up heights links
             int hNode = 1 + std::max( hLR, hR );
-            pNode->height( hNode, memory_model::memory_order_relaxed );
-            pLeft->height( 1 + std::max( hLL, hNode ), memory_model::memory_order_relaxed );
+            set_height( pNode, hNode, memory_model::memory_order_release );
+            set_height( pLeft, 1 + std::max( hLL, hNode ), memory_model::memory_order_release);
 
             end_change( pNode, nodeVersion );
             m_stat.onRotateRight();
@@ -1650,6 +1925,7 @@ namespace cds { namespace container {
             int nodeBalance = hLR - hR;
             if ( nodeBalance < -1 || nodeBalance > 1 ) {
                 // we need another rotation at pNode
+                m_stat.onRotateAfterRightRotation();
                 return pNode;
             }
 
@@ -1657,17 +1933,22 @@ namespace cds { namespace container {
             // extra-routing node damage
             if ( (pLRight == nullptr || hR == 0) && !pNode->is_valued(memory_model::memory_order_relaxed)) {
                 // we need to remove pNode and then repair
+                m_stat.onRemoveAfterRightRotation();
                 return pNode;
             }
 
             // we've already fixed the height at pLeft, do we need a rotation here?
             int leftBalance = hLL - hNode;
-            if ( leftBalance < -1 || leftBalance > 1 )
+            if ( leftBalance < -1 || leftBalance > 1 ) {
+                m_stat.onRotateAfterRightRotation();
                 return pLeft;
+            }
 
             // pLeft might also have routing node damage (if pLeft.left was null)
-            if ( hLL == 0 && !pLeft->is_valued( memory_model::memory_order_relaxed ))
+            if ( hLL == 0 && !pLeft->is_valued(memory_model::memory_order_acquire)) {
+                m_stat.onDamageAfterRightRotation();
                 return pLeft;
+            }
 
             // try to fix the parent height while we've still got the lock
             return fix_height_locked( pParent );
@@ -1681,42 +1962,68 @@ namespace cds { namespace container {
             begin_change( pNode, nodeVersion );
 
             // fix up pNode links, careful to be compatible with concurrent traversal for all but pNode
-            pNode->m_pRight.store( pRLeft, memory_model::memory_order_relaxed );
-            if ( pRLeft != nullptr )
-                pRLeft->m_pParent.store( pNode, memory_model::memory_order_relaxed );
+            pNode->m_pRight.store( pRLeft, memory_model::memory_order_release );
+            if ( pRLeft != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLeft->m_pParent );
+                pRLeft->parent( pNode, memory_model::memory_order_relaxed );
+            }
 
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRight->m_pLeft );
             pRight->m_pLeft.store( pNode, memory_model::memory_order_relaxed );
-            pNode->m_pParent.store( pRight, memory_model::memory_order_relaxed );
 
-            if ( pParentLeft == pNode )
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pNode->m_pParent );
+            pNode->parent( pRight, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pRight, pNode ) > 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            if ( pParentLeft == pNode ) {
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pLeft );
                 pParent->m_pLeft.store( pRight, memory_model::memory_order_relaxed );
+            }
             else {
                 assert( pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pRight );
                 pParent->m_pRight.store( pRight, memory_model::memory_order_relaxed );
             }
-            pRight->m_pParent.store( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRight->m_pParent );
+            pRight->parent( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
 
             // fix up heights
             int hNode = 1 + std::max( hL, hRL );
-            pNode->height( hNode, memory_model::memory_order_relaxed );
-            pRight->height( 1 + std::max( hNode, hRR ), memory_model::memory_order_relaxed );
+            set_height( pNode, hNode, memory_model::memory_order_release );
+            set_height( pRight, 1 + std::max( hNode, hRR ), memory_model::memory_order_release);
 
             end_change( pNode, nodeVersion );
             m_stat.onRotateLeft();
 
             int nodeBalance = hRL - hL;
-            if ( nodeBalance < -1 || nodeBalance > 1 )
+            if ( nodeBalance < -1 || nodeBalance > 1 ) {
+                m_stat.onRotateAfterLeftRotation();
                 return pNode;
+            }
 
-            if ( (pRLeft == nullptr || hL == 0) && !pNode->is_valued( memory_model::memory_order_relaxed ))
+            if ( (pRLeft == nullptr || hL == 0) && !pNode->is_valued(memory_model::memory_order_relaxed)) {
+                m_stat.onRemoveAfterLeftRotation();
                 return pNode;
+            }
 
             int rightBalance = hRR - hNode;
-            if ( rightBalance < -1 || rightBalance > 1 )
+            if ( rightBalance < -1 || rightBalance > 1 ) {
+                m_stat.onRotateAfterLeftRotation();
                 return pRight;
+            }
 
-            if ( hRR == 0 && !pRight->is_valued( memory_model::memory_order_relaxed ))
+            if ( hRR == 0 && !pRight->is_valued(memory_model::memory_order_acquire)) {
+                m_stat.onDamageAfterLeftRotation();
                 return pRight;
+            }
 
             return fix_height_locked( pParent );
         }
@@ -1724,44 +2031,75 @@ namespace cds { namespace container {
         node_type * rotate_right_over_left_locked( node_type * pParent, node_type * pNode, node_type * pLeft, int hR, int hLL, node_type * pLRight, int hLRL )
         {
             version_type nodeVersion = pNode->version( memory_model::memory_order_relaxed );
-            version_type leftVersion = pLeft->version( memory_model::memory_order_relaxed );
+            version_type leftVersion = pLeft->version( memory_model::memory_order_acquire );
 
             node_type * pPL = child( pParent, left_child, memory_model::memory_order_relaxed );
-            node_type * pLRL = child( pLRight, left_child, memory_model::memory_order_relaxed );
-            node_type * pLRR = child( pLRight, right_child, memory_model::memory_order_relaxed );
-            int hLRR = pLRR ? pLRR->height( memory_model::memory_order_relaxed ) : 0;
+            node_type * pLRL = child( pLRight, left_child, memory_model::memory_order_acquire );
+            node_type * pLRR = child( pLRight, right_child, memory_model::memory_order_acquire );
+            int hLRR = height_null( pLRR, memory_model::memory_order_acquire );
 
             begin_change( pNode, nodeVersion );
             begin_change( pLeft, leftVersion );
 
             // fix up pNode links, careful about the order!
-            pNode->m_pLeft.store( pLRR, memory_model::memory_order_relaxed );
-            if ( pLRR != nullptr )
-                pLRR->m_pParent.store( pNode, memory_model::memory_order_relaxed );
+            pNode->m_pLeft.store( pLRR, memory_model::memory_order_release );
+            if ( pLRR != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRR->m_pParent );
+                pLRR->parent( pNode, memory_model::memory_order_relaxed );
+            }
 
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLeft->m_pRight );
             pLeft->m_pRight.store( pLRL, memory_model::memory_order_relaxed );
-            if ( pLRL != nullptr )
-                pLRL->m_pParent.store( pLeft, memory_model::memory_order_relaxed );
 
+            if ( pLRL != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRL->m_pParent );
+                pLRL->parent( pLeft, memory_model::memory_order_relaxed );
+            }
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRight->m_pLeft );
             pLRight->m_pLeft.store( pLeft, memory_model::memory_order_relaxed );
-            pLeft->m_pParent.store( pLRight, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLeft->m_pParent );
+            pLeft->parent( pLRight, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pLRight, pLeft ) > 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRight->m_pRight );
             pLRight->m_pRight.store( pNode, memory_model::memory_order_relaxed );
-            pNode->m_pParent.store( pLRight, memory_model::memory_order_relaxed );
 
-            if ( pPL == pNode )
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pNode->m_pParent );
+            pNode->parent( pLRight, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pLRight, pNode ) < 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            if ( pPL == pNode ) {
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pLeft );
                 pParent->m_pLeft.store( pLRight, memory_model::memory_order_relaxed );
+            }
             else {
                 assert( child( pParent, right_child, memory_model::memory_order_relaxed ) == pNode );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pRight );
                 pParent->m_pRight.store( pLRight, memory_model::memory_order_relaxed );
             }
-            pLRight->m_pParent.store( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRight->m_pParent );
+            pLRight->parent( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
 
             // fix up heights
             int hNode = 1 + std::max( hLRR, hR );
-            pNode->height( hNode, memory_model::memory_order_relaxed );
+            set_height( pNode, hNode, memory_model::memory_order_release );
             int hLeft = 1 + std::max( hLL, hLRL );
-            pLeft->height( hLeft, memory_model::memory_order_relaxed );
-            pLRight->height( 1 + std::max( hLeft, hNode ), memory_model::memory_order_relaxed );
+            set_height( pLeft, hLeft, memory_model::memory_order_release );
+            set_height( pLRight, 1 + std::max( hLeft, hNode ), memory_model::memory_order_release);
 
             end_change( pNode, nodeVersion );
             end_change( pLeft, leftVersion );
@@ -1770,7 +2108,7 @@ namespace cds { namespace container {
             // caller should have performed only a single rotation if pLeft was going
             // to end up damaged
             assert( hLL - hLRL <= 1 && hLRL - hLL <= 1 );
-            assert( !((hLL == 0 || pLRL == nullptr) && !pLeft->is_valued( memory_model::memory_order_relaxed )));
+            assert( !((hLL == 0 || pLRL == nullptr) && !pLeft->is_valued( memory_model::memory_order_acquire )));
 
             // We have damaged pParent, pLR (now parent.child), and pNode (now
             // parent.child.right).  pNode is the deepest.  Perform as many fixes as we
@@ -1782,19 +2120,23 @@ namespace cds { namespace container {
             int nodeBalance = hLRR - hR;
             if ( nodeBalance < -1 || nodeBalance > 1 ) {
                 // we need another rotation at pNode
+                m_stat.onRotateAfterRLRotation();
                 return pNode;
             }
 
             // pNode might also be damaged by being an unnecessary routing node
             if ( (pLRR == nullptr || hR == 0) && !pNode->is_valued( memory_model::memory_order_relaxed )) {
                 // repair involves splicing out pNode and maybe more rotations
+                m_stat.onRemoveAfterRLRotation();
                 return pNode;
             }
 
             // we've already fixed the height at pLRight, do we need a rotation here?
             int balanceLR = hLeft - hNode;
-            if ( balanceLR < -1 || balanceLR > 1 )
+            if ( balanceLR < -1 || balanceLR > 1 ) {
+                m_stat.onRotateAfterRLRotation();
                 return pLRight;
+            }
 
             // try to fix the parent height while we've still got the lock
             return fix_height_locked( pParent );
@@ -1803,44 +2145,75 @@ namespace cds { namespace container {
         node_type * rotate_left_over_right_locked( node_type * pParent, node_type * pNode, int hL, node_type * pRight, node_type * pRLeft, int hRR, int hRLR )
         {
             version_type nodeVersion = pNode->version( memory_model::memory_order_relaxed );
-            version_type rightVersion = pRight->version( memory_model::memory_order_relaxed );
+            version_type rightVersion = pRight->version( memory_model::memory_order_acquire );
 
             node_type * pPL = child( pParent, left_child, memory_model::memory_order_relaxed );
-            node_type * pRLL = child( pRLeft, left_child, memory_model::memory_order_relaxed );
-            node_type * pRLR = child( pRLeft, right_child, memory_model::memory_order_relaxed );
-            int hRLL = pRLL ? pRLL->height( memory_model::memory_order_relaxed ) : 0;
+            node_type * pRLL = child( pRLeft, left_child, memory_model::memory_order_acquire );
+            node_type * pRLR = child( pRLeft, right_child, memory_model::memory_order_acquire );
+            int hRLL = height_null( pRLL, memory_model::memory_order_acquire );
 
             begin_change( pNode, nodeVersion );
             begin_change( pRight, rightVersion );
 
             // fix up pNode links, careful about the order!
-            pNode->m_pRight.store( pRLL, memory_model::memory_order_relaxed );
-            if ( pRLL != nullptr )
-                pRLL->m_pParent.store( pNode, memory_model::memory_order_relaxed );
+            pNode->m_pRight.store( pRLL, memory_model::memory_order_release );
+            if ( pRLL != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLL->m_pParent );
+                pRLL->parent( pNode, memory_model::memory_order_relaxed );
+            }
 
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRight->m_pLeft );
             pRight->m_pLeft.store( pRLR, memory_model::memory_order_relaxed );
-            if ( pRLR != nullptr )
-                pRLR->m_pParent.store( pRight, memory_model::memory_order_relaxed );
 
+            if ( pRLR != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLR->m_pParent );
+                pRLR->parent( pRight, memory_model::memory_order_relaxed );
+            }
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLeft->m_pRight );
             pRLeft->m_pRight.store( pRight, memory_model::memory_order_relaxed );
-            pRight->m_pParent.store( pRLeft, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRight->m_pParent );
+            pRight->parent( pRLeft, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pRLeft, pRight ) < 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLeft->m_pLeft );
             pRLeft->m_pLeft.store( pNode, memory_model::memory_order_relaxed );
-            pNode->m_pParent.store( pRLeft, memory_model::memory_order_relaxed );
 
-            if ( pPL == pNode )
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pNode->m_pParent );
+            pNode->parent( pRLeft, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pRLeft, pNode ) > 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            if ( pPL == pNode ) {
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pLeft );
                 pParent->m_pLeft.store( pRLeft, memory_model::memory_order_relaxed );
+            }
             else {
                 assert( pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pRight );
                 pParent->m_pRight.store( pRLeft, memory_model::memory_order_relaxed );
             }
-            pRLeft->m_pParent.store( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLeft->m_pParent );
+            pRLeft->parent( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
 
             // fix up heights
             int hNode = 1 + std::max( hL, hRLL );
-            pNode->height( hNode, memory_model::memory_order_relaxed );
+            set_height( pNode, hNode, memory_model::memory_order_release );
             int hRight = 1 + std::max( hRLR, hRR );
-            pRight->height( hRight, memory_model::memory_order_relaxed );
-            pRLeft->height( 1 + std::max( hNode, hRight ), memory_model::memory_order_relaxed );
+            set_height( pRight, hRight, memory_model::memory_order_release );
+            set_height( pRLeft, 1 + std::max( hNode, hRight ), memory_model::memory_order_release);
 
             end_change( pNode, nodeVersion );
             end_change( pRight, rightVersion );
@@ -1849,14 +2222,21 @@ namespace cds { namespace container {
             assert( hRR - hRLR <= 1 && hRLR - hRR <= 1 );
 
             int nodeBalance = hRLL - hL;
-            if ( nodeBalance < -1 || nodeBalance > 1 )
+            if ( nodeBalance < -1 || nodeBalance > 1 ) {
+                m_stat.onRotateAfterLRRotation();
                 return pNode;
-            if ( (pRLL == nullptr || hL == 0) && !pNode->is_valued( memory_model::memory_order_relaxed ))
+            }
+
+            if ( (pRLL == nullptr || hL == 0) && !pNode->is_valued(memory_model::memory_order_relaxed)) {
+                m_stat.onRemoveAfterLRRotation();
                 return pNode;
+            }
 
             int balRL = hRight - hNode;
-            if ( balRL < -1 || balRL > 1 )
+            if ( balRL < -1 || balRL > 1 ) {
+                m_stat.onRotateAfterLRRotation();
                 return pRLeft;
+            }
 
             return fix_height_locked( pParent );
         }