Fixed -Wshadow warnings
[libcds.git] / cds / container / impl / bronson_avltree_map_rcu.h
index ffbf2fd96f9000f80baa709a9c8e49c426ff2a30..458cbdb77009d6b648ba8eceb2e39919806d7d80 100644 (file)
@@ -1,20 +1,48 @@
-//$$CDS-header$$
+/*
+    This file is a part of libcds - Concurrent Data Structures library
+
+    (C) Copyright Maxim Khizhinsky (libcds.dev@gmail.com) 2006-2017
+
+    Source code repo: http://github.com/khizmax/libcds/
+    Download: http://sourceforge.net/projects/libcds/files/
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this
+      list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 
 #ifndef CDSLIB_CONTAINER_IMPL_BRONSON_AVLTREE_MAP_RCU_H
 #define CDSLIB_CONTAINER_IMPL_BRONSON_AVLTREE_MAP_RCU_H
 
-#include <memory> // unique_ptr
+#include <type_traits> // is_base_of
 #include <cds/container/details/bronson_avltree_base.h>
 #include <cds/urcu/details/check_deadlock.h>
-#include <cds/details/binary_functor_wrapper.h>
-
+#include <cds/urcu/exempt_ptr.h>
 
 namespace cds { namespace container {
 
-    /// Bronson et al AVL-tree (RCU specialization for storing pointer to values)
+    /// Bronson et al AVL-tree (RCU specialization for pointers)
     /** @ingroup cds_nonintrusive_map
         @ingroup cds_nonintrusive_tree
         @headerfile cds/container/bronson_avltree_map_rcu.h
+        @anchor cds_container_BronsonAVLTreeMap_rcu_ptr
 
         This is the specialization of \ref cds_container_BronsonAVLTreeMap_rcu "RCU-based Bronson et al AVL-tree"
         for "key -> value pointer" map. This specialization stores the pointer to user-allocated values instead of the copy
@@ -68,12 +96,26 @@ namespace cds { namespace container {
         /// Enabled or disabled @ref bronson_avltree::relaxed_insert "relaxed insertion"
         static CDS_CONSTEXPR bool const c_bRelaxedInsert = traits::relaxed_insert;
 
+        /// Group of \p extract_xxx functions does not require external locking
+        static CDS_CONSTEXPR const bool c_bExtractLockExternal = false;
+
+#   ifdef CDS_DOXYGEN_INVOKED
         /// Returned pointer to \p mapped_type of extracted node
-        typedef std::unique_ptr< mapped_type, disposer > unique_ptr;
+        typedef cds::urcu::exempt_ptr< gc, T, T, disposer, void > exempt_ptr;
+#   else
+        typedef cds::urcu::exempt_ptr< gc,
+            typename std::remove_pointer<mapped_type>::type,
+            typename std::remove_pointer<mapped_type>::type,
+            disposer,
+            void
+        > exempt_ptr;
+#   endif
+
+        typedef typename gc::scoped_lock    rcu_lock;  ///< RCU scoped lock
 
     protected:
         //@cond
-        typedef typename sync_monitor::node_injection< bronson_avltree::node< key_type, mapped_type >> node_type;
+        typedef bronson_avltree::node< key_type, mapped_type, sync_monitor > node_type;
         typedef typename node_type::version_type version_type;
 
         typedef cds::details::Allocator< node_type, node_allocator_type > cxx_allocator;
@@ -86,18 +128,20 @@ namespace cds { namespace container {
             retry
         };
 
-        enum class update_flags
+        struct update_flags
         {
-            allow_insert = 1,
-            allow_update = 2,
-            allow_remove = 4,
+            enum {
+                allow_insert = 1,
+                allow_update = 2,
+                //allow_remove = 4,
 
-            retry = 1024,
+                retry = 1024,
 
-            failed = 0,
-            result_inserted = allow_insert,
-            result_updated  = allow_update,
-            result_removed  = allow_remove
+                failed = 0,
+                result_inserted = allow_insert,
+                result_updated = allow_update,
+                result_removed = 4
+            };
         };
 
         enum node_condition
@@ -107,6 +151,11 @@ namespace cds { namespace container {
             unlink_required = -1
         };
 
+        enum direction {
+            left_child = -1,
+            right_child = 1
+        };
+
         typedef typename sync_monitor::template scoped_lock<node_type> node_scoped_lock;
         //@endcond
 
@@ -121,17 +170,36 @@ namespace cds { namespace container {
         static void free_node( node_type * pNode )
         {
             // Free node without disposer
+            assert( !pNode->is_valued( memory_model::memory_order_relaxed ));
+            assert( pNode->m_SyncMonitorInjection.check_free());
             cxx_allocator().Delete( pNode );
         }
 
+        static void free_value( mapped_type pVal )
+        {
+            disposer()(pVal);
+        }
+
+        static node_type * child( node_type * pNode, int nDir, atomics::memory_order order )
+        {
+            return pNode->child( nDir, order );
+        }
+
+        static node_type * parent( node_type * pNode, atomics::memory_order order )
+        {
+            return pNode->parent( order );
+        }
+
         // RCU safe disposer
         class rcu_disposer
         {
-            node_type *     m_pRetiredList; ///< head of retired list
+            node_type *     m_pRetiredList;     ///< head of retired node list
+            mapped_type     m_pRetiredValue;    ///< value retired
 
         public:
             rcu_disposer()
                 : m_pRetiredList( nullptr )
+                , m_pRetiredValue( nullptr )
             {}
 
             ~rcu_disposer()
@@ -141,16 +209,17 @@ namespace cds { namespace container {
 
             void dispose( node_type * pNode )
             {
-                mapped_type * pVal = pNode->value( memory_model::memory_order_relaxed );
-                if ( pVal ) {
-                    pNode->m_pValue.store( nullptr, memory_model::memory_order_relaxed );
-                    disposer()(pVal);
-                }
-
+                assert( !pNode->is_valued( memory_model::memory_order_relaxed ));
                 pNode->m_pNextRemoved = m_pRetiredList;
                 m_pRetiredList = pNode;
             }
 
+            void dispose_value( mapped_type pVal )
+            {
+                assert( m_pRetiredValue == nullptr );
+                m_pRetiredValue = pVal;
+            }
+
         private:
             struct internal_disposer
             {
@@ -162,14 +231,21 @@ namespace cds { namespace container {
 
             void clean()
             {
-                assert( !gc::is_locked() );
+                assert( !gc::is_locked());
+
+                // TODO: use RCU::batch_retire
 
+                // Dispose nodes
                 for ( node_type * p = m_pRetiredList; p; ) {
-                    node_type * pNext = p->m_pNextRemoved;
+                    node_type * pNext = static_cast<node_type *>( p->m_pNextRemoved );
                     // Value already disposed
                     gc::template retire_ptr<internal_disposer>( p );
                     p = pNext;
                 }
+
+                // Dispose value
+                if ( m_pRetiredValue  )
+                    gc::template retire_ptr<disposer>( m_pRetiredValue );
             }
         };
 
@@ -177,17 +253,17 @@ namespace cds { namespace container {
 
     protected:
         //@cond
-        typename node_type::base_class      m_Root;
-        node_type *                         m_pRoot;
-        item_counter                        m_ItemCounter;
-        mutable sync_monitor                m_Monitor;
-        mutable stat                        m_stat;
+        typename node_type::base_class m_Root;
+        node_type *             m_pRoot;
+        item_counter            m_ItemCounter;
+        mutable sync_monitor    m_Monitor;
+        mutable stat            m_stat;
         //@endcond
 
     public:
         /// Creates empty map
         BronsonAVLTreeMap()
-            : m_pRoot( static_cast<node_type *>(&m_Root) )
+            : m_pRoot( static_cast<node_type *>( &m_Root ))
         {}
 
         /// Destroys the map
@@ -205,15 +281,16 @@ namespace cds { namespace container {
             Returns \p true if inserting successful, \p false otherwise.
         */
         template <typename K>
-        bool insert( K const& key, mapped_type pVal )
+        bool insert( K const& key, mapped_type pVal )
         {
             return do_update(key, key_comparator(),
-                [pVal]( node_type * pNode ) -> mapped_type
+                [pVal]( node_type * pNode ) -> mapped_type
                 {
                     assert( pNode->m_pValue.load( memory_model::memory_order_relaxed ) == nullptr );
+                    CDS_UNUSED( pNode );
                     return pVal;
-                }, 
-                update_flags::allow_insert 
+                },
+                update_flags::allow_insert
             ) == update_flags::result_inserted;
         }
 
@@ -223,29 +300,31 @@ namespace cds { namespace container {
             If \p bInsert is \p false, only updating of existing node is possible.
 
             If \p key is not found and inserting is allowed (i.e. \p bInsert is \p true),
-            then the new node created from \p key is inserted into the map; note that in this case the \ref key_type should be
+            then the new node created from \p key will be inserted into the map; note that in this case the \ref key_type should be
             constructible from type \p K.
-            Otherwise, the value is changed to \p pVal.
+            Otherwise, the value for \p key will be changed to \p pVal.
 
             RCU \p synchronize() method can be called. RCU should not be locked.
 
-            Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successfull,
+            Returns <tt> std::pair<bool, bool> </tt> where \p first is \p true if operation is successful,
             \p second is \p true if new node has been added or \p false if the node with \p key
-            already is in the tree.
+            already exists.
         */
-        template <typename K, typename Func>
-        std::pair<bool, bool> update( K const& key, mapped_type pVal, bool bInsert = true )
+        template <typename K>
+        std::pair<bool, bool> update( K const& key, mapped_type pVal, bool bInsert = true )
         {
             int result = do_update( key, key_comparator(),
-                [pVal]( node_type * ) -> mapped_type
+                [pVal]( node_type * ) -> mapped_type
                 {
                     return pVal;
                 },
-                update_flags::allow_update | (bInsert ? update_flags::allow_insert : 0) 
+                update_flags::allow_update | (bInsert ? update_flags::allow_insert : 0)
             );
             return std::make_pair( result != 0, (result & update_flags::result_inserted) != 0 );
         }
 
+        //@endcond
+
         /// Delete \p key from the map
         /**
             RCU \p synchronize() method can be called. RCU should not be locked.
@@ -255,12 +334,11 @@ namespace cds { namespace container {
         template <typename K>
         bool erase( K const& key )
         {
-            return do_update( 
-                key, 
-                key_comparator(), 
-                []( mapped_type * pVal ) { disposer()(pVal); },
-                update_flags::allow_remove 
-            ) == update_flags::result_removed;
+            return do_remove(
+                key,
+                key_comparator(),
+                []( key_type const&, mapped_type pVal, rcu_disposer& disp ) -> bool { disp.dispose_value( pVal ); return true; }
+            );
         }
 
         /// Deletes the item from the map using \p pred predicate for searching
@@ -274,12 +352,11 @@ namespace cds { namespace container {
         bool erase_with( K const& key, Less pred )
         {
             CDS_UNUSED( pred );
-            return do_update( 
-                key, 
-                cds::details::predicate_wrapper<key_type, Less, cds::details::trivial_accessor >(),
-                []( mapped_type * pVal ) { disposer()(pVal); },
-                update_flags::allow_remove 
-            ) == update_flags::result_removed;
+            return do_remove(
+                key,
+                cds::opt::details::make_comparator_from_less<Less>(),
+                []( key_type const&, mapped_type pVal, rcu_disposer& disp ) -> bool { disp.dispose_value( pVal ); return true;  }
+            );
         }
 
         /// Delete \p key from the map
@@ -289,8 +366,8 @@ namespace cds { namespace container {
 
             The functor \p Func interface:
             \code
-            struct extractor {
-                void operator()(mapped_type& item) { ... }
+            struct functor {
+                void operator()( key_type const& key, std::remove_pointer<mapped_type>::type& val) { ... }
             };
             \endcode
 
@@ -301,12 +378,16 @@ namespace cds { namespace container {
         template <typename K, typename Func>
         bool erase( K const& key, Func f )
         {
-            return do_update( 
-                key, 
-                key_comparator(), 
-                [&f]( mapped_type * pVal ) { f( *pVal ); disposer()(pVal); },
-                update_flags::allow_remove
-            ) == update_flags::result_removed;
+            return do_remove(
+                key,
+                key_comparator(),
+                [&f]( key_type const& k, mapped_type pVal, rcu_disposer& disp ) -> bool {
+                    assert( pVal );
+                    f( k, *pVal );
+                    disp.dispose_value(pVal);
+                    return true;
+                }
+            );
         }
 
         /// Deletes the item from the map using \p pred predicate for searching
@@ -320,18 +401,25 @@ namespace cds { namespace container {
         bool erase_with( K const& key, Less pred, Func f )
         {
             CDS_UNUSED( pred );
-            return do_update( 
-                key, 
-                cds::details::predicate_wrapper<key_type, Less, cds::details::trivial_accessor >(),
-                [&f]( mapped_type * pVal ) { f( *pVal ); disposer()(pVal); },
-                update_flags::allow_remove
-            ) == update_flags::result_removed;
+            return do_remove(
+                key,
+                cds::opt::details::make_comparator_from_less<Less>(),
+                [&f]( key_type const& k, mapped_type pVal, rcu_disposer& disp ) -> bool {
+                    assert( pVal );
+                    f( k, *pVal );
+                    disp.dispose_value(pVal);
+                    return true;
+                }
+            );
         }
 
-        /// Extracts an item with minimal key from the map
+        /// Extracts a value with minimal key from the map
         /**
-            Returns \p std::unique_ptr to the leftmost item.
-            If the set is empty, returns empty \p std::unique_ptr.
+            Returns \p exempt_ptr to the leftmost item.
+            If the tree is empty, returns empty \p exempt_ptr.
+
+            Note that the function returns only the value for minimal key.
+            To retrieve its key use \p extract_min( Func ) member function.
 
             @note Due the concurrent nature of the map, the function extracts <i>nearly</i> minimum key.
             It means that the function gets leftmost leaf of the tree and tries to unlink it.
@@ -341,59 +429,150 @@ namespace cds { namespace container {
             RCU \p synchronize method can be called. RCU should NOT be locked.
             The function does not free the item.
             The deallocator will be implicitly invoked when the returned object is destroyed or when
-            its \p reset(nullptr) member function is called.
+            its \p release() member function is called.
         */
-        unique_ptr extract_min()
+        exempt_ptr extract_min()
         {
-            //TODO
+            return exempt_ptr(do_extract_min( []( key_type const& ) {}));
+        }
+
+        /// Extracts minimal key and corresponding value
+        /**
+            Returns \p exempt_ptr to the leftmost item.
+            If the tree is empty, returns empty \p exempt_ptr.
+
+            \p Func functor is used to store minimal key.
+            \p Func has the following signature:
+            \code
+            struct functor {
+                void operator()( key_type const& key );
+            };
+            \endcode
+            If the tree is empty, \p f is not called.
+            Otherwise, it is called with minimal key, the pointer to corresponding value is returned
+            as \p exempt_ptr.
+
+            @note Due the concurrent nature of the map, the function extracts <i>nearly</i> minimum key.
+            It means that the function gets leftmost leaf of the tree and tries to unlink it.
+            During unlinking, a concurrent thread may insert an item with key less than leftmost item's key.
+            So, the function returns the item with minimum key at the moment of tree traversing.
+
+            RCU \p synchronize method can be called. RCU should NOT be locked.
+            The function does not free the item.
+            The deallocator will be implicitly invoked when the returned object is destroyed or when
+            its \p release() member function is called.
+        */
+        template <typename Func>
+        exempt_ptr extract_min( Func f )
+        {
+            return exempt_ptr(do_extract_min( [&f]( key_type const& key ) { f(key); }));
+        }
+
+        /// Extracts minimal key and corresponding value
+        /**
+            This function is a shortcut for the following call:
+            \code
+            key_type key;
+            exempt_ptr xp = theTree.extract_min( [&key]( key_type const& k ) { key = k; } );
+            \endcode
+            \p key_type should be copy-assignable. The copy of minimal key
+            is returned in \p min_key argument.
+        */
+        typename std::enable_if< std::is_copy_assignable<key_type>::value, exempt_ptr >::type
+        extract_min_key( key_type& min_key )
+        {
+            return exempt_ptr(do_extract_min( [&min_key]( key_type const& key ) { min_key = key; }));
         }
 
-        /// Extracts an item with maximal key from the map
+        /// Extracts a value with maximal key from the tree
         /**
-            Returns \p std::unique_ptr pointer to the rightmost item.
-            If the set is empty, returns empty \p std::unique_ptr.
+            Returns \p exempt_ptr pointer to the rightmost item.
+            If the set is empty, returns empty \p exempt_ptr.
+
+            Note that the function returns only the value for maximal key.
+            To retrieve its key use \p extract_max( Func ) member function.
 
             @note Due the concurrent nature of the map, the function extracts <i>nearly</i> maximal key.
             It means that the function gets rightmost leaf of the tree and tries to unlink it.
-            During unlinking, a concurrent thread may insert an item with key great than leftmost item's key.
+            During unlinking, a concurrent thread may insert an item with key greater than rightmost item's key.
             So, the function returns the item with maximum key at the moment of tree traversing.
 
             RCU \p synchronize method can be called. RCU should NOT be locked.
             The function does not free the item.
             The deallocator will be implicitly invoked when the returned object is destroyed or when
-            its \p reset(nullptr) is called.
-            @note Before reusing \p result object you should call its \p release() method.
+            its \p release() is called.
         */
-        unique_ptr extract_max()
+        exempt_ptr extract_max()
         {
-            //TODO
+            return exempt_ptr(do_extract_max( []( key_type const& ) {}));
+        }
+
+        /// Extracts the maximal key and corresponding value
+        /**
+            Returns \p exempt_ptr pointer to the rightmost item.
+            If the set is empty, returns empty \p exempt_ptr.
+
+            \p Func functor is used to store maximal key.
+            \p Func has the following signature:
+            \code
+                struct functor {
+                    void operator()( key_type const& key );
+                };
+            \endcode
+            If the tree is empty, \p f is not called.
+            Otherwise, it is called with maximal key, the pointer to corresponding value is returned
+            as \p exempt_ptr.
+
+            @note Due the concurrent nature of the map, the function extracts <i>nearly</i> maximal key.
+            It means that the function gets rightmost leaf of the tree and tries to unlink it.
+            During unlinking, a concurrent thread may insert an item with key greater than rightmost item's key.
+            So, the function returns the item with maximum key at the moment of tree traversing.
+
+            RCU \p synchronize method can be called. RCU should NOT be locked.
+            The function does not free the item.
+            The deallocator will be implicitly invoked when the returned object is destroyed or when
+            its \p release() is called.
+        */
+        template <typename Func>
+        exempt_ptr extract_max( Func f )
+        {
+            return exempt_ptr(do_extract_max( [&f]( key_type const& key ) { f(key); }));
+        }
+
+        /// Extracts the maximal key and corresponding value
+        /**
+            This function is a shortcut for the following call:
+            \code
+                key_type key;
+                exempt_ptr xp = theTree.extract_max( [&key]( key_type const& k ) { key = k; } );
+            \endcode
+            \p key_type should be copy-assignable. The copy of maximal key
+            is returned in \p max_key argument.
+        */
+        typename std::enable_if< std::is_copy_assignable<key_type>::value, exempt_ptr >::type
+        extract_max_key( key_type& max_key )
+        {
+            return exempt_ptr(do_extract_max( [&max_key]( key_type const& key ) { max_key = key; }));
         }
 
         /// Extracts an item from the map
         /**
             The function searches an item with key equal to \p key in the tree,
-            unlinks it, and returns \p std::unique_ptr pointer to a value found.
-            If \p key is not found the function returns an empty \p std::unique_ptr.
+            unlinks it, and returns \p exempt_ptr pointer to a value found.
+            If \p key is not found the function returns an empty \p exempt_ptr.
 
             RCU \p synchronize method can be called. RCU should NOT be locked.
             The function does not destroy the value found.
             The disposer will be implicitly invoked when the returned object is destroyed or when
-            its \p reset(nullptr) member function is called.
+            its \p release() member function is called.
         */
         template <typename Q>
-        unique_ptr extract( Q const& key )
+        exempt_ptr extract( Q const& key )
         {
-            unique_ptr pExtracted;
-
-            do_update(
-                key,
-                key_comparator(),
-                [&pExtracted]( mapped_type * pVal ) { pExtracted.reset( pVal ); },
-                update_flags::allow_remove
-                );
-            return pExtracted;
+            return exempt_ptr(do_extract( key ));
         }
 
+
         /// Extracts an item from the map using \p pred for searching
         /**
             The function is an analog of \p extract(Q const&)
@@ -402,18 +581,9 @@ namespace cds { namespace container {
             \p pred must imply the same element order as the comparator used for building the tree.
         */
         template <typename Q, typename Less>
-        unique_ptr extract_with( Q const& key, Less pred )
+        exempt_ptr extract_with( Q const& key, Less pred )
         {
-            CDS_UNUSED( pred );
-            unique_ptr pExtracted;
-
-            do_update(
-                key,
-                cds::details::predicate_wrapper<key_type, Less, cds::details::trivial_accessor >(),
-                [&pExtracted]( mapped_type * pVal ) { pExtracted.reset( pVal ); },
-                update_flags::allow_remove
-                );
-            return pExtracted;
+            return exempt_ptr(do_extract_with( key, pred ));
         }
 
         /// Find the key \p key
@@ -422,7 +592,7 @@ namespace cds { namespace container {
             The interface of \p Func functor is:
             \code
             struct functor {
-                void operator()( mapped_type& item );
+                void operator()( key_type const& key, std::remove_pointer< mapped_type )::type& item );
             };
             \endcode
             where \p item is the item found.
@@ -435,16 +605,17 @@ namespace cds { namespace container {
         template <typename K, typename Func>
         bool find( K const& key, Func f )
         {
-            return do_find( key, key_comparator(), [&f]( sync_monitor& monitor, node_type * pNode ) -> bool {
-                assert( pNode != nullptr );
-                node_scoped_lock l( monitor, *pNode );
-                mapped_type * pVal = pNode->m_pValue.load( memory_model::memory_order_relaxed );
-                if ( pVal ) {
-                    f( *pVal );
-                    return true;
+            return do_find( key, key_comparator(),
+                [&f]( node_type * pNode ) -> bool {
+                    assert( pNode != nullptr );
+                    mapped_type pVal = pNode->m_pValue.load( memory_model::memory_order_relaxed );
+                    if ( pVal ) {
+                        f( pNode->m_key, *pVal );
+                        return true;
+                    }
+                    return false;
                 }
-                return false;
-            });
+            );
         }
 
         /// Finds the key \p val using \p pred predicate for searching
@@ -458,21 +629,20 @@ namespace cds { namespace container {
         bool find_with( K const& key, Less pred, Func f )
         {
             CDS_UNUSED( pred );
-            return do_find( key, opt::details::make_comparator_from_less<Less>(), 
-                [&f]( sync_monitor& monitor, node_type * pNode ) -> bool {
+            return do_find( key, cds::opt::details::make_comparator_from_less<Less>(),
+                [&f]( node_type * pNode ) -> bool {
                     assert( pNode != nullptr );
-                    node_scoped_lock l( monitor, *pNode );
-                    mapped_type * pVal = pNode->m_pValue.load( memory_model::memory_order_relaxed );
+                    mapped_type pVal = pNode->m_pValue.load( memory_model::memory_order_relaxed );
                     if ( pVal ) {
-                        f( *pVal );
+                        f( pNode->m_key, *pVal );
                         return true;
                     }
                     return false;
-                } 
+                }
             );
         }
 
-        /// Find the key \p key
+        /// Checks whether the map contains \p key
         /**
             The function searches the item with key equal to \p key
             and returns \p true if it is found, and \p false otherwise.
@@ -480,23 +650,22 @@ namespace cds { namespace container {
             The function applies RCU lock internally.
         */
         template <typename K>
-        bool find( K const& key )
+        bool contains( K const& key )
         {
             return do_find( key, key_comparator(), []( node_type * ) -> bool { return true; });
         }
 
-        /// Finds the key \p val using \p pred predicate for searching
+        /// Checks whether the map contains \p key using \p pred predicate for searching
         /**
-            The function is an analog of \p find(K const&)
-            but \p pred is used for key comparing.
+            The function is similar to <tt>contains( key )</tt> but \p pred is used for key comparing.
             \p Less functor has the interface like \p std::less.
-            \p Less must imply the same element order as the comparator used for building the map.
+            \p Less must imply the same element order as the comparator used for building the set.
         */
         template <typename K, typename Less>
-        bool find_with( K const& key, Less pred )
+        bool contains( K const& key, Less pred )
         {
             CDS_UNUSED( pred );
-            return do_find( key, opt::details::make_comparator_from_less<Less>(), []( node_type * ) -> bool { return true; } );
+            return do_find( key, cds::opt::details::make_comparator_from_less<Less>(), []( node_type * ) -> bool { return true; } );
         }
 
         /// Clears the tree (thread safe, not atomic)
@@ -506,7 +675,7 @@ namespace cds { namespace container {
             this sequence
             \code
             set.clear();
-            assert( set.empty() );
+            assert( set.empty());
             \endcode
             the assertion could be raised.
 
@@ -516,11 +685,7 @@ namespace cds { namespace container {
         */
         void clear()
         {
-            for ( ;; ) {
-                unique_ptr ep( extract_min() );
-                if ( !ep )
-                    return;
-            }
+            while ( extract_min());
         }
 
         /// Clears the tree (not thread safe)
@@ -530,6 +695,7 @@ namespace cds { namespace container {
         */
         void unsafe_clear()
         {
+            clear(); // temp solution
             //TODO
         }
 
@@ -560,25 +726,98 @@ namespace cds { namespace container {
             return m_stat;
         }
 
+        /// Returns reference to \p sync_monitor object
+        sync_monitor& monitor()
+        {
+            return m_Monitor;
+        }
+        //@cond
+        sync_monitor const& monitor() const
+        {
+            return m_Monitor;
+        }
+        //@endcond
+
         /// Checks internal consistency (not atomic, not thread-safe)
         /**
             The debugging function to check internal consistency of the tree.
         */
         bool check_consistency() const
         {
-            //TODO
+            return check_consistency([]( size_t /*nLevel*/, size_t /*hLeft*/, size_t /*hRight*/ ){} );
+        }
+
+        /// Checks internal consistency (not atomic, not thread-safe)
+        /**
+            The debugging function to check internal consistency of the tree.
+            The functor \p Func is called if a violation of internal tree structure
+            is found:
+            \code
+            struct functor {
+                void operator()( size_t nLevel, size_t hLeft, size_t hRight );
+            };
+            \endcode
+            where
+            - \p nLevel - the level where the violation is found
+            - \p hLeft - the height of left subtree
+            - \p hRight - the height of right subtree
+
+            The functor is called for each violation found.
+        */
+        template <typename Func>
+        bool check_consistency( Func f ) const
+        {
+            node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_acquire );
+            if ( pChild ) {
+                size_t nErrors = 0;
+                do_check_consistency( pChild, 1, f, nErrors );
+                return nErrors == 0;
+            }
             return true;
         }
 
     protected:
         //@cond
+        template <typename Func>
+        size_t do_check_consistency( node_type * pNode, size_t nLevel, Func f, size_t& nErrors ) const
+        {
+            if ( pNode ) {
+                key_comparator cmp;
+                node_type * pLeft = child( pNode, left_child, memory_model::memory_order_acquire );
+                node_type * pRight = child( pNode, right_child, memory_model::memory_order_acquire );
+                if ( pLeft && cmp( pLeft->m_key, pNode->m_key ) > 0 )
+                    ++nErrors;
+                if (  pRight && cmp( pNode->m_key, pRight->m_key ) > 0 )
+                    ++nErrors;
+
+                size_t hLeft = do_check_consistency( pLeft, nLevel + 1, f, nErrors );
+                size_t hRight = do_check_consistency( pRight, nLevel + 1, f, nErrors );
+
+                if ( hLeft >= hRight ) {
+                    if ( hLeft - hRight > 1 ) {
+                        f( nLevel, hLeft, hRight );
+                        ++nErrors;
+                    }
+                    return hLeft;
+                }
+                else {
+                    if ( hRight - hLeft > 1 ) {
+                        f( nLevel, hLeft, hRight );
+                        ++nErrors;
+                    }
+                    return hRight;
+                }
+            }
+            return 0;
+        }
+
         template <typename Q, typename Compare, typename Func>
         bool do_find( Q& key, Compare cmp, Func f ) const
         {
             find_result result;
             {
                 rcu_lock l;
-                result = try_find( key, cmp, f, m_pRoot, 1, 0 );
+                result = try_find( key, cmp, f, m_pRoot, right_child, 0 );
             }
             assert( result != find_result::retry );
             return result == find_result::found;
@@ -592,107 +831,271 @@ namespace cds { namespace container {
             rcu_disposer removed_list;
             {
                 rcu_lock l;
-                return try_udate_root( key, cmp, nFlags, funcUpdate, removed_list );
+                return try_update_root( key, cmp, nFlags, funcUpdate, removed_list );
             }
         }
 
+        template <typename K, typename Compare, typename Func>
+        bool do_remove( K const& key, Compare cmp, Func func )
+        {
+            // Func must return true if the value was disposed
+            //              or false if the value was extracted
+
+            check_deadlock_policy::check();
+
+            rcu_disposer removed_list;
+            {
+                rcu_lock l;
+                return try_remove_root( key, cmp, func, removed_list );
+            }
+        }
+
+        template <typename Func>
+        mapped_type do_extract_min( Func f )
+        {
+            mapped_type pExtracted = nullptr;
+            do_extract_minmax(
+                left_child,
+                [&pExtracted, &f]( key_type const& key, mapped_type pVal, rcu_disposer& ) -> bool { f( key ); pExtracted = pVal; return false; }
+            );
+            return pExtracted;
+        }
+
+        template <typename Func>
+        mapped_type do_extract_max( Func f )
+        {
+            mapped_type pExtracted = nullptr;
+            do_extract_minmax(
+                right_child,
+                [&pExtracted, &f]( key_type const& key, mapped_type pVal, rcu_disposer& ) -> bool { f( key ); pExtracted = pVal; return false; }
+            );
+            return pExtracted;
+        }
+
+        template <typename Func>
+        void do_extract_minmax( int nDir, Func func )
+        {
+            check_deadlock_policy::check();
+
+            rcu_disposer removed_list;
+            {
+                rcu_lock l;
+
+                while ( true ) {
+                    int result;
+
+                    // get right child of root
+                    node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_acquire );
+                    if ( pChild ) {
+                        version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
+                        if ( nChildVersion & node_type::shrinking ) {
+                            m_stat.onRemoveRootWaitShrinking();
+                            pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
+                            result = update_flags::retry;
+                        }
+                        else if ( pChild == child( m_pRoot, right_child, memory_model::memory_order_acquire )) {
+                            result = try_extract_minmax( nDir, func, m_pRoot, pChild, nChildVersion, removed_list );
+                        }
+                        else
+                            result = update_flags::retry;
+                    }
+                    else
+                        return;
+
+                    if ( result == update_flags::retry )
+                        m_stat.onRemoveRetry();
+                    else {
+                        m_stat.onExtract( result == update_flags::result_removed );
+                        return;
+                    }
+                }
+            }
+        }
+
+        template <typename Q>
+        mapped_type do_extract( Q const& key )
+        {
+            mapped_type pExtracted = nullptr;
+            do_remove(
+                key,
+                key_comparator(),
+                [&pExtracted]( key_type const&, mapped_type pVal, rcu_disposer& ) -> bool { pExtracted = pVal; return false; }
+            );
+            m_stat.onExtract( pExtracted != nullptr );
+            return pExtracted;
+        }
+
+        template <typename Q, typename Less>
+        mapped_type do_extract_with( Q const& key, Less pred )
+        {
+            CDS_UNUSED( pred );
+            mapped_type pExtracted = nullptr;
+            do_remove(
+                key,
+                cds::opt::details::make_comparator_from_less<Less>(),
+                [&pExtracted]( key_type const&, mapped_type pVal, rcu_disposer& ) -> bool { pExtracted = pVal; return false; }
+            );
+            m_stat.onExtract( pExtracted != nullptr );
+            return pExtracted;
+        }
         //@endcond
 
     private:
         //@cond
+        static int height( node_type * pNode, atomics::memory_order order )
+        {
+            assert( pNode );
+            return pNode->m_nHeight.load( order );
+        }
+        static void set_height( node_type * pNode, int h, atomics::memory_order order )
+        {
+            assert( pNode );
+            pNode->m_nHeight.store( h, order );
+        }
+        static int height_null( node_type * pNode, atomics::memory_order order )
+        {
+            return pNode ? height( pNode, order ) : 0;
+        }
+
+        static CDS_CONSTEXPR int const c_stackSize = 64;
+
         template <typename Q, typename Compare, typename Func>
         find_result try_find( Q const& key, Compare cmp, Func f, node_type * pNode, int nDir, version_type nVersion ) const
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
             assert( pNode );
 
-            while ( true ) {
-                node_type * pChild = pNode->child( nDir ).load( memory_model::memory_order_relaxed );
-                if ( !pChild ) {
-                    if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
-                        m_stat.onFindRetry();
-                        return find_result::retry;
-                    }
+            struct stack_record
+            {
+                node_type * pNode;
+                version_type nVersion;
+                int nDir;
+            };
 
-                    m_stat.onFindFailed();
-                    return find_result::not_found;
-                }
+            stack_record stack[c_stackSize];
+            int pos = 0;
+            stack[0].pNode = pNode;
+            stack[0].nVersion = nVersion;
+            stack[0].nDir = nDir;
+
+            while ( pos >= 0 ) {
+                pNode = stack[pos].pNode;
+                nVersion = stack[pos].nVersion;
+                nDir = stack[pos].nDir;
+
+                while ( true ) {
+                    node_type * pChild = child( pNode, nDir, memory_model::memory_order_acquire );
+                    if ( !pChild ) {
+                        if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                            --pos;
+                            m_stat.onFindRetry();
+                            break; // retry
+                        }
+                        m_stat.onFindFailed();
+                        return find_result::not_found;
+                    }
 
-                int nCmp = cmp( key, pChild->m_key );
-                if ( nCmp == 0 ) {
-                    if ( pChild->is_valued( memory_model::memory_order_relaxed ) ) {
-                        // key found
-                        node_scoped_lock l( m_Monitor, pChild );
-                        if ( pChild->is_valued( memory_model::memory_order_relaxed )) {
-                            if ( f( *pChild ) ) {
-                                m_stat.onFindSuccess();
-                                return find_result::found;
+                    int nCmp = cmp( key, pChild->m_key );
+                    if ( nCmp == 0 ) {
+                        if ( pChild->is_valued( memory_model::memory_order_acquire )) {
+                            // key found
+                            node_scoped_lock l( m_Monitor, *pChild );
+                            if ( child(pNode, nDir, memory_model::memory_order_acquire) == pChild ) {
+                                if ( pChild->is_valued( memory_model::memory_order_relaxed )) {
+                                    if ( f( pChild )) {
+                                        m_stat.onFindSuccess();
+                                        return find_result::found;
+                                    }
+                                }
+                            }
+                            else {
+                                m_stat.onFindRetry();
+                                continue;
                             }
                         }
+                        m_stat.onFindFailed();
+                        return find_result::not_found;
                     }
+                    else {
+                        version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
+                        if ( nChildVersion & node_type::shrinking ) {
+                            m_stat.onFindWaitShrinking();
+                            pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
+
+                            if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                                --pos;
+                                m_stat.onFindRetry();
+                                break; // retry
+                            }
+                        }
+                        else if ( nChildVersion != node_type::unlinked && child( pNode, nDir, memory_model::memory_order_acquire ) == pChild )
+                        {
+                            if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                                --pos;
+                                m_stat.onFindRetry();
+                                break; // retry
+                            }
 
-                    m_stat.onFindFailed();
-                    return find_result::not_found;
-                }
-
-                version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
-                if ( nChildVersion & node_type::shrinking ) {
-                    m_stat.onFindWaitShrinking();
-                    pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
-
-                    if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
-                        m_stat.onFindRetry();
-                        return find_result::retry;
-                    }
-                }
-                else if ( nChildVersion != node_type::unlinked ) {
+                            ++pos;
+                            assert(pos < c_stackSize);
+                            stack[pos].pNode = pChild;
+                            stack[pos].nVersion = nChildVersion;
+                            stack[pos].nDir = nCmp;
+                            break; // child iteration
+                        }
 
-                    if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
-                        m_stat.onFindRetry();
-                        return find_result::retry;
+                        if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                            --pos;
+                            m_stat.onFindRetry();
+                            break; // retry
+                        }
                     }
-
-                    find_result found = try_find( key, cmp, f, pChild, nCmp, nChildVersion );
-                    if ( found != find_result::retry )
-                        return found;
+                    m_stat.onFindRetry();
                 }
             }
+            return find_result::retry;
         }
 
         template <typename K, typename Compare, typename Func>
-        int try_update_root( K const* key, Compare cmp, int nFlags, Func funcUpdate, rcu_disposer& disp )
+        int try_update_root( K const& key, Compare cmp, int nFlags, Func funcUpdate, rcu_disposer& disp )
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
+
+            while ( true ) {
+                int result;
 
-            int result;
-            do {
                 // get right child of root
-                node_type * pChild = m_Root.child( 1, memory_model::memory_order_acquire );
+                node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_acquire );
                 if ( pChild ) {
-                    version_type nChildVersion = pChild->version( memory_model::memory_order_relaxed );
+                    version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
                     if ( nChildVersion & node_type::shrinking ) {
                         m_stat.onUpdateRootWaitShrinking();
-                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
+                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
                         result = update_flags::retry;
                     }
-                    else if ( pChild == m_Root.child( 1, memory_model::memory_order_acquire ) ) {
-                        result = try_update( key, cmp, nFlags, funcUpdate, m_pRoot, pChild, nChildVersion, disp );
-                    }
-                } 
+                    else if ( pChild == child( m_pRoot, right_child, memory_model::memory_order_acquire ))
+                        result = try_update( key, cmp, nFlags, funcUpdate, pChild, nChildVersion, disp );
+                    else
+                        result = update_flags::retry;
+                }
                 else {
                     // the tree is empty
                     if ( nFlags & update_flags::allow_insert ) {
                         // insert into tree as right child of the root
                         {
                             node_scoped_lock l( m_Monitor, *m_pRoot );
+                            if ( child( m_pRoot, right_child, memory_model::memory_order_acquire ) != nullptr ) {
+                                result = update_flags::retry;
+                                continue;
+                            }
 
                             node_type * pNew = alloc_node( key, 1, 0, m_pRoot, nullptr, nullptr );
-                            mapped_type pVal = funcUpdate( pNew );
+                            mapped_type pVal = funcUpdate( pNew );
                             assert( pVal != nullptr );
                             pNew->m_pValue.store( pVal, memory_model::memory_order_release );
 
-                            m_pRoot->child( pNew, 1, memory_model::memory_order_relaxed );
-                            m_pRoot->height( 2, memory_model::memory_order_relaxed );
+                            m_pRoot->child( pNew, right_child, memory_model::memory_order_release);
+                            set_height( m_pRoot, 2, memory_model::memory_order_release );
                         }
 
                         ++m_ItemCounter;
@@ -702,56 +1105,286 @@ namespace cds { namespace container {
 
                     return update_flags::failed;
                 }
-            } while ( result == update_flags::retry );
-            return result;
+
+                if ( result != update_flags::retry )
+                    return result;
+            }
         }
 
         template <typename K, typename Compare, typename Func>
-        int try_update( K const& key, Compare cmp, int nFlags, Func funcUpdate, node_type * pParent, node_type * pNode, version_type nVersion, rcu_disposer& disp )
+        bool try_remove_root( K const& key, Compare cmp, Func func, rcu_disposer& disp )
         {
-            assert( gc::is_locked() );
+            assert( gc::is_locked());
+
+            while ( true ) {
+                int result;
+
+                // get right child of root
+                node_type * pChild = child( m_pRoot, right_child, memory_model::memory_order_acquire );
+                if ( pChild ) {
+                    version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
+                    if ( nChildVersion & node_type::shrinking ) {
+                        m_stat.onRemoveRootWaitShrinking();
+                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
+                        result = update_flags::retry;
+                    }
+                    else if ( pChild == child( m_pRoot, right_child, memory_model::memory_order_acquire )) {
+                        result = try_remove( key, cmp, func, m_pRoot, pChild, nChildVersion, disp );
+                    }
+                    else
+                        result = update_flags::retry;
+                }
+                else
+                    return false;
+
+                if ( result == update_flags::retry )
+                    m_stat.onRemoveRetry();
+                else {
+                    m_stat.onRemove( result == update_flags::result_removed );
+                    return result == update_flags::result_removed;
+                }
+            }
+        }
+
+        template <typename K, typename Compare, typename Func>
+        int try_update( K const& key, Compare cmp, int nFlags, Func funcUpdate, node_type * pNode, version_type nVersion, rcu_disposer& disp )
+        {
+            assert( gc::is_locked());
             assert( nVersion != node_type::unlinked );
 
-            int nCmp = cmp( key, pNode->m_key );
-            if ( nCmp == 0 ) {
-                if ( nFlags & update_flags::allow_update ) {
-                    return try_update_node( funcUpdate, pNode );
+            struct stack_record
+            {
+                node_type * pNode;
+                version_type nVersion;
+            };
+
+            stack_record stack[c_stackSize];
+            int pos = 0;
+            stack[0].pNode = pNode;
+            stack[0].nVersion = nVersion;
+
+            while ( pos >= 0 ) {
+                pNode = stack[pos].pNode;
+                nVersion = stack[pos].nVersion;
+
+                int nCmp = cmp( key, pNode->m_key );
+                if ( nCmp == 0 ) {
+                    int result = try_update_node( nFlags, funcUpdate, pNode, nVersion, disp );
+                    if ( result != update_flags::retry )
+                        return result;
+                    --pos;
+                    m_stat.onUpdateRetry();
+                    continue;
                 }
-                if ( nFlags & update_flags::allow_remove ) {
-                    return try_remove_node( pParent, pNode, nVersion, funcUpdate, disp );
+
+                while ( true ) {
+                    node_type * pChild = child( pNode, nCmp, memory_model::memory_order_acquire );
+                    if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                        --pos;
+                        m_stat.onUpdateRetry();
+                        break;
+                    }
+
+                    if ( pChild == nullptr ) {
+                        // insert new node
+                        if ( nFlags & update_flags::allow_insert ) {
+                            int result = try_insert_node( key, funcUpdate, pNode, nCmp, nVersion, disp );
+                            if ( result != update_flags::retry )
+                                return result;
+                            --pos;
+                            m_stat.onUpdateRetry();
+                            break;
+                        }
+                        else
+                            return update_flags::failed;
+                    }
+                    else {
+                        // update child
+                        version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
+                        if ( nChildVersion & node_type::shrinking ) {
+                            m_stat.onUpdateWaitShrinking();
+                            pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
+                            // retry
+                        }
+                        else if ( pChild == child( pNode, nCmp, memory_model::memory_order_acquire )) {
+                            // this second read is important, because it is protected by nChildVersion
+
+                            // validate the read that our caller took to get to node
+                            if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
+                                --pos;
+                                m_stat.onUpdateRetry();
+                                break; // retry
+                            }
+                            // At this point we know that the traversal our parent took to get to node is still valid.
+                            // The recursive implementation will validate the traversal from node to
+                            // child, so just prior to the node nVersion validation both traversals were definitely okay.
+                            // This means that we are no longer vulnerable to node shrinks, and we don't need
+                            // to validate node version any more.
+                            ++pos;
+                            assert( pos < c_stackSize );
+                            stack[pos].pNode = pChild;
+                            stack[pos].nVersion = nChildVersion;
+                            assert( nChildVersion != node_type::unlinked );
+                            break; // child iteration
+                        }
+                        m_stat.onUpdateRetry();
+                    }
                 }
-                return update_flags::failed;
             }
+            return update_flags::retry;
+        }
 
-            int result;
-            do {
-                node_type * pChild = pNode->child( nCmp ).load( memory_model::memory_order_relaxed );
-                if ( pNode->version( memory_model::memory_order_acquire ) != nVersion )
-                    return update_flags::retry;
+        template <typename K, typename Compare, typename Func>
+        int try_remove( K const& key, Compare cmp, Func func, node_type * pParent, node_type * pNode, version_type nVersion, rcu_disposer& disp )
+        {
+            assert( gc::is_locked());
+            assert( nVersion != node_type::unlinked );
 
-                if ( pChild == nullptr ) {
-                    // insert new node
-                    if ( nFlags & update_flags::allow_insert )
-                        result = try_insert_node( key, funcUpdate, pNode, nCmp, nVersion, disp );
-                    else
-                        result = update_flags::failed;
+            struct stack_record
+            {
+                node_type * pParent;
+                node_type * pNode;
+                version_type nVersion;
+            };
+
+            stack_record stack[c_stackSize];
+            int pos = 0;
+            stack[0].pParent = pParent;
+            stack[0].pNode = pNode;
+            stack[0].nVersion = nVersion;
+
+            while ( pos >= 0 ) {
+                pParent = stack[pos].pParent;
+                pNode = stack[pos].pNode;
+                nVersion = stack[pos].nVersion;
+
+                int nCmp = cmp( key, pNode->m_key );
+                if ( nCmp == 0 ) {
+                    int result = try_remove_node( pParent, pNode, nVersion, func, disp );
+                    if ( result != update_flags::retry )
+                        return result;
+                    --pos;
+                    m_stat.onRemoveRetry();
+                    continue;
                 }
-                else {
+
+                while ( true ) {
+                    node_type * pChild = child( pNode, nCmp, memory_model::memory_order_acquire );
+                    if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                        --pos;
+                        m_stat.onRemoveRetry();
+                        break;
+                    }
+
+                    if ( pChild == nullptr )
+                        return update_flags::failed;
+
                     // update child
-                    result = update_flags::retry;
                     version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
                     if ( nChildVersion & node_type::shrinking ) {
-                        m_stat.onUpdateWaitShrinking();
-                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_relaxed );
+                        m_stat.onRemoveWaitShrinking();
+                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
                         // retry
                     }
-                    else if ( pChild == pNode->child( nCmp ).load( memory_model::memory_order_relaxed ) ) {
+                    else if ( pChild == child( pNode, nCmp, memory_model::memory_order_acquire )) {
                         // this second read is important, because it is protected by nChildVersion
 
                         // validate the read that our caller took to get to node
-                        if ( pNode->version( memory_model::memory_order_relaxed ) != nVersion ) {
-                            m_stat.onUpdateRetry();
-                            return update_flags::retry;
+                        if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                            --pos;
+                            m_stat.onRemoveRetry();
+                            break;
+                        }
+
+                        // At this point we know that the traversal our parent took to get to node is still valid.
+                        // The recursive implementation will validate the traversal from node to
+                        // child, so just prior to the node nVersion validation both traversals were definitely okay.
+                        // This means that we are no longer vulnerable to node shrinks, and we don't need
+                        // to validate node version any more.
+                        ++pos;
+                        assert( pos < c_stackSize );
+                        stack[pos].pParent = pNode;
+                        stack[pos].pNode = pChild;
+                        stack[pos].nVersion = nChildVersion;
+                        break; // child iteration
+                    }
+                    m_stat.onRemoveRetry();
+                }
+            }
+            return update_flags::retry;
+        }
+
+        template <typename Func>
+        int try_extract_minmax( int nDir, Func func, node_type * pParent, node_type * pNode, version_type nVersion, rcu_disposer& disp )
+        {
+            assert( gc::is_locked());
+            assert( nVersion != node_type::unlinked );
+
+            struct stack_record
+            {
+                node_type * pParent;
+                node_type * pNode;
+                version_type nVersion;
+            };
+
+            stack_record stack[c_stackSize];
+            int pos = 0;
+            stack[0].pParent = pParent;
+            stack[0].pNode = pNode;
+            stack[0].nVersion = nVersion;
+
+            while ( pos >= 0 ) {
+                pParent = stack[pos].pParent;
+                pNode = stack[pos].pNode;
+                nVersion = stack[pos].nVersion;
+
+                while ( true ) {
+                    int iterDir = nDir;
+                    node_type * pChild = child( pNode, iterDir, memory_model::memory_order_acquire );
+                    if ( pNode->version(memory_model::memory_order_acquire) != nVersion ) {
+                        --pos;
+                        m_stat.onRemoveRetry();
+                        break;
+                    }
+
+                    if ( !pChild ) {
+                        // Found min/max
+                        if ( pNode->is_valued( memory_model::memory_order_acquire )) {
+                            int result = try_remove_node( pParent, pNode, nVersion, func, disp );
+
+                            if ( result == update_flags::result_removed )
+                                return result;
+
+                            --pos;
+                            m_stat.onRemoveRetry();
+                            break;
+                        }
+                        else {
+                            // check right (for min) or left (for max) child node
+                            iterDir = -iterDir;
+                            pChild = child( pNode, iterDir, memory_model::memory_order_acquire );
+                            if ( !pChild ) {
+                                --pos;
+                                m_stat.onRemoveRetry();
+                                break;
+                            }
+                        }
+                    }
+
+                    version_type nChildVersion = pChild->version( memory_model::memory_order_acquire );
+                    if ( nChildVersion & node_type::shrinking ) {
+                        m_stat.onRemoveWaitShrinking();
+                        pChild->template wait_until_shrink_completed<back_off>( memory_model::memory_order_acquire );
+                        // retry
+                    }
+                    else if ( pChild == child( pNode, iterDir, memory_model::memory_order_acquire )) {
+                        // this second read is important, because it is protected by nChildVersion
+
+                        // validate the read that our caller took to get to node
+                        if ( pNode->version( memory_model::memory_order_acquire ) != nVersion ) {
+                            --pos;
+                            m_stat.onRemoveRetry();
+                            break;
                         }
 
                         // At this point we know that the traversal our parent took to get to node is still valid.
@@ -759,11 +1392,17 @@ namespace cds { namespace container {
                         // child, so just prior to the node nVersion validation both traversals were definitely okay.
                         // This means that we are no longer vulnerable to node shrinks, and we don't need
                         // to validate node version any more.
-                        result = try_update( key, cmp, nFlags, funcUpdate, pNode, pChild, nChildVersion, disp );
+                        ++pos;
+                        assert( pos < c_stackSize );
+                        stack[pos].pParent = pNode;
+                        stack[pos].pNode = pChild;
+                        stack[pos].nVersion = nChildVersion;
+                        break; // child iteration
                     }
+                    m_stat.onRemoveRetry();
                 }
-            } while ( result == update_flags::retry );
-            return result;
+            }
+            return update_flags::retry;
         }
 
         template <typename K, typename Func>
@@ -771,15 +1410,15 @@ namespace cds { namespace container {
         {
             node_type * pNew;
 
-            auto fnCreateNode = [&funcUpdate]( node_type * pNew ) {
-                mapped_type * pVal = funcUpdate( pNew );
+            auto fnCreateNode = [&funcUpdate]( node_type * node ) {
+                mapped_type pVal = funcUpdate( node );
                 assert( pVal != nullptr );
-                pNew->m_pValue.store( pVal, memory_model::memory_order_relaxed );
+                node->m_pValue.store( pVal, memory_model::memory_order_release );
             };
 
-            if ( c_bRelaxedInsert ) {
+            static_if ( c_bRelaxedInsert ) {
                 if ( pNode->version( memory_model::memory_order_acquire ) != nVersion
-                     || pNode->child( nDir ).load( memory_model::memory_order_relaxed ) != nullptr ) 
+                     || child( pNode, nDir, memory_model::memory_order_acquire ) != nullptr )
                 {
                     m_stat.onInsertRetry();
                     return update_flags::retry;
@@ -793,10 +1432,13 @@ namespace cds { namespace container {
                 assert( pNode != nullptr );
                 node_scoped_lock l( m_Monitor, *pNode );
 
-                if ( pNode->version( memory_model::memory_order_relaxed ) != nVersion
-                     || pNode->child( nDir ).load( memory_model::memory_order_relaxed ) != nullptr ) 
+                if ( pNode->version( memory_model::memory_order_acquire ) != nVersion
+                     || child( pNode, nDir, memory_model::memory_order_acquire ) != nullptr )
                 {
-                    if ( c_RelaxedInsert ) {
+                    static_if ( c_bRelaxedInsert ) {
+                        mapped_type pVal = pNew->m_pValue.load( memory_model::memory_order_relaxed );
+                        pNew->m_pValue.store( nullptr, memory_model::memory_order_relaxed );
+                        free_value( pVal );
                         free_node( pNew );
                         m_stat.onRelaxedInsertFailed();
                     }
@@ -805,43 +1447,68 @@ namespace cds { namespace container {
                     return update_flags::retry;
                 }
 
-                if ( !c_bRelaxedInsert )
+                static_if ( !c_bRelaxedInsert )
                     fnCreateNode( pNew = alloc_node( key, 1, 0, pNode, nullptr, nullptr ));
 
-                pNode->child( pNew, nDir, memory_model::memory_order_relaxed );
+                pNode->child( pNew, nDir, memory_model::memory_order_release );
                 pDamaged = fix_height_locked( pNode );
             }
+
+            ++m_ItemCounter;
             m_stat.onInsertSuccess();
 
-            fix_height_and_rebalance( pDamaged, disp );
+            if ( pDamaged ) {
+                fix_height_and_rebalance( pDamaged, disp );
+                m_stat.onInsertRebalanceRequired();
+            }
 
             return update_flags::result_inserted;
         }
 
         template <typename Func>
-        int try_update_node( Func funcUpdate, node_type * pNode )
+        int try_update_node( int nFlags, Func funcUpdate, node_type * pNode, version_type nVersion, rcu_disposer& disp )
         {
-            mapped_type * pOld;
+            mapped_type pOld;
+            bool bInserted;
+            assert( pNode != nullptr );
             {
-                assert( pNode != nullptr );
                 node_scoped_lock l( m_Monitor, *pNode );
 
-                if ( pNode->is_unlinked( memory_model::memory_order_relaxed )) {
+                if ( pNode->version(memory_model::memory_order_acquire) != nVersion )
+                    return update_flags::retry;
+
+                if ( pNode->is_unlinked( memory_model::memory_order_acquire )) {
                     m_stat.onUpdateUnlinked();
                     return update_flags::retry;
                 }
 
+                if ( pNode->is_valued( memory_model::memory_order_relaxed ) && !(nFlags & update_flags::allow_update)) {
+                    m_stat.onInsertFailed();
+                    return update_flags::failed;
+                }
+
                 pOld = pNode->value( memory_model::memory_order_relaxed );
-                mapped_type * pVal = funcUpdate( *pNode );
-                assert( pVal != nullptr );
-                pNode->m_pValue.store( pVal, memory_model::memory_order_relaxed );
+                bInserted = pOld == nullptr;
+                mapped_type pVal = funcUpdate( pNode );
+                if ( pVal == pOld )
+                    pOld = nullptr;
+                else {
+                    assert( pVal != nullptr );
+                    pNode->m_pValue.store( pVal, memory_model::memory_order_release );
+                }
             }
 
             if ( pOld ) {
-                disposer()(pOld);
+                disp.dispose_value(pOld);
                 m_stat.onDisposeValue();
             }
 
+            if ( bInserted ) {
+                ++m_ItemCounter;
+                m_stat.onInsertSuccess();
+                return update_flags::result_inserted;
+            }
+
             m_stat.onUpdateSuccess();
             return update_flags::result_updated;
         }
@@ -852,77 +1519,89 @@ namespace cds { namespace container {
             assert( pParent != nullptr );
             assert( pNode != nullptr );
 
-            if ( !pNode->is_valued( atomics::memory_order_relaxed ) )
+            if ( !pNode->is_valued( memory_model::memory_order_acquire ))
                 return update_flags::failed;
 
-            if ( pNode->child( -1, atomics::memory_order_relaxed ) == nullptr 
-              || pNode->child( 1, atomics::memory_order_relaxed ) == nullptr )
-            { 
+            if ( child( pNode, left_child, memory_model::memory_order_acquire ) == nullptr
+              || child( pNode, right_child, memory_model::memory_order_acquire ) == nullptr )
+            {
+                // pNode can be replaced with its child
+
                 node_type * pDamaged;
-                mapped_type pOld;
+                mapped_type pOld;
                 {
                     node_scoped_lock lp( m_Monitor, *pParent );
-                    if ( pParent->is_unlinked( atomics::memory_order_relaxed ) || pNode->m_pParent.load( atomics::memory_order_relaxed ) != pParent )
+                    if ( pParent->is_unlinked( memory_model::memory_order_acquire ) || parent( pNode, memory_model::memory_order_acquire ) != pParent )
                         return update_flags::retry;
 
                     {
                         node_scoped_lock ln( m_Monitor, *pNode );
-                        pOld = pNode->value( atomics::memory_order_relaxed );
-                        if ( pNode->version( atomics::memory_order_relaxed ) == nVersion
-                          && pOld 
-                          && try_unlink_locked( pParent, pNode, disp ) ) 
-                        {
-                            pNode->m_pValue.store( nullptr, atomics::memory_order_relaxed );
-                        }
-                        else
+                        if ( pNode->version( memory_model::memory_order_acquire ) != nVersion )
+                            return update_flags::retry;
+
+                        pOld = pNode->value( memory_model::memory_order_relaxed );
+                        if ( !pOld )
+                            return update_flags::failed;
+
+                        if ( !try_unlink_locked( pParent, pNode, disp ))
                             return update_flags::retry;
                     }
                     pDamaged = fix_height_locked( pParent );
                 }
 
-                func( pOld );   // calls pOld disposer inside
-                m_stat.onDisposeValue();
+                --m_ItemCounter;
+                if ( func( pNode->m_key, pOld, disp ))   // calls pOld disposer inside
+                    m_stat.onDisposeValue();
+                else
+                    m_stat.onExtractValue();
 
-                fix_height_and_rebalance( pDamaged, disp );
-                return upfate_flags::result_removed;
+                if ( pDamaged ) {
+                    fix_height_and_rebalance( pDamaged, disp );
+                    m_stat.onRemoveRebalanceRequired();
+                }
             }
             else {
-                int result = update_flags::retry;
+                // pNode is an internal with two children
+
+                mapped_type pOld;
                 {
                     node_scoped_lock ln( m_Monitor, *pNode );
-                    mapped_type * pOld = pNode->value( atomics::memory_order_relaxed );
-                    if ( pNode->version( atomics::memory_order_relaxed ) == nVersion && pOld ) {
-                        pNode->m_pValue.store( nullptr, atomics::memory_order_relaxed );
-                        result = upfate_flags::result_removed;
-                    }
-                }
+                    pOld = pNode->value( memory_model::memory_order_relaxed );
+                    if ( pNode->version( memory_model::memory_order_relaxed ) != nVersion )
+                        return update_flags::retry;
+                    if ( !pOld )
+                        return update_flags::failed;
 
-                if ( result == upfate_flags::result_removed ) {
-                    func( *pOld );  // calls pOld disposer inside
-                    m_stat.onDisposeValue();
+                    pNode->m_pValue.store( nullptr, memory_model::memory_order_release );
+                    m_stat.onMakeRoutingNode();
                 }
 
-                return result;
+                --m_ItemCounter;
+                if ( func( pNode->m_key, pOld, disp ))  // calls pOld disposer inside
+                    m_stat.onDisposeValue();
+                else
+                    m_stat.onExtractValue();
             }
+            return update_flags::result_removed;
         }
 
         bool try_unlink_locked( node_type * pParent, node_type * pNode, rcu_disposer& disp )
         {
             // pParent and pNode must be locked
-            assert( !pParent->is_unlinked(memory_model::memory_order_relaxed) );
+            assert( !pParent->is_unlinked(memory_model::memory_order_relaxed));
 
-            node_type * pParentLeft = pParent->m_pLeft.load( memory_model::memory_order_relaxed );
-            node_type * pParentRight = pParent->m_pRight.load( memory_model::memory_order_relaxed );
+            node_type * pParentLeft = child( pParent, left_child, memory_model::memory_order_relaxed );
+            node_type * pParentRight = child( pParent, right_child, memory_model::memory_order_relaxed );
             if ( pNode != pParentLeft && pNode != pParentRight ) {
                 // node is no longer a child of parent
                 return false;
             }
 
-            assert( !pNode->is_unlinked());
-            assert( pParent == pNode->m_pParent.load(memory_model::memory_order_relaxed));
+            assert( !pNode->is_unlinked( memory_model::memory_order_relaxed ));
+            assert( pParent == parent( pNode, memory_model::memory_order_relaxed ));
 
-            node_type * pLeft = pNode->m_pLeft.load( memory_model::memory_order_relaxed );
-            node_type * pRight = pNode->m_pRight.load( memory_model::memory_order_relaxed );
+            node_type * pLeft = child( pNode, left_child, memory_model::memory_order_relaxed );
+            node_type * pRight = child( pNode, right_child, memory_model::memory_order_relaxed );
             if ( pLeft != nullptr && pRight != nullptr ) {
                 // splicing is no longer possible
                 return false;
@@ -930,16 +1609,21 @@ namespace cds { namespace container {
             node_type * pSplice = pLeft ? pLeft : pRight;
 
             if ( pParentLeft == pNode )
-                pParent->m_pLeft.store( pSplice, memory_model::memory_order_relaxed );
+                pParent->m_pLeft.store( pSplice, memory_model::memory_order_release );
             else
-                pParent->m_pRight.store( pSplice, memory_model::memory_order_relaxed );
+                pParent->m_pRight.store( pSplice, memory_model::memory_order_release );
 
             if ( pSplice )
-                pSplice->pParent.store( pParent, memory_model::memory_order_release );
+                pSplice->parent( pParent, memory_model::memory_order_release );
 
             // Mark the node as unlinked
             pNode->version( node_type::unlinked, memory_model::memory_order_release );
+
+            // The value will be disposed by calling function
+            pNode->m_pValue.store( nullptr, memory_model::memory_order_release );
+
             disp.dispose( pNode );
+            m_stat.onDisposeNode();
 
             return true;
         }
@@ -948,17 +1632,22 @@ namespace cds { namespace container {
 
     private: // rotations
         //@cond
+        int check_node_ordering( node_type* pParent, node_type* pChild )
+        {
+            return key_comparator()( pParent->m_key, pChild->m_key );
+        }
+
         int estimate_node_condition( node_type * pNode )
         {
-            node_type * pLeft = pNode->m_pLeft.load( memory_model::memory_order_relaxed );
-            node_type * pRight = pNode->m_pRight.load( memory_model::memory_order_relaxed );
+            node_type * pLeft = child( pNode, left_child, memory_model::memory_order_acquire );
+            node_type * pRight = child( pNode, right_child, memory_model::memory_order_acquire );
 
-            if ( (pLeft == nullptr || pRight == nullptr) && pNode->value( memory_model::memory_order_relaxed ) == nullptr )
+            if ( (pLeft == nullptr || pRight == nullptr) && !pNode->is_valued( memory_model::memory_order_acquire ))
                 return unlink_required;
 
-            int h = pNode->height( memory_model::memory_order_relaxed );
-            int hL = pLeft ? pLeft->height( memory_model::memory_order_relaxed ) : 0;
-            int hR = pRight ? pRight->height( memory_model::memory_order_relaxed ) : 0;
+            int h = height( pNode, memory_model::memory_order_acquire );
+            int hL = height_null( pLeft, memory_model::memory_order_acquire );
+            int hR = height_null( pRight, memory_model::memory_order_acquire );
 
             int hNew = 1 + std::max( hL, hR );
             int nBalance = hL - hR;
@@ -987,28 +1676,26 @@ namespace cds { namespace container {
                 case nothing_required:
                     return nullptr;
                 default:
-                    pNode->height( h, memory_model::memory_order_relaxed );
-                    return pNode->m_pParent.load( memory_model::memory_order_relaxed );
+                    set_height( pNode, h, memory_model::memory_order_release );
+                    return parent( pNode, memory_model::memory_order_relaxed );
             }
         }
 
         void fix_height_and_rebalance( node_type * pNode, rcu_disposer& disp )
         {
-            while ( pNode && pNode->m_pParent.load( memory_model::memory_order_relaxed ) ) {
+            while ( pNode && parent( pNode, memory_model::memory_order_acquire )) {
                 int nCond = estimate_node_condition( pNode );
-                if ( nCond == nothing_required || pNode->is_unlinked( memory_model::memory_order_relaxed ) )
+                if ( nCond == nothing_required || pNode->is_unlinked( memory_model::memory_order_acquire ))
                     return;
 
                 if ( nCond != unlink_required && nCond != rebalance_required )
                     pNode = fix_height( pNode );
                 else {
-                    node_type * pParent = pNode->m_pParent.load( memory_model::memry_order_relaxed );
+                    node_type * pParent = parent( pNode, memory_model::memory_order_acquire );
                     assert( pParent != nullptr );
                     {
                         node_scoped_lock lp( m_Monitor, *pParent );
-                        if ( !pParent->is_unlinked( memory_model::memory_order_relaxed )
-                             && pNode->m_pParent.load( memory_model::memory_order_relaxed ) == pParent ) 
-                        {
+                        if ( !pParent->is_unlinked( memory_model::memory_order_relaxed ) && parent( pNode, memory_model::memory_order_acquire ) == pParent ) {
                             node_scoped_lock ln( m_Monitor, *pNode );
                             pNode = rebalance_locked( pParent, pNode, disp );
                         }
@@ -1021,14 +1708,12 @@ namespace cds { namespace container {
         {
             // pParent and pNode should be locked.
             // Returns a damaged node, or nullptr if no more rebalancing is necessary
-            assert( pNode->m_pParent.load( memory_model::memry_order_relaxed ) == pNode );
-            assert( m_pParent->m_pLeft.load( memory_model::memory_order_relaxed ) == pNode
-                 || m_pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+            assert( parent( pNode, memory_model::memory_order_relaxed ) == pParent );
 
-            node_type * pLeft = pNode->m_pLeft.load( memory_model::memory_order_relaxed );
-            node_type * pRight = pNode->m_pRight.load( memory_model::memory_order_relaxed );
+            node_type * pLeft = child( pNode, left_child, memory_model::memory_order_relaxed );
+            node_type * pRight = child( pNode, right_child, memory_model::memory_order_relaxed );
 
-            if ( (pLeft == nullptr || pRight == nullptr) && pNode->value( memory_model::memory_order_relaxed ) == nullptr ) {
+            if ( (pLeft == nullptr || pRight == nullptr) && !pNode->is_valued( memory_model::memory_order_relaxed )) {
                 if ( try_unlink_locked( pParent, pNode, disp ))
                     return fix_height_locked( pParent );
                 else {
@@ -1037,9 +1722,12 @@ namespace cds { namespace container {
                 }
             }
 
-            int h = pNode->height( memory_model::memory_order_relaxed );
-            int hL = pLeft ? pLeft->height( memory_model::memory_order_relaxed ) : 0;
-            int hR = pRight ? pRight->height( memory_model::memory_order_relaxed ) : 0;
+            assert( child( pParent, left_child, memory_model::memory_order_relaxed ) == pNode
+                 || child( pParent, right_child, memory_model::memory_order_relaxed ) == pNode );
+
+            int h = height( pNode, memory_model::memory_order_acquire );
+            int hL = height_null( pLeft, memory_model::memory_order_acquire );
+            int hR = height_null( pRight, memory_model::memory_order_acquire );
             int hNew = 1 + std::max( hL, hR );
             int balance = hL - hR;
 
@@ -1048,7 +1736,7 @@ namespace cds { namespace container {
             else if ( balance < -1 )
                 return rebalance_to_left_locked( pParent, pNode, pRight, hL );
             else if ( hNew != h ) {
-                pNode->height( hNew, memory_model::memory_order_relaxed );
+                set_height( pNode, hNew, memory_model::memory_order_release );
 
                 // pParent is already locked
                 return fix_height_locked( pParent );
@@ -1059,106 +1747,116 @@ namespace cds { namespace container {
 
         node_type * rebalance_to_right_locked( node_type * pParent, node_type * pNode, node_type * pLeft, int hR )
         {
-            assert( pNode->m_pParent.load( memory_model::memry_order_relaxed ) == pNode );
-            assert( m_pParent->m_pLeft.load( memory_model::memory_order_relaxed ) == pNode
-                 || m_pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+            assert( parent( pNode, memory_model::memory_order_relaxed ) == pParent );
+            assert( child( pParent, left_child, memory_model::memory_order_relaxed ) == pNode
+                 || child( pParent, right_child, memory_model::memory_order_relaxed ) == pNode );
 
             // pParent and pNode is locked yet
             // pNode->pLeft is too large, we will rotate-right.
             // If pLeft->pRight is taller than pLeft->pLeft, then we will first rotate-left pLeft.
 
-            {
-                assert( pLeft != nullptr );
-                node_scoped_lock l( m_Monitor, *pLeft );
-                if ( pNode->m_pLeft.load( memory_model::memory_order_relaxed ) != pLeft )
-                    return pNode; // retry for pNode
-
-                int hL = pLeft->height( memory_model::memory_order_relaxed );
-                if ( hL - hR <= 1 )
-                    return pNode; // retry
-
-                node_type * pLRight = pLeft->m_pRight.load( memory_model::memory_order_relaxed );
-                int hLR = pLRight ? pLRight->height( memory_model::memory_order_relaxed ) : 0;
-                node_type * pLLeft = pLeft->m_pLeft.load( memory_model::memory_order_relaxed );
-                int hLL = pLLeft ? pLLeft->height( memory_model::memory_order_relaxed ) : 0;
-
-                if ( hLL > hLR ) {
-                    // rotate right
-                    return rotate_right_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLR );
-                }
-                else {
-                    assert( pLRight != nullptr );
-                    {
-                        node_scoped_lock lr( m_Monitor, *pLRight );
-                        if ( pLeft->m_pRight.load( memory_model::memory_order_relaxed ) != pLRight )
-                            return pNode; // retry
-
-                        hLR = pLRight->height( memory_model::memory_order_relaxed );
-                        if ( hLL > hLR )
-                            return rotate_right_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLR );
-
-                        node_type * pLRLeft = pLRight->m_pLeft.load( memory_model::memory_order_relaxed );
-                        int hLRL = pLRLeft ? pLRLeft->height( memory_model::memory_order_relaxed  ) : 0;
-                        int balance = hLL - hLRL;
-                        if ( balance >= -1 && balance <= 1 && !((hLL == 0 || hLRL == 0) && pLeft->value(memory_model::memory_order_relaxed) == nullptr) ) {
-                            // nParent.child.left won't be damaged after a double rotation
-                            return rotate_right_over_left_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLRL );
-                        }
-                    }
+            assert( pLeft != nullptr );
+            node_scoped_lock l( m_Monitor, *pLeft );
+            if ( pNode->m_pLeft.load( memory_model::memory_order_relaxed ) != pLeft )
+                return pNode; // retry for pNode
+
+            assert( check_node_ordering( pNode, pLeft ) > 0 );
+
+            int hL = height( pLeft, memory_model::memory_order_acquire );
+            if ( hL - hR <= 1 )
+                return pNode; // retry
 
-                    // focus on pLeft, if necessary pNode will be balanced later
-                    return rebalance_to_left_locked( pNode, pLeft, pLRight, hLL );
+            node_type * pLRight = child( pLeft, right_child, memory_model::memory_order_relaxed );
+            int hLR = height_null( pLRight, memory_model::memory_order_acquire );
+            node_type * pLLeft = child( pLeft, left_child, memory_model::memory_order_relaxed );
+            int hLL = height_null( pLLeft, memory_model::memory_order_acquire );
+
+            if ( pLRight ) {
+                {
+                    node_scoped_lock lr( m_Monitor, *pLRight );
+                    if ( pLeft->m_pRight.load( memory_model::memory_order_acquire ) != pLRight )
+                        return pNode; // retry
+
+                    assert( check_node_ordering( pLeft, pLRight ) < 0 );
+
+                    hLR = height( pLRight, memory_model::memory_order_acquire );
+                    if ( hLL > hLR )
+                        return rotate_right_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLR );
+
+                    int hLRL = height_null( child( pLRight, left_child, memory_model::memory_order_relaxed ), memory_model::memory_order_acquire );
+                    int balance = hLL - hLRL;
+                    if ( balance >= -1 && balance <= 1 && !( ( hLL == 0 || hLRL == 0 ) && !pLeft->is_valued( memory_model::memory_order_relaxed ))) {
+                        // nParent.child.left won't be damaged after a double rotation
+                        return rotate_right_over_left_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLRL );
+                    }
                 }
+
+                // focus on pLeft, if necessary pNode will be balanced later
+                return rebalance_to_left_locked( pNode, pLeft, pLRight, hLL );
+            }
+            else if ( hLL > hLR ) {
+                // rotate right
+                return rotate_right_locked( pParent, pNode, pLeft, hR, hLL, pLRight, hLR );
             }
+
+            return pNode; // retry
         }
 
         node_type * rebalance_to_left_locked( node_type * pParent, node_type * pNode, node_type * pRight, int hL )
         {
-            assert( pNode->m_pParent.load( memory_model::memry_order_relaxed ) == pNode );
-            assert( m_pParent->m_pLeft.load( memory_model::memory_order_relaxed ) == pNode
-                    || m_pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+            assert( parent( pNode, memory_model::memory_order_relaxed ) == pParent );
+            assert( child( pParent, left_child, memory_model::memory_order_relaxed ) == pNode
+                 || child( pParent, right_child, memory_model::memory_order_relaxed ) == pNode );
 
             // pParent and pNode is locked yet
-            {
-                assert( pRight != nullptr );
-                node_scoped_lock l( m_Monitor, *pRight );
-                if ( pNode->m_pRight.load( memory_model::memory_order_relaxed ) != pRight )
-                    return pNode; // retry for pNode
-
-                int hR = pRight->height( memory_model::memory_order_relaxed );
-                if ( hL - hR >= -1 )
-                    return pNode; // retry
-
-                node_type * pRLeft = pRight->m_pLeft.load( memory_model::memory_order_relaxed );
-                int hRL = pRLeft > pRLeft->height( memory_model::memory_order_relaxed ) : 0;
-                node_type * pRRight = pRight->m_pRight.load( memory_model::memory_order_relaxed );
-                int hRR = pRRight ? pRRight->height( memory_model::memory_order_relaxed ) : 0;
-                if ( hRR > hRL )
-                    return rotate_left_locked( pParent, pNode, hL, pRight, pRLeft, hRL, hRR );
+            assert( pRight != nullptr );
+            node_scoped_lock l( m_Monitor, *pRight );
+            if ( pNode->m_pRight.load( memory_model::memory_order_relaxed ) != pRight )
+                return pNode; // retry for pNode
 
+            assert( check_node_ordering( pNode, pRight ) < 0 );
+
+            int hR = height( pRight, memory_model::memory_order_acquire );
+            if ( hL - hR >= -1 )
+                return pNode; // retry
+
+            node_type * pRLeft = child( pRight, left_child, memory_model::memory_order_relaxed );
+            int hRL = height_null( pRLeft, memory_model::memory_order_acquire );
+            node_type * pRRight = child( pRight, right_child, memory_model::memory_order_relaxed );
+            int hRR = height_null( pRRight, memory_model::memory_order_acquire );
+
+            if ( pRLeft ) {
                 {
-                    assert( pRLeft != nullptr );
                     node_scoped_lock lrl( m_Monitor, *pRLeft );
-                    if ( pRight->m_pLeft.load( memory_model::memory_order_relaxed ) != pRLeft )
+                    if ( pRight->m_pLeft.load( memory_model::memory_order_acquire ) != pRLeft )
                         return pNode; // retry
 
-                    hRL = pRLeft->height( memory_model::memory_order_relaxed );
+                    assert( check_node_ordering( pRight, pRLeft ) > 0 );
+
+                    hRL = height( pRLeft, memory_model::memory_order_acquire );
                     if ( hRR >= hRL )
                         return rotate_left_locked( pParent, pNode, hL, pRight, pRLeft, hRL, hRR );
 
-                    node_type * pRLRight = pRLeft->m_pRight.load( memory_model::memory_order_relaxed );
-                    int hRLR = pRLRight ? pRLRight->height( memory_model::memory_order_relaxed ) : 0;
+                    node_type * pRLRight = child( pRLeft, right_child, memory_model::memory_order_relaxed );
+                    int hRLR = height_null( pRLRight, memory_model::memory_order_acquire );
                     int balance = hRR - hRLR;
-                    if ( b >= -1 && b <= 1 && !((hRR == 0 || hRLR == 0) && pRight->value( memory_odel::memory_order_relaxed ) == nullptr)
-                         return rotate_left_over_right_locked( pParent, pNode, hL, pRight, pRLeft, hRR, hRLR );
+                    if ( balance >= -1 && balance <= 1 && !( ( hRR == 0 || hRLR == 0 ) && !pRight->is_valued( memory_model::memory_order_relaxed )))
+                        return rotate_left_over_right_locked( pParent, pNode, hL, pRight, pRLeft, hRR, hRLR );
                 }
+
                 return rebalance_to_right_locked( pNode, pRight, pRLeft, hRR );
             }
+            else if ( hRR > hRL )
+                return rotate_left_locked( pParent, pNode, hL, pRight, pRLeft, hRL, hRR );
+
+            return pNode;   // retry
         }
 
         static void begin_change( node_type * pNode, version_type version )
         {
-            pNode->version( version | node_type::shrinking, memory_model::memory_order_release );
+            assert(pNode->version(memory_model::memory_order_acquire) == version );
+            assert( (version & node_type::shrinking) == 0 );
+            pNode->exchange_version( version | node_type::shrinking, memory_model::memory_order_acquire );
         }
         static void end_change( node_type * pNode, version_type version )
         {
@@ -1169,31 +1867,53 @@ namespace cds { namespace container {
         node_type * rotate_right_locked( node_type * pParent, node_type * pNode, node_type * pLeft, int hR, int hLL, node_type * pLRight, int hLR )
         {
             version_type nodeVersion = pNode->version( memory_model::memory_order_relaxed );
-            node_type * pParentLeft = pParent->m_pLeft.load( memory_model::memory_order_relaxed );
+            node_type * pParentLeft = child( pParent, left_child, memory_model::memory_order_relaxed );
 
             begin_change( pNode, nodeVersion );
 
-            pNode->m_pLeft.store( pLRight, memory_model::memory_order_relaxed );
-            if ( pLRight != nullptr )
-                pLRight->m_pParent.store( pNode, memory_model::memory_order_relaxed  );
+            pNode->m_pLeft.store( pLRight, memory_model::memory_order_release );
+
+            if ( pLRight != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRight->m_pParent );
+                pLRight->parent( pNode, memory_model::memory_order_relaxed );
+                assert( check_node_ordering( pNode, pLRight ) > 0 );
+            }
 
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLeft->m_pRight );
             pLeft->m_pRight.store( pNode, memory_model::memory_order_relaxed );
-            pNode->m_pParent.store( pLeft, memory_model::memory_order_relaxed );
 
-            if ( pParentLeft == pNode )
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pNode->m_pParent );
+            pNode->parent( pLeft, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pLeft, pNode ) < 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            if ( pParentLeft == pNode ) {
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pLeft );
                 pParent->m_pLeft.store( pLeft, memory_model::memory_order_relaxed );
+            }
             else {
-                assert( pParent->m_pRight.load( memory_odel::memory_order_relaxed ) == pNode );
-                pParent->m_pRight.store( pLeft, memory_mdel::memory_order_relaxed );
+                assert( pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pRight );
+                pParent->m_pRight.store( pLeft, memory_model::memory_order_relaxed );
             }
-            pLeft->m_pParent.store( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLeft->m_pParent );
+            pLeft->parent( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
 
             // fix up heights links
             int hNode = 1 + std::max( hLR, hR );
-            pNode->height( hNode, memory_model::memory_order_relaxed );
-            pLeft->height( 1 + std::max( hLL, hNode ), memory_model::memory_order_relaxed );
+            set_height( pNode, hNode, memory_model::memory_order_release );
+            set_height( pLeft, 1 + std::max( hLL, hNode ), memory_model::memory_order_release);
 
             end_change( pNode, nodeVersion );
+            m_stat.onRotateRight();
 
             // We have damaged pParent, pNode (now parent.child.right), and pLeft (now
             // parent.child).  pNode is the deepest.  Perform as many fixes as we can
@@ -1205,24 +1925,30 @@ namespace cds { namespace container {
             int nodeBalance = hLR - hR;
             if ( nodeBalance < -1 || nodeBalance > 1 ) {
                 // we need another rotation at pNode
+                m_stat.onRotateAfterRightRotation();
                 return pNode;
             }
 
             // we've fixed balance and height damage for pNode, now handle
             // extra-routing node damage
-            if ( (pLRight == nullptr || hR == 0) && pNode->value(memory_model::memory_order_relaxed) == nullptr ) {
+            if ( (pLRight == nullptr || hR == 0) && !pNode->is_valued(memory_model::memory_order_relaxed)) {
                 // we need to remove pNode and then repair
+                m_stat.onRemoveAfterRightRotation();
                 return pNode;
             }
 
             // we've already fixed the height at pLeft, do we need a rotation here?
             int leftBalance = hLL - hNode;
-            if ( leftBalance < -1 || leftBalance > 1 )
+            if ( leftBalance < -1 || leftBalance > 1 ) {
+                m_stat.onRotateAfterRightRotation();
                 return pLeft;
+            }
 
             // pLeft might also have routing node damage (if pLeft.left was null)
-            if ( hLL == 0 && pLeft->value( memory_model::memory_order_relaxed ) == nullptr )
+            if ( hLL == 0 && !pLeft->is_valued(memory_model::memory_order_acquire)) {
+                m_stat.onDamageAfterRightRotation();
                 return pLeft;
+            }
 
             // try to fix the parent height while we've still got the lock
             return fix_height_locked( pParent );
@@ -1231,99 +1957,158 @@ namespace cds { namespace container {
         node_type * rotate_left_locked( node_type * pParent, node_type * pNode, int hL, node_type * pRight, node_type * pRLeft, int hRL, int hRR )
         {
             version_type nodeVersion = pNode->version( memory_model::memory_order_relaxed );
-            node_type * pParentLeft = pParent->m_pLeft.load( memory_odel::memory_order_relaxed );
+            node_type * pParentLeft = child( pParent, left_child, memory_model::memory_order_relaxed );
 
             begin_change( pNode, nodeVersion );
 
             // fix up pNode links, careful to be compatible with concurrent traversal for all but pNode
-            pNode->m_pRight.store( pRLeft, memory_nodel::memory_order_relaxed );
-            if ( pRLeft != nullptr )
-                pRLeft->m_pParent.store( pNode, memory_model::memory_order_relaxed );
+            pNode->m_pRight.store( pRLeft, memory_model::memory_order_release );
+            if ( pRLeft != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLeft->m_pParent );
+                pRLeft->parent( pNode, memory_model::memory_order_relaxed );
+            }
 
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRight->m_pLeft );
             pRight->m_pLeft.store( pNode, memory_model::memory_order_relaxed );
-            pNode->m_pParent.store( pRight, memory_model::memory_order_relaxed );
 
-            if ( pParentLeft == pNode )
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pNode->m_pParent );
+            pNode->parent( pRight, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pRight, pNode ) > 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            if ( pParentLeft == pNode ) {
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pLeft );
                 pParent->m_pLeft.store( pRight, memory_model::memory_order_relaxed );
+            }
             else {
-                assert( pParent->m_pRight.load( memory_odel::memory_order_relaxed ) == pNode );
-                pParent->m_pRight.store( pRight, memory_model::memory_model_relaxed );
+                assert( pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pRight );
+                pParent->m_pRight.store( pRight, memory_model::memory_order_relaxed );
             }
-            pRight->m_pParent.store( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRight->m_pParent );
+            pRight->parent( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
 
             // fix up heights
             int hNode = 1 + std::max( hL, hRL );
-            pNode->height( hNode, memory_model::memory_order_relaxed );
-            pRight->height( 1 + std::max( hNode, hRR ), memory_model::memory_order_relaxed );
+            set_height( pNode, hNode, memory_model::memory_order_release );
+            set_height( pRight, 1 + std::max( hNode, hRR ), memory_model::memory_order_release);
 
             end_change( pNode, nodeVersion );
+            m_stat.onRotateLeft();
 
             int nodeBalance = hRL - hL;
-            if ( nodeBalance < -1 || nodeBalance > 1 )
+            if ( nodeBalance < -1 || nodeBalance > 1 ) {
+                m_stat.onRotateAfterLeftRotation();
                 return pNode;
+            }
 
-            if ( (pRLeft == nullptr || hL == 0) && pNode->value( memory_model::memory_order_relaxed ) == nullptr )
+            if ( (pRLeft == nullptr || hL == 0) && !pNode->is_valued(memory_model::memory_order_relaxed)) {
+                m_stat.onRemoveAfterLeftRotation();
                 return pNode;
+            }
 
             int rightBalance = hRR - hNode;
-            if ( rightBalance < -1 || rightBalance > 1 )
+            if ( rightBalance < -1 || rightBalance > 1 ) {
+                m_stat.onRotateAfterLeftRotation();
                 return pRight;
+            }
 
-            if ( hRR == 0 && pRight->value( memory_model::memory_order_relaxed ) == nullptr )
+            if ( hRR == 0 && !pRight->is_valued(memory_model::memory_order_acquire)) {
+                m_stat.onDamageAfterLeftRotation();
                 return pRight;
+            }
 
             return fix_height_locked( pParent );
         }
 
         node_type * rotate_right_over_left_locked( node_type * pParent, node_type * pNode, node_type * pLeft, int hR, int hLL, node_type * pLRight, int hLRL )
         {
-            typename node_type::value_type nodeVersion = pNode->version( memory_model::memory_order_relaxed );
-            typename node_type::value_type leftVersion = pLeft->version( memory_model::memory_order_relaxed );
+            version_type nodeVersion = pNode->version( memory_model::memory_order_relaxed );
+            version_type leftVersion = pLeft->version( memory_model::memory_order_acquire );
 
-            node_type * pPL = pParent->m_pLeft.load( memory_model::memory_order_relaxed );
-            node_type * pLRL = pLRight->m_pLeft.load( memory_model::memory_order_relaxed );
-            node_type * pLRR = pLRight->m_pRight.load( memory_model::memory_order_relaxed );
-            int hLRR = pLRR ? pLRR->hight( memory_model::memory_order_relaxed ) : 0;
+            node_type * pPL = child( pParent, left_child, memory_model::memory_order_relaxed );
+            node_type * pLRL = child( pLRight, left_child, memory_model::memory_order_acquire );
+            node_type * pLRR = child( pLRight, right_child, memory_model::memory_order_acquire );
+            int hLRR = height_null( pLRR, memory_model::memory_order_acquire );
 
             begin_change( pNode, nodeVersion );
             begin_change( pLeft, leftVersion );
 
             // fix up pNode links, careful about the order!
-            pNode->m_pLeft.store( pLRR, memory_model::memory_order_relaxed );
-            if ( pLRR != nullptr )
-                pLRR->m_pParent.store( pNode, memory_model::memory_order_relaxed );
+            pNode->m_pLeft.store( pLRR, memory_model::memory_order_release );
+            if ( pLRR != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRR->m_pParent );
+                pLRR->parent( pNode, memory_model::memory_order_relaxed );
+            }
 
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLeft->m_pRight );
             pLeft->m_pRight.store( pLRL, memory_model::memory_order_relaxed );
-            if ( pLRL != nullptr )
-                pLRL->m_pParent.store( pLeft, memory_model::memory_order_relaxed );
 
+            if ( pLRL != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRL->m_pParent );
+                pLRL->parent( pLeft, memory_model::memory_order_relaxed );
+            }
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRight->m_pLeft );
             pLRight->m_pLeft.store( pLeft, memory_model::memory_order_relaxed );
-            pLeft->m_pParent.store( pLRight, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLeft->m_pParent );
+            pLeft->parent( pLRight, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pLRight, pLeft ) > 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRight->m_pRight );
             pLRight->m_pRight.store( pNode, memory_model::memory_order_relaxed );
-            pNode->m_pParent.store( pLRight, memory_model::memory_order_relaxed );
 
-            if ( pPL == pNode )
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pNode->m_pParent );
+            pNode->parent( pLRight, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pLRight, pNode ) < 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            if ( pPL == pNode ) {
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pLeft );
                 pParent->m_pLeft.store( pLRight, memory_model::memory_order_relaxed );
+            }
             else {
-                assert( Parent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+                assert( child( pParent, right_child, memory_model::memory_order_relaxed ) == pNode );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pRight );
                 pParent->m_pRight.store( pLRight, memory_model::memory_order_relaxed );
             }
-            pLRight->m_pParent.store( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pLRight->m_pParent );
+            pLRight->parent( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
 
             // fix up heights
             int hNode = 1 + std::max( hLRR, hR );
-            pNode->height( hNode, memory_model::memory_order_relaxed );
+            set_height( pNode, hNode, memory_model::memory_order_release );
             int hLeft = 1 + std::max( hLL, hLRL );
-            pLeft->height( hLeft, memory_model::memory_order_relaxed );
-            pLRight->height( 1 + std::max( hLeft, hNode ), memory_model::memory_order_relaxed );
+            set_height( pLeft, hLeft, memory_model::memory_order_release );
+            set_height( pLRight, 1 + std::max( hLeft, hNode ), memory_model::memory_order_release);
 
             end_change( pNode, nodeVersion );
             end_change( pLeft, leftVersion );
+            m_stat.onRotateRightOverLeft();
 
             // caller should have performed only a single rotation if pLeft was going
             // to end up damaged
             assert( hLL - hLRL <= 1 && hLRL - hLL <= 1 );
-            assert( !((hLL == 0 || pLRL == nullptr) && pLeft->value( memory_model::memory_order_relaxed ) == nullptr ));
+            assert( !((hLL == 0 || pLRL == nullptr) && !pLeft->is_valued( memory_model::memory_order_acquire )));
 
             // We have damaged pParent, pLR (now parent.child), and pNode (now
             // parent.child.right).  pNode is the deepest.  Perform as many fixes as we
@@ -1335,19 +2120,23 @@ namespace cds { namespace container {
             int nodeBalance = hLRR - hR;
             if ( nodeBalance < -1 || nodeBalance > 1 ) {
                 // we need another rotation at pNode
+                m_stat.onRotateAfterRLRotation();
                 return pNode;
             }
 
             // pNode might also be damaged by being an unnecessary routing node
-            if ( (pLRR == nullptr || hR == 0) && pNode->value( memory_model::memory_order_relaxed ) == nullptr ) {
+            if ( (pLRR == nullptr || hR == 0) && !pNode->is_valued( memory_model::memory_order_relaxed )) {
                 // repair involves splicing out pNode and maybe more rotations
+                m_stat.onRemoveAfterRLRotation();
                 return pNode;
             }
 
             // we've already fixed the height at pLRight, do we need a rotation here?
-            final int balanceLR = hLeft - hNode;
-            if ( balanceLR < -1 || balanceLR > 1 )
-                return pLR;
+            int balanceLR = hLeft - hNode;
+            if ( balanceLR < -1 || balanceLR > 1 ) {
+                m_stat.onRotateAfterRLRotation();
+                return pLRight;
+            }
 
             // try to fix the parent height while we've still got the lock
             return fix_height_locked( pParent );
@@ -1356,59 +2145,98 @@ namespace cds { namespace container {
         node_type * rotate_left_over_right_locked( node_type * pParent, node_type * pNode, int hL, node_type * pRight, node_type * pRLeft, int hRR, int hRLR )
         {
             version_type nodeVersion = pNode->version( memory_model::memory_order_relaxed );
-            version_type rightVersion = pRight->version( memory_model::memory_order_relaxed );
+            version_type rightVersion = pRight->version( memory_model::memory_order_acquire );
 
-            node_type * pPL = pParent->m_pLeft.load( memory_model::memory_order_relaxed );
-            node_type * pRLL = pRLeft->m_pLeft.load( memory_model::memory_order_relaxed );
-            node_type * pRLR = pRLeft->m_pRight.load( memory_model::memory_order_relaxed );
-            int hRLL = pRLL ? pRLL->height( memory_model::memory_order_relaxed ) : 0;
+            node_type * pPL = child( pParent, left_child, memory_model::memory_order_relaxed );
+            node_type * pRLL = child( pRLeft, left_child, memory_model::memory_order_acquire );
+            node_type * pRLR = child( pRLeft, right_child, memory_model::memory_order_acquire );
+            int hRLL = height_null( pRLL, memory_model::memory_order_acquire );
 
             begin_change( pNode, nodeVersion );
-            begin_change( pRight, ghtVersion );
+            begin_change( pRight, rightVersion );
 
             // fix up pNode links, careful about the order!
-            pNode->m_pRight.store( pRLL, memory_model::memory_order_relaxed );
-            if ( pRLL != nullptr )
-                pRLL->m_pParent.store( pNode, memory_model::memory_order_relaxed );
+            pNode->m_pRight.store( pRLL, memory_model::memory_order_release );
+            if ( pRLL != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLL->m_pParent );
+                pRLL->parent( pNode, memory_model::memory_order_relaxed );
+            }
 
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRight->m_pLeft );
             pRight->m_pLeft.store( pRLR, memory_model::memory_order_relaxed );
-            if ( pRLR != nullptr )
-                pRLR->m_pParent.store( pRight, memory_model::memory_order_relaxed );
 
+            if ( pRLR != nullptr ) {
+                atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLR->m_pParent );
+                pRLR->parent( pRight, memory_model::memory_order_relaxed );
+            }
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLeft->m_pRight );
             pRLeft->m_pRight.store( pRight, memory_model::memory_order_relaxed );
-            pRight->m_pParent.store( pRLeft, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRight->m_pParent );
+            pRight->parent( pRLeft, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pRLeft, pRight ) < 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLeft->m_pLeft );
             pRLeft->m_pLeft.store( pNode, memory_model::memory_order_relaxed );
-            pNode->m_pParent.store( pRLeft, memory_model::memory_order_relaxed );
 
-            if ( pPL == pNode )
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pNode->m_pParent );
+            pNode->parent( pRLeft, memory_model::memory_order_relaxed );
+            assert( check_node_ordering( pRLeft, pNode ) > 0 );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            if ( pPL == pNode ) {
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pLeft );
                 pParent->m_pLeft.store( pRLeft, memory_model::memory_order_relaxed );
+            }
             else {
                 assert( pParent->m_pRight.load( memory_model::memory_order_relaxed ) == pNode );
+                CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pParent->m_pRight );
                 pParent->m_pRight.store( pRLeft, memory_model::memory_order_relaxed );
             }
-            pRLeft->m_pParent.store( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
+            CDS_TSAN_ANNOTATE_HAPPENS_BEFORE( &pRLeft->m_pParent );
+            pRLeft->parent( pParent, memory_model::memory_order_relaxed );
+
+            atomics::atomic_thread_fence( memory_model::memory_order_acq_rel );
 
             // fix up heights
             int hNode = 1 + std::max( hL, hRLL );
-            pNode->height( hNode, memory_model::memory_order_relaxed );
+            set_height( pNode, hNode, memory_model::memory_order_release );
             int hRight = 1 + std::max( hRLR, hRR );
-            pRight->height( hRight, memory_model::memory_order_relaxed );
-            pRLeft->height( 1 + std::max( hNode, hRight ), memory_model::memory_order_relaxed );
+            set_height( pRight, hRight, memory_model::memory_order_release );
+            set_height( pRLeft, 1 + std::max( hNode, hRight ), memory_model::memory_order_release);
 
             end_change( pNode, nodeVersion );
             end_change( pRight, rightVersion );
+            m_stat.onRotateLeftOverRight();
 
             assert( hRR - hRLR <= 1 && hRLR - hRR <= 1 );
 
             int nodeBalance = hRLL - hL;
-            if ( nodeBalance < -1 || nodeBalance > 1 )
+            if ( nodeBalance < -1 || nodeBalance > 1 ) {
+                m_stat.onRotateAfterLRRotation();
                 return pNode;
-            if ( (pRLL == nullptr || hL == 0) && pNode->value( memory_model::memory_order_relaxed ) == nullptr )
+            }
+
+            if ( (pRLL == nullptr || hL == 0) && !pNode->is_valued(memory_model::memory_order_relaxed)) {
+                m_stat.onRemoveAfterLRRotation();
                 return pNode;
+            }
 
             int balRL = hRight - hNode;
-            if ( balRL < -1 || balRL > 1 )
+            if ( balRL < -1 || balRL > 1 ) {
+                m_stat.onRotateAfterLRRotation();
                 return pRLeft;
+            }
 
             return fix_height_locked( pParent );
         }