gdax-orderbook-hpp/demo/dependencies/libcds-2.3.2/cds/urcu/details/base.h

   1 /*
   2     This file is a part of libcds - Concurrent Data Structures library
   3
   4     (C) Copyright Maxim Khizhinsky (libcds.dev@gmail.com) 2006-2017
   5
   6     Source code repo: http://github.com/khizmax/libcds/
   7     Download: http://sourceforge.net/projects/libcds/files/
   8
   9     Redistribution and use in source and binary forms, with or without
  10     modification, are permitted provided that the following conditions are met:
  11
  12     * Redistributions of source code must retain the above copyright notice, this
  13       list of conditions and the following disclaimer.
  14
  15     * Redistributions in binary form must reproduce the above copyright notice,
  16       this list of conditions and the following disclaimer in the documentation
  17       and/or other materials provided with the distribution.
  18
  19     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  23     FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  25     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  26     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  27     OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29 */
  30
  31 #ifndef CDSLIB_URCU_DETAILS_BASE_H
  32 #define CDSLIB_URCU_DETAILS_BASE_H
  33
  34 #include <cds/algo/atomic.h>
  35 #include <cds/gc/details/retired_ptr.h>
  36 #include <cds/details/allocator.h>
  37 #include <cds/os/thread.h>
  38 #include <cds/details/marked_ptr.h>
  39
  40 namespace cds {
  41     /// User-space Read-Copy Update (URCU) namespace
  42     /** @ingroup cds_garbage_collector
  43         @anchor cds_urcu_desc
  44
  45         This namespace contains declarations for different types of Read-Copy Update (%RCU)
  46         synchronization primitive and data structures developed for RCU.
  47         In <b>libcds</b> %RCU is used as garbage collector.
  48
  49         <b>Source papers</b>:
  50         - [2009] M.Desnoyers "Low-Impact Operating System Tracing" PhD Thesis,
  51           Chapter 6 "User-Level Implementations of Read-Copy Update"
  52         - [2011] M.Desnoyers, P.McKenney, A.Stern, M.Dagenias, J.Walpole "User-Level
  53           Implementations of Read-Copy Update"
  54         - [2012] M.Desnoyers, P.McKenney, A.Stern, M.Dagenias, J.Walpole "Supplementary
  55           Material for User-Level Implementations of Read-Copy Update"
  56
  57         <b>Informal introduction to user-space %RCU</b>
  58
  59         [<i>From Desnoyer's papers</i>] %RCU is a synchronization mechanism that was added to the
  60         Linux kernel in October of 2002. %RCU achieves scalability improvements by allowing
  61         reads to occur concurrently with updates. In contrast to conventional locking
  62         primitives that ensure mutual exclusion among concurrent threads regardless of whether
  63         they be readers or updaters, or with reader-writer locks that allow concurrent reads
  64         but not in the presence of updates, %RCU supports concurrency between multiple updaters
  65         and multiple readers. %RCU ensures that data are not freed up until all pre-existing
  66         critical sections complete. %RCU defines and uses efficient and scalable mechanisms
  67         for deferring reclamation of old data. These mechanisms distribute the work among read and update
  68         paths in such a way as to make read paths extremely fast.
  69
  70         %RCU readers execute within %RCU read-side critical sections. Each such critical section begins with
  71         \p rcu_read_lock(), ends with \p rcu_read_unlock() (in \p libcds these primitives are the methods of
  72         GC class and are usually called \p access_lock and \p access_unlock respectively). Read-side
  73         critical sections can be nested.
  74         The performance benefits of %RCU are due to the fact that \p rcu_read_lock()
  75         and \p rcu_read_unlock() are exceedingly fast.
  76
  77         When a thread is not in an %RCU read-side critical section, it is in a quiescent state.
  78         A quiescent state that persists for a significant time period is an extended quiescent state.
  79         Any time period during which every thread has been in at least one quiescent state
  80         is a grace period; this implies that every %RCU read-side critical section
  81         that starts before a grace period must end before that grace period does.
  82         Distinct grace periods may overlap, either partially or completely. Any time period
  83         that includes a grace period is by definition itself a grace period.
  84         Each grace period is guaranteed to complete as long as all read-side critical sections
  85         are finite in duration; thus even a constant flow of such critical sections is unable to
  86         extend an %RCU grace period indefinitely.
  87
  88         Suppose that readers enclose each of their data-structure traversals in
  89         an %RCU read-side critical section. If an updater first removes an element
  90         from such a data structure and then waits for a grace period, there can be
  91         no more readers accessing that element. The updater can then carry out destructive
  92         operations, for example freeing the element, without disturbing any readers.
  93
  94         The %RCU update is split into two phases, a removal phase and a reclamation phase.
  95         These two phases must be separated by a grace period, for example via the \p synchronize_rcu()
  96         primitive, which initiates a grace period and waits for it to finish.
  97         During the removal phase, the %RCU update removes elements from a shared data structure.
  98         The removed data elements will only be accessible to read-side critical sections
  99         that ran concurrently with the removal phase, which are guaranteed to complete before the
 100         grace period ends. Therefore the reclamation phase can safely free the data elements
 101         removed by the removal phase.
 102
 103         Desnoyers describes several classes of user-space %RCU implementations:
 104         - The Quiescent-State-Based Reclamation (QSBR) %RCU implementation offers
 105           the best possible read-side performance, but requires that each thread periodically
 106           calls a function to announce that it is in a quiescent state, thus strongly
 107           constraining the application design. This type of %RCU is not implemented in \p libcds.
 108         - The general-purpose %RCU implementation places almost no constraints on the application's
 109           design, thus being appropriate for use within a general-purpose library, but it has
 110           relatively higher read-side overhead. The \p libcds contains several implementations of general-purpose
 111           %RCU: \ref general_instant, \ref general_buffered, \ref general_threaded.
 112         - \p signal_buffered: the signal-handling %RCU presents an implementation having low read-side overhead and
 113           requiring only that the application give up one POSIX signal to %RCU update processing.
 114
 115         @note The signal-handled %RCU is defined only for UNIX-like systems, not for Windows.
 116
 117         @anchor cds_urcu_type
 118         <b>RCU implementation type</b>
 119
 120         There are several internal implementation of RCU (all declared in \p %cds::urcu namespace):
 121         - \ref general_instant - general purpose RCU with immediate reclamation
 122         - \ref general_buffered - general purpose RCU with deferred (buffered) reclamation
 123         - \ref general_threaded - general purpose RCU with special reclamation thread
 124         - \ref signal_buffered - signal-handling RCU with deferred (buffered) reclamation
 125
 126         You cannot create an object of any of those classes directly.
 127         Instead, you should use wrapper classes.
 128         The wrapper simplifies creation and usage of RCU singleton objects
 129         and has the reacher interface that combines interfaces of wrapped class i.e. RCU global part like
 130         \p synchronize, and corresponding RCU thread-specific interface like \p access_lock, \p access_unlock and \p retire_ptr.
 131
 132         @anchor cds_urcu_gc
 133         There are several wrapper classes (all declared in \p %cds::urcu namespace)
 134         - \ref cds_urcu_general_instant_gc "gc<general_instant>" - general purpose RCU with immediate reclamation,
 135             include file <tt><cds/urcu/general_instant.h></tt>
 136         - \ref cds_urcu_general_buffered_gc "gc<general_buffered>" - general purpose RCU with deferred (buffered) reclamation,
 137             include file <tt><cds/urcu/general_buffered.h></tt>
 138         - \ref cds_urcu_general_threaded_gc "gc<general_threaded>" - general purpose RCU with special reclamation thread
 139             include file <tt><cds/urcu/general_threaded.h></tt>
 140         - \ref cds_urcu_signal_buffered_gc "gc<signal_buffered>" - signal-handling RCU with deferred (buffered) reclamation
 141             include file <tt><cds/urcu/signal_buffered.h></tt>
 142
 143         Any RCU-related container in \p libcds expects that its \p RCU template parameter is one of those wrapper.
 144
 145         @anchor cds_urcu_tags
 146         For simplicity, in some algorithms instead of using RCU implementation type
 147         you should specify corresponding RCU tags (all declared in \p %cds::urcu namespace):
 148         - \ref general_instant_tag - for \ref general_instant
 149         - \ref general_buffered_tag - for \ref general_buffered
 150         - \ref general_threaded_tag - for \ref general_threaded
 151         - \ref signal_buffered_tag - for \ref signal_buffered
 152
 153         @anchor cds_urcu_performance
 154         <b>Performance</b>
 155
 156         As a result of our experiments we can range above %RCU implementation in such order,
 157         from high to low performance:
 158         - <tt>gc<general_buffered></tt> - high
 159         - <tt>gc<general_threaded></tt>
 160         - <tt>gc<signal_buffered></tt>
 161         - <tt>gc<general_instant></tt> - low
 162
 163         This estimation is very rough and depends on many factors:
 164         type of payload - mostly read-only (seeking) or read-write (inserting and deleting), -
 165         a hardware, your application, and so on.
 166
 167         @anchor cds_urcu_howto
 168         <b>How to use</b>
 169
 170         Usually, in your application you use only one \ref cds_urcu_gc "type of RCU" that is the best for your needs.
 171         However, the library allows to apply several RCU singleton in one application.
 172         The only limitation is that only one object of each RCU type can be instantiated.
 173         Since each RCU type is a template class the creation of two object of one RCU type class
 174         with different template arguments is an error and is not supported.
 175         However, it is correct when your RCU objects relates to different RCU types.
 176
 177         In \p libcds, many GC-based ordered list, set and map template classes have %RCU-related specializations
 178         that hide the %RCU specific details.
 179
 180         RCU GC is initialized in usual way: you should declare an object of type \p cds::urcu::gc< RCU_type >,
 181         for example:
 182         \code
 183         #include <cds/urcu/general_buffered.h>
 184
 185         typedef cds::urcu::gc< cds::urcu::general_buffered<> >    rcu_gpb;
 186
 187         int main() {
 188             // Initialize libcds
 189             cds::Initialize();
 190             {
 191                 // Initialize general_buffered RCU
 192                 rcu_gpb   gpbRCU;
 193
 194                 // If main thread uses lock-free containers
 195                 // the main thread should be attached to libcds infrastructure
 196                 cds::threading::Manager::attachThread();
 197
 198                 // Now you can use RCU-based containers in the main thread
 199                 //...
 200             }
 201             // Terminate libcds
 202             cds::Terminate();
 203         }
 204         \endcode
 205
 206         Each thread that deals with RCU-based container should be initialized first:
 207         \code
 208         #include <cds/urcu/general_buffered.h>
 209         int myThreadEntryPoint(void *)
 210         {
 211             // Attach the thread to libcds infrastructure
 212             cds::threading::Manager::attachThread();
 213
 214             // Now you can use RCU-based containers in the thread
 215             //...
 216
 217             // Detach thread when terminating
 218             cds::threading::Manager::detachThread();
 219         }
 220         \endcode
 221     */
 222     namespace urcu {
 223
 224 #   if (CDS_OS_INTERFACE == CDS_OSI_UNIX || defined(CDS_DOXYGEN_INVOKED)) && !defined(CDS_THREAD_SANITIZER_ENABLED)
 225 #       define CDS_URCU_SIGNAL_HANDLING_ENABLED 1
 226 #   endif
 227
 228         /// General-purpose URCU type
 229         struct general_purpose_rcu {
 230             //@cond
 231             static uint32_t const c_nControlBit = 0x80000000;
 232             static uint32_t const c_nNestMask   = c_nControlBit - 1;
 233             //@endcond
 234         };
 235
 236 #   ifdef CDS_URCU_SIGNAL_HANDLING_ENABLED
 237         /// Signal-handling URCU type
 238         struct signal_handling_rcu {
 239             //@cond
 240             static uint32_t const c_nControlBit = 0x80000000;
 241             static uint32_t const c_nNestMask   = c_nControlBit - 1;
 242             //@endcond
 243         };
 244 #   endif
 245
 246         /// Tag for general_instant URCU
 247         struct general_instant_tag: public general_purpose_rcu {
 248             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 249         };
 250
 251         /// Tag for general_buffered URCU
 252         struct general_buffered_tag: public general_purpose_rcu
 253         {
 254             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 255         };
 256
 257         /// Tag for general_threaded URCU
 258         struct general_threaded_tag: public general_purpose_rcu {
 259             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 260         };
 261
 262 #   ifdef CDS_URCU_SIGNAL_HANDLING_ENABLED
 263         /// Tag for signal_buffered URCU
 264         struct signal_buffered_tag: public signal_handling_rcu {
 265                 typedef signal_handling_rcu     rcu_class ; ///< The URCU type
 266         };
 267 #   endif
 268
 269         ///@anchor cds_urcu_retired_ptr Retired pointer, i.e. pointer that ready for reclamation
 270         typedef cds::gc::details::retired_ptr   retired_ptr;
 271         using cds::gc::make_retired_ptr;
 272
 273         /// Pointer to function to free (destruct and deallocate) retired pointer of specific type
 274         typedef cds::gc::details::free_retired_ptr_func free_retired_ptr_func;
 275
 276         //@cond
 277         /// Implementation-specific URCU details
 278         namespace details {
 279             /// forward declarations
 280             template <typename RCUtag>
 281             struct thread_data;
 282
 283             template <typename RCUtag>
 284             class thread_gc;
 285
 286             template <typename RCUtag >
 287             class singleton;
 288
 289             //@cond
 290             class singleton_vtbl {
 291             protected:
 292                 virtual ~singleton_vtbl()
 293                 {}
 294             public:
 295                 virtual void retire_ptr( retired_ptr& p ) = 0;
 296             };
 297
 298             class gc_common
 299             {
 300             public:
 301                 template <typename MarkedPtr> using atomic_marked_ptr = atomics::atomic<MarkedPtr>;
 302             };
 303             //@endcond
 304
 305             //@cond
 306             template <typename ThreadData>
 307             struct thread_list_record {
 308                 atomics::atomic<ThreadData*>  m_pNext;   ///< Next item in thread list
 309                 atomics::atomic<OS::ThreadId> m_idOwner; ///< Owner thread id; 0 - the record is free (not owned)
 310
 311                 thread_list_record()
 312                     : m_pNext( nullptr )
 313                     , m_idOwner( cds::OS::c_NullThreadId )
 314                 {}
 315
 316                 explicit thread_list_record( OS::ThreadId owner )
 317                     : m_pNext( nullptr )
 318                     , m_idOwner( owner )
 319                 {}
 320
 321                 ~thread_list_record()
 322                 {}
 323             };
 324             //@endcond
 325
 326             //@cond
 327             template <typename RCUtag, class Alloc = CDS_DEFAULT_ALLOCATOR >
 328             class thread_list {
 329             public:
 330                 typedef thread_data<RCUtag>                             thread_record;
 331                 typedef cds::details::Allocator< thread_record, Alloc > allocator_type;
 332
 333             private:
 334                 atomics::atomic<thread_record *> m_pHead;
 335
 336             public:
 337                 thread_list()
 338                     : m_pHead( nullptr )
 339                 {}
 340
 341                 ~thread_list()
 342                 {
 343                     destroy();
 344                 }
 345
 346                 thread_record * alloc()
 347                 {
 348                     thread_record * pRec;
 349                     cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId;
 350                     cds::OS::ThreadId const curThreadId  = cds::OS::get_current_thread_id();
 351
 352                     // First, try to reuse a retired (non-active) HP record
 353                     for ( pRec = m_pHead.load( atomics::memory_order_acquire ); pRec; pRec = pRec->m_list.m_pNext.load( atomics::memory_order_relaxed )) {
 354                         cds::OS::ThreadId thId = nullThreadId;
 355                         if ( !pRec->m_list.m_idOwner.compare_exchange_strong( thId, curThreadId, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ))
 356                             continue;
 357                         return pRec;
 358                     }
 359
 360                     // No records available for reuse
 361                     // Allocate and push a new record
 362                     pRec = allocator_type().New( curThreadId );
 363
 364                     thread_record * pOldHead = m_pHead.load( atomics::memory_order_acquire );
 365                     do {
 366                         // Compiler barriers: assignment MUST BE inside the loop
 367                         CDS_COMPILER_RW_BARRIER;
 368                         pRec->m_list.m_pNext.store( pOldHead, atomics::memory_order_relaxed );
 369                         CDS_COMPILER_RW_BARRIER;
 370                     } while ( !m_pHead.compare_exchange_weak( pOldHead, pRec, atomics::memory_order_acq_rel, atomics::memory_order_acquire ));
 371
 372                     return pRec;
 373                 }
 374
 375                 void retire( thread_record * pRec )
 376                 {
 377                     assert( pRec != nullptr );
 378                     pRec->m_list.m_idOwner.store( cds::OS::c_NullThreadId, atomics::memory_order_release );
 379                 }
 380
 381                 void detach_all()
 382                 {
 383                     thread_record * pNext = nullptr;
 384                     cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId;
 385
 386                     for ( thread_record * pRec = m_pHead.load( atomics::memory_order_acquire ); pRec; pRec = pNext ) {
 387                         pNext = pRec->m_list.m_pNext.load( atomics::memory_order_relaxed );
 388                         if ( pRec->m_list.m_idOwner.load( atomics::memory_order_relaxed ) != nullThreadId ) {
 389                             retire( pRec );
 390                         }
 391                     }
 392                 }
 393
 394                 thread_record * head( atomics::memory_order mo ) const
 395                 {
 396                     return m_pHead.load( mo );
 397                 }
 398
 399             private:
 400                 void destroy()
 401                 {
 402                     allocator_type al;
 403                     CDS_DEBUG_ONLY( cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId; )
 404                     CDS_DEBUG_ONLY( cds::OS::ThreadId const mainThreadId = cds::OS::get_current_thread_id() ;)
 405
 406                     thread_record * p = m_pHead.exchange( nullptr, atomics::memory_order_acquire );
 407                     while ( p ) {
 408                         thread_record * pNext = p->m_list.m_pNext.load( atomics::memory_order_relaxed );
 409
 410                         assert( p->m_list.m_idOwner.load( atomics::memory_order_relaxed ) == nullThreadId
 411                             || p->m_list.m_idOwner.load( atomics::memory_order_relaxed ) == mainThreadId
 412                             );
 413
 414                         al.Delete( p );
 415                         p = pNext;
 416                     }
 417                 }
 418             };
 419             //@endcond
 420
 421             //@cond
 422             template <class ThreadGC>
 423             class scoped_lock {
 424             public:
 425                 typedef ThreadGC                    thread_gc;
 426                 typedef typename thread_gc::rcu_tag rcu_tag;
 427
 428             public:
 429                 scoped_lock()
 430                 {
 431                     thread_gc::access_lock();
 432                 }
 433
 434                 ~scoped_lock()
 435                 {
 436                     thread_gc::access_unlock();
 437                 }
 438             };
 439             //@endcond
 440         } // namespace details
 441         //@endcond
 442
 443         // forwards
 444         //@cond
 445         template <typename RCUimpl> class gc;
 446         //@endcond
 447
 448         /// Epoch-based retired ptr
 449         /**
 450             Retired pointer with additional epoch field that prevents early reclamation.
 451             This type of retired pointer is used in buffered RCU implementations.
 452         */
 453         struct epoch_retired_ptr: public retired_ptr
 454         {
 455             uint64_t    m_nEpoch;  ///< The epoch when the object has been retired
 456
 457             //@cond
 458             epoch_retired_ptr()
 459             {}
 460             //@endcond
 461
 462             /// Constructor creates a copy of \p rp retired pointer and saves \p nEpoch reclamation epoch.
 463             epoch_retired_ptr( retired_ptr const& rp, uint64_t nEpoch )
 464                 : retired_ptr( rp )
 465                 , m_nEpoch( nEpoch )
 466             {}
 467         };
 468
 469     } // namespace urcu
 470 } // namespace cds
 471
 472 #endif // #ifndef CDSLIB_URCU_DETAILS_BASE_H