cds/urcu/details/base.h

   1 /*
   2     This file is a part of libcds - Concurrent Data Structures library
   3
   4     (C) Copyright Maxim Khizhinsky (libcds.dev@gmail.com) 2006-2016
   5
   6     Source code repo: http://github.com/khizmax/libcds/
   7     Download: http://sourceforge.net/projects/libcds/files/
   8
   9     Redistribution and use in source and binary forms, with or without
  10     modification, are permitted provided that the following conditions are met:
  11
  12     * Redistributions of source code must retain the above copyright notice, this
  13       list of conditions and the following disclaimer.
  14
  15     * Redistributions in binary form must reproduce the above copyright notice,
  16       this list of conditions and the following disclaimer in the documentation
  17       and/or other materials provided with the distribution.
  18
  19     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  23     FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  25     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  26     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  27     OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29 */
  30
  31 #ifndef CDSLIB_URCU_DETAILS_BASE_H
  32 #define CDSLIB_URCU_DETAILS_BASE_H
  33
  34 #include <cds/algo/atomic.h>
  35 #include <cds/gc/details/retired_ptr.h>
  36 #include <cds/details/allocator.h>
  37 #include <cds/os/thread.h>
  38 #include <cds/details/marked_ptr.h>
  39
  40 namespace cds {
  41     /// User-space Read-Copy Update (URCU) namespace
  42     /** @ingroup cds_garbage_collector
  43         @anchor cds_urcu_desc
  44
  45         This namespace contains declarations for different types of Read-Copy Update (%RCU)
  46         synchronization primitive and data structures developed for RCU.
  47         In <b>libcds</b> %RCU is used as garbage collector.
  48
  49         <b>Source papers</b>:
  50         - [2009] M.Desnoyers "Low-Impact Operating System Tracing" PhD Thesis,
  51           Chapter 6 "User-Level Implementations of Read-Copy Update"
  52         - [2011] M.Desnoyers, P.McKenney, A.Stern, M.Dagenias, J.Walpole "User-Level
  53           Implementations of Read-Copy Update"
  54
  55         <b>Informal introduction to user-space %RCU</b>
  56
  57         [<i>From Desnoyer's papers</i>] %RCU is a synchronization mechanism that was added to the
  58         Linux kernel in October of 2002. %RCU achieves scalability improvements by allowing
  59         reads to occur concurrently with updates. In contrast to conventional locking
  60         primitives that ensure mutual exclusion among concurrent threads regardless of whether
  61         they be readers or updaters, or with reader-writer locks that allow concurrent reads
  62         but not in the presence of updates, %RCU supports concurrency between multiple updaters
  63         and multiple readers. %RCU ensures that data are not freed up until all pre-existing
  64         critical sections complete. %RCU defines and uses efficient and scalable mechanisms
  65         for deferring reclamation of old data. These mechanisms distribute the work among read and update
  66         paths in such a way as to make read paths extremely fast.
  67
  68         %RCU readers execute within %RCU read-side critical sections. Each such critical section begins with
  69         \p rcu_read_lock(), ends with \p rcu_read_unlock() (in \p libcds these primitives are the methods of
  70         GC class and are usually called \p access_lock and \p access_unlock respectively). Read-side
  71         critical sections can be nested.
  72         The performance benefits of %RCU are due to the fact that \p rcu_read_lock()
  73         and \p rcu_read_unlock() are exceedingly fast.
  74
  75         When a thread is not in an %RCU read-side critical section, it is in a quiescent state.
  76         A quiescent state that persists for a significant time period is an extended quiescent state.
  77         Any time period during which every thread has been in at least one quiescent state
  78         is a grace period; this implies that every %RCU read-side critical section
  79         that starts before a grace period must end before that grace period does.
  80         Distinct grace periods may overlap, either partially or completely. Any time period
  81         that includes a grace period is by definition itself a grace period.
  82         Each grace period is guaranteed to complete as long as all read-side critical sections
  83         are finite in duration; thus even a constant flow of such critical sections is unable to
  84         extend an %RCU grace period indefinitely.
  85
  86         Suppose that readers enclose each of their data-structure traversals in
  87         an %RCU read-side critical section. If an updater first removes an element
  88         from such a data structure and then waits for a grace period, there can be
  89         no more readers accessing that element. The updater can then carry out destructive
  90         operations, for example freeing the element, without disturbing any readers.
  91
  92         The %RCU update is split into two phases, a removal phase and a reclamation phase.
  93         These two phases must be separated by a grace period, for example via the \p synchronize_rcu()
  94         primitive, which initiates a grace period and waits for it to finish.
  95         During the removal phase, the %RCU update removes elements from a shared data structure.
  96         The removed data elements will only be accessible to read-side critical sections
  97         that ran concurrently with the removal phase, which are guaranteed to complete before the
  98         grace period ends. Therefore the reclamation phase can safely free the data elements
  99         removed by the removal phase.
 100
 101         Desnoyers describes several classes of user-space %RCU implementations:
 102         - The Quiescent-State-Based Reclamation (QSBR) %RCU implementation offers
 103           the best possible read-side performance, but requires that each thread periodically
 104           calls a function to announce that it is in a quiescent state, thus strongly
 105           constraining the application design. This type of %RCU is not implemented in \p libcds.
 106         - The general-purpose %RCU implementation places almost no constraints on the application's
 107           design, thus being appropriate for use within a general-purpose library, but it has
 108           relatively higher read-side overhead. The \p libcds contains several implementations of general-purpose
 109           %RCU: \ref general_instant, \ref general_buffered, \ref general_threaded.
 110         - The signal-handling %RCU presents an implementation having low read-side overhead and
 111           requiring only that the application give up one POSIX signal to %RCU update processing.
 112           The \p libcds contains several implementations if signal-handling %RCU: \ref signal_buffered,
 113           \ref signal_threaded.
 114
 115     @note The signal-handled %RCU is defined only for UNIX-like systems, not for Windows.
 116
 117     @anchor cds_urcu_type
 118     <b>RCU implementation type</b>
 119
 120         There are several internal implementation of RCU (all declared in \p %cds::urcu namespace):
 121         - \ref general_instant - general purpose RCU with immediate reclamation
 122         - \ref general_buffered - general purpose RCU with deferred (buffered) reclamation
 123         - \ref general_threaded - general purpose RCU with special reclamation thread
 124         - \ref signal_buffered - signal-handling RCU with deferred (buffered) reclamation
 125         - \ref signal_threaded - signal-handling RCU with special reclamation thread
 126
 127         You cannot create an object of any of those classes directly.
 128         Instead, you should use wrapper classes.
 129         The wrapper simplifies creation and usage of RCU singleton objects
 130         and has the reacher interface that combines interfaces of wrapped class i.e. RCU global part like
 131         \p synchronize, and corresponding RCU thread-specific interface like \p access_lock, \p access_unlock and \p retire_ptr.
 132
 133     @anchor cds_urcu_gc
 134     There are several wrapper classes (all declared in \p %cds::urcu namespace)
 135         - \ref cds_urcu_general_instant_gc "gc<general_instant>" - general purpose RCU with immediate reclamation,
 136             include file <tt><cds/urcu/general_instant.h></tt>
 137         - \ref cds_urcu_general_buffered_gc "gc<general_buffered>" - general purpose RCU with deferred (buffered) reclamation,
 138             include file <tt><cds/urcu/general_buffered.h></tt>
 139         - \ref cds_urcu_general_threaded_gc "gc<general_threaded>" - general purpose RCU with special reclamation thread
 140             include file <tt><cds/urcu/general_threaded.h></tt>
 141         - \ref cds_urcu_signal_buffered_gc "gc<signal_buffered>" - signal-handling RCU with deferred (buffered) reclamation
 142             include file <tt><cds/urcu/signal_buffered.h></tt>
 143         - \ref cds_urcu_signal_threaded_gc "gc<signal_threaded>" - signal-handling RCU with special reclamation thread
 144             include file <tt><cds/urcu/signal_threaded.h></tt>
 145
 146         Any RCU-related container in \p libcds expects that its \p RCU template parameter is one of those wrapper.
 147
 148     @anchor cds_urcu_tags
 149         For simplicity, in some algorithms instead of using RCU implementation type
 150         you should specify corresponding RCU tags (all declared in \p %cds::urcu namespace):
 151         - \ref general_instant_tag - for \ref general_instant
 152         - \ref general_buffered_tag - for \ref general_buffered
 153         - \ref general_threaded_tag - for \ref general_threaded
 154         - \ref signal_buffered_tag - for \ref signal_buffered
 155         - \ref signal_threaded_tag - for \ref signal_threaded
 156
 157     @anchor cds_urcu_performance
 158     <b>Performance</b>
 159
 160         As a result of our experiments we can range above %RCU implementation in such order,
 161         from high to low performance:
 162         - <tt>gc<general_buffered></tt> - high
 163         - <tt>gc<general_threaded></tt>
 164         - <tt>gc<signal_buffered></tt>
 165         - <tt>gc<signal_threaded></tt>
 166         - <tt>gc<general_instant></tt> - low
 167
 168         This estimation is very rough and depends on many factors:
 169         type of payload - mostly read-only (seeking) or read-write (inserting and deleting), -
 170         a hardware, your application, and so on.
 171
 172     @anchor cds_urcu_howto
 173     <b>How to use</b>
 174
 175         Usually, in your application you use only one \ref cds_urcu_gc "type of RCU" that is the best for your needs.
 176         However, the library allows to apply several RCU singleton in one application.
 177         The only limitation is that only one object of each RCU type can be instantiated.
 178         Since each RCU type is a template class the creation of two object of one RCU type class
 179         with different template arguments is an error and is not supported.
 180         However, it is correct when your RCU objects relates to different RCU types.
 181
 182         @note If you want to use \p %signal_buffered and \p %signal_threaded RCU in your application simultaneously,
 183         you should specify different signal number for each signal-handled RCU type on construction time,
 184         for example, \p SIGUSR1 and \p SIGUSR2 respectively. By default, both signal-handled RCU implementation
 185         share \p SIGUSR1 signal and cannot be applied together.
 186
 187         In \p libcds, many GC-based ordered list, set and map template classes have %RCU-related specializations
 188         that hide the %RCU specific details.
 189
 190         RCU GC is initialized in usual way: you should declare an object of type \p cds::urcu::gc< RCU_type >,
 191         for example:
 192         \code
 193         #include <cds/urcu/general_buffered.h>
 194
 195         typedef cds::urcu::gc< cds::urcu::general_buffered<> >    rcu_gpb;
 196
 197         int main() {
 198             // Initialize libcds
 199             cds::Initialize();
 200             {
 201                 // Initialize general_buffered RCU
 202                 rcu_gpb   gpbRCU;
 203
 204                 // If main thread uses lock-free containers
 205                 // the main thread should be attached to libcds infrastructure
 206                 cds::threading::Manager::attachThread();
 207
 208                 // Now you can use RCU-based containers in the main thread
 209                 //...
 210             }
 211             // Terminate libcds
 212             cds::Terminate();
 213         }
 214         \endcode
 215
 216         Each thread that deals with RCU-based container should be initialized first:
 217         \code
 218         #include <cds/urcu/general_buffered.h>
 219         int myThreadEntryPoint(void *)
 220         {
 221             // Attach the thread to libcds infrastructure
 222             cds::threading::Manager::attachThread();
 223
 224             // Now you can use RCU-based containers in the thread
 225             //...
 226
 227             // Detach thread when terminating
 228             cds::threading::Manager::detachThread();
 229         }
 230         \endcode
 231     */
 232     namespace urcu {
 233
 234 #   if CDS_OS_INTERFACE == CDS_OSI_UNIX || defined(CDS_DOXYGEN_INVOKED)
 235 #       define CDS_URCU_SIGNAL_HANDLING_ENABLED 1
 236 #   endif
 237
 238         /// General-purpose URCU type
 239         struct general_purpose_rcu {
 240             //@cond
 241             static uint32_t const c_nControlBit = 0x80000000;
 242             static uint32_t const c_nNestMask   = c_nControlBit - 1;
 243             //@endcond
 244         };
 245
 246 #   ifdef CDS_URCU_SIGNAL_HANDLING_ENABLED
 247         /// Signal-handling URCU type
 248         struct signal_handling_rcu {
 249             //@cond
 250             static uint32_t const c_nControlBit = 0x80000000;
 251             static uint32_t const c_nNestMask   = c_nControlBit - 1;
 252             //@endcond
 253         };
 254 #   endif
 255
 256         /// Tag for general_instant URCU
 257         struct general_instant_tag: public general_purpose_rcu {
 258             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 259         };
 260
 261         /// Tag for general_buffered URCU
 262         struct general_buffered_tag: public general_purpose_rcu
 263         {
 264             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 265         };
 266
 267         /// Tag for general_threaded URCU
 268         struct general_threaded_tag: public general_purpose_rcu {
 269             typedef general_purpose_rcu     rcu_class ; ///< The URCU type
 270         };
 271
 272 #   ifdef CDS_URCU_SIGNAL_HANDLING_ENABLED
 273         /// Tag for signal_buffered URCU
 274         struct signal_buffered_tag: public signal_handling_rcu {
 275                 typedef signal_handling_rcu     rcu_class ; ///< The URCU type
 276         };
 277
 278         /// Tag for signal_threaded URCU
 279         struct signal_threaded_tag: public signal_handling_rcu {
 280             typedef signal_handling_rcu     rcu_class ; ///< The URCU type
 281         };
 282 #   endif
 283
 284         ///@anchor cds_urcu_retired_ptr Retired pointer, i.e. pointer that ready for reclamation
 285         typedef cds::gc::details::retired_ptr   retired_ptr;
 286         using cds::gc::make_retired_ptr;
 287
 288         /// Pointer to function to free (destruct and deallocate) retired pointer of specific type
 289         typedef cds::gc::details::free_retired_ptr_func free_retired_ptr_func;
 290
 291         //@cond
 292         /// Implementation-specific URCU details
 293         namespace details {
 294             /// forward declarations
 295             template <typename RCUtag>
 296             struct thread_data;
 297
 298             template <typename RCUtag>
 299             class thread_gc;
 300
 301             template <typename RCUtag >
 302             class singleton;
 303
 304             //@cond
 305             class singleton_vtbl {
 306             protected:
 307                 virtual ~singleton_vtbl()
 308                 {}
 309             public:
 310                 virtual void retire_ptr( retired_ptr& p ) = 0;
 311             };
 312
 313             class gc_common
 314             {
 315             public:
 316                 template <typename MarkedPtr> using atomic_marked_ptr = atomics::atomic<MarkedPtr>;
 317             };
 318             //@endcond
 319
 320             //@cond
 321             template <typename ThreadData>
 322             struct thread_list_record {
 323                 atomics::atomic<ThreadData*>  m_pNext;   ///< Next item in thread list
 324                 atomics::atomic<OS::ThreadId> m_idOwner; ///< Owner thread id; 0 - the record is free (not owned)
 325
 326                 thread_list_record()
 327                     : m_pNext( nullptr )
 328                     , m_idOwner( cds::OS::c_NullThreadId )
 329                 {}
 330
 331                 explicit thread_list_record( OS::ThreadId owner )
 332                     : m_pNext( nullptr )
 333                     , m_idOwner( owner )
 334                 {}
 335
 336                 ~thread_list_record()
 337                 {}
 338             };
 339             //@endcond
 340
 341             //@cond
 342             template <typename RCUtag, class Alloc = CDS_DEFAULT_ALLOCATOR >
 343             class thread_list {
 344             public:
 345                 typedef thread_data<RCUtag>                             thread_record;
 346                 typedef cds::details::Allocator< thread_record, Alloc > allocator_type;
 347
 348             private:
 349                 atomics::atomic<thread_record *> m_pHead;
 350
 351             public:
 352                 thread_list()
 353                     : m_pHead( nullptr )
 354                 {}
 355
 356                 ~thread_list()
 357                 {
 358                     destroy();
 359                 }
 360
 361                 thread_record * alloc()
 362                 {
 363                     thread_record * pRec;
 364                     cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId;
 365                     cds::OS::ThreadId const curThreadId  = cds::OS::get_current_thread_id();
 366
 367                     // First, try to reuse a retired (non-active) HP record
 368                     for ( pRec = m_pHead.load( atomics::memory_order_acquire ); pRec; pRec = pRec->m_list.m_pNext.load( atomics::memory_order_relaxed )) {
 369                         cds::OS::ThreadId thId = nullThreadId;
 370                         if ( !pRec->m_list.m_idOwner.compare_exchange_strong( thId, curThreadId, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ))
 371                             continue;
 372                         return pRec;
 373                     }
 374
 375                     // No records available for reuse
 376                     // Allocate and push a new record
 377                     pRec = allocator_type().New( curThreadId );
 378
 379                     thread_record * pOldHead = m_pHead.load( atomics::memory_order_acquire );
 380                     do {
 381                         // Compiler barriers: assignment MUST BE inside the loop
 382                         CDS_COMPILER_RW_BARRIER;
 383                         pRec->m_list.m_pNext.store( pOldHead, atomics::memory_order_relaxed );
 384                         CDS_COMPILER_RW_BARRIER;
 385                     } while ( !m_pHead.compare_exchange_weak( pOldHead, pRec, atomics::memory_order_acq_rel, atomics::memory_order_acquire ));
 386
 387                     return pRec;
 388                 }
 389
 390                 void retire( thread_record * pRec )
 391                 {
 392                     assert( pRec != nullptr );
 393                     pRec->m_list.m_idOwner.store( cds::OS::c_NullThreadId, atomics::memory_order_release );
 394                 }
 395
 396                 void detach_all()
 397                 {
 398                     thread_record * pNext = nullptr;
 399                     cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId;
 400
 401                     for ( thread_record * pRec = m_pHead.load( atomics::memory_order_acquire ); pRec; pRec = pNext ) {
 402                         pNext = pRec->m_list.m_pNext.load( atomics::memory_order_relaxed );
 403                         if ( pRec->m_list.m_idOwner.load( atomics::memory_order_relaxed ) != nullThreadId ) {
 404                             retire( pRec );
 405                         }
 406                     }
 407                 }
 408
 409                 thread_record * head( atomics::memory_order mo ) const
 410                 {
 411                     return m_pHead.load( mo );
 412                 }
 413
 414             private:
 415                 void destroy()
 416                 {
 417                     allocator_type al;
 418                     CDS_DEBUG_ONLY( cds::OS::ThreadId const nullThreadId = cds::OS::c_NullThreadId; )
 419                     CDS_DEBUG_ONLY( cds::OS::ThreadId const mainThreadId = cds::OS::get_current_thread_id() ;)
 420
 421                     thread_record * p = m_pHead.exchange( nullptr, atomics::memory_order_acquire );
 422                     while ( p ) {
 423                         thread_record * pNext = p->m_list.m_pNext.load( atomics::memory_order_relaxed );
 424
 425                         assert( p->m_list.m_idOwner.load( atomics::memory_order_relaxed ) == nullThreadId
 426                             || p->m_list.m_idOwner.load( atomics::memory_order_relaxed ) == mainThreadId
 427                             || !cds::OS::is_thread_alive( p->m_list.m_idOwner.load( atomics::memory_order_relaxed ))
 428                             );
 429
 430                         al.Delete( p );
 431                         p = pNext;
 432                     }
 433                 }
 434             };
 435             //@endcond
 436
 437             //@cond
 438             template <class ThreadGC>
 439             class scoped_lock {
 440             public:
 441                 typedef ThreadGC                    thread_gc;
 442                 typedef typename thread_gc::rcu_tag rcu_tag;
 443
 444             public:
 445                 scoped_lock()
 446                 {
 447                     thread_gc::access_lock();
 448                 }
 449
 450                 ~scoped_lock()
 451                 {
 452                     thread_gc::access_unlock();
 453                 }
 454             };
 455             //@endcond
 456         } // namespace details
 457         //@endcond
 458
 459         // forwards
 460         //@cond
 461         template <typename RCUimpl> class gc;
 462         //@endcond
 463
 464         /// Epoch-based retired ptr
 465         /**
 466             Retired pointer with additional epoch field that prevents early reclamation.
 467             This type of retired pointer is used in buffered RCU implementations.
 468         */
 469         struct epoch_retired_ptr: public retired_ptr
 470         {
 471             uint64_t    m_nEpoch;  ///< The epoch when the object has been retired
 472
 473             //@cond
 474             epoch_retired_ptr()
 475             {}
 476             //@endcond
 477
 478             /// Constructor creates a copy of \p rp retired pointer and saves \p nEpoch reclamation epoch.
 479             epoch_retired_ptr( retired_ptr const& rp, uint64_t nEpoch )
 480                 : retired_ptr( rp )
 481                 , m_nEpoch( nEpoch )
 482             {}
 483         };
 484
 485     } // namespace urcu
 486 } // namespace cds
 487
 488 #endif // #ifndef CDSLIB_URCU_DETAILS_BASE_H