c4336d7ac3ca196b42570cad061c62d224ebcdc2
[libcds.git] / cds / memory / michael / allocator.h
1 //$$CDS-header$$
2
3 #ifndef CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
5
6 /*
7     Michael allocator implementation
8     Source:
9         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
10
11     Editions:
12     2011.09.07 khizmax  Optimization: small page (about 64K) is allocated by Heap::alloc call.
13                         This optimization allows to allocate system memory more regularly,
14                         in blocks of 1M that leads to less memory fragmentation.
15     2011.01.02 khizmax  Created
16 */
17
18 #include <stdlib.h>
19 #include <mutex>        // unique_lock
20 #include <cds/init.h>
21 #include <cds/memory/michael/options.h>
22 #include <cds/memory/michael/bound_check.h>
23 #include <cds/memory/michael/procheap_stat.h>
24 #include <cds/memory/michael/osalloc_stat.h>
25
26 #include <cds/os/topology.h>
27 #include <cds/os/alloc_aligned.h>
28 #include <cds/sync/spinlock.h>
29 #include <cds/details/type_padding.h>
30 #include <cds/details/marked_ptr.h>
31 #include <cds/container/vyukov_mpmc_cycle_queue.h>
32 #include <cds/user_setup/cache_line.h>
33 #include <cds/details/lib.h>
34
35 #include <boost/intrusive/list.hpp>
36
37 namespace cds {
38     /// Memory-related algorithms: allocators etc.
39 namespace memory {
40     /// Michael's allocator (class Heap)
41     /**
42         \par Source
43             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
44
45         This namespace declares the main class Heap and a lot of helper classes.
46     */
47 namespace michael {
48
49     /// Size class
50     struct size_class {
51         unsigned int    nBlockSize  ;   ///< block size in bytes
52         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
53         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
54         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
55     };
56
57     /// %Heap based on system \p malloc and \p free functions
58     struct malloc_heap
59     {
60         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
61         static void * alloc( size_t nSize )
62         {
63             CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
64             void * p = ::malloc( nSize );
65             CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
66             return p;
67         }
68         /// Returning memory block to the system (\p free wrapper)
69         static void free( void * p )
70         {
71             CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
72             ::free( p );
73             CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
74         }
75     };
76
77     /// %Heap based on system provided aligned \p malloc and \p free functions
78     struct aligned_malloc_heap
79     {
80         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
81         static void * alloc( size_t nSize, size_t nAlignment  )
82         {
83             return cds::OS::aligned_malloc( nSize, nAlignment );
84         }
85         /// Frees aligned memory block \p p that has been previosly allocated by \ref alloc call
86         static void free( void * p )
87         {
88             cds::OS::aligned_free( p );
89         }
90     };
91
92     /// Page heap based on \p Heap
93     /**
94         Page heap can allocate memory by page-sized block only.
95         \p Heap may be any heap that provides interface like \ref malloc_heap.
96
97         This class is one of available implementation of opt::page_heap option.
98     */
99     template <class Heap = malloc_heap>
100     class page_allocator: public Heap
101     {
102         //@cond
103         typedef Heap base_class;
104         size_t  m_nPageSize;
105         //@endcond
106
107     public:
108         /// Initializes heap
109         page_allocator(
110             size_t nPageSize    ///< page size in bytes
111         )
112             : m_nPageSize( nPageSize )
113         {}
114
115         /// Allocate new page
116         void * alloc()
117         {
118             return base_class::alloc( m_nPageSize );
119         }
120
121         /// Free page \p pPage
122         void free( void * pPage )
123         {
124             base_class::free( pPage );
125         }
126     };
127
128     /// Page cacheable heap
129     /**
130         To improve performance this allocator maintains small list of free pages.
131         Page heap can allocate memory by page-sized block only.
132
133         Template parameters:
134             \li \p FreeListCapacity - capacity of free-list, default value is 64 page
135             \li \p Heap may be any heap that provides interface like \ref malloc_heap.
136
137         This class is one of available implementation of opt::page_heap option.
138     */
139     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
140     class page_cached_allocator: public page_allocator<Heap>
141     {
142         //@cond
143         typedef page_allocator<Heap> base_class;
144
145 #ifdef _DEBUG
146         struct make_null_ptr {
147             void operator ()(void *& p)
148             {
149                 p = nullptr;
150             }
151         };
152 #endif
153         struct free_list_traits : public cds::container::vyukov_queue::traits
154         {
155             typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
156 #ifdef _DEBUG
157             typedef make_null_ptr value_cleaner;
158 #endif
159         };
160         typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
161
162         free_list   m_FreeList;
163         //@endcond
164
165     public:
166         /// Initializes heap
167         page_cached_allocator(
168             size_t nPageSize    ///< page size in bytes
169         )
170             : base_class( nPageSize )
171             , m_FreeList( FreeListCapacity )
172         {}
173
174         //@cond
175         ~page_cached_allocator()
176         {
177             void * pPage;
178             while ( m_FreeList.pop(pPage) )
179                 base_class::free( pPage );
180         }
181         //@endcond
182
183         /// Allocate new page
184         void * alloc()
185         {
186             void * pPage;
187             if ( !m_FreeList.pop( pPage ) )
188                 pPage = base_class::alloc();
189             return pPage;
190         }
191
192         /// Free page \p pPage
193         void free( void * pPage )
194         {
195             if ( !m_FreeList.push( pPage ))
196                 base_class::free( pPage );
197         }
198     };
199
200     /// Implementation of opt::sizeclass_selector option
201     /**
202         Default size-class selector can manage memory blocks up to 64K.
203     */
204     class CDS_EXPORT_API default_sizeclass_selector
205     {
206         //@cond
207         /// Count of different size-classes
208         static const size_t c_nSizeClassCount = 63;
209
210         /// Max block size
211         static const size_t c_nMaxBlockSize = 64 * 1024;
212
213         /// Page size of type 0 (64K)
214         static const unsigned int c_nPage64K = 64 * 1024 - 32;
215
216         /// Page size of type 1 (1M)
217         static const unsigned int c_nPage1M = 1024 * 1024;
218
219         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
220         static size_class const m_szClass[c_nSizeClassCount];
221         static unsigned char const m_szClassMap[];
222         //@endcond
223     public:
224         /// Type of size-class index
225         typedef unsigned int sizeclass_index;
226
227 #ifdef _DEBUG
228         default_sizeclass_selector();
229 #endif
230
231         /// "No size class" index
232         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
233
234         /// Returns size-class count
235         static sizeclass_index size()
236         {
237             return c_nSizeClassCount;
238         }
239
240         /// Returns page size in bytes for given page type \p nPageType
241         static size_t page_size(size_t nPageType )
242         {
243             switch (nPageType) {
244             case 0:
245                 return c_nPage64K;
246             case 1:
247                 return c_nPage1M;
248             default:
249                 assert(false)   ;   // anything forgotten?..
250             }
251             return c_nPage1M;
252         }
253
254         /// Returns count of page size-class
255         /**
256             This class supports pages of two types: 64K page for small objects and 1M page for other objects.
257         */
258         static size_t pageTypeCount()
259         {
260             return 2;
261         }
262
263         /// Returns size-class index for \p nSize
264         /**
265             For large blocks that cannot be allocated by Michael's allocator
266             the function must return -1.
267         */
268         static sizeclass_index find( size_t nSize )
269         {
270             if ( nSize > c_nMaxBlockSize ) {
271                 // Too large block - allocate from system
272                 return c_nNoSizeClass;
273             }
274             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
275             assert( nSize <= m_szClassBounds[ szClass ] );
276             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
277
278             return szClass;
279         }
280
281         /// Gets details::size_class struct for size-class index \p nIndex
282         static const size_class * at( sizeclass_index nIndex )
283         {
284             assert( nIndex < size() );
285             return m_szClass + nIndex;
286         }
287     };
288
289     //@cond
290     namespace details {
291         struct free_list_tag;
292         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
293
294         struct partial_list_tag;
295         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
296
297         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
298         {};
299     }
300     //@endcond
301
302     /// List of free superblock descriptor
303     /**
304         This class is a implementation of \ref opt::free_list option
305     */
306     template <class Lock, class T = details::intrusive_superblock_desc>
307     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
308     {
309         //@cond
310         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
311     public:
312         typedef details::free_list_locked_hook item_hook;
313         typedef Lock lock_type;
314     protected:
315         typedef std::unique_lock<lock_type>   auto_lock;
316
317         mutable lock_type   m_access;
318         //@endcond
319
320     public:
321         /// Rebinds to other item type \p T2
322         template <class T2>
323         struct rebind {
324             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
325         };
326
327     public:
328         /// Push superblock descriptor to free-list
329         void push( T * pDesc )
330         {
331             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
332             auto_lock al(m_access);
333             base_class::push_back( *pDesc );
334         }
335
336         /// Pop superblock descriptor from free-list
337         T *   pop()
338         {
339             auto_lock al(m_access);
340             if ( base_class::empty() )
341                 return nullptr;
342             T& rDesc = base_class::front();
343             base_class::pop_front();
344             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
345             return &rDesc;
346         }
347
348         /// Returns current count of superblocks in free-list
349         size_t  size() const
350         {
351             auto_lock al(m_access);
352             return base_class::size();
353         }
354     };
355
356     /// List of partial filled superblock descriptor
357     /**
358         This class is a implementation of \ref opt::partial_list option
359     */
360     template <class Lock, class T = details::intrusive_superblock_desc>
361     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
362     {
363         //@cond
364         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
365     public:
366         typedef details::partial_list_locked_hook item_hook;
367         typedef Lock    lock_type;
368     protected:
369         typedef std::unique_lock<lock_type>   auto_lock;
370
371         mutable lock_type   m_access;
372         //@endcond
373
374     public:
375         /// Rebinds to other item type \p T2
376         template <class T2>
377         struct rebind {
378             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
379         };
380
381     public:
382         /// Push a superblock \p pDesc to the list
383         void    push( T * pDesc )
384         {
385             auto_lock al( m_access );
386             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
387             base_class::push_back( *pDesc );
388         }
389
390         /// Pop superblock from the list
391         T * pop()
392         {
393             auto_lock al( m_access );
394             if ( base_class::empty() )
395                 return nullptr;
396             T& rDesc = base_class::front();
397             base_class::pop_front();
398             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
399             return &rDesc;
400         }
401
402         /// Removes \p pDesc descriptor from the free-list
403         bool unlink( T * pDesc )
404         {
405             assert(pDesc != nullptr);
406             auto_lock al( m_access );
407             // !inited(pDesc) is equal to "pDesc is being linked to partial list"
408             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
409                 base_class::erase( base_class::iterator_to( *pDesc ) );
410                 return true;
411             }
412             return false;
413         }
414
415         /// Count of element in the list
416         size_t size() const
417         {
418             auto_lock al( m_access );
419             return base_class::size();
420         }
421     };
422
423     /// Summary processor heap statistics
424     /**
425         Summary heap statistics for use with Heap::summaryStat function.
426     */
427     struct summary_stat
428     {
429         size_t      nAllocFromActive    ;  ///< Event count of allocation from active superblock
430         size_t      nAllocFromPartial   ;  ///< Event count of allocation from partial superblock
431         size_t      nAllocFromNew       ;  ///< Event count of allocation from new superblock
432         size_t      nFreeCount          ;  ///< Count of \p free function call
433         size_t      nPageAllocCount     ;  ///< Count of page (superblock) allocated
434         size_t      nPageDeallocCount   ;  ///< Count of page (superblock) deallocated
435         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors
436         size_t      nDescFull           ;  ///< Count of full superblock
437         atomic64u_t nBytesAllocated     ;  ///< Count of allocated bytes (for heap managed memory blocks)
438         atomic64u_t nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap managed memory blocks)
439
440         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned function call (for large memory blocks that allocated directly from OS)
441         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned function call (for large memory blocks that allocated directly from OS)
442         atomic64u_t nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks that allocated directly from OS)
443         atomic64_t  nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks that allocated directly from OS)
444
445         // Internal contention indicators
446         /// CAS failure counter for updating active field of active block of \p alloc_from_active Heap internal function
447         /**
448             Contention indicator. The less value is better
449         */
450         size_t      nActiveDescCASFailureCount;
451         /// CAS failure counter for updating active field of active block of \p alloc_from_active Heap internal function
452         /**
453             Contention indicator. The less value is better
454         */
455         size_t      nActiveAnchorCASFailureCount;
456         /// CAS failure counter for updating anchor field of partial block of \p alloc_from_partial Heap internal function
457         /**
458             Contention indicator. The less value is better
459         */
460         size_t      nPartialDescCASFailureCount;
461         /// CAS failure counter for updating anchor field of partial block of \p alloc_from_partial Heap internal function
462         /**
463             Contention indicator. The less value is better
464         */
465         size_t      nPartialAnchorCASFailureCount;
466
467
468     public:
469         /// Constructs empty statistics. All counters are zero.
470         summary_stat()
471         {
472             clear();
473         }
474
475         /// Difference statistics
476         /**
477             This operator computes difference between \p *this and \p stat and places the difference to \p this.
478             Returns \p *this;
479         */
480         summary_stat& operator -=( const summary_stat& stat )
481         {
482             nAllocFromActive    -= stat.nAllocFromActive;
483             nAllocFromPartial   -= stat.nAllocFromPartial;
484             nAllocFromNew       -= stat.nAllocFromNew;
485             nFreeCount          -= stat.nFreeCount;
486             nPageAllocCount     -= stat.nPageAllocCount;
487             nPageDeallocCount   -= stat.nPageDeallocCount;
488             nDescAllocCount     -= stat.nDescAllocCount;
489             nDescFull           -= stat.nDescFull;
490             nBytesAllocated     -= stat.nBytesAllocated;
491             nBytesDeallocated   -= stat.nBytesDeallocated;
492
493             nSysAllocCount      -= stat.nSysAllocCount;
494             nSysFreeCount       -= stat.nSysFreeCount;
495             nSysBytesAllocated  -= stat.nSysBytesAllocated;
496             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
497
498             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
499             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
500             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
501             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
502
503             return *this;
504         }
505
506         /// Clears statistics
507         /**
508             All counters are set to zero.
509         */
510         void clear()
511         {
512             memset( this, 0, sizeof(*this));
513         }
514
515         //@cond
516         template <typename Stat>
517         summary_stat& add_procheap_stat( const Stat& stat )
518         {
519             nAllocFromActive    += stat.allocFromActive();
520             nAllocFromPartial   += stat.allocFromPartial();
521             nAllocFromNew       += stat.allocFromNew();
522             nFreeCount          += stat.freeCount();
523             nPageAllocCount     += stat.blockAllocated();
524             nPageDeallocCount   += stat.blockDeallocated();
525             nDescAllocCount     += stat.descAllocCount();
526             nDescFull           += stat.descFull();
527             nBytesAllocated     += stat.allocatedBytes();
528             nBytesDeallocated   += stat.deallocatedBytes();
529
530             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
531             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
532             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
533             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
534
535             return *this;
536         }
537
538         template <typename Stat>
539         summary_stat& add_heap_stat( const Stat& stat )
540         {
541             nSysAllocCount      += stat.allocCount();
542             nSysFreeCount       += stat.freeCount();
543
544             nSysBytesAllocated  += stat.allocatedBytes();
545             nSysBytesDeallocated+= stat.deallocatedBytes();
546
547             return *this;
548         }
549         //@endcond
550     };
551
552     /// Michael's allocator
553     /**
554         This class provides base functionality for Michael's allocator. It does not provide
555         the interface described by \p std::allocator, therefore, we name it as a heap, not as an allocator.
556         The heap interface is closer to semantics of \p malloc / \p free system functions.
557         The heap supports allocation of aligned and unaligned data.
558
559         The algorithm is based on simplified version of
560             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
561
562         that, in turn, is concurrent version of well-known Hoard allocator developed by Emery Berger, see
563             \li [2002] Emery Berger "Memory Management for High-Performance Application", PhD thesis
564
565         This is powerful, scalable, fully customizable heap with fast-path without any locks
566         that has been developed specifically for multi-threading.
567         With opt:sys_topology you can set as many allocation arena ("processor heap") as you need.
568         You can manually bound any your thread to any arena ("processor"). With opt::sizeclass_selector option you can manage
569         allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation
570         like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that helps you to tune your application.
571         The opt::check_bounds feature can help you to find a memory buffer overflow.
572
573         Brief algorithm description from Michael's work:
574
575         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
576         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
577         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
578         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
579         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
580         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
581         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
582         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
583         in a lock-free manner.
584
585         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
586         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
587         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
588         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
589         available blocks of its original superblock by atomically updating its descriptor.
590
591         <b>Constraint</b>: one superblock may contain up to 2048 block. This restriction imposes a restriction on the maximum
592         superblock size.
593
594         Available \p Options:
595         - \ref opt::sys_topology - class that describes system topology needed for allocator.
596             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
597         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
598             Default is \ref malloc_heap.
599         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
600             Default is \ref aligned_malloc_heap
601         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
602             Default is \ref page_cached_allocator
603         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
604             for incoming allocation request.
605             Default is \ref default_sizeclass_selector
606         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
607             Default is \ref free_list_locked
608         - \ref opt::partial_list - option setter for a class to manage a list of partial filled superblocks
609             Default is \ref partial_list_locked
610         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
611             that is maintained by the heap.
612             Default is \ref procheap_empty_stat
613         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large block
614             allocation. Term "large block" is specified by the size-class selector (see \ref opt::sizeclass_selector)
615             and it is 64K for \ref default_sizeclass_selector. Any block that is large that 64K is allocated from
616             OS directly. \p os_allocated_stat option is set a class to gather statistics for large blocks.
617             Default is \ref os_allocated_empty
618         - \ref opt::check_bounds - a bound checker.
619             Default is no bound checker (cds::opt::none)
620
621         \par Usage:
622         The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
623
624         \code
625         #include <cds/memory/michael/allocator.h>
626
627         // Heap with explicitly defined options:
628         cds::memory::michael::Heap<
629             opt::aligned_heap< aligned_malloc_heap >,
630             opt::page_heap< page_cached_allocator<16, malloc_heap> >
631         >   myHeap;
632
633         // Heap with default options:
634         cds::memory::michael::Heap<>    myDefHeap;
635         \endcode
636
637         \par How to make std-like allocator
638
639         There are serious differencies of heap and <tt>std::allocator</tt> interface:
640             - Heap is stateful, and \p std::allocator is stateless.
641             - Heap has much more template parameters than \p std::allocator
642             - Heap has low-level interface for memory allocating only unlike the allocator
643                 interface that can construct/destroy objects of any type T.
644
645         To convert heap interface into \p std::allocator -like interface you should:
646             - Declare object of class cds::memory::michael::Heap specifying the necessary
647                 template parameters; this is usually static object
648             - Create a class with \p std::allocator interface that uses the function of heap.
649         \code
650         #include <cds/memory/michael/allocator.h>
651
652         template <class T>
653         class MichaelAllocator
654         {
655             typedef std::allocator<T>               std_allocator;
656             typedef cds::memory::michael::Heap<>    michael_heap;
657
658             // Michael heap static object
659             static michael_heap     s_Heap;
660         public:
661             // Declare typedefs from std::allocator
662             typedef typename std_allocator::const_pointer   const_pointer;
663             typedef typename std_allocator::pointer         pointer;
664             typedef typename std_allocator::const_reference const_reference;
665             typedef typename std_allocator::reference       reference;
666             typedef typename std_allocator::difference_type difference_type;
667             typedef typename std_allocator::size_type       size_type;
668             typedef typename std_allocator::value_type      value_type;
669
670             // Allocation function
671             pointer allocate( size_type _Count, const void* _Hint )
672             {
673                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
674             }
675
676             // Deallocation function
677             void deallocate( pointer _Ptr, size_type _Count )
678             {
679                 s_Heap.free( _Ptr );
680             }
681
682             // Other std::allocator specific functions: address, construct, destroy, etc.
683             ...
684
685             // Rebinding allocator to other type
686             template <class _Other>
687             struct rebind {
688                 typedef MichaelAllocator<_Other> other;
689             };
690         };
691
692         // In .cpp file:
693         MichaelAllocator::michael_heap MichaelAllocator::s_Heap;
694
695         \endcode
696     */
697     template <typename... Options>
698     class Heap {
699     protected:
700
701         //@cond
702         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
703         static const unsigned int c_nDefaultBlockAlignment = 8;
704
705         struct default_options {
706             typedef cds::OS::topology           sys_topology;
707             typedef malloc_heap                 system_heap;
708             typedef page_cached_allocator<>     page_heap;
709             typedef aligned_malloc_heap         aligned_heap;
710             typedef default_sizeclass_selector  sizeclass_selector;
711             typedef free_list_locked<cds::sync::spin>    free_list;
712             typedef partial_list_locked<cds::sync::spin> partial_list;
713             typedef procheap_empty_stat         procheap_stat;
714             typedef os_allocated_empty          os_allocated_stat;
715             typedef cds::opt::none              check_bounds;
716         };
717         //@endcond
718
719     protected:
720         //@cond
721         typedef typename opt::make_options<default_options, Options...>::type   options;
722         //@endcond
723
724         //@cond
725         typedef unsigned char   byte;
726         //@endcond
727     public:
728         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
729         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
730         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
731         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
732         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
733         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
734         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
735         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
736
737         // forward declarations
738         //@cond
739         struct superblock_desc;
740         struct processor_heap_base;
741         struct processor_desc;
742         //@endcond
743
744         /// Superblock states
745         /**
746             A superblock can be in one of four states: \p ACTIVE, \p FULL,
747             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
748             superblock in a heap, or if a thread intends to try to install it
749             as such. A superblock is \p FULL if all its blocks are either allocated
750             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
751             and contains unreserved available blocks. A superblock is
752             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
753         */
754         enum superblock_state {
755             SBSTATE_ACTIVE  = 0,    ///< superblock is active
756             SBSTATE_FULL    = 1,    ///< superblock is full
757             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
758             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
759         };
760
761         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
762
763         /// Anchor of the superblock descriptor. Updated by CAS
764         struct anchor_tag {
765             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
766             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
767             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
768             unsigned long long    tag:40    ;   ///< ABA prevention tag
769         };
770
771         /// Superblock descriptor
772         struct superblock_desc
773             : public options::free_list::item_hook
774             , public options::partial_list::item_hook
775         {
776             atomics::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
777             byte *              pSB         ;   ///< ptr to superblock
778             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
779             unsigned int        nBlockSize  ;   ///< block size in bytes
780             unsigned int        nCapacity   ;   ///< superblock size/block size
781
782             //@cond
783             superblock_desc()
784                 : pSB(nullptr)
785                 , pProcHeap( nullptr )
786             {}
787             //@endcond
788         };
789
790         //@cond
791         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
792         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
793         //@endcond
794
795 #if CDS_BUILD_BITS == 32
796         /// Allocated block header
797         /**
798             Each allocated block has 8-byte header.
799             The header contains pointer to owner superblock descriptor and the redirection flag.
800             If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
801             \code
802                 +---------------+
803                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
804                 +---------------+
805                 |               | <- memory allocated
806                 |   memory      |
807                 |               |
808                 +---------------+
809             \endcode
810             If the block has been allocated by \ref alloc_aligned, then it is possible that pointer returned must be aligned.
811             In this case the redirection flag is 1 and the block's structure is:
812             \code
813                 +---------------+
814             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
815             |   +---------------+
816             |   |   padding     |
817             |   |   (unused)    |
818             |   |               |
819             |   +---------------+
820             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
821                 +---------------+
822                 |               | <- memory allocated
823                 |   memory      |
824                 |               |
825                 +---------------+
826             \endcode
827         */
828         class block_header
829         {
830         //@cond
831             enum {
832                 bitAligned = 1,
833                 bitOSAllocated = 2
834             };
835
836             union {
837                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
838                 uint32_t         nSize       ;   // block size (allocated form OS)
839             };
840             uint32_t         nFlags;
841
842         public:
843             void  set( superblock_desc * pdesc, uint32_t isAligned )
844             {
845                 pDesc = pdesc;
846                 nFlags = isAligned ? bitAligned : 0;
847             }
848
849             superblock_desc * desc()
850             {
851                 assert( (nFlags & bitOSAllocated) == 0 );
852                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
853             }
854
855             block_header * begin()
856             {
857                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
858             }
859
860             bool isAligned() const
861             {
862                 return (nFlags & bitAligned) != 0;
863             }
864
865             bool isOSAllocated() const
866             {
867                 return (nFlags & bitOSAllocated) != 0;
868             }
869
870             void setOSAllocated( size_t sz )
871             {
872                 nSize = sz;
873                 nFlags = bitOSAllocated;
874             }
875
876             size_t getOSAllocSize() const
877             {
878                 assert( isOSAllocated() );
879                 return nSize;
880             }
881
882         //@endcond
883         };
884 #elif CDS_BUILD_BITS == 64
885         //@cond
886         class block_header
887         {
888             enum {
889                 bitAligned = 1,
890                 bitOSAllocated = 2
891             };
892             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
893             // If bitOSAllocated is set the pDesc contains size of memory block
894             // allocated from OS
895             marked_desc_ptr     pDesc;
896         public:
897             void  set( superblock_desc * pdesc, uint32_t isAligned )
898             {
899                 pDesc = marked_desc_ptr( pdesc, isAligned );
900             }
901
902             superblock_desc * desc()
903             {
904                 assert( !isOSAllocated() );
905                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
906             }
907
908             block_header * begin()
909             {
910                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
911             }
912
913             bool isAligned() const
914             {
915                 return (pDesc.bits() & bitAligned) != 0;
916             }
917
918             bool isOSAllocated() const
919             {
920                 return (pDesc.bits() & bitOSAllocated) != 0;
921             }
922
923             void setOSAllocated( size_t nSize )
924             {
925
926                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
927             }
928
929             size_t getOSAllocSize() const
930             {
931                 assert( isOSAllocated() );
932                 return reinterpret_cast<uintptr_t>( pDesc.ptr() ) >> 2;
933             }
934
935         };
936         //@endcond
937 #else
938 #       error "Unexpected value of CDS_BUILD_BITS"
939 #endif  // CDS_BUILD_BITS
940
941         //@cond
942         struct free_block_header: block_header {
943             unsigned int    nNextFree;
944         };
945         //@endcond
946
947 #if CDS_BUILD_BITS == 32
948         /// Processor heap's \p active field
949         /**
950             The \p active field in the processor heap structure is primarily a pointer to the descriptor
951             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
952             guaranteed that the active superblock has at least one block available for reservation.
953             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
954             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
955             of blocks available for reservation in the active superblock less one. That is, if the value
956             of credits is n, then the active superblock contains n+1 blocks available for reservation
957             through the \p active field. Note that the number of blocks in a superblock is not limited
958             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
959             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
960             atomically decrements credits while validating that the active superblock is still valid.
961         */
962         class active_tag {
963         //@cond
964             superblock_desc *       pDesc;
965             uint32_t             nCredits;
966
967         public:
968             static const unsigned int c_nMaxCredits = 0 - 1;
969
970         public:
971             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
972                 : pDesc( nullptr )
973                 , nCredits(0)
974             {}
975
976             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
977             ~active_tag() CDS_NOEXCEPT = default;
978             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
979 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
980             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
981             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
982 #       endif
983
984             /// Returns pointer to superblock descriptor
985             superblock_desc * ptr() const
986             {
987                 return pDesc;
988             }
989
990             /// Sets superblock descriptor
991             void ptr( superblock_desc * p )
992             {
993                 pDesc = p;
994             }
995
996             unsigned int credits() const
997             {
998                 return nCredits;
999             }
1000
1001             void credits( unsigned int n )
1002             {
1003                 nCredits = n;
1004             }
1005
1006             void clear()
1007             {
1008                 pDesc = nullptr;
1009                 nCredits = 0;
1010             }
1011
1012             void set( superblock_desc * pSB, unsigned int n )
1013             {
1014                 pDesc = pSB;
1015                 nCredits = n;
1016             }
1017         //@endcond
1018         };
1019 #elif CDS_BUILD_BITS == 64
1020         //@cond
1021         class active_tag
1022         {
1023         public:
1024             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1025         protected:
1026             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1027             marked_desc_ptr pDesc;
1028
1029         public:
1030             active_tag() CDS_NOEXCEPT
1031                 : pDesc( nullptr )
1032             {}
1033             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1034             //active_tag() CDS_NOEXCEPT = default;
1035             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1036             ~active_tag() CDS_NOEXCEPT = default;
1037             active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1038 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1039             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1040             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1041 #       endif
1042             superblock_desc *    ptr() const
1043             {
1044                 return pDesc.ptr();
1045             }
1046
1047             void ptr( superblock_desc * p )
1048             {
1049                 assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
1050                 pDesc = marked_desc_ptr( p, pDesc.bits());
1051             }
1052
1053             unsigned int credits()
1054             {
1055                 return (unsigned int) pDesc.bits();
1056             }
1057
1058             void credits( unsigned int n )
1059             {
1060                 assert( n <= c_nMaxCredits );
1061                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1062             }
1063
1064             void clear()
1065             {
1066                 pDesc = marked_desc_ptr();
1067             }
1068
1069             void set( superblock_desc * pSB, unsigned int n )
1070             {
1071                 assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
1072                 pDesc = marked_desc_ptr( pSB, n );
1073             }
1074
1075         };
1076         //@endcond
1077 #else
1078 #       error "Unexpected value of CDS_BUILD_BITS"
1079 #endif  // CDS_BUILD_BITS
1080
1081
1082         /// Processor heap
1083         struct processor_heap_base
1084         {
1085             CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1086             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1087             const size_class *  pSizeClass  ;   ///< pointer to size class
1088             atomics::atomic<superblock_desc *>   pPartial    ;   ///< pointer to partial filled superblock (may be \p nullptr)
1089             partial_list        partialList ;   ///< list of partial filled superblocks owned by the processor heap
1090             unsigned int        nPageIdx    ;   ///< page size-class index, \ref c_nPageSelfAllocation - "small page"
1091
1092             /// Small page marker
1093             /**
1094                 If page is small and can be allocated by the Heap, the \p nPageIdx value is \p c_nPageSelfAllocation.
1095                 This optimization allows to allocate system memory more regularly, in blocks of 1M that leads
1096                 to less memory fragmentation.
1097             */
1098             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1099
1100             procheap_stat       stat        ;   ///< heap statistics
1101             //processor_heap_statistics   stat;
1102
1103             //@cond
1104             processor_heap_base() CDS_NOEXCEPT
1105                 : pProcDesc( nullptr )
1106                 , pSizeClass( nullptr )
1107                 , pPartial( nullptr )
1108             {
1109                 assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
1110             }
1111             //@endcond
1112
1113             /// Get partial superblock owned by the processor heap
1114             superblock_desc * get_partial()
1115             {
1116                 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1117                 do {
1118                     if ( !pDesc ) {
1119                         pDesc =  partialList.pop();
1120                         break;
1121                     }
1122                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1123
1124                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1125                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1126                 return pDesc;
1127             }
1128
1129             /// Add partial superblock \p pDesc to the list
1130             void add_partial( superblock_desc * pDesc )
1131             {
1132                 assert( pPartial != pDesc );
1133                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1134
1135                 superblock_desc * pCur = nullptr;
1136                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1137                     partialList.push( pDesc );
1138             }
1139
1140
1141             /// Remove superblock \p pDesc from the list of partial superblock
1142             bool unlink_partial( superblock_desc * pDesc )
1143             {
1144                 return partialList.unlink( pDesc );
1145             }
1146         };
1147
1148         /// Aligned superblock descriptor
1149         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1150
1151         /// Processor descriptor
1152         struct processor_desc
1153         {
1154             processor_heap *    arrProcHeap     ; ///< array of processor heap
1155             free_list           listSBDescFree  ; ///< List of free superblock descriptors
1156             page_heap *         pageHeaps       ; ///< array of page heap (one for each page size)
1157
1158             //@cond
1159             processor_desc()
1160                 : arrProcHeap( nullptr )
1161                 , pageHeaps( nullptr )
1162             {}
1163             //@endcond
1164         };
1165
1166
1167     protected:
1168         sys_topology        m_Topology           ;  ///< System topology
1169         system_heap         m_LargeHeap          ;  ///< Heap for large block
1170         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1171         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1172         atomics::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1173         unsigned int        m_nProcessorCount    ;  ///< Processor count
1174         bound_checker       m_BoundChecker       ;  ///< Bound checker
1175
1176         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1177
1178     protected:
1179         //@cond
1180
1181         /// Allocates large block from system memory
1182         block_header * alloc_from_OS( size_t nSize )
1183         {
1184             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1185             m_OSAllocStat.incBytesAllocated( nSize );
1186             p->setOSAllocated( nSize );
1187             return p;
1188         }
1189
1190         /// Allocates from the active superblock if it possible
1191         block_header * alloc_from_active( processor_heap * pProcHeap )
1192         {
1193             active_tag  oldActive;
1194             int nCollision = -1;
1195
1196             // Reserve block
1197             while ( true ) {
1198                 ++nCollision;
1199                 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1200                 if ( !oldActive.ptr() )
1201                     return nullptr;
1202                 unsigned int nCredits = oldActive.credits();
1203                 active_tag  newActive   ; // default = 0
1204                 if ( nCredits != 0 ) {
1205                     newActive = oldActive;
1206                     newActive.credits( nCredits - 1 );
1207                 }
1208                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1209                     break;
1210             }
1211
1212             if ( nCollision )
1213                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1214
1215             // pop block
1216             superblock_desc * pDesc = oldActive.ptr();
1217
1218             anchor_tag  oldAnchor;
1219             anchor_tag  newAnchor;
1220             byte * pAddr;
1221             unsigned int nMoreCredits = 0;
1222
1223             nCollision = -1;
1224             do {
1225                 ++nCollision;
1226                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1227
1228                 assert( oldAnchor.avail < pDesc->nCapacity );
1229                 CDS_TSAN_ANNOTATE_IGNORE_READS_BEGIN;
1230                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1231                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1232                 CDS_TSAN_ANNOTATE_IGNORE_READS_END;
1233                 newAnchor.tag += 1;
1234
1235                 if ( oldActive.credits() == 0 ) {
1236                     // state must be ACTIVE
1237                     if ( oldAnchor.count == 0 )
1238                         newAnchor.state = SBSTATE_FULL;
1239                     else {
1240                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1241                         newAnchor.count -= nMoreCredits;
1242                     }
1243                 }
1244             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1245
1246             if ( nCollision )
1247                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1248
1249             assert( newAnchor.state != SBSTATE_EMPTY );
1250
1251             if ( newAnchor.state == SBSTATE_FULL )
1252                 pProcHeap->stat.incDescFull();
1253             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1254                 update_active( pProcHeap, pDesc, nMoreCredits );
1255
1256             pProcHeap->stat.incAllocFromActive();
1257
1258             // block_header fields is not needed to setup
1259             // It was set in alloc_from_new_superblock
1260             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1261             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1262             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1263
1264             return reinterpret_cast<block_header *>( pAddr );
1265         }
1266
1267         /// Allocates from a partial filled superblock if it possible
1268         block_header * alloc_from_partial( processor_heap * pProcHeap )
1269         {
1270         retry:
1271             superblock_desc * pDesc = pProcHeap->get_partial();
1272             if ( !pDesc )
1273                 return nullptr;
1274
1275             // reserve blocks
1276             anchor_tag  oldAnchor;
1277             anchor_tag  newAnchor;
1278             //byte * pAddr;
1279             unsigned int nMoreCredits = 0;
1280
1281             int nCollision = -1;
1282             do {
1283                 ++nCollision;
1284
1285                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1286                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1287                     free_superblock( pDesc );
1288                     goto retry;
1289                 }
1290
1291                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1292                 newAnchor.count -= nMoreCredits + 1;
1293                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1294                 newAnchor.tag += 1;
1295             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1296
1297             if ( nCollision )
1298                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1299
1300             if ( newAnchor.state == SBSTATE_FULL )
1301                 pProcHeap->stat.incDescFull();
1302
1303             // Now, the thread is guaranteed to have reserved one or more blocks
1304             // pop reserved block
1305             byte * pAddr;
1306             nCollision = -1;
1307             do {
1308                 ++nCollision;
1309
1310                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1311
1312                 assert( oldAnchor.avail < pDesc->nCapacity );
1313                 CDS_TSAN_ANNOTATE_IGNORE_READS_BEGIN;
1314                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1315                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1316                 CDS_TSAN_ANNOTATE_IGNORE_READS_END;
1317                 ++newAnchor.tag;
1318             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1319
1320             if ( nCollision )
1321                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1322
1323             assert( newAnchor.state != SBSTATE_EMPTY );
1324
1325             pProcHeap->stat.incAllocFromPartial();
1326
1327             if ( nMoreCredits > 0 )
1328                 update_active( pProcHeap, pDesc, nMoreCredits );
1329
1330             // block_header fields is not needed to setup
1331             // It was set in alloc_from_new_superblock
1332             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1333             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1334             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1335
1336             return reinterpret_cast<block_header *>( pAddr );
1337         }
1338
1339         /// Allocates from the new superblock
1340         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1341         {
1342             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1343             assert( pDesc != nullptr );
1344             pDesc->pSB = new_superblock_buffer( pProcHeap );
1345
1346             anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1347             anchor.tag += 1;
1348
1349             // Make single-linked list of free blocks in superblock
1350             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1351             unsigned int nNext = 0;
1352             const unsigned int nBlockSize = pDesc->nBlockSize;
1353             CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1354             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1355                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1356                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1357             }
1358             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
1359             CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1360
1361             active_tag newActive;
1362             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1363
1364             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1365             anchor.state = SBSTATE_ACTIVE;
1366             pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1367
1368             active_tag curActive;
1369             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1370                 pProcHeap->stat.incAllocFromNew();
1371                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1372                 return reinterpret_cast<block_header *>( pDesc->pSB );
1373             }
1374
1375             free_superblock( pDesc );
1376             return nullptr;
1377         }
1378
1379         /// Find appropriate processor heap based on size-class selected
1380         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1381         {
1382             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1383
1384             unsigned int nProcessorId = m_Topology.current_processor();
1385             assert( nProcessorId < m_nProcessorCount );
1386
1387             if ( nProcessorId >= m_nProcessorCount )
1388                 nProcessorId = 0;
1389
1390             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1391             while ( !pDesc ) {
1392
1393                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1394                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1395                     pDesc = pNewDesc;
1396                     break;
1397                 }
1398                 free_processor_desc( pNewDesc );
1399             }
1400
1401             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1402         }
1403
1404         /// Updates active field of processor heap \p pProcHeap
1405         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1406         {
1407             assert( pProcHeap == pDesc->pProcHeap );
1408
1409             active_tag  nullActive;
1410             active_tag  newActive;
1411             newActive.set( pDesc, nCredits - 1 );
1412
1413             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1414                 return;
1415
1416             // Someone installed another active superblock.
1417             // Return credits to superblock and make it partial
1418
1419             anchor_tag  oldAnchor;
1420             anchor_tag  newAnchor;
1421
1422             do {
1423                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1424                 newAnchor.count += nCredits;
1425                 newAnchor.state = SBSTATE_PARTIAL;
1426             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1427
1428             pDesc->pProcHeap->add_partial( pDesc );
1429         }
1430
1431         /// Allocates new processor descriptor
1432         processor_desc * new_processor_desc( unsigned int nProcessorId )
1433         {
1434             CDS_UNUSED( nProcessorId );
1435             processor_desc * pDesc;
1436             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1437
1438             /*
1439                 Processor descriptor layout
1440
1441                 proc_desc -  64-byte alignment
1442                 page_heap[0] 64-byte alignment
1443                 page_heap[1] 64-byte alignment
1444                 ...
1445                 page_heap[P] 64-byte alignment
1446
1447                 proc_heap[0] 64-byte alignment
1448                 proc_heap[1] 64-byte alignment
1449                 ...
1450                 proc_heap[N] 64-byte alignment
1451             */
1452
1453             const size_t szDesc =
1454                 ( sizeof(processor_desc)
1455                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1456                     + c_nAlignment - 1
1457                 ) / c_nAlignment
1458 ;
1459
1460             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1461
1462             static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1463
1464             // TSan false positive: a new descriptor will be linked further with release fence
1465             CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1466
1467             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1468
1469             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1470             for ( size_t i = 0; i < nPageHeapCount; ++i )
1471                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1472
1473             // initialize processor heaps
1474             pDesc->arrProcHeap =
1475                 reinterpret_cast<processor_heap *>(
1476                     reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1477                     & ~(uintptr_t(c_nAlignment) - 1)
1478                 );
1479
1480             processor_heap * pProcHeap = pDesc->arrProcHeap;
1481             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1482             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1483                 new (pProcHeap) processor_heap();
1484                 pProcHeap->pProcDesc = pDesc;
1485                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1486                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1487                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1488                 else
1489                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1490             }
1491             CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1492             return pDesc;
1493         }
1494
1495
1496         void free_processor_heap( processor_heap * pProcHeap )
1497         {
1498             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1499                 superblock_desc * pDesc;
1500
1501                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1502                     free( pDesc->pSB );
1503                     m_AlignedHeap.free( pDesc );
1504                 }
1505
1506                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1507                 if ( pPartial ) {
1508                     free( pPartial->pSB );
1509                     m_AlignedHeap.free( pPartial );
1510                 }
1511
1512                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1513                 if ( pDesc ) {
1514                     free( pDesc->pSB );
1515                     m_AlignedHeap.free( pDesc );
1516                 }
1517             }
1518             else {
1519                 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1520                 superblock_desc * pDesc;
1521
1522                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1523                     pageHeap.free( pDesc->pSB );
1524                     m_AlignedHeap.free( pDesc );
1525                 }
1526
1527                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1528                 if ( pPartial ) {
1529                     pageHeap.free( pPartial->pSB );
1530                     m_AlignedHeap.free( pPartial );
1531                 }
1532
1533                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1534                 if ( pDesc ) {
1535                     pageHeap.free( pDesc->pSB );
1536                     m_AlignedHeap.free( pDesc );
1537                 }
1538             }
1539             pProcHeap->~processor_heap();
1540         }
1541
1542         /// Frees processor descriptor
1543         void free_processor_desc( processor_desc * pDesc )
1544         {
1545             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1546
1547             for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1548                 free_processor_heap( pDesc->arrProcHeap + j );
1549
1550             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1551                 m_AlignedHeap.free( pSBDesc );
1552
1553             for (size_t i = 0; i < nPageHeapCount; ++i )
1554                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1555
1556             //m_IntHeap.free( pDesc->pageHeaps );
1557             pDesc->pageHeaps = nullptr;
1558
1559             pDesc->processor_desc::~processor_desc();
1560             m_AlignedHeap.free( pDesc );
1561         }
1562
1563         /// Allocates new superblock descriptor
1564         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1565         {
1566             anchor_tag anchor;
1567             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1568             if ( pDesc == nullptr ) {
1569                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1570                 assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );
1571
1572                 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1573                 anchor.tag = 0;
1574                 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1575
1576                 pProcHeap->stat.incDescAllocCount();
1577             }
1578             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1579             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1580             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1581             pDesc->pProcHeap = pProcHeap;
1582
1583             anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1584             anchor.avail = 1;
1585             pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1586
1587             return pDesc;
1588         }
1589
1590         /// Allocates superblock page
1591         byte * new_superblock_buffer( processor_heap * pProcHeap )
1592         {
1593             pProcHeap->stat.incBlockAllocated();
1594             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1595                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1596             }
1597             else {
1598                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1599             }
1600         }
1601
1602         /// Frees superblock descriptor and its page
1603         void free_superblock( superblock_desc * pDesc )
1604         {
1605             pDesc->pProcHeap->stat.incBlockDeallocated();
1606             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1607             if ( pDesc->pSB ) {
1608                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1609                     free( pDesc->pSB );
1610                 }
1611                 else {
1612                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1613                 }
1614             }
1615             pProcDesc->listSBDescFree.push( pDesc );
1616         }
1617
1618         /// Allocate memory block
1619         block_header * int_alloc(
1620             size_t nSize    ///< Size of memory block to allocate in bytes
1621             )
1622         {
1623             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1624             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1625                 return alloc_from_OS( nSize );
1626             }
1627             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1628
1629             block_header * pBlock;
1630             processor_heap * pProcHeap;
1631             while ( true ) {
1632                 pProcHeap = find_heap( nSizeClassIndex );
1633                 if ( !pProcHeap )
1634                     return alloc_from_OS( nSize );
1635
1636                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1637                     break;
1638                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1639                     break;
1640                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1641                     break;
1642             }
1643
1644             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1645
1646             assert( pBlock != nullptr );
1647             return pBlock;
1648         }
1649
1650         //@endcond
1651     public:
1652         /// Heap constructor
1653         Heap()
1654         {
1655             // Explicit libcds initialization is needed since a static object may be constructed
1656             cds::Initialize();
1657
1658             m_nProcessorCount = m_Topology.processor_count();
1659             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1660                 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
1661             memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount )    ;   // ?? memset for atomic<>
1662         }
1663
1664         /// Heap destructor
1665         /**
1666             The destructor frees all memory allocated by the heap.
1667         */
1668         ~Heap()
1669         {
1670             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1671                 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1672                 if ( pDesc )
1673                     free_processor_desc( pDesc );
1674             }
1675
1676             m_AlignedHeap.free( m_arrProcDesc );
1677
1678             // Explicit termination of libcds
1679             cds::Terminate();
1680         }
1681
1682         /// Allocate memory block
1683         void * alloc(
1684             size_t nSize    ///< Size of memory block to allocate in bytes
1685         )
1686         {
1687             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1688
1689             // Bound checking is only for our blocks
1690             if ( !pBlock->isOSAllocated() ) {
1691                 // the block is allocated from our heap - bound checker is applicable
1692                 m_BoundChecker.make_trailer(
1693                     reinterpret_cast<byte *>(pBlock + 1),
1694                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1695                     nSize
1696                 );
1697             }
1698
1699             return pBlock + 1;
1700         }
1701
1702         /// Free previously allocated memory block
1703         void free(
1704             void * pMemory  ///< Pointer to memory block to free
1705         )
1706         {
1707             if ( !pMemory )
1708                 return;
1709
1710             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1711             block_header * pBlock = pRedirect->begin();
1712
1713             if ( pBlock->isOSAllocated() ) {
1714                 // Block has been allocated from OS
1715                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1716                 m_LargeHeap.free( pBlock );
1717                 return;
1718             }
1719
1720             assert( !pBlock->isAligned() );
1721             superblock_desc * pDesc = pBlock->desc();
1722
1723             m_BoundChecker.check_bounds(
1724                 pRedirect + 1,
1725                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1726                 pDesc->nBlockSize
1727             );
1728
1729
1730             anchor_tag oldAnchor;
1731             anchor_tag newAnchor;
1732             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1733
1734             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1735
1736             oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1737             do {
1738                 newAnchor = oldAnchor;
1739                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1740                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1741                 newAnchor.tag += 1;
1742
1743                 assert( oldAnchor.state != SBSTATE_EMPTY );
1744
1745                 if ( oldAnchor.state == SBSTATE_FULL )
1746                     newAnchor.state = SBSTATE_PARTIAL;
1747
1748                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1749                     //pProcHeap = pDesc->pProcHeap;
1750                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1751                     newAnchor.state = SBSTATE_EMPTY;
1752                 }
1753                 else
1754                     newAnchor.count += 1;
1755             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1756
1757             pProcHeap->stat.incFreeCount();
1758
1759             if ( newAnchor.state == SBSTATE_EMPTY ) {
1760                 if ( pProcHeap->unlink_partial( pDesc ))
1761                     free_superblock( pDesc );
1762             }
1763             else if (oldAnchor.state == SBSTATE_FULL ) {
1764                 assert( pProcHeap != nullptr );
1765                 pProcHeap->stat.decDescFull();
1766                 pProcHeap->add_partial( pDesc );
1767             }
1768         }
1769
1770         /// Reallocate memory block
1771         /**
1772             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1773             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1774
1775             If there is not enough available memory to expand the block to the given size,
1776             the original block is left unchanged, and \p nullptr is returned.
1777
1778             Aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
1779             then the return value is \p nullptr and the original block is left unchanged.
1780         */
1781         void * realloc(
1782             void *  pMemory,    ///< Pointer to previously allocated memory block
1783             size_t  nNewSize    ///< New size of memory block, in bytes
1784         )
1785         {
1786             if ( nNewSize == 0 ) {
1787                 free( pMemory );
1788                 return nullptr;
1789             }
1790
1791             const size_t nOrigSize = nNewSize;
1792             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1793
1794             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1795
1796             // Reallocation of aligned block is not possible
1797             if ( pBlock->isAligned() ) {
1798                 assert( false );
1799                 return nullptr;
1800             }
1801
1802             if ( pBlock->isOSAllocated() ) {
1803                 // The block has been allocated from OS
1804                 size_t nCurSize = pBlock->getOSAllocSize();
1805
1806                 if ( nCurSize >= nNewSize )
1807                     return pMemory;
1808
1809                 // Grow block size
1810                 void * pNewBuf = alloc( nOrigSize );
1811                 if ( pNewBuf ) {
1812                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1813                     free( pMemory );
1814                 }
1815                 return pNewBuf;
1816             }
1817
1818             superblock_desc * pDesc = pBlock->desc();
1819             if ( pDesc->nBlockSize <= nNewSize ) {
1820                 // In-place reallocation
1821                 m_BoundChecker.make_trailer(
1822                     reinterpret_cast<byte *>(pBlock + 1),
1823                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1824                     nOrigSize
1825                     );
1826
1827                 return pMemory;
1828             }
1829
1830             void * pNew = alloc( nNewSize );
1831             if ( pNew ) {
1832                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1833                 free( pMemory );
1834                 return pNew;
1835             }
1836
1837             return nullptr;
1838         }
1839
1840         /// Allocate aligned memory block
1841         void * alloc_aligned(
1842             size_t nSize,       ///< Size of memory block to allocate in bytes
1843             size_t nAlignment   ///< Alignment
1844         )
1845         {
1846             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1847                 void * p = alloc( nSize );
1848                 assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
1849                 return p;
1850             }
1851
1852             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1853
1854             block_header * pRedirect;
1855             if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1856                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1857                 assert( pRedirect != pBlock );
1858                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1859
1860                 assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1861             }
1862             else
1863                 pRedirect = pBlock;
1864
1865
1866             // Bound checking is only for our blocks
1867             if ( !pBlock->isOSAllocated() ) {
1868                 // the block is allocated from our heap - bound checker is applicable
1869                 m_BoundChecker.make_trailer(
1870                     reinterpret_cast<byte *>(pRedirect + 1),
1871                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1872                     nSize
1873                 );
1874             }
1875
1876             return pRedirect + 1;
1877         }
1878
1879         /// Free aligned memory block previously allocated by \ref alloc_aligned
1880         void free_aligned(
1881             void * pMemory      ///< Pointer to memory block to free
1882         )
1883         {
1884             free( pMemory );
1885         }
1886
1887     public:
1888
1889         /// Get instant summary statistics
1890         void summaryStat( summary_stat& st )
1891         {
1892             size_t nProcHeapCount = m_SizeClassSelector.size();
1893             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1894                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1895                 if ( pProcDesc ) {
1896                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1897                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1898                         if ( pProcHeap ) {
1899                             st.add_procheap_stat( pProcHeap->stat );
1900                         }
1901                     }
1902                 }
1903             }
1904
1905             st.add_heap_stat( m_OSAllocStat );
1906         }
1907     };
1908
1909 }}} // namespace cds::memory::michael
1910
1911 #endif // CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H