libcds: cds/memory/michael/allocator.h
1 //$$CDS-header$$
2
3 #ifndef CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
5
6 /*
7     Michael allocator implementation
8     Source:
9         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
10
11     Editions:
12 2011.09.07 khizmax  Optimization: small pages (about 64K) are allocated by the Heap::alloc call.
13                         This makes it possible to allocate system memory more regularly,
14                         in blocks of 1M, which leads to less memory fragmentation.
15     2011.01.02 khizmax  Created
16 */
17
18 #include <stdlib.h>
19 #include <mutex>        // unique_lock
20 #include <cds/init.h>
21 #include <cds/memory/michael/options.h>
22 #include <cds/memory/michael/bound_check.h>
23 #include <cds/memory/michael/procheap_stat.h>
24 #include <cds/memory/michael/osalloc_stat.h>
25
26 #include <cds/os/topology.h>
27 #include <cds/os/alloc_aligned.h>
28 #include <cds/sync/spinlock.h>
29 #include <cds/details/type_padding.h>
30 #include <cds/details/marked_ptr.h>
31 #include <cds/container/vyukov_mpmc_cycle_queue.h>
32 #include <cds/user_setup/cache_line.h>
33 #include <cds/details/lib.h>
34
35 #include <boost/intrusive/list.hpp>
36
37 namespace cds {
38     /// Memory-related algorithms: allocators etc.
39 namespace memory {
40     /// Michael's allocator (class Heap)
41     /**
42         \par Source
43             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
44
45         This namespace declares the main class Heap and many helper classes.
46     */
47 namespace michael {
48
49     /// Size class
50     struct size_class {
51         unsigned int    nBlockSize  ;   ///< block size in bytes
52         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
53         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
54         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
55     };
56
57     /// %Heap based on system \p malloc and \p free functions
58     struct malloc_heap
59     {
60         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
61         static void * alloc( size_t nSize )
62         {
63             void * p = ::malloc( nSize );
64             return p;
65         }
66         /// Returns a memory block to the system (\p free wrapper)
67         static void free( void * p )
68         {
69             ::free( p );
70         }
71     };
72
73     /// %Heap based on system provided aligned \p malloc and \p free functions
74     struct aligned_malloc_heap
75     {
76         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
77         static void * alloc( size_t nSize, size_t nAlignment  )
78         {
79             return cds::OS::aligned_malloc( nSize, nAlignment );
80         }
81         /// Frees aligned memory block \p p that has been previously allocated by an \ref alloc call
82         static void free( void * p )
83         {
84             cds::OS::aligned_free( p );
85         }
86     };
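    /*
        Illustrative sketch (not part of the library): any class exposing this malloc_heap-like
        interface can be plugged in through the opt::system_heap option. The calloc-based heap
        below is a hypothetical example shown only to make the expected interface explicit.

            struct zeroing_heap {
                // Allocates a zero-initialized memory block of nSize bytes
                static void * alloc( size_t nSize )
                {
                    return ::calloc( 1, nSize );
                }
                // Returns the memory block to the system
                static void free( void * p )
                {
                    ::free( p );
                }
            };
    */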
87
88     /// Page heap based on \p Heap
89     /**
90         A page heap can allocate memory only in page-sized blocks.
91         \p Heap may be any heap that provides an interface like \ref malloc_heap.
92
93         This class is one of the available implementations of the opt::page_heap option.
94     */
95     template <class Heap = malloc_heap>
96     class page_allocator: public Heap
97     {
98         //@cond
99         typedef Heap base_class;
100         size_t  m_nPageSize;
101         //@endcond
102
103     public:
104         /// Initializes heap
105         page_allocator(
106             size_t nPageSize    ///< page size in bytes
107         )
108             : m_nPageSize( nPageSize )
109         {}
110
111         /// Allocate new page
112         void * alloc()
113         {
114             return base_class::alloc( m_nPageSize );
115         }
116
117         /// Free page \p pPage
118         void free( void * pPage )
119         {
120             base_class::free( pPage );
121         }
122     };
123
124     /// Page cacheable heap
125     /**
126         To improve performance this allocator maintains a small list of free pages.
127         A page heap can allocate memory only in page-sized blocks.
128
129         Template parameters:
130             \li \p FreeListCapacity - capacity of the free-list, default value is 64 pages
131             \li \p Heap - any heap that provides an interface like \ref malloc_heap.
132
133         This class is one of the available implementations of the opt::page_heap option.
134     */
135     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
136     class page_cached_allocator: public page_allocator<Heap>
137     {
138         //@cond
139         typedef page_allocator<Heap> base_class;
140
141 #ifdef _DEBUG
142         struct make_null_ptr {
143             void operator ()(void *& p)
144             {
145                 p = nullptr;
146             }
147         };
148 #endif
149         struct free_list_traits : public cds::container::vyukov_queue::traits
150         {
151             typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
152 #ifdef _DEBUG
153             typedef make_null_ptr value_cleaner;
154 #endif
155         };
156         typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
157
158         free_list   m_FreeList;
159         //@endcond
160
161     public:
162         /// Initializes heap
163         page_cached_allocator(
164             size_t nPageSize    ///< page size in bytes
165         )
166             : base_class( nPageSize )
167             , m_FreeList( FreeListCapacity )
168         {}
169
170         //@cond
171         ~page_cached_allocator()
172         {
173             void * pPage;
174             while ( m_FreeList.pop(pPage) )
175                 base_class::free( pPage );
176         }
177         //@endcond
178
179         /// Allocate new page
180         void * alloc()
181         {
182             void * pPage;
183             if ( !m_FreeList.pop( pPage ) )
184                 pPage = base_class::alloc();
185             return pPage;
186         }
187
188         /// Free page \p pPage
189         void free( void * pPage )
190         {
191             if ( !m_FreeList.push( pPage ))
192                 base_class::free( pPage );
193         }
194     };
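    /*
        Usage sketch (illustrative only): the Heap normally creates its page heap internally
        through the opt::page_heap option, but the allocator can also be used stand-alone.
        The 64K page size below is just an assumption for the example.

            cds::memory::michael::page_cached_allocator<16> pageHeap( 64 * 1024 );
            void * pPage = pageHeap.alloc();    // pops a cached page or allocates a new one
            // ... use the 64K page ...
            pageHeap.free( pPage );             // returns the page to the free-list, or to Heap if the list is full
    */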
195
196     /// Implementation of opt::sizeclass_selector option
197     /**
198         The default size-class selector can manage memory blocks up to 64K in size.
199     */
200     class CDS_EXPORT_API default_sizeclass_selector
201     {
202         //@cond
203         /// Count of different size-classes
204         static const size_t c_nSizeClassCount = 63;
205
206         /// Max block size
207         static const size_t c_nMaxBlockSize = 64 * 1024;
208
209         /// Page size of type 0 (64K)
210         static const unsigned int c_nPage64K = 64 * 1024 - 32;
211
212         /// Page size of type 1 (1M)
213         static const unsigned int c_nPage1M = 1024 * 1024;
214
215         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
216         static size_class const m_szClass[c_nSizeClassCount];
217         static unsigned char const m_szClassMap[];
218         //@endcond
219     public:
220         /// Type of size-class index
221         typedef unsigned int sizeclass_index;
222
223 #ifdef _DEBUG
224         default_sizeclass_selector();
225 #endif
226
227         /// "No size class" index
228         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
229
230         /// Returns size-class count
231         static sizeclass_index size()
232         {
233             return c_nSizeClassCount;
234         }
235
236         /// Returns page size in bytes for given page type \p nPageType
237         static size_t page_size(size_t nPageType )
238         {
239             switch (nPageType) {
240             case 0:
241                 return c_nPage64K;
242             case 1:
243                 return c_nPage1M;
244             default:
245                 assert(false)   ;   // anything forgotten?..
246             }
247             return c_nPage1M;
248         }
249
250         /// Returns the number of page types
251         /**
252             This class supports two page types: a 64K page for small objects and a 1M page for all other objects.
253         */
254         static size_t pageTypeCount()
255         {
256             return 2;
257         }
258
259         /// Returns size-class index for \p nSize
260         /**
261             For large blocks that cannot be allocated by Michael's allocator
262             the function returns \p c_nNoSizeClass.
263         */
264         static sizeclass_index find( size_t nSize )
265         {
266             if ( nSize > c_nMaxBlockSize ) {
267                 // Too large block - allocate from system
268                 return c_nNoSizeClass;
269             }
270             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
271             assert( nSize <= m_szClassBounds[ szClass ] );
272             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
273
274             return szClass;
275         }
276
277         /// Gets details::size_class struct for size-class index \p nIndex
278         static const size_class * at( sizeclass_index nIndex )
279         {
280             assert( nIndex < size() );
281             return m_szClass + nIndex;
282         }
283     };
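    /*
        A small worked example (illustrative): mapping a request size to a size class.
        The exact block sizes come from the internal m_szClass table, so the values in the
        comments are only indicative.

            typedef cds::memory::michael::default_sizeclass_selector selector;

            selector::sizeclass_index idx = selector::find( 100 );     // smallest class whose block size is >= 100 bytes
            if ( idx != selector::c_nNoSizeClass ) {
                cds::memory::michael::size_class const * sc = selector::at( idx );
                // sc->nBlockSize >= 100, sc->nCapacity == sc->nSBSize / sc->nBlockSize
            }

            selector::sizeclass_index large = selector::find( 100 * 1024 );
            // large == selector::c_nNoSizeClass: blocks above 64K bypass the size classes
    */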
284
285     //@cond
286     namespace details {
287         struct free_list_tag;
288         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
289
290         struct partial_list_tag;
291         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
292
293         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
294         {};
295     }
296     //@endcond
297
298     /// List of free superblock descriptors
299     /**
300         This class is an implementation of the \ref opt::free_list option
301     */
302     template <class Lock, class T = details::intrusive_superblock_desc>
303     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
304     {
305         //@cond
306         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
307     public:
308         typedef details::free_list_locked_hook item_hook;
309         typedef Lock lock_type;
310     protected:
311         typedef std::unique_lock<lock_type>   auto_lock;
312
313         mutable lock_type   m_access;
314         //@endcond
315
316     public:
317         /// Rebinds to other item type \p T2
318         template <class T2>
319         struct rebind {
320             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
321         };
322
323     public:
324         /// Push superblock descriptor to free-list
325         void push( T * pDesc )
326         {
327             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
328             auto_lock al(m_access);
329             base_class::push_back( *pDesc );
330         }
331
332         /// Pop superblock descriptor from free-list
333         T *   pop()
334         {
335             auto_lock al(m_access);
336             if ( base_class::empty() )
337                 return nullptr;
338             T& rDesc = base_class::front();
339             base_class::pop_front();
340             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
341             return &rDesc;
342         }
343
344         /// Returns current count of superblocks in free-list
345         size_t  size() const
346         {
347             auto_lock al(m_access);
348             return base_class::size();
349         }
350     };
351
352     /// List of partially filled superblock descriptors
353     /**
354         This class is an implementation of the \ref opt::partial_list option
355     */
356     template <class Lock, class T = details::intrusive_superblock_desc>
357     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
358     {
359         //@cond
360         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
361     public:
362         typedef details::partial_list_locked_hook item_hook;
363         typedef Lock    lock_type;
364     protected:
365         typedef std::unique_lock<lock_type>   auto_lock;
366
367         mutable lock_type   m_access;
368         //@endcond
369
370     public:
371         /// Rebinds to other item type \p T2
372         template <class T2>
373         struct rebind {
374             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
375         };
376
377     public:
378         /// Push a superblock \p pDesc to the list
379         void    push( T * pDesc )
380         {
381             auto_lock al( m_access );
382             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
383             base_class::push_back( *pDesc );
384         }
385
386         /// Pop superblock from the list
387         T * pop()
388         {
389             auto_lock al( m_access );
390             if ( base_class::empty() )
391                 return nullptr;
392             T& rDesc = base_class::front();
393             base_class::pop_front();
394             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
395             return &rDesc;
396         }
397
398         /// Removes the \p pDesc descriptor from the partial list
399         bool unlink( T * pDesc )
400         {
401             assert(pDesc != nullptr);
402             auto_lock al( m_access );
403             // !inited(pDesc) means that pDesc is currently linked into the partial list
404             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
405                 base_class::erase( base_class::iterator_to( *pDesc ) );
406                 return true;
407             }
408             return false;
409         }
410
411         /// Returns the number of elements in the list
412         size_t size() const
413         {
414             auto_lock al( m_access );
415             return base_class::size();
416         }
417     };
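    /*
        Configuration sketch (illustrative, assuming the option syntax shown in the Heap
        documentation below): plugging these list implementations in through the
        opt::free_list and opt::partial_list option setters, here with std::mutex instead
        of the default spin-lock.

            typedef cds::memory::michael::Heap<
                cds::memory::michael::opt::free_list< cds::memory::michael::free_list_locked< std::mutex > >,
                cds::memory::michael::opt::partial_list< cds::memory::michael::partial_list_locked< std::mutex > >
            > mutex_list_heap;
    */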
418
419     /// Summary processor heap statistics
420     /**
421         Summary heap statistics for use with the \p Heap::summaryStat function.
422     */
423     struct summary_stat
424     {
425         size_t      nAllocFromActive    ;  ///< Count of allocations from the active superblock
426         size_t      nAllocFromPartial   ;  ///< Count of allocations from a partial superblock
427         size_t      nAllocFromNew       ;  ///< Count of allocations from a new superblock
428         size_t      nFreeCount          ;  ///< Count of \p free function calls
429         size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
430         size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
431         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors allocated
432         size_t      nDescFull           ;  ///< Count of full superblocks
433         atomic64u_t nBytesAllocated     ;  ///< Count of allocated bytes (for heap-managed memory blocks)
434         atomic64u_t nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap-managed memory blocks)
435
436         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned calls (for large memory blocks allocated directly from the OS)
437         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned calls (for large memory blocks allocated directly from the OS)
438         atomic64u_t nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
439         atomic64u_t nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
440
441         // Internal contention indicators
442         /// CAS failure count when updating the \p active field in the \p alloc_from_active internal function
443         /**
444             Contention indicator: the lower the value, the better.
445         */
446         size_t      nActiveDescCASFailureCount;
447         /// CAS failure count when updating the anchor of the active superblock in the \p alloc_from_active internal function
448         /**
449             Contention indicator: the lower the value, the better.
450         */
451         size_t      nActiveAnchorCASFailureCount;
452         /// CAS failure count when updating the anchor of a partial superblock while reserving blocks in the \p alloc_from_partial internal function
453         /**
454             Contention indicator: the lower the value, the better.
455         */
456         size_t      nPartialDescCASFailureCount;
457         /// CAS failure count when updating the anchor of a partial superblock while popping a reserved block in the \p alloc_from_partial internal function
458         /**
459             Contention indicator: the lower the value, the better.
460         */
461         size_t      nPartialAnchorCASFailureCount;
462
463
464     public:
465         /// Constructs empty statistics. All counters are zero.
466         summary_stat()
467         {
468             clear();
469         }
470
471         /// Difference statistics
472         /**
473             This operator computes the difference between \p *this and \p stat and stores the result in \p *this.
474             Returns \p *this.
475         */
476         summary_stat& operator -=( const summary_stat& stat )
477         {
478             nAllocFromActive    -= stat.nAllocFromActive;
479             nAllocFromPartial   -= stat.nAllocFromPartial;
480             nAllocFromNew       -= stat.nAllocFromNew;
481             nFreeCount          -= stat.nFreeCount;
482             nPageAllocCount     -= stat.nPageAllocCount;
483             nPageDeallocCount   -= stat.nPageDeallocCount;
484             nDescAllocCount     -= stat.nDescAllocCount;
485             nDescFull           -= stat.nDescFull;
486             nBytesAllocated     -= stat.nBytesAllocated;
487             nBytesDeallocated   -= stat.nBytesDeallocated;
488
489             nSysAllocCount      -= stat.nSysAllocCount;
490             nSysFreeCount       -= stat.nSysFreeCount;
491             nSysBytesAllocated  -= stat.nSysBytesAllocated;
492             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
493
494             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
495             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
496             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
497             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
498
499             return *this;
500         }
501
502         /// Clears statistics
503         /**
504             All counters are set to zero.
505         */
506         void clear()
507         {
508             memset( this, 0, sizeof(*this));
509         }
510
511         //@cond
512         template <typename Stat>
513         summary_stat& add_procheap_stat( const Stat& stat )
514         {
515             nAllocFromActive    += stat.allocFromActive();
516             nAllocFromPartial   += stat.allocFromPartial();
517             nAllocFromNew       += stat.allocFromNew();
518             nFreeCount          += stat.freeCount();
519             nPageAllocCount     += stat.blockAllocated();
520             nPageDeallocCount   += stat.blockDeallocated();
521             nDescAllocCount     += stat.descAllocCount();
522             nDescFull           += stat.descFull();
523             nBytesAllocated     += stat.allocatedBytes();
524             nBytesDeallocated   += stat.deallocatedBytes();
525
526             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
527             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
528             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
529             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
530
531             return *this;
532         }
533
534         template <typename Stat>
535         summary_stat& add_heap_stat( const Stat& stat )
536         {
537             nSysAllocCount      += stat.allocCount();
538             nSysFreeCount       += stat.freeCount();
539
540             nSysBytesAllocated  += stat.allocatedBytes();
541             nSysBytesDeallocated+= stat.deallocatedBytes();
542
543             return *this;
544         }
545         //@endcond
546     };
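    /*
        Measurement sketch (illustrative): computing a statistics delta around a workload.
        The summaryStat() call is assumed to have the signature
        void Heap<...>::summaryStat( summary_stat& ), as referenced in the comment above.

            cds::memory::michael::Heap<> heap;
            cds::memory::michael::summary_stat before, after;

            heap.summaryStat( before );
            // ... run the workload that allocates/frees through `heap` ...
            heap.summaryStat( after );

            after -= before;    // `after` now holds only the workload's contribution
    */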
547
548     /// Michael's allocator
549     /**
550         This class provides the base functionality of Michael's allocator. It does not provide
551         the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
552         The heap interface is closer to the semantics of the \p malloc / \p free system functions.
553         The heap supports allocation of both aligned and unaligned data.
554
555         The algorithm is based on a simplified version of
556             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
557
558         which, in turn, is a lock-free counterpart of the well-known Hoard allocator developed by Emery Berger, see
559             \li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis
560
561         This is a powerful, scalable, fully customizable heap with a lock-free fast path
562         that has been developed specifically for multi-threaded applications.
563         With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need,
564         and you can manually bind any thread to any arena ("processor"). With the opt::sizeclass_selector option you can control
565         allocation granularity. With opt::page_heap you can utilize OS-provided facilities for page allocation
566         such as \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
567         The opt::check_bounds feature can help you find memory buffer overflows.
568
569         Brief algorithm description from Michael's work:
570
571         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
572         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
573         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
574         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
575         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
576         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
577         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
578         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
579         in a lock-free manner.
580
581         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
582         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
583         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
584         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
585         available blocks of its original superblock by atomically updating its descriptor.
586
587         <b>Constraint</b>: one superblock may contain up to 2048 blocks. This imposes a limit on the maximum
588         superblock size.
589
590         Available \p Options:
591         - \ref opt::sys_topology - class that describes system topology needed for allocator.
592             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
593         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
594             Default is \ref malloc_heap.
595         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
596             Default is \ref aligned_malloc_heap
597         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
598             Default is \ref page_cached_allocator
599         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
600             for incoming allocation request.
601             Default is \ref default_sizeclass_selector
602         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
603             Default is \ref free_list_locked
604         - \ref opt::partial_list - option setter for a class to manage a list of partially filled superblocks
605             Default is \ref partial_list_locked
606         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
607             that is maintained by the heap.
608             Default is \ref procheap_empty_stat
609         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
610             allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector);
611             for \ref default_sizeclass_selector it is 64K. Any block larger than 64K is allocated directly from the
612             OS, and the \p os_allocated_stat option sets the class that gathers statistics for such blocks.
613             Default is \ref os_allocated_empty
614         - \ref opt::check_bounds - a bound checker.
615             Default is no bound checker (cds::opt::none)
616
617         \par Usage:
618         The heap is the basic building block for your own allocator or <tt>operator new</tt> implementation.
619
620         \code
621         #include <cds/memory/michael/allocator.h>
622
623         // Heap with explicitly defined options:
624         cds::memory::michael::Heap<
625             opt::aligned_heap< aligned_malloc_heap >,
626             opt::page_heap< page_cached_allocator<16, malloc_heap> >
627         >   myHeap;
628
629         // Heap with default options:
630         cds::memory::michael::Heap<>    myDefHeap;
631         \endcode
632
633         \par How to make a std-like allocator
634
635         There are significant differences between the heap and the <tt>std::allocator</tt> interface:
636             - The heap is stateful, while \p std::allocator is stateless.
637             - The heap has many more template parameters than \p std::allocator.
638             - The heap provides a low-level interface for memory allocation only, unlike the allocator
639                 interface that can construct/destroy objects of any type T.
640
641         To wrap the heap in a \p std::allocator-like interface you should:
642             - Declare an object of class cds::memory::michael::Heap with the necessary
643                 template parameters; this is usually a static object.
644             - Create a class with the \p std::allocator interface that forwards to the heap's functions.
645         \code
646         #include <cds/memory/michael/allocator.h>
647
648         template <class T>
649         class MichaelAllocator
650         {
651             typedef std::allocator<T>               std_allocator;
652             typedef cds::memory::michael::Heap<>    michael_heap;
653
654             // Michael heap static object
655             static michael_heap     s_Heap;
656         public:
657             // Declare typedefs from std::allocator
658             typedef typename std_allocator::const_pointer   const_pointer;
659             typedef typename std_allocator::pointer         pointer;
660             typedef typename std_allocator::const_reference const_reference;
661             typedef typename std_allocator::reference       reference;
662             typedef typename std_allocator::difference_type difference_type;
663             typedef typename std_allocator::size_type       size_type;
664             typedef typename std_allocator::value_type      value_type;
665
666             // Allocation function
667             pointer allocate( size_type _Count, const void* _Hint )
668             {
669                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
670             }
671
672             // Deallocation function
673             void deallocate( pointer _Ptr, size_type _Count )
674             {
675                 s_Heap.free( _Ptr );
676             }
677
678             // Other std::allocator specific functions: address, construct, destroy, etc.
679             ...
680
681             // Rebinding allocator to other type
682             template <class _Other>
683             struct rebind {
684                 typedef MichaelAllocator<_Other> other;
685             };
686         };
687
688         // Definition of the static heap object (note: MichaelAllocator is a class template):
689         template <class T> typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
690
691         \endcode
692     */
693     template <typename... Options>
694     class Heap {
695     protected:
696
697         //@cond
698         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
699         static const unsigned int c_nDefaultBlockAlignment = 8;
700
701         struct default_options {
702             typedef cds::OS::topology           sys_topology;
703             typedef malloc_heap                 system_heap;
704             typedef page_cached_allocator<>     page_heap;
705             typedef aligned_malloc_heap         aligned_heap;
706             typedef default_sizeclass_selector  sizeclass_selector;
707             typedef free_list_locked<cds::sync::spin>    free_list;
708             typedef partial_list_locked<cds::sync::spin> partial_list;
709             typedef procheap_empty_stat         procheap_stat;
710             typedef os_allocated_empty          os_allocated_stat;
711             typedef cds::opt::none              check_bounds;
712         };
713         //@endcond
714
715     protected:
716         //@cond
717         typedef typename opt::make_options<default_options, Options...>::type   options;
718         //@endcond
719
720         //@cond
721         typedef unsigned char   byte;
722         //@endcond
723     public:
724         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
725         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
726         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
727         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
728         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
729         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
730         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
731         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
732
733         // forward declarations
734         //@cond
735         struct superblock_desc;
736         struct processor_heap_base;
737         struct processor_desc;
738         //@endcond
739
740         /// Superblock states
741         /**
742             A superblock can be in one of four states: \p ACTIVE, \p FULL,
743             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
744             superblock in a heap, or if a thread intends to try to install it
745             as such. A superblock is \p FULL if all its blocks are either allocated
746             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
747             and contains unreserved available blocks. A superblock is
748             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
749         */
750         enum superblock_state {
751             SBSTATE_ACTIVE  = 0,    ///< superblock is active
752             SBSTATE_FULL    = 1,    ///< superblock is full
753             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
754             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
755         };
756
757         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in a superblock (2^11 = 2048)
758
759         /// Anchor of the superblock descriptor. Updated by CAS
760         struct anchor_tag {
761             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
762             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
763             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
764             unsigned long long    tag:40    ;   ///< ABA prevention tag
765         };
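        // Note: the bit-fields above total 11 + 11 + 2 + 40 = 64 bits, so an anchor_tag fits in
        // a single 64-bit word and can be updated by one CAS; the 11-bit avail/count fields are
        // also the reason for the c_nMaxBlockInSuperBlock = 2048 limit.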
766
767         /// Superblock descriptor
768         struct superblock_desc
769             : public options::free_list::item_hook
770             , public options::partial_list::item_hook
771         {
772             atomics::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
773             byte *              pSB         ;   ///< ptr to superblock
774             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
775             unsigned int        nBlockSize  ;   ///< block size in bytes
776             unsigned int        nCapacity   ;   ///< superblock size/block size
777
778             //@cond
779             superblock_desc()
780                 : pSB(nullptr)
781                 , pProcHeap( nullptr )
782             {}
783             //@endcond
784         };
785
786         //@cond
787         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
788         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
789         //@endcond
790
791 #if CDS_BUILD_BITS == 32
792         /// Allocated block header
793         /**
794             Each allocated block has an 8-byte header.
795             The header contains a pointer to the owning superblock descriptor and a redirection flag.
796             If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
797             \code
798                 +---------------+
799                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
800                 +---------------+
801                 |               | <- memory allocated
802                 |   memory      |
803                 |               |
804                 +---------------+
805             \endcode
806             If the block has been allocated by \ref alloc_aligned, the returned pointer may have to be shifted to satisfy the requested alignment.
807             In this case the redirection flag is 1 and the block's structure is:
808             \code
809                 +---------------+
810             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
811             |   +---------------+
812             |   |   padding     |
813             |   |   (unused)    |
814             |   |               |
815             |   +---------------+
816             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
817                 +---------------+
818                 |               | <- memory allocated
819                 |   memory      |
820                 |               |
821                 +---------------+
822             \endcode
823         */
824         class block_header
825         {
826         //@cond
827             enum {
828                 bitAligned = 1,
829                 bitOSAllocated = 2
830             };
831
832             union {
833                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
834                 uint32_t         nSize       ;   // block size (allocated from OS)
835             };
836             uint32_t         nFlags;
837
838         public:
839             void  set( superblock_desc * pdesc, uint32_t isAligned )
840             {
841                 pDesc = pdesc;
842                 nFlags = isAligned ? bitAligned : 0;
843             }
844
845             superblock_desc * desc()
846             {
847                 assert( (nFlags & bitOSAllocated) == 0 );
848                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
849             }
850
851             block_header * begin()
852             {
853                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
854             }
855
856             bool isAligned() const
857             {
858                 return (nFlags & bitAligned) != 0;
859             }
860
861             bool isOSAllocated() const
862             {
863                 return (nFlags & bitOSAllocated) != 0;
864             }
865
866             void setOSAllocated( size_t sz )
867             {
868                 nSize = sz;
869                 nFlags = bitOSAllocated;
870             }
871
872             size_t getOSAllocSize() const
873             {
874                 assert( isOSAllocated() );
875                 return nSize;
876             }
877
878         //@endcond
879         };
880 #elif CDS_BUILD_BITS == 64
881         //@cond
882         class block_header
883         {
884             enum {
885                 bitAligned = 1,
886                 bitOSAllocated = 2
887             };
888             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
889             // If bitOSAllocated is set the pDesc contains size of memory block
890             // allocated from OS
891             marked_desc_ptr     pDesc;
892         public:
893             void  set( superblock_desc * pdesc, uint32_t isAligned )
894             {
895                 pDesc = marked_desc_ptr( pdesc, isAligned );
896             }
897
898             superblock_desc * desc()
899             {
900                 assert( !isOSAllocated() );
901                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
902             }
903
904             block_header * begin()
905             {
906                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
907             }
908
909             bool isAligned() const
910             {
911                 return (pDesc.bits() & bitAligned) != 0;
912             }
913
914             bool isOSAllocated() const
915             {
916                 return (pDesc.bits() & bitOSAllocated) != 0;
917             }
918
919             void setOSAllocated( size_t nSize )
920             {
921                 // The two low-order bits are reserved for flags, so the block size is stored shifted left by 2
922                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
923             }
924
925             size_t getOSAllocSize() const
926             {
927                 assert( isOSAllocated() );
928                 return reinterpret_cast<uintptr_t>( pDesc.ptr() ) >> 2;
929             }
930
931         };
932         //@endcond
933 #else
934 #       error "Unexpected value of CDS_BUILD_BITS"
935 #endif  // CDS_BUILD_BITS
936
937         //@cond
938         struct free_block_header: block_header {
939             unsigned int    nNextFree;
940         };
941         //@endcond
942
943 #if CDS_BUILD_BITS == 32
944         /// Processor heap's \p active field
945         /**
946             The \p active field in the processor heap structure is primarily a pointer to the descriptor
947             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
948             guaranteed that the active superblock has at least one block available for reservation.
949             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
950             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
951             of blocks available for reservation in the active superblock less one. That is, if the value
952             of credits is n, then the active superblock contains n+1 blocks available for reservation
953             through the \p active field. Note that the number of blocks in a superblock is not limited
954             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
955             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
956             atomically decrements credits while validating that the active superblock is still valid.
957         */
958         class active_tag {
959         //@cond
960             superblock_desc *       pDesc;
961             uint32_t             nCredits;
962
963         public:
964             static const unsigned int c_nMaxCredits = 0 - 1;
965
966         public:
967             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
968                 : pDesc( nullptr )
969                 , nCredits(0)
970             {}
971
972             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
973             ~active_tag() CDS_NOEXCEPT = default;
974             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
975 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
976             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
977             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
978 #       endif
979
980             /// Returns pointer to superblock descriptor
981             superblock_desc * ptr() const
982             {
983                 return pDesc;
984             }
985
986             /// Sets superblock descriptor
987             void ptr( superblock_desc * p )
988             {
989                 pDesc = p;
990             }
991
992             unsigned int credits() const
993             {
994                 return nCredits;
995             }
996
997             void credits( unsigned int n )
998             {
999                 nCredits = n;
1000             }
1001
1002             void clear()
1003             {
1004                 pDesc = nullptr;
1005                 nCredits = 0;
1006             }
1007
1008             void set( superblock_desc * pSB, unsigned int n )
1009             {
1010                 pDesc = pSB;
1011                 nCredits = n;
1012             }
1013         //@endcond
1014         };
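        // In this 32-bit layout active_tag is a plain { pointer, credits } pair packed into 8 bytes,
        // so atomics::atomic<active_tag> can still be updated with a single 8-byte CAS
        // (assuming the platform provides a double-word compare-and-swap).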
1015 #elif CDS_BUILD_BITS == 64
1016         //@cond
1017         class active_tag
1018         {
1019         public:
1020             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1021         protected:
1022             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1023             marked_desc_ptr pDesc;
1024
1025         public:
1026             active_tag() CDS_NOEXCEPT
1027                 : pDesc( nullptr )
1028             {}
1029             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1030             //active_tag() CDS_NOEXCEPT = default;
1031             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1032             ~active_tag() CDS_NOEXCEPT = default;
1033             active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1034 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1035             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1036             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1037 #       endif
1038             superblock_desc *    ptr() const
1039             {
1040                 return pDesc.ptr();
1041             }
1042
1043             void ptr( superblock_desc * p )
1044             {
1045                 assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
1046                 pDesc = marked_desc_ptr( p, pDesc.bits());
1047             }
1048
1049             unsigned int credits()
1050             {
1051                 return (unsigned int) pDesc.bits();
1052             }
1053
1054             void credits( unsigned int n )
1055             {
1056                 assert( n <= c_nMaxCredits );
1057                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1058             }
1059
1060             void clear()
1061             {
1062                 pDesc = marked_desc_ptr();
1063             }
1064
1065             void set( superblock_desc * pSB, unsigned int n )
1066             {
1067                 assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
1068                 pDesc = marked_desc_ptr( pSB, n );
1069             }
1070
1071         };
1072         //@endcond
1073 #else
1074 #       error "Unexpected value of CDS_BUILD_BITS"
1075 #endif  // CDS_BUILD_BITS
1076
1077
1078         /// Processor heap
1079         struct processor_heap_base
1080         {
1081             CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1082             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1083             const size_class *  pSizeClass  ;   ///< pointer to size class
1084             atomics::atomic<superblock_desc *>   pPartial    ;   ///< pointer to partial filled superblock (may be \p nullptr)
1085             partial_list        partialList ;   ///< list of partial filled superblocks owned by the processor heap
1086             unsigned int        nPageIdx    ;   ///< page size-class index; equals \ref c_nPageSelfAllocation for a "small page"
1087
1088             /// Small page marker
1089             /**
1090                 If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
1091                 This optimization makes it possible to allocate system memory more regularly, in blocks of 1M,
1092                 which leads to less memory fragmentation.
1093             */
1094             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1095
1096             procheap_stat       stat        ;   ///< heap statistics
1097             //processor_heap_statistics   stat;
1098
1099             //@cond
1100             processor_heap_base() CDS_NOEXCEPT
1101                 : pProcDesc( nullptr )
1102                 , pSizeClass( nullptr )
1103                 , pPartial( nullptr )
1104             {
1105                 assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
1106             }
1107             //@endcond
1108
1109             /// Get partial superblock owned by the processor heap
1110             superblock_desc * get_partial()
1111             {
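                // Strategy: first try to take the single cached partial superblock (pPartial)
                // atomically with CAS; if there is none, fall back to popping from the locked
                // partial list.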
1112                 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1113                 do {
1114                     if ( !pDesc ) {
1115                         pDesc =  partialList.pop();
1116                         break;
1117                     }
1118                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1119
1120                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1121                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1122                 return pDesc;
1123             }
1124
1125             /// Add partial superblock \p pDesc to the list
1126             void add_partial( superblock_desc * pDesc )
1127             {
1128                 assert( pPartial != pDesc );
1129                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1130
1131                 superblock_desc * pCur = nullptr;
1132                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1133                     partialList.push( pDesc );
1134             }
1135
1136
1137             /// Removes superblock \p pDesc from the list of partial superblocks
1138             bool unlink_partial( superblock_desc * pDesc )
1139             {
1140                 return partialList.unlink( pDesc );
1141             }
1142         };
1143
1144         /// Processor heap structure padded to the cache-line alignment \p c_nAlignment
1145         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1146
1147         /// Processor descriptor
1148         struct processor_desc
1149         {
1150             processor_heap *    arrProcHeap     ; ///< array of processor heaps
1151             free_list           listSBDescFree  ; ///< List of free superblock descriptors
1152             page_heap *         pageHeaps       ; ///< array of page heaps (one for each page size)
1153
1154             //@cond
1155             processor_desc()
1156                 : arrProcHeap( nullptr )
1157                 , pageHeaps( nullptr )
1158             {}
1159             //@endcond
1160         };
1161
1162
1163     protected:
1164         sys_topology        m_Topology           ;  ///< System topology
1165         system_heap         m_LargeHeap          ;  ///< Heap for large blocks
1166         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1167         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1168         atomics::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1169         unsigned int        m_nProcessorCount    ;  ///< Processor count
1170         bound_checker       m_BoundChecker       ;  ///< Bound checker
1171
1172         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1173
1174     protected:
1175         //@cond
1176
1177         /// Allocates a large block from system memory
1178         block_header * alloc_from_OS( size_t nSize )
1179         {
1180             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1181             m_OSAllocStat.incBytesAllocated( nSize );
1182             p->setOSAllocated( nSize );
1183             return p;
1184         }
1185
1186         /// Allocates from the active superblock if possible
1187         block_header * alloc_from_active( processor_heap * pProcHeap )
1188         {
1189             active_tag  oldActive;
1190             int nCollision = -1;
1191
1192             // Reserve block
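            // If the old active tag has credits, we simply decrement the credit count.
            // If credits are already zero, newActive stays default-constructed (nullptr):
            // this thread takes the last reserved block and clears the active pointer so
            // that a new active superblock can be installed later.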
1193             while ( true ) {
1194                 ++nCollision;
1195                 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1196                 if ( !oldActive.ptr() )
1197                     return nullptr;
1198                 unsigned int nCredits = oldActive.credits();
1199                 active_tag  newActive   ; // default = 0
1200                 if ( nCredits != 0 ) {
1201                     newActive = oldActive;
1202                     newActive.credits( nCredits - 1 );
1203                 }
1204                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1205                     break;
1206             }
1207
1208             if ( nCollision )
1209                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1210
1211             // pop block
1212             superblock_desc * pDesc = oldActive.ptr();
1213
1214             anchor_tag  oldAnchor;
1215             anchor_tag  newAnchor;
1216             byte * pAddr;
1217             unsigned int nMoreCredits = 0;
1218
1219             nCollision = -1;
1220             do {
1221                 ++nCollision;
1222                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1223
1224                 assert( oldAnchor.avail < pDesc->nCapacity );
1225                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1226                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1227                 newAnchor.tag += 1;
1228
1229                 if ( oldActive.credits() == 0 ) {
1230                     // state must be ACTIVE
1231                     if ( oldAnchor.count == 0 )
1232                         newAnchor.state = SBSTATE_FULL;
1233                     else {
1234                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1235                         newAnchor.count -= nMoreCredits;
1236                     }
1237                 }
1238             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1239
1240             if ( nCollision )
1241                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1242
1243             assert( newAnchor.state != SBSTATE_EMPTY );
1244
1245             if ( newAnchor.state == SBSTATE_FULL )
1246                 pProcHeap->stat.incDescFull();
1247             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1248                 update_active( pProcHeap, pDesc, nMoreCredits );
1249
1250             pProcHeap->stat.incAllocFromActive();
1251
1252             // The block_header fields do not need to be set up here;
1253             // they were initialized in alloc_from_new_superblock
1254             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1255             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1256             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1257
1258             return reinterpret_cast<block_header *>( pAddr );
1259         }
1260
1261         /// Allocates from a partially filled superblock if possible
1262         block_header * alloc_from_partial( processor_heap * pProcHeap )
1263         {
1264         retry:
1265             superblock_desc * pDesc = pProcHeap->get_partial();
1266             if ( !pDesc )
1267                 return nullptr;
1268
1269             // reserve blocks
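            // Reserve one block for this thread plus up to active_tag::c_nMaxCredits additional
            // blocks as credits; the extra credits (nMoreCredits) are installed into the active
            // tag via update_active below, so subsequent allocations can use the fast path.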
1270             anchor_tag  oldAnchor;
1271             anchor_tag  newAnchor;
1272             //byte * pAddr;
1273             unsigned int nMoreCredits = 0;
1274
1275             int nCollision = -1;
1276             do {
1277                 ++nCollision;
1278
1279                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1280                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1281                     free_superblock( pDesc );
1282                     goto retry;
1283                 }
1284
1285                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1286                 newAnchor.count -= nMoreCredits + 1;
1287                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1288                 newAnchor.tag += 1;
1289             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1290
1291             if ( nCollision )
1292                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1293
1294             if ( newAnchor.state == SBSTATE_FULL )
1295                 pProcHeap->stat.incDescFull();
1296
1297             // Now, the thread is guaranteed to have reserved one or more blocks
1298             // pop reserved block
1299             byte * pAddr;
1300             nCollision = -1;
1301             do {
1302                 ++nCollision;
1303
1304                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1305
1306                 assert( oldAnchor.avail < pDesc->nCapacity );
1307                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1308                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1309                 ++newAnchor.tag;
1310             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1311
1312             if ( nCollision )
1313                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1314
1315             assert( newAnchor.state != SBSTATE_EMPTY );
1316
1317             pProcHeap->stat.incAllocFromPartial();
1318
1319             if ( nMoreCredits > 0 )
1320                 update_active( pProcHeap, pDesc, nMoreCredits );
1321
1322             // block_header fields need not be set up here:
1323             // they were initialized in alloc_from_new_superblock
1324             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1325             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1326             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1327
1328             return reinterpret_cast<block_header *>( pAddr );
1329         }
1330
1331         /// Allocates a new superblock and serves the first block from it
1332         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1333         {
1334             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1335             assert( pDesc != nullptr );
1336             pDesc->pSB = new_superblock_buffer( pProcHeap );
1337
1338             anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1339             anchor.tag += 1;
1340
1341             // Build the singly-linked list of free blocks within the superblock
1342             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1343             unsigned int nNext = 0;
1344             const unsigned int nBlockSize = pDesc->nBlockSize;
1345             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1346                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1347                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1348             }
1349             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
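            // Illustration (hypothetical capacity of 4 blocks): after the loop above,
            //   block 0 -> nNextFree = 1, block 1 -> 2, block 2 -> 3, block 3 -> 0,
            // i.e. each block stores the index of the next free block, and the last
            // entry is reset to 0 by the statement above.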
1350
1351             active_tag newActive;
1352             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1353
1354             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1355             anchor.state = SBSTATE_ACTIVE;
1356             pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1357
1358             active_tag curActive;
1359             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1360                 pProcHeap->stat.incAllocFromNew();
1361                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1362                 return reinterpret_cast<block_header *>( pDesc->pSB );
1363             }
1364
1365             free_superblock( pDesc );
1366             return nullptr;
1367         }
1368
1369         /// Finds the appropriate processor heap for the selected size class
1370         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1371         {
1372             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1373
1374             unsigned int nProcessorId = m_Topology.current_processor();
1375             assert( nProcessorId < m_nProcessorCount );
1376
1377             if ( nProcessorId >= m_nProcessorCount )
1378                 nProcessorId = 0;
1379
1380             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1381             while ( !pDesc ) {
1382
1383                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1384                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1385                     pDesc = pNewDesc;
1386                     break;
1387                 }
1388                 free_processor_desc( pNewDesc );
1389             }
1390
1391             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1392         }
1393
1394         /// Updates the \p active field of processor heap \p pProcHeap
1395         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1396         {
1397             assert( pProcHeap == pDesc->pProcHeap );
1398
1399             active_tag  nullActive;
1400             active_tag  newActive;
1401             newActive.set( pDesc, nCredits - 1 );
1402
1403             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1404                 return;
1405
1406             // Another thread has already installed a different active superblock.
1407             // Return the credits to this superblock and make it partial.
1408
1409             anchor_tag  oldAnchor;
1410             anchor_tag  newAnchor;
1411
1412             do {
1413                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1414                 newAnchor.count += nCredits;
1415                 newAnchor.state = SBSTATE_PARTIAL;
1416             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1417
1418             pDesc->pProcHeap->add_partial( pDesc );
1419         }
1420
1421         /// Allocates new processor descriptor
1422         processor_desc * new_processor_desc( unsigned int nProcessorId )
1423         {
1424             CDS_UNUSED( nProcessorId );
1425             processor_desc * pDesc;
1426             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1427
1428             /*
1429                 Processor descriptor layout
1430
1431                 proc_desc -  64-byte alignment
1432                 page_heap[0] 64-byte alignment
1433                 page_heap[1] 64-byte alignment
1434                 ...
1435                 page_heap[P] 64-byte alignment
1436
1437                 proc_heap[0] 64-byte alignment
1438                 proc_heap[1] 64-byte alignment
1439                 ...
1440                 proc_heap[N] 64-byte alignment
1441             */
1442
1443             const size_t szDesc =
1444                 ( sizeof(processor_desc)
1445                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1446                     + c_nAlignment - 1
1447                 ) / c_nAlignment;
1448
1449
1450             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1451
1452             static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
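
            // Worked example (all sizes hypothetical, for illustration only):
            // with c_nAlignment == 64, sizeof(processor_desc) == 192, two page heaps
            // of 16 bytes each and 40 size classes with sizeof(processor_heap) == 128:
            //   szDesc  = (192 + 2*16 + 63) / 64 = 4           // in 64-byte units
            //   szTotal = 4*64 + 40*128          = 5376 bytes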
1453
1454             // TSan false positive: the new descriptor is published later with a release fence
1455             CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1456
1457             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1458
1459             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1460             for ( size_t i = 0; i < nPageHeapCount; ++i )
1461                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1462
1463             // initialize processor heaps
1464             pDesc->arrProcHeap =
1465                 reinterpret_cast<processor_heap *>(
1466                     reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1467                     & ~(uintptr_t(c_nAlignment) - 1)
1468                 );
1469
1470             processor_heap * pProcHeap = pDesc->arrProcHeap;
1471             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1472             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1473                 new (pProcHeap) processor_heap();
1474                 pProcHeap->pProcDesc = pDesc;
1475                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1476                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1477                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1478                 else
1479                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1480             }
1481             CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1482             return pDesc;
1483         }
1484
1485
1486         void free_processor_heap( processor_heap * pProcHeap )
1487         {
1488             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1489                 superblock_desc * pDesc;
1490
1491                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1492                     free( pDesc->pSB );
1493                     m_AlignedHeap.free( pDesc );
1494                 }
1495
1496                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1497                 if ( pPartial ) {
1498                     free( pPartial->pSB );
1499                     m_AlignedHeap.free( pPartial );
1500                 }
1501
1502                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1503                 if ( pDesc ) {
1504                     free( pDesc->pSB );
1505                     m_AlignedHeap.free( pDesc );
1506                 }
1507             }
1508             else {
1509                 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1510                 superblock_desc * pDesc;
1511
1512                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1513                     pageHeap.free( pDesc->pSB );
1514                     m_AlignedHeap.free( pDesc );
1515                 }
1516
1517                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1518                 if ( pPartial ) {
1519                     pageHeap.free( pPartial->pSB );
1520                     m_AlignedHeap.free( pPartial );
1521                 }
1522
1523                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1524                 if ( pDesc ) {
1525                     pageHeap.free( pDesc->pSB );
1526                     m_AlignedHeap.free( pDesc );
1527                 }
1528             }
1529             pProcHeap->~processor_heap();
1530         }
1531
1532         /// Frees processor descriptor
1533         void free_processor_desc( processor_desc * pDesc )
1534         {
1535             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1536
1537             for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1538                 free_processor_heap( pDesc->arrProcHeap + j );
1539
1540             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1541                 m_AlignedHeap.free( pSBDesc );
1542
1543             for (size_t i = 0; i < nPageHeapCount; ++i )
1544                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1545
1546             //m_IntHeap.free( pDesc->pageHeaps );
1547             pDesc->pageHeaps = nullptr;
1548
1549             pDesc->processor_desc::~processor_desc();
1550             m_AlignedHeap.free( pDesc );
1551         }
1552
1553         /// Allocates new superblock descriptor
1554         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1555         {
1556             anchor_tag anchor;
1557             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1558             if ( pDesc == nullptr ) {
1559                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1560                 assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );
1561
1562                 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1563                 anchor.tag = 0;
1564                 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1565
1566                 pProcHeap->stat.incDescAllocCount();
1567             }
1568             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1569             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1570             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1571             pDesc->pProcHeap = pProcHeap;
1572
1573             anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1574             anchor.avail = 1;
1575             pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1576
1577             return pDesc;
1578         }
1579
1580         /// Allocates superblock page
1581         byte * new_superblock_buffer( processor_heap * pProcHeap )
1582         {
1583             pProcHeap->stat.incBlockAllocated();
1584             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1585                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1586             }
1587             else {
1588                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1589             }
1590         }
1591
1592         /// Frees superblock descriptor and its page
1593         void free_superblock( superblock_desc * pDesc )
1594         {
1595             pDesc->pProcHeap->stat.incBlockDeallocated();
1596             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1597             if ( pDesc->pSB ) {
1598                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1599                     free( pDesc->pSB );
1600                 }
1601                 else {
1602                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1603                 }
1604             }
1605             pProcDesc->listSBDescFree.push( pDesc );
1606         }
1607
1608         /// Allocate memory block
1609         block_header * int_alloc(
1610             size_t nSize    ///< Size of memory block to allocate in bytes
1611             )
1612         {
1613             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1614             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1615                 return alloc_from_OS( nSize );
1616             }
1617             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1618
1619             block_header * pBlock;
1620             processor_heap * pProcHeap;
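            // Allocation order: try the currently active superblock first,
            // then a partially filled one, and finally a brand-new superblock.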
1621             while ( true ) {
1622                 pProcHeap = find_heap( nSizeClassIndex );
1623                 if ( !pProcHeap )
1624                     return alloc_from_OS( nSize );
1625
1626                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1627                     break;
1628                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1629                     break;
1630                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1631                     break;
1632             }
1633
1634             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1635
1636             assert( pBlock != nullptr );
1637             return pBlock;
1638         }
1639
1640         //@endcond
1641     public:
1642         /// Heap constructor
1643         Heap()
1644         {
1645             // Explicit libcds initialization is needed since the heap may be constructed as a static object
1646             cds::Initialize();
1647
1648             m_nProcessorCount = m_Topology.processor_count();
1649             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1650                 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
1651             memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount );   // TODO: zeroing an array of atomic<> with memset is questionable
1652         }
1653
1654         /// Heap destructor
1655         /**
1656             The destructor frees all memory allocated by the heap.
1657         */
1658         ~Heap()
1659         {
1660             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1661                 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1662                 if ( pDesc )
1663                     free_processor_desc( pDesc );
1664             }
1665
1666             m_AlignedHeap.free( m_arrProcDesc );
1667
1668             // Explicit termination of libcds
1669             cds::Terminate();
1670         }
1671
1672         /// Allocate memory block
1673         void * alloc(
1674             size_t nSize    ///< Size of memory block to allocate in bytes
1675         )
1676         {
1677             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1678
1679             // Bound checking is only for our blocks
1680             if ( !pBlock->isOSAllocated() ) {
1681                 // the block is allocated from our heap - bound checker is applicable
1682                 m_BoundChecker.make_trailer(
1683                     reinterpret_cast<byte *>(pBlock + 1),
1684                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1685                     nSize
1686                 );
1687             }
1688
1689             return pBlock + 1;
1690         }
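
        // Block layout produced by alloc() (descriptive sketch only):
        //
        //   [ block_header | user data, nSize bytes | bound-check trailer ]
        //                  ^ pointer returned to the caller (pBlock + 1)
        //
        // The trailer is written only for blocks served from this heap;
        // blocks that fall back to the OS allocator skip bound checking.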
1691
1692         /// Free previously allocated memory block
1693         void free(
1694             void * pMemory  ///< Pointer to memory block to free
1695         )
1696         {
1697             if ( !pMemory )
1698                 return;
1699
1700             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1701             block_header * pBlock = pRedirect->begin();
1702
1703             if ( pBlock->isOSAllocated() ) {
1704                 // Block has been allocated from OS
1705                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1706                 m_LargeHeap.free( pBlock );
1707                 return;
1708             }
1709
1710             assert( !pBlock->isAligned() );
1711             superblock_desc * pDesc = pBlock->desc();
1712
1713             m_BoundChecker.check_bounds(
1714                 pRedirect + 1,
1715                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1716                 pDesc->nBlockSize
1717             );
1718
1719
1720             anchor_tag oldAnchor;
1721             anchor_tag newAnchor;
1722             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1723
1724             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1725
1726             oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
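            // CAS loop: push the freed block onto the superblock's free list and
            // adjust the superblock state: FULL -> PARTIAL when a block becomes
            // available again, or -> EMPTY when the last in-use block is returned.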
1727             do {
1728                 newAnchor = oldAnchor;
1729                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1730                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1731                 newAnchor.tag += 1;
1732
1733                 assert( oldAnchor.state != SBSTATE_EMPTY );
1734
1735                 if ( oldAnchor.state == SBSTATE_FULL )
1736                     newAnchor.state = SBSTATE_PARTIAL;
1737
1738                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1739                     //pProcHeap = pDesc->pProcHeap;
1740                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1741                     newAnchor.state = SBSTATE_EMPTY;
1742                 }
1743                 else
1744                     newAnchor.count += 1;
1745             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1746
1747             pProcHeap->stat.incFreeCount();
1748
1749             if ( newAnchor.state == SBSTATE_EMPTY ) {
1750                 if ( pProcHeap->unlink_partial( pDesc ))
1751                     free_superblock( pDesc );
1752             }
1753             else if (oldAnchor.state == SBSTATE_FULL ) {
1754                 assert( pProcHeap != nullptr );
1755                 pProcHeap->stat.decDescFull();
1756                 pProcHeap->add_partial( pDesc );
1757             }
1758         }
1759
1760         /// Reallocate memory block
1761         /**
1762             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1763             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1764
1765             If there is not enough available memory to expand the block to the given size,
1766             the original block is left unchanged, and \p nullptr is returned.
1767
1768             An aligned memory block cannot be reallocated: if \p pMemory has been allocated by \ref alloc_aligned,
1769             then the return value is \p nullptr and the original block is left unchanged.
1770         */
1771         void * realloc(
1772             void *  pMemory,    ///< Pointer to previously allocated memory block
1773             size_t  nNewSize    ///< New size of memory block, in bytes
1774         )
1775         {
1776             if ( nNewSize == 0 ) {
1777                 free( pMemory );
1778                 return nullptr;
1779             }
1780
1781             const size_t nOrigSize = nNewSize;
1782             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1783
1784             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1785
1786             // Reallocation of aligned block is not possible
1787             if ( pBlock->isAligned() ) {
1788                 assert( false );
1789                 return nullptr;
1790             }
1791
1792             if ( pBlock->isOSAllocated() ) {
1793                 // The block has been allocated from OS
1794                 size_t nCurSize = pBlock->getOSAllocSize();
1795
1796                 if ( nCurSize >= nNewSize )
1797                     return pMemory;
1798
1799                 // Grow block size
1800                 void * pNewBuf = alloc( nOrigSize );
1801                 if ( pNewBuf ) {
1802                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1803                     free( pMemory );
1804                 }
1805                 return pNewBuf;
1806             }
1807
1808             superblock_desc * pDesc = pBlock->desc();
1809             if ( nNewSize <= pDesc->nBlockSize ) {
1810                 // The current block is large enough - reallocate in place
1811                 m_BoundChecker.make_trailer(
1812                     reinterpret_cast<byte *>(pBlock + 1),
1813                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1814                     nOrigSize
1815                     );
1816
1817                 return pMemory;
1818             }
1819
1820             void * pNew = alloc( nNewSize );
1821             if ( pNew ) {
1822                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1823                 free( pMemory );
1824                 return pNew;
1825             }
1826
1827             return nullptr;
1828         }
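
        /*
            Usage sketch for realloc (illustrative only; `heap` denotes a hypothetical
            instance of this Heap class):

                void * p = heap.alloc( 64 );
                p = heap.realloc( p, 128 );      // grows in place or moves the data
                void * q = heap.realloc( p, 0 ); // frees p and returns nullptr
                assert( q == nullptr );
        */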
1829
1830         /// Allocate aligned memory block
1831         void * alloc_aligned(
1832             size_t nSize,       ///< Size of memory block to allocate in bytes
1833             size_t nAlignment   ///< Alignment
1834         )
1835         {
1836             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1837                 void * p = alloc( nSize );
1838                 assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
1839                 return p;
1840             }
1841
1842             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1843
1844             block_header * pRedirect;
1845             if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1846                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1847                 assert( pRedirect != pBlock );
1848                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1849
1850                 assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1851             }
1852             else
1853                 pRedirect = pBlock;
1854
1855
1856             // Bound checking is only for our blocks
1857             if ( !pBlock->isOSAllocated() ) {
1858                 // the block is allocated from our heap - bound checker is applicable
1859                 m_BoundChecker.make_trailer(
1860                     reinterpret_cast<byte *>(pRedirect + 1),
1861                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1862                     nSize
1863                 );
1864             }
1865
1866             return pRedirect + 1;
1867         }
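
        // Aligned allocation layout (descriptive sketch only): when the natural
        // user area (pBlock + 1) is not aligned as requested, an auxiliary
        // "redirect" block_header is placed immediately before the first aligned
        // address inside the over-allocated block; free() resolves it back to the
        // real header via block_header::begin():
        //
        //   [ block_header | padding ... | redirect header | aligned user data ]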
1868
1869         /// Free aligned memory block previously allocated by \ref alloc_aligned
1870         void free_aligned(
1871             void * pMemory      ///< Pointer to memory block to free
1872         )
1873         {
1874             free( pMemory );
1875         }
1876
1877     public:
1878
1879         /// Get instant summary statistics
1880         void summaryStat( summary_stat& st )
1881         {
1882             size_t nProcHeapCount = m_SizeClassSelector.size();
1883             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1884                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1885                 if ( pProcDesc ) {
1886                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1887                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1888                         if ( pProcHeap ) {
1889                             st.add_procheap_stat( pProcHeap->stat );
1890                         }
1891                     }
1892                 }
1893             }
1894
1895             st.add_heap_stat( m_OSAllocStat );
1896         }
1897     };
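
    /*
        Minimal usage sketch of the Heap class (illustrative only; `heap` denotes an
        instance of this Heap constructed elsewhere, and summary_stat is assumed to be
        the statistics type declared in this namespace):

            void * p = heap.alloc( 100 );
            heap.free( p );

            void * q = heap.alloc_aligned( 256, 128 );   // 128-byte aligned block
            heap.free_aligned( q );

            summary_stat st;
            heap.summaryStat( st );
    */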
1898
1899 }}} // namespace cds::memory::michael
1900
1901 #endif // CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H