Replacing some integral typedefs with standard types
[libcds.git] / cds / memory / michael / allocator.h
1 //$$CDS-header$$
2
3 #ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
5
6 /*
7     Michael allocator implementation
8     Source:
9         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
10
11     Editions:
12     2011.09.07 khizmax  Optimization: a small page (about 64K) is allocated by the Heap::alloc call.
13                         This optimization allows the heap to request system memory more regularly,
14                         in 1M blocks, which leads to less memory fragmentation.
15     2011.01.02 khizmax  Created
16 */
17
18 #include <stdlib.h>
19 #include <mutex>        // unique_lock
20 #include <cds/init.h>
21 #include <cds/memory/michael/options.h>
22 #include <cds/memory/michael/bound_check.h>
23 #include <cds/memory/michael/procheap_stat.h>
24 #include <cds/memory/michael/osalloc_stat.h>
25
26 #include <cds/os/topology.h>
27 #include <cds/os/alloc_aligned.h>
28 #include <cds/lock/spinlock.h>
29 #include <cds/details/type_padding.h>
30 #include <cds/details/marked_ptr.h>
31 #include <cds/container/vyukov_mpmc_cycle_queue.h>
32 #include <cds/user_setup/cache_line.h>
33 #include <cds/details/lib.h>
34
35 #include <boost/intrusive/list.hpp>
36
37 namespace cds {
38     /// Memory-related algorithms: allocators etc.
39 namespace memory {
40     /// Michael's allocator (class Heap)
41     /**
42         \par Source
43             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
44
45         This namespace declares the main class Heap and a number of helper classes.
46     */
47 namespace michael {
48
49     /// Size class
50     struct size_class {
51         unsigned int    nBlockSize  ;   ///< block size in bytes
52         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
53         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
54         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
55     };
56
57     /// %Heap based on system \p malloc and \p free functions
58     struct malloc_heap
59     {
60         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
61         static void * alloc( size_t nSize )
62         {
63             return ::malloc( nSize );
64         }
65         /// Returns the memory block \p p to the system (\p free wrapper)
66         static void free( void * p )
67         {
68             ::free( p );
69         }
70     };
71
72     /// %Heap based on system provided aligned \p malloc and \p free functions
73     struct aligned_malloc_heap
74     {
75         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
76         static void * alloc( size_t nSize, size_t nAlignment  )
77         {
78             return cds::OS::aligned_malloc( nSize, nAlignment );
79         }
80         /// Frees the aligned memory block \p p that has been previously allocated by an \ref alloc call
81         static void free( void * p )
82         {
83             cds::OS::aligned_free( p );
84         }
85     };
86
87     /// Page heap based on \p Heap
88     /**
89         A page heap can allocate memory in page-sized blocks only.
90         \p Heap may be any heap that provides an interface like \ref malloc_heap; a sketch of such a heap is given below.
91 
92         This class is one of the available implementations of the opt::page_heap option.
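
        For illustration only, a user-defined heap with a \ref malloc_heap -like interface
        plugged into the page allocator (the \p zeroing_heap name and the 64K page size below
        are just examples, not part of the library):
        \code
        struct zeroing_heap {
            // Same static interface as malloc_heap; calloc is used only to show
            // that any allocation source can be substituted
            static void * alloc( size_t nSize )
            {
                return ::calloc( 1, nSize );
            }
            static void free( void * p )
            {
                ::free( p );
            }
        };

        cds::memory::michael::page_allocator< zeroing_heap > pageAlloc( 64 * 1024 );
        void * pPage = pageAlloc.alloc();
        // ... use the page ...
        pageAlloc.free( pPage );
        \endcode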
93     */
94     template <class Heap = malloc_heap>
95     class page_allocator: public Heap
96     {
97         //@cond
98         typedef Heap base_class;
99         size_t  m_nPageSize;
100         //@endcond
101
102     public:
103         /// Initializes heap
104         page_allocator(
105             size_t nPageSize    ///< page size in bytes
106         )
107             : m_nPageSize( nPageSize )
108         {}
109
110         /// Allocate new page
111         void * alloc()
112         {
113             return base_class::alloc( m_nPageSize );
114         }
115
116         /// Free page \p pPage
117         void free( void * pPage )
118         {
119             base_class::free( pPage );
120         }
121     };
122
123     /// Page cacheable heap
124     /**
125         To improve performance this allocator maintains a small list of free pages.
126         The page heap can allocate memory in page-sized blocks only.
127 
128         Template parameters:
129             \li \p FreeListCapacity - capacity of the free-list, the default value is 64 pages
130             \li \p Heap - any heap that provides an interface like \ref malloc_heap.
131 
132         This class is one of the available implementations of the opt::page_heap option; see the sketch below.
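
        A minimal sketch of direct use (normally the Heap instantiates the page allocator
        internally; the 32-entry cache and the 1M page size below are example values only):
        \code
        cds::memory::michael::page_cached_allocator< 32 > pageAlloc( 1024 * 1024 );

        void * pPage = pageAlloc.alloc();   // taken from the free-list when possible
        // ... use the page ...
        pageAlloc.free( pPage );            // returned to the free-list if it is not full
        \endcode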
133     */
134     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
135     class page_cached_allocator: public page_allocator<Heap>
136     {
137         //@cond
138         typedef page_allocator<Heap> base_class;
139
140 #ifdef _DEBUG
141         struct make_null_ptr {
142             void operator ()(void *& p)
143             {
144                 p = nullptr;
145             }
146         };
147 #endif
148         struct free_list_traits : public cds::container::vyukov_queue::traits
149         {
150             typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
151 #ifdef _DEBUG
152             typedef make_null_ptr value_cleaner;
153 #endif
154         };
155         typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
156
157         free_list   m_FreeList;
158         //@endcond
159
160     public:
161         /// Initializes heap
162         page_cached_allocator(
163             size_t nPageSize    ///< page size in bytes
164         )
165             : base_class( nPageSize )
166             , m_FreeList( FreeListCapacity )
167         {}
168
169         //@cond
170         ~page_cached_allocator()
171         {
172             void * pPage;
173             while ( m_FreeList.pop(pPage) )
174                 base_class::free( pPage );
175         }
176         //@endcond
177
178         /// Allocate new page
179         void * alloc()
180         {
181             void * pPage;
182             if ( !m_FreeList.pop( pPage ) )
183                 pPage = base_class::alloc();
184             return pPage;
185         }
186
187         /// Free page \p pPage
188         void free( void * pPage )
189         {
190             if ( !m_FreeList.push( pPage ))
191                 base_class::free( pPage );
192         }
193     };
194
195     /// Implementation of opt::sizeclass_selector option
196     /**
197         The default size-class selector can manage memory blocks up to 64K; a usage sketch is shown below.
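
        For illustration, how the heap is expected to query the size-class selector
        (the 100-byte request size is arbitrary):
        \code
        typedef cds::memory::michael::default_sizeclass_selector selector;

        selector::sizeclass_index idx = selector::find( 100 );
        if ( idx == selector::c_nNoSizeClass ) {
            // the block is too large and would be allocated directly from the OS
        }
        else {
            cds::memory::michael::size_class const * pSizeClass = selector::at( idx );
            // pSizeClass->nBlockSize is the smallest managed block size that fits 100 bytes
        }
        \endcode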
198     */
199     class CDS_EXPORT_API default_sizeclass_selector
200     {
201         //@cond
202         /// Count of different size-classes
203         static const size_t c_nSizeClassCount = 63;
204
205         /// Max block size
206         static const size_t c_nMaxBlockSize = 64 * 1024;
207
208         /// Page size of type 0 (about 64K)
209         static const unsigned int c_nPage64K = 64 * 1024 - 32;
210
211         /// Page size of type 1 (1M)
212         static const unsigned int c_nPage1M = 1024 * 1024;
213
214         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
215         static size_class const m_szClass[c_nSizeClassCount];
216         static unsigned char const m_szClassMap[];
217         //@endcond
218     public:
219         /// Type of size-class index
220         typedef unsigned int sizeclass_index;
221
222 #ifdef _DEBUG
223         default_sizeclass_selector();
224 #endif
225
226         /// "No size class" index
227         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
228
229         /// Returns size-class count
230         static sizeclass_index size()
231         {
232             return c_nSizeClassCount;
233         }
234
235         /// Returns page size in bytes for given page type \p nPageType
236         static size_t page_size(size_t nPageType )
237         {
238             switch (nPageType) {
239             case 0:
240                 return c_nPage64K;
241             case 1:
242                 return c_nPage1M;
243             default:
244                 assert(false)   ;   // anything forgotten?..
245             }
246             return c_nPage1M;
247         }
248
249         /// Returns the number of page size classes
250         /**
251             This class supports two page types: 64K pages for small objects and 1M pages for other objects.
252         */
253         static size_t pageTypeCount()
254         {
255             return 2;
256         }
257
258         /// Returns size-class index for \p nSize
259         /**
260             For large blocks that cannot be managed by Michael's allocator
261             the function returns \p c_nNoSizeClass.
262         */
263         static sizeclass_index find( size_t nSize )
264         {
265             if ( nSize > c_nMaxBlockSize ) {
266                 // Too large block - allocate from system
267                 return c_nNoSizeClass;
268             }
269             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
270             assert( nSize <= m_szClassBounds[ szClass ] );
271             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
272
273             return szClass;
274         }
275
276         /// Gets details::size_class struct for size-class index \p nIndex
277         static const size_class * at( sizeclass_index nIndex )
278         {
279             assert( nIndex < size() );
280             return m_szClass + nIndex;
281         }
282     };
283
284     //@cond
285     namespace details {
286         struct free_list_tag;
287         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
288
289         struct partial_list_tag;
290         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
291
292         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
293         {};
294     }
295     //@endcond
296
297     /// List of free superblock descriptors
298     /**
299         This class is an implementation of the \ref opt::free_list option; a usage sketch is given below.
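
        For illustration, selecting this free-list implementation explicitly when declaring
        a Heap, in the notation of the usage example given for \ref Heap below (it is already
        the default; the spin-lock type is just an example):
        \code
        cds::memory::michael::Heap<
            opt::free_list< cds::memory::michael::free_list_locked< cds::lock::Spin > >
        >   myHeap;
        \endcode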
300     */
301     template <class Lock, class T = details::intrusive_superblock_desc>
302     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
303     {
304         //@cond
305         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
306     public:
307         typedef details::free_list_locked_hook item_hook;
308         typedef Lock lock_type;
309     protected:
310         typedef std::unique_lock<lock_type>   auto_lock;
311
312         mutable lock_type   m_access;
313         //@endcond
314
315     public:
316         /// Rebinds to other item type \p T2
317         template <class T2>
318         struct rebind {
319             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
320         };
321
322     public:
323         /// Push superblock descriptor to free-list
324         void push( T * pDesc )
325         {
326             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
327             auto_lock al(m_access);
328             base_class::push_back( *pDesc );
329         }
330
331         /// Pop superblock descriptor from free-list
332         T *   pop()
333         {
334             auto_lock al(m_access);
335             if ( base_class::empty() )
336                 return nullptr;
337             T& rDesc = base_class::front();
338             base_class::pop_front();
339             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
340             return &rDesc;
341         }
342
343         /// Returns current count of superblocks in free-list
344         size_t  size() const
345         {
346             auto_lock al(m_access);
347             return base_class::size();
348         }
349     };
350
351     /// List of partially filled superblock descriptors
352     /**
353         This class is an implementation of the \ref opt::partial_list option
354     */
355     template <class Lock, class T = details::intrusive_superblock_desc>
356     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
357     {
358         //@cond
359         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
360     public:
361         typedef details::partial_list_locked_hook item_hook;
362         typedef Lock    lock_type;
363     protected:
364         typedef std::unique_lock<lock_type>   auto_lock;
365
366         mutable lock_type   m_access;
367         //@endcond
368
369     public:
370         /// Rebinds to other item type \p T2
371         template <class T2>
372         struct rebind {
373             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
374         };
375
376     public:
377         /// Push a superblock \p pDesc to the list
378         void    push( T * pDesc )
379         {
380             auto_lock al( m_access );
381             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
382             base_class::push_back( *pDesc );
383         }
384
385         /// Pop superblock from the list
386         T * pop()
387         {
388             auto_lock al( m_access );
389             if ( base_class::empty() )
390                 return nullptr;
391             T& rDesc = base_class::front();
392             base_class::pop_front();
393             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
394             return &rDesc;
395         }
396
397         /// Removes the \p pDesc descriptor from the partial list
398         bool unlink( T * pDesc )
399         {
400             assert(pDesc != nullptr);
401             auto_lock al( m_access );
402             // !inited(pDesc) means that pDesc is currently linked into the partial list
403             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
404                 base_class::erase( base_class::iterator_to( *pDesc ) );
405                 return true;
406             }
407             return false;
408         }
409
410         /// Returns the number of elements in the list
411         size_t size() const
412         {
413             auto_lock al( m_access );
414             return base_class::size();
415         }
416     };
417
418     /// Summary processor heap statistics
419     /**
420         Summary heap statistics for use with the Heap::summaryStat function; see the sketch below.
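
        For illustration, a sketch of collecting and comparing two statistics snapshots
        (the \p myHeap object and the exact \p Heap::summaryStat signature are assumed here):
        \code
        cds::memory::michael::summary_stat stBefore, stAfter;
        myHeap.summaryStat( stBefore );

        // ... allocation-intensive work ...

        myHeap.summaryStat( stAfter );
        stAfter -= stBefore;    // stAfter now holds the difference of the two snapshots
        std::cout << "Allocations from active superblocks: " << stAfter.nAllocFromActive << std::endl;
        \endcode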
421     */
422     struct summary_stat
423     {
424         size_t      nAllocFromActive    ;  ///< Event count of allocation from active superblock
425         size_t      nAllocFromPartial   ;  ///< Event count of allocation from partial superblock
426         size_t      nAllocFromNew       ;  ///< Event count of allocation from new superblock
427         size_t      nFreeCount          ;  ///< Count of \p free function calls
428         size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
429         size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
430         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors allocated
431         size_t      nDescFull           ;  ///< Count of full superblocks
432         atomic64u_t nBytesAllocated     ;  ///< Count of allocated bytes (for heap-managed memory blocks)
433         atomic64u_t nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap-managed memory blocks)
434 
435         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
436         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
437         atomic64u_t nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
438         atomic64u_t nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
439
440         // Internal contention indicators
441         /// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active internal function
442         /**
443             Contention indicator: the smaller the value, the better.
444         */
445         size_t      nActiveDescCASFailureCount;
446         /// CAS failure counter for updating the \p anchor field of the active superblock in the \p alloc_from_active internal function
447         /**
448             Contention indicator: the smaller the value, the better.
449         */
450         size_t      nActiveAnchorCASFailureCount;
451         /// CAS failure counter for reserving blocks (updating the \p anchor field of a partial superblock) in the \p alloc_from_partial internal function
452         /**
453             Contention indicator: the smaller the value, the better.
454         */
455         size_t      nPartialDescCASFailureCount;
456         /// CAS failure counter for popping a reserved block (updating the \p anchor field of a partial superblock) in the \p alloc_from_partial internal function
457         /**
458             Contention indicator: the smaller the value, the better.
459         */
460         size_t      nPartialAnchorCASFailureCount;
461
462
463     public:
464         /// Constructs empty statistics. All counters are zero.
465         summary_stat()
466         {
467             clear();
468         }
469
470         /// Difference statistics
471         /**
472             This operator computes the difference between \p *this and \p stat and stores the result in \p *this.
473             Returns \p *this.
474         */
475         summary_stat& operator -=( const summary_stat& stat )
476         {
477             nAllocFromActive    -= stat.nAllocFromActive;
478             nAllocFromPartial   -= stat.nAllocFromPartial;
479             nAllocFromNew       -= stat.nAllocFromNew;
480             nFreeCount          -= stat.nFreeCount;
481             nPageAllocCount     -= stat.nPageAllocCount;
482             nPageDeallocCount   -= stat.nPageDeallocCount;
483             nDescAllocCount     -= stat.nDescAllocCount;
484             nDescFull           -= stat.nDescFull;
485             nBytesAllocated     -= stat.nBytesAllocated;
486             nBytesDeallocated   -= stat.nBytesDeallocated;
487
488             nSysAllocCount      -= stat.nSysAllocCount;
489             nSysFreeCount       -= stat.nSysFreeCount;
490             nSysBytesAllocated  -= stat.nSysBytesAllocated;
491             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
492
493             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
494             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
495             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
496             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
497
498             return *this;
499         }
500
501         /// Clears statistics
502         /**
503             All counters are set to zero.
504         */
505         void clear()
506         {
507             memset( this, 0, sizeof(*this));
508         }
509
510         //@cond
511         template <typename Stat>
512         summary_stat& add_procheap_stat( const Stat& stat )
513         {
514             nAllocFromActive    += stat.allocFromActive();
515             nAllocFromPartial   += stat.allocFromPartial();
516             nAllocFromNew       += stat.allocFromNew();
517             nFreeCount          += stat.freeCount();
518             nPageAllocCount     += stat.blockAllocated();
519             nPageDeallocCount   += stat.blockDeallocated();
520             nDescAllocCount     += stat.descAllocCount();
521             nDescFull           += stat.descFull();
522             nBytesAllocated     += stat.allocatedBytes();
523             nBytesDeallocated   += stat.deallocatedBytes();
524
525             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
526             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
527             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
528             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
529
530             return *this;
531         }
532
533         template <typename Stat>
534         summary_stat& add_heap_stat( const Stat& stat )
535         {
536             nSysAllocCount      += stat.allocCount();
537             nSysFreeCount       += stat.freeCount();
538
539             nSysBytesAllocated  += stat.allocatedBytes();
540             nSysBytesDeallocated+= stat.deallocatedBytes();
541
542             return *this;
543         }
544         //@endcond
545     };
546
547     /// Michael's allocator
548     /**
549         This class provides the base functionality of Michael's allocator. It does not provide
550         the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
551         The heap interface is closer to the semantics of the \p malloc / \p free system functions.
552         The heap supports allocation of aligned and unaligned data.
553 
554         The algorithm is based on a simplified version of
555             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
556 
557         which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
558             \li [2002] Emery Berger "Memory Management for High-Performance Application", PhD thesis
559 
560         This is a powerful, scalable, fully customizable heap with a lock-free fast path
561         that has been developed specifically for multi-threaded use.
562         With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
563         You can manually bind any thread to any arena ("processor"). With the opt::sizeclass_selector option you can manage
564         allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation,
565         such as \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
566         The opt::check_bounds feature can help you find memory buffer overflows.
567
568         Brief algorithm description from Michael's work:
569
570         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
571         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
572         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
573         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
574         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
575         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
576         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
577         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
578         in a lock-free manner.
579
580         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
581         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
582         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
583         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
584         available blocks of its original superblock by atomically updating its descriptor.
585
586         <b>Constraint</b>: one superblock may contain up to 2048 blocks. This constraint limits the maximum
587         superblock size.
588
589         Available \p Options:
590         - \ref opt::sys_topology - class that describes system topology needed for allocator.
591             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
592         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
593             Default is \ref malloc_heap.
594         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
595             Default is \ref aligned_malloc_heap
596         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
597             Default is \ref page_cached_allocator
598         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
599             for incoming allocation request.
600             Default is \ref default_sizeclass_selector
601         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
602             Default is \ref free_list_locked
603         - \ref opt::partial_list - option setter for a class to manage a list of partial filled superblocks
604             Default is \ref partial_list_locked
605         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
606             that is maintained by the heap.
607             Default is \ref procheap_empty_stat
608         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
609             allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector)
610             and is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated directly
611             from the OS. The \p os_allocated_stat option sets a class to gather statistics for such large blocks.
612             Default is \ref os_allocated_empty
613         - \ref opt::check_bounds - a bound checker.
614             Default is no bound checker (cds::opt::none)
615
616         \par Usage:
617         The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
618
619         \code
620         #include <cds/memory/michael/allocator.h>
621
622         // Heap with explicitly defined options:
623         cds::memory::michael::Heap<
624             opt::aligned_heap< aligned_malloc_heap >,
625             opt::page_heap< page_cached_allocator<16, malloc_heap> >
626         >   myHeap;
627
628         // Heap with default options:
629         cds::memory::michael::Heap<>    myDefHeap;
630         \endcode
631
632         \par How to make std-like allocator
633
634         There are significant differences between the heap and the <tt>std::allocator</tt> interface:
635             - The heap is stateful, while \p std::allocator is stateless.
636             - The heap has many more template parameters than \p std::allocator
637             - The heap has a low-level interface for memory allocation only, unlike the allocator
638                 interface that can construct/destroy objects of any type T.
639 
640         To convert the heap interface into a \p std::allocator -like interface you should:
641             - Declare an object of class cds::memory::michael::Heap with the necessary
642                 template parameters; this is usually a static object
643             - Create a class with the \p std::allocator interface that uses the heap's functions.
644         \code
645         #include <cds/memory/michael/allocator.h>
646
647         template <class T>
648         class MichaelAllocator
649         {
650             typedef std::allocator<T>               std_allocator;
651             typedef cds::memory::michael::Heap<>    michael_heap;
652
653             // Michael heap static object
654             static michael_heap     s_Heap;
655         public:
656             // Declare typedefs from std::allocator
657             typedef typename std_allocator::const_pointer   const_pointer;
658             typedef typename std_allocator::pointer         pointer;
659             typedef typename std_allocator::const_reference const_reference;
660             typedef typename std_allocator::reference       reference;
661             typedef typename std_allocator::difference_type difference_type;
662             typedef typename std_allocator::size_type       size_type;
663             typedef typename std_allocator::value_type      value_type;
664
665             // Allocation function
666             pointer allocate( size_type _Count, const void* _Hint )
667             {
668                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
669             }
670
671             // Deallocation function
672             void deallocate( pointer _Ptr, size_type _Count )
673             {
674                 s_Heap.free( _Ptr );
675             }
676
677             // Other std::allocator specific functions: address, construct, destroy, etc.
678             ...
679
680             // Rebinding allocator to other type
681             template <class _Other>
682             struct rebind {
683                 typedef MichaelAllocator<_Other> other;
684             };
685         };
686
687         // In .cpp file:
688         template <class T>
689         typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
690         \endcode
691     */
692     template <typename... Options>
693     class Heap {
694     protected:
695
696         //@cond
697         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
698         static const unsigned int c_nDefaultBlockAlignment = 8;
699
700         struct default_options {
701             typedef cds::OS::topology           sys_topology;
702             typedef malloc_heap                 system_heap;
703             typedef page_cached_allocator<>     page_heap;
704             typedef aligned_malloc_heap         aligned_heap;
705             typedef default_sizeclass_selector  sizeclass_selector;
706             typedef free_list_locked<cds::lock::Spin>  free_list;
707             typedef partial_list_locked<cds::lock::Spin>    partial_list;
708             typedef procheap_empty_stat         procheap_stat;
709             typedef os_allocated_empty          os_allocated_stat;
710             typedef cds::opt::none              check_bounds;
711         };
712         //@endcond
713
714     protected:
715         //@cond
716         typedef typename opt::make_options<default_options, Options...>::type   options;
717         //@endcond
718
719         //@cond
720         typedef unsigned char   byte;
721         //@endcond
722     public:
723         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
724         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
725         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
726         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
727         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
728         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
729         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
730         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
731
732         // forward declarations
733         //@cond
734         struct superblock_desc;
735         struct processor_heap_base;
736         struct processor_desc;
737         //@endcond
738
739         /// Superblock states
740         /**
741             A superblock can be in one of four states: \p ACTIVE, \p FULL,
742             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
743             superblock in a heap, or if a thread intends to try to install it
744             as such. A superblock is \p FULL if all its blocks are either allocated
745             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
746             and contains unreserved available blocks. A superblock is
747             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
748         */
749         enum superblock_state {
750             SBSTATE_ACTIVE  = 0,    ///< superblock is active
751             SBSTATE_FULL    = 1,    ///< superblock is full
752             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
753             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
754         };
755
756         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
757
758         /// Anchor of the superblock descriptor. Updated by CAS
759         struct anchor_tag {
760             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
761             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
762             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
763             unsigned long long    tag:40    ;   ///< ABA prevention tag
764         };
765
766         /// Superblock descriptor
767         struct superblock_desc
768             : public options::free_list::item_hook
769             , public options::partial_list::item_hook
770         {
771             atomics::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
772             byte *              pSB         ;   ///< ptr to superblock
773             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
774             unsigned int        nBlockSize  ;   ///< block size in bytes
775             unsigned int        nCapacity   ;   ///< superblock size/block size
776
777             //@cond
778             superblock_desc()
779                 : pSB(nullptr)
780                 , pProcHeap( nullptr )
781             {}
782             //@endcond
783         };
784
785         //@cond
786         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
787         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
788         //@endcond
789
790 #if CDS_BUILD_BITS == 32
791         /// Allocated block header
792         /**
793             Each allocated block has an 8-byte header.
794             The header contains a pointer to the owner superblock descriptor and a redirection flag.
795             If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
796             \code
797                 +---------------+
798                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
799                 +---------------+
800                 |               | <- memory allocated
801                 |   memory      |
802                 |               |
803                 +---------------+
804             \endcode
805             If the block has been allocated by \ref alloc_aligned, the returned pointer may have to be shifted
806             to satisfy the requested alignment. In this case the redirection flag is 1 and the block's structure is:
807             \code
808                 +---------------+
809             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
810             |   +---------------+
811             |   |   padding     |
812             |   |   (unused)    |
813             |   |               |
814             |   +---------------+
815             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
816                 +---------------+
817                 |               | <- memory allocated
818                 |   memory      |
819                 |               |
820                 +---------------+
821             \endcode
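
            For illustration, a sketch of how the owner superblock descriptor is recovered
            from a user pointer on deallocation (not the actual \p free implementation,
            only the intent of the layout above):
            \code
                block_header * pHeader = reinterpret_cast<block_header *>( pUserPtr ) - 1;
                if ( pHeader->isOSAllocated() ) {
                    // large block allocated directly from the OS;
                    // pHeader->getOSAllocSize() gives its size
                }
                else {
                    // desc() follows the redirection header if the block is aligned
                    superblock_desc * pDesc = pHeader->desc();
                    // the block is returned to pDesc's superblock by a CAS on pDesc->anchor
                }
            \endcode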
822         */
823         class block_header
824         {
825         //@cond
826             enum {
827                 bitAligned = 1,
828                 bitOSAllocated = 2
829             };
830
831             union {
832                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
833                 uint32_t         nSize       ;   // block size (allocated from the OS)
834             };
835             uint32_t         nFlags;
836
837         public:
838             void  set( superblock_desc * pdesc, uint32_t isAligned )
839             {
840                 pDesc = pdesc;
841                 nFlags = isAligned ? bitAligned : 0;
842             }
843
844             superblock_desc * desc()
845             {
846                 assert( (nFlags & bitOSAllocated) == 0 );
847                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
848             }
849
850             block_header * begin()
851             {
852                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
853             }
854
855             bool isAligned() const
856             {
857                 return (nFlags & bitAligned) != 0;
858             }
859
860             bool isOSAllocated() const
861             {
862                 return (nFlags & bitOSAllocated) != 0;
863             }
864
865             void setOSAllocated( size_t sz )
866             {
867                 nSize = sz;
868                 nFlags = bitOSAllocated;
869             }
870
871             size_t getOSAllocSize() const
872             {
873                 assert( isOSAllocated() );
874                 return nSize;
875             }
876
877         //@endcond
878         };
879 #elif CDS_BUILD_BITS == 64
880         //@cond
881         class block_header
882         {
883             enum {
884                 bitAligned = 1,
885                 bitOSAllocated = 2
886             };
887             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
888             // If bitOSAllocated is set the pDesc contains size of memory block
889             // allocated from OS
890             marked_desc_ptr     pDesc;
891         public:
892             void  set( superblock_desc * pdesc, uint32_t isAligned )
893             {
894                 pDesc = marked_desc_ptr( pdesc, isAligned );
895             }
896
897             superblock_desc * desc()
898             {
899                 assert( !isOSAllocated() );
900                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
901             }
902
903             block_header * begin()
904             {
905                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
906             }
907
908             bool isAligned() const
909             {
910                 return (pDesc.bits() & bitAligned) != 0;
911             }
912
913             bool isOSAllocated() const
914             {
915                 return (pDesc.bits() & bitOSAllocated) != 0;
916             }
917
918             void setOSAllocated( size_t nSize )
919             {
920
921                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
922             }
923
924             size_t getOSAllocSize() const
925             {
926                 assert( isOSAllocated() );
927                 return reinterpret_cast<uintptr_t>( pDesc.ptr() ) >> 2;
928             }
929
930         };
931         //@endcond
932 #else
933 #       error "Unexpected value of CDS_BUILD_BITS"
934 #endif  // CDS_BUILD_BITS
935
936         //@cond
937         struct free_block_header: block_header {
938             unsigned int    nNextFree;
939         };
940         //@endcond
941
942 #if CDS_BUILD_BITS == 32
943         /// Processor heap's \p active field
944         /**
945             The \p active field in the processor heap structure is primarily a pointer to the descriptor
946             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
947             guaranteed that the active superblock has at least one block available for reservation.
948             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
949             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
950             of blocks available for reservation in the active superblock less one. That is, if the value
951             of credits is n, then the active superblock contains n+1 blocks available for reservation
952             through the \p active field. Note that the number of blocks in a superblock is not limited
953             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
954             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
955             atomically decrements credits while validating that the active superblock is still valid.
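
            For illustration, the reservation step expressed with the \p active_tag interface
            (this mirrors \p alloc_from_active later in this file; \p pProcHeap is the owning
            processor heap):
            \code
                active_tag oldActive = pProcHeap->active.load( atomics::memory_order_acquire );
                if ( oldActive.ptr() ) {
                    active_tag newActive;   // empty by default
                    if ( oldActive.credits() != 0 ) {
                        newActive = oldActive;
                        newActive.credits( oldActive.credits() - 1 );
                    }
                    // on success the thread has reserved one block of the active superblock
                    pProcHeap->active.compare_exchange_strong( oldActive, newActive,
                        atomics::memory_order_release, atomics::memory_order_relaxed );
                }
            \endcode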
956         */
957         class active_tag {
958         //@cond
959             superblock_desc *       pDesc;
960             uint32_t             nCredits;
961
962         public:
963             static const unsigned int c_nMaxCredits = 0 - 1;
964
965         public:
966             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
967                 : pDesc( nullptr )
968                 , nCredits(0)
969             {}
970
971             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
972             ~active_tag() CDS_NOEXCEPT = default;
973             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
974 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
975             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
976             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
977 #       endif
978
979             /// Returns pointer to superblock descriptor
980             superblock_desc * ptr() const
981             {
982                 return pDesc;
983             }
984
985             /// Sets superblock descriptor
986             void ptr( superblock_desc * p )
987             {
988                 pDesc = p;
989             }
990
991             unsigned int credits() const
992             {
993                 return nCredits;
994             }
995
996             void credits( unsigned int n )
997             {
998                 nCredits = n;
999             }
1000
1001             void clear()
1002             {
1003                 pDesc = nullptr;
1004                 nCredits = 0;
1005             }
1006
1007             void set( superblock_desc * pSB, unsigned int n )
1008             {
1009                 pDesc = pSB;
1010                 nCredits = n;
1011             }
1012         //@endcond
1013         };
1014 #elif CDS_BUILD_BITS == 64
1015         //@cond
1016         class active_tag
1017         {
1018         public:
1019             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1020         protected:
1021             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1022             marked_desc_ptr pDesc;
1023
1024         public:
1025             active_tag() CDS_NOEXCEPT
1026                 : pDesc( nullptr )
1027             {}
1028             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1029             //active_tag() CDS_NOEXCEPT = default;
1030             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1031             ~active_tag() CDS_NOEXCEPT = default;
1032             active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1033 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1034             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1035             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1036 #       endif
1037             superblock_desc *    ptr() const
1038             {
1039                 return pDesc.ptr();
1040             }
1041
1042             void ptr( superblock_desc * p )
1043             {
1044                 assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
1045                 pDesc = marked_desc_ptr( p, pDesc.bits());
1046             }
1047
1048             unsigned int credits()
1049             {
1050                 return (unsigned int) pDesc.bits();
1051             }
1052
1053             void credits( unsigned int n )
1054             {
1055                 assert( n <= c_nMaxCredits );
1056                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1057             }
1058
1059             void clear()
1060             {
1061                 pDesc = marked_desc_ptr();
1062             }
1063
1064             void set( superblock_desc * pSB, unsigned int n )
1065             {
1066                 assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
1067                 pDesc = marked_desc_ptr( pSB, n );
1068             }
1069
1070         };
1071         //@endcond
1072 #else
1073 #       error "Unexpected value of CDS_BUILD_BITS"
1074 #endif  // CDS_BUILD_BITS
1075
1076
1077         /// Processor heap
1078         struct processor_heap_base
1079         {
1080             CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1081             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1082             const size_class *  pSizeClass  ;   ///< pointer to size class
1083             atomics::atomic<superblock_desc *>   pPartial    ;   ///< pointer to partial filled superblock (may be \p nullptr)
1084             partial_list        partialList ;   ///< list of partial filled superblocks owned by the processor heap
1085             unsigned int        nPageIdx    ;   ///< page size-class index; equals \ref c_nPageSelfAllocation for a "small page"
1086
1087             /// Small page marker
1088             /**
1089                 If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
1090                 This optimization allows the heap to request system memory more regularly, in 1M blocks, which leads
1091                 to less memory fragmentation.
1092             */
1093             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1094
1095             procheap_stat       stat        ;   ///< heap statistics
1096             //processor_heap_statistics   stat;
1097
1098             //@cond
1099             processor_heap_base() CDS_NOEXCEPT
1100                 : pProcDesc( nullptr )
1101                 , pSizeClass( nullptr )
1102                 , pPartial( nullptr )
1103             {
1104                 assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
1105             }
1106             //@endcond
1107
1108             /// Get partial superblock owned by the processor heap
1109             superblock_desc * get_partial()
1110             {
1111                 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1112                 do {
1113                     if ( !pDesc ) {
1114                         pDesc =  partialList.pop();
1115                         break;
1116                     }
1117                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1118
1119                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1120                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1121                 return pDesc;
1122             }
1123
1124             /// Add partial superblock \p pDesc to the list
1125             void add_partial( superblock_desc * pDesc )
1126             {
1127                 assert( pPartial != pDesc );
1128                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1129
1130                 superblock_desc * pCur = nullptr;
1131                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1132                     partialList.push( pDesc );
1133             }
1134
1135
1136             /// Removes superblock \p pDesc from the list of partial superblocks
1137             bool unlink_partial( superblock_desc * pDesc )
1138             {
1139                 return partialList.unlink( pDesc );
1140             }
1141         };
1142
1143         /// Processor heap aligned to \p c_nAlignment
1144         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1145
1146         /// Processor descriptor
1147         struct processor_desc
1148         {
1149             processor_heap *    arrProcHeap     ; ///< array of processor heaps
1150             free_list           listSBDescFree  ; ///< List of free superblock descriptors
1151             page_heap *         pageHeaps       ; ///< array of page heaps (one for each page size)
1152
1153             //@cond
1154             processor_desc()
1155                 : arrProcHeap( nullptr )
1156                 , pageHeaps( nullptr )
1157             {}
1158             //@endcond
1159         };
1160
1161
1162     protected:
1163         sys_topology        m_Topology           ;  ///< System topology
1164         system_heap         m_LargeHeap          ;  ///< Heap for large blocks
1165         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1166         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1167         atomics::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1168         unsigned int        m_nProcessorCount    ;  ///< Processor count
1169         bound_checker       m_BoundChecker       ;  ///< Bound checker
1170
1171         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1172
1173     protected:
1174         //@cond
1175
1176         /// Allocates large block from system memory
1177         block_header * alloc_from_OS( size_t nSize )
1178         {
1179             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1180             m_OSAllocStat.incBytesAllocated( nSize );
1181             p->setOSAllocated( nSize );
1182             return p;
1183         }
1184
1185         /// Allocates from the active superblock if possible
1186         block_header * alloc_from_active( processor_heap * pProcHeap )
1187         {
1188             active_tag  oldActive;
1189             int nCollision = -1;
1190
1191             // Reserve block
1192             while ( true ) {
1193                 ++nCollision;
1194                 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1195                 if ( !oldActive.ptr() )
1196                     return nullptr;
1197                 unsigned int nCredits = oldActive.credits();
1198                 active_tag  newActive   ; // default = 0
1199                 if ( nCredits != 0 ) {
1200                     newActive = oldActive;
1201                     newActive.credits( nCredits - 1 );
1202                 }
1203                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1204                     break;
1205             }
1206
1207             if ( nCollision )
1208                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1209
1210             // pop block
1211             superblock_desc * pDesc = oldActive.ptr();
1212
1213             anchor_tag  oldAnchor;
1214             anchor_tag  newAnchor;
1215             byte * pAddr;
1216             unsigned int nMoreCredits = 0;
1217
1218             nCollision = -1;
1219             do {
1220                 ++nCollision;
1221                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1222
1223                 assert( oldAnchor.avail < pDesc->nCapacity );
1224                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1225                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1226                 newAnchor.tag += 1;
1227
1228                 if ( oldActive.credits() == 0 ) {
1229                     // state must be ACTIVE
1230                     if ( oldAnchor.count == 0 )
1231                         newAnchor.state = SBSTATE_FULL;
1232                     else {
1233                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1234                         newAnchor.count -= nMoreCredits;
1235                     }
1236                 }
1237             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1238
1239             if ( nCollision )
1240                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1241
1242             assert( newAnchor.state != SBSTATE_EMPTY );
1243
1244             if ( newAnchor.state == SBSTATE_FULL )
1245                 pProcHeap->stat.incDescFull();
1246             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1247                 update_active( pProcHeap, pDesc, nMoreCredits );
1248
1249             pProcHeap->stat.incAllocFromActive();
1250
1251             // block_header fields do not need to be set up here;
1252             // they were set in alloc_from_new_superblock
1253             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1254             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1255             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1256
1257             return reinterpret_cast<block_header *>( pAddr );
1258         }
1259
1260         /// Allocates from a partially filled superblock if possible
1261         block_header * alloc_from_partial( processor_heap * pProcHeap )
1262         {
1263         retry:
1264             superblock_desc * pDesc = pProcHeap->get_partial();
1265             if ( !pDesc )
1266                 return nullptr;
1267
1268             // reserve blocks
1269             anchor_tag  oldAnchor;
1270             anchor_tag  newAnchor;
1271             //byte * pAddr;
1272             unsigned int nMoreCredits = 0;
1273
1274             int nCollision = -1;
1275             do {
1276                 ++nCollision;
1277
1278                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1279                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1280                     free_superblock( pDesc );
1281                     goto retry;
1282                 }
1283
1284                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1285                 newAnchor.count -= nMoreCredits + 1;
1286                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1287                 newAnchor.tag += 1;
1288             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1289
1290             if ( nCollision )
1291                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1292
1293             if ( newAnchor.state == SBSTATE_FULL )
1294                 pProcHeap->stat.incDescFull();
1295
1296             // Now, the thread is guaranteed to have reserved one or more blocks
1297             // pop reserved block
1298             byte * pAddr;
1299             nCollision = -1;
1300             do {
1301                 ++nCollision;
1302
1303                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1304
1305                 assert( oldAnchor.avail < pDesc->nCapacity );
1306                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1307                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1308                 ++newAnchor.tag;
1309             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1310
1311             if ( nCollision )
1312                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1313
1314             assert( newAnchor.state != SBSTATE_EMPTY );
1315
1316             pProcHeap->stat.incAllocFromPartial();
1317
1318             if ( nMoreCredits > 0 )
1319                 update_active( pProcHeap, pDesc, nMoreCredits );
1320
            // block_header fields do not need to be set up here:
            // they were already initialized in alloc_from_new_superblock
1323             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1324             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1325             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1326
1327             return reinterpret_cast<block_header *>( pAddr );
1328         }
1329
        /// Allocates a new superblock and takes a block from it
1331         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1332         {
1333             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1334             assert( pDesc != nullptr );
1335             pDesc->pSB = new_superblock_buffer( pProcHeap );
1336
1337             anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1338             anchor.tag += 1;
1339
            // Build a singly linked list of free blocks in the superblock
1341             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1342             unsigned int nNext = 0;
1343             const unsigned int nBlockSize = pDesc->nBlockSize;
1344             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1345                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1346                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1347             }
1348             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
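
            // Informal note: each free block stores the index of the next free block, so the
            // list reads 1, 2, ..., nCapacity-1 starting from anchor.avail == 1 (block 0 is
            // returned to the caller below); the terminating link written above is never
            // followed because the anchor's count/credits bookkeeping runs out first.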
1349
1350             active_tag newActive;
1351             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1352
1353             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1354             anchor.state = SBSTATE_ACTIVE;
1355             pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1356
1357             active_tag curActive;
1358             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1359                 pProcHeap->stat.incAllocFromNew();
1360                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1361                 return reinterpret_cast<block_header *>( pDesc->pSB );
1362             }
1363
1364             free_superblock( pDesc );
1365             return nullptr;
1366         }
1367
        /// Finds the appropriate processor heap for the selected size class
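        /**
            The processor descriptor for the current CPU is created lazily: the first thread
            seen on that CPU builds a descriptor and publishes it with a CAS; a thread that
            loses the race frees its own copy and uses the published one (summary of the
            code below).
        */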
1369         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1370         {
1371             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1372
1373             unsigned int nProcessorId = m_Topology.current_processor();
1374             assert( nProcessorId < m_nProcessorCount );
1375
1376             if ( nProcessorId >= m_nProcessorCount )
1377                 nProcessorId = 0;
1378
1379             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1380             while ( !pDesc ) {
1381
1382                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1383                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1384                     pDesc = pNewDesc;
1385                     break;
1386                 }
1387                 free_processor_desc( pNewDesc );
1388             }
1389
1390             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1391         }
1392
1393         /// Updates active field of processor heap \p pProcHeap
1394         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1395         {
1396             assert( pProcHeap == pDesc->pProcHeap );
1397
1398             active_tag  nullActive;
1399             active_tag  newActive;
1400             newActive.set( pDesc, nCredits - 1 );
1401
1402             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1403                 return;
1404
1405             // Someone installed another active superblock.
1406             // Return credits to superblock and make it partial
1407
1408             anchor_tag  oldAnchor;
1409             anchor_tag  newAnchor;
1410
1411             do {
1412                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1413                 newAnchor.count += nCredits;
1414                 newAnchor.state = SBSTATE_PARTIAL;
1415             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1416
1417             pDesc->pProcHeap->add_partial( pDesc );
1418         }
1419
1420         /// Allocates new processor descriptor
1421         processor_desc * new_processor_desc( unsigned int nProcessorId )
1422         {
1423             CDS_UNUSED( nProcessorId );
1424             processor_desc * pDesc;
1425             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1426
1427             /*
1428                 Processor descriptor layout
1429
1430                 proc_desc -  64-byte alignment
1431                 page_heap[0] 64-byte alignment
1432                 page_heap[1] 64-byte alignment
1433                 ...
1434                 page_heap[P] 64-byte alignment
1435
1436                 proc_heap[0] 64-byte alignment
1437                 proc_heap[1] 64-byte alignment
1438                 ...
1439                 proc_heap[N] 64-byte alignment
1440             */
1441
1442             const size_t szDesc =
1443                 ( sizeof(processor_desc)
1444                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1445                     + c_nAlignment - 1
                ) / c_nAlignment;
1448
1449             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
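
            // Worked example with purely hypothetical numbers: assuming c_nAlignment == 64,
            // sizeof(processor_desc) == 200 and two page heaps of 8 bytes each,
            // szDesc = (200 + 2*8 + 63) / 64 = 4, so the processor_heap array starts at
            // offset 4 * 64 = 256 and szTotal = 256 + sizeof(processor_heap) * size-class count.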
1450
            static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) must be a multiple of c_nAlignment" );
1452
1453             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1454
1455             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1456             for ( size_t i = 0; i < nPageHeapCount; ++i )
1457                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1458
1459             // initialize processor heaps
1460             pDesc->arrProcHeap =
1461                 reinterpret_cast<processor_heap *>(
1462                     reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1463                     & ~(uintptr_t(c_nAlignment) - 1)
1464                 );
1465
1466             processor_heap * pProcHeap = pDesc->arrProcHeap;
1467             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1468             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1469                 new (pProcHeap) processor_heap();
1470                 pProcHeap->pProcDesc = pDesc;
1471                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1472                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1473                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1474                 else
1475                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1476             }
1477             return pDesc;
1478         }
1479
1480
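        /// Releases all superblocks owned by processor heap \p pProcHeap and destroys the heap object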
1481         void free_processor_heap( processor_heap * pProcHeap )
1482         {
1483             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1484                 superblock_desc * pDesc;
1485
1486                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1487                     free( pDesc->pSB );
1488                     m_AlignedHeap.free( pDesc );
1489                 }
1490
1491                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1492                 if ( pPartial ) {
1493                     free( pPartial->pSB );
1494                     m_AlignedHeap.free( pPartial );
1495                 }
1496
1497                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1498                 if ( pDesc ) {
1499                     free( pDesc->pSB );
1500                     m_AlignedHeap.free( pDesc );
1501                 }
1502             }
1503             else {
1504                 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1505                 superblock_desc * pDesc;
1506
1507                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1508                     pageHeap.free( pDesc->pSB );
1509                     m_AlignedHeap.free( pDesc );
1510                 }
1511
1512                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1513                 if ( pPartial ) {
1514                     pageHeap.free( pPartial->pSB );
1515                     m_AlignedHeap.free( pPartial );
1516                 }
1517
1518                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1519                 if ( pDesc ) {
1520                     pageHeap.free( pDesc->pSB );
1521                     m_AlignedHeap.free( pDesc );
1522                 }
1523             }
1524             pProcHeap->~processor_heap();
1525         }
1526
1527         /// Frees processor descriptor
1528         void free_processor_desc( processor_desc * pDesc )
1529         {
1530             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1531
1532             for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1533                 free_processor_heap( pDesc->arrProcHeap + j );
1534
1535             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1536                 m_AlignedHeap.free( pSBDesc );
1537
1538             for (size_t i = 0; i < nPageHeapCount; ++i )
1539                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1540
1541             //m_IntHeap.free( pDesc->pageHeaps );
1542             pDesc->pageHeaps = nullptr;
1543
1544             pDesc->processor_desc::~processor_desc();
1545             m_AlignedHeap.free( pDesc );
1546         }
1547
1548         /// Allocates new superblock descriptor
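        /**
            Descriptors are recycled through the per-processor free list \p listSBDescFree;
            a new descriptor is carved from the aligned heap only when that list is empty
            (summary of the code below).
        */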
1549         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1550         {
1551             anchor_tag anchor;
1552             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1553             if ( pDesc == nullptr ) {
1554                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1555                 assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );
1556
1557                 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1558                 anchor.tag = 0;
1559                 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1560
1561                 pProcHeap->stat.incDescAllocCount();
1562             }
1563             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1564             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1565             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1566             pDesc->pProcHeap = pProcHeap;
1567
1568             anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1569             anchor.avail = 1;
1570             pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1571
1572             return pDesc;
1573         }
1574
1575         /// Allocates superblock page
1576         byte * new_superblock_buffer( processor_heap * pProcHeap )
1577         {
1578             pProcHeap->stat.incBlockAllocated();
1579             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1580                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1581             }
1582             else {
1583                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1584             }
1585         }
1586
1587         /// Frees superblock descriptor and its page
1588         void free_superblock( superblock_desc * pDesc )
1589         {
1590             pDesc->pProcHeap->stat.incBlockDeallocated();
1591             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1592             if ( pDesc->pSB ) {
1593                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1594                     free( pDesc->pSB );
1595                 }
1596                 else {
1597                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1598                 }
1599             }
1600             pProcDesc->listSBDescFree.push( pDesc );
1601         }
1602
1603         /// Allocate memory block
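        /**
            Summary of the allocation path implemented below: a size with no matching size
            class is forwarded to the OS heap; otherwise the calling thread's processor heap
            is tried in order - the active superblock, then a partial superblock, then a
            freshly allocated superblock - until a block is obtained.
        */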
1604         block_header * int_alloc(
1605             size_t nSize    ///< Size of memory block to allocate in bytes
1606             )
1607         {
1608             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1609             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1610                 return alloc_from_OS( nSize );
1611             }
1612             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1613
1614             block_header * pBlock;
1615             processor_heap * pProcHeap;
1616             while ( true ) {
1617                 pProcHeap = find_heap( nSizeClassIndex );
1618                 if ( !pProcHeap )
1619                     return alloc_from_OS( nSize );
1620
1621                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1622                     break;
1623                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1624                     break;
1625                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1626                     break;
1627             }
1628
1629             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1630
1631             assert( pBlock != nullptr );
1632             return pBlock;
1633         }
1634
1635         //@endcond
1636     public:
1637         /// Heap constructor
1638         Heap()
1639         {
            // Explicit libcds initialization is needed since the Heap may be constructed
            // as a static object, before the application initializes libcds itself
1641             cds::Initialize();
1642
1643             m_nProcessorCount = m_Topology.processor_count();
1644             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1645                 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
            memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount );   // zero the atomic<> array; assumes all-zero bytes represent a null pointer
1647         }
1648
1649         /// Heap destructor
1650         /**
1651             The destructor frees all memory allocated by the heap.
1652         */
1653         ~Heap()
1654         {
1655             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1656                 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1657                 if ( pDesc )
1658                     free_processor_desc( pDesc );
1659             }
1660
1661             m_AlignedHeap.free( m_arrProcDesc );
1662
1663             // Explicit termination of libcds
1664             cds::Terminate();
1665         }
1666
1667         /// Allocate memory block
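        /**
            Returns a pointer to a memory block of at least \p nSize bytes.

            A minimal usage sketch (illustrative only; \p myHeap denotes some concrete
            instantiation of this class, the name is hypothetical):
            \code
            void * p = myHeap.alloc( 128 );   // allocate 128 bytes
            // ... use the block ...
            myHeap.free( p );                 // return the block to the same heap
            \endcode
        */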
1668         void * alloc(
1669             size_t nSize    ///< Size of memory block to allocate in bytes
1670         )
1671         {
1672             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1673
1674             // Bound checking is only for our blocks
1675             if ( !pBlock->isOSAllocated() ) {
1676                 // the block is allocated from our heap - bound checker is applicable
1677                 m_BoundChecker.make_trailer(
1678                     reinterpret_cast<byte *>(pBlock + 1),
1679                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1680                     nSize
1681                 );
1682             }
1683
1684             return pBlock + 1;
1685         }
1686
1687         /// Free previously allocated memory block
1688         void free(
1689             void * pMemory  ///< Pointer to memory block to free
1690         )
1691         {
1692             if ( !pMemory )
1693                 return;
1694
1695             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1696             block_header * pBlock = pRedirect->begin();
1697
1698             if ( pBlock->isOSAllocated() ) {
1699                 // Block has been allocated from OS
1700                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1701                 m_LargeHeap.free( pBlock );
1702                 return;
1703             }
1704
1705             assert( !pBlock->isAligned() );
1706             superblock_desc * pDesc = pBlock->desc();
1707
1708             m_BoundChecker.check_bounds(
1709                 pRedirect + 1,
1710                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1711                 pDesc->nBlockSize
1712             );
1713
1714
1715             anchor_tag oldAnchor;
1716             anchor_tag newAnchor;
1717             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1718
1719             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1720
1721             oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1722             do {
1723                 newAnchor = oldAnchor;
1724                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1725                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1726                 newAnchor.tag += 1;
1727
1728                 assert( oldAnchor.state != SBSTATE_EMPTY );
1729
1730                 if ( oldAnchor.state == SBSTATE_FULL )
1731                     newAnchor.state = SBSTATE_PARTIAL;
1732
1733                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1734                     //pProcHeap = pDesc->pProcHeap;
1735                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1736                     newAnchor.state = SBSTATE_EMPTY;
1737                 }
1738                 else
1739                     newAnchor.count += 1;
1740             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1741
1742             pProcHeap->stat.incFreeCount();
1743
1744             if ( newAnchor.state == SBSTATE_EMPTY ) {
1745                 if ( pProcHeap->unlink_partial( pDesc ))
1746                     free_superblock( pDesc );
1747             }
1748             else if (oldAnchor.state == SBSTATE_FULL ) {
1749                 assert( pProcHeap != nullptr );
1750                 pProcHeap->stat.decDescFull();
1751                 pProcHeap->add_partial( pDesc );
1752             }
1753         }
1754
1755         /// Reallocate memory block
1756         /**
1757             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1758             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1759
1760             If there is not enough available memory to expand the block to the given size,
1761             the original block is left unchanged, and \p nullptr is returned.
1762
            An aligned memory block cannot be reallocated: if \p pMemory has been allocated by \ref alloc_aligned,
            the return value is \p nullptr and the original block is left unchanged.
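
            When the current block is already large enough for the new size, the data is kept
            in place; otherwise a new block is allocated, the contents are copied, and the old
            block is freed (informal summary of the implementation below).

            Usage sketch (illustrative; \p myHeap is a hypothetical heap instance):
            \code
            void * p  = myHeap.alloc( 64 );
            void * p2 = myHeap.realloc( p, 256 );   // may move the block; returns nullptr on failure
            if ( p2 )
                p = p2;
            myHeap.free( p );
            \endcode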
1765         */
1766         void * realloc(
1767             void *  pMemory,    ///< Pointer to previously allocated memory block
1768             size_t  nNewSize    ///< New size of memory block, in bytes
1769         )
1770         {
1771             if ( nNewSize == 0 ) {
1772                 free( pMemory );
1773                 return nullptr;
1774             }
1775
1776             const size_t nOrigSize = nNewSize;
1777             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1778
1779             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1780
1781             // Reallocation of aligned block is not possible
1782             if ( pBlock->isAligned() ) {
1783                 assert( false );
1784                 return nullptr;
1785             }
1786
1787             if ( pBlock->isOSAllocated() ) {
1788                 // The block has been allocated from OS
1789                 size_t nCurSize = pBlock->getOSAllocSize();
1790
1791                 if ( nCurSize >= nNewSize )
1792                     return pMemory;
1793
1794                 // Grow block size
1795                 void * pNewBuf = alloc( nOrigSize );
1796                 if ( pNewBuf ) {
1797                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1798                     free( pMemory );
1799                 }
1800                 return pNewBuf;
1801             }
1802
1803             superblock_desc * pDesc = pBlock->desc();
            if ( pDesc->nBlockSize >= nNewSize ) {
                // The current block is large enough - reuse it in place
1806                 m_BoundChecker.make_trailer(
1807                     reinterpret_cast<byte *>(pBlock + 1),
1808                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1809                     nOrigSize
1810                     );
1811
1812                 return pMemory;
1813             }
1814
1815             void * pNew = alloc( nNewSize );
1816             if ( pNew ) {
1817                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1818                 free( pMemory );
1819                 return pNew;
1820             }
1821
1822             return nullptr;
1823         }
1824
1825         /// Allocate aligned memory block
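        /**
            The returned pointer is aligned to \p nAlignment bytes; the bit-mask arithmetic
            below implies that \p nAlignment should be a power of two (informal reading of
            the code). Blocks obtained here should be released with \ref free_aligned.

            Sketch (again illustrative; \p myHeap is a hypothetical instance):
            \code
            void * p = myHeap.alloc_aligned( 256, 64 );   // 256 bytes, 64-byte aligned
            myHeap.free_aligned( p );
            \endcode
        */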
1826         void * alloc_aligned(
1827             size_t nSize,       ///< Size of memory block to allocate in bytes
1828             size_t nAlignment   ///< Alignment
1829         )
1830         {
1831             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1832                 void * p = alloc( nSize );
1833                 assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
1834                 return p;
1835             }
1836
1837             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1838
1839             block_header * pRedirect;
1840             if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1841                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1842                 assert( pRedirect != pBlock );
1843                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1844
1845                 assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1846             }
1847             else
1848                 pRedirect = pBlock;
1849
1850
1851             // Bound checking is only for our blocks
1852             if ( !pBlock->isOSAllocated() ) {
1853                 // the block is allocated from our heap - bound checker is applicable
1854                 m_BoundChecker.make_trailer(
1855                     reinterpret_cast<byte *>(pRedirect + 1),
1856                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1857                     nSize
1858                 );
1859             }
1860
1861             return pRedirect + 1;
1862         }
1863
1864         /// Free aligned memory block previously allocated by \ref alloc_aligned
1865         void free_aligned(
1866             void * pMemory      ///< Pointer to memory block to free
1867         )
1868         {
1869             free( pMemory );
1870         }
1871
1872     public:
1873
1874         /// Get instant summary statistics
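        /**
            Aggregates the per-processor-heap statistics and the OS-allocation statistics
            into \p st; the snapshot is not atomic with respect to concurrent allocations
            (informal note).

            \code
            // illustrative sketch only
            summary_stat st;
            myHeap.summaryStat( st );
            \endcode
        */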
1875         void summaryStat( summary_stat& st )
1876         {
1877             size_t nProcHeapCount = m_SizeClassSelector.size();
1878             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1879                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1880                 if ( pProcDesc ) {
1881                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1882                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1883                         if ( pProcHeap ) {
1884                             st.add_procheap_stat( pProcHeap->stat );
1885                         }
1886                     }
1887                 }
1888             }
1889
1890             st.add_heap_stat( m_OSAllocStat );
1891         }
1892     };
1893
1894 }}} // namespace cds::memory::michael
1895
1896 #endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H