cds/memory/michael/allocator.h
1 //$$CDS-header$$
2
3 #ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
5
6 /*
7     Michael allocator implementation
8     Source:
9         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
10
11     Editions:
12     2011.09.07 khizmax  Optimization: small pages (about 64K) are allocated by the Heap::alloc call.
13                         This optimization allows the allocator to request system memory more regularly,
14                         in blocks of 1M, which leads to less memory fragmentation.
15     2011.01.02 khizmax  Created
16 */
17
18 #include <stdlib.h>
19 #include <mutex>        // unique_lock
20 #include <cds/init.h>
21 #include <cds/memory/michael/options.h>
22 #include <cds/memory/michael/bound_check.h>
23 #include <cds/memory/michael/procheap_stat.h>
24 #include <cds/memory/michael/osalloc_stat.h>
25
26 #include <cds/os/topology.h>
27 #include <cds/os/alloc_aligned.h>
28 #include <cds/lock/spinlock.h>
29 #include <cds/details/type_padding.h>
30 #include <cds/details/marked_ptr.h>
31 #include <cds/container/vyukov_mpmc_cycle_queue.h>
32 #include <cds/user_setup/cache_line.h>
33 #include <cds/details/lib.h>
34
35 #include <boost/intrusive/list.hpp>
36
37 namespace cds {
38     /// Memory-related algorithms: allocators etc.
39 namespace memory {
40     /// Michael's allocator (class Heap)
41     /**
42         \par Source
43             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
44
45         This namespace declares the main class Heap and a lot of helper classes.
46     */
47 namespace michael {
48
49     /// Size class
50     struct size_class {
51         unsigned int    nBlockSize  ;   ///< block size in bytes
52         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
53         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
54         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
55     };
56
57     /// %Heap based on system \p malloc and \p free functions
58     struct malloc_heap
59     {
60         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
61         static void * alloc( size_t nSize )
62         {
63             return ::malloc( nSize );
64         }
65         /// Returns a memory block to the system (\p free wrapper)
66         static void free( void * p )
67         {
68             ::free( p );
69         }
70     };
71
72     /// %Heap based on system-provided aligned \p malloc and \p free functions
73     struct aligned_malloc_heap
74     {
75         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
76         static void * alloc( size_t nSize, size_t nAlignment  )
77         {
78             return cds::OS::aligned_malloc( nSize, nAlignment );
79         }
80         /// Frees aligned memory block \p p that has been previously allocated by a \ref alloc call
81         static void free( void * p )
82         {
83             cds::OS::aligned_free( p );
84         }
85     };
86
87     /// Page heap based on \p Heap
88     /**
89         A page heap can allocate memory only in page-sized blocks.
90         \p Heap may be any heap that provides an interface like \ref malloc_heap.
91
92         This class is one of the available implementations of the opt::page_heap option.
93     */
94     template <class Heap = malloc_heap>
95     class page_allocator: public Heap
96     {
97         //@cond
98         typedef Heap base_class;
99         size_t  m_nPageSize;
100         //@endcond
101
102     public:
103         /// Initializes heap
104         page_allocator(
105             size_t nPageSize    ///< page size in bytes
106         )
107             : m_nPageSize( nPageSize )
108         {}
109
110         /// Allocate new page
111         void * alloc()
112         {
113             return base_class::alloc( m_nPageSize );
114         }
115
116         /// Free page \p pPage
117         void free( void * pPage )
118         {
119             base_class::free( pPage );
120         }
121     };
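
    /*
        Usage sketch (illustration only, not part of the library): any type with a
        malloc_heap-like static interface can be plugged in as the \p Heap parameter.
        The "new_delete_heap" name below is hypothetical.

            struct new_delete_heap {
                static void * alloc( size_t nSize ) { return ::operator new( nSize ); }
                static void free( void * p )        { ::operator delete( p ); }
            };

            cds::memory::michael::page_allocator< new_delete_heap > pageHeap( 64 * 1024 );
            void * pPage = pageHeap.alloc();    // allocates one 64K page via new_delete_heap
            pageHeap.free( pPage );             // returns the page to new_delete_heap
    */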
122
123     /// Page cacheable heap
124     /**
125         To improve performance this allocator maintains a small list of free pages.
126         Like \ref page_allocator, it can allocate memory only in page-sized blocks.
127
128         Template parameters:
129             \li \p FreeListCapacity - capacity of the free-list, default value is 64 pages
130             \li \p Heap - any heap that provides an interface like \ref malloc_heap
131
132         This class is one of the available implementations of the opt::page_heap option; see the usage sketch after this class.
133     */
134     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
135     class page_cached_allocator: public page_allocator<Heap>
136     {
137         //@cond
138         typedef page_allocator<Heap> base_class;
139
140 #ifdef _DEBUG
141         struct make_null_ptr {
142             void operator ()(void *& p)
143             {
144                 p = nullptr;
145             }
146         };
147 #endif
148         struct free_list_traits : public cds::container::vyukov_queue::traits
149         {
150             typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
151 #ifdef _DEBUG
152             typedef make_null_ptr value_cleaner;
153 #endif
154         };
155         typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
156
157         free_list   m_FreeList;
158         //@endcond
159
160     public:
161         /// Initializes heap
162         page_cached_allocator(
163             size_t nPageSize    ///< page size in bytes
164         )
165             : base_class( nPageSize )
166             , m_FreeList( FreeListCapacity )
167         {}
168
169         //@cond
170         ~page_cached_allocator()
171         {
172             void * pPage;
173             while ( m_FreeList.pop(pPage) )
174                 base_class::free( pPage );
175         }
176         //@endcond
177
178         /// Allocate new page
179         void * alloc()
180         {
181             void * pPage;
182             if ( !m_FreeList.pop( pPage ) )
183                 pPage = base_class::alloc();
184             return pPage;
185         }
186
187         /// Free page \p pPage
188         void free( void * pPage )
189         {
190             if ( !m_FreeList.push( pPage ))
191                 base_class::free( pPage );
192         }
193     };
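
    /*
        Usage sketch (illustration only, not part of the library). A page_cached_allocator keeps
        up to \p FreeListCapacity freed pages in a lock-free queue and reuses them on subsequent
        alloc() calls:

            cds::memory::michael::page_cached_allocator<16> pageHeap( 1024 * 1024 );
            void * pPage = pageHeap.alloc();    // takes a 1M page from the free-list if it is not empty,
                                                // otherwise allocates it from the underlying heap
            pageHeap.free( pPage );             // the page goes back to the free-list rather than to the
                                                // underlying heap, unless the free-list is already full
    */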
194
195     /// Implementation of opt::sizeclass_selector option
196     /**
197         The default size-class selector can manage memory blocks up to 64K in size.
198     */
199     class CDS_EXPORT_API default_sizeclass_selector
200     {
201         //@cond
202         /// Count of different size-classes
203         static const size_t c_nSizeClassCount = 63;
204
205         /// Max block size
206         static const size_t c_nMaxBlockSize = 64 * 1024;
207
208         /// Page size of type 0 (64K)
209         static const unsigned int c_nPage64K = 64 * 1024 - 32;
210
211         /// Page size of type 1 (1M)
212         static const unsigned int c_nPage1M = 1024 * 1024;
213
214         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
215         static size_class const m_szClass[c_nSizeClassCount];
216         static unsigned char const m_szClassMap[];
217         //@endcond
218     public:
219         /// Type of size-class index
220         typedef unsigned int sizeclass_index;
221
222 #ifdef _DEBUG
223         default_sizeclass_selector();
224 #endif
225
226         /// "No size class" index
227         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
228
229         /// Returns size-class count
230         static sizeclass_index size()
231         {
232             return c_nSizeClassCount;
233         }
234
235         /// Returns page size in bytes for given page type \p nPageType
236         static size_t page_size(size_t nPageType )
237         {
238             switch (nPageType) {
239             case 0:
240                 return c_nPage64K;
241             case 1:
242                 return c_nPage1M;
243             default:
244                 assert(false)   ;   // anything forgotten?..
245             }
246             return c_nPage1M;
247         }
248
249         /// Returns the number of supported page types
250         /**
251             This class supports pages of two types: a 64K page for small objects and a 1M page for other objects.
252         */
253         static size_t pageTypeCount()
254         {
255             return 2;
256         }
257
258         /// Returns size-class index for \p nSize
259         /**
260             For large blocks that cannot be allocated by Michael's allocator,
261             the function returns \ref c_nNoSizeClass (that is, <tt>(sizeclass_index) -1</tt>).
262         */
263         static sizeclass_index find( size_t nSize )
264         {
265             if ( nSize > c_nMaxBlockSize ) {
266                 // Too large block - allocate from system
267                 return c_nNoSizeClass;
268             }
269             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
270             assert( nSize <= m_szClassBounds[ szClass ] );
271             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
272
273             return szClass;
274         }
275
276         /// Gets details::size_class struct for size-class index \p nIndex
277         static const size_class * at( sizeclass_index nIndex )
278         {
279             assert( nIndex < size() );
280             return m_szClass + nIndex;
281         }
282     };
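
    /*
        Illustration only (not part of the library) of how a size-class selector is typically used:

            typedef cds::memory::michael::default_sizeclass_selector selector;

            size_t nSize = 100;
            selector::sizeclass_index idx = selector::find( nSize );
            if ( idx == selector::c_nNoSizeClass ) {
                // nSize is greater than 64K - the heap allocates such a block directly from the OS
            }
            else {
                // the selected size class holds blocks of at least nSize bytes
                unsigned int nBlockSize = selector::at( idx )->nBlockSize;
            }
    */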
283
284     //@cond
285     namespace details {
286         struct free_list_tag;
287         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
288
289         struct partial_list_tag;
290         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
291
292         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
293         {};
294     }
295     //@endcond
296
297     /// List of free superblock descriptors
298     /**
299         This class is an implementation of the \ref opt::free_list option.
300     */
301     template <class Lock, class T = details::intrusive_superblock_desc>
302     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
303     {
304         //@cond
305         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
306     public:
307         typedef details::free_list_locked_hook item_hook;
308         typedef Lock lock_type;
309     protected:
310         typedef std::unique_lock<lock_type>   auto_lock;
311
312         mutable lock_type   m_access;
313         //@endcond
314
315     public:
316         /// Rebinds to other item type \p T2
317         template <class T2>
318         struct rebind {
319             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
320         };
321
322     public:
323         /// Push superblock descriptor to free-list
324         void push( T * pDesc )
325         {
326             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
327             auto_lock al(m_access);
328             base_class::push_back( *pDesc );
329         }
330
331         /// Pop superblock descriptor from free-list
332         T *   pop()
333         {
334             auto_lock al(m_access);
335             if ( base_class::empty() )
336                 return nullptr;
337             T& rDesc = base_class::front();
338             base_class::pop_front();
339             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
340             return &rDesc;
341         }
342
343         /// Returns current count of superblocks in free-list
344         size_t  size() const
345         {
346             auto_lock al(m_access);
347             return base_class::size();
348         }
349     };
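
    /*
        Minimal sketch (illustration only): the heap keeps unused superblock descriptors in this
        list and reuses them instead of allocating new ones.

            typedef cds::memory::michael::details::intrusive_superblock_desc desc_type;
            cds::memory::michael::free_list_locked< cds::lock::Spin > freeDescList;

            desc_type * pDesc = freeDescList.pop();     // returns nullptr if the list is empty
            if ( !pDesc ) {
                // allocate and construct a fresh descriptor here
            }
            // ... later, when the descriptor becomes unused:
            //     freeDescList.push( pDesc );
    */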
350
351     /// List of partially filled superblock descriptors
352     /**
353         This class is an implementation of the \ref opt::partial_list option.
354     */
355     template <class Lock, class T = details::intrusive_superblock_desc>
356     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
357     {
358         //@cond
359         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
360     public:
361         typedef details::partial_list_locked_hook item_hook;
362         typedef Lock    lock_type;
363     protected:
364         typedef std::unique_lock<lock_type>   auto_lock;
365
366         mutable lock_type   m_access;
367         //@endcond
368
369     public:
370         /// Rebinds to other item type \p T2
371         template <class T2>
372         struct rebind {
373             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
374         };
375
376     public:
377         /// Push a superblock \p pDesc to the list
378         void    push( T * pDesc )
379         {
380             auto_lock al( m_access );
381             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
382             base_class::push_back( *pDesc );
383         }
384
385         /// Pop superblock from the list
386         T * pop()
387         {
388             auto_lock al( m_access );
389             if ( base_class::empty() )
390                 return nullptr;
391             T& rDesc = base_class::front();
392             base_class::pop_front();
393             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
394             return &rDesc;
395         }
396
397         /// Removes the descriptor \p pDesc from the partial list
398         bool unlink( T * pDesc )
399         {
400             assert(pDesc != nullptr);
401             auto_lock al( m_access );
402             // !inited(pDesc) means that pDesc is currently linked into the partial list
403             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
404                 base_class::erase( base_class::iterator_to( *pDesc ) );
405                 return true;
406             }
407             return false;
408         }
409
410         /// Returns the count of elements in the list
411         size_t size() const
412         {
413             auto_lock al( m_access );
414             return base_class::size();
415         }
416     };
417
418     /// Summary processor heap statistics
419     /**
420         Summary heap statistics for use with the \p Heap::summaryStat function.
421     */
422     struct summary_stat
423     {
424         size_t      nAllocFromActive    ;  ///< Count of allocations from an active superblock
425         size_t      nAllocFromPartial   ;  ///< Count of allocations from a partial superblock
426         size_t      nAllocFromNew       ;  ///< Count of allocations from a new superblock
427         size_t      nFreeCount          ;  ///< Count of \p free function calls
428         size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
429         size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
430         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors allocated
431         size_t      nDescFull           ;  ///< Count of full superblocks
432         atomic64u_t nBytesAllocated     ;  ///< Count of allocated bytes (for heap-managed memory blocks)
433         atomic64u_t nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap-managed memory blocks)
434
435         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
436         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
437         atomic64u_t nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
438         atomic64_t  nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
439
440         // Internal contention indicators
441         /// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active internal function
442         /**
443             Contention indicator. The lower the value, the better.
444         */
445         size_t      nActiveDescCASFailureCount;
446         /// CAS failure counter for updating the \p anchor field of the active superblock in the \p alloc_from_active internal function
447         /**
448             Contention indicator. The lower the value, the better.
449         */
450         size_t      nActiveAnchorCASFailureCount;
451         /// CAS failure counter for updating the \p anchor field of a partial superblock while reserving blocks in the \p alloc_from_partial internal function
452         /**
453             Contention indicator. The lower the value, the better.
454         */
455         size_t      nPartialDescCASFailureCount;
456         /// CAS failure counter for updating the \p anchor field of a partial superblock while popping a block in the \p alloc_from_partial internal function
457         /**
458             Contention indicator. The lower the value, the better.
459         */
460         size_t      nPartialAnchorCASFailureCount;
461
462
463     public:
464         /// Constructs empty statistics. All counters are zero.
465         summary_stat()
466         {
467             clear();
468         }
469
470         /// Difference statistics
471         /**
472             This operator computes the difference between \p *this and \p stat and stores the result in \p *this.
473             Returns \p *this (see the usage sketch after this class).
474         */
475         summary_stat& operator -=( const summary_stat& stat )
476         {
477             nAllocFromActive    -= stat.nAllocFromActive;
478             nAllocFromPartial   -= stat.nAllocFromPartial;
479             nAllocFromNew       -= stat.nAllocFromNew;
480             nFreeCount          -= stat.nFreeCount;
481             nPageAllocCount     -= stat.nPageAllocCount;
482             nPageDeallocCount   -= stat.nPageDeallocCount;
483             nDescAllocCount     -= stat.nDescAllocCount;
484             nDescFull           -= stat.nDescFull;
485             nBytesAllocated     -= stat.nBytesAllocated;
486             nBytesDeallocated   -= stat.nBytesDeallocated;
487
488             nSysAllocCount      -= stat.nSysAllocCount;
489             nSysFreeCount       -= stat.nSysFreeCount;
490             nSysBytesAllocated  -= stat.nSysBytesAllocated;
491             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
492
493             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
494             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
495             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
496             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
497
498             return *this;
499         }
500
501         /// Clears statistics
502         /**
503             All counters are set to zero.
504         */
505         void clear()
506         {
507             memset( this, 0, sizeof(*this));
508         }
509
510         //@cond
511         template <typename Stat>
512         summary_stat& add_procheap_stat( const Stat& stat )
513         {
514             nAllocFromActive    += stat.allocFromActive();
515             nAllocFromPartial   += stat.allocFromPartial();
516             nAllocFromNew       += stat.allocFromNew();
517             nFreeCount          += stat.freeCount();
518             nPageAllocCount     += stat.blockAllocated();
519             nPageDeallocCount   += stat.blockDeallocated();
520             nDescAllocCount     += stat.descAllocCount();
521             nDescFull           += stat.descFull();
522             nBytesAllocated     += stat.allocatedBytes();
523             nBytesDeallocated   += stat.deallocatedBytes();
524
525             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
526             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
527             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
528             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
529
530             return *this;
531         }
532
533         template <typename Stat>
534         summary_stat& add_heap_stat( const Stat& stat )
535         {
536             nSysAllocCount      += stat.allocCount();
537             nSysFreeCount       += stat.freeCount();
538
539             nSysBytesAllocated  += stat.allocatedBytes();
540             nSysBytesDeallocated+= stat.deallocatedBytes();
541
542             return *this;
543         }
544         //@endcond
545     };
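
    /*
        Usage sketch (illustration only), assuming Heap::summaryStat( summary_stat& ) fills a
        snapshot by reference (the member is referenced above and defined later in this file).
        The difference of two snapshots gives the statistics for the interval between them:

            cds::memory::michael::Heap<>        heap;
            cds::memory::michael::summary_stat  before, after;

            heap.summaryStat( before );
            // ... allocations and deallocations ...
            heap.summaryStat( after );

            after -= before;    // 'after' now holds the counters accumulated in between
    */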
546
547     /// Michael's allocator
548     /**
549         This class provides the base functionality of Michael's allocator. It does not provide
550         the interface described by \p std::allocator; therefore, we call it a heap, not an allocator.
551         The heap interface is closer to the semantics of the \p malloc / \p free system functions.
552         The heap supports allocation of aligned and unaligned data.
553
554         The algorithm is based on a simplified version of
555             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
556
557         which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
558             \li [2002] Emery Berger "Memory Management for High-Performance Application", PhD thesis
559
560         This is a powerful, scalable, fully customizable heap with a lock-free fast path,
561         developed specifically for multi-threaded applications.
562         With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
563         You can manually bind any thread to any arena ("processor"). With the opt::sizeclass_selector option you can manage
564         allocation granularity. With opt::page_heap you can utilize OS-provided features for page allocation
565         like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
566         The opt::check_bounds feature can help you find memory buffer overflows.
567
568         Brief algorithm description from Michael's work:
569
570         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
571         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
572         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
573         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
574         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
575         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
576         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
577         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
578         in a lock-free manner.
579
580         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
581         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
582         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
583         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
584         available blocks of its original superblock by atomically updating its descriptor.
585
586         <b>Constraint</b>: one superblock may contain up to 2048 blocks. This constrains the maximum
587         superblock size.
588
589         Available \p Options:
590         - \ref opt::sys_topology - class that describes system topology needed for allocator.
591             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
592         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
593             Default is \ref malloc_heap.
594         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
595             Default is \ref aligned_malloc_heap
596         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
597             Default is \ref page_cached_allocator
598         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
599             for incoming allocation request.
600             Default is \ref default_sizeclass_selector
601         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
602             Default is \ref free_list_locked
603         - \ref opt::partial_list - option setter for a class to manage a list of partial filled superblocks
604             Default is \ref partial_list_locked
605         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
606             that is maintained by the heap.
607             Default is \ref procheap_empty_stat
608         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
609             allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector);
610             it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated
611             directly from the OS, and the \p os_allocated_stat option sets the class that gathers statistics for such blocks.
612             Default is \ref os_allocated_empty
613         - \ref opt::check_bounds - a bound checker.
614             Default is no bound checker (cds::opt::none)
615
616         \par Usage:
617         The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
618
619         \code
620         #include <cds/memory/michael/allocator.h>
621
622         // Heap with explicitly defined options:
623         cds::memory::michael::Heap<
624             opt::aligned_heap< aligned_malloc_heap >,
625             opt::page_heap< page_cached_allocator<16, malloc_heap> >
626         >   myHeap;
627
628         // Heap with default options:
629         cds::memory::michael::Heap<>    myDefHeap;
630         \endcode
631
632         \par How to make std-like allocator
633
634         There are significant differences between the heap and the <tt>std::allocator</tt> interface:
635             - The heap is stateful, while \p std::allocator is stateless.
636             - The heap has many more template parameters than \p std::allocator
637             - The heap has a low-level interface for memory allocation only, unlike the allocator
638                 interface, which can construct/destroy objects of any type T.
639
640         To wrap the heap in a \p std::allocator -like interface you should:
641             - Declare an object of class cds::memory::michael::Heap with the necessary
642                 template parameters; this is usually a static object
643             - Create a class with the \p std::allocator interface that uses the functions of the heap.
644         \code
645         #include <cds/memory/michael/allocator.h>
646
647         template <class T>
648         class MichaelAllocator
649         {
650             typedef std::allocator<T>               std_allocator;
651             typedef cds::memory::michael::Heap<>    michael_heap;
652
653             // Michael heap static object
654             static michael_heap     s_Heap;
655         public:
656             // Declare typedefs from std::allocator
657             typedef typename std_allocator::const_pointer   const_pointer;
658             typedef typename std_allocator::pointer         pointer;
659             typedef typename std_allocator::const_reference const_reference;
660             typedef typename std_allocator::reference       reference;
661             typedef typename std_allocator::difference_type difference_type;
662             typedef typename std_allocator::size_type       size_type;
663             typedef typename std_allocator::value_type      value_type;
664
665             // Allocation function
666             pointer allocate( size_type _Count, const void* _Hint )
667             {
668                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
669             }
670
671             // Deallocation function
672             void deallocate( pointer _Ptr, size_type _Count )
673             {
674                 s_Heap.free( _Ptr );
675             }
676
677             // Other std::allocator specific functions: address, construct, destroy, etc.
678             ...
679
680             // Rebinding allocator to other type
681             template <class _Other>
682             struct rebind {
683                 typedef MichaelAllocator<_Other> other;
684             };
685         };
686
687         // In .cpp file:
688         template <class T> typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
689
690         \endcode
691     */
692     template <typename... Options>
693     class Heap {
694     protected:
695
696         //@cond
697         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
698         static const unsigned int c_nDefaultBlockAlignment = 8;
699
700         struct default_options {
701             typedef cds::OS::topology           sys_topology;
702             typedef malloc_heap                 system_heap;
703             typedef page_cached_allocator<>     page_heap;
704             typedef aligned_malloc_heap         aligned_heap;
705             typedef default_sizeclass_selector  sizeclass_selector;
706             typedef free_list_locked<cds::lock::Spin>  free_list;
707             typedef partial_list_locked<cds::lock::Spin>    partial_list;
708             typedef procheap_empty_stat         procheap_stat;
709             typedef os_allocated_empty          os_allocated_stat;
710             typedef cds::opt::none              check_bounds;
711         };
712         //@endcond
713
714     protected:
715         //@cond
716         typedef typename opt::make_options<default_options, Options...>::type   options;
717         //@endcond
718
719         //@cond
720         typedef unsigned char   byte;
721         //@endcond
722     public:
723         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
724         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
725         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
726         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
727         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
728         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
729         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
730         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
731
732         // forward declarations
733         //@cond
734         struct superblock_desc;
735         struct processor_heap_base;
736         struct processor_desc;
737         //@endcond
738
739         /// Superblock states
740         /**
741             A superblock can be in one of four states: \p ACTIVE, \p FULL,
742             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
743             superblock in a heap, or if a thread intends to try to install it
744             as such. A superblock is \p FULL if all its blocks are either allocated
745             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
746             and contains unreserved available blocks. A superblock is
747             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
748         */
749         enum superblock_state {
750             SBSTATE_ACTIVE  = 0,    ///< superblock is active
751             SBSTATE_FULL    = 1,    ///< superblock is full
752             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
753             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
754         };
755
756         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
757
758         /// Anchor of the superblock descriptor. Updated by CAS
759         struct anchor_tag {
760             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
761             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
762             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
763             unsigned long long    tag:40    ;   ///< ABA prevention tag
764         };
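
        /*
            Note (illustration only): the four bit-fields above pack into a single 64-bit word
            (11 + 11 + 2 + 40 = 64 bits), so the whole anchor can be updated with one CAS on
            atomics::atomic<anchor_tag>. The 11-bit \p avail / \p count fields are what limit a
            superblock to \ref c_nMaxBlockInSuperBlock (2048) blocks. A compile-time check could
            look like:

                static_assert( sizeof(anchor_tag) == sizeof(unsigned long long),
                               "anchor_tag must fit into a single machine word for CAS" );
        */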
765
766         /// Superblock descriptor
767         struct superblock_desc
768             : public options::free_list::item_hook
769             , public options::partial_list::item_hook
770         {
771             atomics::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
772             byte *              pSB         ;   ///< ptr to superblock
773             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
774             unsigned int        nBlockSize  ;   ///< block size in bytes
775             unsigned int        nCapacity   ;   ///< superblock size/block size
776
777             //@cond
778             superblock_desc()
779                 : pSB(nullptr)
780                 , pProcHeap( nullptr )
781             {}
782             //@endcond
783         };
784
785         //@cond
786         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
787         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
788         //@endcond
789
790 #if CDS_BUILD_BITS == 32
791         /// Allocated block header
792         /**
793             Each allocated block has an 8-byte header.
794             The header contains a pointer to the owner superblock descriptor and a redirection flag.
795             If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
796             \code
797                 +---------------+
798                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
799                 +---------------+
800                 |               | <- memory allocated
801                 |   memory      |
802                 |               |
803                 +---------------+
804             \endcode
805             If the block has been allocated by \ref alloc_aligned, the returned pointer may have to be shifted to satisfy the requested alignment.
806             In this case the redirection flag is 1 and the block's structure is:
807             \code
808                 +---------------+
809             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
810             |   +---------------+
811             |   |   padding     |
812             |   |   (unused)    |
813             |   |               |
814             |   +---------------+
815             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
816                 +---------------+
817                 |               | <- memory allocated
818                 |   memory      |
819                 |               |
820                 +---------------+
821             \endcode
822         */
823         class block_header
824         {
825         //@cond
826             enum {
827                 bitAligned = 1,
828                 bitOSAllocated = 2
829             };
830
831             union {
832                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
833                 atomic32u_t         nSize       ;   // block size (for blocks allocated from the OS)
834             };
835             atomic32u_t         nFlags;
836
837         public:
838             void  set( superblock_desc * pdesc, atomic32u_t isAligned )
839             {
840                 pDesc = pdesc;
841                 nFlags = isAligned ? bitAligned : 0;
842             }
843
844             superblock_desc * desc()
845             {
846                 assert( (nFlags & bitOSAllocated) == 0 );
847                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
848             }
849
850             block_header * begin()
851             {
852                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
853             }
854
855             bool isAligned() const
856             {
857                 return (nFlags & bitAligned) != 0;
858             }
859
860             bool isOSAllocated() const
861             {
862                 return (nFlags & bitOSAllocated) != 0;
863             }
864
865             void setOSAllocated( size_t sz )
866             {
867                 nSize = sz;
868                 nFlags = bitOSAllocated;
869             }
870
871             size_t getOSAllocSize() const
872             {
873                 assert( isOSAllocated() );
874                 return nSize;
875             }
876
877         //@endcond
878         };
879 #elif CDS_BUILD_BITS == 64
880         //@cond
881         class block_header
882         {
883             enum {
884                 bitAligned = 1,
885                 bitOSAllocated = 2
886             };
887             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
888             // If bitOSAllocated is set the pDesc contains size of memory block
889             // allocated from OS
890             marked_desc_ptr     pDesc;
891         public:
892             void  set( superblock_desc * pdesc, atomic32u_t isAligned )
893             {
894                 pDesc = marked_desc_ptr( pdesc, isAligned );
895             }
896
897             superblock_desc * desc()
898             {
899                 assert( !isOSAllocated() );
900                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
901             }
902
903             block_header * begin()
904             {
905                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
906             }
907
908             bool isAligned() const
909             {
910                 return (pDesc.bits() & bitAligned) != 0;
911             }
912
913             bool isOSAllocated() const
914             {
915                 return (pDesc.bits() & bitOSAllocated) != 0;
916             }
917
918             void setOSAllocated( size_t nSize )
919             {
920
921                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
922             }
923
924             size_t getOSAllocSize() const
925             {
926                 assert( isOSAllocated() );
927                 return reinterpret_cast<uptr_atomic_t>( pDesc.ptr() ) >> 2;
928             }
929
930         };
931         //@endcond
932 #else
933 #       error "Unexpected value of CDS_BUILD_BITS"
934 #endif  // CDS_BUILD_BITS
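
        /*
            Sketch (illustration only) of how the heap interprets a user pointer on deallocation.
            As the diagrams above show, a block_header is laid out immediately before the memory
            returned to the user, so a free() implementation can recover it and dispatch on the
            header flags:

                block_header * pBlock = reinterpret_cast<block_header *>( pUser ) - 1;
                if ( pBlock->isOSAllocated() ) {
                    // the block came directly from the OS; getOSAllocSize() gives its size
                }
                else {
                    // begin() follows the alignment redirection (if any); desc() yields the
                    // owner superblock descriptor whose anchor is then updated by CAS
                }
        */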
935
936         //@cond
937         struct free_block_header: block_header {
938             unsigned int    nNextFree;
939         };
940         //@endcond
941
942 #if CDS_BUILD_BITS == 32
943         /// Processor heap's \p active field
944         /**
945             The \p active field in the processor heap structure is primarily a pointer to the descriptor
946             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
947             guaranteed that the active superblock has at least one block available for reservation.
948             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
949             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
950             of blocks available for reservation in the active superblock less one. That is, if the value
951             of credits is n, then the active superblock contains n+1 blocks available for reservation
952             through the \p active field. Note that the number of blocks in a superblock is not limited
953             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
954             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
955             atomically decrements credits while validating that the active superblock is still valid.
956         */
957         class active_tag {
958         //@cond
959             superblock_desc *       pDesc;
960             atomic32u_t             nCredits;
961
962         public:
963             static const unsigned int c_nMaxCredits = 0 - 1;
964
965         public:
966             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
967                 : pDesc( nullptr )
968                 , nCredits(0)
969             {}
970
971             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
972             ~active_tag() CDS_NOEXCEPT = default;
973             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
974 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
975             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
976             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
977 #       endif
978
979             /// Returns pointer to superblock descriptor
980             superblock_desc * ptr() const
981             {
982                 return pDesc;
983             }
984
985             /// Sets superblock descriptor
986             void ptr( superblock_desc * p )
987             {
988                 pDesc = p;
989             }
990
991             unsigned int credits() const
992             {
993                 return nCredits;
994             }
995
996             void credits( unsigned int n )
997             {
998                 nCredits = n;
999             }
1000
1001             void clear()
1002             {
1003                 pDesc = nullptr;
1004                 nCredits = 0;
1005             }
1006
1007             void set( superblock_desc * pSB, unsigned int n )
1008             {
1009                 pDesc = pSB;
1010                 nCredits = n;
1011             }
1012         //@endcond
1013         };
1014 #elif CDS_BUILD_BITS == 64
1015         //@cond
1016         class active_tag
1017         {
1018         public:
1019             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1020         protected:
1021             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1022             marked_desc_ptr pDesc;
1023
1024         public:
1025             active_tag() CDS_NOEXCEPT
1026                 : pDesc( nullptr )
1027             {}
1028             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1029             //active_tag() CDS_NOEXCEPT = default;
1030             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1031             ~active_tag() CDS_NOEXCEPT = default;
1032             active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1033 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1034             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1035             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1036 #       endif
1037             superblock_desc *    ptr() const
1038             {
1039                 return pDesc.ptr();
1040             }
1041
1042             void ptr( superblock_desc * p )
1043             {
1044                 assert( (reinterpret_cast<uptr_atomic_t>(p) & c_nMaxCredits) == 0 );
1045                 pDesc = marked_desc_ptr( p, pDesc.bits());
1046             }
1047
1048             unsigned int credits()
1049             {
1050                 return (unsigned int) pDesc.bits();
1051             }
1052
1053             void credits( unsigned int n )
1054             {
1055                 assert( n <= c_nMaxCredits );
1056                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1057             }
1058
1059             void clear()
1060             {
1061                 pDesc = marked_desc_ptr();
1062             }
1063
1064             void set( superblock_desc * pSB, unsigned int n )
1065             {
1066                 assert( (reinterpret_cast<uptr_atomic_t>(pSB) & c_nMaxCredits) == 0 );
1067                 pDesc = marked_desc_ptr( pSB, n );
1068             }
1069
1070         };
1071         //@endcond
1072 #else
1073 #       error "Unexpected value of CDS_BUILD_BITS"
1074 #endif  // CDS_BUILD_BITS
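
        /*
            Note (illustration only) on the 64-bit \p active_tag packing: superblock descriptors
            are aligned to \ref c_nAlignment (the cache-line size), so the low bits of the
            descriptor pointer are always zero and are reused as the credits sub-field via
            cds::details::marked_ptr; for a 64-byte cache line c_nMaxCredits = 63 (0x3F).
            As described above for the 32-bit variant, credits() == n means the active superblock
            still has n + 1 blocks available for reservation:

                active_tag a;
                a.set( pDesc, 5 );              // pDesc is a suitably aligned superblock_desc*
                unsigned int n = a.credits();   // 5: the superblock has n + 1 = 6 blocks available
        */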
1075
1076
1077         /// Processor heap
1078         struct processor_heap_base
1079         {
1080             CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1081             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1082             const size_class *  pSizeClass  ;   ///< pointer to size class
1083             atomics::atomic<superblock_desc *>   pPartial    ;   ///< pointer to partial filled superblock (may be \p nullptr)
1084             partial_list        partialList ;   ///< list of partial filled superblocks owned by the processor heap
1085             unsigned int        nPageIdx    ;   ///< page size-class index; equals \ref c_nPageSelfAllocation for a "small page"
1086
1087             /// Small page marker
1088             /**
1089                 If the page is small and can be allocated by the Heap itself, \p nPageIdx is \p c_nPageSelfAllocation.
1090                 This optimization allows the heap to request system memory more regularly, in blocks of 1M, which leads
1091                 to less memory fragmentation.
1092             */
1093             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1094
1095             procheap_stat       stat        ;   ///< heap statistics
1096             //processor_heap_statistics   stat;
1097
1098             //@cond
1099             processor_heap_base() CDS_NOEXCEPT
1100                 : pProcDesc( nullptr )
1101                 , pSizeClass( nullptr )
1102                 , pPartial( nullptr )
1103             {
1104                 assert( (reinterpret_cast<uptr_atomic_t>(this) & (c_nAlignment - 1)) == 0 );
1105             }
1106             //@endcond
1107
1108             /// Get partial superblock owned by the processor heap
1109             superblock_desc * get_partial()
1110             {
1111                 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1112                 do {
1113                     if ( !pDesc ) {
1114                         pDesc =  partialList.pop();
1115                         break;
1116                     }
1117                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1118
1119                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1120                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1121                 return pDesc;
1122             }
1123
1124             /// Add partial superblock \p pDesc to the list
1125             void add_partial( superblock_desc * pDesc )
1126             {
1127                 assert( pPartial != pDesc );
1128                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1129
1130                 superblock_desc * pCur = nullptr;
1131                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1132                     partialList.push( pDesc );
1133             }
1134
1135
1136             /// Remove superblock \p pDesc from the list of partial superblocks
1137             bool unlink_partial( superblock_desc * pDesc )
1138             {
1139                 return partialList.unlink( pDesc );
1140             }
1141         };
1142
1143         /// Processor heap aligned to \ref c_nAlignment
1144         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1145
1146         /// Processor descriptor
1147         struct processor_desc
1148         {
1149             processor_heap *    arrProcHeap     ; ///< array of processor heaps
1150             free_list           listSBDescFree  ; ///< list of free superblock descriptors
1151             page_heap *         pageHeaps       ; ///< array of page heaps (one for each page size)
1152
1153             //@cond
1154             processor_desc()
1155                 : arrProcHeap( nullptr )
1156                 , pageHeaps( nullptr )
1157             {}
1158             //@endcond
1159         };
1160
1161
1162     protected:
1163         sys_topology        m_Topology           ;  ///< System topology
1164         system_heap         m_LargeHeap          ;  ///< Heap for large blocks
1165         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1166         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1167         atomics::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1168         unsigned int        m_nProcessorCount    ;  ///< Processor count
1169         bound_checker       m_BoundChecker       ;  ///< Bound checker
1170
1171         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1172
1173     protected:
1174         //@cond
1175
1176         /// Allocates large block from system memory
1177         block_header * alloc_from_OS( size_t nSize )
1178         {
1179             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1180             m_OSAllocStat.incBytesAllocated( nSize );
1181             p->setOSAllocated( nSize );
1182             return p;
1183         }
1184
1185         /// Allocates from the active superblock if possible
1186         block_header * alloc_from_active( processor_heap * pProcHeap )
1187         {
1188             active_tag  oldActive;
1189             int nCollision = -1;
1190
1191             // Reserve block
1192             while ( true ) {
1193                 ++nCollision;
1194                 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1195                 if ( !oldActive.ptr() )
1196                     return nullptr;
1197                 unsigned int nCredits = oldActive.credits();
1198                 active_tag  newActive   ; // default = 0
1199                 if ( nCredits != 0 ) {
1200                     newActive = oldActive;
1201                     newActive.credits( nCredits - 1 );
1202                 }
1203                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1204                     break;
1205             }
1206
1207             if ( nCollision )
1208                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1209
1210             // pop block
1211             superblock_desc * pDesc = oldActive.ptr();
1212
1213             anchor_tag  oldAnchor;
1214             anchor_tag  newAnchor;
1215             byte * pAddr;
1216             unsigned int nMoreCredits = 0;
1217
1218             nCollision = -1;
1219             do {
1220                 ++nCollision;
1221                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1222
1223                 assert( oldAnchor.avail < pDesc->nCapacity );
1224                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1225                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1226                 newAnchor.tag += 1;
1227
1228                 if ( oldActive.credits() == 0 ) {
1229                     // state must be ACTIVE
1230                     if ( oldAnchor.count == 0 )
1231                         newAnchor.state = SBSTATE_FULL;
1232                     else {
1233                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1234                         newAnchor.count -= nMoreCredits;
1235                     }
1236                 }
1237             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1238
1239             if ( nCollision )
1240                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1241
1242             assert( newAnchor.state != SBSTATE_EMPTY );
1243
1244             if ( newAnchor.state == SBSTATE_FULL )
1245                 pProcHeap->stat.incDescFull();
1246             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1247                 update_active( pProcHeap, pDesc, nMoreCredits );
1248
1249             pProcHeap->stat.incAllocFromActive();
1250
1251             // block_header fields need not be set up here;
1252             // they were initialized in alloc_from_new_superblock
1253             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1254             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1255             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1256
1257             return reinterpret_cast<block_header *>( pAddr );
1258         }
1259
1260         /// Allocates from a partially filled superblock if possible
1261         block_header * alloc_from_partial( processor_heap * pProcHeap )
1262         {
1263         retry:
1264             superblock_desc * pDesc = pProcHeap->get_partial();
1265             if ( !pDesc )
1266                 return nullptr;
1267
1268             // reserve blocks
1269             anchor_tag  oldAnchor;
1270             anchor_tag  newAnchor;
1271             //byte * pAddr;
1272             unsigned int nMoreCredits = 0;
1273
1274             int nCollision = -1;
1275             do {
1276                 ++nCollision;
1277
1278                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1279                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1280                     free_superblock( pDesc );
1281                     goto retry;
1282                 }
1283
1284                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1285                 newAnchor.count -= nMoreCredits + 1;
1286                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1287                 newAnchor.tag += 1;
1288             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1289
1290             if ( nCollision )
1291                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1292
1293             if ( newAnchor.state == SBSTATE_FULL )
1294                 pProcHeap->stat.incDescFull();
1295
1296             // Now the thread is guaranteed to have reserved one or more blocks.
1297             // Pop one of the reserved blocks
1298             byte * pAddr;
1299             nCollision = -1;
1300             do {
1301                 ++nCollision;
1302
1303                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1304
1305                 assert( oldAnchor.avail < pDesc->nCapacity );
1306                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1307                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1308                 ++newAnchor.tag;
1309             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1310
1311             if ( nCollision )
1312                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1313
1314             assert( newAnchor.state != SBSTATE_EMPTY );
1315
1316             pProcHeap->stat.incAllocFromPartial();
1317
1318             if ( nMoreCredits > 0 )
1319                 update_active( pProcHeap, pDesc, nMoreCredits );
1320
1321             // block_header fields do not need to be set up here:
1322             // they were already initialized in alloc_from_new_superblock
1323             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1324             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1325             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1326
1327             return reinterpret_cast<block_header *>( pAddr );
1328         }
1329
1330         /// Allocates from the new superblock
1331         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1332         {
1333             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1334             assert( pDesc != nullptr );
1335             pDesc->pSB = new_superblock_buffer( pProcHeap );
1336
1337             anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1338             anchor.tag += 1;
1339
1340             // Build the singly-linked list of free blocks in the superblock
1341             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1342             unsigned int nNext = 0;
1343             const unsigned int nBlockSize = pDesc->nBlockSize;
1344             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1345                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1346                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1347             }
1348             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
1349
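                 // Reserve blocks of the fresh superblock: one block is handed to the caller,
                 // up to c_nMaxCredits blocks become credits of the new active tag,
                 // and the remaining blocks are recorded in anchor.count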
1350             active_tag newActive;
1351             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1352
1353             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1354             anchor.state = SBSTATE_ACTIVE;
1355             pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1356
1357             active_tag curActive;
1358             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1359                 pProcHeap->stat.incAllocFromNew();
1360                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1361                 return reinterpret_cast<block_header *>( pDesc->pSB );
1362             }
1363
1364             free_superblock( pDesc );
1365             return nullptr;
1366         }
1367
1368         /// Finds the appropriate processor heap for the selected size class
1369         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1370         {
1371             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1372
1373             unsigned int nProcessorId = m_Topology.current_processor();
1374             assert( nProcessorId < m_nProcessorCount );
1375
1376             if ( nProcessorId >= m_nProcessorCount )
1377                 nProcessorId = 0;
1378
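                 // The per-processor descriptor is created lazily: the first thread to install it
                 // via CAS wins; a losing thread frees its own copy and uses the winner's descriptor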
1379             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1380             while ( !pDesc ) {
1381
1382                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1383                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1384                     pDesc = pNewDesc;
1385                     break;
1386                 }
1387                 free_processor_desc( pNewDesc );
1388             }
1389
1390             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1391         }
1392
1393         /// Updates active field of processor heap \p pProcHeap
1394         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1395         {
1396             assert( pProcHeap == pDesc->pProcHeap );
1397
1398             active_tag  nullActive;
1399             active_tag  newActive;
1400             newActive.set( pDesc, nCredits - 1 );
1401
1402             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1403                 return;
1404
1405             // Someone has installed another active superblock.
1406             // Return the credits to the superblock and make it partial
1407
1408             anchor_tag  oldAnchor;
1409             anchor_tag  newAnchor;
1410
1411             do {
1412                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1413                 newAnchor.count += nCredits;
1414                 newAnchor.state = SBSTATE_PARTIAL;
1415             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1416
1417             pDesc->pProcHeap->add_partial( pDesc );
1418         }
1419
1420         /// Allocates new processor descriptor
1421         processor_desc * new_processor_desc( unsigned int nProcessorId )
1422         {
1423             processor_desc * pDesc;
1424             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1425
1426             /*
1427                 Processor descriptor layout
1428
1429                 proc_desc -  64-byte alignment
1430                 page_heap[0] 64-byte alignment
1431                 page_heap[1] 64-byte alignment
1432                 ...
1433                 page_heap[P] 64-byte alignment
1434
1435                 proc_heap[0] 64-byte alignment
1436                 proc_heap[1] 64-byte alignment
1437                 ...
1438                 proc_heap[N] 64-byte alignment
1439             */
1440
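                 // szDesc: size of the descriptor header (processor_desc followed by the page_heap array),
                 // rounded up and expressed in units of c_nAlignment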
1441             const size_t szDesc =
1442                 ( sizeof(processor_desc)
1443                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1444                     + c_nAlignment - 1
1445                 ) / c_nAlignment;
1447
1448             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1449
1450             static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) must be a multiple of c_nAlignment" );
1451
1452             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1453
1454             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1455             for ( size_t i = 0; i < nPageHeapCount; ++i )
1456                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1457
1458             // initialize processor heaps
1459             pDesc->arrProcHeap =
1460                 reinterpret_cast<processor_heap *>(
1461                     reinterpret_cast<uptr_atomic_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1462                     & ~(uptr_atomic_t(c_nAlignment) - 1)
1463                 );
1464
1465             processor_heap * pProcHeap = pDesc->arrProcHeap;
1466             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1467             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1468                 new (pProcHeap) processor_heap();
1469                 pProcHeap->pProcDesc = pDesc;
1470                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1471                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1472                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1473                 else
1474                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1475             }
1476             return pDesc;
1477         }
1478
1479
1480         void free_processor_heap( processor_heap * pProcHeap )
1481         {
1482             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1483                 superblock_desc * pDesc;
1484
1485                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1486                     free( pDesc->pSB );
1487                     m_AlignedHeap.free( pDesc );
1488                 }
1489
1490                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1491                 if ( pPartial ) {
1492                     free( pPartial->pSB );
1493                     m_AlignedHeap.free( pPartial );
1494                 }
1495
1496                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1497                 if ( pDesc ) {
1498                     free( pDesc->pSB );
1499                     m_AlignedHeap.free( pDesc );
1500                 }
1501             }
1502             else {
1503                 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1504                 superblock_desc * pDesc;
1505
1506                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1507                     pageHeap.free( pDesc->pSB );
1508                     m_AlignedHeap.free( pDesc );
1509                 }
1510
1511                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1512                 if ( pPartial ) {
1513                     pageHeap.free( pPartial->pSB );
1514                     m_AlignedHeap.free( pPartial );
1515                 }
1516
1517                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1518                 if ( pDesc ) {
1519                     pageHeap.free( pDesc->pSB );
1520                     m_AlignedHeap.free( pDesc );
1521                 }
1522             }
1523             pProcHeap->~processor_heap();
1524         }
1525
1526         /// Frees processor descriptor
1527         void free_processor_desc( processor_desc * pDesc )
1528         {
1529             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1530
1531             for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1532                 free_processor_heap( pDesc->arrProcHeap + j );
1533
1534             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1535                 m_AlignedHeap.free( pSBDesc );
1536
1537             for (size_t i = 0; i < nPageHeapCount; ++i )
1538                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1539
1540             //m_IntHeap.free( pDesc->pageHeaps );
1541             pDesc->pageHeaps = nullptr;
1542
1543             pDesc->processor_desc::~processor_desc();
1544             m_AlignedHeap.free( pDesc );
1545         }
1546
1547         /// Allocates new superblock descriptor
1548         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1549         {
1550             anchor_tag anchor;
1551             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1552             if ( pDesc == nullptr ) {
1553                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1554                 assert( (uptr_atomic_t(pDesc) & (c_nAlignment - 1)) == 0 );
1555
1556                 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1557                 anchor.tag = 0;
1558                 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1559
1560                 pProcHeap->stat.incDescAllocCount();
1561             }
1562             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1563             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1564             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1565             pDesc->pProcHeap = pProcHeap;
1566
1567             anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1568             anchor.avail = 1;
1569             pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1570
1571             return pDesc;
1572         }
1573
1574         /// Allocates superblock page
1575         byte * new_superblock_buffer( processor_heap * pProcHeap )
1576         {
1577             pProcHeap->stat.incBlockAllocated();
1578             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1579                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1580             }
1581             else {
1582                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1583             }
1584         }
1585
1586         /// Frees superblock descriptor and its page
1587         void free_superblock( superblock_desc * pDesc )
1588         {
1589             pDesc->pProcHeap->stat.incBlockDeallocated();
1590             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1591             if ( pDesc->pSB ) {
1592                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1593                     free( pDesc->pSB );
1594                 }
1595                 else {
1596                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1597                 }
1598             }
1599             pProcDesc->listSBDescFree.push( pDesc );
1600         }
1601
1602         /// Allocate memory block
1603         block_header * int_alloc(
1604             size_t nSize    ///< Size of memory block to allocate in bytes
1605             )
1606         {
1607             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1608             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1609                 return alloc_from_OS( nSize );
1610             }
1611             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1612
1613             block_header * pBlock;
1614             processor_heap * pProcHeap;
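                 // Allocation strategy: try the active superblock of the per-processor heap first,
                 // then a partially filled superblock, and finally a brand-new superblock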
1615             while ( true ) {
1616                 pProcHeap = find_heap( nSizeClassIndex );
1617                 if ( !pProcHeap )
1618                     return alloc_from_OS( nSize );
1619
1620                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1621                     break;
1622                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1623                     break;
1624                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1625                     break;
1626             }
1627
1628             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1629
1630             assert( pBlock != nullptr );
1631             return pBlock;
1632         }
1633
1634         //@endcond
1635     public:
1636         /// Heap constructor
1637         Heap()
1638         {
1639             // Explicit libcds initialization is needed since the heap may be constructed as a static object
1640             cds::Initialize();
1641
1642             m_nProcessorCount = m_Topology.processor_count();
1643             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1644                 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
1645             memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount );   // zero-initializes the atomic<processor_desc *> array; assumes atomic<T*> is layout-compatible with T*
1646         }
1647
1648         /// Heap destructor
1649         /**
1650             The destructor frees all memory allocated by the heap.
1651         */
1652         ~Heap()
1653         {
1654             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1655                 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1656                 if ( pDesc )
1657                     free_processor_desc( pDesc );
1658             }
1659
1660             m_AlignedHeap.free( m_arrProcDesc );
1661
1662             // Explicit termination of libcds
1663             cds::Terminate();
1664         }
1665
1666         /// Allocate memory block
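             /// A minimal usage sketch (\p myHeap is a hypothetical instance of this class):
             /// \code
             /// void * p = myHeap.alloc( 100 );
             /// // ... use the 100-byte block pointed to by p ...
             /// myHeap.free( p );
             /// \endcode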
1667         void * alloc(
1668             size_t nSize    ///< Size of memory block to allocate in bytes
1669         )
1670         {
1671             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1672
1673             // Bound checking is only for our blocks
1674             if ( !pBlock->isOSAllocated() ) {
1675                 // the block is allocated from our heap - bound checker is applicable
1676                 m_BoundChecker.make_trailer(
1677                     reinterpret_cast<byte *>(pBlock + 1),
1678                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1679                     nSize
1680                 );
1681             }
1682
1683             return pBlock + 1;
1684         }
1685
1686         /// Free previously allocated memory block
1687         void free(
1688             void * pMemory  ///< Pointer to memory block to free
1689         )
1690         {
1691             if ( !pMemory )
1692                 return;
1693
1694             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1695             block_header * pBlock = pRedirect->begin();
1696
1697             if ( pBlock->isOSAllocated() ) {
1698                 // Block has been allocated from OS
1699                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1700                 m_LargeHeap.free( pBlock );
1701                 return;
1702             }
1703
1704             assert( !pBlock->isAligned() );
1705             superblock_desc * pDesc = pBlock->desc();
1706
1707             m_BoundChecker.check_bounds(
1708                 pRedirect + 1,
1709                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1710                 pDesc->nBlockSize
1711             );
1712
1713
1714             anchor_tag oldAnchor;
1715             anchor_tag newAnchor;
1716             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1717
1718             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1719
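                 // Return the block to its superblock: the CAS loop pushes the block onto the
                 // anchor's free list and updates the superblock state
                 // (FULL becomes PARTIAL; EMPTY when the last allocated block is returned)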
1720             oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1721             do {
1722                 newAnchor = oldAnchor;
1723                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1724                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1725                 newAnchor.tag += 1;
1726
1727                 assert( oldAnchor.state != SBSTATE_EMPTY );
1728
1729                 if ( oldAnchor.state == SBSTATE_FULL )
1730                     newAnchor.state = SBSTATE_PARTIAL;
1731
1732                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1733                     //pProcHeap = pDesc->pProcHeap;
1734                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1735                     newAnchor.state = SBSTATE_EMPTY;
1736                 }
1737                 else
1738                     newAnchor.count += 1;
1739             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1740
1741             pProcHeap->stat.incFreeCount();
1742
1743             if ( newAnchor.state == SBSTATE_EMPTY ) {
1744                 if ( pProcHeap->unlink_partial( pDesc ))
1745                     free_superblock( pDesc );
1746             }
1747             else if (oldAnchor.state == SBSTATE_FULL ) {
1748                 assert( pProcHeap != nullptr );
1749                 pProcHeap->stat.decDescFull();
1750                 pProcHeap->add_partial( pDesc );
1751             }
1752         }
1753
1754         /// Reallocate memory block
1755         /**
1756             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1757             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1758
1759             If there is not enough available memory to expand the block to the given size,
1760             the original block is left unchanged, and \p nullptr is returned.
1761
1762             An aligned memory block cannot be reallocated: if \p pMemory has been allocated by \ref alloc_aligned,
1763             then the return value is \p nullptr and the original block is left unchanged.
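
                 A minimal usage sketch (\p myHeap is a hypothetical instance of this class):
                 \code
                 void * p = myHeap.alloc( 16 );
                 void * q = myHeap.realloc( p, 64 );   // try to grow the block
                 if ( q )
                     p = q;    // success: q points to the (possibly moved) larger block
                 // on failure q is nullptr and p still points to the original, valid block
                 \endcode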
1764         */
1765         void * realloc(
1766             void *  pMemory,    ///< Pointer to previously allocated memory block
1767             size_t  nNewSize    ///< New size of memory block, in bytes
1768         )
1769         {
1770             if ( nNewSize == 0 ) {
1771                 free( pMemory );
1772                 return nullptr;
1773             }
1774
1775             const size_t nOrigSize = nNewSize;
1776             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1777
1778             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1779
1780             // Reallocation of aligned block is not possible
1781             if ( pBlock->isAligned() ) {
1782                 assert( false );
1783                 return nullptr;
1784             }
1785
1786             if ( pBlock->isOSAllocated() ) {
1787                 // The block has been allocated from OS
1788                 size_t nCurSize = pBlock->getOSAllocSize();
1789
1790                 if ( nCurSize >= nNewSize )
1791                     return pMemory;
1792
1793                 // Grow block size
1794                 void * pNewBuf = alloc( nOrigSize );
1795                 if ( pNewBuf ) {
1796                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1797                     free( pMemory );
1798                 }
1799                 return pNewBuf;
1800             }
1801
1802             superblock_desc * pDesc = pBlock->desc();
1803             if ( nNewSize <= pDesc->nBlockSize ) {
1804                 // In-place reallocation: the new size (including header and trailer) fits into the current block
1805                 m_BoundChecker.make_trailer(
1806                     reinterpret_cast<byte *>(pBlock + 1),
1807                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1808                     nOrigSize
1809                     );
1810
1811                 return pMemory;
1812             }
1813
1814             void * pNew = alloc( nNewSize );
1815             if ( pNew ) {
1816                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1817                 free( pMemory );
1818                 return pNew;
1819             }
1820
1821             return nullptr;
1822         }
1823
1824         /// Allocate aligned memory block
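             /// A usage sketch (\p myHeap is a hypothetical instance of this class); a block obtained here
             /// must be released with \ref free_aligned:
             /// \code
             /// void * p = myHeap.alloc_aligned( 256, 128 );   // 256 bytes, aligned to a 128-byte boundary
             /// myHeap.free_aligned( p );
             /// \endcode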
1825         void * alloc_aligned(
1826             size_t nSize,       ///< Size of memory block to allocate in bytes
1827             size_t nAlignment   ///< Alignment
1828         )
1829         {
1830             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1831                 void * p = alloc( nSize );
1832                 assert( (reinterpret_cast<uptr_atomic_t>(p) & (nAlignment - 1)) == 0 );
1833                 return p;
1834             }
1835
1836             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1837
1838             block_header * pRedirect;
1839             if ( (reinterpret_cast<uptr_atomic_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1840                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uptr_atomic_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1841                 assert( pRedirect != pBlock );
1842                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1843
1844                 assert( (reinterpret_cast<uptr_atomic_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1845             }
1846             else
1847                 pRedirect = pBlock;
1848
1849
1850             // Bound checking is only for our blocks
1851             if ( !pBlock->isOSAllocated() ) {
1852                 // the block is allocated from our heap - bound checker is applicable
1853                 m_BoundChecker.make_trailer(
1854                     reinterpret_cast<byte *>(pRedirect + 1),
1855                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1856                     nSize
1857                 );
1858             }
1859
1860             return pRedirect + 1;
1861         }
1862
1863         /// Free aligned memory block previously allocated by \ref alloc_aligned
1864         void free_aligned(
1865             void * pMemory      ///< Pointer to memory block to free
1866         )
1867         {
1868             free( pMemory );
1869         }
1870
1871     public:
1872
1873         /// Get instant summary statistics
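             /// A usage sketch (\p myHeap is a hypothetical instance of this class):
             /// \code
             /// summary_stat st;
             /// myHeap.summaryStat( st );
             /// // st now aggregates the statistics of all processor heaps and OS allocations
             /// \endcode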
1874         void summaryStat( summary_stat& st )
1875         {
1876             size_t nProcHeapCount = m_SizeClassSelector.size();
1877             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1878                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1879                 if ( pProcDesc ) {
1880                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1881                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1882                         if ( pProcHeap ) {
1883                             st.add_procheap_stat( pProcHeap->stat );
1884                         }
1885                     }
1886                 }
1887             }
1888
1889             st.add_heap_stat( m_OSAllocStat );
1890         }
1891     };
1892
1893 }}} // namespace cds::memory::michael
1894
1895 #endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H