1 //$$CDS-header$$
2
3 #ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
5
6 /*
7     Michael allocator implementation
8     Source:
9         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
10
11     Editions:
12     2011.09.07 khizmax  Optimization: small pages (about 64K) are allocated by the Heap::alloc call.
13                         This allows system memory to be allocated more regularly,
14                         in blocks of 1M, which leads to less memory fragmentation.
15     2011.01.02 khizmax  Created
16 */
17
18 #include <cds/init.h>
19 #include <cds/memory/michael/options.h>
20 #include <cds/memory/michael/bound_check.h>
21 #include <cds/memory/michael/procheap_stat.h>
22 #include <cds/memory/michael/osalloc_stat.h>
23
24 #include <cds/os/topology.h>
25 #include <cds/os/alloc_aligned.h>
26 #include <cds/lock/spinlock.h>
27 #include <cds/details/type_padding.h>
28 #include <cds/details/marked_ptr.h>
29 #include <cds/container/vyukov_mpmc_cycle_queue.h>
30 #include <cds/user_setup/cache_line.h>
31 #include <cds/details/lib.h>
32
33 #include <stdlib.h>
34 #include <boost/intrusive/list.hpp>
35
36 namespace cds {
37     /// Memory-related algorithms: allocators etc.
38 namespace memory {
39     /// Michael's allocator (class Heap)
40     /**
41         \par Source
42             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
43
44         This namespace declares the main class Heap and a lot of helper classes.
45     */
46 namespace michael {
47
48     /// Size class
49     struct size_class {
50         unsigned int    nBlockSize  ;   ///< block size in bytes
51         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
52         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
53         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
54     };
55
56     /// %Heap based on system \p malloc and \p free functions
57     struct malloc_heap
58     {
59         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
60         static void * alloc( size_t nSize )
61         {
62             return ::malloc( nSize );
63         }
64         /// Returns the memory block to the system (\p free wrapper)
65         static void free( void * p )
66         {
67             ::free( p );
68         }
69     };
70
71     /// %Heap based on system provided aligned \p malloc and \p free functions
72     struct aligned_malloc_heap
73     {
74         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
75         static void * alloc( size_t nSize, size_t nAlignment  )
76         {
77             return cds::OS::aligned_malloc( nSize, nAlignment );
78         }
79         /// Frees the aligned memory block \p p that has been previously allocated by an \ref alloc call
80         static void free( void * p )
81         {
82             cds::OS::aligned_free( p );
83         }
84     };
85
86     /// Page heap based on \p Heap
87     /**
88         A page heap can allocate memory only in page-sized blocks.
89         \p Heap may be any heap that provides an interface like \ref malloc_heap.
90
91         This class is one of the available implementations of the opt::page_heap option.
92     */
93     template <class Heap = malloc_heap>
94     class page_allocator: public Heap
95     {
96         //@cond
97         typedef Heap base_class;
98         size_t  m_nPageSize;
99         //@endcond
100
101     public:
102         /// Initializes heap
103         page_allocator(
104             size_t nPageSize    ///< page size in bytes
105         )
106             : m_nPageSize( nPageSize )
107         {}
108
109         /// Allocate new page
110         void * alloc()
111         {
112             return base_class::alloc( m_nPageSize );
113         }
114
115         /// Free page \p pPage
116         void free( void * pPage )
117         {
118             base_class::free( pPage );
119         }
120     };
121
122     /// Page cacheable heap
123     /**
124         To improve performance, this allocator maintains a small list of free pages.
125         The page heap can allocate memory only in page-sized blocks.
126
127         Template parameters:
128             \li \p FreeListCapacity - capacity of the free-list; the default value is 64 pages
129             \li \p Heap - any heap that provides an interface like \ref malloc_heap
130
131         This class is one of the available implementations of the opt::page_heap option; see the example below.
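        A usage example (a sketch; the free-list capacity 32 is arbitrary, and any heap
        with the \ref malloc_heap interface may be used as the second template argument):
        \code
        #include <cds/memory/michael/allocator.h>

        // Michael's heap whose page heap caches up to 32 free pages
        cds::memory::michael::Heap<
            opt::page_heap< page_cached_allocator< 32, malloc_heap > >
        >   myCachedPageHeap;
        \endcode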
132     */
133     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
134     class page_cached_allocator: public page_allocator<Heap>
135     {
136         //@cond
137         typedef page_allocator<Heap> base_class;
138
139 #ifdef _DEBUG
140         struct make_null_ptr {
141             void operator ()(void *& p)
142             {
143                 p = nullptr;
144             }
145         };
146 #endif
147
148         typedef container::VyukovMPMCCycleQueue<
149             void *,
150             opt::buffer< opt::v::static_buffer<void *, FreeListCapacity> >
151 #ifdef _DEBUG
152             , opt::value_cleaner< make_null_ptr >
153 #endif
154         >   free_list;
155
156         free_list   m_FreeList;
157         //@endcond
158
159     public:
160         /// Initializes heap
161         page_cached_allocator(
162             size_t nPageSize    ///< page size in bytes
163         )
164             : base_class( nPageSize )
165             , m_FreeList( FreeListCapacity )
166         {}
167
168         //@cond
169         ~page_cached_allocator()
170         {
171             void * pPage;
172             while ( m_FreeList.pop(pPage) )
173                 base_class::free( pPage );
174         }
175         //@endcond
176
177         /// Allocate new page
178         void * alloc()
179         {
180             void * pPage;
181             if ( !m_FreeList.pop( pPage ) )
182                 pPage = base_class::alloc();
183             return pPage;
184         }
185
186         /// Free page \p pPage
187         void free( void * pPage )
188         {
189             if ( !m_FreeList.push( pPage ))
190                 base_class::free( pPage );
191         }
192     };
193
194     /// Implementation of opt::sizeclass_selector option
195     /**
196         The default size-class selector can manage memory blocks up to 64K in size; a usage sketch follows.
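        A usage sketch (illustration only; the request size of 100 bytes is an arbitrary example):
        \code
        typedef cds::memory::michael::default_sizeclass_selector selector;

        selector::sizeclass_index idx = selector::find( 100 );
        if ( idx != selector::c_nNoSizeClass ) {
            // the selected size class is large enough to hold the request
            cds::memory::michael::size_class const * pSC = selector::at( idx );
            assert( pSC->nBlockSize >= 100 );
        }
        \endcode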
197     */
198     class CDS_EXPORT_API default_sizeclass_selector
199     {
200         //@cond
201         /// Count of different size-classes
202         static const size_t c_nSizeClassCount = 63;
203
204         /// Max block size
205         static const size_t c_nMaxBlockSize = 64 * 1024;
206
207         /// Page size of type 0 (64K)
208         static const unsigned int c_nPage64K = 64 * 1024 - 32;
209
210         /// Page size of type 1 (1M)
211         static const unsigned int c_nPage1M = 1024 * 1024;
212
213         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
214         static size_class const m_szClass[c_nSizeClassCount];
215         static unsigned char const m_szClassMap[];
216         //@endcond
217     public:
218         /// Type of size-class index
219         typedef unsigned int sizeclass_index;
220
221 #ifdef _DEBUG
222         default_sizeclass_selector();
223 #endif
224
225         /// "No size class" index
226         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
227
228         /// Returns size-class count
229         static sizeclass_index size()
230         {
231             return c_nSizeClassCount;
232         }
233
234         /// Returns page size in bytes for given page type \p nPageType
235         static size_t page_size(size_t nPageType )
236         {
237             switch (nPageType) {
238             case 0:
239                 return c_nPage64K;
240             case 1:
241                 return c_nPage1M;
242             default:
243                 assert(false)   ;   // anything forgotten?..
244             }
245             return c_nPage1M;
246         }
247
248         /// Returns the count of page types
249         /**
250             This class supports two page types: 64K pages for small objects and 1M pages for other objects.
251         */
252         static size_t pageTypeCount()
253         {
254             return 2;
255         }
256
257         /// Returns size-class index for \p nSize
258         /**
259             For large blocks that cannot be allocated by Michael's allocator
260             the function returns \p c_nNoSizeClass.
261         */
262         static sizeclass_index find( size_t nSize )
263         {
264             if ( nSize > c_nMaxBlockSize ) {
265                 // Too large block - allocate from system
266                 return c_nNoSizeClass;
267             }
268             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
269             assert( nSize <= m_szClassBounds[ szClass ] );
270             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
271
272             return szClass;
273         }
274
275         /// Gets the \ref size_class struct for size-class index \p nIndex
276         static const size_class * at( sizeclass_index nIndex )
277         {
278             assert( nIndex < size() );
279             return m_szClass + nIndex;
280         }
281     };
282
283     //@cond
284     namespace details {
285         struct free_list_tag;
286         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
287
288         struct partial_list_tag;
289         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
290
291         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
292         {};
293     }
294     //@endcond
295
296     /// List of free superblock descriptors
297     /**
298         This class is an implementation of the \ref opt::free_list option; see the example below.
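        For example, selecting this class explicitly with a spin-lock (a sketch that mirrors
        the default configuration used by \ref Heap):
        \code
        cds::memory::michael::Heap<
            opt::free_list< free_list_locked< cds::lock::Spin > >
        >   myHeap;
        \endcode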
299     */
300     template <class Lock, class T = details::intrusive_superblock_desc>
301     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
302     {
303         //@cond
304         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
305     public:
306         typedef details::free_list_locked_hook item_hook;
307         typedef Lock lock_type;
308     protected:
309         typedef cds::lock::scoped_lock<lock_type>   auto_lock;
310
311         mutable lock_type   m_access;
312         //@endcond
313
314     public:
315         /// Rebinds to other item type \p T2
316         template <class T2>
317         struct rebind {
318             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
319         };
320
321     public:
322         /// Push superblock descriptor to free-list
323         void push( T * pDesc )
324         {
325             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
326             auto_lock al(m_access);
327             base_class::push_back( *pDesc );
328         }
329
330         /// Pop superblock descriptor from free-list
331         T *   pop()
332         {
333             auto_lock al(m_access);
334             if ( base_class::empty() )
335                 return nullptr;
336             T& rDesc = base_class::front();
337             base_class::pop_front();
338             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
339             return &rDesc;
340         }
341
342         /// Returns current count of superblocks in free-list
343         size_t  size() const
344         {
345             auto_lock al(m_access);
346             return base_class::size();
347         }
348     };
349
350     /// List of partially filled superblock descriptors
351     /**
352         This class is an implementation of the \ref opt::partial_list option
353     */
354     template <class Lock, class T = details::intrusive_superblock_desc>
355     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
356     {
357         //@cond
358         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
359     public:
360         typedef details::partial_list_locked_hook item_hook;
361         typedef Lock    lock_type;
362     protected:
363         typedef cds::lock::scoped_lock<lock_type>   auto_lock;
364
365         mutable lock_type   m_access;
366         //@endcond
367
368     public:
369         /// Rebinds to other item type \p T2
370         template <class T2>
371         struct rebind {
372             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
373         };
374
375     public:
376         /// Push a superblock \p pDesc to the list
377         void    push( T * pDesc )
378         {
379             auto_lock al( m_access );
380             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
381             base_class::push_back( *pDesc );
382         }
383
384         /// Pop superblock from the list
385         T * pop()
386         {
387             auto_lock al( m_access );
388             if ( base_class::empty() )
389                 return nullptr;
390             T& rDesc = base_class::front();
391             base_class::pop_front();
392             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
393             return &rDesc;
394         }
395
396         /// Removes the descriptor \p pDesc from the partial list
397         bool unlink( T * pDesc )
398         {
399             assert(pDesc != nullptr);
400             auto_lock al( m_access );
401             // !inited(pDesc) means that pDesc is currently linked into the partial list
402             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
403                 base_class::erase( base_class::iterator_to( *pDesc ) );
404                 return true;
405             }
406             return false;
407         }
408
409         /// Returns the count of elements in the list
410         size_t size() const
411         {
412             auto_lock al( m_access );
413             return base_class::size();
414         }
415     };
416
417     /// Summary processor heap statistics
418     /**
419         Summary heap statistics for use with the \p Heap::summaryStat function; see the usage sketch below.
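        A usage sketch (it is assumed here that \p Heap::summaryStat takes a \p summary_stat
        reference and fills it; see the \ref Heap documentation):
        \code
        cds::memory::michael::Heap<>    myHeap;
        // ... allocations and deallocations ...

        cds::memory::michael::summary_stat st;
        myHeap.summaryStat( st );   // gather statistics from all processor heaps
        \endcode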
420     */
421     struct summary_stat
422     {
423         size_t      nAllocFromActive    ;  ///< Event count of allocations from the active superblock
424         size_t      nAllocFromPartial   ;  ///< Event count of allocations from a partial superblock
425         size_t      nAllocFromNew       ;  ///< Event count of allocations from a new superblock
426         size_t      nFreeCount          ;  ///< Count of \p free function calls
427         size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
428         size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
429         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors allocated
430         size_t      nDescFull           ;  ///< Count of full superblocks
431         atomic64u_t nBytesAllocated     ;  ///< Count of allocated bytes (for heap-managed memory blocks)
432         atomic64u_t nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap-managed memory blocks)
433
434         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
435         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
436         atomic64u_t nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
437         atomic64u_t nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
438
439         // Internal contention indicators
440         /// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active internal function of \p Heap
441         /**
442             Contention indicator: the lower the value, the better
443         */
444         size_t      nActiveDescCASFailureCount;
445         /// CAS failure counter for updating the \p anchor field of the active superblock in the \p alloc_from_active internal function of \p Heap
446         /**
447             Contention indicator: the lower the value, the better
448         */
449         size_t      nActiveAnchorCASFailureCount;
450         /// CAS failure counter for updating the \p anchor field of a partial superblock while reserving blocks in the \p alloc_from_partial internal function of \p Heap
451         /**
452             Contention indicator: the lower the value, the better
453         */
454         size_t      nPartialDescCASFailureCount;
455         /// CAS failure counter for updating the \p anchor field of a partial superblock while popping a reserved block in the \p alloc_from_partial internal function of \p Heap
456         /**
457             Contention indicator: the lower the value, the better
458         */
459         size_t      nPartialAnchorCASFailureCount;
460
461
462     public:
463         /// Constructs empty statistics. All counters are zero.
464         summary_stat()
465         {
466             clear();
467         }
468
469         /// Difference statistics
470         /**
471             This operator computes the difference between \p *this and \p stat and stores the result in \p *this.
472             Returns \p *this.
473         */
474         summary_stat& operator -=( const summary_stat& stat )
475         {
476             nAllocFromActive    -= stat.nAllocFromActive;
477             nAllocFromPartial   -= stat.nAllocFromPartial;
478             nAllocFromNew       -= stat.nAllocFromNew;
479             nFreeCount          -= stat.nFreeCount;
480             nPageAllocCount     -= stat.nPageAllocCount;
481             nPageDeallocCount   -= stat.nPageDeallocCount;
482             nDescAllocCount     -= stat.nDescAllocCount;
483             nDescFull           -= stat.nDescFull;
484             nBytesAllocated     -= stat.nBytesAllocated;
485             nBytesDeallocated   -= stat.nBytesDeallocated;
486
487             nSysAllocCount      -= stat.nSysAllocCount;
488             nSysFreeCount       -= stat.nSysFreeCount;
489             nSysBytesAllocated  -= stat.nSysBytesAllocated;
490             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
491
492             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
493             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
494             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
495             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
496
497             return *this;
498         }
499
500         /// Clears statistics
501         /**
502             All counters are set to zero.
503         */
504         void clear()
505         {
506             memset( this, 0, sizeof(*this));
507         }
508
509         //@cond
510         template <typename Stat>
511         summary_stat& add_procheap_stat( const Stat& stat )
512         {
513             nAllocFromActive    += stat.allocFromActive();
514             nAllocFromPartial   += stat.allocFromPartial();
515             nAllocFromNew       += stat.allocFromNew();
516             nFreeCount          += stat.freeCount();
517             nPageAllocCount     += stat.blockAllocated();
518             nPageDeallocCount   += stat.blockDeallocated();
519             nDescAllocCount     += stat.descAllocCount();
520             nDescFull           += stat.descFull();
521             nBytesAllocated     += stat.allocatedBytes();
522             nBytesDeallocated   += stat.deallocatedBytes();
523
524             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
525             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
526             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
527             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
528
529             return *this;
530         }
531
532         template <typename Stat>
533         summary_stat& add_heap_stat( const Stat& stat )
534         {
535             nSysAllocCount      += stat.allocCount();
536             nSysFreeCount       += stat.freeCount();
537
538             nSysBytesAllocated  += stat.allocatedBytes();
539             nSysBytesDeallocated+= stat.deallocatedBytes();
540
541             return *this;
542         }
543         //@endcond
544     };
545
546     /// Michael's allocator
547     /**
548         This class provides the base functionality of Michael's allocator. It does not provide
549         the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
550         The heap interface is closer to the semantics of the \p malloc / \p free system functions.
551         The heap supports allocation of aligned and unaligned data.
552
553         The algorithm is based on a simplified version of
554             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
555
556         which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
557             \li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis
558
559         This is a powerful, scalable, fully customizable heap with a lock-free fast path
560         that has been developed specifically for multi-threaded applications.
561         With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
562         You can manually bind any thread to any arena ("processor"). With the opt::sizeclass_selector option you can manage
563         allocation granularity. With opt::page_heap you can utilize any OS-provided facility for page allocation,
564         such as \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
565         The opt::check_bounds feature can help you to find memory buffer overflows.
566
567         Brief algorithm description from Michael's work:
568
569         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
570         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
571         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
572         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
573         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
574         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
575         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
576         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
577         in a lock-free manner.
578
579         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
580         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
581         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
582         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
583         available blocks of its original superblock by atomically updating its descriptor.
584
585         <b>Constraint</b>: one superblock may contain up to 2048 blocks. This imposes a limit on the maximum
586         superblock size.
587
588         Available \p Options:
589         - \ref opt::sys_topology - class that describes system topology needed for allocator.
590             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
591         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
592             Default is \ref malloc_heap.
593         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
594             Default is \ref aligned_malloc_heap
595         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
596             Default is \ref page_cached_allocator
597         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
598             for incoming allocation request.
599             Default is \ref default_sizeclass_selector
600         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
601             Default is \ref free_list_locked
602         - \ref opt::partial_list - option setter for a class to manage a list of partially filled superblocks
603             Default is \ref partial_list_locked
604         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
605             maintained by the heap.
606             Default is \ref procheap_empty_stat
607         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
608             allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector);
609             it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated directly
610             from the OS. The \p os_allocated_stat option sets a class to gather statistics for such large blocks.
611             Default is \ref os_allocated_empty
612         - \ref opt::check_bounds - a bound checker.
613             Default is no bound checker (cds::opt::none)
614
615         \par Usage:
616         The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
617
618         \code
619         #include <cds/memory/michael/allocator.h>
620
621         // Heap with explicitly defined options:
622         cds::memory::michael::Heap<
623             opt::aligned_heap< aligned_malloc_heap >,
624             opt::page_heap< page_cached_allocator<16, malloc_heap> >
625         >   myHeap;
626
627         // Heap with default options:
628         cds::memory::michael::Heap<>    myDefHeap;
629         \endcode
630
631         \par How to make a std-like allocator
632
633         There are significant differences between the heap and the <tt>std::allocator</tt> interface:
634             - The heap is stateful while \p std::allocator is stateless.
635             - The heap has many more template parameters than \p std::allocator
636             - The heap has a low-level interface for memory allocation only, unlike the allocator
637                 interface that can construct/destroy objects of any type T.
638
639         To wrap the heap interface into a \p std::allocator -like interface you should:
640             - Declare an object of class cds::memory::michael::Heap specifying the necessary
641                 template parameters; this is usually a static object
642             - Create a class with the \p std::allocator interface that uses the functions of the heap.
643         \code
644         #include <cds/memory/michael/allocator.h>
645
646         template <class T>
647         class MichaelAllocator
648         {
649             typedef std::allocator<T>               std_allocator;
650             typedef cds::memory::michael::Heap<>    michael_heap;
651
652             // Michael heap static object
653             static michael_heap     s_Heap;
654         public:
655             // Declare typedefs from std::allocator
656             typedef typename std_allocator::const_pointer   const_pointer;
657             typedef typename std_allocator::pointer         pointer;
658             typedef typename std_allocator::const_reference const_reference;
659             typedef typename std_allocator::reference       reference;
660             typedef typename std_allocator::difference_type difference_type;
661             typedef typename std_allocator::size_type       size_type;
662             typedef typename std_allocator::value_type      value_type;
663
664             // Allocation function
665             pointer allocate( size_type _Count, const void* _Hint )
666             {
667                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
668             }
669
670             // Deallocation function
671             void deallocate( pointer _Ptr, size_type _Count )
672             {
673                 s_Heap.free( _Ptr );
674             }
675
676             // Other std::allocator specific functions: address, construct, destroy, etc.
677             ...
678
679             // Rebinding allocator to other type
680             template <class _Other>
681             struct rebind {
682                 typedef MichaelAllocator<_Other> other;
683             };
684         };
685
686         // Definition of the static heap member (for a class template, the definition should be visible to every user, e.g. in the header)
687         template <class T> typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
688
689         \endcode
690     */
691     template <typename... Options>
692     class Heap {
693     protected:
694
695         //@cond
696         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
697         static const unsigned int c_nDefaultBlockAlignment = 8;
698
699         struct default_options {
700             typedef cds::OS::topology           sys_topology;
701             typedef malloc_heap                 system_heap;
702             typedef page_cached_allocator<>     page_heap;
703             typedef aligned_malloc_heap         aligned_heap;
704             typedef default_sizeclass_selector  sizeclass_selector;
705             typedef free_list_locked<cds::lock::Spin>  free_list;
706             typedef partial_list_locked<cds::lock::Spin>    partial_list;
707             typedef procheap_empty_stat         procheap_stat;
708             typedef os_allocated_empty          os_allocated_stat;
709             typedef cds::opt::none              check_bounds;
710         };
711         //@endcond
712
713     protected:
714         //@cond
715         typedef typename opt::make_options<default_options, Options...>::type   options;
716         //@endcond
717
718         //@cond
719         typedef unsigned char   byte;
720         //@endcond
721     public:
722         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
723         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
724         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
725         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
726         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
727         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
728         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
729         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
730
731         // forward declarations
732         //@cond
733         struct superblock_desc;
734         struct processor_heap_base;
735         struct processor_desc;
736         //@endcond
737
738         /// Superblock states
739         /**
740             A superblock can be in one of four states: \p ACTIVE, \p FULL,
741             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
742             superblock in a heap, or if a thread intends to try to install it
743             as such. A superblock is \p FULL if all its blocks are either allocated
744             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
745             and contains unreserved available blocks. A superblock is
746             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
747         */
748         enum superblock_state {
749             SBSTATE_ACTIVE  = 0,    ///< superblock is active
750             SBSTATE_FULL    = 1,    ///< superblock is full
751             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
752             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
753         };
754
755         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
756
757         /// Anchor of the superblock descriptor. Updated by CAS
758         struct anchor_tag {
759             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
760             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
761             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
762             unsigned long long    tag:40    ;   ///< ABA prevention tag
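            // 11 + 11 + 2 + 40 = 64 bits: the whole anchor fits into a single 64-bit word,
            // so it can be updated by one CAS operation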
763         };
764
765         /// Superblock descriptor
766         struct superblock_desc
767             : public options::free_list::item_hook
768             , public options::partial_list::item_hook
769         {
770             atomics::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
771             byte *              pSB         ;   ///< ptr to superblock
772             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
773             unsigned int        nBlockSize  ;   ///< block size in bytes
774             unsigned int        nCapacity   ;   ///< superblock size/block size
775
776             //@cond
777             superblock_desc()
778                 : pSB(nullptr)
779                 , pProcHeap( nullptr )
780             {}
781             //@endcond
782         };
783
784         //@cond
785         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
786         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
787         //@endcond
788
789 #if CDS_BUILD_BITS == 32
790         /// Allocated block header
791         /**
792             Each allocated block has an 8-byte header.
793             The header contains a pointer to the owner superblock descriptor and a redirection flag.
794             If the block has been allocated by \ref alloc, the redirection flag is 0 and the block's structure is:
795             \code
796                 +---------------+
797                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
798                 +---------------+
799                 |               | <- memory allocated
800                 |   memory      |
801                 |               |
802                 +---------------+
803             \endcode
804             If the block has been allocated by \ref alloc_aligned, the returned pointer may need to be shifted to satisfy the alignment requirement.
805             In this case the redirection flag is 1 and the block's structure is:
806             \code
807                 +---------------+
808             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
809             |   +---------------+
810             |   |   padding     |
811             |   |   (unused)    |
812             |   |               |
813             |   +---------------+
814             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
815                 +---------------+
816                 |               | <- memory allocated
817                 |   memory      |
818                 |               |
819                 +---------------+
820             \endcode
821         */
822         class block_header
823         {
824         //@cond
825             enum {
826                 bitAligned = 1,
827                 bitOSAllocated = 2
828             };
829
830             union {
831                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
832                 atomic32u_t         nSize       ;   // block size (allocated from the OS)
833             };
834             atomic32u_t         nFlags;
835
836         public:
837             void  set( superblock_desc * pdesc, atomic32u_t isAligned )
838             {
839                 pDesc = pdesc;
840                 nFlags = isAligned ? bitAligned : 0;
841             }
842
843             superblock_desc * desc()
844             {
845                 assert( (nFlags & bitOSAllocated) == 0 );
846                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
847             }
848
849             block_header * begin()
850             {
851                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
852             }
853
854             bool isAligned() const
855             {
856                 return (nFlags & bitAligned) != 0;
857             }
858
859             bool isOSAllocated() const
860             {
861                 return (nFlags & bitOSAllocated) != 0;
862             }
863
864             void setOSAllocated( size_t sz )
865             {
866                 nSize = sz;
867                 nFlags = bitOSAllocated;
868             }
869
870             size_t getOSAllocSize() const
871             {
872                 assert( isOSAllocated() );
873                 return nSize;
874             }
875
876         //@endcond
877         };
878 #elif CDS_BUILD_BITS == 64
879         //@cond
880         class block_header
881         {
882             enum {
883                 bitAligned = 1,
884                 bitOSAllocated = 2
885             };
886             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
887             // If bitOSAllocated is set, pDesc contains the size of the memory block
888             // allocated from the OS
889             marked_desc_ptr     pDesc;
890         public:
891             void  set( superblock_desc * pdesc, atomic32u_t isAligned )
892             {
893                 pDesc = marked_desc_ptr( pdesc, isAligned );
894             }
895
896             superblock_desc * desc()
897             {
898                 assert( !isOSAllocated() );
899                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
900             }
901
902             block_header * begin()
903             {
904                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
905             }
906
907             bool isAligned() const
908             {
909                 return (pDesc.bits() & bitAligned) != 0;
910             }
911
912             bool isOSAllocated() const
913             {
914                 return (pDesc.bits() & bitOSAllocated) != 0;
915             }
916
917             void setOSAllocated( size_t nSize )
918             {
919
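                // The size is stored shifted left by 2 bits so that the two low-order
                // flag bits (bitAligned | bitOSAllocated) remain free; getOSAllocSize() undoes the shift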
920                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
921             }
922
923             size_t getOSAllocSize() const
924             {
925                 assert( isOSAllocated() );
926                 return reinterpret_cast<uptr_atomic_t>( pDesc.ptr() ) >> 2;
927             }
928
929         };
930         //@endcond
931 #else
932 #       error "Unexpected value of CDS_BUILD_BITS"
933 #endif  // CDS_BUILD_BITS
934
935         //@cond
936         struct free_block_header: block_header {
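            // nNextFree is the index of the next free block within the superblock;
            // the free-block list is built in alloc_from_new_superblock()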
937             unsigned int    nNextFree;
938         };
939         //@endcond
940
941 #if CDS_BUILD_BITS == 32
942         /// Processor heap's \p active field
943         /**
944             The \p active field in the processor heap structure is primarily a pointer to the descriptor
945             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
946             guaranteed that the active superblock has at least one block available for reservation.
947             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
948             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
949             of blocks available for reservation in the active superblock less one. That is, if the value
950             of credits is n, then the active superblock contains n+1 blocks available for reservation
951             through the \p active field. Note that the number of blocks in a superblock is not limited
952             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
953             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
954             atomically decrements credits while validating that the active superblock is still valid.
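            A sketch of that typical reservation step (illustration only; \p procHeap denotes
            the owning processor heap, and the real code is in \p alloc_from_active()):
            \code
            active_tag oldActive = procHeap.active.load();
            if ( oldActive.ptr() && oldActive.credits() > 0 ) {
                active_tag newActive = oldActive;
                newActive.credits( oldActive.credits() - 1 );   // reserve one block
                procHeap.active.compare_exchange_strong( oldActive, newActive );
            }
            \endcode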
955         */
956         class active_tag {
957         //@cond
958             superblock_desc *       pDesc;
959             atomic32u_t             nCredits;
960
961         public:
962             static const unsigned int c_nMaxCredits = 0 - 1;
963
964         public:
965             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
966                 : pDesc( nullptr )
967                 , nCredits(0)
968             {}
969
970             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
971             ~active_tag() CDS_NOEXCEPT = default;
972             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
973 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
974             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
975             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
976 #       endif
977
978             /// Returns pointer to superblock descriptor
979             superblock_desc * ptr() const
980             {
981                 return pDesc;
982             }
983
984             /// Sets superblock descriptor
985             void ptr( superblock_desc * p )
986             {
987                 pDesc = p;
988             }
989
990             unsigned int credits() const
991             {
992                 return nCredits;
993             }
994
995             void credits( unsigned int n )
996             {
997                 nCredits = n;
998             }
999
1000             void clear()
1001             {
1002                 pDesc = nullptr;
1003                 nCredits = 0;
1004             }
1005
1006             void set( superblock_desc * pSB, unsigned int n )
1007             {
1008                 pDesc = pSB;
1009                 nCredits = n;
1010             }
1011         //@endcond
1012         };
1013 #elif CDS_BUILD_BITS == 64
1014         //@cond
1015         class active_tag
1016         {
1017         public:
1018             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1019         protected:
1020             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1021             marked_desc_ptr pDesc;
1022
1023         public:
1024             active_tag() CDS_NOEXCEPT
1025                 : pDesc( nullptr )
1026             {}
1027             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1028             //active_tag() CDS_NOEXCEPT = default;
1029             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1030             ~active_tag() CDS_NOEXCEPT = default;
1031             active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1032 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1033             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1034             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1035 #       endif
1036             superblock_desc *    ptr() const
1037             {
1038                 return pDesc.ptr();
1039             }
1040
1041             void ptr( superblock_desc * p )
1042             {
1043                 assert( (reinterpret_cast<uptr_atomic_t>(p) & c_nMaxCredits) == 0 );
1044                 pDesc = marked_desc_ptr( p, pDesc.bits());
1045             }
1046
1047             unsigned int credits()
1048             {
1049                 return (unsigned int) pDesc.bits();
1050             }
1051
1052             void credits( unsigned int n )
1053             {
1054                 assert( n <= c_nMaxCredits );
1055                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1056             }
1057
1058             void clear()
1059             {
1060                 pDesc = marked_desc_ptr();
1061             }
1062
1063             void set( superblock_desc * pSB, unsigned int n )
1064             {
1065                 assert( (reinterpret_cast<uptr_atomic_t>(pSB) & c_nMaxCredits) == 0 );
1066                 pDesc = marked_desc_ptr( pSB, n );
1067             }
1068
1069         };
1070         //@endcond
1071 #else
1072 #       error "Unexpected value of CDS_BUILD_BITS"
1073 #endif  // CDS_BUILD_BITS
1074
1075
1076         /// Processor heap
1077         struct processor_heap_base
1078         {
1079             CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1080             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1081             const size_class *  pSizeClass  ;   ///< pointer to size class
1082             atomics::atomic<superblock_desc *>   pPartial    ;   ///< pointer to partial filled superblock (may be \p nullptr)
1083             partial_list        partialList ;   ///< list of partial filled superblocks owned by the processor heap
1084             unsigned int        nPageIdx    ;   ///< page size-class index; equals \ref c_nPageSelfAllocation for a "small page"
1085
1086             /// Small page marker
1087             /**
1088                 If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
1089                 This optimization allows system memory to be allocated more regularly, in blocks of 1M,
1090                 which leads to less memory fragmentation.
1091             */
1092             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1093
1094             procheap_stat       stat        ;   ///< heap statistics
1095             //processor_heap_statistics   stat;
1096
1097             //@cond
1098             processor_heap_base() CDS_NOEXCEPT
1099                 : pProcDesc( nullptr )
1100                 , pSizeClass( nullptr )
1101                 , pPartial( nullptr )
1102             {
1103                 assert( (reinterpret_cast<uptr_atomic_t>(this) & (c_nAlignment - 1)) == 0 );
1104             }
1105             //@endcond
1106
1107             /// Get partial superblock owned by the processor heap
1108             superblock_desc * get_partial()
1109             {
1110                 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1111                 do {
1112                     if ( !pDesc ) {
1113                         pDesc =  partialList.pop();
1114                         break;
1115                     }
1116                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1117
1118                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1119                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1120                 return pDesc;
1121             }
1122
1123             /// Add partial superblock \p pDesc to the list
1124             void add_partial( superblock_desc * pDesc )
1125             {
1126                 assert( pPartial != pDesc );
1127                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1128
1129                 superblock_desc * pCur = nullptr;
1130                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1131                     partialList.push( pDesc );
1132             }
1133
1134
1135             /// Remove superblock \p pDesc from the list of partial superblock
1136             bool unlink_partial( superblock_desc * pDesc )
1137             {
1138                 return partialList.unlink( pDesc );
1139             }
1140         };
1141
1142         /// Aligned processor heap
1143         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1144
1145         /// Processor descriptor
1146         struct processor_desc
1147         {
1148             processor_heap *    arrProcHeap     ; ///< array of processor heaps
1149             free_list           listSBDescFree  ; ///< List of free superblock descriptors
1150             page_heap *         pageHeaps       ; ///< array of page heaps (one for each page size)
1151
1152             //@cond
1153             processor_desc()
1154                 : arrProcHeap( nullptr )
1155                 , pageHeaps( nullptr )
1156             {}
1157             //@endcond
1158         };
1159
1160
1161     protected:
1162         sys_topology        m_Topology           ;  ///< System topology
1163         system_heap         m_LargeHeap          ;  ///< Heap for large block
1164         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1165         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1166         atomics::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1167         unsigned int        m_nProcessorCount    ;  ///< Processor count
1168         bound_checker       m_BoundChecker       ;  ///< Bound checker
1169
1170         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1171
1172     protected:
1173         //@cond
1174
1175         /// Allocates large block from system memory
1176         block_header * alloc_from_OS( size_t nSize )
1177         {
1178             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1179             m_OSAllocStat.incBytesAllocated( nSize );
1180             p->setOSAllocated( nSize );
1181             return p;
1182         }
1183
1184         /// Allocates from the active superblock if possible
1185         block_header * alloc_from_active( processor_heap * pProcHeap )
1186         {
1187             active_tag  oldActive;
1188             int nCollision = -1;
1189
1190             // Reserve block
1191             while ( true ) {
1192                 ++nCollision;
1193                 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1194                 if ( !oldActive.ptr() )
1195                     return nullptr;
1196                 unsigned int nCredits = oldActive.credits();
1197                 active_tag  newActive   ; // default = 0
1198                 if ( nCredits != 0 ) {
1199                     newActive = oldActive;
1200                     newActive.credits( nCredits - 1 );
1201                 }
1202                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1203                     break;
1204             }
1205
1206             if ( nCollision )
1207                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1208
1209             // pop block
1210             superblock_desc * pDesc = oldActive.ptr();
1211
1212             anchor_tag  oldAnchor;
1213             anchor_tag  newAnchor;
1214             byte * pAddr;
1215             unsigned int nMoreCredits = 0;
1216
1217             nCollision = -1;
1218             do {
1219                 ++nCollision;
1220                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1221
1222                 assert( oldAnchor.avail < pDesc->nCapacity );
1223                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1224                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1225                 newAnchor.tag += 1;
1226
1227                 if ( oldActive.credits() == 0 ) {
1228                     // state must be ACTIVE
1229                     if ( oldAnchor.count == 0 )
1230                         newAnchor.state = SBSTATE_FULL;
1231                     else {
1232                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1233                         newAnchor.count -= nMoreCredits;
1234                     }
1235                 }
1236             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1237
1238             if ( nCollision )
1239                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1240
1241             assert( newAnchor.state != SBSTATE_EMPTY );
1242
1243             if ( newAnchor.state == SBSTATE_FULL )
1244                 pProcHeap->stat.incDescFull();
1245             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1246                 update_active( pProcHeap, pDesc, nMoreCredits );
1247
1248             pProcHeap->stat.incAllocFromActive();
1249
1250             // block_header fields need not be set up here:
1251             // they were initialized in alloc_from_new_superblock
1252             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1253             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1254             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1255
1256             return reinterpret_cast<block_header *>( pAddr );
1257         }
1258
1259         /// Allocates from a partially filled superblock if possible
1260         block_header * alloc_from_partial( processor_heap * pProcHeap )
1261         {
1262         retry:
1263             superblock_desc * pDesc = pProcHeap->get_partial();
1264             if ( !pDesc )
1265                 return nullptr;
1266
1267             // reserve blocks
1268             anchor_tag  oldAnchor;
1269             anchor_tag  newAnchor;
1270             //byte * pAddr;
1271             unsigned int nMoreCredits = 0;
1272
1273             int nCollision = -1;
1274             do {
1275                 ++nCollision;
1276
1277                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1278                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1279                     free_superblock( pDesc );
1280                     goto retry;
1281                 }
1282
1283                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1284                 newAnchor.count -= nMoreCredits + 1;
1285                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1286                 newAnchor.tag += 1;
1287             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1288
1289             if ( nCollision )
1290                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1291
1292             if ( newAnchor.state == SBSTATE_FULL )
1293                 pProcHeap->stat.incDescFull();
1294
1295             // Now, the thread is guaranteed to have reserved one or more blocks
1296             // pop reserved block
1297             byte * pAddr;
1298             nCollision = -1;
1299             do {
1300                 ++nCollision;
1301
1302                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1303
1304                 assert( oldAnchor.avail < pDesc->nCapacity );
1305                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1306                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1307                 ++newAnchor.tag;
1308             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1309
1310             if ( nCollision )
1311                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1312
1313             assert( newAnchor.state != SBSTATE_EMPTY );
1314
1315             pProcHeap->stat.incAllocFromPartial();
1316
1317             if ( nMoreCredits > 0 )
1318                 update_active( pProcHeap, pDesc, nMoreCredits );
1319
1320             // block_header fields do not need to be set up here;
1321             // they were initialized in alloc_from_new_superblock
1322             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1323             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1324             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1325
1326             return reinterpret_cast<block_header *>( pAddr );
1327         }
1328
1329         /// Allocates from a newly created superblock
1330         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1331         {
1332             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1333             assert( pDesc != nullptr );
1334             pDesc->pSB = new_superblock_buffer( pProcHeap );
1335
1336             anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1337             anchor.tag += 1;
1338
1339             // Build a singly linked list of free blocks in the superblock
1340             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1341             unsigned int nNext = 0;
1342             const unsigned int nBlockSize = pDesc->nBlockSize;
1343             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1344                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1345                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1346             }
1347             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
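            // Worked example (illustrative): with nCapacity == 4 the loop above chains
            // block 0 -> 1 -> 2 -> 3, and the line above resets block 3's nNextFree to 0.
            // Block 0 is returned to the caller below, and the anchor's avail index starts
            // at 1 (see new_superblock_desc), so blocks 1..3 form the initial free list.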
1348
1349             active_tag newActive;
1350             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1351
1352             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1353             anchor.state = SBSTATE_ACTIVE;
1354             pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1355
1356             active_tag curActive;
1357             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1358                 pProcHeap->stat.incAllocFromNew();
1359                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1360                 return reinterpret_cast<block_header *>( pDesc->pSB );
1361             }
1362
1363             free_superblock( pDesc );
1364             return nullptr;
1365         }
1366
1367         /// Finds the appropriate processor heap for the selected size class
1368         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1369         {
1370             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1371
1372             unsigned int nProcessorId = m_Topology.current_processor();
1373             assert( nProcessorId < m_nProcessorCount );
1374
1375             if ( nProcessorId >= m_nProcessorCount )
1376                 nProcessorId = 0;
1377
1378             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
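            // Lazily create the per-processor descriptor: if another thread publishes
            // its descriptor first, the CAS below fails, our copy is freed and the
            // published one is used.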
1379             while ( !pDesc ) {
1380
1381                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1382                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1383                     pDesc = pNewDesc;
1384                     break;
1385                 }
1386                 free_processor_desc( pNewDesc );
1387             }
1388
1389             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1390         }
1391
1392         /// Updates the active superblock field of processor heap \p pProcHeap
1393         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1394         {
1395             assert( pProcHeap == pDesc->pProcHeap );
1396
1397             active_tag  nullActive;
1398             active_tag  newActive;
1399             newActive.set( pDesc, nCredits - 1 );
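            // Note: active_tag stores "credits - 1", so installing the nCredits reserved
            // blocks as (nCredits - 1) credits allows nCredits blocks to be popped from
            // the active superblock (same convention as in alloc_from_new_superblock).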
1400
1401             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1402                 return;
1403
1404             // Someone installed another active superblock.
1405             // Return credits to superblock and make it partial
1406
1407             anchor_tag  oldAnchor;
1408             anchor_tag  newAnchor;
1409
1410             do {
1411                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1412                 newAnchor.count += nCredits;
1413                 newAnchor.state = SBSTATE_PARTIAL;
1414             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1415
1416             pDesc->pProcHeap->add_partial( pDesc );
1417         }
1418
1419         /// Allocates new processor descriptor
1420         processor_desc * new_processor_desc( unsigned int nProcessorId )
1421         {
1422             processor_desc * pDesc;
1423             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1424
1425             /*
1426                 Processor descriptor layout
1427
1428                 proc_desc -  64-byte alignment
1429                 page_heap[0] 64-byte alignment
1430                 page_heap[1] 64-byte alignment
1431                 ...
1432                 page_heap[P] 64-byte alignment
1433
1434                 proc_heap[0] 64-byte alignment
1435                 proc_heap[1] 64-byte alignment
1436                 ...
1437                 proc_heap[N] 64-byte alignment
1438             */
1439
1440             const size_t szDesc =
1441                 ( sizeof(processor_desc)
1442                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1443                     + c_nAlignment - 1
1444                 ) / c_nAlignment;
1445
1446
1447             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
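            // Worked example (illustrative only; the sizes below are assumed, not real):
            // with c_nAlignment == 64, sizeof(processor_desc) == 192, sizeof(page_heap) == 96
            // and nPageHeapCount == 2, szDesc = (192 + 2*96 + 63) / 64 = 6 aligned chunks,
            // so arrProcHeap starts at offset 6 * 64 = 384 bytes and
            // szTotal = 384 + sizeof(processor_heap) * m_SizeClassSelector.size().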
1448
1449             static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) must be a multiple of c_nAlignment" );
1450
1451             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1452
1453             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1454             for ( size_t i = 0; i < nPageHeapCount; ++i )
1455                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1456
1457             // initialize processor heaps
1458             pDesc->arrProcHeap =
1459                 reinterpret_cast<processor_heap *>(
1460                     reinterpret_cast<uptr_atomic_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1461                     & ~(uptr_atomic_t(c_nAlignment) - 1)
1462                 );
1463
1464             processor_heap * pProcHeap = pDesc->arrProcHeap;
1465             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1466             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1467                 new (pProcHeap) processor_heap();
1468                 pProcHeap->pProcDesc = pDesc;
1469                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
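                // If the superblock size of this size class is itself a valid size class,
                // superblocks are allocated through the heap itself (self-allocation);
                // otherwise they come from the page heap selected by nSBSizeIdx.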
1470                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1471                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1472                 else
1473                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1474             }
1475             return pDesc;
1476         }
1477
1478
1479         void free_processor_heap( processor_heap * pProcHeap )
1480         {
1481             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1482                 superblock_desc * pDesc;
1483
1484                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1485                     free( pDesc->pSB );
1486                     m_AlignedHeap.free( pDesc );
1487                 }
1488
1489                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1490                 if ( pPartial ) {
1491                     free( pPartial->pSB );
1492                     m_AlignedHeap.free( pPartial );
1493                 }
1494
1495                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1496                 if ( pDesc ) {
1497                     free( pDesc->pSB );
1498                     m_AlignedHeap.free( pDesc );
1499                 }
1500             }
1501             else {
1502                 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1503                 superblock_desc * pDesc;
1504
1505                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1506                     pageHeap.free( pDesc->pSB );
1507                     m_AlignedHeap.free( pDesc );
1508                 }
1509
1510                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1511                 if ( pPartial ) {
1512                     pageHeap.free( pPartial->pSB );
1513                     m_AlignedHeap.free( pPartial );
1514                 }
1515
1516                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1517                 if ( pDesc ) {
1518                     pageHeap.free( pDesc->pSB );
1519                     m_AlignedHeap.free( pDesc );
1520                 }
1521             }
1522             pProcHeap->~processor_heap();
1523         }
1524
1525         /// Frees processor descriptor
1526         void free_processor_desc( processor_desc * pDesc )
1527         {
1528             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1529
1530             for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1531                 free_processor_heap( pDesc->arrProcHeap + j );
1532
1533             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1534                 m_AlignedHeap.free( pSBDesc );
1535
1536             for (size_t i = 0; i < nPageHeapCount; ++i )
1537                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1538
1539             //m_IntHeap.free( pDesc->pageHeaps );
1540             pDesc->pageHeaps = nullptr;
1541
1542             pDesc->processor_desc::~processor_desc();
1543             m_AlignedHeap.free( pDesc );
1544         }
1545
1546         /// Allocates new superblock descriptor
1547         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1548         {
1549             anchor_tag anchor;
1550             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1551             if ( pDesc == nullptr ) {
1552                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1553                 assert( (uptr_atomic_t(pDesc) & (c_nAlignment - 1)) == 0 );
1554
1555                 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1556                 anchor.tag = 0;
1557                 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1558
1559                 pProcHeap->stat.incDescAllocCount();
1560             }
1561             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1562             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1563             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1564             pDesc->pProcHeap = pProcHeap;
1565
1566             anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1567             anchor.avail = 1;
1568             pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1569
1570             return pDesc;
1571         }
1572
1573         /// Allocates superblock page
1574         byte * new_superblock_buffer( processor_heap * pProcHeap )
1575         {
1576             pProcHeap->stat.incBlockAllocated();
1577             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1578                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1579             }
1580             else {
1581                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1582             }
1583         }
1584
1585         /// Frees superblock descriptor and its page
1586         void free_superblock( superblock_desc * pDesc )
1587         {
1588             pDesc->pProcHeap->stat.incBlockDeallocated();
1589             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1590             if ( pDesc->pSB ) {
1591                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1592                     free( pDesc->pSB );
1593                 }
1594                 else {
1595                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1596                 }
1597             }
1598             pProcDesc->listSBDescFree.push( pDesc );
1599         }
1600
1601         /// Allocates a memory block and returns its block header
1602         block_header * int_alloc(
1603             size_t nSize    ///< Size of memory block to allocate in bytes
1604             )
1605         {
1606             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1607             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1608                 return alloc_from_OS( nSize );
1609             }
1610             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1611
1612             block_header * pBlock;
1613             processor_heap * pProcHeap;
1614             while ( true ) {
1615                 pProcHeap = find_heap( nSizeClassIndex );
1616                 if ( !pProcHeap )
1617                     return alloc_from_OS( nSize );
1618
1619                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1620                     break;
1621                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1622                     break;
1623                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1624                     break;
1625             }
1626
1627             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1628
1629             assert( pBlock != nullptr );
1630             return pBlock;
1631         }
1632
1633         //@endcond
1634     public:
1635         /// Heap constructor
1636         Heap()
1637         {
1638             // Explicit libcds initialization is needed since the heap may be constructed as a static object
1639             cds::Initialize();
1640
1641             m_nProcessorCount = m_Topology.processor_count();
1642             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1643                 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
1644             memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount );   // zero-fill the atomic<processor_desc *> array (relies on the all-zero representation of a null atomic pointer)
1645         }
1646
1647         /// Heap destructor
1648         /**
1649             The destructor frees all memory allocated by the heap.
1650         */
1651         ~Heap()
1652         {
1653             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1654                 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1655                 if ( pDesc )
1656                     free_processor_desc( pDesc );
1657             }
1658
1659             m_AlignedHeap.free( m_arrProcDesc );
1660
1661             // Explicit termination of libcds
1662             cds::Terminate();
1663         }
1664
1665         /// Allocate memory block
1666         void * alloc(
1667             size_t nSize    ///< Size of memory block to allocate in bytes
1668         )
1669         {
1670             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1671
1672             // Bound checking is only for our blocks
1673             if ( !pBlock->isOSAllocated() ) {
1674                 // the block is allocated from our heap - bound checker is applicable
1675                 m_BoundChecker.make_trailer(
1676                     reinterpret_cast<byte *>(pBlock + 1),
1677                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1678                     nSize
1679                 );
1680             }
1681
1682             return pBlock + 1;
1683         }
1684
1685         /// Free previously allocated memory block
1686         void free(
1687             void * pMemory  ///< Pointer to memory block to free
1688         )
1689         {
1690             if ( !pMemory )
1691                 return;
1692
1693             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1694             block_header * pBlock = pRedirect->begin();
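            // begin() is expected to resolve a possible redirect header (set up by
            // alloc_aligned) back to the real block header; for ordinary blocks it is
            // the header itself.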
1695
1696             if ( pBlock->isOSAllocated() ) {
1697                 // Block has been allocated from OS
1698                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1699                 m_LargeHeap.free( pBlock );
1700                 return;
1701             }
1702
1703             assert( !pBlock->isAligned() );
1704             superblock_desc * pDesc = pBlock->desc();
1705
1706             m_BoundChecker.check_bounds(
1707                 pRedirect + 1,
1708                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1709                 pDesc->nBlockSize
1710             );
1711
1712
1713             anchor_tag oldAnchor;
1714             anchor_tag newAnchor;
1715             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1716
1717             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1718
1719             oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1720             do {
1721                 newAnchor = oldAnchor;
1722                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1723                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1724                 newAnchor.tag += 1;
1725
1726                 assert( oldAnchor.state != SBSTATE_EMPTY );
1727
1728                 if ( oldAnchor.state == SBSTATE_FULL )
1729                     newAnchor.state = SBSTATE_PARTIAL;
1730
1731                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1732                     //pProcHeap = pDesc->pProcHeap;
1733                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1734                     newAnchor.state = SBSTATE_EMPTY;
1735                 }
1736                 else
1737                     newAnchor.count += 1;
1738             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1739
1740             pProcHeap->stat.incFreeCount();
1741
1742             if ( newAnchor.state == SBSTATE_EMPTY ) {
1743                 if ( pProcHeap->unlink_partial( pDesc ))
1744                     free_superblock( pDesc );
1745             }
1746             else if (oldAnchor.state == SBSTATE_FULL ) {
1747                 assert( pProcHeap != nullptr );
1748                 pProcHeap->stat.decDescFull();
1749                 pProcHeap->add_partial( pDesc );
1750             }
1751         }
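        // Usage sketch (illustrative only, not part of the library); `h` is a hypothetical
        // instance of this Heap, e.g. a specialization with default options:
        //     void * p = h.alloc( 100 );   // returns at least 100 usable bytes
        //     // ... use p ...
        //     h.free( p );                 // blocks obtained from alloc() are released with free()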
1752
1753         /// Reallocate memory block
1754         /**
1755             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1756             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1757
1758             If there is not enough available memory to expand the block to the given size,
1759             the original block is left unchanged, and \p nullptr is returned.
1760
1761             An aligned memory block cannot be reallocated: if \p pMemory has been allocated by \ref alloc_aligned,
1762             then the return value is \p nullptr and the original block is left unchanged.
1763         */
1764         void * realloc(
1765             void *  pMemory,    ///< Pointer to previously allocated memory block
1766             size_t  nNewSize    ///< New size of memory block, in bytes
1767         )
1768         {
1769             if ( nNewSize == 0 ) {
1770                 free( pMemory );
1771                 return nullptr;
1772             }
1773
1774             const size_t nOrigSize = nNewSize;
1775             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1776
1777             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1778
1779             // Reallocation of aligned block is not possible
1780             if ( pBlock->isAligned() ) {
1781                 assert( false );
1782                 return nullptr;
1783             }
1784
1785             if ( pBlock->isOSAllocated() ) {
1786                 // The block has been allocated from OS
1787                 size_t nCurSize = pBlock->getOSAllocSize();
1788
1789                 if ( nCurSize >= nNewSize )
1790                     return pMemory;
1791
1792                 // Grow block size
1793                 void * pNewBuf = alloc( nOrigSize );
1794                 if ( pNewBuf ) {
1795                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1796                     free( pMemory );
1797                 }
1798                 return pNewBuf;
1799             }
1800
1801             superblock_desc * pDesc = pBlock->desc();
1802             if ( nNewSize <= pDesc->nBlockSize ) {
1803                 // In-place reallocation
1804                 m_BoundChecker.make_trailer(
1805                     reinterpret_cast<byte *>(pBlock + 1),
1806                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1807                     nOrigSize
1808                     );
1809
1810                 return pMemory;
1811             }
1812
1813             void * pNew = alloc( nNewSize );
1814             if ( pNew ) {
1815                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1816                 free( pMemory );
1817                 return pNew;
1818             }
1819
1820             return nullptr;
1821         }
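        // Usage sketch (illustrative only); `h` is a hypothetical Heap instance:
        //     void * p = h.alloc( 64 );
        //     void * q = h.realloc( p, 256 );  // may move the data to a new block
        //     if ( q ) p = q;                  // on nullptr the original block is untouched
        //     h.realloc( p, 0 );               // frees p and returns nullptr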
1822
1823         /// Allocate aligned memory block
1824         void * alloc_aligned(
1825             size_t nSize,       ///< Size of memory block to allocate in bytes
1826             size_t nAlignment   ///< Alignment
1827         )
1828         {
1829             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1830                 void * p = alloc( nSize );
1831                 assert( (reinterpret_cast<uptr_atomic_t>(p) & (nAlignment - 1)) == 0 );
1832                 return p;
1833             }
1834
1835             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1836
1837             block_header * pRedirect;
1838             if ( (reinterpret_cast<uptr_atomic_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1839                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uptr_atomic_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1840                 assert( pRedirect != pBlock );
1841                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1842
1843                 assert( (reinterpret_cast<uptr_atomic_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1844             }
1845             else
1846                 pRedirect = pBlock;
1847
1848
1849             // Bound checking is only for our blocks
1850             if ( !pBlock->isOSAllocated() ) {
1851                 // the block is allocated from our heap - bound checker is applicable
1852                 m_BoundChecker.make_trailer(
1853                     reinterpret_cast<byte *>(pRedirect + 1),
1854                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1855                     nSize
1856                 );
1857             }
1858
1859             return pRedirect + 1;
1860         }
1861
1862         /// Free aligned memory block previously allocated by \ref alloc_aligned
1863         void free_aligned(
1864             void * pMemory      ///< Pointer to memory block to free
1865         )
1866         {
1867             free( pMemory );
1868         }
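        // Usage sketch (illustrative only); `h` is a hypothetical Heap instance:
        //     void * p = h.alloc_aligned( 256, 128 );  // 256 bytes on a 128-byte boundary
        //     assert( (reinterpret_cast<uintptr_t>( p ) & 127) == 0 );
        //     h.free_aligned( p );   // pair alloc_aligned() with free_aligned()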
1869
1870     public:
1871
1872         /// Gets a snapshot of the summary statistics
1873         void summaryStat( summary_stat& st )
1874         {
1875             size_t nProcHeapCount = m_SizeClassSelector.size();
1876             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1877                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1878                 if ( pProcDesc ) {
1879                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1880                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1881                         if ( pProcHeap ) {
1882                             st.add_procheap_stat( pProcHeap->stat );
1883                         }
1884                     }
1885                 }
1886             }
1887
1888             st.add_heap_stat( m_OSAllocStat );
1889         }
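        // Usage sketch (illustrative only); `h` is a hypothetical Heap instance:
        //     summary_stat st;
        //     h.summaryStat( st );   // st aggregates per-processor-heap and OS-allocation counters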
1890     };
1891
1892 }}} // namespace cds::memory::michael
1893
1894 #endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H