Remove CDS_CXX11_VARIADIC_TEMPLATE_SUPPORT macro and emulating code
[libcds.git] / cds / memory / michael / allocator.h
1 //$$CDS-header$$
2
3 #ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
5
6 /*
7     Michael allocator implementation
8     Source:
9         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
10
11     Editions:
12     2011.09.07 khizmax  Optimization: small page (about 64K) is allocated by Heap::alloc call.
13                         This optimization allows the allocator to request system memory more regularly,
14                         in blocks of 1M, which leads to less memory fragmentation.
15     2011.01.02 khizmax  Created
16 */
17
18 #include <cds/init.h>
19 #include <cds/memory/michael/options.h>
20 #include <cds/memory/michael/bound_check.h>
21 #include <cds/memory/michael/procheap_stat.h>
22 #include <cds/memory/michael/osalloc_stat.h>
23
24 #include <cds/os/topology.h>
25 #include <cds/os/alloc_aligned.h>
26 #include <cds/lock/spinlock.h>
27 #include <cds/details/type_padding.h>
28 #include <cds/details/marked_ptr.h>
29 #include <cds/container/vyukov_mpmc_cycle_queue.h>
30 #include <cds/user_setup/cache_line.h>
31 #include <cds/details/lib.h>
32
33 #include <stdlib.h>
34 #include <boost/intrusive/list.hpp>
35
36 namespace cds {
37     /// Memory-related algorithms: allocators etc.
38 namespace memory {
39     /// Michael's allocator (class Heap)
40     /**
41         \par Source
42             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
43
44         This namespace declares the main class Heap and a lot of helper classes.
45     */
46 namespace michael {
47
48     /// Size class
49     struct size_class {
50         unsigned int    nBlockSize  ;   ///< block size in bytes
51         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
52         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
53         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
54     };
55
56     /// %Heap based on system \p malloc and \p free functions
57     struct malloc_heap
58     {
59         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
60         static void * alloc( size_t nSize )
61         {
62             return ::malloc( nSize );
63         }
64         /// Returning memory block to the system (\p free wrapper)
65         static void free( void * p )
66         {
67             ::free( p );
68         }
69     };
70
71     /// %Heap based on system provided aligned \p malloc and \p free functions
72     struct aligned_malloc_heap
73     {
74         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
75         static void * alloc( size_t nSize, size_t nAlignment  )
76         {
77             return cds::OS::aligned_malloc( nSize, nAlignment );
78         }
79         /// Frees aligned memory block \p p that has been previously allocated by \ref alloc call
80         static void free( void * p )
81         {
82             cds::OS::aligned_free( p );
83         }
84     };
85
86     /// Page heap based on \p Heap
87     /**
88         A page heap can allocate memory in page-sized blocks only.
89         \p Heap may be any heap that provides an interface like \ref malloc_heap.
90
91         This class is one of the available implementations of the opt::page_heap option.
92     */
93     template <class Heap = malloc_heap>
94     class page_allocator: public Heap
95     {
96         //@cond
97         typedef Heap base_class;
98         size_t  m_nPageSize;
99         //@endcond
100
101     public:
102         /// Initializes heap
103         page_allocator(
104             size_t nPageSize    ///< page size in bytes
105         )
106             : m_nPageSize( nPageSize )
107         {}
108
109         /// Allocate new page
110         void * alloc()
111         {
112             return base_class::alloc( m_nPageSize );
113         }
114
115         /// Free page \p pPage
116         void free( void * pPage )
117         {
118             base_class::free( pPage );
119         }
120     };
121
122     /// Page cacheable heap
123     /**
124         To improve performance this allocator maintains a small list of free pages.
125         A page heap can allocate memory in page-sized blocks only.
126
127         Template parameters:
128             \li \p FreeListCapacity - capacity of the free-list, default value is 64 pages
129             \li \p Heap may be any heap that provides an interface like \ref malloc_heap.
130
131         This class is one of the available implementations of the opt::page_heap option.
132     */
133     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
134     class page_cached_allocator: public page_allocator<Heap>
135     {
136         //@cond
137         typedef page_allocator<Heap> base_class;
138
139 #ifdef _DEBUG
140         struct make_null_ptr {
141             void operator ()(void *& p)
142             {
143                 p = nullptr;
144             }
145         };
146 #endif
147
148         typedef container::VyukovMPMCCycleQueue<
149             void *,
150             opt::buffer< opt::v::static_buffer<void *, FreeListCapacity> >
151 #ifdef _DEBUG
152             , opt::value_cleaner< make_null_ptr >
153 #endif
154         >   free_list;
155
156         free_list   m_FreeList;
157         //@endcond
158
159     public:
160         /// Initializes heap
161         page_cached_allocator(
162             size_t nPageSize    ///< page size in bytes
163         )
164             : base_class( nPageSize )
165             , m_FreeList( FreeListCapacity )
166         {}
167
168         //@cond
169         ~page_cached_allocator()
170         {
171             void * pPage;
172             while ( m_FreeList.pop(pPage) )
173                 base_class::free( pPage );
174         }
175         //@endcond
176
177         /// Allocate new page
178         void * alloc()
179         {
180             void * pPage;
181             if ( !m_FreeList.pop( pPage ) )
182                 pPage = base_class::alloc();
183             return pPage;
184         }
185
186         /// Free page \p pPage
187         void free( void * pPage )
188         {
189             if ( !m_FreeList.push( pPage ))
190                 base_class::free( pPage );
191         }
192     };
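
    // A minimal usage sketch of the page heaps above (assuming a 64K page size and the
    // default malloc_heap backend). These objects are normally created internally by the
    // Heap class via the opt::page_heap option, but they can also be used standalone:
    //
    //      cds::memory::michael::page_cached_allocator<16> pageHeap( 64 * 1024 );
    //      void * pPage = pageHeap.alloc();    // taken from the free-list if available,
    //                                          // otherwise from the underlying malloc_heap
    //      pageHeap.free( pPage );             // returned to the free-list, or to malloc_heap
    //                                          // if the free-list is full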
193
194     /// Implementation of opt::sizeclass_selector option
195     /**
196         The default size-class selector can manage memory blocks up to 64K in size.
197     */
198     class CDS_EXPORT_API default_sizeclass_selector
199     {
200         //@cond
201         /// Count of different size-classes
202         static const size_t c_nSizeClassCount = 63;
203
204         /// Max block size
205         static const size_t c_nMaxBlockSize = 64 * 1024;
206
207         /// Page size of type 0 (64K)
208         static const unsigned int c_nPage64K = 64 * 1024 - 32;
209
210         /// Page size of type 1 (1M)
211         static const unsigned int c_nPage1M = 1024 * 1024;
212
213         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
214         static size_class const m_szClass[c_nSizeClassCount];
215         static unsigned char const m_szClassMap[];
216         //@endcond
217     public:
218         /// Type of size-class index
219         typedef unsigned int sizeclass_index;
220
221 #ifdef _DEBUG
222         default_sizeclass_selector();
223 #endif
224
225         /// "No size class" index
226         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
227
228         /// Returns size-class count
229         static sizeclass_index size()
230         {
231             return c_nSizeClassCount;
232         }
233
234         /// Returns page size in bytes for given page type \p nPageType
235         static size_t page_size(size_t nPageType )
236         {
237             switch (nPageType) {
238             case 0:
239                 return c_nPage64K;
240             case 1:
241                 return c_nPage1M;
242             default:
243                 assert(false)   ;   // anything forgotten?..
244             }
245             return c_nPage1M;
246         }
247
248         /// Returns the count of page size-classes
249         /**
250             This class supports pages of two types: a 64K page for small objects and a 1M page for other objects.
251         */
252         static size_t pageTypeCount()
253         {
254             return 2;
255         }
256
257         /// Returns size-class index for \p nSize
258         /**
259             For large blocks that cannot be allocated by Michael's allocator
260             the function must return \p c_nNoSizeClass (that is, <tt>(sizeclass_index) -1</tt>).
261         */
262         static sizeclass_index find( size_t nSize )
263         {
264             if ( nSize > c_nMaxBlockSize ) {
265                 // Too large block - allocate from system
266                 return c_nNoSizeClass;
267             }
268             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
269             assert( nSize <= m_szClassBounds[ szClass ] );
270             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
271
272             return szClass;
273         }
274
275         /// Gets the \ref size_class struct for size-class index \p nIndex
276         static const size_class * at( sizeclass_index nIndex )
277         {
278             assert( nIndex < size() );
279             return m_szClass + nIndex;
280         }
281     };
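
    // A sketch of how the selector is typically queried (the 100-byte request size is just
    // an illustration): find() maps a request size to a size-class index, and at() yields
    // the corresponding block/superblock parameters.
    //
    //      typedef cds::memory::michael::default_sizeclass_selector selector;
    //      selector::sizeclass_index idx = selector::find( 100 );
    //      if ( idx != selector::c_nNoSizeClass ) {
    //          const cds::memory::michael::size_class * sc = selector::at( idx );
    //          // sc->nBlockSize is the block size that will actually be used (>= the requested 100 bytes),
    //          // sc->nSBSize is the superblock size (64K or 1M)
    //      }
    //      else {
    //          // the request is too large for the size-class table and would be
    //          // allocated directly from the OS
    //      }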
282
283     //@cond
284     namespace details {
285         struct free_list_tag;
286         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
287
288         struct partial_list_tag;
289         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
290
291         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
292         {};
293     }
294     //@endcond
295
296     /// List of free superblock descriptors
297     /**
298         This class is an implementation of the \ref opt::free_list option
299     */
300     template <class Lock, class T = details::intrusive_superblock_desc>
301     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
302     {
303         //@cond
304         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
305     public:
306         typedef details::free_list_locked_hook item_hook;
307         typedef Lock lock_type;
308     protected:
309         typedef cds::lock::scoped_lock<lock_type>   auto_lock;
310
311         mutable lock_type   m_access;
312         //@endcond
313
314     public:
315         /// Rebinds to other item type \p T2
316         template <class T2>
317         struct rebind {
318             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
319         };
320
321     public:
322         /// Push superblock descriptor to free-list
323         void push( T * pDesc )
324         {
325             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
326             auto_lock al(m_access);
327             base_class::push_back( *pDesc );
328         }
329
330         /// Pop superblock descriptor from free-list
331         T *   pop()
332         {
333             auto_lock al(m_access);
334             if ( base_class::empty() )
335                 return nullptr;
336             T& rDesc = base_class::front();
337             base_class::pop_front();
338             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
339             return &rDesc;
340         }
341
342         /// Returns current count of superblocks in free-list
343         size_t  size() const
344         {
345             auto_lock al(m_access);
346             return base_class::size();
347         }
348     };
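
    // The free-list class is selected through the opt::free_list option, following the same
    // pattern as the other Heap options; a sketch (free_list_locked with a spin-lock is
    // already the default, so this is purely illustrative):
    //
    //      typedef cds::memory::michael::Heap<
    //          opt::free_list< cds::memory::michael::free_list_locked< cds::lock::Spin > >
    //      > my_heap;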
349
350     /// List of partially filled superblock descriptors
351     /**
352         This class is an implementation of the \ref opt::partial_list option
353     */
354     template <class Lock, class T = details::intrusive_superblock_desc>
355     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
356     {
357         //@cond
358         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
359     public:
360         typedef details::partial_list_locked_hook item_hook;
361         typedef Lock    lock_type;
362     protected:
363         typedef cds::lock::scoped_lock<lock_type>   auto_lock;
364
365         mutable lock_type   m_access;
366         //@endcond
367
368     public:
369         /// Rebinds to other item type \p T2
370         template <class T2>
371         struct rebind {
372             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
373         };
374
375     public:
376         /// Push a superblock \p pDesc to the list
377         void    push( T * pDesc )
378         {
379             auto_lock al( m_access );
380             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
381             base_class::push_back( *pDesc );
382         }
383
384         /// Pop superblock from the list
385         T * pop()
386         {
387             auto_lock al( m_access );
388             if ( base_class::empty() )
389                 return nullptr;
390             T& rDesc = base_class::front();
391             base_class::pop_front();
392             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
393             return &rDesc;
394         }
395
396         /// Removes the \p pDesc descriptor from the partial list
397         bool unlink( T * pDesc )
398         {
399             assert(pDesc != nullptr);
400             auto_lock al( m_access );
401             // !inited(pDesc) means that pDesc is currently linked into the partial list
402             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
403                 base_class::erase( base_class::iterator_to( *pDesc ) );
404                 return true;
405             }
406             return false;
407         }
408
409         /// Count of elements in the list
410         size_t size() const
411         {
412             auto_lock al( m_access );
413             return base_class::size();
414         }
415     };
416
417     /// Summary processor heap statistics
418     /**
419         Summary heap statistics for use with Heap::summaryStat function.
420     */
421     struct summary_stat
422     {
423         size_t      nAllocFromActive    ;  ///< Count of allocations from an active superblock
424         size_t      nAllocFromPartial   ;  ///< Count of allocations from a partial superblock
425         size_t      nAllocFromNew       ;  ///< Count of allocations from a new superblock
426         size_t      nFreeCount          ;  ///< Count of \p free function calls
427         size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
428         size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
429         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors allocated
430         size_t      nDescFull           ;  ///< Count of full superblocks
431         atomic64u_t nBytesAllocated     ;  ///< Count of allocated bytes (for heap-managed memory blocks)
432         atomic64u_t nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap-managed memory blocks)
433
434         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned calls (for large memory blocks allocated directly from the OS)
435         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned calls (for large memory blocks allocated directly from the OS)
436         atomic64u_t nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
437         atomic64_t  nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
438
439         // Internal contention indicators
440         /// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active Heap internal function
441         /**
442             Contention indicator. The lower the value, the better.
443         */
444         size_t      nActiveDescCASFailureCount;
445         /// CAS failure counter for updating the \p anchor field of the active superblock in the \p alloc_from_active Heap internal function
446         /**
447             Contention indicator. The lower the value, the better.
448         */
449         size_t      nActiveAnchorCASFailureCount;
450         /// CAS failure counter for reserving blocks in the \p anchor field of a partial superblock in the \p alloc_from_partial Heap internal function
451         /**
452             Contention indicator. The lower the value, the better.
453         */
454         size_t      nPartialDescCASFailureCount;
455         /// CAS failure counter for popping a block from the \p anchor field of a partial superblock in the \p alloc_from_partial Heap internal function
456         /**
457             Contention indicator. The lower the value, the better.
458         */
459         size_t      nPartialAnchorCASFailureCount;
460
461
462     public:
463         /// Constructs empty statistics. All counters are zero.
464         summary_stat()
465         {
466             clear();
467         }
468
469         /// Difference statistics
470         /**
471             This operator computes the difference between \p *this and \p stat and places the difference into \p *this.
472             Returns \p *this.
473         */
474         summary_stat& operator -=( const summary_stat& stat )
475         {
476             nAllocFromActive    -= stat.nAllocFromActive;
477             nAllocFromPartial   -= stat.nAllocFromPartial;
478             nAllocFromNew       -= stat.nAllocFromNew;
479             nFreeCount          -= stat.nFreeCount;
480             nPageAllocCount     -= stat.nPageAllocCount;
481             nPageDeallocCount   -= stat.nPageDeallocCount;
482             nDescAllocCount     -= stat.nDescAllocCount;
483             nDescFull           -= stat.nDescFull;
484             nBytesAllocated     -= stat.nBytesAllocated;
485             nBytesDeallocated   -= stat.nBytesDeallocated;
486
487             nSysAllocCount      -= stat.nSysAllocCount;
488             nSysFreeCount       -= stat.nSysFreeCount;
489             nSysBytesAllocated  -= stat.nSysBytesAllocated;
490             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
491
492             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
493             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
494             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
495             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
496
497             return *this;
498         }
499
500         /// Clears statistics
501         /**
502             All counters are set to zero.
503         */
504         void clear()
505         {
506             memset( this, 0, sizeof(*this));
507         }
508
509         //@cond
510         template <typename Stat>
511         summary_stat& add_procheap_stat( const Stat& stat )
512         {
513             nAllocFromActive    += stat.allocFromActive();
514             nAllocFromPartial   += stat.allocFromPartial();
515             nAllocFromNew       += stat.allocFromNew();
516             nFreeCount          += stat.freeCount();
517             nPageAllocCount     += stat.blockAllocated();
518             nPageDeallocCount   += stat.blockDeallocated();
519             nDescAllocCount     += stat.descAllocCount();
520             nDescFull           += stat.descFull();
521             nBytesAllocated     += stat.allocatedBytes();
522             nBytesDeallocated   += stat.deallocatedBytes();
523
524             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
525             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
526             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
527             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
528
529             return *this;
530         }
531
532         template <typename Stat>
533         summary_stat& add_heap_stat( const Stat& stat )
534         {
535             nSysAllocCount      += stat.allocCount();
536             nSysFreeCount       += stat.freeCount();
537
538             nSysBytesAllocated  += stat.allocatedBytes();
539             nSysBytesDeallocated+= stat.deallocatedBytes();
540
541             return *this;
542         }
543         //@endcond
544     };
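
    // A sketch of gathering heap statistics with summary_stat. The exact signature of
    // Heap::summaryStat is assumed from the description above, and the counters are only
    // meaningful if non-empty procheap_stat / os_allocated_stat policies are configured:
    //
    //      cds::memory::michael::Heap<>        heap;
    //      cds::memory::michael::summary_stat  stBefore, stAfter;
    //      heap.summaryStat( stBefore );
    //      // ... allocate and free memory through heap ...
    //      heap.summaryStat( stAfter );
    //      stAfter -= stBefore;    // statistics accumulated between the two snapshots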
545
546     /// Michael's allocator
547     /**
548         This class provides the base functionality of Michael's allocator. It does not provide
549         the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
550         The heap interface is closer to the semantics of the \p malloc / \p free system functions.
551         The heap supports allocation of aligned and unaligned data.
552
553         The algorithm is based on a simplified version of
554             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
555
556         which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
557             \li [2002] Emery Berger "Memory Management for High-Performance Application", PhD thesis
558
559         This is a powerful, scalable, fully customizable heap with a lock-free fast path,
560         developed specifically for multi-threaded applications.
561         With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
562         You can manually bind any thread to any arena ("processor"). With the opt::sizeclass_selector option you can manage
563         allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation
564         like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
565         The opt::check_bounds feature can help you find memory buffer overflows.
566
567         Brief algorithm description from Michael's work:
568
569         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
570         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
571         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
572         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
573         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
574         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
575         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
576         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
577         in a lock-free manner.
578
579         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
580         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
581         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
582         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
583         available blocks of its original superblock by atomically updating its descriptor.
584
585         <b>Constraint</b>: one superblock may contain up to 2048 blocks. This constraint limits the maximum
586         superblock size.
587
588         Available \p Options:
589         - \ref opt::sys_topology - class that describes system topology needed for allocator.
590             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
591         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
592             Default is \ref malloc_heap.
593         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
594             Default is \ref aligned_malloc_heap
595         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
596             Default is \ref page_cached_allocator
597         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
598             for incoming allocation request.
599             Default is \ref default_sizeclass_selector
600         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
601             Default is \ref free_list_locked
602         - \ref opt::partial_list - option setter for a class to manage a list of partially filled superblocks
603             Default is \ref partial_list_locked
604         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
605             that is maintained by the heap.
606             Default is \ref procheap_empty_stat
607         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
608             allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector);
609             it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated from
610             the OS directly. The \p os_allocated_stat option sets a class to gather statistics for such large blocks.
611             Default is \ref os_allocated_empty
612         - \ref opt::check_bounds - a bound checker.
613             Default is no bound checker (cds::opt::none)
614
615         \par Usage:
616         The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
617
618         \code
619         #include <cds/memory/michael/allocator.h>
620
621         // Heap with explicitly defined options:
622         cds::memory::michael::Heap<
623             opt::aligned_heap< aligned_malloc_heap >,
624             opt::page_heap< page_cached_allocator<16, malloc_heap> >
625         >   myHeap;
626
627         // Heap with default options:
628         cds::memory::michael::Heap<>    myDefHeap;
629         \endcode
630
631         \par How to make a std-like allocator
632
633         There are significant differences between the heap and the <tt>std::allocator</tt> interface:
634             - The heap is stateful, while \p std::allocator is stateless.
635             - The heap has many more template parameters than \p std::allocator
636             - The heap has a low-level interface for memory allocation only, unlike the allocator
637                 interface that can construct/destroy objects of any type T.
638
639         To convert the heap interface into a \p std::allocator -like interface you should:
640             - Declare an object of class cds::memory::michael::Heap with the necessary
641                 template parameters; this is usually a static object
642             - Create a class with the \p std::allocator interface that uses the heap's functions.
643         \code
644         #include <cds/memory/michael/allocator.h>
645
646         template <class T>
647         class MichaelAllocator
648         {
649             typedef std::allocator<T>               std_allocator;
650             typedef cds::memory::michael::Heap<>    michael_heap;
651
652             // Michael heap static object
653             static michael_heap     s_Heap;
654         public:
655             // Declare typedefs from std::allocator
656             typedef typename std_allocator::const_pointer   const_pointer;
657             typedef typename std_allocator::pointer         pointer;
658             typedef typename std_allocator::const_reference const_reference;
659             typedef typename std_allocator::reference       reference;
660             typedef typename std_allocator::difference_type difference_type;
661             typedef typename std_allocator::size_type       size_type;
662             typedef typename std_allocator::value_type      value_type;
663
664             // Allocation function
665             pointer allocate( size_type _Count, const void* _Hint )
666             {
667                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
668             }
669
670             // Deallocation function
671             void deallocate( pointer _Ptr, size_type _Count )
672             {
673                 s_Heap.free( _Ptr );
674             }
675
676             // Other std::allocator specific functions: address, construct, destroy, etc.
677             ...
678
679             // Rebinding allocator to other type
680             template <class _Other>
681             struct rebind {
682                 typedef MichaelAllocator<_Other> other;
683             };
684         };
685
686         // Definition of the static heap member (at namespace scope):
687         template <class T>
688         typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
689         \endcode
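
        The resulting class can then be used like any other standard allocator; for example
        (a sketch, using the \p MichaelAllocator class from the listing above):
        \code
        std::vector< int, MichaelAllocator<int> >  vec;
        vec.push_back( 42 );    // the vector's storage is allocated from the Michael heap
        \endcode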
690     */
691     template <typename... Options>
692     class Heap {
693     protected:
694
695         //@cond
696         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
697         static const unsigned int c_nDefaultBlockAlignment = 8;
698
699         struct default_options {
700             typedef cds::OS::topology           sys_topology;
701             typedef malloc_heap                 system_heap;
702             typedef page_cached_allocator<>     page_heap;
703             typedef aligned_malloc_heap         aligned_heap;
704             typedef default_sizeclass_selector  sizeclass_selector;
705             typedef free_list_locked<cds::lock::Spin>  free_list;
706             typedef partial_list_locked<cds::lock::Spin>    partial_list;
707             typedef procheap_empty_stat         procheap_stat;
708             typedef os_allocated_empty          os_allocated_stat;
709             typedef cds::opt::none              check_bounds;
710         };
711         //@endcond
712
713     protected:
714         //@cond
715         typedef typename opt::make_options<default_options, Options...>::type   options;
716         //@endcond
717
718         //@cond
719         typedef unsigned char   byte;
720         //@endcond
721     public:
722         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
723         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
724         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
725         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
726         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
727         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
728         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
729         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
730
731         // forward declarations
732         //@cond
733         struct superblock_desc;
734         struct processor_heap_base;
735         struct processor_desc;
736         //@endcond
737
738         /// Superblock states
739         /**
740             A superblock can be in one of four states: \p ACTIVE, \p FULL,
741             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
742             superblock in a heap, or if a thread intends to try to install it
743             as such. A superblock is \p FULL if all its blocks are either allocated
744             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
745             and contains unreserved available blocks. A superblock is
746             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
747         */
748         enum superblock_state {
749             SBSTATE_ACTIVE  = 0,    ///< superblock is active
750             SBSTATE_FULL    = 1,    ///< superblock is full
751             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
752             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
753         };
754
755         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
756
757         /// Anchor of the superblock descriptor. Updated by CAS
758         struct anchor_tag {
759             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
760             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
761             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
762             unsigned long long    tag:40    ;   ///< ABA prevention tag
763         };
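
        // Note: the four bit-fields above pack into a single 64-bit word
        // (11 + 11 + 2 + 40 = 64 bits), so the whole anchor can be updated with one CAS.
        // The 11-bit avail/count fields are what limit a superblock to
        // c_nMaxBlockInSuperBlock = 2048 blocks.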
764
765         /// Superblock descriptor
766         struct superblock_desc
767             : public options::free_list::item_hook
768             , public options::partial_list::item_hook
769         {
770             atomics::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
771             byte *              pSB         ;   ///< ptr to superblock
772             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
773             unsigned int        nBlockSize  ;   ///< block size in bytes
774             unsigned int        nCapacity   ;   ///< superblock size/block size
775
776             //@cond
777             superblock_desc()
778                 : pSB(nullptr)
779                 , pProcHeap( nullptr )
780             {}
781             //@endcond
782         };
783
784         //@cond
785         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
786         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
787         //@endcond
788
789 #if CDS_BUILD_BITS == 32
790         /// Allocated block header
791         /**
792             Each allocated block has 8-byte header.
793             The header contains pointer to owner superblock descriptor and the redirection flag.
794             If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
795             \code
796                 +---------------+
797                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
798                 +---------------+
799                 |               | <- memory allocated
800                 |   memory      |
801                 |               |
802                 +---------------+
803             \endcode
804             If the block has been allocated by \ref alloc_aligned, the returned pointer may need to be shifted to satisfy the requested alignment.
805             In this case the redirection flag is 1 and the block's structure is:
806             \code
807                 +---------------+
808             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
809             |   +---------------+
810             |   |   padding     |
811             |   |   (unused)    |
812             |   |               |
813             |   +---------------+
814             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
815                 +---------------+
816                 |               | <- memory allocated
817                 |   memory      |
818                 |               |
819                 +---------------+
820             \endcode
821         */
822         class block_header
823         {
824         //@cond
825             enum {
826                 bitAligned = 1,
827                 bitOSAllocated = 2
828             };
829
830             union {
831                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
832                 atomic32u_t         nSize       ;   // block size (allocated form OS)
833             };
834             atomic32u_t         nFlags;
835
836         public:
837             void  set( superblock_desc * pdesc, atomic32u_t isAligned )
838             {
839                 pDesc = pdesc;
840                 nFlags = isAligned ? bitAligned : 0;
841             }
842
843             superblock_desc * desc()
844             {
845                 assert( (nFlags & bitOSAllocated) == 0 );
846                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
847             }
848
849             block_header * begin()
850             {
851                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
852             }
853
854             bool isAligned() const
855             {
856                 return (nFlags & bitAligned) != 0;
857             }
858
859             bool isOSAllocated() const
860             {
861                 return (nFlags & bitOSAllocated) != 0;
862             }
863
864             void setOSAllocated( size_t sz )
865             {
866                 nSize = sz;
867                 nFlags = bitOSAllocated;
868             }
869
870             size_t getOSAllocSize() const
871             {
872                 assert( isOSAllocated() );
873                 return nSize;
874             }
875
876         //@endcond
877         };
878 #elif CDS_BUILD_BITS == 64
879         //@cond
880         class block_header
881         {
882             enum {
883                 bitAligned = 1,
884                 bitOSAllocated = 2
885             };
886             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
887             // If bitOSAllocated is set, pDesc contains the size of the memory block
888             // allocated from the OS
889             marked_desc_ptr     pDesc;
890         public:
891             void  set( superblock_desc * pdesc, atomic32u_t isAligned )
892             {
893                 pDesc = marked_desc_ptr( pdesc, isAligned );
894             }
895
896             superblock_desc * desc()
897             {
898                 assert( !isOSAllocated() );
899                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
900             }
901
902             block_header * begin()
903             {
904                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
905             }
906
907             bool isAligned() const
908             {
909                 return (pDesc.bits() & bitAligned) != 0;
910             }
911
912             bool isOSAllocated() const
913             {
914                 return (pDesc.bits() & bitOSAllocated) != 0;
915             }
916
917             void setOSAllocated( size_t nSize )
918             {
919
920                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
921             }
922
923             size_t getOSAllocSize() const
924             {
925                 assert( isOSAllocated() );
926                 return reinterpret_cast<uptr_atomic_t>( pDesc.ptr() ) >> 2;
927             }
928
929         };
930         //@endcond
931 #else
932 #       error "Unexpected value of CDS_BUILD_BITS"
933 #endif  // CDS_BUILD_BITS
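
        // In both layouts the 8-byte block_header sits immediately before the user memory
        // (see the 32-bit diagram above), so the header of a block can be reached from the
        // user pointer roughly as in this sketch (pUser is a hypothetical pointer previously
        // returned by the heap):
        //
        //      block_header * pHdr = reinterpret_cast<block_header *>( pUser ) - 1;
        //      superblock_desc * pDesc = pHdr->isOSAllocated() ? nullptr : pHdr->desc();
        //      // desc() follows the aligned-block redirection, if any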
934
935         //@cond
936         struct free_block_header: block_header {
937             unsigned int    nNextFree;
938         };
939         //@endcond
940
941 #if CDS_BUILD_BITS == 32
942         /// Processor heap's \p active field
943         /**
944             The \p active field in the processor heap structure is primarily a pointer to the descriptor
945             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
946             guaranteed that the active superblock has at least one block available for reservation.
947             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
948             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
949             of blocks available for reservation in the active superblock less one. That is, if the value
950             of credits is n, then the active superblock contains n+1 blocks available for reservation
951             through the \p active field. Note that the number of blocks in a superblock is not limited
952             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
953             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
954             atomically decrements credits while validating that the active superblock is still valid.
955         */
956         class active_tag {
957         //@cond
958             superblock_desc *       pDesc;
959             atomic32u_t             nCredits;
960
961         public:
962             static const unsigned int c_nMaxCredits = 0 - 1;
963
964         public:
965             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
966                 : pDesc( nullptr )
967                 , nCredits(0)
968             {}
969
970 #   ifdef CDS_CXX11_EXPLICITLY_DEFAULTED_FUNCTION_SUPPORT
971             active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
972             ~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
973             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
974 #       if defined(CDS_MOVE_SEMANTICS_SUPPORT) && !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
975             active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
976             active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
977 #       endif
978 #   endif
979
980             /// Returns pointer to superblock descriptor
981             superblock_desc * ptr() const
982             {
983                 return pDesc;
984             }
985
986             /// Sets superblock descriptor
987             void ptr( superblock_desc * p )
988             {
989                 pDesc = p;
990             }
991
992             unsigned int credits() const
993             {
994                 return nCredits;
995             }
996
997             void credits( unsigned int n )
998             {
999                 nCredits = n;
1000             }
1001
1002             void clear()
1003             {
1004                 pDesc = nullptr;
1005                 nCredits = 0;
1006             }
1007
1008             void set( superblock_desc * pSB, unsigned int n )
1009             {
1010                 pDesc = pSB;
1011                 nCredits = n;
1012             }
1013         //@endcond
1014         };
1015 #elif CDS_BUILD_BITS == 64
1016         //@cond
1017         class active_tag
1018         {
1019         public:
1020             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1021         protected:
1022             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1023             marked_desc_ptr pDesc;
1024
1025         public:
1026             active_tag() CDS_NOEXCEPT
1027                 : pDesc( nullptr )
1028             {}
1029 #   ifdef CDS_CXX11_EXPLICITLY_DEFAULTED_FUNCTION_SUPPORT
1030             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1031             //active_tag() CDS_NOEXCEPT_DEFAULTED = default;
1032             active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
1033             ~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
1034             active_tag& operator=(active_tag const&) CDS_NOEXCEPT_DEFAULTED = default;
1035 #       if defined(CDS_MOVE_SEMANTICS_SUPPORT) && !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1036             active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
1037             active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
1038 #       endif
1039 #   endif
1040             superblock_desc *    ptr() const
1041             {
1042                 return pDesc.ptr();
1043             }
1044
1045             void ptr( superblock_desc * p )
1046             {
1047                 assert( (reinterpret_cast<uptr_atomic_t>(p) & c_nMaxCredits) == 0 );
1048                 pDesc = marked_desc_ptr( p, pDesc.bits());
1049             }
1050
1051             unsigned int credits()
1052             {
1053                 return (unsigned int) pDesc.bits();
1054             }
1055
1056             void credits( unsigned int n )
1057             {
1058                 assert( n <= c_nMaxCredits );
1059                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1060             }
1061
1062             void clear()
1063             {
1064                 pDesc = marked_desc_ptr();
1065             }
1066
1067             void set( superblock_desc * pSB, unsigned int n )
1068             {
1069                 assert( (reinterpret_cast<uptr_atomic_t>(pSB) & c_nMaxCredits) == 0 );
1070                 pDesc = marked_desc_ptr( pSB, n );
1071             }
1072
1073         };
1074         //@endcond
1075 #else
1076 #       error "Unexpected value of CDS_BUILD_BITS"
1077 #endif  // CDS_BUILD_BITS
1078
1079
1080         /// Processor heap
1081         struct processor_heap_base
1082         {
1083             CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1084             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1085             const size_class *  pSizeClass  ;   ///< pointer to size class
1086             atomics::atomic<superblock_desc *>   pPartial    ;   ///< pointer to partial filled superblock (may be \p nullptr)
1087             partial_list        partialList ;   ///< list of partial filled superblocks owned by the processor heap
1088             unsigned int        nPageIdx    ;   ///< page size-class index; equal to \ref c_nPageSelfAllocation for a "small page"
1089
1090             /// Small page marker
1091             /**
1092                 If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
1093                 This optimization allows the allocator to request system memory more regularly, in blocks of 1M, which leads
1094                 to less memory fragmentation.
1095             */
1096             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1097
1098             procheap_stat       stat        ;   ///< heap statistics
1099             //processor_heap_statistics   stat;
1100
1101             //@cond
1102             processor_heap_base() CDS_NOEXCEPT
1103                 : pProcDesc( nullptr )
1104                 , pSizeClass( nullptr )
1105                 , pPartial( nullptr )
1106             {
1107                 assert( (reinterpret_cast<uptr_atomic_t>(this) & (c_nAlignment - 1)) == 0 );
1108             }
1109             //@endcond
1110
1111             /// Get partial superblock owned by the processor heap
1112             superblock_desc * get_partial()
1113             {
1114                 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1115                 do {
1116                     if ( !pDesc ) {
1117                         pDesc =  partialList.pop();
1118                         break;
1119                     }
1120                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1121
1122                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1123                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1124                 return pDesc;
1125             }
1126
1127             /// Add partial superblock \p pDesc to the list
1128             void add_partial( superblock_desc * pDesc )
1129             {
1130                 assert( pPartial != pDesc );
1131                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1132
1133                 superblock_desc * pCur = nullptr;
1134                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1135                     partialList.push( pDesc );
1136             }
1137
1138
1139             /// Remove superblock \p pDesc from the list of partial superblock
1140             bool unlink_partial( superblock_desc * pDesc )
1141             {
1142                 return partialList.unlink( pDesc );
1143             }
1144         };
1145
1146         /// Processor heap padded to the alignment boundary (\p c_nAlignment)
1147         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1148
1149         /// Processor descriptor
1150         struct processor_desc
1151         {
1152             processor_heap *    arrProcHeap     ; ///< array of processor heap
1153             free_list           listSBDescFree  ; ///< List of free superblock descriptors
1154             page_heap *         pageHeaps       ; ///< array of page heap (one for each page size)
1155
1156             //@cond
1157             processor_desc()
1158                 : arrProcHeap( nullptr )
1159                 , pageHeaps( nullptr )
1160             {}
1161             //@endcond
1162         };
1163
1164
1165     protected:
1166         sys_topology        m_Topology           ;  ///< System topology
1167         system_heap         m_LargeHeap          ;  ///< Heap for large block
1168         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1169         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1170         atomics::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1171         unsigned int        m_nProcessorCount    ;  ///< Processor count
1172         bound_checker       m_BoundChecker       ;  ///< Bound checker
1173
1174         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1175
1176     protected:
1177         //@cond
1178
1179         /// Allocates large block from system memory
1180         block_header * alloc_from_OS( size_t nSize )
1181         {
1182             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1183             m_OSAllocStat.incBytesAllocated( nSize );
1184             p->setOSAllocated( nSize );
1185             return p;
1186         }
1187
1188         /// Allocates from the active superblock if possible
1189         block_header * alloc_from_active( processor_heap * pProcHeap )
1190         {
1191             active_tag  oldActive;
1192             int nCollision = -1;
1193
1194             // Reserve block
1195             while ( true ) {
1196                 ++nCollision;
1197                 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1198                 if ( !oldActive.ptr() )
1199                     return nullptr;
1200                 unsigned int nCredits = oldActive.credits();
1201                 active_tag  newActive   ; // default = 0
1202                 if ( nCredits != 0 ) {
1203                     newActive = oldActive;
1204                     newActive.credits( nCredits - 1 );
1205                 }
1206                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1207                     break;
1208             }
1209
1210             if ( nCollision )
1211                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1212
1213             // pop block
1214             superblock_desc * pDesc = oldActive.ptr();
1215
1216             anchor_tag  oldAnchor;
1217             anchor_tag  newAnchor;
1218             byte * pAddr;
1219             unsigned int nMoreCredits = 0;
1220
1221             nCollision = -1;
1222             do {
1223                 ++nCollision;
1224                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1225
1226                 assert( oldAnchor.avail < pDesc->nCapacity );
1227                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1228                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1229                 newAnchor.tag += 1;
1230
1231                 if ( oldActive.credits() == 0 ) {
1232                     // state must be ACTIVE
1233                     if ( oldAnchor.count == 0 )
1234                         newAnchor.state = SBSTATE_FULL;
1235                     else {
1236                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1237                         newAnchor.count -= nMoreCredits;
1238                     }
1239                 }
1240             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1241
1242             if ( nCollision )
1243                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1244
1245             assert( newAnchor.state != SBSTATE_EMPTY );
1246
1247             if ( newAnchor.state == SBSTATE_FULL )
1248                 pProcHeap->stat.incDescFull();
1249             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1250                 update_active( pProcHeap, pDesc, nMoreCredits );
1251
1252             pProcHeap->stat.incAllocFromActive();
1253
1254             // block_header fields need not be set up here:
1255             // they were set in alloc_from_new_superblock
1256             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1257             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1258             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1259
1260             return reinterpret_cast<block_header *>( pAddr );
1261         }
1262
1263         /// Allocates from a partially filled superblock if possible
1264         block_header * alloc_from_partial( processor_heap * pProcHeap )
1265         {
1266         retry:
1267             superblock_desc * pDesc = pProcHeap->get_partial();
1268             if ( !pDesc )
1269                 return nullptr;
1270
1271             // reserve blocks
1272             anchor_tag  oldAnchor;
1273             anchor_tag  newAnchor;
1274             //byte * pAddr;
1275             unsigned int nMoreCredits = 0;
1276
1277             int nCollision = -1;
1278             do {
1279                 ++nCollision;
1280
1281                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1282                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1283                     free_superblock( pDesc );
1284                     goto retry;
1285                 }
1286
1287                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1288                 newAnchor.count -= nMoreCredits + 1;
1289                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1290                 newAnchor.tag += 1;
1291             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1292
1293             if ( nCollision )
1294                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1295
1296             if ( newAnchor.state == SBSTATE_FULL )
1297                 pProcHeap->stat.incDescFull();
1298
1299             // Now, the thread is guaranteed to have reserved one or more blocks
1300             // pop reserved block
1301             byte * pAddr;
1302             nCollision = -1;
1303             do {
1304                 ++nCollision;
1305
1306                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1307
1308                 assert( oldAnchor.avail < pDesc->nCapacity );
1309                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1310                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1311                 ++newAnchor.tag;
1312             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1313
1314             if ( nCollision )
1315                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1316
1317             assert( newAnchor.state != SBSTATE_EMPTY );
1318
1319             pProcHeap->stat.incAllocFromPartial();
1320
1321             if ( nMoreCredits > 0 )
1322                 update_active( pProcHeap, pDesc, nMoreCredits );
1323
1324             // block_header fields need not be set up here:
1325             // they were already set in alloc_from_new_superblock
1326             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1327             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1328             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1329
1330             return reinterpret_cast<block_header *>( pAddr );
1331         }
1332
1333         /// Allocates a new superblock and returns its first block
1334         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1335         {
1336             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1337             assert( pDesc != nullptr );
1338             pDesc->pSB = new_superblock_buffer( pProcHeap );
1339
1340             anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1341             anchor.tag += 1;
1342
1343             // Make single-linked list of free blocks in superblock
1344             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1345             unsigned int nNext = 0;
1346             const unsigned int nBlockSize = pDesc->nBlockSize;
1347             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1348                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1349                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1350             }
1351             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
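                 // After this loop block i stores i + 1, the index of the next free block, so the
                 // initial free list is 0 -> 1 -> ... -> nCapacity - 1. Block 0 is handed to the
                 // caller below, and new_superblock_desc() pre-sets anchor.avail to 1, the first
                 // block available for popping.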
1352
1353             active_tag newActive;
1354             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1355
1356             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1357             anchor.state = SBSTATE_ACTIVE;
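                 // Accounting: one block goes to the caller, newActive.credits() + 1 blocks travel
                 // with the active tag, and anchor.count holds the rest. Illustrative example,
                 // assuming c_nMaxCredits == 64: with nCapacity == 128 the caller gets block 0,
                 // the active tag carries 63 credits (64 blocks) and anchor.count == 63.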
1358             pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1359
1360             active_tag curActive;
1361             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1362                 pProcHeap->stat.incAllocFromNew();
1363                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1364                 return reinterpret_cast<block_header *>( pDesc->pSB );
1365             }
1366
1367             free_superblock( pDesc );
1368             return nullptr;
1369         }
1370
1371         /// Finds the appropriate processor heap for the selected size class
1372         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1373         {
1374             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1375
1376             unsigned int nProcessorId = m_Topology.current_processor();
1377             assert( nProcessorId < m_nProcessorCount );
1378
1379             if ( nProcessorId >= m_nProcessorCount )
1380                 nProcessorId = 0;
1381
1382             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1383             while ( !pDesc ) {
1384
1385                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1386                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1387                     pDesc = pNewDesc;
1388                     break;
1389                 }
1390                 free_processor_desc( pNewDesc );
1391             }
1392
1393             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1394         }
1395
1396         /// Updates the \p active field of processor heap \p pProcHeap
1397         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1398         {
1399             assert( pProcHeap == pDesc->pProcHeap );
1400
1401             active_tag  nullActive;
1402             active_tag  newActive;
1403             newActive.set( pDesc, nCredits - 1 );
1404
1405             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1406                 return;
1407
1408             // Someone installed another active superblock.
1409             // Return credits to superblock and make it partial
1410
1411             anchor_tag  oldAnchor;
1412             anchor_tag  newAnchor;
1413
1414             do {
1415                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1416                 newAnchor.count += nCredits;
1417                 newAnchor.state = SBSTATE_PARTIAL;
1418             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1419
1420             pDesc->pProcHeap->add_partial( pDesc );
1421         }
1422
1423         /// Allocates new processor descriptor
1424         processor_desc * new_processor_desc( unsigned int nProcessorId )
1425         {
1426             processor_desc * pDesc;
1427             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1428
1429             /*
1430                 Processor descriptor layout
1431
1432                 proc_desc -  64-byte alignment
1433                 page_heap[0] 64-byte alignment
1434                 page_heap[1] 64-byte alignment
1435                 ...
1436                 page_heap[P] 64-byte alignment
1437
1438                 proc_heap[0] 64-byte alignment
1439                 proc_heap[1] 64-byte alignment
1440                 ...
1441                 proc_heap[N] 64-byte alignment
1442             */
1443
1444             const size_t szDesc =
1445                 ( sizeof(processor_desc)
1446                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1447                     + c_nAlignment - 1
1448                 ) / c_nAlignment;
1450
1451             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
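                 // Illustrative example, assuming c_nAlignment == 64, sizeof(processor_desc) == 192
                 // and two 16-byte page heaps: szDesc == (192 + 32 + 63) / 64 == 4, so the processor
                 // heaps begin 4 * 64 == 256 bytes after the start of the allocation.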
1452
1453             static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1454
1455             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1456
1457             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1458             for ( size_t i = 0; i < nPageHeapCount; ++i )
1459                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1460
1461             // initialize processor heaps
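                 // arrProcHeap is placed at the first c_nAlignment boundary after the page heaps;
                 // (addr + c_nAlignment - 1) & ~(c_nAlignment - 1) rounds the address up to that boundary.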
1462             pDesc->arrProcHeap =
1463                 reinterpret_cast<processor_heap *>(
1464                     reinterpret_cast<uptr_atomic_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1465                     & ~(uptr_atomic_t(c_nAlignment) - 1)
1466                 );
1467
1468             processor_heap * pProcHeap = pDesc->arrProcHeap;
1469             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1470             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1471                 new (pProcHeap) processor_heap();
1472                 pProcHeap->pProcDesc = pDesc;
1473                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1474                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1475                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1476                 else
1477                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1478             }
1479             return pDesc;
1480         }
1481
1482
1483         void free_processor_heap( processor_heap * pProcHeap )
1484         {
1485             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1486                 superblock_desc * pDesc;
1487
1488                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1489                     free( pDesc->pSB );
1490                     m_AlignedHeap.free( pDesc );
1491                 }
1492
1493                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1494                 if ( pPartial ) {
1495                     free( pPartial->pSB );
1496                     m_AlignedHeap.free( pPartial );
1497                 }
1498
1499                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1500                 if ( pDesc ) {
1501                     free( pDesc->pSB );
1502                     m_AlignedHeap.free( pDesc );
1503                 }
1504             }
1505             else {
1506                 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1507                 superblock_desc * pDesc;
1508
1509                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1510                     pageHeap.free( pDesc->pSB );
1511                     m_AlignedHeap.free( pDesc );
1512                 }
1513
1514                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1515                 if ( pPartial ) {
1516                     pageHeap.free( pPartial->pSB );
1517                     m_AlignedHeap.free( pPartial );
1518                 }
1519
1520                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1521                 if ( pDesc ) {
1522                     pageHeap.free( pDesc->pSB );
1523                     m_AlignedHeap.free( pDesc );
1524                 }
1525             }
1526             pProcHeap->~processor_heap();
1527         }
1528
1529         /// Frees processor descriptor
1530         void free_processor_desc( processor_desc * pDesc )
1531         {
1532             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1533
1534             for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1535                 free_processor_heap( pDesc->arrProcHeap + j );
1536
1537             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1538                 m_AlignedHeap.free( pSBDesc );
1539
1540             for (size_t i = 0; i < nPageHeapCount; ++i )
1541                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1542
1543             //m_IntHeap.free( pDesc->pageHeaps );
1544             pDesc->pageHeaps = nullptr;
1545
1546             pDesc->processor_desc::~processor_desc();
1547             m_AlignedHeap.free( pDesc );
1548         }
1549
1550         /// Allocates new superblock descriptor
1551         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1552         {
1553             anchor_tag anchor;
1554             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1555             if ( pDesc == nullptr ) {
1556                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1557                 assert( (uptr_atomic_t(pDesc) & (c_nAlignment - 1)) == 0 );
1558
1559                 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1560                 anchor.tag = 0;
1561                 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1562
1563                 pProcHeap->stat.incDescAllocCount();
1564             }
1565             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1566             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1567             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1568             pDesc->pProcHeap = pProcHeap;
1569
1570             anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1571             anchor.avail = 1;
1572             pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1573
1574             return pDesc;
1575         }
1576
1577         /// Allocates superblock page
1578         byte * new_superblock_buffer( processor_heap * pProcHeap )
1579         {
1580             pProcHeap->stat.incBlockAllocated();
1581             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1582                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1583             }
1584             else {
1585                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1586             }
1587         }
1588
1589         /// Frees superblock descriptor and its page
1590         void free_superblock( superblock_desc * pDesc )
1591         {
1592             pDesc->pProcHeap->stat.incBlockDeallocated();
1593             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1594             if ( pDesc->pSB ) {
1595                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1596                     free( pDesc->pSB );
1597                 }
1598                 else {
1599                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1600                 }
1601             }
1602             pProcDesc->listSBDescFree.push( pDesc );
1603         }
1604
1605         /// Allocate memory block
1606         block_header * int_alloc(
1607             size_t nSize    ///< Size of memory block to allocate in bytes
1608             )
1609         {
1610             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1611             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1612                 return alloc_from_OS( nSize );
1613             }
1614             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1615
1616             block_header * pBlock;
1617             processor_heap * pProcHeap;
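                 // Fallback order: try the processor heap's active superblock first, then a
                 // partially filled superblock, then carve a new superblock; each step may
                 // return nullptr under contention, hence the retry loop.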
1618             while ( true ) {
1619                 pProcHeap = find_heap( nSizeClassIndex );
1620                 if ( !pProcHeap )
1621                     return alloc_from_OS( nSize );
1622
1623                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1624                     break;
1625                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1626                     break;
1627                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1628                     break;
1629             }
1630
1631             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1632
1633             assert( pBlock != nullptr );
1634             return pBlock;
1635         }
1636
1637         //@endcond
1638     public:
1639         /// Heap constructor
1640         Heap()
1641         {
1642             // Explicit libcds initialization is needed because the heap may be constructed as a static object
1643             cds::Initialize();
1644
1645             m_nProcessorCount = m_Topology.processor_count();
1646             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1647                 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
1648             memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount )    ;   // NOTE: memset over an array of atomic<> is formally non-portable; it assumes all-zero bytes form a valid null pointer value
1649         }
1650
1651         /// Heap destructor
1652         /**
1653             The destructor frees all memory allocated by the heap.
1654         */
1655         ~Heap()
1656         {
1657             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1658                 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1659                 if ( pDesc )
1660                     free_processor_desc( pDesc );
1661             }
1662
1663             m_AlignedHeap.free( m_arrProcDesc );
1664
1665             // Explicit termination of libcds
1666             cds::Terminate();
1667         }
1668
1669         /// Allocate memory block
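             ///
             /// The returned pointer addresses the user area just past an internal \p block_header;
             /// when bound checking is enabled a trailer follows the user area, so the underlying
             /// block is somewhat larger than \p nSize.
             ///
             /// A minimal usage sketch, assuming \p my_heap_t is some configured instantiation
             /// of this class (the alias is hypothetical):
             /// \code
             /// my_heap_t heap;
             /// void * p = heap.alloc( 256 );   // at least 256 usable bytes
             /// // ... use the memory ...
             /// heap.free( p );
             /// \endcode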
1670         void * alloc(
1671             size_t nSize    ///< Size of memory block to allocate in bytes
1672         )
1673         {
1674             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1675
1676             // Bound checking is only for our blocks
1677             if ( !pBlock->isOSAllocated() ) {
1678                 // the block is allocated from our heap - bound checker is applicable
1679                 m_BoundChecker.make_trailer(
1680                     reinterpret_cast<byte *>(pBlock + 1),
1681                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1682                     nSize
1683                 );
1684             }
1685
1686             return pBlock + 1;
1687         }
1688
1689         /// Free previously allocated memory block
1690         void free(
1691             void * pMemory  ///< Pointer to memory block to free
1692         )
1693         {
1694             if ( !pMemory )
1695                 return;
1696
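                 // pMemory points just past a block_header; for blocks returned by alloc_aligned
                 // this may be a redirect header, and begin() resolves it to the real block header.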
1697             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1698             block_header * pBlock = pRedirect->begin();
1699
1700             if ( pBlock->isOSAllocated() ) {
1701                 // Block has been allocated from OS
1702                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1703                 m_LargeHeap.free( pBlock );
1704                 return;
1705             }
1706
1707             assert( !pBlock->isAligned() );
1708             superblock_desc * pDesc = pBlock->desc();
1709
1710             m_BoundChecker.check_bounds(
1711                 pRedirect + 1,
1712                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1713                 pDesc->nBlockSize
1714             );
1715
1716
1717             anchor_tag oldAnchor;
1718             anchor_tag newAnchor;
1719             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1720
1721             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1722
1723             oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1724             do {
1725                 newAnchor = oldAnchor;
1726                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1727                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1728                 newAnchor.tag += 1;
1729
1730                 assert( oldAnchor.state != SBSTATE_EMPTY );
1731
1732                 if ( oldAnchor.state == SBSTATE_FULL )
1733                     newAnchor.state = SBSTATE_PARTIAL;
1734
1735                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1736                     //pProcHeap = pDesc->pProcHeap;
1737                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1738                     newAnchor.state = SBSTATE_EMPTY;
1739                 }
1740                 else
1741                     newAnchor.count += 1;
1742             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1743
1744             pProcHeap->stat.incFreeCount();
1745
1746             if ( newAnchor.state == SBSTATE_EMPTY ) {
1747                 if ( pProcHeap->unlink_partial( pDesc ))
1748                     free_superblock( pDesc );
1749             }
1750             else if (oldAnchor.state == SBSTATE_FULL ) {
1751                 assert( pProcHeap != nullptr );
1752                 pProcHeap->stat.decDescFull();
1753                 pProcHeap->add_partial( pDesc );
1754             }
1755         }
1756
1757         /// Reallocate memory block
1758         /**
1759             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1760             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1761
1762             If there is not enough available memory to expand the block to the given size,
1763             the original block is left unchanged, and \p nullptr is returned.
1764
1765             An aligned memory block cannot be reallocated: if \p pMemory has been allocated by \ref alloc_aligned,
1766             then the return value is \p nullptr and the original block is left unchanged.
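
             A minimal usage sketch, assuming \p my_heap_t is some configured instantiation
             of this class (the alias is hypothetical):
             \code
             my_heap_t heap;
             void * p = heap.alloc( 100 );
             void * q = heap.realloc( p, 500 );  // may return a different address
             if ( q )
                 p = q;          // the old contents have been copied to the new block
             heap.free( p );
             \endcode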
1767         */
1768         void * realloc(
1769             void *  pMemory,    ///< Pointer to previously allocated memory block
1770             size_t  nNewSize    ///< New size of memory block, in bytes
1771         )
1772         {
1773             if ( nNewSize == 0 ) {
1774                 free( pMemory );
1775                 return nullptr;
1776             }
1777
1778             const size_t nOrigSize = nNewSize;
1779             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1780
1781             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1782
1783             // Reallocation of aligned block is not possible
1784             if ( pBlock->isAligned() ) {
1785                 assert( false );
1786                 return nullptr;
1787             }
1788
1789             if ( pBlock->isOSAllocated() ) {
1790                 // The block has been allocated from OS
1791                 size_t nCurSize = pBlock->getOSAllocSize();
1792
1793                 if ( nCurSize >= nNewSize )
1794                     return pMemory;
1795
1796                 // Grow block size
1797                 void * pNewBuf = alloc( nOrigSize );
1798                 if ( pNewBuf ) {
1799                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1800                     free( pMemory );
1801                 }
1802                 return pNewBuf;
1803             }
1804
1805             superblock_desc * pDesc = pBlock->desc();
1806             if ( pDesc->nBlockSize >= nNewSize ) {
1807                 // In-place reallocation
1808                 m_BoundChecker.make_trailer(
1809                     reinterpret_cast<byte *>(pBlock + 1),
1810                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1811                     nOrigSize
1812                     );
1813
1814                 return pMemory;
1815             }
1816
1817             void * pNew = alloc( nNewSize );
1818             if ( pNew ) {
1819                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1820                 free( pMemory );
1821                 return pNew;
1822             }
1823
1824             return nullptr;
1825         }
1826
1827         /// Allocate aligned memory block
1828         void * alloc_aligned(
1829             size_t nSize,       ///< Size of memory block to allocate in bytes
1830             size_t nAlignment   ///< Alignment
1831         )
1832         {
1833             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1834                 void * p = alloc( nSize );
1835                 assert( (reinterpret_cast<uptr_atomic_t>(p) & (nAlignment - 1)) == 0 );
1836                 return p;
1837             }
1838
1839             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1840
1841             block_header * pRedirect;
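                 // The block was over-allocated by nAlignment bytes. If the natural user pointer
                 // (pBlock + 1) is not suitably aligned, a redirect block_header is placed just
                 // below the first aligned address inside the block; its "aligned" bit is set and
                 // it stores the address of the real header, so free() can recover the block.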
1842             if ( (reinterpret_cast<uptr_atomic_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1843                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uptr_atomic_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1844                 assert( pRedirect != pBlock );
1845                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1846
1847                 assert( (reinterpret_cast<uptr_atomic_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1848             }
1849             else
1850                 pRedirect = pBlock;
1851
1852
1853             // Bound checking is only for our blocks
1854             if ( !pBlock->isOSAllocated() ) {
1855                 // the block is allocated from our heap - bound checker is applicable
1856                 m_BoundChecker.make_trailer(
1857                     reinterpret_cast<byte *>(pRedirect + 1),
1858                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1859                     nSize
1860                 );
1861             }
1862
1863             return pRedirect + 1;
1864         }
1865
1866         /// Free aligned memory block previously allocated by \ref alloc_aligned
1867         void free_aligned(
1868             void * pMemory      ///< Pointer to memory block to free
1869         )
1870         {
1871             free( pMemory );
1872         }
1873
1874     public:
1875
1876         /// Get instant summary statistics
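             /// The call aggregates the per-processor heap statistics and the OS-allocation
             /// statistics into \p st; it does not synchronize with concurrent allocations,
             /// so the numbers are a best-effort snapshot.
             ///
             /// A minimal usage sketch (\p heap is an instance of this class):
             /// \code
             /// summary_stat st;    // type name as used by this header
             /// heap.summaryStat( st );
             /// \endcode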
1877         void summaryStat( summary_stat& st )
1878         {
1879             size_t nProcHeapCount = m_SizeClassSelector.size();
1880             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1881                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1882                 if ( pProcDesc ) {
1883                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1884                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1885                         if ( pProcHeap ) {
1886                             st.add_procheap_stat( pProcHeap->stat );
1887                         }
1888                     }
1889                 }
1890             }
1891
1892             st.add_heap_stat( m_OSAllocStat );
1893         }
1894     };
1895
1896 }}} // namespace cds::memory::michael
1897
1898 #endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H