1 //$$CDS-header$$
2
3 #ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
5
6 /*
7     Michael allocator implementation
8     Source:
9         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
10
11     Editions:
12     2011.09.07 khizmax  Optimization: small pages (about 64K) are allocated by the Heap::alloc call.
13                         This optimization allows system memory to be allocated more regularly,
14                         in blocks of 1M, which leads to less memory fragmentation.
15     2011.01.02 khizmax  Created
16 */
17
18 #include <cds/init.h>
19 #include <cds/memory/michael/options.h>
20 #include <cds/memory/michael/bound_check.h>
21 #include <cds/memory/michael/procheap_stat.h>
22 #include <cds/memory/michael/osalloc_stat.h>
23
24 #include <cds/os/topology.h>
25 #include <cds/os/alloc_aligned.h>
26 #include <cds/lock/spinlock.h>
27 #include <cds/details/type_padding.h>
28 #include <cds/details/marked_ptr.h>
29 #include <cds/container/vyukov_mpmc_cycle_queue.h>
30 #include <cds/user_setup/cache_line.h>
31 #include <cds/details/lib.h>
32
33 #include <stdlib.h>
34 #include <boost/intrusive/list.hpp>
35
36 namespace cds {
37     /// Memory-related algorithms: allocators etc.
38 namespace memory {
39     /// Michael's allocator (class Heap)
40     /**
41         \par Source
42             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
43
44         This namespace declares the main class Heap and a lot of helper classes.
45     */
46 namespace michael {
47
48     /// Size class
49     struct size_class {
50         unsigned int    nBlockSize  ;   ///< block size in bytes
51         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
52         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
53         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
54     };
55
56     /// %Heap based on system \p malloc and \p free functions
57     struct malloc_heap
58     {
59         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
60         static void * alloc( size_t nSize )
61         {
62             return ::malloc( nSize );
63         }
64         /// Returns a memory block to the system (\p free wrapper)
65         static void free( void * p )
66         {
67             ::free( p );
68         }
69     };
70
71     /// %Heap based on system provided aligned \p malloc and \p free functions
72     struct aligned_malloc_heap
73     {
74         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
75         static void * alloc( size_t nSize, size_t nAlignment  )
76         {
77             return cds::OS::aligned_malloc( nSize, nAlignment );
78         }
79         /// Frees the aligned memory block \p p that has been previously allocated by an \ref alloc call
80         static void free( void * p )
81         {
82             cds::OS::aligned_free( p );
83         }
84     };
85
86     /// Page heap based on \p Heap
87     /**
88         The page heap can allocate memory in page-sized blocks only.
89         \p Heap may be any heap that provides an interface like \ref malloc_heap.
90
91         This class is one of the available implementations of the opt::page_heap option.
92     */
93     template <class Heap = malloc_heap>
94     class page_allocator: public Heap
95     {
96         //@cond
97         typedef Heap base_class;
98         size_t  m_nPageSize;
99         //@endcond
100
101     public:
102         /// Initializes heap
103         page_allocator(
104             size_t nPageSize    ///< page size in bytes
105         )
106             : m_nPageSize( nPageSize )
107         {}
108
109         /// Allocate new page
110         void * alloc()
111         {
112             return base_class::alloc( m_nPageSize );
113         }
114
115         /// Free page \p pPage
116         void free( void * pPage )
117         {
118             base_class::free( pPage );
119         }
120     };
121
122     /// Page-cached heap
123     /**
124         To improve performance this allocator maintains a small list of free pages.
125         The page heap can allocate memory in page-sized blocks only.
126
127         Template parameters:
128             \li \p FreeListCapacity - capacity of the free-list; the default value is 64 pages
129             \li \p Heap may be any heap that provides an interface like \ref malloc_heap.
130
131         This class is one of the available implementations of the opt::page_heap option.
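        A minimal usage sketch (the 1M page size below is arbitrary, chosen only for illustration):
        \code
        cds::memory::michael::page_cached_allocator<>  pageHeap( 1024 * 1024 );
        void * pPage = pageHeap.alloc();    // takes a page from the free-list, or from the underlying heap if the free-list is empty
        // ... use the page ...
        pageHeap.free( pPage );             // returns the page to the free-list if it is not full
        \endcode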
132     */
133     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
134     class page_cached_allocator: public page_allocator<Heap>
135     {
136         //@cond
137         typedef page_allocator<Heap> base_class;
138
139 #ifdef _DEBUG
140         struct make_null_ptr {
141             void operator ()(void *& p)
142             {
143                 p = nullptr;
144             }
145         };
146 #endif
147
148         typedef container::VyukovMPMCCycleQueue<
149             void *,
150             opt::buffer< opt::v::static_buffer<void *, FreeListCapacity> >
151 #ifdef _DEBUG
152             , opt::value_cleaner< make_null_ptr >
153 #endif
154         >   free_list;
155
156         free_list   m_FreeList;
157         //@endcond
158
159     public:
160         /// Initializes heap
161         page_cached_allocator(
162             size_t nPageSize    ///< page size in bytes
163         )
164             : base_class( nPageSize )
165             , m_FreeList( FreeListCapacity )
166         {}
167
168         //@cond
169         ~page_cached_allocator()
170         {
171             void * pPage;
172             while ( m_FreeList.pop(pPage) )
173                 base_class::free( pPage );
174         }
175         //@endcond
176
177         /// Allocate new page
178         void * alloc()
179         {
180             void * pPage;
181             if ( !m_FreeList.pop( pPage ) )
182                 pPage = base_class::alloc();
183             return pPage;
184         }
185
186         /// Free page \p pPage
187         void free( void * pPage )
188         {
189             if ( !m_FreeList.push( pPage ))
190                 base_class::free( pPage );
191         }
192     };
193
194     /// Implementation of opt::sizeclass_selector option
195     /**
196         The default size-class selector can manage memory blocks of up to 64K.
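        For illustration, a sketch of how the selector maps a requested size to a size-class
        (the concrete block sizes depend on the internal size-class table):
        \code
        typedef cds::memory::michael::default_sizeclass_selector selector;
        selector::sizeclass_index idx = selector::find( 100 );
        if ( idx != selector::c_nNoSizeClass ) {
            const cds::memory::michael::size_class * sc = selector::at( idx );
            assert( sc->nBlockSize >= 100 );    // the selected class can hold the requested size
        }
        \endcode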
197     */
198     class CDS_EXPORT_API default_sizeclass_selector
199     {
200         //@cond
201         /// Count of different size-classes
202         static const size_t c_nSizeClassCount = 63;
203
204         /// Max block size
205         static const size_t c_nMaxBlockSize = 64 * 1024;
206
207         /// Page size of type 0 (64K)
208         static const unsigned int c_nPage64K = 64 * 1024 - 32;
209
210         /// Page size of type 1 (1M)
211         static const unsigned int c_nPage1M = 1024 * 1024;
212
213         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
214         static size_class const m_szClass[c_nSizeClassCount];
215         static unsigned char const m_szClassMap[];
216         //@endcond
217     public:
218         /// Type of size-class index
219         typedef unsigned int sizeclass_index;
220
221 #ifdef _DEBUG
222         default_sizeclass_selector();
223 #endif
224
225         /// "No size class" index
226         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
227
228         /// Returns size-class count
229         static sizeclass_index size()
230         {
231             return c_nSizeClassCount;
232         }
233
234         /// Returns page size in bytes for given page type \p nPageType
235         static size_t page_size(size_t nPageType )
236         {
237             switch (nPageType) {
238             case 0:
239                 return c_nPage64K;
240             case 1:
241                 return c_nPage1M;
242             default:
243                 assert(false)   ;   // anything forgotten?..
244             }
245             return c_nPage1M;
246         }
247
248         /// Returns the count of page size-classes
249         /**
250             This class supports two page types: a 64K page for small objects and a 1M page for other objects.
251         */
252         static size_t pageTypeCount()
253         {
254             return 2;
255         }
256
257         /// Returns size-class index for \p nSize
258         /**
259             For large blocks that cannot be allocated by Michael's allocator
260             the function must return \ref c_nNoSizeClass (i.e. -1).
261         */
262         static sizeclass_index find( size_t nSize )
263         {
264             if ( nSize > c_nMaxBlockSize ) {
265                 // Too large block - allocate from system
266                 return c_nNoSizeClass;
267             }
268             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
269             assert( nSize <= m_szClassBounds[ szClass ] );
270             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
271
272             return szClass;
273         }
274
275         /// Gets the \ref size_class struct for the size-class index \p nIndex
276         static const size_class * at( sizeclass_index nIndex )
277         {
278             assert( nIndex < size() );
279             return m_szClass + nIndex;
280         }
281     };
282
283     //@cond
284     namespace details {
285         struct free_list_tag;
286         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
287
288         struct partial_list_tag;
289         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
290
291         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
292         {};
293     }
294     //@endcond
295
296     /// List of free superblock descriptors
297     /**
298         This class is an implementation of the \ref opt::free_list option
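        A sketch of selecting this implementation explicitly via the \ref opt::free_list option
        (a spin-lock is used here, as in the default Heap options):
        \code
        cds::memory::michael::Heap<
            opt::free_list< free_list_locked< cds::lock::Spin > >
        >   myHeap;
        \endcode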
299     */
300     template <class Lock, class T = details::intrusive_superblock_desc>
301     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
302     {
303         //@cond
304         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
305     public:
306         typedef details::free_list_locked_hook item_hook;
307         typedef Lock lock_type;
308     protected:
309         typedef cds::lock::scoped_lock<lock_type>   auto_lock;
310
311         mutable lock_type   m_access;
312         //@endcond
313
314     public:
315         /// Rebinds to other item type \p T2
316         template <class T2>
317         struct rebind {
318             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
319         };
320
321     public:
322         /// Push superblock descriptor to free-list
323         void push( T * pDesc )
324         {
325             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
326             auto_lock al(m_access);
327             base_class::push_back( *pDesc );
328         }
329
330         /// Pop superblock descriptor from free-list
331         T *   pop()
332         {
333             auto_lock al(m_access);
334             if ( base_class::empty() )
335                 return nullptr;
336             T& rDesc = base_class::front();
337             base_class::pop_front();
338             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
339             return &rDesc;
340         }
341
342         /// Returns current count of superblocks in free-list
343         size_t  size() const
344         {
345             auto_lock al(m_access);
346             return base_class::size();
347         }
348     };
349
350     /// List of partially filled superblock descriptors
351     /**
352         This class is an implementation of the \ref opt::partial_list option
353     */
354     template <class Lock, class T = details::intrusive_superblock_desc>
355     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
356     {
357         //@cond
358         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
359     public:
360         typedef details::partial_list_locked_hook item_hook;
361         typedef Lock    lock_type;
362     protected:
363         typedef cds::lock::scoped_lock<lock_type>   auto_lock;
364
365         mutable lock_type   m_access;
366         //@endcond
367
368     public:
369         /// Rebinds to other item type \p T2
370         template <class T2>
371         struct rebind {
372             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
373         };
374
375     public:
376         /// Push a superblock \p pDesc to the list
377         void    push( T * pDesc )
378         {
379             auto_lock al( m_access );
380             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
381             base_class::push_back( *pDesc );
382         }
383
384         /// Pop superblock from the list
385         T * pop()
386         {
387             auto_lock al( m_access );
388             if ( base_class::empty() )
389                 return nullptr;
390             T& rDesc = base_class::front();
391             base_class::pop_front();
392             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
393             return &rDesc;
394         }
395
396         /// Removes the \p pDesc descriptor from the partial list
397         bool unlink( T * pDesc )
398         {
399             assert(pDesc != nullptr);
400             auto_lock al( m_access );
401             // !inited(pDesc) means that pDesc is currently linked into the partial list
402             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
403                 base_class::erase( base_class::iterator_to( *pDesc ) );
404                 return true;
405             }
406             return false;
407         }
408
409         /// Count of elements in the list
410         size_t size() const
411         {
412             auto_lock al( m_access );
413             return base_class::size();
414         }
415     };
416
417     /// Summary processor heap statistics
418     /**
419         Summary heap statistics for use with Heap::summaryStat function.
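        A usage sketch (assuming a \p Heap object named \p myHeap):
        \code
        cds::memory::michael::summary_stat s;
        myHeap.summaryStat( s );    // gathers the current statistics snapshot
        \endcode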
420     */
421     struct summary_stat
422     {
423         size_t      nAllocFromActive    ;  ///< Event count of allocation from active superblock
424         size_t      nAllocFromPartial   ;  ///< Event count of allocation from partial superblock
425         size_t      nAllocFromNew       ;  ///< Event count of allocation from new superblock
426         size_t      nFreeCount          ;  ///< Count of \p free function calls
427         size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
428         size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
429         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors
430         size_t      nDescFull           ;  ///< Count of full superblocks
431         atomic64u_t nBytesAllocated     ;  ///< Count of allocated bytes (for heap managed memory blocks)
432         atomic64u_t nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap managed memory blocks)
433
434         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
435         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
436         atomic64u_t nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
437         atomic64_t  nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
438
439         // Internal contention indicators
440         /// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active internal function
441         /**
442             Contention indicator: the lower the value, the better
443         */
444         size_t      nActiveDescCASFailureCount;
445         /// CAS failure counter for updating the \p anchor field of the active superblock in the \p alloc_from_active internal function
446         /**
447             Contention indicator: the lower the value, the better
448         */
449         size_t      nActiveAnchorCASFailureCount;
450         /// CAS failure counter for the first \p anchor update (block reservation) in the \p alloc_from_partial internal function
451         /**
452             Contention indicator: the lower the value, the better
453         */
454         size_t      nPartialDescCASFailureCount;
455         /// CAS failure counter for the second \p anchor update (popping a reserved block) in the \p alloc_from_partial internal function
456         /**
457             Contention indicator: the lower the value, the better
458         */
459         size_t      nPartialAnchorCASFailureCount;
460
461
462     public:
463         /// Constructs empty statistics. All counters are zero.
464         summary_stat()
465         {
466             clear();
467         }
468
469         /// Difference statistics
470         /**
471             This operator computes the difference between \p *this and \p stat and places the result in \p *this.
472             Returns \p *this.
473         */
474         summary_stat& operator -=( const summary_stat& stat )
475         {
476             nAllocFromActive    -= stat.nAllocFromActive;
477             nAllocFromPartial   -= stat.nAllocFromPartial;
478             nAllocFromNew       -= stat.nAllocFromNew;
479             nFreeCount          -= stat.nFreeCount;
480             nPageAllocCount     -= stat.nPageAllocCount;
481             nPageDeallocCount   -= stat.nPageDeallocCount;
482             nDescAllocCount     -= stat.nDescAllocCount;
483             nDescFull           -= stat.nDescFull;
484             nBytesAllocated     -= stat.nBytesAllocated;
485             nBytesDeallocated   -= stat.nBytesDeallocated;
486
487             nSysAllocCount      -= stat.nSysAllocCount;
488             nSysFreeCount       -= stat.nSysFreeCount;
489             nSysBytesAllocated  -= stat.nSysBytesAllocated;
490             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
491
492             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
493             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
494             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
495             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
496
497             return *this;
498         }
499
500         /// Clears statistics
501         /**
502             All counters are set to zero.
503         */
504         void clear()
505         {
506             memset( this, 0, sizeof(*this));
507         }
508
509         //@cond
510         template <typename Stat>
511         summary_stat& add_procheap_stat( const Stat& stat )
512         {
513             nAllocFromActive    += stat.allocFromActive();
514             nAllocFromPartial   += stat.allocFromPartial();
515             nAllocFromNew       += stat.allocFromNew();
516             nFreeCount          += stat.freeCount();
517             nPageAllocCount     += stat.blockAllocated();
518             nPageDeallocCount   += stat.blockDeallocated();
519             nDescAllocCount     += stat.descAllocCount();
520             nDescFull           += stat.descFull();
521             nBytesAllocated     += stat.allocatedBytes();
522             nBytesDeallocated   += stat.deallocatedBytes();
523
524             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
525             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
526             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
527             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
528
529             return *this;
530         }
531
532         template <typename Stat>
533         summary_stat& add_heap_stat( const Stat& stat )
534         {
535             nSysAllocCount      += stat.allocCount();
536             nSysFreeCount       += stat.freeCount();
537
538             nSysBytesAllocated  += stat.allocatedBytes();
539             nSysBytesDeallocated+= stat.deallocatedBytes();
540
541             return *this;
542         }
543         //@endcond
544     };
545
546     /// Michael's allocator
547     /**
548         This class provides the base functionality of Michael's allocator. It does not provide
549         the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
550         The heap interface is closer to the semantics of the \p malloc / \p free system functions.
551         The heap supports allocation of aligned and unaligned data.
552
553         The algorithm is based on simplified version of
554             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
555
556         that, in turn, is concurrent version of well-known Hoard allocator developed by Emery Berger, see
557             \li [2002] Emery Berger "Memory Management for High-Performance Application", PhD thesis
558
559         This is a powerful, scalable, fully customizable heap with a lock-free fast path,
560         developed specifically for multi-threaded applications.
561         With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
562         You can manually bind any thread to any arena ("processor"). With the opt::sizeclass_selector option you can manage
563         allocation granularity. With opt::page_heap you can utilize any OS-provided feature for page allocation
564         such as \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you to tune your application.
565         The opt::check_bounds feature can help you find memory buffer overflows.
566
567         Brief algorithm description from Michael's work:
568
569         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
570         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
571         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
572         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
573         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
574         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
575         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
576         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
577         in a lock-free manner.
578
579         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
580         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
581         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
582         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
583         available blocks of its original superblock by atomically updating its descriptor.
584
585         <b>Constraint</b>: one superblock may contain up to 2048 blocks. This restriction limits the maximum superblock size;
586         for example, a 64K superblock cannot be divided into blocks smaller than 32 bytes, since 64K / 32 = 2048.
587
588         Available \p Options:
589         - \ref opt::sys_topology - class that describes system topology needed for allocator.
590             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
591         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
592             Default is \ref malloc_heap.
593         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
594             Default is \ref aligned_malloc_heap
595         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
596             Default is \ref page_cached_allocator
597         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
598             for incoming allocation request.
599             Default is \ref default_sizeclass_selector
600         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
601             Default is \ref free_list_locked
602         - \ref opt::partial_list - option setter for a class to manage a list of partially filled superblocks
603             Default is \ref partial_list_locked
604         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
605             that is maintained by the heap.
606             Default is \ref procheap_empty_stat
607         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
608             allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector);
609             it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated directly from
610             the OS. The \p os_allocated_stat option sets a class to gather statistics for such large blocks.
611             Default is \ref os_allocated_empty
612         - \ref opt::check_bounds - a bound checker.
613             Default is no bound checker (cds::opt::none)
614
615         \par Usage:
616         The heap is the basic building block for your allocator or <tt>operator new</tt> implementation.
617
618         \code
619         #include <cds/memory/michael/allocator.h>
620
621         // Heap with explicitly defined options:
622         cds::memory::michael::Heap<
623             opt::aligned_heap< aligned_malloc_heap >,
624             opt::page_heap< page_cached_allocator<16, malloc_heap> >
625         >   myHeap;
626
627         // Heap with default options:
628         cds::memory::michael::Heap<>    myDefHeap;
629         \endcode
630
631         \par How to make std-like allocator
632
633         There are significant differences between the heap and the <tt>std::allocator</tt> interface:
634             - Heap is stateful, while \p std::allocator is stateless.
635             - Heap has many more template parameters than \p std::allocator
636             - Heap has a low-level interface for memory allocation only, unlike the allocator
637                 interface, which can construct/destroy objects of any type T.
638
639         To wrap the heap interface in a \p std::allocator -like interface you should:
640             - Declare an object of class cds::memory::michael::Heap specifying the necessary
641                 template parameters; this is usually a static object
642             - Create a class with the \p std::allocator interface that uses the heap's functions.
643         \code
644         #include <cds/memory/michael/allocator.h>
645
646         template <class T>
647         class MichaelAllocator
648         {
649             typedef std::allocator<T>               std_allocator;
650             typedef cds::memory::michael::Heap<>    michael_heap;
651
652             // Michael heap static object
653             static michael_heap     s_Heap;
654         public:
655             // Declare typedefs from std::allocator
656             typedef typename std_allocator::const_pointer   const_pointer;
657             typedef typename std_allocator::pointer         pointer;
658             typedef typename std_allocator::const_reference const_reference;
659             typedef typename std_allocator::reference       reference;
660             typedef typename std_allocator::difference_type difference_type;
661             typedef typename std_allocator::size_type       size_type;
662             typedef typename std_allocator::value_type      value_type;
663
664             // Allocation function
665             pointer allocate( size_type _Count, const void* _Hint )
666             {
667                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
668             }
669
670             // Deallocation function
671             void deallocate( pointer _Ptr, size_type _Count )
672             {
673                 s_Heap.free( _Ptr );
674             }
675
676             // Other std::allocator specific functions: address, construct, destroy, etc.
677             ...
678
679             // Rebinding allocator to other type
680             template <class _Other>
681             struct rebind {
682                 typedef MichaelAllocator<_Other> other;
683             };
684         };
685
686         // In .cpp file:
687         template <class T> typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
688
689         \endcode
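        The resulting allocator can be plugged into standard containers, for example (a sketch):
        \code
        std::vector< int, MichaelAllocator<int> >   myVector;
        myVector.push_back( 42 );   // the vector's memory comes from the Michael heap
        \endcode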
690     */
691 #ifdef CDS_CXX11_VARIADIC_TEMPLATE_SUPPORT
692     template <typename... Options>
693 #else
694     template <
695         typename O1 = opt::none,
696         typename O2 = opt::none,
697         typename O3 = opt::none,
698         typename O4 = opt::none,
699         typename O5 = opt::none,
700         typename O6 = opt::none,
701         typename O7 = opt::none,
702         typename O8 = opt::none,
703         typename O9 = opt::none,
704         typename O10= opt::none
705     >
706 #endif
707     class Heap {
708     protected:
709
710         //@cond
711         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
712         static const unsigned int c_nDefaultBlockAlignment = 8;
713
714         struct default_options {
715             typedef cds::OS::topology           sys_topology;
716             typedef malloc_heap                 system_heap;
717             typedef page_cached_allocator<>     page_heap;
718             typedef aligned_malloc_heap         aligned_heap;
719             typedef default_sizeclass_selector  sizeclass_selector;
720             typedef free_list_locked<cds::lock::Spin>  free_list;
721             typedef partial_list_locked<cds::lock::Spin>    partial_list;
722             typedef procheap_empty_stat         procheap_stat;
723             typedef os_allocated_empty          os_allocated_stat;
724             typedef cds::opt::none              check_bounds;
725         };
726         //@endcond
727
728     protected:
729         //@cond
730 #ifdef CDS_CXX11_VARIADIC_TEMPLATE_SUPPORT
731         typedef typename opt::make_options<default_options, Options...>::type   options;
732 #else
733         typedef typename opt::make_options<default_options, O1, O2, O3, O4, O5, O6, O7, O8, O9, O10 >::type   options;
734 #endif
735         //@endcond
736
737         //@cond
738         typedef unsigned char   byte;
739         //@endcond
740     public:
741         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
742         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
743         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
744         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
745         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
746         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
747         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
748         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
749
750         // forward declarations
751         //@cond
752         struct superblock_desc;
753         struct processor_heap_base;
754         struct processor_desc;
755         //@endcond
756
757         /// Superblock states
758         /**
759             A superblock can be in one of four states: \p ACTIVE, \p FULL,
760             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
761             superblock in a heap, or if a thread intends to try to install it
762             as such. A superblock is \p FULL if all its blocks are either allocated
763             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
764             and contains unreserved available blocks. A superblock is
765             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
766         */
767         enum superblock_state {
768             SBSTATE_ACTIVE  = 0,    ///< superblock is active
769             SBSTATE_FULL    = 1,    ///< superblock is full
770             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
771             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
772         };
773
774         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
775
776         /// Anchor of the superblock descriptor. Updated by CAS
777         struct anchor_tag {
778             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
779             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
780             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
781             unsigned long long    tag:40    ;   ///< ABA prevention tag
782         };
783
784         /// Superblock descriptor
785         struct superblock_desc
786             : public options::free_list::item_hook
787             , public options::partial_list::item_hook
788         {
789             CDS_ATOMIC::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
790             byte *              pSB         ;   ///< ptr to superblock
791             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
792             unsigned int        nBlockSize  ;   ///< block size in bytes
793             unsigned int        nCapacity   ;   ///< superblock size/block size
794
795             //@cond
796             superblock_desc()
797                 : pSB(nullptr)
798                 , pProcHeap( nullptr )
799             {}
800             //@endcond
801         };
802
803         //@cond
804         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
805         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
806         //@endcond
807
808 #if CDS_BUILD_BITS == 32
809         /// Allocated block header
810         /**
811             Each allocated block has an 8-byte header.
812             The header contains a pointer to the owner superblock descriptor and a redirection flag.
813             If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
814             \code
815                 +---------------+
816                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
817                 +---------------+
818                 |               | <- memory allocated
819                 |   memory      |
820                 |               |
821                 +---------------+
822             \endcode
823             If the block has been allocated by \ref alloc_aligned, the returned pointer may have to be shifted to satisfy the requested alignment.
824             In this case the redirection flag is 1 and the block's structure is:
825             \code
826                 +---------------+
827             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
828             |   +---------------+
829             |   |   padding     |
830             |   |   (unused)    |
831             |   |               |
832             |   +---------------+
833             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
834                 +---------------+
835                 |               | <- memory allocated
836                 |   memory      |
837                 |               |
838                 +---------------+
839             \endcode
840         */
841         class block_header
842         {
843         //@cond
844             enum {
845                 bitAligned = 1,
846                 bitOSAllocated = 2
847             };
848
849             union {
850                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
851                 atomic32u_t         nSize       ;   // block size (allocated from OS)
852             };
853             atomic32u_t         nFlags;
854
855         public:
856             void  set( superblock_desc * pdesc, atomic32u_t isAligned )
857             {
858                 pDesc = pdesc;
859                 nFlags = isAligned ? bitAligned : 0;
860             }
861
862             superblock_desc * desc()
863             {
864                 assert( (nFlags & bitOSAllocated) == 0 );
865                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
866             }
867
868             block_header * begin()
869             {
870                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
871             }
872
873             bool isAligned() const
874             {
875                 return (nFlags & bitAligned) != 0;
876             }
877
878             bool isOSAllocated() const
879             {
880                 return (nFlags & bitOSAllocated) != 0;
881             }
882
883             void setOSAllocated( size_t sz )
884             {
885                 nSize = sz;
886                 nFlags = bitOSAllocated;
887             }
888
889             size_t getOSAllocSize() const
890             {
891                 assert( isOSAllocated() );
892                 return nSize;
893             }
894
895         //@endcond
896         };
897 #elif CDS_BUILD_BITS == 64
898         //@cond
899         class block_header
900         {
901             enum {
902                 bitAligned = 1,
903                 bitOSAllocated = 2
904             };
905             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
906             // If bitOSAllocated is set the pDesc contains size of memory block
907             // allocated from OS
908             marked_desc_ptr     pDesc;
909         public:
910             void  set( superblock_desc * pdesc, atomic32u_t isAligned )
911             {
912                 pDesc = marked_desc_ptr( pdesc, isAligned );
913             }
914
915             superblock_desc * desc()
916             {
917                 assert( !isOSAllocated() );
918                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
919             }
920
921             block_header * begin()
922             {
923                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
924             }
925
926             bool isAligned() const
927             {
928                 return (pDesc.bits() & bitAligned) != 0;
929             }
930
931             bool isOSAllocated() const
932             {
933                 return (pDesc.bits() & bitOSAllocated) != 0;
934             }
935
936             void setOSAllocated( size_t nSize )
937             {
938
939                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
940             }
941
942             size_t getOSAllocSize() const
943             {
944                 assert( isOSAllocated() );
945                 return reinterpret_cast<uptr_atomic_t>( pDesc.ptr() ) >> 2;
946             }
947
948         };
949         //@endcond
950 #else
951 #       error "Unexpected value of CDS_BUILD_BITS"
952 #endif  // CDS_BUILD_BITS
953
954         //@cond
955         struct free_block_header: block_header {
956             unsigned int    nNextFree;
957         };
958         //@endcond
959
960 #if CDS_BUILD_BITS == 32
961         /// Processor heap's \p active field
962         /**
963             The \p active field in the processor heap structure is primarily a pointer to the descriptor
964             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
965             guaranteed that the active superblock has at least one block available for reservation.
966             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
967             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
968             of blocks available for reservation in the active superblock less one. That is, if the value
969             of credits is n, then the active superblock contains n+1 blocks available for reservation
970             through the \p active field. Note that the number of blocks in a superblock is not limited
971             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
972             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
973             atomically decrements credits while validating that the active superblock is still valid.
974         */
975         class active_tag {
976         //@cond
977             superblock_desc *       pDesc;
978             atomic32u_t             nCredits;
979
980         public:
981             static const unsigned int c_nMaxCredits = 0 - 1;
982
983         public:
984             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
985                 : pDesc( nullptr )
986                 , nCredits(0)
987             {}
988
989 #   ifdef CDS_CXX11_EXPLICITLY_DEFAULTED_FUNCTION_SUPPORT
990             active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
991             ~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
992             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
993 #       if defined(CDS_MOVE_SEMANTICS_SUPPORT) && !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
994             active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
995             active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
996 #       endif
997 #   endif
998
999             /// Returns pointer to superblock descriptor
1000             superblock_desc * ptr() const
1001             {
1002                 return pDesc;
1003             }
1004
1005             /// Sets superblock descriptor
1006             void ptr( superblock_desc * p )
1007             {
1008                 pDesc = p;
1009             }
1010
1011             unsigned int credits() const
1012             {
1013                 return nCredits;
1014             }
1015
1016             void credits( unsigned int n )
1017             {
1018                 nCredits = n;
1019             }
1020
1021             void clear()
1022             {
1023                 pDesc = nullptr;
1024                 nCredits = 0;
1025             }
1026
1027             void set( superblock_desc * pSB, unsigned int n )
1028             {
1029                 pDesc = pSB;
1030                 nCredits = n;
1031             }
1032         //@endcond
1033         };
1034 #elif CDS_BUILD_BITS == 64
1035         //@cond
1036         class active_tag
1037         {
1038         public:
1039             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1040         protected:
1041             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1042             marked_desc_ptr pDesc;
1043
1044         public:
1045             active_tag() CDS_NOEXCEPT
1046                 : pDesc( nullptr )
1047             {}
1048 #   ifdef CDS_CXX11_EXPLICITLY_DEFAULTED_FUNCTION_SUPPORT
1049             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1050             //active_tag() CDS_NOEXCEPT_DEFAULTED = default;
1051             active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
1052             ~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
1053             active_tag& operator=(active_tag const&) CDS_NOEXCEPT_DEFAULTED = default;
1054 #       if defined(CDS_MOVE_SEMANTICS_SUPPORT) && !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1055             active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
1056             active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
1057 #       endif
1058 #   endif
1059             superblock_desc *    ptr() const
1060             {
1061                 return pDesc.ptr();
1062             }
1063
1064             void ptr( superblock_desc * p )
1065             {
1066                 assert( (reinterpret_cast<uptr_atomic_t>(p) & c_nMaxCredits) == 0 );
1067                 pDesc = marked_desc_ptr( p, pDesc.bits());
1068             }
1069
1070             unsigned int credits()
1071             {
1072                 return (unsigned int) pDesc.bits();
1073             }
1074
1075             void credits( unsigned int n )
1076             {
1077                 assert( n <= c_nMaxCredits );
1078                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1079             }
1080
1081             void clear()
1082             {
1083                 pDesc = marked_desc_ptr();
1084             }
1085
1086             void set( superblock_desc * pSB, unsigned int n )
1087             {
1088                 assert( (reinterpret_cast<uptr_atomic_t>(pSB) & c_nMaxCredits) == 0 );
1089                 pDesc = marked_desc_ptr( pSB, n );
1090             }
1091
1092         };
1093         //@endcond
1094 #else
1095 #       error "Unexpected value of CDS_BUILD_BITS"
1096 #endif  // CDS_BUILD_BITS
1097
1098
1099         /// Processor heap
1100         struct processor_heap_base
1101         {
1102             CDS_DATA_ALIGNMENT(8) CDS_ATOMIC::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1103             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1104             const size_class *  pSizeClass  ;   ///< pointer to size class
1105             CDS_ATOMIC::atomic<superblock_desc *>   pPartial    ;   ///< pointer to partial filled superblock (may be \p nullptr)
1106             partial_list        partialList ;   ///< list of partial filled superblocks owned by the processor heap
1107             unsigned int        nPageIdx    ;   ///< page size-class index; \ref c_nPageSelfAllocation means "small page"
1108
1109             /// Small page marker
1110             /**
1111                 If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
1112                 This optimization allows system memory to be allocated more regularly, in blocks of 1M,
1113                 which leads to less memory fragmentation.
1114             */
1115             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1116
1117             procheap_stat       stat        ;   ///< heap statistics
1118             //processor_heap_statistics   stat;
1119
1120             //@cond
1121             processor_heap_base() CDS_NOEXCEPT
1122                 : pProcDesc( nullptr )
1123                 , pSizeClass( nullptr )
1124                 , pPartial( nullptr )
1125             {
1126                 assert( (reinterpret_cast<uptr_atomic_t>(this) & (c_nAlignment - 1)) == 0 );
1127             }
1128             //@endcond
1129
1130             /// Get partial superblock owned by the processor heap
1131             superblock_desc * get_partial()
1132             {
1133                 superblock_desc * pDesc = pPartial.load(CDS_ATOMIC::memory_order_acquire);
1134                 do {
1135                     if ( !pDesc ) {
1136                         pDesc =  partialList.pop();
1137                         break;
1138                     }
1139                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ) );
1140
1141                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1142                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1143                 return pDesc;
1144             }
1145
1146             /// Add partial superblock \p pDesc to the list
1147             void add_partial( superblock_desc * pDesc )
1148             {
1149                 assert( pPartial != pDesc );
1150                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1151
1152                 superblock_desc * pCur = nullptr;
1153                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, CDS_ATOMIC::memory_order_acq_rel, CDS_ATOMIC::memory_order_relaxed) )
1154                     partialList.push( pDesc );
1155             }
1156
1157
1158             /// Remove superblock \p pDesc from the list of partial superblocks
1159             bool unlink_partial( superblock_desc * pDesc )
1160             {
1161                 return partialList.unlink( pDesc );
1162             }
1163         };
1164
1165         /// Processor heap aligned to \ref c_nAlignment
1166         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1167
1168         /// Processor descriptor
1169         struct processor_desc
1170         {
1171             processor_heap *    arrProcHeap     ; ///< array of processor heaps
1172             free_list           listSBDescFree  ; ///< List of free superblock descriptors
1173             page_heap *         pageHeaps       ; ///< array of page heaps (one for each page size)
1174
1175             //@cond
1176             processor_desc()
1177                 : arrProcHeap( nullptr )
1178                 , pageHeaps( nullptr )
1179             {}
1180             //@endcond
1181         };
1182
1183
1184     protected:
1185         sys_topology        m_Topology           ;  ///< System topology
1186         system_heap         m_LargeHeap          ;  ///< Heap for large block
1187         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1188         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1189         CDS_ATOMIC::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1190         unsigned int        m_nProcessorCount    ;  ///< Processor count
1191         bound_checker       m_BoundChecker       ;  ///< Bound checker
1192
1193         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1194
1195     protected:
1196         //@cond
1197
1198         /// Allocates large block from system memory
1199         block_header * alloc_from_OS( size_t nSize )
1200         {
1201             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1202             m_OSAllocStat.incBytesAllocated( nSize );
1203             p->setOSAllocated( nSize );
1204             return p;
1205         }
1206
1207         /// Allocates from the active superblock if possible
1208         block_header * alloc_from_active( processor_heap * pProcHeap )
1209         {
1210             active_tag  oldActive;
1211             int nCollision = -1;
1212
1213             // Reserve block
1214             while ( true ) {
1215                 ++nCollision;
1216                 oldActive = pProcHeap->active.load(CDS_ATOMIC::memory_order_acquire);
1217                 if ( !oldActive.ptr() )
1218                     return nullptr;
1219                 unsigned int nCredits = oldActive.credits();
1220                 active_tag  newActive   ; // default = 0
1221                 if ( nCredits != 0 ) {
1222                     newActive = oldActive;
1223                     newActive.credits( nCredits - 1 );
1224                 }
1225                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ))
1226                     break;
1227             }
1228
1229             if ( nCollision )
1230                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1231
1232             // pop block
1233             superblock_desc * pDesc = oldActive.ptr();
1234
1235             anchor_tag  oldAnchor;
1236             anchor_tag  newAnchor;
1237             byte * pAddr;
1238             unsigned int nMoreCredits = 0;
1239
1240             nCollision = -1;
1241             do {
1242                 ++nCollision;
1243                 newAnchor = oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
1244
1245                 assert( oldAnchor.avail < pDesc->nCapacity );
1246                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1247                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1248                 newAnchor.tag += 1;
1249
1250                 if ( oldActive.credits() == 0 ) {
1251                     // state must be ACTIVE
1252                     if ( oldAnchor.count == 0 )
1253                         newAnchor.state = SBSTATE_FULL;
1254                     else {
1255                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1256                         newAnchor.count -= nMoreCredits;
1257                     }
1258                 }
1259             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ));
1260
1261             if ( nCollision )
1262                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1263
1264             assert( newAnchor.state != SBSTATE_EMPTY );
1265
1266             if ( newAnchor.state == SBSTATE_FULL )
1267                 pProcHeap->stat.incDescFull();
1268             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1269                 update_active( pProcHeap, pDesc, nMoreCredits );
1270
1271             pProcHeap->stat.incAllocFromActive();
1272
1273             // block_header fields need not be set up here:
1274             // they were set in alloc_from_new_superblock
1275             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1276             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1277             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1278
1279             return reinterpret_cast<block_header *>( pAddr );
1280         }
1281
1282         /// Allocates from a partially filled superblock if possible
1283         block_header * alloc_from_partial( processor_heap * pProcHeap )
1284         {
1285         retry:
1286             superblock_desc * pDesc = pProcHeap->get_partial();
1287             if ( !pDesc )
1288                 return nullptr;
1289
1290             // reserve blocks
1291             anchor_tag  oldAnchor;
1292             anchor_tag  newAnchor;
1293             //byte * pAddr;
1294             unsigned int nMoreCredits = 0;
1295
1296             int nCollision = -1;
1297             do {
1298                 ++nCollision;
1299
1300                 newAnchor = oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
1301                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1302                     free_superblock( pDesc );
1303                     goto retry;
1304                 }
1305
1306                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1307                 newAnchor.count -= nMoreCredits + 1;
1308                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1309                 newAnchor.tag += 1;
1310             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed) );
1311
1312             if ( nCollision )
1313                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1314
1315             if ( newAnchor.state == SBSTATE_FULL )
1316                 pProcHeap->stat.incDescFull();
1317
1318             // Now, the thread is guaranteed to have reserved one or more blocks
1319             // pop reserved block
1320             byte * pAddr;
1321             nCollision = -1;
1322             do {
1323                 ++nCollision;
1324
1325                 newAnchor = oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
1326
1327                 assert( oldAnchor.avail < pDesc->nCapacity );
1328                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1329                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1330                 ++newAnchor.tag;
1331             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed) );
1332
1333             if ( nCollision )
1334                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1335
1336             assert( newAnchor.state != SBSTATE_EMPTY );
1337
1338             pProcHeap->stat.incAllocFromPartial();
1339
1340             if ( nMoreCredits > 0 )
1341                 update_active( pProcHeap, pDesc, nMoreCredits );
1342
1343             // block_header fields need not be set up here:
1344             // they were already initialized in alloc_from_new_superblock
1345             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1346             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1347             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1348
1349             return reinterpret_cast<block_header *>( pAddr );
1350         }
1351
1352         /// Allocates a new superblock and returns its first block
1353         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1354         {
1355             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1356             assert( pDesc != nullptr );
1357             pDesc->pSB = new_superblock_buffer( pProcHeap );
1358
1359             anchor_tag anchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_relaxed);
1360             anchor.tag += 1;
1361
1362             // Make single-linked list of free blocks in superblock
1363             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1364             unsigned int nNext = 0;
1365             const unsigned int nBlockSize = pDesc->nBlockSize;
1366             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1367                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1368                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1369             }
1370             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
1371
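                 // Distribute the superblock's blocks: block 0 is returned to the
                 // caller below, up to active_tag::c_nMaxCredits blocks are published
                 // through the active tag (credits are stored minus one), and the
                 // remainder is accounted for in anchor.count.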
1372             active_tag newActive;
1373             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1374
1375             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1376             anchor.state = SBSTATE_ACTIVE;
1377             pDesc->anchor.store(anchor, CDS_ATOMIC::memory_order_relaxed);
1378
1379             active_tag curActive;
1380             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed )) {
1381                 pProcHeap->stat.incAllocFromNew();
1382                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1383                 return reinterpret_cast<block_header *>( pDesc->pSB );
1384             }
1385
1386             free_superblock( pDesc );
1387             return nullptr;
1388         }
1389
1390         /// Finds the appropriate processor heap for the selected size class
1391         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1392         {
1393             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1394
1395             unsigned int nProcessorId = m_Topology.current_processor();
1396             assert( nProcessorId < m_nProcessorCount );
1397
1398             if ( nProcessorId >= m_nProcessorCount )
1399                 nProcessorId = 0;
1400
1401             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( CDS_ATOMIC::memory_order_relaxed );
1402             while ( !pDesc ) {
1403
1404                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1405                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ) ) {
1406                     pDesc = pNewDesc;
1407                     break;
1408                 }
1409                 free_processor_desc( pNewDesc );
1410             }
1411
1412             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1413         }
1414
1415         /// Updates active field of processor heap \p pProcHeap
1416         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1417         {
1418             assert( pProcHeap == pDesc->pProcHeap );
1419
1420             active_tag  nullActive;
1421             active_tag  newActive;
1422             newActive.set( pDesc, nCredits - 1 );
1423
1424             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, CDS_ATOMIC::memory_order_seq_cst, CDS_ATOMIC::memory_order_relaxed ) )
1425                 return;
1426
1427             // Someone installed another active superblock.
1428             // Return credits to superblock and make it partial
1429
1430             anchor_tag  oldAnchor;
1431             anchor_tag  newAnchor;
1432
1433             do {
1434                 newAnchor = oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
1435                 newAnchor.count += nCredits;
1436                 newAnchor.state = SBSTATE_PARTIAL;
1437             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ));
1438
1439             pDesc->pProcHeap->add_partial( pDesc );
1440         }
1441
1442         /// Allocates new processor descriptor
1443         processor_desc * new_processor_desc( unsigned int nProcessorId )
1444         {
1445             processor_desc * pDesc;
1446             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1447
1448             /*
1449                 Processor descriptor layout
1450
1451                 proc_desc -  64-byte alignment
1452                 page_heap[0] 64-byte alignment
1453                 page_heap[1] 64-byte alignment
1454                 ...
1455                 page_heap[P] 64-byte alignment
1456
1457                 proc_heap[0] 64-byte alignment
1458                 proc_heap[1] 64-byte alignment
1459                 ...
1460                 proc_heap[N] 64-byte alignment
1461             */
1462
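                 // szDesc: size of the descriptor plus the page-heap array, rounded up
                 // and expressed in units of c_nAlignment; szTotal below appends the
                 // processor_heap array behind it.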
1463             const size_t szDesc =
1464                 ( sizeof(processor_desc)
1465                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1466                     + c_nAlignment - 1
1467                 ) / c_nAlignment;
1468
1469
1470             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1471
1472             static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1473
1474             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1475
1476             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1477             for ( size_t i = 0; i < nPageHeapCount; ++i )
1478                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1479
1480             // initialize processor heaps
1481             pDesc->arrProcHeap =
1482                 reinterpret_cast<processor_heap *>(
1483                     reinterpret_cast<uptr_atomic_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1484                     & ~(uptr_atomic_t(c_nAlignment) - 1)
1485                 );
1486
1487             processor_heap * pProcHeap = pDesc->arrProcHeap;
1488             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1489             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1490                 new (pProcHeap) processor_heap();
1491                 pProcHeap->pProcDesc = pDesc;
1492                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1493                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1494                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1495                 else
1496                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1497             }
1498             return pDesc;
1499         }
1500
1501
1502         void free_processor_heap( processor_heap * pProcHeap )
1503         {
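             // Superblock pages were obtained either via alloc() (self-allocation)
             // or from a per-size page heap; return each superblock to the source
             // it came from and free its descriptor.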
1504             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1505                 superblock_desc * pDesc;
1506
1507                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1508                     free( pDesc->pSB );
1509                     m_AlignedHeap.free( pDesc );
1510                 }
1511
1512                 superblock_desc * pPartial = pProcHeap->pPartial.load(CDS_ATOMIC::memory_order_relaxed);
1513                 if ( pPartial ) {
1514                     free( pPartial->pSB );
1515                     m_AlignedHeap.free( pPartial );
1516                 }
1517
1518                 pDesc = pProcHeap->active.load(CDS_ATOMIC::memory_order_relaxed).ptr();
1519                 if ( pDesc ) {
1520                     free( pDesc->pSB );
1521                     m_AlignedHeap.free( pDesc );
1522                 }
1523             }
1524             else {
1525                 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1526                 superblock_desc * pDesc;
1527
1528                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1529                     pageHeap.free( pDesc->pSB );
1530                     m_AlignedHeap.free( pDesc );
1531                 }
1532
1533                 superblock_desc * pPartial = pProcHeap->pPartial.load(CDS_ATOMIC::memory_order_relaxed);
1534                 if ( pPartial ) {
1535                     pageHeap.free( pPartial->pSB );
1536                     m_AlignedHeap.free( pPartial );
1537                 }
1538
1539                 pDesc = pProcHeap->active.load(CDS_ATOMIC::memory_order_relaxed).ptr();
1540                 if ( pDesc ) {
1541                     pageHeap.free( pDesc->pSB );
1542                     m_AlignedHeap.free( pDesc );
1543                 }
1544             }
1545             pProcHeap->~processor_heap();
1546         }
1547
1548         /// Frees processor descriptor
1549         void free_processor_desc( processor_desc * pDesc )
1550         {
1551             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1552
1553             for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1554                 free_processor_heap( pDesc->arrProcHeap + j );
1555
1556             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1557                 m_AlignedHeap.free( pSBDesc );
1558
1559             for (size_t i = 0; i < nPageHeapCount; ++i )
1560                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1561
1562             //m_IntHeap.free( pDesc->pageHeaps );
1563             pDesc->pageHeaps = nullptr;
1564
1565             pDesc->processor_desc::~processor_desc();
1566             m_AlignedHeap.free( pDesc );
1567         }
1568
1569         /// Allocates new superblock descriptor
1570         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1571         {
1572             anchor_tag anchor;
1573             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1574             if ( pDesc == nullptr ) {
1575                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1576                 assert( (uptr_atomic_t(pDesc) & (c_nAlignment - 1)) == 0 );
1577
1578                 anchor = pDesc->anchor.load( CDS_ATOMIC::memory_order_relaxed );
1579                 anchor.tag = 0;
1580                 pDesc->anchor.store( anchor, CDS_ATOMIC::memory_order_relaxed );
1581
1582                 pProcHeap->stat.incDescAllocCount();
1583             }
1584             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1585             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1586             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1587             pDesc->pProcHeap = pProcHeap;
1588
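                 // avail starts at 1: block 0 of a freshly built superblock is handed
                 // directly to the caller by alloc_from_new_superblock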
1589             anchor = pDesc->anchor.load( CDS_ATOMIC::memory_order_relaxed );
1590             anchor.avail = 1;
1591             pDesc->anchor.store( anchor, CDS_ATOMIC::memory_order_relaxed );
1592
1593             return pDesc;
1594         }
1595
1596         /// Allocates superblock page
1597         byte * new_superblock_buffer( processor_heap * pProcHeap )
1598         {
1599             pProcHeap->stat.incBlockAllocated();
1600             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1601                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1602             }
1603             else {
1604                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1605             }
1606         }
1607
1608         /// Frees superblock descriptor and its page
1609         void free_superblock( superblock_desc * pDesc )
1610         {
1611             pDesc->pProcHeap->stat.incBlockDeallocated();
1612             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1613             if ( pDesc->pSB ) {
1614                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1615                     free( pDesc->pSB );
1616                 }
1617                 else {
1618                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1619                 }
1620             }
1621             pProcDesc->listSBDescFree.push( pDesc );
1622         }
1623
1624         /// Allocate memory block
1625         block_header * int_alloc(
1626             size_t nSize    ///< Size of memory block to allocate in bytes
1627             )
1628         {
1629             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1630             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1631                 return alloc_from_OS( nSize );
1632             }
1633             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1634
1635             block_header * pBlock;
1636             processor_heap * pProcHeap;
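                 // Allocation strategy: try the processor's active superblock first,
                 // then a partially filled one, then carve a new superblock;
                 // loop again if a concurrent thread wins the race at any step.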
1637             while ( true ) {
1638                 pProcHeap = find_heap( nSizeClassIndex );
1639                 if ( !pProcHeap )
1640                     return alloc_from_OS( nSize );
1641
1642                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1643                     break;
1644                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1645                     break;
1646                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1647                     break;
1648             }
1649
1650             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1651
1652             assert( pBlock != nullptr );
1653             return pBlock;
1654         }
1655
1656         //@endcond
1657     public:
1658         /// Heap constructor
1659         Heap()
1660         {
1661             // Explicit libcds initialization is needed because the heap may be constructed as a static object
1662             cds::Initialize();
1663
1664             m_nProcessorCount = m_Topology.processor_count();
1665             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1666                 CDS_ATOMIC::atomic<processor_desc *>[ m_nProcessorCount ];
1667             memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount );   // zero-fill the atomic<processor_desc *> array; assumes an all-zero bit pattern represents nullptr
1668         }
1669
1670         /// Heap destructor
1671         /**
1672             The destructor frees all memory allocated by the heap.
1673         */
1674         ~Heap()
1675         {
1676             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1677                 processor_desc * pDesc = m_arrProcDesc[i].load(CDS_ATOMIC::memory_order_relaxed);
1678                 if ( pDesc )
1679                     free_processor_desc( pDesc );
1680             }
1681
1682             m_AlignedHeap.free( m_arrProcDesc );
1683
1684             // Explicit termination of libcds
1685             cds::Terminate();
1686         }
1687
1688         /// Allocate memory block
1689         void * alloc(
1690             size_t nSize    ///< Size of memory block to allocate in bytes
1691         )
1692         {
1693             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1694
1695             // Bound checking is only for our blocks
1696             if ( !pBlock->isOSAllocated() ) {
1697                 // the block is allocated from our heap - bound checker is applicable
1698                 m_BoundChecker.make_trailer(
1699                     reinterpret_cast<byte *>(pBlock + 1),
1700                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1701                     nSize
1702                 );
1703             }
1704
1705             return pBlock + 1;
1706         }
1707
1708         /// Free previously allocated memory block
1709         void free(
1710             void * pMemory  ///< Pointer to memory block to free
1711         )
1712         {
1713             if ( !pMemory )
1714                 return;
1715
1716             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1717             block_header * pBlock = pRedirect->begin();
1718
1719             if ( pBlock->isOSAllocated() ) {
1720                 // Block has been allocated from OS
1721                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1722                 m_LargeHeap.free( pBlock );
1723                 return;
1724             }
1725
1726             assert( !pBlock->isAligned() );
1727             superblock_desc * pDesc = pBlock->desc();
1728
1729             m_BoundChecker.check_bounds(
1730                 pRedirect + 1,
1731                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1732                 pDesc->nBlockSize
1733             );
1734
1735
1736             anchor_tag oldAnchor;
1737             anchor_tag newAnchor;
1738             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1739
1740             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1741
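                 // Push the block back onto the superblock's embedded free list and
                 // detect the FULL -> PARTIAL and last-block -> EMPTY transitions,
                 // which are handled after the CAS loop.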
1742             oldAnchor = pDesc->anchor.load(CDS_ATOMIC::memory_order_acquire);
1743             do {
1744                 newAnchor = oldAnchor;
1745                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1746                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1747                 newAnchor.tag += 1;
1748
1749                 assert( oldAnchor.state != SBSTATE_EMPTY );
1750
1751                 if ( oldAnchor.state == SBSTATE_FULL )
1752                     newAnchor.state = SBSTATE_PARTIAL;
1753
1754                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1755                     //pProcHeap = pDesc->pProcHeap;
1756                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1757                     newAnchor.state = SBSTATE_EMPTY;
1758                 }
1759                 else
1760                     newAnchor.count += 1;
1761             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, CDS_ATOMIC::memory_order_release, CDS_ATOMIC::memory_order_relaxed ) );
1762
1763             pProcHeap->stat.incFreeCount();
1764
1765             if ( newAnchor.state == SBSTATE_EMPTY ) {
1766                 if ( pProcHeap->unlink_partial( pDesc ))
1767                     free_superblock( pDesc );
1768             }
1769             else if (oldAnchor.state == SBSTATE_FULL ) {
1770                 assert( pProcHeap != nullptr );
1771                 pProcHeap->stat.decDescFull();
1772                 pProcHeap->add_partial( pDesc );
1773             }
1774         }
1775
1776         /// Reallocate memory block
1777         /**
1778             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1779             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1780
1781             If there is not enough available memory to expand the block to the given size,
1782             the original block is left unchanged, and \p nullptr is returned.
1783
1784             An aligned memory block cannot be reallocated: if \p pMemory has been allocated by \ref alloc_aligned,
1785             then the return value is \p nullptr and the original block is left unchanged.
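 
                 \par Example
                 A minimal usage sketch, where \p heap denotes some previously constructed Heap object:
                 \code
                 void * p = heap.alloc( 16 );
                 void * q = heap.realloc( p, 1024 );
                 if ( q )        // realloc may return nullptr and keep the original block intact
                     p = q;
                 heap.free( p );
                 \endcode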
1786         */
1787         void * realloc(
1788             void *  pMemory,    ///< Pointer to previously allocated memory block
1789             size_t  nNewSize    ///< New size of memory block, in bytes
1790         )
1791         {
1792             if ( nNewSize == 0 ) {
1793                 free( pMemory );
1794                 return nullptr;
1795             }
1796
1797             const size_t nOrigSize = nNewSize;
1798             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1799
1800             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1801
1802             // Reallocation of aligned block is not possible
1803             if ( pBlock->isAligned() ) {
1804                 assert( false );
1805                 return nullptr;
1806             }
1807
1808             if ( pBlock->isOSAllocated() ) {
1809                 // The block has been allocated from OS
1810                 size_t nCurSize = pBlock->getOSAllocSize();
1811
1812                 if ( nCurSize >= nNewSize )
1813                     return pMemory;
1814
1815                 // Grow block size
1816                 void * pNewBuf = alloc( nOrigSize );
1817                 if ( pNewBuf ) {
1818                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1819                     free( pMemory );
1820                 }
1821                 return pNewBuf;
1822             }
1823
1824             superblock_desc * pDesc = pBlock->desc();
1825             if ( nNewSize <= pDesc->nBlockSize ) {
1826                 // In-place reallocation: the requested size fits into the existing block
1827                 m_BoundChecker.make_trailer(
1828                     reinterpret_cast<byte *>(pBlock + 1),
1829                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1830                     nOrigSize
1831                     );
1832
1833                 return pMemory;
1834             }
1835
1836             void * pNew = alloc( nNewSize );
1837             if ( pNew ) {
1838                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1839                 free( pMemory );
1840                 return pNew;
1841             }
1842
1843             return nullptr;
1844         }
1845
1846         /// Allocate aligned memory block
1847         void * alloc_aligned(
1848             size_t nSize,       ///< Size of memory block to allocate in bytes
1849             size_t nAlignment   ///< Alignment
1850         )
1851         {
1852             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1853                 void * p = alloc( nSize );
1854                 assert( (reinterpret_cast<uptr_atomic_t>(p) & (nAlignment - 1)) == 0 );
1855                 return p;
1856             }
1857
1858             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1859
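                 // If pBlock + 1 is not aligned as requested, place a redirect
                 // block_header just below the first aligned address inside the
                 // over-allocated block; free() follows begin() from the redirect
                 // header back to the real block.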
1860             block_header * pRedirect;
1861             if ( (reinterpret_cast<uptr_atomic_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1862                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uptr_atomic_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1863                 assert( pRedirect != pBlock );
1864                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1865
1866                 assert( (reinterpret_cast<uptr_atomic_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1867             }
1868             else
1869                 pRedirect = pBlock;
1870
1871
1872             // Bound checking is only for our blocks
1873             if ( !pBlock->isOSAllocated() ) {
1874                 // the block is allocated from our heap - bound checker is applicable
1875                 m_BoundChecker.make_trailer(
1876                     reinterpret_cast<byte *>(pRedirect + 1),
1877                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1878                     nSize
1879                 );
1880             }
1881
1882             return pRedirect + 1;
1883         }
1884
1885         /// Free aligned memory block previously allocated by \ref alloc_aligned
1886         void free_aligned(
1887             void * pMemory      ///< Pointer to memory block to free
1888         )
1889         {
1890             free( pMemory );
1891         }
1892
1893     public:
1894
1895         /// Get instant summary statistics
1896         void summaryStat( summary_stat& st )
1897         {
1898             size_t nProcHeapCount = m_SizeClassSelector.size();
1899             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1900                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(CDS_ATOMIC::memory_order_relaxed);
1901                 if ( pProcDesc ) {
1902                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1903                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1904                         if ( pProcHeap ) {
1905                             st.add_procheap_stat( pProcHeap->stat );
1906                         }
1907                     }
1908                 }
1909             }
1910
1911             st.add_heap_stat( m_OSAllocStat );
1912         }
1913     };
1914
1915 }}} // namespace cds::memory::michael
1916
1917 #endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H