1 /*
2     This file is a part of libcds - Concurrent Data Structures library
3
4     (C) Copyright Maxim Khizhinsky (libcds.dev@gmail.com) 2006-2017
5
6     Source code repo: http://github.com/khizmax/libcds/
7     Download: http://sourceforge.net/projects/libcds/files/
8
9     Redistribution and use in source and binary forms, with or without
10     modification, are permitted provided that the following conditions are met:
11
12     * Redistributions of source code must retain the above copyright notice, this
13       list of conditions and the following disclaimer.
14
15     * Redistributions in binary form must reproduce the above copyright notice,
16       this list of conditions and the following disclaimer in the documentation
17       and/or other materials provided with the distribution.
18
19     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23     FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27     OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #ifndef CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
32 #define CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
33
34 /*
35     Michael allocator implementation
36     Source:
37         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
38
39     Editions:
40     2011.09.07 khizmax  Optimization: a small page (about 64K) is allocated by the Heap::alloc call.
41                         This optimization allows allocating system memory more regularly,
42                         in blocks of 1M, which leads to less memory fragmentation.
43     2011.01.02 khizmax  Created
44 */
45
46 #include <stdlib.h>
47 #include <mutex>        // unique_lock
48 #include <cds/init.h>
49 #include <cds/memory/michael/options.h>
50 #include <cds/memory/michael/bound_check.h>
51 #include <cds/memory/michael/procheap_stat.h>
52 #include <cds/memory/michael/osalloc_stat.h>
53
54 #include <cds/os/topology.h>
55 #include <cds/os/alloc_aligned.h>
56 #include <cds/sync/spinlock.h>
57 #include <cds/details/type_padding.h>
58 #include <cds/details/marked_ptr.h>
59 #include <cds/container/vyukov_mpmc_cycle_queue.h>
60 #include <cds/user_setup/cache_line.h>
61 #include <cds/details/lib.h>
62
63 #include <boost/intrusive/list.hpp>
64
65 namespace cds {
66     /// Memory-related algorithms: allocators etc.
67 namespace memory {
68     /// Michael's allocator (class Heap)
69     /**
70         \par Source
71             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
72
73         This namespace declares the main class Heap and a lot of helper classes.
74     */
75 namespace michael {
76
77     /// Size class
78     struct size_class {
79         unsigned int    nBlockSize  ;   ///< block size in bytes
80         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
81         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
82         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
83     };
84
85     /// %Heap based on system \p malloc and \p free functions
86     struct malloc_heap
87     {
88         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
89         static void * alloc( size_t nSize )
90         {
91             void * p = ::malloc( nSize );
92             return p;
93         }
94         /// Returns the memory block to the system (\p free wrapper)
95         static void free( void * p )
96         {
97             ::free( p );
98         }
99     };
100
101     /// %Heap based on system provided aligned \p malloc and \p free functions
102     struct aligned_malloc_heap
103     {
104         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
105         static void * alloc( size_t nSize, size_t nAlignment  )
106         {
107             return cds::OS::aligned_malloc( nSize, nAlignment );
108         }
109         /// Frees aligned memory block \p p that has been previously allocated by \ref alloc call
110         static void free( void * p )
111         {
112             cds::OS::aligned_free( p );
113         }
114     };
115
116     /// Page heap based on \p Heap
117     /**
118         A page heap can allocate memory only in page-sized blocks.
119         \p Heap may be any heap that provides an interface like \ref malloc_heap.
120
121         This class is one of the available implementations of the opt::page_heap option.
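        A custom heap must provide the same static interface; a minimal sketch based on the
        global \p operator \p new / \p operator \p delete (illustrative only, not part of the library)
        could look like this:
        \code
        struct new_delete_heap {
            // allocates nSize bytes via ::operator new
            static void * alloc( size_t nSize )
            {
                return ::operator new( nSize );
            }
            // returns the block to ::operator delete
            static void free( void * p )
            {
                ::operator delete( p );
            }
        };
        \endcode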
122     */
123     template <class Heap = malloc_heap>
124     class page_allocator: public Heap
125     {
126         //@cond
127         typedef Heap base_class;
128         size_t  m_nPageSize;
129         //@endcond
130
131     public:
132         /// Initializes heap
133         page_allocator(
134             size_t nPageSize    ///< page size in bytes
135         )
136             : m_nPageSize( nPageSize )
137         {}
138
139         /// Allocate new page
140         void * alloc()
141         {
142             return base_class::alloc( m_nPageSize );
143         }
144
145         /// Free page \p pPage
146         void free( void * pPage )
147         {
148             base_class::free( pPage );
149         }
150     };
151
152     /// Page cacheable heap
153     /**
154         To improve performance this allocator maintains a small list of free pages.
155         The page heap can allocate memory only in page-sized blocks.
156
157         Template parameters:
158             \li \p FreeListCapacity - capacity of the free-list, default value is 64 pages
159             \li \p Heap - any heap that provides an interface like \ref malloc_heap
160
161         This class is one of the available implementations of the opt::page_heap option.
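        For example, a heap that caches up to 16 free pages can be declared as follows
        (a usage sketch in the spirit of the \p Heap example below):
        \code
        #include <cds/memory/michael/allocator.h>
        using namespace cds::memory::michael;

        typedef Heap<
            opt::page_heap< page_cached_allocator< 16, malloc_heap > >
        > my_heap;
        \endcode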
162     */
163     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
164     class page_cached_allocator: public page_allocator<Heap>
165     {
166         //@cond
167         typedef page_allocator<Heap> base_class;
168
169 #ifdef _DEBUG
170         struct make_null_ptr {
171             void operator ()(void *& p)
172             {
173                 p = nullptr;
174             }
175         };
176 #endif
177         struct free_list_traits : public cds::container::vyukov_queue::traits
178         {
179             typedef opt::v::initialized_static_buffer<void *, FreeListCapacity> buffer;
180 #ifdef _DEBUG
181             typedef make_null_ptr value_cleaner;
182 #endif
183         };
184         typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
185
186         free_list   m_FreeList;
187         //@endcond
188
189     public:
190         /// Initializes heap
191         page_cached_allocator(
192             size_t nPageSize    ///< page size in bytes
193         )
194             : base_class( nPageSize )
195             , m_FreeList( FreeListCapacity )
196         {}
197
198         //@cond
199         ~page_cached_allocator()
200         {
201             void * pPage;
202             while ( m_FreeList.pop(pPage))
203                 base_class::free( pPage );
204         }
205         //@endcond
206
207         /// Allocate new page
208         void * alloc()
209         {
210             void * pPage;
211             if ( !m_FreeList.pop( pPage ))
212                 pPage = base_class::alloc();
213             return pPage;
214         }
215
216         /// Free page \p pPage
217         void free( void * pPage )
218         {
219             if ( !m_FreeList.push( pPage ))
220                 base_class::free( pPage );
221         }
222     };
223
224     /// Implementation of opt::sizeclass_selector option
225     /**
226         The default size-class selector can manage memory blocks up to 64K.
227     */
228     class CDS_EXPORT_API default_sizeclass_selector
229     {
230         //@cond
231         /// Count of different size-classes
232         static const size_t c_nSizeClassCount = 63;
233
234         /// Max block size
235         static const size_t c_nMaxBlockSize = 64 * 1024;
236
237         /// Page size of type 0 (64K)
238         static const unsigned int c_nPage64K = 64 * 1024 - 32;
239
240         /// Page size of type 1 (1M)
241         static const unsigned int c_nPage1M = 1024 * 1024;
242
243         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
244         static size_class const m_szClass[c_nSizeClassCount];
245         static unsigned char const m_szClassMap[];
246         //@endcond
247     public:
248         /// Type of size-class index
249         typedef unsigned int sizeclass_index;
250
251 #ifdef _DEBUG
252         default_sizeclass_selector();
253 #endif
254
255         /// "No size class" index
256         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
257
258         /// Returns size-class count
259         static sizeclass_index size()
260         {
261             return c_nSizeClassCount;
262         }
263
264         /// Returns page size in bytes for given page type \p nPageType
265         static size_t page_size(size_t nPageType )
266         {
267             switch (nPageType) {
268             case 0:
269                 return c_nPage64K;
270             case 1:
271                 return c_nPage1M;
272             default:
273                 assert(false)   ;   // anything forgotten?..
274             }
275             return c_nPage1M;
276         }
277
278         /// Returns the number of page types (page size-classes)
279         /**
280             This class supports pages of two types: 64K page for small objects and 1M page for other objects.
281         */
282         static size_t pageTypeCount()
283         {
284             return 2;
285         }
286
287         /// Returns size-class index for \p nSize
288         /**
289             For large blocks that cannot be allocated by Michael's allocator
290             the function returns \p c_nNoSizeClass (i.e. <tt>(unsigned int) -1</tt>).
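            A typical lookup (illustrative sketch):
            \code
            size_t nSize = 100;
            default_sizeclass_selector::sizeclass_index idx = default_sizeclass_selector::find( nSize );
            if ( idx == default_sizeclass_selector::c_nNoSizeClass ) {
                // too large - such blocks are allocated directly from the OS
            }
            else {
                size_class const * pSC = default_sizeclass_selector::at( idx );
                // pSC->nBlockSize is a block size large enough to hold nSize bytes
            }
            \endcode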
291         */
292         static sizeclass_index find( size_t nSize )
293         {
294             if ( nSize > c_nMaxBlockSize ) {
295                 // Too large block - allocate from system
296                 return c_nNoSizeClass;
297             }
298             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
299             assert( nSize <= m_szClassBounds[ szClass ] );
300             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
301
302             return szClass;
303         }
304
305         /// Gets the \ref size_class struct for size-class index \p nIndex
306         static const size_class * at( sizeclass_index nIndex )
307         {
308             assert( nIndex < size());
309             return m_szClass + nIndex;
310         }
311     };
312
313     //@cond
314     namespace details {
315         struct free_list_tag;
316         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
317
318         struct partial_list_tag;
319         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
320
321         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
322         {};
323     }
324     //@endcond
325
326     /// List of free superblock descriptors
327     /**
328         This class is an implementation of the \ref opt::free_list option.
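        For example, the default spin-lock can be replaced with \p std::mutex
        (a usage sketch; any type satisfying the \p Lock requirements should work):
        \code
        #include <mutex>
        #include <cds/memory/michael/allocator.h>
        using namespace cds::memory::michael;

        typedef Heap<
            opt::free_list< free_list_locked< std::mutex > >
        > my_heap;
        \endcode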
329     */
330     template <class Lock, class T = details::intrusive_superblock_desc>
331     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
332     {
333         //@cond
334         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
335     public:
336         typedef details::free_list_locked_hook item_hook;
337         typedef Lock lock_type;
338     protected:
339         typedef std::unique_lock<lock_type>   auto_lock;
340
341         mutable lock_type   m_access;
342         //@endcond
343
344     public:
345         /// Rebinds to other item type \p T2
346         template <class T2>
347         struct rebind {
348             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
349         };
350
351     public:
352         /// Push superblock descriptor to free-list
353         void push( T * pDesc )
354         {
355             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc)));
356             auto_lock al(m_access);
357             base_class::push_back( *pDesc );
358         }
359
360         /// Pop superblock descriptor from free-list
361         T *   pop()
362         {
363             auto_lock al(m_access);
364             if ( base_class::empty())
365                 return nullptr;
366             T& rDesc = base_class::front();
367             base_class::pop_front();
368             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc)));
369             return &rDesc;
370         }
371
372         /// Returns current count of superblocks in free-list
373         size_t  size() const
374         {
375             auto_lock al(m_access);
376             return base_class::size();
377         }
378     };
379
380     /// List of partially filled superblock descriptors
381     /**
382         This class is an implementation of the \ref opt::partial_list option.
383     */
384     template <class Lock, class T = details::intrusive_superblock_desc>
385     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
386     {
387         //@cond
388         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
389     public:
390         typedef details::partial_list_locked_hook item_hook;
391         typedef Lock    lock_type;
392     protected:
393         typedef std::unique_lock<lock_type>   auto_lock;
394
395         mutable lock_type   m_access;
396         //@endcond
397
398     public:
399         /// Rebinds to other item type \p T2
400         template <class T2>
401         struct rebind {
402             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
403         };
404
405     public:
406         /// Push a superblock \p pDesc to the list
407         void    push( T * pDesc )
408         {
409             auto_lock al( m_access );
410             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc)));
411             base_class::push_back( *pDesc );
412         }
413
414         /// Pop superblock from the list
415         T * pop()
416         {
417             auto_lock al( m_access );
418             if ( base_class::empty())
419                 return nullptr;
420             T& rDesc = base_class::front();
421             base_class::pop_front();
422             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc)));
423             return &rDesc;
424         }
425
426         /// Removes \p pDesc descriptor from the partial list
427         bool unlink( T * pDesc )
428         {
429             assert(pDesc != nullptr);
430             auto_lock al( m_access );
431             // !inited(pDesc) means that pDesc is currently linked into the partial list
432             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc))) {
433                 base_class::erase( base_class::iterator_to( *pDesc ));
434                 return true;
435             }
436             return false;
437         }
438
439         /// Count of elements in the list
440         size_t size() const
441         {
442             auto_lock al( m_access );
443             return base_class::size();
444         }
445     };
446
447     /// Summary processor heap statistics
448     /**
449         Summary heap statistics for use with the \p Heap::summaryStat function.
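        A typical way to inspect the heap (a sketch, assuming \p Heap::summaryStat( summary_stat& )
        fills the structure as described below):
        \code
        #include <iostream>

        cds::memory::michael::Heap<> heap;
        // ... allocate and free through the heap ...
        cds::memory::michael::summary_stat st;
        heap.summaryStat( st );
        std::cout << "alloc from active superblock:  " << st.nAllocFromActive  << "\n"
                  << "alloc from partial superblock: " << st.nAllocFromPartial << "\n"
                  << "free calls:                    " << st.nFreeCount        << std::endl;
        \endcode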
450     */
451     struct summary_stat
452     {
453         size_t      nAllocFromActive    ;  ///< Event count of allocations from the active superblock
454         size_t      nAllocFromPartial   ;  ///< Event count of allocations from a partial superblock
455         size_t      nAllocFromNew       ;  ///< Event count of allocations from a new superblock
456         size_t      nFreeCount          ;  ///< Count of \p free function calls
457         size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
458         size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
459         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors allocated
460         size_t      nDescFull           ;  ///< Count of full superblocks
461         uint64_t    nBytesAllocated     ;  ///< Count of allocated bytes (for heap-managed memory blocks)
462         uint64_t    nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap-managed memory blocks)
463
464         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
465         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
466         uint64_t    nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
467         int64_t     nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
468
469         // Internal contention indicators
470         /// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active Heap internal function
471         /**
472             Contention indicator. A lower value is better.
473         */
474         size_t      nActiveDescCASFailureCount;
475         /// CAS failure counter for updating the \p anchor field of the active superblock in the \p alloc_from_active Heap internal function
476         /**
477             Contention indicator. A lower value is better.
478         */
479         size_t      nActiveAnchorCASFailureCount;
480         /// CAS failure counter for updating the \p anchor field of a partial superblock when reserving blocks in the \p alloc_from_partial Heap internal function
481         /**
482             Contention indicator. A lower value is better.
483         */
484         size_t      nPartialDescCASFailureCount;
485         /// CAS failure counter for updating the \p anchor field of a partial superblock when popping a reserved block in the \p alloc_from_partial Heap internal function
486         /**
487             Contention indicator. A lower value is better.
488         */
489         size_t      nPartialAnchorCASFailureCount;
490
491
492     public:
493         /// Constructs empty statistics. All counters are zero.
494         summary_stat()
495         {
496             clear();
497         }
498
499         /// Difference statistics
500         /**
501             This operator computes the difference between \p *this and \p stat and stores the result in \p *this.
502             Returns \p *this.
503         */
504         summary_stat& operator -=( const summary_stat& stat )
505         {
506             nAllocFromActive    -= stat.nAllocFromActive;
507             nAllocFromPartial   -= stat.nAllocFromPartial;
508             nAllocFromNew       -= stat.nAllocFromNew;
509             nFreeCount          -= stat.nFreeCount;
510             nPageAllocCount     -= stat.nPageAllocCount;
511             nPageDeallocCount   -= stat.nPageDeallocCount;
512             nDescAllocCount     -= stat.nDescAllocCount;
513             nDescFull           -= stat.nDescFull;
514             nBytesAllocated     -= stat.nBytesAllocated;
515             nBytesDeallocated   -= stat.nBytesDeallocated;
516
517             nSysAllocCount      -= stat.nSysAllocCount;
518             nSysFreeCount       -= stat.nSysFreeCount;
519             nSysBytesAllocated  -= stat.nSysBytesAllocated;
520             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
521
522             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
523             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
524             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
525             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
526
527             return *this;
528         }
529
530         /// Clears statistics
531         /**
532             All counters are set to zero.
533         */
534         void clear()
535         {
536             memset( this, 0, sizeof(*this));
537         }
538
539         //@cond
540         template <typename Stat>
541         summary_stat& add_procheap_stat( const Stat& stat )
542         {
543             nAllocFromActive    += stat.allocFromActive();
544             nAllocFromPartial   += stat.allocFromPartial();
545             nAllocFromNew       += stat.allocFromNew();
546             nFreeCount          += stat.freeCount();
547             nPageAllocCount     += stat.blockAllocated();
548             nPageDeallocCount   += stat.blockDeallocated();
549             nDescAllocCount     += stat.descAllocCount();
550             nDescFull           += stat.descFull();
551             nBytesAllocated     += stat.allocatedBytes();
552             nBytesDeallocated   += stat.deallocatedBytes();
553
554             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
555             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
556             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
557             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
558
559             return *this;
560         }
561
562         template <typename Stat>
563         summary_stat& add_heap_stat( const Stat& stat )
564         {
565             nSysAllocCount      += stat.allocCount();
566             nSysFreeCount       += stat.freeCount();
567
568             nSysBytesAllocated  += stat.allocatedBytes();
569             nSysBytesDeallocated+= stat.deallocatedBytes();
570
571             return *this;
572         }
573         //@endcond
574     };
575
576     /// Michael's allocator
577     /**
578         This class provides the base functionality of Michael's allocator. It does not provide
579         the interface described by \p std::allocator; therefore, we call it a heap, not an allocator.
580         The heap interface is closer to the semantics of the \p malloc / \p free system functions.
581         The heap supports allocation of aligned and unaligned data.
582
583         The algorithm is based on a simplified version of
584             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
585
586         which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
587             \li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis
588
589         This is a powerful, scalable, fully customizable heap with a lock-free fast path
590         that has been developed specifically for multi-threaded use.
591         With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
592         You can manually bind any thread to any arena ("processor"). With the opt::sizeclass_selector option you can manage
593         allocation granularity. With opt::page_heap you can utilize any OS-provided feature for page allocation,
594         such as \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
595         The opt::check_bounds feature can help you find memory buffer overflows.
596
597         Brief algorithm description from Michael's work:
598
599         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
600         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
601         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
602         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
603         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
604         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
605         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
606         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
607         in a lock-free manner.
608
609         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
610         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
611         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
612         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
613         available blocks of its original superblock by atomically updating its descriptor.
614
615         <b>Constraint</b>: one superblock may contain up to 2048 blocks (the \p avail and \p count fields of the superblock
616         anchor are 11 bits wide). This constraint imposes a limit on the maximum superblock size.
617
618         Available \p Options:
619         - \ref opt::sys_topology - class that describes system topology needed for allocator.
620             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
621         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
622             Default is \ref malloc_heap.
623         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
624             Default is \ref aligned_malloc_heap
625         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
626             Default is \ref page_cached_allocator
627         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
628             for incoming allocation request.
629             Default is \ref default_sizeclass_selector
630         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
631             Default is \ref free_list_locked
632         - \ref opt::partial_list - option setter for a class to manage a list of partial filled superblocks
633             Default is \ref partial_list_locked
634         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
635             that is maintained by the heap.
636             Default is \ref procheap_empty_stat
637         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
638             allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector)
639             and it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated from the
640             OS directly. The \p os_allocated_stat option sets a class to gather statistics for such large blocks.
641             Default is \ref os_allocated_empty
642         - \ref opt::check_bounds - a bound checker.
643             Default is no bound checker (cds::opt::none)
644
645         \par Usage:
646         The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
647
648         \code
649         #include <cds/memory/michael/allocator.h>
650
651         // Heap with explicitly defined options:
652         cds::memory::michael::Heap<
653             opt::aligned_heap< aligned_malloc_heap >,
654             opt::page_heap< page_cached_allocator<16, malloc_heap> >
655         >   myHeap;
656
657         // Heap with default options:
658         cds::memory::michael::Heap<>    myDefHeap;
659         \endcode
660
661         \par How to make std-like allocator
662
663         There are serious differences between the heap and the <tt>std::allocator</tt> interface:
664             - The heap is stateful, while \p std::allocator is stateless.
665             - The heap has many more template parameters than \p std::allocator
666             - The heap has a low-level interface for memory allocation only, unlike the allocator
667                 interface, which can construct/destroy objects of any type T.
668
669         To convert the heap interface into a \p std::allocator-like interface you should:
670             - Declare an object of class cds::memory::michael::Heap specifying the necessary
671                 template parameters; this is usually a static object
672             - Create a class with the \p std::allocator interface that uses the functions of the heap.
673         \code
674         #include <cds/memory/michael/allocator.h>
675
676         template <class T>
677         class MichaelAllocator
678         {
679             typedef std::allocator<T>               std_allocator;
680             typedef cds::memory::michael::Heap<>    michael_heap;
681
682             // Michael heap static object
683             static michael_heap     s_Heap;
684         public:
685             // Declare typedefs from std::allocator
686             typedef typename std_allocator::const_pointer   const_pointer;
687             typedef typename std_allocator::pointer         pointer;
688             typedef typename std_allocator::const_reference const_reference;
689             typedef typename std_allocator::reference       reference;
690             typedef typename std_allocator::difference_type difference_type;
691             typedef typename std_allocator::size_type       size_type;
692             typedef typename std_allocator::value_type      value_type;
693
694             // Allocation function
695             pointer allocate( size_type _Count, const void* _Hint )
696             {
697                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
698             }
699
700             // Deallocation function
701             void deallocate( pointer _Ptr, size_type _Count )
702             {
703                 s_Heap.free( _Ptr );
704             }
705
706             // Other std::allocator specific functions: address, construct, destroy, etc.
707             ...
708
709             // Rebinding allocator to other type
710             template <class _Other>
711             struct rebind {
712                 typedef MichaelAllocator<_Other> other;
713             };
714         };
715
716         // In .cpp file:
717         template <class T> typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
718
719         \endcode
720     */
721     template <typename... Options>
722     class Heap {
723     protected:
724
725         //@cond
726         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
727         static const unsigned int c_nDefaultBlockAlignment = 8;
728
729         struct default_options {
730             typedef cds::OS::topology           sys_topology;
731             typedef malloc_heap                 system_heap;
732             typedef page_cached_allocator<>     page_heap;
733             typedef aligned_malloc_heap         aligned_heap;
734             typedef default_sizeclass_selector  sizeclass_selector;
735             typedef free_list_locked<cds::sync::spin>    free_list;
736             typedef partial_list_locked<cds::sync::spin> partial_list;
737             typedef procheap_empty_stat         procheap_stat;
738             typedef os_allocated_empty          os_allocated_stat;
739             typedef cds::opt::none              check_bounds;
740         };
741         //@endcond
742
743     protected:
744         //@cond
745         typedef typename opt::make_options<default_options, Options...>::type   options;
746         //@endcond
747
748         //@cond
749         typedef unsigned char   byte;
750         //@endcond
751     public:
752         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
753         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
754         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
755         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
756         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
757         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
758         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
759         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
760
761         // forward declarations
762         //@cond
763         struct superblock_desc;
764         struct processor_heap_base;
765         struct processor_desc;
766         //@endcond
767
768         /// Superblock states
769         /**
770             A superblock can be in one of four states: \p ACTIVE, \p FULL,
771             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
772             superblock in a heap, or if a thread intends to try to install it
773             as such. A superblock is \p FULL if all its blocks are either allocated
774             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
775             and contains unreserved available blocks. A superblock is
776             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
777         */
778         enum superblock_state {
779             SBSTATE_ACTIVE  = 0,    ///< superblock is active
780             SBSTATE_FULL    = 1,    ///< superblock is full
781             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
782             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
783         };
784
785         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
786
787         /// Anchor of the superblock descriptor; the fields pack into 64 bits so the anchor is updated by a single CAS
788         struct anchor_tag {
789             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
790             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
791             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
792             unsigned long long    tag:40    ;   ///< ABA prevention tag
793         };
794
795         /// Superblock descriptor
796         struct superblock_desc
797             : public options::free_list::item_hook
798             , public options::partial_list::item_hook
799         {
800             atomics::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
801             byte *              pSB         ;   ///< ptr to superblock
802             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
803             unsigned int        nBlockSize  ;   ///< block size in bytes
804             unsigned int        nCapacity   ;   ///< superblock size/block size
805
806             //@cond
807             superblock_desc()
808                 : pSB(nullptr)
809                 , pProcHeap( nullptr )
810             {}
811             //@endcond
812         };
813
814         //@cond
815         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
816         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
817         //@endcond
818
819 #if CDS_BUILD_BITS == 32
820         /// Allocated block header
821         /**
822             Each allocated block has an 8-byte header.
823             The header contains a pointer to the owner superblock descriptor and a redirection flag.
824             If the block has been allocated by \ref alloc, the redirection flag is 0 and the block's structure is:
825             \code
826                 +---------------+
827                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
828                 +---------------+
829                 |               | <- memory allocated
830                 |   memory      |
831                 |               |
832                 +---------------+
833             \endcode
834             If the block has been allocated by \ref alloc_aligned, the returned pointer may be shifted from the block start
835             to satisfy the requested alignment. In this case the redirection flag is 1 and the block's structure is:
836             \code
837                 +---------------+
838             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
839             |   +---------------+
840             |   |   padding     |
841             |   |   (unused)    |
842             |   |               |
843             |   +---------------+
844             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
845                 +---------------+
846                 |               | <- memory allocated
847                 |   memory      |
848                 |               |
849                 +---------------+
850             \endcode
851         */
852         class block_header
853         {
854         //@cond
855             enum {
856                 bitAligned = 1,
857                 bitOSAllocated = 2
858             };
859
860             union {
861                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
862                 uint32_t         nSize       ;   // block size (allocated from OS)
863             };
864             uint32_t         nFlags;
865
866         public:
867             void  set( superblock_desc * pdesc, uint32_t isAligned )
868             {
869                 pDesc = pdesc;
870                 nFlags = isAligned ? bitAligned : 0;
871             }
872
873             superblock_desc * desc()
874             {
875                 assert( (nFlags & bitOSAllocated) == 0 );
876                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
877             }
878
879             block_header * begin()
880             {
881                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
882             }
883
884             bool isAligned() const
885             {
886                 return (nFlags & bitAligned) != 0;
887             }
888
889             bool isOSAllocated() const
890             {
891                 return (nFlags & bitOSAllocated) != 0;
892             }
893
894             void setOSAllocated( size_t sz )
895             {
896                 nSize = sz;
897                 nFlags = bitOSAllocated;
898             }
899
900             size_t getOSAllocSize() const
901             {
902                 assert( isOSAllocated());
903                 return nSize;
904             }
905
906         //@endcond
907         };
908 #elif CDS_BUILD_BITS == 64
909         //@cond
910         class block_header
911         {
912             enum {
913                 bitAligned = 1,
914                 bitOSAllocated = 2
915             };
916             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
917             // If bitOSAllocated is set the pDesc contains size of memory block
918             // allocated from OS
919             marked_desc_ptr     pDesc;
920         public:
921             void  set( superblock_desc * pdesc, uint32_t isAligned )
922             {
923                 pDesc = marked_desc_ptr( pdesc, isAligned );
924             }
925
926             superblock_desc * desc()
927             {
928                 assert( !isOSAllocated());
929                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr())->desc() : pDesc.ptr();
930             }
931
932             block_header * begin()
933             {
934                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr()) : this;
935             }
936
937             bool isAligned() const
938             {
939                 return (pDesc.bits() & bitAligned) != 0;
940             }
941
942             bool isOSAllocated() const
943             {
944                 return (pDesc.bits() & bitOSAllocated) != 0;
945             }
946
947             void setOSAllocated( size_t nSize )
948             {
950                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
951             }
952
953             size_t getOSAllocSize() const
954             {
955                 assert( isOSAllocated());
956                 return reinterpret_cast<uintptr_t>( pDesc.ptr()) >> 2;
957             }
958
959         };
960         //@endcond
961 #else
962 #       error "Unexpected value of CDS_BUILD_BITS"
963 #endif  // CDS_BUILD_BITS
964
965         //@cond
966         struct free_block_header: block_header {
967             unsigned int    nNextFree;
968         };
969         //@endcond
970
971 #if CDS_BUILD_BITS == 32
972         /// Processor heap's \p active field
973         /**
974             The \p active field in the processor heap structure is primarily a pointer to the descriptor
975             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
976             guaranteed that the active superblock has at least one block available for reservation.
977             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
978             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
979             of blocks available for reservation in the active superblock less one. That is, if the value
980             of credits is n, then the active superblock contains n+1 blocks available for reservation
981             through the \p active field. Note that the number of blocks in a superblock is not limited
982             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
983             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
984             atomically decrements credits while validating that the active superblock is still valid.
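            A sketch of that fast path (mirroring \p alloc_from_active below):
            \code
            // take one credit from the active superblock of the processor heap
            active_tag oldActive = pProcHeap->active.load( atomics::memory_order_acquire );
            if ( oldActive.ptr() && oldActive.credits() > 0 ) {
                active_tag newActive = oldActive;
                newActive.credits( oldActive.credits() - 1 );
                // on success one block of the active superblock is reserved for this thread
                pProcHeap->active.compare_exchange_strong( oldActive, newActive,
                    atomics::memory_order_release, atomics::memory_order_relaxed );
            }
            \endcode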
985         */
986         class active_tag {
987         //@cond
988             superblock_desc *       pDesc;
989             uint32_t             nCredits;
990
991         public:
992             static const unsigned int c_nMaxCredits = 0 - 1;
993
994         public:
995             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
996                 : pDesc( nullptr )
997                 , nCredits(0)
998             {}
999
1000             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1001             ~active_tag() CDS_NOEXCEPT = default;
1002             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
1003 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1004             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1005             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1006 #       endif
1007
1008             /// Returns pointer to superblock descriptor
1009             superblock_desc * ptr() const
1010             {
1011                 return pDesc;
1012             }
1013
1014             /// Sets superblock descriptor
1015             void ptr( superblock_desc * p )
1016             {
1017                 pDesc = p;
1018             }
1019
1020             unsigned int credits() const
1021             {
1022                 return nCredits;
1023             }
1024
1025             void credits( unsigned int n )
1026             {
1027                 nCredits = n;
1028             }
1029
1030             void clear()
1031             {
1032                 pDesc = nullptr;
1033                 nCredits = 0;
1034             }
1035
1036             void set( superblock_desc * pSB, unsigned int n )
1037             {
1038                 pDesc = pSB;
1039                 nCredits = n;
1040             }
1041         //@endcond
1042         };
1043 #elif CDS_BUILD_BITS == 64
1044         //@cond
1045         class active_tag
1046         {
1047         public:
1048             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1049         protected:
1050             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1051             marked_desc_ptr pDesc;
1052
1053         public:
1054             active_tag() CDS_NOEXCEPT
1055                 : pDesc( nullptr )
1056             {}
1057             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1058             //active_tag() CDS_NOEXCEPT = default;
1059             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1060             ~active_tag() CDS_NOEXCEPT = default;
1061             active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1062 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1063             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1064             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1065 #       endif
1066             superblock_desc *    ptr() const
1067             {
1068                 return pDesc.ptr();
1069             }
1070
1071             void ptr( superblock_desc * p )
1072             {
1073                 assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
1074                 pDesc = marked_desc_ptr( p, pDesc.bits());
1075             }
1076
1077             unsigned int credits()
1078             {
1079                 return (unsigned int) pDesc.bits();
1080             }
1081
1082             void credits( unsigned int n )
1083             {
1084                 assert( n <= c_nMaxCredits );
1085                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1086             }
1087
1088             void clear()
1089             {
1090                 pDesc = marked_desc_ptr();
1091             }
1092
1093             void set( superblock_desc * pSB, unsigned int n )
1094             {
1095                 assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
1096                 pDesc = marked_desc_ptr( pSB, n );
1097             }
1098
1099         };
1100         //@endcond
1101 #else
1102 #       error "Unexpected value of CDS_BUILD_BITS"
1103 #endif  // CDS_BUILD_BITS
1104
1105
1106         /// Processor heap
1107         struct processor_heap_base
1108         {
1109             CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1110             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1111             const size_class *  pSizeClass  ;   ///< pointer to size class
1112             atomics::atomic<superblock_desc *>   pPartial    ;   ///< pointer to a partially filled superblock (may be \p nullptr)
1113             partial_list        partialList ;   ///< list of partially filled superblocks owned by the processor heap
1114             unsigned int        nPageIdx    ;   ///< page size-class index, \ref c_nPageSelfAllocation - "small page"
1115
1116             /// Small page marker
1117             /**
1118                 If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
1119                 This optimization allows allocating system memory more regularly, in blocks of 1M, which leads
1120                 to less memory fragmentation.
1121             */
1122             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1123
1124             procheap_stat       stat        ;   ///< heap statistics
1125             //processor_heap_statistics   stat;
1126
1127             //@cond
1128             processor_heap_base() CDS_NOEXCEPT
1129                 : pProcDesc( nullptr )
1130                 , pSizeClass( nullptr )
1131                 , pPartial( nullptr )
1132             {
1133                 assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
1134             }
1135             //@endcond
1136
1137             /// Get partial superblock owned by the processor heap
1138             superblock_desc * get_partial()
1139             {
1140                 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1141                 do {
1142                     if ( !pDesc ) {
1143                         pDesc =  partialList.pop();
1144                         break;
1145                     }
1146                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ));
1147
1148                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc)));
1149                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc)));
1150                 return pDesc;
1151             }
1152
1153             /// Add partial superblock \p pDesc to the list
1154             void add_partial( superblock_desc * pDesc )
1155             {
1156                 assert( pPartial != pDesc );
1157                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc)));
1158
1159                 superblock_desc * pCur = nullptr;
1160                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed))
1161                     partialList.push( pDesc );
1162             }
1163
1164
1165             /// Remove superblock \p pDesc from the list of partial superblocks
1166             bool unlink_partial( superblock_desc * pDesc )
1167             {
1168                 return partialList.unlink( pDesc );
1169             }
1170         };
1171
1172         /// Processor heap padded to \p c_nAlignment (cache-line size)
1173         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1174
1175         /// Processor descriptor
1176         struct processor_desc
1177         {
1178             processor_heap *    arrProcHeap     ; ///< array of processor heaps
1179             free_list           listSBDescFree  ; ///< List of free superblock descriptors
1180             page_heap *         pageHeaps       ; ///< array of page heaps (one for each page size)
1181
1182             //@cond
1183             processor_desc()
1184                 : arrProcHeap( nullptr )
1185                 , pageHeaps( nullptr )
1186             {}
1187             //@endcond
1188         };
1189
1190
1191     protected:
1192         sys_topology        m_Topology           ;  ///< System topology
1193         system_heap         m_LargeHeap          ;  ///< Heap for large blocks
1194         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1195         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1196         atomics::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1197         unsigned int        m_nProcessorCount    ;  ///< Processor count
1198         bound_checker       m_BoundChecker       ;  ///< Bound checker
1199
1200         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1201
1202     protected:
1203         //@cond
1204
1205         /// Allocates large block from system memory
1206         block_header * alloc_from_OS( size_t nSize )
1207         {
1208             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ));
1209             m_OSAllocStat.incBytesAllocated( nSize );
1210             p->setOSAllocated( nSize );
1211             return p;
1212         }
1213
1214         /// Allocates from the active superblock if possible
1215         block_header * alloc_from_active( processor_heap * pProcHeap )
1216         {
1217             active_tag  oldActive;
1218             int nCollision = -1;
1219
1220             // Reserve block
1221             while ( true ) {
1222                 ++nCollision;
1223                 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1224                 if ( !oldActive.ptr())
1225                     return nullptr;
1226                 unsigned int nCredits = oldActive.credits();
1227                 active_tag  newActive   ; // default = 0
1228                 if ( nCredits != 0 ) {
1229                     newActive = oldActive;
1230                     newActive.credits( nCredits - 1 );
1231                 }
1232                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1233                     break;
1234             }
1235
1236             if ( nCollision )
1237                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1238
1239             // pop block
1240             superblock_desc * pDesc = oldActive.ptr();
1241
1242             anchor_tag  oldAnchor;
1243             anchor_tag  newAnchor;
1244             byte * pAddr;
1245             unsigned int nMoreCredits = 0;
1246
1247             nCollision = -1;
1248             do {
1249                 ++nCollision;
1250                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1251
1252                 assert( oldAnchor.avail < pDesc->nCapacity );
1253                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1254
1255                 // TSan reports data race if the block contained atomic ops before
1256                 CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1257                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1258                 CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1259                 newAnchor.tag += 1;
1260
1261                 if ( oldActive.credits() == 0 ) {
1262                     // state must be ACTIVE
1263                     if ( oldAnchor.count == 0 )
1264                         newAnchor.state = SBSTATE_FULL;
1265                     else {
1266                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1267                         newAnchor.count -= nMoreCredits;
1268                     }
1269                 }
1270             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1271
1272             if ( nCollision )
1273                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1274
1275             assert( newAnchor.state != SBSTATE_EMPTY );
1276
1277             if ( newAnchor.state == SBSTATE_FULL )
1278                 pProcHeap->stat.incDescFull();
1279             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1280                 update_active( pProcHeap, pDesc, nMoreCredits );
1281
1282             pProcHeap->stat.incAllocFromActive();
1283
1284             // block_header fields do not need to be set up here;
1285             // they were set in alloc_from_new_superblock
1286             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1287             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated());
1288             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned());
1289
1290             return reinterpret_cast<block_header *>( pAddr );
1291         }
1292
1293         /// Allocates from a partially filled superblock, if possible
1294         block_header * alloc_from_partial( processor_heap * pProcHeap )
1295         {
1296         retry:
1297             superblock_desc * pDesc = pProcHeap->get_partial();
1298             if ( !pDesc )
1299                 return nullptr;
1300
1301             // reserve blocks
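            // Reserve one block for this request plus up to active_tag::c_nMaxCredits extra
            // credits; the extra credits are used below (update_active) to make this
            // superblock the active one for the heap.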
1302             anchor_tag  oldAnchor;
1303             anchor_tag  newAnchor;
1304             //byte * pAddr;
1305             unsigned int nMoreCredits = 0;
1306
1307             int nCollision = -1;
1308             do {
1309                 ++nCollision;
1310
1311                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1312                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1313                     free_superblock( pDesc );
1314                     goto retry;
1315                 }
1316
1317                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1318                 newAnchor.count -= nMoreCredits + 1;
1319                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1320                 newAnchor.tag += 1;
1321             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed));
1322
1323             if ( nCollision )
1324                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1325
1326             if ( newAnchor.state == SBSTATE_FULL )
1327                 pProcHeap->stat.incDescFull();
1328
1329             // Now, the thread is guaranteed to have reserved one or more blocks
1330             // pop reserved block
1331             byte * pAddr;
1332             nCollision = -1;
1333             do {
1334                 ++nCollision;
1335
1336                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1337
1338                 assert( oldAnchor.avail < pDesc->nCapacity );
1339                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1340                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1341                 ++newAnchor.tag;
1342             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed));
1343
1344             if ( nCollision )
1345                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1346
1347             assert( newAnchor.state != SBSTATE_EMPTY );
1348
1349             pProcHeap->stat.incAllocFromPartial();
1350
1351             if ( nMoreCredits > 0 )
1352                 update_active( pProcHeap, pDesc, nMoreCredits );
1353
1354             // The block_header fields do not need to be set up here:
1355             // they were already initialized in alloc_from_new_superblock
1356             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1357             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned());
1358             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated());
1359
1360             return reinterpret_cast<block_header *>( pAddr );
1361         }
1362
1363         /// Allocates from a newly created superblock
1364         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1365         {
1366             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1367             assert( pDesc != nullptr );
1368             pDesc->pSB = new_superblock_buffer( pProcHeap );
1369
1370             anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1371             anchor.tag += 1;
1372
1373             // Make single-linked list of free blocks in superblock
1374             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1375             unsigned int nNext = 0;
1376             const unsigned int nBlockSize = pDesc->nBlockSize;
1377             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1378                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1379                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1380             }
1381             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
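            // The blocks now form a singly linked free list by index: block i refers to
            // block i + 1 and the last block wraps to index 0. Block 0 itself is returned
            // to the caller below, and anchor.avail starts at 1 (see new_superblock_desc).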
1382
1383             active_tag newActive;
1384             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1385
1386             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1387             anchor.state = SBSTATE_ACTIVE;
1388             pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1389
1390             active_tag curActive;
1391             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1392                 pProcHeap->stat.incAllocFromNew();
1393                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1394                 return reinterpret_cast<block_header *>( pDesc->pSB );
1395             }
1396
1397             free_superblock( pDesc );
1398             return nullptr;
1399         }
1400
1401         /// Finds the appropriate processor heap for the selected size class
1402         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1403         {
1404             assert( nSizeClassIndex < m_SizeClassSelector.size());
1405
1406             unsigned int nProcessorId = m_Topology.current_processor();
1407             assert( nProcessorId < m_nProcessorCount );
1408
1409             if ( nProcessorId >= m_nProcessorCount )
1410                 nProcessorId = 0;
1411
1412             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1413             while ( !pDesc ) {
1414
1415                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1416                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1417                     pDesc = pNewDesc;
1418                     break;
1419                 }
1420                 free_processor_desc( pNewDesc );
1421             }
1422
1423             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1424         }
1425
1426         /// Updates active field of processor heap \p pProcHeap
1427         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1428         {
1429             assert( pProcHeap == pDesc->pProcHeap );
1430
1431             active_tag  nullActive;
1432             active_tag  newActive;
1433             newActive.set( pDesc, nCredits - 1 );
1434
1435             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ))
1436                 return;
1437
1438             // Another thread has already installed an active superblock.
1439             // Return the credits to this superblock and make it partial
1440
1441             anchor_tag  oldAnchor;
1442             anchor_tag  newAnchor;
1443
1444             do {
1445                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1446                 newAnchor.count += nCredits;
1447                 newAnchor.state = SBSTATE_PARTIAL;
1448             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1449
1450             pDesc->pProcHeap->add_partial( pDesc );
1451         }
1452
1453         /// Allocates new processor descriptor
1454         processor_desc * new_processor_desc( unsigned int nProcessorId )
1455         {
1456             CDS_UNUSED( nProcessorId );
1457             processor_desc * pDesc;
1458             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1459
1460             /*
1461                 Processor descriptor layout
1462
1463                 proc_desc -  64-byte alignment
1464                 page_heap[0] 64-byte alignment
1465                 page_heap[1] 64-byte alignment
1466                 ...
1467                 page_heap[P] 64-byte alignment
1468
1469                 proc_heap[0] 64-byte alignment
1470                 proc_heap[1] 64-byte alignment
1471                 ...
1472                 proc_heap[N] 64-byte alignment
1473             */
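            /*
                Worked example with purely illustrative sizes (not the real ones): if
                c_nAlignment == 64, sizeof(processor_desc) == 200, nPageHeapCount == 2 and
                sizeof(page_heap) == 120, then szDesc = (200 + 240 + 63) / 64 = 7 aligned
                units, so arrProcHeap starts 7 * 64 = 448 bytes after pDesc, and szTotal
                adds sizeof(processor_heap) * m_SizeClassSelector.size() bytes on top.
            */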
1474
1475             const size_t szDesc =
1476                 ( sizeof(processor_desc)
1477                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1478                     + c_nAlignment - 1
1479                 ) / c_nAlignment;
1481
1482             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1483
1484             static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1485
1486             // TSan false positive: the new descriptor is published later with a release fence
1487             CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1488
1489             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment )) processor_desc;
1490
1491             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1492             for ( size_t i = 0; i < nPageHeapCount; ++i )
1493                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1494
1495             // initialize processor heaps
1496             pDesc->arrProcHeap =
1497                 reinterpret_cast<processor_heap *>(
1498                     reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1499                     & ~(uintptr_t(c_nAlignment) - 1)
1500                 );
1501
1502             processor_heap * pProcHeap = pDesc->arrProcHeap;
1503             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1504             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1505                 new (pProcHeap) processor_heap();
1506                 pProcHeap->pProcDesc = pDesc;
1507                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1508                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1509                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1510                 else
1511                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1512             }
1513             CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1514             return pDesc;
1515         }
1516
1517
1518         void free_processor_heap( processor_heap * pProcHeap )
1519         {
1520             assert( pProcHeap->nPageIdx != processor_heap::c_nPageSelfAllocation );
1521
1522             page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1523             superblock_desc * pDesc;
1524
1525             for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1526                 pageHeap.free( pDesc->pSB );
1527                 m_AlignedHeap.free( pDesc );
1528             }
1529
1530             superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1531             if ( pPartial ) {
1532                 pageHeap.free( pPartial->pSB );
1533                 m_AlignedHeap.free( pPartial );
1534             }
1535
1536             pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1537             if ( pDesc ) {
1538                 pageHeap.free( pDesc->pSB );
1539                 m_AlignedHeap.free( pDesc );
1540             }
1541         }
1542
1543         /// Frees processor descriptor
1544         void free_processor_desc( processor_desc * pDesc )
1545         {
1546             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1547
1548             {
1549                 processor_heap * const pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1550
1551                 // free large blocks only
1552                 for ( processor_heap * pProcHeap = pDesc->arrProcHeap; pProcHeap < pProcHeapEnd; ++pProcHeap ) {
1553                     if ( pProcHeap->nPageIdx != processor_heap::c_nPageSelfAllocation )
1554                         free_processor_heap( pProcHeap );
1555
1556                     pProcHeap->~processor_heap();
1557                 }
1558             }
1559
1560             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1561                 m_AlignedHeap.free( pSBDesc );
1562
1563             for (size_t i = 0; i < nPageHeapCount; ++i )
1564                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1565
1566             pDesc->pageHeaps = nullptr;
1567
1568             pDesc->processor_desc::~processor_desc();
1569             m_AlignedHeap.free( pDesc );
1570         }
1571
1572         /// Allocates new superblock descriptor
1573         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1574         {
1575             anchor_tag anchor;
1576             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1577             if ( pDesc == nullptr ) {
1578                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment )) superblock_desc;
1579                 assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );
1580
1581                 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1582                 anchor.tag = 0;
1583                 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1584
1585                 pProcHeap->stat.incDescAllocCount();
1586             }
1587             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1588             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1589             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1590             pDesc->pProcHeap = pProcHeap;
1591
1592             anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1593             anchor.avail = 1;
1594             pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1595
1596             return pDesc;
1597         }
1598
1599         /// Allocates superblock page
1600         byte * new_superblock_buffer( processor_heap * pProcHeap )
1601         {
1602             pProcHeap->stat.incBlockAllocated();
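            // A superblock whose size has its own size class (the small, ~64K page case)
            // is carved out of this heap itself via alloc(); otherwise the page comes
            // from the per-processor page heap selected by nPageIdx.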
1603             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1604                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1605             }
1606             else {
1607                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1608             }
1609         }
1610
1611         /// Frees superblock descriptor and its page
1612         void free_superblock( superblock_desc * pDesc )
1613         {
1614             pDesc->pProcHeap->stat.incBlockDeallocated();
1615             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1616             if ( pDesc->pSB ) {
1617                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation )
1618                     free( pDesc->pSB );
1619                 else
1620                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1621             }
1622             pProcDesc->listSBDescFree.push( pDesc );
1623         }
1624
1625         /// Allocate memory block
1626         block_header * int_alloc(
1627             size_t nSize    ///< Size of memory block to allocate in bytes
1628             )
1629         {
1630             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1631             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1632                 return alloc_from_OS( nSize );
1633             }
1634             assert( nSizeClassIndex < m_SizeClassSelector.size());
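            // Try the fast paths in order: the active superblock, then a partially
            // filled superblock, then a freshly allocated superblock.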
1635
1636             block_header * pBlock;
1637             processor_heap * pProcHeap;
1638             while ( true ) {
1639                 pProcHeap = find_heap( nSizeClassIndex );
1640                 if ( !pProcHeap )
1641                     return alloc_from_OS( nSize );
1642
1643                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1644                     break;
1645                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1646                     break;
1647                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1648                     break;
1649             }
1650
1651             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1652
1653             assert( pBlock != nullptr );
1654             return pBlock;
1655         }
1656
1657         //@endcond
1658     public:
1659         /// Heap constructor
1660         Heap()
1661         {
1662             // Explicit libcds initialization is needed since the heap may be constructed as a static object
1663             cds::Initialize();
1664
1665             m_nProcessorCount = m_Topology.processor_count();
1666             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1667                 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
1668             memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount );   // note: zeroing atomic<> with memset assumes a trivial, pointer-sized representation
1669         }
1670
1671         /// Heap destructor
1672         /**
1673             The destructor frees all memory allocated by the heap.
1674         */
1675         ~Heap()
1676         {
1677             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1678                 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1679                 if ( pDesc )
1680                     free_processor_desc( pDesc );
1681             }
1682
1683             m_AlignedHeap.free( m_arrProcDesc );
1684
1685             // Explicit termination of libcds
1686             cds::Terminate();
1687         }
1688
1689         /// Allocate memory block
1690         void * alloc(
1691             size_t nSize    ///< Size of memory block to allocate in bytes
1692         )
1693         {
1694             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1695
1696             // Bound checking is only for our blocks
1697             if ( !pBlock->isOSAllocated()) {
1698                 // the block is allocated from our heap - bound checker is applicable
1699                 m_BoundChecker.make_trailer(
1700                     reinterpret_cast<byte *>(pBlock + 1),
1701                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1702                     nSize
1703                 );
1704             }
1705
1706             CDS_TSAN_ANNOTATE_PUBLISH_MEMORY_RANGE( pBlock + 1, nSize );
1707             return pBlock + 1;
1708         }
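        /*
            Layout of a block returned by alloc() when it comes from this heap
            (illustration; sizes are symbolic):

                [ block_header ][ user data: nSize bytes ][ bound-check trailer, if enabled ]
                ^ pBlock         ^ pointer returned to the caller (pBlock + 1)

            Blocks that fall back to the OS (no suitable size class) skip the
            bound-check trailer handling above.
        */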
1709
1710         /// Free previously allocated memory block
1711         void free(
1712             void * pMemory  ///< Pointer to memory block to free
1713         )
1714         {
1715             if ( !pMemory )
1716                 return;
1717
1718             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1719             block_header * pBlock = pRedirect->begin();
1720
1721             if ( pBlock->isOSAllocated()) {
1722                 // Block has been allocated from OS
1723                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize());
1724                 m_LargeHeap.free( pBlock );
1725                 return;
1726             }
1727
1728             assert( !pBlock->isAligned());
1729             superblock_desc * pDesc = pBlock->desc();
1730
1731             m_BoundChecker.check_bounds(
1732                 pRedirect + 1,
1733                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1734                 pDesc->nBlockSize
1735             );
1736
1737
1738             anchor_tag oldAnchor;
1739             anchor_tag newAnchor;
1740             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1741
1742             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1743
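            // Push the block back onto the superblock's free list and update its state:
            // a FULL superblock becomes PARTIAL again, and when the last in-use block
            // is returned the superblock becomes EMPTY and may be released below.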
1744             oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1745             do {
1746                 newAnchor = oldAnchor;
1747                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1748                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1749                 newAnchor.tag += 1;
1750
1751                 assert( oldAnchor.state != SBSTATE_EMPTY );
1752
1753                 if ( oldAnchor.state == SBSTATE_FULL )
1754                     newAnchor.state = SBSTATE_PARTIAL;
1755
1756                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1757                     //pProcHeap = pDesc->pProcHeap;
1758                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1759                     newAnchor.state = SBSTATE_EMPTY;
1760                 }
1761                 else
1762                     newAnchor.count += 1;
1763             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1764
1765             pProcHeap->stat.incFreeCount();
1766
1767             if ( newAnchor.state == SBSTATE_EMPTY ) {
1768                 if ( pProcHeap->unlink_partial( pDesc ))
1769                     free_superblock( pDesc );
1770             }
1771             else if (oldAnchor.state == SBSTATE_FULL ) {
1772                 assert( pProcHeap != nullptr );
1773                 pProcHeap->stat.decDescFull();
1774                 pProcHeap->add_partial( pDesc );
1775             }
1776         }
1777
1778         /// Reallocate memory block
1779         /**
1780             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1781             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1782
1783             If there is not enough available memory to expand the block to the given size,
1784             the original block is left unchanged, and \p nullptr is returned.
1785
1786             An aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
1787             then the return value is \p nullptr and the original block is left unchanged.
1788         */
1789         void * realloc(
1790             void *  pMemory,    ///< Pointer to previously allocated memory block
1791             size_t  nNewSize    ///< New size of memory block, in bytes
1792         )
1793         {
1794             if ( nNewSize == 0 ) {
1795                 free( pMemory );
1796                 return nullptr;
1797             }
1798
1799             const size_t nOrigSize = nNewSize;
1800             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1801
1802             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1803
1804             // Reallocation of aligned block is not possible
1805             if ( pBlock->isAligned()) {
1806                 assert( false );
1807                 return nullptr;
1808             }
1809
1810             if ( pBlock->isOSAllocated()) {
1811                 // The block has been allocated from OS
1812                 size_t nCurSize = pBlock->getOSAllocSize();
1813
1814                 if ( nCurSize >= nNewSize )
1815                     return pMemory;
1816
1817                 // Grow block size
1818                 void * pNewBuf = alloc( nOrigSize );
1819                 if ( pNewBuf ) {
1820                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header));
1821                     free( pMemory );
1822                 }
1823                 return pNewBuf;
1824             }
1825
1826             superblock_desc * pDesc = pBlock->desc();
1827             if ( pDesc->nBlockSize <= nNewSize ) {
1828                 // In-place reallocation
1829                 m_BoundChecker.make_trailer(
1830                     reinterpret_cast<byte *>(pBlock + 1),
1831                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1832                     nOrigSize
1833                     );
1834
1835                 return pMemory;
1836             }
1837
1838             void * pNew = alloc( nNewSize );
1839             if ( pNew ) {
1840                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header));
1841                 free( pMemory );
1842                 return pNew;
1843             }
1844
1845             return nullptr;
1846         }
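        /*
            A minimal usage sketch of realloc(), mirroring the documentation above.
            It assumes a Heap instance named `heap` (not defined here); only members
            declared in this class are used.

                void * p = heap.alloc( 64 );
                p = heap.realloc( p, 256 );   // may return a different address; the old block is freed on success
                heap.realloc( p, 0 );         // frees the block and returns nullptr
                // a block obtained from alloc_aligned() cannot be realloc'ed: nullptr is returned
        */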
1847
1848         /// Allocate aligned memory block
1849         void * alloc_aligned(
1850             size_t nSize,       ///< Size of memory block to allocate in bytes
1851             size_t nAlignment   ///< Alignment
1852         )
1853         {
1854             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1855                 void * p = alloc( nSize );
1856                 assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
1857                 return p;
1858             }
1859
1860             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1861
1862             block_header * pRedirect;
1863             if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1864                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1865                 assert( pRedirect != pBlock );
1866                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1867
1868                 assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1869             }
1870             else
1871                 pRedirect = pBlock;
1872
1873
1874             // Bound checking is only for our blocks
1875             if ( !pBlock->isOSAllocated()) {
1876                 // the block is allocated from our heap - bound checker is applicable
1877                 m_BoundChecker.make_trailer(
1878                     reinterpret_cast<byte *>(pRedirect + 1),
1879                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1880                     nSize
1881                 );
1882             }
1883
1884             return pRedirect + 1;
1885         }
1886
1887         /// Free aligned memory block previously allocated by \ref alloc_aligned
1888         void free_aligned(
1889             void * pMemory      ///< Pointer to memory block to free
1890         )
1891         {
1892             free( pMemory );
1893         }
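        /*
            A minimal usage sketch of aligned allocation, assuming a Heap instance
            named `heap` (not defined here):

                void * p = heap.alloc_aligned( 256, 128 );   // 256 bytes, 128-byte aligned
                assert( (reinterpret_cast<uintptr_t>( p ) & 127) == 0 );
                heap.free_aligned( p );
        */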
1894
1895     public:
1896
1897         /// Get instant summary statistics
1898         void summaryStat( summary_stat& st )
1899         {
1900             size_t nProcHeapCount = m_SizeClassSelector.size();
1901             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1902                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1903                 if ( pProcDesc ) {
1904                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1905                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1906                         if ( pProcHeap ) {
1907                             st.add_procheap_stat( pProcHeap->stat );
1908                         }
1909                     }
1910                 }
1911             }
1912
1913             st.add_heap_stat( m_OSAllocStat );
1914         }
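        /*
            A minimal usage sketch of statistics collection, assuming a Heap instance
            named `heap` (not defined here):

                summary_stat st;
                heap.summaryStat( st );   // aggregates per-processor-heap counters plus OS-allocation stats
        */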
1915     };
1916
1917 }}} // namespace cds::memory::michael
1918
1919 #endif // CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H