1 //$$CDS-header$$
2
3 #ifndef CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
5
6 /*
7     Michael allocator implementation
8     Source:
9         [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
10
11     Editions:
12 2011.09.07 khizmax  Optimization: small pages (about 64K) are allocated by the Heap::alloc call.
13                         This optimization makes system memory allocation more regular,
14                         in blocks of 1M, which leads to less memory fragmentation.
15     2011.01.02 khizmax  Created
16 */
17
18 #include <stdlib.h>
19 #include <mutex>        // unique_lock
20 #include <cds/init.h>
21 #include <cds/memory/michael/options.h>
22 #include <cds/memory/michael/bound_check.h>
23 #include <cds/memory/michael/procheap_stat.h>
24 #include <cds/memory/michael/osalloc_stat.h>
25
26 #include <cds/os/topology.h>
27 #include <cds/os/alloc_aligned.h>
28 #include <cds/sync/spinlock.h>
29 #include <cds/details/type_padding.h>
30 #include <cds/details/marked_ptr.h>
31 #include <cds/container/vyukov_mpmc_cycle_queue.h>
32 #include <cds/user_setup/cache_line.h>
33 #include <cds/details/lib.h>
34
35 #include <boost/intrusive/list.hpp>
36
37 namespace cds {
38     /// Memory-related algorithms: allocators etc.
39 namespace memory {
40     /// Michael's allocator (class Heap)
41     /**
42         \par Source
43             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
44
45         This namespace declares the main class Heap and a lot of helper classes.
46     */
47 namespace michael {
48
49     /// Size class
50     struct size_class {
51         unsigned int    nBlockSize  ;   ///< block size in bytes
52         unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
53         unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
54         unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
55     };
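    /*
        Illustrative example (hypothetical values, not part of the library): for a size class with
        64-byte blocks carved from a nominal 64K superblock (page type 0), the capacity is simply
        nSBSize / nBlockSize:

            size_class sc = { 64, 64 * 1024, 1024, 0 };   // 1024 blocks of 64 bytes in a 64K superblock
    */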
56
57     /// %Heap based on system \p malloc and \p free functions
58     struct malloc_heap
59     {
60         /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
61         static void * alloc( size_t nSize )
62         {
63             void * p = ::malloc( nSize );
64             return p;
65         }
66         /// Returns the memory block to the system (\p free wrapper)
67         static void free( void * p )
68         {
69             ::free( p );
70         }
71     };
72
73     /// %Heap based on system provided aligned \p malloc and \p free functions
74     struct aligned_malloc_heap
75     {
76         /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
77         static void * alloc( size_t nSize, size_t nAlignment  )
78         {
79             return cds::OS::aligned_malloc( nSize, nAlignment );
80         }
81         /// Frees aligned memory block \p p that has been previously allocated by \ref alloc call
82         static void free( void * p )
83         {
84             cds::OS::aligned_free( p );
85         }
86     };
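    /*
        Illustrative sketch (not part of the library): any type exposing the same static
        alloc()/free() interface as \ref malloc_heap can be used where a system heap is expected,
        for example a zero-initializing heap built on calloc:

            struct calloc_heap
            {
                // Allocates nSize bytes initialized to zero
                static void * alloc( size_t nSize )
                {
                    return ::calloc( 1, nSize );
                }
                // Returns the memory block to the system
                static void free( void * p )
                {
                    ::free( p );
                }
            };
    */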
87
88     /// Page heap based on \p Heap
89     /**
90         A page heap can allocate memory only in page-sized blocks.
91         \p Heap may be any heap that provides an interface like \ref malloc_heap.
92
93         This class is one of the available implementations of the opt::page_heap option.
94     */
95     template <class Heap = malloc_heap>
96     class page_allocator: public Heap
97     {
98         //@cond
99         typedef Heap base_class;
100         size_t  m_nPageSize;
101         //@endcond
102
103     public:
104         /// Initializes heap
105         page_allocator(
106             size_t nPageSize    ///< page size in bytes
107         )
108             : m_nPageSize( nPageSize )
109         {}
110
111         /// Allocate new page
112         void * alloc()
113         {
114             return base_class::alloc( m_nPageSize );
115         }
116
117         /// Free page \p pPage
118         void free( void * pPage )
119         {
120             base_class::free( pPage );
121         }
122     };
123
124     /// Page cacheable heap
125     /**
126         To improve performance this allocator maintains a small list of free pages.
127         The page heap can allocate memory only in page-sized blocks.
128
129         Template parameters:
130             \li \p FreeListCapacity - capacity of the free-list, default value is 64 pages
131             \li \p Heap - any heap that provides an interface like \ref malloc_heap.
132
133         This class is one of the available implementations of the opt::page_heap option.
134     */
135     template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
136     class page_cached_allocator: public page_allocator<Heap>
137     {
138         //@cond
139         typedef page_allocator<Heap> base_class;
140
141 #ifdef _DEBUG
142         struct make_null_ptr {
143             void operator ()(void *& p)
144             {
145                 p = nullptr;
146             }
147         };
148 #endif
149         struct free_list_traits : public cds::container::vyukov_queue::traits
150         {
151             typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
152 #ifdef _DEBUG
153             typedef make_null_ptr value_cleaner;
154 #endif
155         };
156         typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
157
158         free_list   m_FreeList;
159         //@endcond
160
161     public:
162         /// Initializes heap
163         page_cached_allocator(
164             size_t nPageSize    ///< page size in bytes
165         )
166             : base_class( nPageSize )
167             , m_FreeList( FreeListCapacity )
168         {}
169
170         //@cond
171         ~page_cached_allocator()
172         {
173             void * pPage;
174             while ( m_FreeList.pop(pPage) )
175                 base_class::free( pPage );
176         }
177         //@endcond
178
179         /// Allocate new page
180         void * alloc()
181         {
182             void * pPage;
183             if ( !m_FreeList.pop( pPage ) )
184                 pPage = base_class::alloc();
185             return pPage;
186         }
187
188         /// Free page \p pPage
189         void free( void * pPage )
190         {
191             if ( !m_FreeList.push( pPage ))
192                 base_class::free( pPage );
193         }
194     };
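    /*
        Usage sketch (hypothetical values): a page heap is constructed with a fixed page size and
        hands out page-sized blocks only. With page_cached_allocator, freed pages are kept in an
        internal free-list of up to FreeListCapacity entries and are reused by later alloc() calls:

            cds::memory::michael::page_cached_allocator<16> pageHeap( 64 * 1024 ); // 16-page cache, 64K pages
            void * pPage = pageHeap.alloc();   // taken from the cache, or from the underlying heap if the cache is empty
            // ... use the page ...
            pageHeap.free( pPage );            // returned to the cache, or to the underlying heap if the cache is full
    */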
195
196     /// Implementation of opt::sizeclass_selector option
197     /**
198         The default size-class selector can manage memory blocks up to 64K in size.
199     */
200     class CDS_EXPORT_API default_sizeclass_selector
201     {
202         //@cond
203         /// Count of different size-classes
204         static const size_t c_nSizeClassCount = 63;
205
206         /// Max block size
207         static const size_t c_nMaxBlockSize = 64 * 1024;
208
209         /// Page size of type 0 (64K)
210         static const unsigned int c_nPage64K = 64 * 1024 - 32;
211
212         /// Page size of type 1 (1M)
213         static const unsigned int c_nPage1M = 1024 * 1024;
214
215         static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
216         static size_class const m_szClass[c_nSizeClassCount];
217         static unsigned char const m_szClassMap[];
218         //@endcond
219     public:
220         /// Type of size-class index
221         typedef unsigned int sizeclass_index;
222
223 #ifdef _DEBUG
224         default_sizeclass_selector();
225 #endif
226
227         /// "No size class" index
228         static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
229
230         /// Returns size-class count
231         static sizeclass_index size()
232         {
233             return c_nSizeClassCount;
234         }
235
236         /// Returns page size in bytes for given page type \p nPageType
237         static size_t page_size(size_t nPageType )
238         {
239             switch (nPageType) {
240             case 0:
241                 return c_nPage64K;
242             case 1:
243                 return c_nPage1M;
244             default:
245                 assert(false)   ;   // anything forgotten?..
246             }
247             return c_nPage1M;
248         }
249
250         /// Returns the number of page types
251         /**
252             This class supports pages of two types: a 64K page for small objects and a 1M page for other objects.
253         */
254         static size_t pageTypeCount()
255         {
256             return 2;
257         }
258
259         /// Returns size-class index for \p nSize
260         /**
261             For large blocks that cannot be handled by Michael's allocator
262             the function returns \p c_nNoSizeClass (i.e. <tt>(sizeclass_index) -1</tt>).
263         */
264         static sizeclass_index find( size_t nSize )
265         {
266             if ( nSize > c_nMaxBlockSize ) {
267                 // Too large block - allocate from system
268                 return c_nNoSizeClass;
269             }
270             sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
271             assert( nSize <= m_szClassBounds[ szClass ] );
272             assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
273
274             return szClass;
275         }
276
277         /// Gets details::size_class struct for size-class index \p nIndex
278         static const size_class * at( sizeclass_index nIndex )
279         {
280             assert( nIndex < size() );
281             return m_szClass + nIndex;
282         }
283     };
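    /*
        Illustrative sketch of how a size-class selector is used (hypothetical request size):

            size_t nSize = 100;
            auto idx = default_sizeclass_selector::find( nSize );
            if ( idx == default_sizeclass_selector::c_nNoSizeClass ) {
                // the block is larger than 64K - it is allocated directly from the OS
            }
            else {
                size_class const * sc = default_sizeclass_selector::at( idx );
                // sc->nBlockSize >= nSize; the block will be carved from a superblock of sc->nSBSize bytes
            }
    */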
284
285     //@cond
286     namespace details {
287         struct free_list_tag;
288         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > >  free_list_locked_hook;
289
290         struct partial_list_tag;
291         typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > >  partial_list_locked_hook;
292
293         struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
294         {};
295     }
296     //@endcond
297
298     /// List of free superblock descriptors
299     /**
300         This class is an implementation of the \ref opt::free_list option
301     */
302     template <class Lock, class T = details::intrusive_superblock_desc>
303     class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
304     {
305         //@cond
306         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
307     public:
308         typedef details::free_list_locked_hook item_hook;
309         typedef Lock lock_type;
310     protected:
311         typedef std::unique_lock<lock_type>   auto_lock;
312
313         mutable lock_type   m_access;
314         //@endcond
315
316     public:
317         /// Rebinds to other item type \p T2
318         template <class T2>
319         struct rebind {
320             typedef free_list_locked<Lock, T2>    other   ;   ///< rebind result
321         };
322
323     public:
324         /// Push superblock descriptor to free-list
325         void push( T * pDesc )
326         {
327             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
328             auto_lock al(m_access);
329             base_class::push_back( *pDesc );
330         }
331
332         /// Pop superblock descriptor from free-list
333         T *   pop()
334         {
335             auto_lock al(m_access);
336             if ( base_class::empty() )
337                 return nullptr;
338             T& rDesc = base_class::front();
339             base_class::pop_front();
340             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
341             return &rDesc;
342         }
343
344         /// Returns current count of superblocks in free-list
345         size_t  size() const
346         {
347             auto_lock al(m_access);
348             return base_class::size();
349         }
350     };
351
352     /// List of partially filled superblock descriptors
353     /**
354         This class is an implementation of the \ref opt::partial_list option
355     */
356     template <class Lock, class T = details::intrusive_superblock_desc>
357     class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
358     {
359         //@cond
360         typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
361     public:
362         typedef details::partial_list_locked_hook item_hook;
363         typedef Lock    lock_type;
364     protected:
365         typedef std::unique_lock<lock_type>   auto_lock;
366
367         mutable lock_type   m_access;
368         //@endcond
369
370     public:
371         /// Rebinds to other item type \p T2
372         template <class T2>
373         struct rebind {
374             typedef partial_list_locked<Lock, T2>    other   ;   ///< rebind result
375         };
376
377     public:
378         /// Push a superblock \p pDesc to the list
379         void    push( T * pDesc )
380         {
381             auto_lock al( m_access );
382             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
383             base_class::push_back( *pDesc );
384         }
385
386         /// Pop superblock from the list
387         T * pop()
388         {
389             auto_lock al( m_access );
390             if ( base_class::empty() )
391                 return nullptr;
392             T& rDesc = base_class::front();
393             base_class::pop_front();
394             assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
395             return &rDesc;
396         }
397
398         /// Removes \p pDesc descriptor from the partial list
399         bool unlink( T * pDesc )
400         {
401             assert(pDesc != nullptr);
402             auto_lock al( m_access );
403             // !inited(pDesc) means that pDesc is currently linked into the partial list
404             if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
405                 base_class::erase( base_class::iterator_to( *pDesc ) );
406                 return true;
407             }
408             return false;
409         }
410
411         /// Count of elements in the list
412         size_t size() const
413         {
414             auto_lock al( m_access );
415             return base_class::size();
416         }
417     };
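    /*
        Illustrative note: as this header shows, a custom class passed via opt::free_list must provide
        item_hook, rebind<T2>, push(), pop() and size(); a class passed via opt::partial_list must
        additionally provide unlink(). A minimal sketch of the expected shape (the locking or lock-free
        policy is up to the implementation):

            template <class Lock, class T = details::intrusive_superblock_desc>
            class my_partial_list {
            public:
                typedef details::partial_list_locked_hook item_hook;
                template <class T2> struct rebind { typedef my_partial_list<Lock, T2> other; };

                void   push( T * pDesc );    // add a partially filled superblock descriptor
                T *    pop();                // take a descriptor, or return nullptr
                bool   unlink( T * pDesc );  // remove a specific descriptor if it is linked
                size_t size() const;         // current number of descriptors
            };
    */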
418
419     /// Summary processor heap statistics
420     /**
421         Summary heap statistics for use with the \p Heap::summaryStat function.
422     */
423     struct summary_stat
424     {
425         size_t      nAllocFromActive    ;  ///< Event count of allocation from active superblock
426         size_t      nAllocFromPartial   ;  ///< Event count of allocation from partial superblock
427         size_t      nAllocFromNew       ;  ///< Event count of allocation from new superblock
428         size_t      nFreeCount          ;  ///< Count of \p free function calls
429         size_t      nPageAllocCount     ;  ///< Count of pages (superblocks) allocated
430         size_t      nPageDeallocCount   ;  ///< Count of pages (superblocks) deallocated
431         size_t      nDescAllocCount     ;  ///< Count of superblock descriptors allocated
432         size_t      nDescFull           ;  ///< Count of full superblocks
433         atomic64u_t nBytesAllocated     ;  ///< Count of allocated bytes (for heap-managed memory blocks)
434         atomic64u_t nBytesDeallocated   ;  ///< Count of deallocated bytes (for heap-managed memory blocks)
435
436         size_t      nSysAllocCount      ;  ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
437         size_t      nSysFreeCount       ;  ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
438         atomic64u_t nSysBytesAllocated  ;  ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
439         atomic64_t  nSysBytesDeallocated;  ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
440
441         // Internal contention indicators
442         /// CAS failure counter for updating the \p active field of the processor heap in the \p alloc_from_active Heap internal function
443         /**
444             Contention indicator. The lower the value, the better.
445         */
446         size_t      nActiveDescCASFailureCount;
447         /// CAS failure counter for updating the anchor field of the active superblock in the \p alloc_from_active Heap internal function
448         /**
449             Contention indicator. The lower the value, the better.
450         */
451         size_t      nActiveAnchorCASFailureCount;
452         /// CAS failure counter for the block-reservation update of a partial superblock's anchor in the \p alloc_from_partial Heap internal function
453         /**
454             Contention indicator. The lower the value, the better.
455         */
456         size_t      nPartialDescCASFailureCount;
457         /// CAS failure counter for the block-pop update of a partial superblock's anchor in the \p alloc_from_partial Heap internal function
458         /**
459             Contention indicator. The lower the value, the better.
460         */
461         size_t      nPartialAnchorCASFailureCount;
462
463
464     public:
465         /// Constructs empty statistics. All counters are zero.
466         summary_stat()
467         {
468             clear();
469         }
470
471         /// Difference statistics
472         /**
473             This operator computes the difference between \p *this and \p stat and places the result into \p *this.
474             Returns \p *this.
475         */
476         summary_stat& operator -=( const summary_stat& stat )
477         {
478             nAllocFromActive    -= stat.nAllocFromActive;
479             nAllocFromPartial   -= stat.nAllocFromPartial;
480             nAllocFromNew       -= stat.nAllocFromNew;
481             nFreeCount          -= stat.nFreeCount;
482             nPageAllocCount     -= stat.nPageAllocCount;
483             nPageDeallocCount   -= stat.nPageDeallocCount;
484             nDescAllocCount     -= stat.nDescAllocCount;
485             nDescFull           -= stat.nDescFull;
486             nBytesAllocated     -= stat.nBytesAllocated;
487             nBytesDeallocated   -= stat.nBytesDeallocated;
488
489             nSysAllocCount      -= stat.nSysAllocCount;
490             nSysFreeCount       -= stat.nSysFreeCount;
491             nSysBytesAllocated  -= stat.nSysBytesAllocated;
492             nSysBytesDeallocated -= stat.nSysBytesDeallocated;
493
494             nActiveDescCASFailureCount      -= stat.nActiveDescCASFailureCount;
495             nActiveAnchorCASFailureCount    -= stat.nActiveAnchorCASFailureCount;
496             nPartialDescCASFailureCount     -= stat.nPartialDescCASFailureCount;
497             nPartialAnchorCASFailureCount   -= stat.nPartialAnchorCASFailureCount;
498
499             return *this;
500         }
501
502         /// Clears statistics
503         /**
504             All counters are set to zero.
505         */
506         void clear()
507         {
508             memset( this, 0, sizeof(*this));
509         }
510
511         //@cond
512         template <typename Stat>
513         summary_stat& add_procheap_stat( const Stat& stat )
514         {
515             nAllocFromActive    += stat.allocFromActive();
516             nAllocFromPartial   += stat.allocFromPartial();
517             nAllocFromNew       += stat.allocFromNew();
518             nFreeCount          += stat.freeCount();
519             nPageAllocCount     += stat.blockAllocated();
520             nPageDeallocCount   += stat.blockDeallocated();
521             nDescAllocCount     += stat.descAllocCount();
522             nDescFull           += stat.descFull();
523             nBytesAllocated     += stat.allocatedBytes();
524             nBytesDeallocated   += stat.deallocatedBytes();
525
526             nActiveDescCASFailureCount      += stat.activeDescCASFailureCount();
527             nActiveAnchorCASFailureCount    += stat.activeAnchorCASFailureCount();
528             nPartialDescCASFailureCount     += stat.partialDescCASFailureCount();
529             nPartialAnchorCASFailureCount   += stat.partialAnchorCASFailureCount();
530
531             return *this;
532         }
533
534         template <typename Stat>
535         summary_stat& add_heap_stat( const Stat& stat )
536         {
537             nSysAllocCount      += stat.allocCount();
538             nSysFreeCount       += stat.freeCount();
539
540             nSysBytesAllocated  += stat.allocatedBytes();
541             nSysBytesDeallocated+= stat.deallocatedBytes();
542
543             return *this;
544         }
545         //@endcond
546     };
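    /*
        Usage sketch: measuring the allocation activity of a code region by diffing two snapshots
        with operator -=. The exact signature of Heap::summaryStat is assumed here; it is referenced
        but not declared in this part of the header.

            cds::memory::michael::summary_stat statBefore, statAfter;
            myHeap.summaryStat( statBefore );   // snapshot before the measured region (assumed API)
            // ... allocations / deallocations on myHeap ...
            myHeap.summaryStat( statAfter );    // snapshot after the region
            statAfter -= statBefore;            // statAfter now holds the deltas for the region
    */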
547
548     /// Michael's allocator
549     /**
550         This class provides the base functionality of Michael's allocator. It does not provide
551         the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
552         The heap interface is closer to the semantics of the \p malloc / \p free system functions.
553         The heap supports allocation of aligned and unaligned data.
554
555         The algorithm is based on a simplified version of
556             \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
557
558         which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
559             \li [2002] Emery Berger "Memory Management for High-Performance Application", PhD thesis
560
561         This is a powerful, scalable, fully customizable heap with a lock-free fast path
562         that has been developed specifically for multi-threaded applications.
563         With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
564         You can manually bind any of your threads to any arena ("processor"). With the opt::sizeclass_selector option you can manage
565         allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation
566         like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
567         The opt::check_bounds feature can help you find memory buffer overflows.
568
569         Brief algorithm description from Michael's work:
570
571         Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
572         the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
573         Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
574         heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
575         An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
576         that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
577         block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
578         structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
579         in a lock-free manner.
580
581         Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
582         the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
583         atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
584         pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
585         available blocks of its original superblock by atomically updating its descriptor.
586
587         <b>Constraint</b>: one superblock may contain up to 2048 blocks. This restriction imposes a limit on the maximum
588         superblock size.
589
590         Available \p Options:
591         - \ref opt::sys_topology - class that describes system topology needed for allocator.
592             Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
593         - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
594             Default is \ref malloc_heap.
595         - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
596             Default is \ref aligned_malloc_heap
597         - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
598             Default is \ref page_cached_allocator
599         - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
600             for incoming allocation request.
601             Default is \ref default_sizeclass_selector
602         - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
603             Default is \ref free_list_locked
604         - \ref opt::partial_list - option setter for a class to manage a list of partially filled superblocks
605             Default is \ref partial_list_locked
606         - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
607             that is maintained by the heap.
608             Default is \ref procheap_empty_stat
609         - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
610             allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector);
611             it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated directly from
612             the OS. The \p os_allocated_stat option sets a class to gather statistics for such large blocks.
613             Default is \ref os_allocated_empty
614         - \ref opt::check_bounds - a bound checker.
615             Default is no bound checker (cds::opt::none)
616
617         \par Usage:
618         The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
619
620         \code
621         #include <cds/memory/michael/allocator.h>
622
623         // Heap with explicitly defined options:
624         cds::memory::michael::Heap<
625             opt::aligned_heap< aligned_malloc_heap >,
626             opt::page_heap< page_cached_allocator<16, malloc_heap> >
627         >   myHeap;
628
629         // Heap with default options:
630         cds::memory::michael::Heap<>    myDefHeap;
631         \endcode
632
633         \par How to make a std-like allocator
634
635         There are serious differences between the heap and the <tt>std::allocator</tt> interface:
636             - The heap is stateful, while \p std::allocator is stateless.
637             - The heap has many more template parameters than \p std::allocator
638             - The heap has a low-level interface for memory allocation only, unlike the allocator
639                 interface, which can construct/destroy objects of any type T.
640
641         To convert the heap interface into a \p std::allocator -like interface you should:
642             - Declare an object of class cds::memory::michael::Heap specifying the necessary
643                 template parameters; this is usually a static object
644             - Create a class with the \p std::allocator interface that uses the heap's functions.
645         \code
646         #include <cds/memory/michael/allocator.h>
647
648         template <class T>
649         class MichaelAllocator
650         {
651             typedef std::allocator<T>               std_allocator;
652             typedef cds::memory::michael::Heap<>    michael_heap;
653
654             // Michael heap static object
655             static michael_heap     s_Heap;
656         public:
657             // Declare typedefs from std::allocator
658             typedef typename std_allocator::const_pointer   const_pointer;
659             typedef typename std_allocator::pointer         pointer;
660             typedef typename std_allocator::const_reference const_reference;
661             typedef typename std_allocator::reference       reference;
662             typedef typename std_allocator::difference_type difference_type;
663             typedef typename std_allocator::size_type       size_type;
664             typedef typename std_allocator::value_type      value_type;
665
666             // Allocation function
667             pointer allocate( size_type _Count, const void* _Hint )
668             {
669                 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
670             }
671
672             // Deallocation function
673             void deallocate( pointer _Ptr, size_type _Count )
674             {
675                 s_Heap.free( _Ptr );
676             }
677
678             // Other std::allocator specific functions: address, construct, destroy, etc.
679             ...
680
681             // Rebinding allocator to other type
682             template <class _Other>
683             struct rebind {
684                 typedef MichaelAllocator<_Other> other;
685             };
686         };
687
688         // In .cpp file:
689         template <class T> typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
690
691         \endcode
692     */
693     template <typename... Options>
694     class Heap {
695     protected:
696
697         //@cond
698         static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
699         static const unsigned int c_nDefaultBlockAlignment = 8;
700
701         struct default_options {
702             typedef cds::OS::topology           sys_topology;
703             typedef malloc_heap                 system_heap;
704             typedef page_cached_allocator<>     page_heap;
705             typedef aligned_malloc_heap         aligned_heap;
706             typedef default_sizeclass_selector  sizeclass_selector;
707             typedef free_list_locked<cds::sync::spin>    free_list;
708             typedef partial_list_locked<cds::sync::spin> partial_list;
709             typedef procheap_empty_stat         procheap_stat;
710             typedef os_allocated_empty          os_allocated_stat;
711             typedef cds::opt::none              check_bounds;
712         };
713         //@endcond
714
715     protected:
716         //@cond
717         typedef typename opt::make_options<default_options, Options...>::type   options;
718         //@endcond
719
720         //@cond
721         typedef unsigned char   byte;
722         //@endcond
723     public:
724         typedef typename options::sys_topology          sys_topology        ;   ///< effective system topology
725         typedef typename options::system_heap           system_heap         ;   ///< effective system heap
726         typedef typename options::aligned_heap          aligned_heap        ;   ///< effective aligned heap
727         typedef typename options::sizeclass_selector    sizeclass_selector  ;   ///< effective sizeclass selector
728         typedef typename options::page_heap             page_heap           ;   ///< effective page heap
729         typedef typename options::procheap_stat         procheap_stat       ;   ///< effective processor heap statistics
730         typedef typename options::os_allocated_stat     os_allocated_stat   ;   ///< effective OS-allocated memory statistics
731         typedef details::bound_checker_selector< typename options::check_bounds >    bound_checker   ;  ///< effective bound checker
732
733         // forward declarations
734         //@cond
735         struct superblock_desc;
736         struct processor_heap_base;
737         struct processor_desc;
738         //@endcond
739
740         /// Superblock states
741         /**
742             A superblock can be in one of four states: \p ACTIVE, \p FULL,
743             \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
744             superblock in a heap, or if a thread intends to try to install it
745             as such. A superblock is \p FULL if all its blocks are either allocated
746             or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
747             and contains unreserved available blocks. A superblock is
748             \p EMPTY if all its blocks are free and it is not \p ACTIVE.
749         */
750         enum superblock_state {
751             SBSTATE_ACTIVE  = 0,    ///< superblock is active
752             SBSTATE_FULL    = 1,    ///< superblock is full
753             SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
754             SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
755         };
756
757         static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
758
759         /// Anchor of the superblock descriptor. Updated by CAS
760         struct anchor_tag {
761             unsigned long long  avail:11    ;   ///< index of first available block in the superblock
762             unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
763             unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
764             unsigned long long    tag:40    ;   ///< ABA prevention tag
765         };
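        // Layout note: avail(11) + count(11) + state(2) + tag(40) = 64 bits, so the whole anchor
        // fits into a single machine word and can be updated by one CAS (see the
        // atomics::atomic<anchor_tag> field of superblock_desc below). The 11-bit avail/count
        // fields are what limit a superblock to c_nMaxBlockInSuperBlock == 2048 == 2^11 blocks.
        // An illustrative compile-time check (sketch only):
        //
        //     static_assert( sizeof(anchor_tag) == sizeof(unsigned long long),
        //                    "anchor_tag must be CAS-able as a single 64-bit word" );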
766
767         /// Superblock descriptor
768         struct superblock_desc
769             : public options::free_list::item_hook
770             , public options::partial_list::item_hook
771         {
772             atomics::atomic<anchor_tag>          anchor      ;   ///< anchor, see \ref anchor_tag
773             byte *              pSB         ;   ///< ptr to superblock
774             processor_heap_base * pProcHeap ;   ///< pointer to owner processor heap
775             unsigned int        nBlockSize  ;   ///< block size in bytes
776             unsigned int        nCapacity   ;   ///< superblock size/block size
777
778             //@cond
779             superblock_desc()
780                 : pSB(nullptr)
781                 , pProcHeap( nullptr )
782             {}
783             //@endcond
784         };
785
786         //@cond
787         typedef typename options::free_list::template rebind<superblock_desc>::other    free_list;
788         typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
789         //@endcond
790
791 #if CDS_BUILD_BITS == 32
792         /// Allocated block header
793         /**
794             Each allocated block has an 8-byte header.
795             The header contains a pointer to the owner superblock descriptor and a redirection flag.
796             If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
797             \code
798                 +---------------+
799                 | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
800                 +---------------+
801                 |               | <- memory allocated
802                 |   memory      |
803                 |               |
804                 +---------------+
805             \endcode
806             If the block has been allocated by \ref alloc_aligned, the returned pointer may have been shifted to satisfy the requested alignment.
807             In this case the redirection flag is 1 and the block's structure is:
808             \code
809                 +---------------+
810             +-> | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
811             |   +---------------+
812             |   |   padding     |
813             |   |   (unused)    |
814             |   |               |
815             |   +---------------+
816             +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
817                 +---------------+
818                 |               | <- memory allocated
819                 |   memory      |
820                 |               |
821                 +---------------+
822             \endcode
823         */
824         class block_header
825         {
826         //@cond
827             enum {
828                 bitAligned = 1,
829                 bitOSAllocated = 2
830             };
831
832             union {
833                 superblock_desc *   pDesc       ;   // pointer to superblock descriptor
834                 uint32_t         nSize       ;   // block size (allocated from OS)
835             };
836             uint32_t         nFlags;
837
838         public:
839             void  set( superblock_desc * pdesc, uint32_t isAligned )
840             {
841                 pDesc = pdesc;
842                 nFlags = isAligned ? bitAligned : 0;
843             }
844
845             superblock_desc * desc()
846             {
847                 assert( (nFlags & bitOSAllocated) == 0 );
848                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
849             }
850
851             block_header * begin()
852             {
853                 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
854             }
855
856             bool isAligned() const
857             {
858                 return (nFlags & bitAligned) != 0;
859             }
860
861             bool isOSAllocated() const
862             {
863                 return (nFlags & bitOSAllocated) != 0;
864             }
865
866             void setOSAllocated( size_t sz )
867             {
868                 nSize = sz;
869                 nFlags = bitOSAllocated;
870             }
871
872             size_t getOSAllocSize() const
873             {
874                 assert( isOSAllocated() );
875                 return nSize;
876             }
877
878         //@endcond
879         };
880 #elif CDS_BUILD_BITS == 64
881         //@cond
882         class block_header
883         {
884             enum {
885                 bitAligned = 1,
886                 bitOSAllocated = 2
887             };
888             typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
889             // If bitOSAllocated is set, pDesc contains the size of the memory block
890             // allocated from the OS
891             marked_desc_ptr     pDesc;
892         public:
893             void  set( superblock_desc * pdesc, uint32_t isAligned )
894             {
895                 pDesc = marked_desc_ptr( pdesc, isAligned );
896             }
897
898             superblock_desc * desc()
899             {
900                 assert( !isOSAllocated() );
901                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
902             }
903
904             block_header * begin()
905             {
906                 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
907             }
908
909             bool isAligned() const
910             {
911                 return (pDesc.bits() & bitAligned) != 0;
912             }
913
914             bool isOSAllocated() const
915             {
916                 return (pDesc.bits() & bitOSAllocated) != 0;
917             }
918
919             void setOSAllocated( size_t nSize )
920             {
921
922                 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
923             }
924
925             size_t getOSAllocSize() const
926             {
927                 assert( isOSAllocated() );
928                 return reinterpret_cast<uintptr_t>( pDesc.ptr() ) >> 2;
929             }
930
931         };
932         //@endcond
933 #else
934 #       error "Unexpected value of CDS_BUILD_BITS"
935 #endif  // CDS_BUILD_BITS
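        // Illustrative note (sketch, not part of the library): given a pointer p returned to the
        // user, the owning block_header sits immediately before it, and begin() resolves an
        // aligned (redirected) header back to the real block head:
        //
        //     block_header * pH    = reinterpret_cast<block_header *>( p ) - 1;  // header precedes the user data
        //     block_header * pHead = pH->begin();           // identity if the block is not aligned
        //     superblock_desc * pDesc =
        //         pH->isOSAllocated() ? nullptr : pH->desc();   // descriptor exists for heap-managed blocks only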
936
937         //@cond
938         struct free_block_header: block_header {
939             unsigned int    nNextFree;
940         };
941         //@endcond
942
943 #if CDS_BUILD_BITS == 32
944         /// Processor heap's \p active field
945         /**
946             The \p active field in the processor heap structure is primarily a pointer to the descriptor
947             of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
948             guaranteed that the active superblock has at least one block available for reservation.
949             Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
950             of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
951             of blocks available for reservation in the active superblock less one. That is, if the value
952             of credits is n, then the active superblock contains n+1 blocks available for reservation
953             through the \p active field. Note that the number of blocks in a superblock is not limited
954             to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
955             (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
956             atomically decrements credits while validating that the active superblock is still valid.
957         */
958         class active_tag {
959         //@cond
960             superblock_desc *       pDesc;
961             uint32_t             nCredits;
962
963         public:
964             static const unsigned int c_nMaxCredits = 0 - 1;
965
966         public:
967             CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
968                 : pDesc( nullptr )
969                 , nCredits(0)
970             {}
971
972             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
973             ~active_tag() CDS_NOEXCEPT = default;
974             active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
975 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
976             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
977             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
978 #       endif
979
980             /// Returns pointer to superblock descriptor
981             superblock_desc * ptr() const
982             {
983                 return pDesc;
984             }
985
986             /// Sets superblock descriptor
987             void ptr( superblock_desc * p )
988             {
989                 pDesc = p;
990             }
991
992             unsigned int credits() const
993             {
994                 return nCredits;
995             }
996
997             void credits( unsigned int n )
998             {
999                 nCredits = n;
1000             }
1001
1002             void clear()
1003             {
1004                 pDesc = nullptr;
1005                 nCredits = 0;
1006             }
1007
1008             void set( superblock_desc * pSB, unsigned int n )
1009             {
1010                 pDesc = pSB;
1011                 nCredits = n;
1012             }
1013         //@endcond
1014         };
1015 #elif CDS_BUILD_BITS == 64
1016         //@cond
1017         class active_tag
1018         {
1019         public:
1020             static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1021         protected:
1022             typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1023             marked_desc_ptr pDesc;
1024
1025         public:
1026             active_tag() CDS_NOEXCEPT
1027                 : pDesc( nullptr )
1028             {}
1029             // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1030             //active_tag() CDS_NOEXCEPT = default;
1031             active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1032             ~active_tag() CDS_NOEXCEPT = default;
1033             active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1034 #       if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1035             active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1036             active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1037 #       endif
1038             superblock_desc *    ptr() const
1039             {
1040                 return pDesc.ptr();
1041             }
1042
1043             void ptr( superblock_desc * p )
1044             {
1045                 assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
1046                 pDesc = marked_desc_ptr( p, pDesc.bits());
1047             }
1048
1049             unsigned int credits()
1050             {
1051                 return (unsigned int) pDesc.bits();
1052             }
1053
1054             void credits( unsigned int n )
1055             {
1056                 assert( n <= c_nMaxCredits );
1057                 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1058             }
1059
1060             void clear()
1061             {
1062                 pDesc = marked_desc_ptr();
1063             }
1064
1065             void set( superblock_desc * pSB, unsigned int n )
1066             {
1067                 assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
1068                 pDesc = marked_desc_ptr( pSB, n );
1069             }
1070
1071         };
1072         //@endcond
1073 #else
1074 #       error "Unexpected value of CDS_BUILD_BITS"
1075 #endif  // CDS_BUILD_BITS
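        // Illustrative note on the credits encoding (64-bit build): superblock descriptors are
        // aligned to c_nAlignment (the cache-line size), so the low bits of the descriptor pointer
        // can carry up to c_nMaxCredits = c_nAlignment - 1 credits. A stored value of n credits
        // means that n + 1 blocks are reservable through the \p active field, e.g.:
        //
        //     active_tag a;
        //     a.set( pDesc, 3 );              // pDesc must be suitably aligned; 3 credits => 4 reservable blocks
        //     unsigned int n = a.credits();   // == 3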
1076
1077
1078         /// Processor heap
1079         struct processor_heap_base
1080         {
1081             CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active;   ///< pointer to the descriptor of active superblock owned by processor heap
1082             processor_desc *    pProcDesc   ;   ///< pointer to parent processor descriptor
1083             const size_class *  pSizeClass  ;   ///< pointer to size class
1084             atomics::atomic<superblock_desc *>   pPartial    ;   ///< pointer to partial filled superblock (may be \p nullptr)
1085             partial_list        partialList ;   ///< list of partial filled superblocks owned by the processor heap
1086             unsigned int        nPageIdx    ;   ///< page size-class index, \ref c_nPageSelfAllocation - "small page"
1087
1088             /// Small page marker
1089             /**
1090                 If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
1091                 This optimization makes system memory allocation more regular, in blocks of 1M, which leads
1092                 to less memory fragmentation.
1093             */
1094             static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1095
1096             procheap_stat       stat        ;   ///< heap statistics
1097             //processor_heap_statistics   stat;
1098
1099             //@cond
1100             processor_heap_base() CDS_NOEXCEPT
1101                 : pProcDesc( nullptr )
1102                 , pSizeClass( nullptr )
1103                 , pPartial( nullptr )
1104             {
1105                 assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
1106             }
1107             //@endcond
1108
1109             /// Get partial superblock owned by the processor heap
1110             superblock_desc * get_partial()
1111             {
1112                 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1113                 do {
1114                     if ( !pDesc ) {
1115                         pDesc =  partialList.pop();
1116                         break;
1117                     }
1118                 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1119
1120                 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1121                 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1122                 return pDesc;
1123             }
1124
1125             /// Add partial superblock \p pDesc to the list
1126             void add_partial( superblock_desc * pDesc )
1127             {
1128                 assert( pPartial != pDesc );
1129                 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1130
1131                 superblock_desc * pCur = nullptr;
1132                 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1133                     partialList.push( pDesc );
1134             }
1135
1136
1137             /// Remove superblock \p pDesc from the list of partial superblocks
1138             bool unlink_partial( superblock_desc * pDesc )
1139             {
1140                 return partialList.unlink( pDesc );
1141             }
1142         };
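        // Illustrative note: the processor heap keeps one lock-free "fast slot" for a partial
        // superblock (pPartial) plus an overflow container (partialList). add_partial() tries to
        // install the descriptor into the empty slot by CAS and falls back to the list; get_partial()
        // takes the slot content first (swapping in nullptr) and pops from the list only when the
        // slot is empty. A sketch of the intended call pattern inside the heap:
        //
        //     superblock_desc * pDesc = pProcHeap->get_partial();   // may return nullptr
        //     // ... allocate from pDesc, or give it back later with pProcHeap->add_partial( pDesc ) ...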
1143
1144         /// Processor heap structure padded to the required alignment
1145         typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1146
1147         /// Processor descriptor
1148         struct processor_desc
1149         {
1150             processor_heap *    arrProcHeap     ; ///< array of processor heaps
1151             free_list           listSBDescFree  ; ///< List of free superblock descriptors
1152             page_heap *         pageHeaps       ; ///< array of page heaps (one for each page size)
1153
1154             //@cond
1155             processor_desc()
1156                 : arrProcHeap( nullptr )
1157                 , pageHeaps( nullptr )
1158             {}
1159             //@endcond
1160         };
1161
1162
1163     protected:
1164         sys_topology        m_Topology           ;  ///< System topology
1165         system_heap         m_LargeHeap          ;  ///< Heap for large block
1166         aligned_heap        m_AlignedHeap        ;  ///< Internal aligned heap
1167         sizeclass_selector  m_SizeClassSelector  ;  ///< Size-class selector
1168         atomics::atomic<processor_desc *> *   m_arrProcDesc  ;  ///< array of pointers to the processor descriptors
1169         unsigned int        m_nProcessorCount    ;  ///< Processor count
1170         bound_checker       m_BoundChecker       ;  ///< Bound checker
1171
1172         os_allocated_stat   m_OSAllocStat        ;  ///< OS-allocated memory statistics
1173
1174     protected:
1175         //@cond
1176
1177         /// Allocates large block from system memory
1178         block_header * alloc_from_OS( size_t nSize )
1179         {
1180             block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1181             m_OSAllocStat.incBytesAllocated( nSize );
1182             p->setOSAllocated( nSize );
1183             return p;
1184         }
1185
1186         /// Allocates from the active superblock if possible
1187         block_header * alloc_from_active( processor_heap * pProcHeap )
1188         {
1189             active_tag  oldActive;
1190             int nCollision = -1;
1191
1192             // Reserve block
1193             while ( true ) {
1194                 ++nCollision;
1195                 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1196                 if ( !oldActive.ptr() )
1197                     return nullptr;
1198                 unsigned int nCredits = oldActive.credits();
1199                 active_tag  newActive   ; // default = 0
1200                 if ( nCredits != 0 ) {
1201                     newActive = oldActive;
1202                     newActive.credits( nCredits - 1 );
1203                 }
1204                 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1205                     break;
1206             }
1207
1208             if ( nCollision )
1209                 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1210
1211             // pop block
1212             superblock_desc * pDesc = oldActive.ptr();
1213
1214             anchor_tag  oldAnchor;
1215             anchor_tag  newAnchor;
1216             byte * pAddr;
1217             unsigned int nMoreCredits = 0;
1218
1219             nCollision = -1;
1220             do {
1221                 ++nCollision;
1222                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1223
1224                 assert( oldAnchor.avail < pDesc->nCapacity );
1225                 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1226
1227                 // TSan reports data race if the block contained atomic ops before
1228                 CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1229                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1230                 CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1231                 newAnchor.tag += 1;
1232
1233                 if ( oldActive.credits() == 0 ) {
1234                     // state must be ACTIVE
1235                     if ( oldAnchor.count == 0 )
1236                         newAnchor.state = SBSTATE_FULL;
1237                     else {
1238                         nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1239                         newAnchor.count -= nMoreCredits;
1240                     }
1241                 }
1242             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1243
1244             if ( nCollision )
1245                 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1246
1247             assert( newAnchor.state != SBSTATE_EMPTY );
1248
1249             if ( newAnchor.state == SBSTATE_FULL )
1250                 pProcHeap->stat.incDescFull();
1251             if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1252                 update_active( pProcHeap, pDesc, nMoreCredits );
1253
1254             pProcHeap->stat.incAllocFromActive();
1255
1256             // block_header fields do not need to be set up here;
1257             // they were initialized in alloc_from_new_superblock
1258             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1259             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1260             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1261
1262             return reinterpret_cast<block_header *>( pAddr );
1263         }
1264
1265         /// Allocates from a partially filled superblock if possible
1266         block_header * alloc_from_partial( processor_heap * pProcHeap )
1267         {
1268         retry:
1269             superblock_desc * pDesc = pProcHeap->get_partial();
1270             if ( !pDesc )
1271                 return nullptr;
1272
1273             // reserve blocks
1274             anchor_tag  oldAnchor;
1275             anchor_tag  newAnchor;
1276             //byte * pAddr;
1277             unsigned int nMoreCredits = 0;
1278
1279             int nCollision = -1;
1280             do {
1281                 ++nCollision;
1282
1283                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1284                 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1285                     free_superblock( pDesc );
1286                     goto retry;
1287                 }
1288
1289                 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1290                 newAnchor.count -= nMoreCredits + 1;
1291                 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1292                 newAnchor.tag += 1;
1293             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1294
1295             if ( nCollision )
1296                 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1297
1298             if ( newAnchor.state == SBSTATE_FULL )
1299                 pProcHeap->stat.incDescFull();
1300
1301             // Now the thread is guaranteed to have reserved one or more blocks;
1302             // pop a reserved block
1303             byte * pAddr;
1304             nCollision = -1;
1305             do {
1306                 ++nCollision;
1307
1308                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1309
1310                 assert( oldAnchor.avail < pDesc->nCapacity );
1311                 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1312                 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1313                 ++newAnchor.tag;
1314             } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1315
1316             if ( nCollision )
1317                 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1318
1319             assert( newAnchor.state != SBSTATE_EMPTY );
1320
1321             pProcHeap->stat.incAllocFromPartial();
1322
1323             if ( nMoreCredits > 0 )
1324                 update_active( pProcHeap, pDesc, nMoreCredits );
1325
1326             // block_header fields need not be set up here:
1327             // they were already initialized in alloc_from_new_superblock
1328             assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1329             assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1330             assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1331
1332             return reinterpret_cast<block_header *>( pAddr );
1333         }
1334
1335         /// Allocates from a newly created superblock
1336         block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1337         {
1338             superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1339             assert( pDesc != nullptr );
1340             pDesc->pSB = new_superblock_buffer( pProcHeap );
1341
1342             anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1343             anchor.tag += 1;
1344
1345             // Build a singly linked list of free blocks in the superblock
1346             byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1347             unsigned int nNext = 0;
1348             const unsigned int nBlockSize = pDesc->nBlockSize;
1349             for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1350                 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1351                 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1352             }
1353             reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
1354
1355             active_tag newActive;
1356             newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1357
1358             anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1359             anchor.state = SBSTATE_ACTIVE;
1360             pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1361
1362             active_tag curActive;
1363             if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1364                 pProcHeap->stat.incAllocFromNew();
1365                 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1366                 return reinterpret_cast<block_header *>( pDesc->pSB );
1367             }
1368
1369             free_superblock( pDesc );
1370             return nullptr;
1371         }
1372
1373         /// Finds the appropriate processor heap for the selected size class
1374         processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1375         {
1376             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1377
1378             unsigned int nProcessorId = m_Topology.current_processor();
1379             assert( nProcessorId < m_nProcessorCount );
1380
1381             if ( nProcessorId >= m_nProcessorCount )
1382                 nProcessorId = 0;
1383
1384             processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1385             while ( !pDesc ) {
1386
1387                 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1388                 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1389                     pDesc = pNewDesc;
1390                     break;
1391                 }
1392                 free_processor_desc( pNewDesc );
1393             }
1394
1395             return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1396         }
1397
1398         /// Updates the \p active field of processor heap \p pProcHeap
1399         void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1400         {
1401             assert( pProcHeap == pDesc->pProcHeap );
1402
1403             active_tag  nullActive;
1404             active_tag  newActive;
1405             newActive.set( pDesc, nCredits - 1 );
1406
1407             if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1408                 return;
1409
1410             // Someone installed another active superblock.
1411             // Return credits to superblock and make it partial
1412
1413             anchor_tag  oldAnchor;
1414             anchor_tag  newAnchor;
1415
1416             do {
1417                 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1418                 newAnchor.count += nCredits;
1419                 newAnchor.state = SBSTATE_PARTIAL;
1420             } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1421
1422             pDesc->pProcHeap->add_partial( pDesc );
1423         }
1424
1425         /// Allocates new processor descriptor
1426         processor_desc * new_processor_desc( unsigned int nProcessorId )
1427         {
1428             CDS_UNUSED( nProcessorId );
1429             processor_desc * pDesc;
1430             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1431
1432             /*
1433                 Processor descriptor layout
1434
1435                 proc_desc -  64-byte alignment
1436                 page_heap[0] 64-byte alignment
1437                 page_heap[1] 64-byte alignment
1438                 ...
1439                 page_heap[P] 64-byte alignment
1440
1441                 proc_heap[0] 64-byte alignment
1442                 proc_heap[1] 64-byte alignment
1443                 ...
1444                 proc_heap[N] 64-byte alignment
1445             */
1446
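                 // szDesc below is the size of processor_desc plus its page heaps, measured in
                 // c_nAlignment-sized units; szTotal appends the processor_heap array, which is
                 // itself c_nAlignment-aligned (see the static_assert below).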
1447             const size_t szDesc =
1448                 ( sizeof(processor_desc)
1449                     + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1450                     + c_nAlignment - 1
1451                 ) / c_nAlignment;
1452
1453
1454             const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1455
1456             static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1457
1458             // TSan false positive: the new descriptor is published later via a CAS with release ordering (see find_heap)
1459             CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1460
1461             pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1462
1463             pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1464             for ( size_t i = 0; i < nPageHeapCount; ++i )
1465                 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1466
1467             // initialize processor heaps
1468             pDesc->arrProcHeap =
1469                 reinterpret_cast<processor_heap *>(
1470                     reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1471                     & ~(uintptr_t(c_nAlignment) - 1)
1472                 );
1473
1474             processor_heap * pProcHeap = pDesc->arrProcHeap;
1475             processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1476             for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1477                 new (pProcHeap) processor_heap();
1478                 pProcHeap->pProcDesc = pDesc;
1479                 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1480                 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1481                     pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1482                 else
1483                     pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1484             }
1485             CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1486             return pDesc;
1487         }
1488
1489
1490         void free_processor_heap( processor_heap * pProcHeap )
1491         {
1492             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1493                 superblock_desc * pDesc;
1494
1495                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1496                     free( pDesc->pSB );
1497                     m_AlignedHeap.free( pDesc );
1498                 }
1499
1500                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1501                 if ( pPartial ) {
1502                     free( pPartial->pSB );
1503                     m_AlignedHeap.free( pPartial );
1504                 }
1505
1506                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1507                 if ( pDesc ) {
1508                     free( pDesc->pSB );
1509                     m_AlignedHeap.free( pDesc );
1510                 }
1511             }
1512             else {
1513                 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1514                 superblock_desc * pDesc;
1515
1516                 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1517                     pageHeap.free( pDesc->pSB );
1518                     m_AlignedHeap.free( pDesc );
1519                 }
1520
1521                 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1522                 if ( pPartial ) {
1523                     pageHeap.free( pPartial->pSB );
1524                     m_AlignedHeap.free( pPartial );
1525                 }
1526
1527                 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1528                 if ( pDesc ) {
1529                     pageHeap.free( pDesc->pSB );
1530                     m_AlignedHeap.free( pDesc );
1531                 }
1532             }
1533             pProcHeap->~processor_heap();
1534         }
1535
1536         /// Frees processor descriptor
1537         void free_processor_desc( processor_desc * pDesc )
1538         {
1539             const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1540
1541             for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1542                 free_processor_heap( pDesc->arrProcHeap + j );
1543
1544             for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1545                 m_AlignedHeap.free( pSBDesc );
1546
1547             for (size_t i = 0; i < nPageHeapCount; ++i )
1548                 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1549
1550             //m_IntHeap.free( pDesc->pageHeaps );
1551             pDesc->pageHeaps = nullptr;
1552
1553             pDesc->processor_desc::~processor_desc();
1554             m_AlignedHeap.free( pDesc );
1555         }
1556
1557         /// Allocates new superblock descriptor
1558         superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1559         {
1560             anchor_tag anchor;
1561             superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1562             if ( pDesc == nullptr ) {
1563                 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1564                 assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );
1565
1566                 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1567                 anchor.tag = 0;
1568                 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1569
1570                 pProcHeap->stat.incDescAllocCount();
1571             }
1572             pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1573             pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1574             assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1575             pDesc->pProcHeap = pProcHeap;
1576
1577             anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1578             anchor.avail = 1;
1579             pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1580
1581             return pDesc;
1582         }
1583
1584         /// Allocates superblock page
1585         byte * new_superblock_buffer( processor_heap * pProcHeap )
1586         {
1587             pProcHeap->stat.incBlockAllocated();
1588             if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1589                 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1590             }
1591             else {
1592                 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1593             }
1594         }
1595
1596         /// Frees superblock descriptor and its page
1597         void free_superblock( superblock_desc * pDesc )
1598         {
1599             pDesc->pProcHeap->stat.incBlockDeallocated();
1600             processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1601             if ( pDesc->pSB ) {
1602                 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1603                     free( pDesc->pSB );
1604                 }
1605                 else {
1606                     pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1607                 }
1608             }
1609             pProcDesc->listSBDescFree.push( pDesc );
1610         }
1611
1612         /// Allocate memory block
1613         block_header * int_alloc(
1614             size_t nSize    ///< Size of memory block to allocate in bytes
1615             )
1616         {
1617             typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1618             if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1619                 return alloc_from_OS( nSize );
1620             }
1621             assert( nSizeClassIndex < m_SizeClassSelector.size() );
1622
1623             block_header * pBlock;
1624             processor_heap * pProcHeap;
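                 // Allocation sequence: try the active superblock first, then a partial superblock,
                 // and finally allocate a fresh superblock for this size class.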
1625             while ( true ) {
1626                 pProcHeap = find_heap( nSizeClassIndex );
1627                 if ( !pProcHeap )
1628                     return alloc_from_OS( nSize );
1629
1630                 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1631                     break;
1632                 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1633                     break;
1634                 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1635                     break;
1636             }
1637
1638             pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1639
1640             assert( pBlock != nullptr );
1641             return pBlock;
1642         }
1643
1644         //@endcond
1645     public:
1646         /// Heap constructor
1647         Heap()
1648         {
1649             // Explicit libcds initialization is needed since the heap may be constructed as a static object
1650             cds::Initialize();
1651
1652             m_nProcessorCount = m_Topology.processor_count();
1653             m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1654                 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
1655             memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount );   // NOTE: zero-filling atomic<> elements; the all-zero pattern represents nullptr here
1656         }
1657
1658         /// Heap destructor
1659         /**
1660             The destructor frees all memory allocated by the heap.
1661         */
1662         ~Heap()
1663         {
1664             for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1665                 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1666                 if ( pDesc )
1667                     free_processor_desc( pDesc );
1668             }
1669
1670             m_AlignedHeap.free( m_arrProcDesc );
1671
1672             // Explicit termination of libcds
1673             cds::Terminate();
1674         }
1675
1676         /// Allocate memory block
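             /// A minimal usage sketch (\p myHeap below denotes an instance of this class):
             /// \code
             ///     void * p = myHeap.alloc( 100 );
             ///     // ... use the 100-byte block pointed to by p ...
             ///     myHeap.free( p );
             /// \endcode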
1677         void * alloc(
1678             size_t nSize    ///< Size of memory block to allocate in bytes
1679         )
1680         {
1681             block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1682
1683             // Bound checking is only for our blocks
1684             if ( !pBlock->isOSAllocated() ) {
1685                 // the block is allocated from our heap - bound checker is applicable
1686                 m_BoundChecker.make_trailer(
1687                     reinterpret_cast<byte *>(pBlock + 1),
1688                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1689                     nSize
1690                 );
1691             }
1692
1693             CDS_TSAN_ANNOTATE_NEW_MEMORY( pBlock + 1, nSize );
1694             return pBlock + 1;
1695         }
1696
1697         /// Free previously allocated memory block
1698         void free(
1699             void * pMemory  ///< Pointer to memory block to free
1700         )
1701         {
1702             if ( !pMemory )
1703                 return;
1704
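                 // pRedirect is the header placed just before the user pointer; for blocks obtained
                 // via alloc_aligned() it may be a redirect header, so begin() resolves it to the
                 // real block header.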
1705             block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1706             block_header * pBlock = pRedirect->begin();
1707
1708             if ( pBlock->isOSAllocated() ) {
1709                 // The block has been allocated from the OS
1710                 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1711                 m_LargeHeap.free( pBlock );
1712                 return;
1713             }
1714
1715             assert( !pBlock->isAligned() );
1716             superblock_desc * pDesc = pBlock->desc();
1717
1718             m_BoundChecker.check_bounds(
1719                 pRedirect + 1,
1720                 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1721                 pDesc->nBlockSize
1722             );
1723
1724
1725             anchor_tag oldAnchor;
1726             anchor_tag newAnchor;
1727             processor_heap_base * pProcHeap = pDesc->pProcHeap;
1728
1729             pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1730
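                 // Push the block back onto the superblock's free list and update the anchor state:
                 // a FULL superblock becomes PARTIAL, and if this was the last allocated block
                 // the superblock becomes EMPTY.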
1731             oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1732             do {
1733                 newAnchor = oldAnchor;
1734                 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1735                 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1736                 newAnchor.tag += 1;
1737
1738                 assert( oldAnchor.state != SBSTATE_EMPTY );
1739
1740                 if ( oldAnchor.state == SBSTATE_FULL )
1741                     newAnchor.state = SBSTATE_PARTIAL;
1742
1743                 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1744                     //pProcHeap = pDesc->pProcHeap;
1745                     //CDS_COMPILER_RW_BARRIER         ;   // instruction fence is needed?..
1746                     newAnchor.state = SBSTATE_EMPTY;
1747                 }
1748                 else
1749                     newAnchor.count += 1;
1750             } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1751
1752             pProcHeap->stat.incFreeCount();
1753
1754             if ( newAnchor.state == SBSTATE_EMPTY ) {
1755                 if ( pProcHeap->unlink_partial( pDesc ))
1756                     free_superblock( pDesc );
1757             }
1758             else if (oldAnchor.state == SBSTATE_FULL ) {
1759                 assert( pProcHeap != nullptr );
1760                 pProcHeap->stat.decDescFull();
1761                 pProcHeap->add_partial( pDesc );
1762             }
1763         }
1764
1765         /// Reallocate memory block
1766         /**
1767             If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1768             the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1769
1770             If there is not enough available memory to expand the block to the given size,
1771             the original block is left unchanged, and \p nullptr is returned.
1772
1773             An aligned memory block cannot be reallocated: if \p pMemory has been allocated by \ref alloc_aligned,
1774             the return value is \p nullptr and the original block is left unchanged.
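
                 A minimal usage sketch (\p myHeap below denotes an instance of this class):
                 \code
                     void * p = myHeap.alloc( 100 );
                     void * q = myHeap.realloc( p, 200 );  // may move the block; returns nullptr on failure
                     myHeap.free( q ? q : p );
                 \endcode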
1775         */
1776         void * realloc(
1777             void *  pMemory,    ///< Pointer to previously allocated memory block
1778             size_t  nNewSize    ///< New size of memory block, in bytes
1779         )
1780         {
1781             if ( nNewSize == 0 ) {
1782                 free( pMemory );
1783                 return nullptr;
1784             }
1785
1786             const size_t nOrigSize = nNewSize;
1787             nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1788
1789             block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1790
1791             // Reallocation of aligned block is not possible
1792             if ( pBlock->isAligned() ) {
1793                 assert( false );
1794                 return nullptr;
1795             }
1796
1797             if ( pBlock->isOSAllocated() ) {
1798                 // The block has been allocated from the OS
1799                 size_t nCurSize = pBlock->getOSAllocSize();
1800
1801                 if ( nCurSize >= nNewSize )
1802                     return pMemory;
1803
1804                 // Grow block size
1805                 void * pNewBuf = alloc( nOrigSize );
1806                 if ( pNewBuf ) {
1807                     memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1808                     free( pMemory );
1809                 }
1810                 return pNewBuf;
1811             }
1812
1813             superblock_desc * pDesc = pBlock->desc();
1814             if ( pDesc->nBlockSize >= nNewSize ) {
1815                 // The current block is already large enough - reallocate in place
1816                 m_BoundChecker.make_trailer(
1817                     reinterpret_cast<byte *>(pBlock + 1),
1818                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1819                     nOrigSize
1820                     );
1821
1822                 return pMemory;
1823             }
1824
1825             void * pNew = alloc( nNewSize );
1826             if ( pNew ) {
1827                 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1828                 free( pMemory );
1829                 return pNew;
1830             }
1831
1832             return nullptr;
1833         }
1834
1835         /// Allocate aligned memory block
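             /// A minimal usage sketch (\p myHeap below denotes an instance of this class):
             /// \code
             ///     void * p = myHeap.alloc_aligned( 256, 64 );  // 256 bytes aligned to a 64-byte boundary
             ///     myHeap.free_aligned( p );
             /// \endcode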
1836         void * alloc_aligned(
1837             size_t nSize,       ///< Size of memory block to allocate in bytes
1838             size_t nAlignment   ///< Alignment
1839         )
1840         {
1841             if ( nAlignment <= c_nDefaultBlockAlignment ) {
1842                 void * p = alloc( nSize );
1843                 assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
1844                 return p;
1845             }
1846
1847             block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1848
1849             block_header * pRedirect;
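                 // If the default payload address is not aligned as requested, place a redirect
                 // block_header just before the first suitably aligned address inside the oversized
                 // block; free() follows it back to the real header via block_header::begin().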
1850             if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1851                 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1852                 assert( pRedirect != pBlock );
1853                 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1854
1855                 assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1856             }
1857             else
1858                 pRedirect = pBlock;
1859
1860
1861             // Bound checking is only for our blocks
1862             if ( !pBlock->isOSAllocated() ) {
1863                 // the block is allocated from our heap - bound checker is applicable
1864                 m_BoundChecker.make_trailer(
1865                     reinterpret_cast<byte *>(pRedirect + 1),
1866                     reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1867                     nSize
1868                 );
1869             }
1870
1871             return pRedirect + 1;
1872         }
1873
1874         /// Free aligned memory block previously allocated by \ref alloc_aligned
1875         void free_aligned(
1876             void * pMemory      ///< Pointer to memory block to free
1877         )
1878         {
1879             free( pMemory );
1880         }
1881
1882     public:
1883
1884         /// Gets an instant snapshot of the heap summary statistics
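             /// A minimal usage sketch (\p myHeap below denotes an instance of this class):
             /// \code
             ///     cds::memory::michael::summary_stat st;
             ///     myHeap.summaryStat( st );
             /// \endcode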
1885         void summaryStat( summary_stat& st )
1886         {
1887             size_t nProcHeapCount = m_SizeClassSelector.size();
1888             for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1889                 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1890                 if ( pProcDesc ) {
1891                     for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1892                         processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1893                         if ( pProcHeap ) {
1894                             st.add_procheap_stat( pProcHeap->stat );
1895                         }
1896                     }
1897                 }
1898             }
1899
1900             st.add_heap_stat( m_OSAllocStat );
1901         }
1902     };
1903
1904 }}} // namespace cds::memory::michael
1905
1906 #endif // CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H