3 #ifndef CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
7 Michael allocator implementation
9 [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
12 2011.09.07 khizmax Optimization: small page (about 64K) is allocated by Heap::alloc call.
13 This optimization allows to allocate system memory more regularly,
14 in blocks of 1M that leads to less memory fragmentation.
15 2011.01.02 khizmax Created
19 #include <mutex> // unique_lock
21 #include <cds/memory/michael/options.h>
22 #include <cds/memory/michael/bound_check.h>
23 #include <cds/memory/michael/procheap_stat.h>
24 #include <cds/memory/michael/osalloc_stat.h>
26 #include <cds/os/topology.h>
27 #include <cds/os/alloc_aligned.h>
28 #include <cds/sync/spinlock.h>
29 #include <cds/details/type_padding.h>
30 #include <cds/details/marked_ptr.h>
31 #include <cds/container/vyukov_mpmc_cycle_queue.h>
32 #include <cds/user_setup/cache_line.h>
33 #include <cds/details/lib.h>
35 #include <boost/intrusive/list.hpp>
38 /// Memory-related algorithms: allocators etc.
40 /// Michael's allocator (class Heap)
43 \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
45 This namespace declares the main class Heap and a lot of helper classes.
51 unsigned int nBlockSize ; ///< block size in bytes
52 unsigned int nSBSize ; ///< superblock size (64K or 1M)
53 unsigned int nCapacity ; ///< superblock capacity (nSBSize / nBlockSize)
54 unsigned int nSBSizeIdx ; ///< internal superblock size index (page index)
57 /// %Heap based on system \p malloc and \p free functions
// NOTE(review): the opening "struct ... {" line of this heap is not visible in
// this fragment; the members below are static wrappers over the CRT heap.
60 /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
61 static void * alloc( size_t nSize )
63 void * p = ::malloc( nSize );
66 /// Returns a memory block to the system (\p free wrapper)
67 static void free( void * p )
73 /// %Heap based on system provided aligned \p malloc and \p free functions
74 struct aligned_malloc_heap
76 /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
77 static void * alloc( size_t nSize, size_t nAlignment )
79 return cds::OS::aligned_malloc( nSize, nAlignment );
81 /// Frees aligned memory block \p p that has been previously allocated by \ref alloc call
82 static void free( void * p )
84 cds::OS::aligned_free( p );
88 /// Page heap based on \p Heap
90 Page heap can allocate memory by page-sized blocks only.
91 \p Heap may be any heap that provides an interface like \ref malloc_heap.
93 This class is one of the available implementations of the opt::page_heap option.
95 template <class Heap = malloc_heap>
96 class page_allocator: public Heap
99 typedef Heap base_class;
// Constructor: fixes the page size used by every subsequent alloc() call.
106 size_t nPageSize ///< page size in bytes
108 : m_nPageSize( nPageSize )
111 /// Allocate new page
// Delegates to the underlying heap with the page size chosen at construction.
114 return base_class::alloc( m_nPageSize );
117 /// Free page \p pPage
118 void free( void * pPage )
120 base_class::free( pPage );
124 /// Page cacheable heap
126 To improve performance this allocator maintains a small list of free pages.
127 Page heap can allocate memory by page-sized blocks only.
130 \li \p FreeListCapacity - capacity of the free-list, default value is 64 pages
131 \li \p Heap may be any heap that provides an interface like \ref malloc_heap.
133 This class is one of the available implementations of the opt::page_heap option.
135 template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
136 class page_cached_allocator: public page_allocator<Heap>
139 typedef page_allocator<Heap> base_class;
// Functor that resets a queue cell to nullptr; installed below as the
// queue's value_cleaner.
142 struct make_null_ptr {
143 void operator ()(void *& p)
// Traits for the lock-free Vyukov MPMC queue used as the page free-list:
// a static buffer of FreeListCapacity cells of void*.
149 struct free_list_traits : public cds::container::vyukov_queue::traits
151 typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
153 typedef make_null_ptr value_cleaner;
156 typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
// Cache of pages released by free() and not yet returned to the base heap.
158 free_list m_FreeList;
163 page_cached_allocator(
164 size_t nPageSize ///< page size in bytes
166 : base_class( nPageSize )
167 , m_FreeList( FreeListCapacity )
// Destructor drains the cache, returning every cached page to the base heap.
171 ~page_cached_allocator()
174 while ( m_FreeList.pop(pPage) )
175 base_class::free( pPage );
179 /// Allocate new page
// Fast path: reuse a cached page; otherwise fall back to the base allocator.
183 if ( !m_FreeList.pop( pPage ) )
184 pPage = base_class::alloc();
188 /// Free page \p pPage
// Cache the page if the free-list has room; otherwise really release it.
189 void free( void * pPage )
191 if ( !m_FreeList.push( pPage ))
192 base_class::free( pPage );
196 /// Implementation of opt::sizeclass_selector option
198 Default size-class selector can manage memory blocks up to 64K.
200 class CDS_EXPORT_API default_sizeclass_selector
203 /// Count of different size-classes
204 static const size_t c_nSizeClassCount = 63;
// Maximum block size (in bytes) servable through a size-class; larger
// requests fall through to the OS (see find() below).
207 static const size_t c_nMaxBlockSize = 64 * 1024;
209 /// Page size of type 0 (64K)
// NOTE(review): 32 bytes are subtracted from 64K - presumably headroom for
// underlying-heap bookkeeping; confirm against the page heap implementation.
210 static const unsigned int c_nPage64K = 64 * 1024 - 32;
212 /// Page size of type 1 (1M)
213 static const unsigned int c_nPage1M = 1024 * 1024;
// Static lookup tables: upper size bound per class, class descriptors, and
// a byte map indexed by (size+15)/16 yielding a size-class index.
215 static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
216 static size_class const m_szClass[c_nSizeClassCount];
217 static unsigned char const m_szClassMap[];
220 /// Type of size-class index
221 typedef unsigned int sizeclass_index;
224 default_sizeclass_selector();
227 /// "No size class" index
// All-ones sentinel returned by find() for blocks larger than c_nMaxBlockSize.
228 static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
230 /// Returns size-class count
231 static sizeclass_index size()
233 return c_nSizeClassCount;
236 /// Returns page size in bytes for given page type \p nPageType
237 static size_t page_size(size_t nPageType )
245 assert(false) ; // unknown page type - anything forgotten?..
250 /// Returns count of page size-classes
252 This class supports pages of two types: 64K page for small objects and 1M page for other objects.
254 static size_t pageTypeCount()
259 /// Returns size-class index for \p nSize
261 For large blocks that cannot be allocated by Michael's allocator
262 the function must return -1 (i.e. \ref c_nNoSizeClass).
264 static sizeclass_index find( size_t nSize )
266 if ( nSize > c_nMaxBlockSize ) {
267 // Too large block - allocate from system
268 return c_nNoSizeClass;
// Map lookup has 16-byte granularity; the asserts verify the selected
// class is the tightest one containing nSize.
270 sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
271 assert( nSize <= m_szClassBounds[ szClass ] );
272 assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
277 /// Gets details::size_class struct for size-class index \p nIndex
278 static const size_class * at( sizeclass_index nIndex )
280 assert( nIndex < size() );
281 return m_szClass + nIndex;
// Boost.Intrusive base hooks letting one superblock descriptor be linked
// into two independent lists (free list and partial list) without extra
// allocation; the tag types disambiguate the two hooks.
287 struct free_list_tag;
288 typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > > free_list_locked_hook;
290 struct partial_list_tag;
291 typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > > partial_list_locked_hook;
293 struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
298 /// List of free superblock descriptors
300 This class is an implementation of the \ref opt::free_list option
// Mutex-protected FIFO built on an intrusive list: descriptors are linked
// through their free_list_locked_hook base, so no allocation is performed.
302 template <class Lock, class T = details::intrusive_superblock_desc>
303 class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
306 typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
308 typedef details::free_list_locked_hook item_hook;
309 typedef Lock lock_type;
311 typedef std::unique_lock<lock_type> auto_lock;
// Guards all list operations below.
313 mutable lock_type m_access;
317 /// Rebinds to other item type \p T2
320 typedef free_list_locked<Lock, T2> other ; ///< rebind result
324 /// Push superblock descriptor to free-list
325 void push( T * pDesc )
// Precondition: the descriptor's hook is not linked into any list yet.
327 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
328 auto_lock al(m_access);
329 base_class::push_back( *pDesc );
332 /// Pop superblock descriptor from free-list (FIFO order)
335 auto_lock al(m_access);
336 if ( base_class::empty() )
338 T& rDesc = base_class::front();
339 base_class::pop_front();
340 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
344 /// Returns current count of superblocks in free-list
347 auto_lock al(m_access);
348 return base_class::size();
352 /// List of partial filled superblock descriptors
354 This class is an implementation of the \ref opt::partial_list option
// Same locking scheme as free_list_locked, plus unlink() to remove an
// arbitrary descriptor from the middle of the list.
356 template <class Lock, class T = details::intrusive_superblock_desc>
357 class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
360 typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
362 typedef details::partial_list_locked_hook item_hook;
363 typedef Lock lock_type;
365 typedef std::unique_lock<lock_type> auto_lock;
// Guards all list operations below.
367 mutable lock_type m_access;
371 /// Rebinds to other item type \p T2
374 typedef partial_list_locked<Lock, T2> other ; ///< rebind result
378 /// Push a superblock \p pDesc to the list
379 void push( T * pDesc )
381 auto_lock al( m_access );
// Precondition: the descriptor's hook is not linked into any list yet.
382 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
383 base_class::push_back( *pDesc );
386 /// Pop superblock from the list (FIFO order)
389 auto_lock al( m_access );
390 if ( base_class::empty() )
392 T& rDesc = base_class::front();
393 base_class::pop_front();
394 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
398 /// Removes \p pDesc descriptor from the partial list, if it is linked
399 bool unlink( T * pDesc )
401 assert(pDesc != nullptr);
402 auto_lock al( m_access );
403 // !inited(pDesc) is equal to "pDesc is being linked to partial list"
404 if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
405 base_class::erase( base_class::iterator_to( *pDesc ) );
411 /// Count of elements in the list
414 auto_lock al( m_access );
415 return base_class::size();
419 /// Summary processor heap statistics
421 Summary heap statistics for use with the Heap::summaryStat function.
425 size_t nAllocFromActive ; ///< Event count of allocation from active superblock
426 size_t nAllocFromPartial ; ///< Event count of allocation from partial superblock
427 size_t nAllocFromNew ; ///< Event count of allocation from new superblock
428 size_t nFreeCount ; ///< Count of \p free function call
429 size_t nPageAllocCount ; ///< Count of page (superblock) allocated
430 size_t nPageDeallocCount ; ///< Count of page (superblock) deallocated
431 size_t nDescAllocCount ; ///< Count of superblock descriptors
432 size_t nDescFull ; ///< Count of full superblock
433 atomic64u_t nBytesAllocated ; ///< Count of allocated bytes (for heap managed memory blocks)
434 atomic64u_t nBytesDeallocated ; ///< Count of deallocated bytes (for heap managed memory blocks)
436 size_t nSysAllocCount ; ///< Count of \p alloc and \p alloc_aligned function call (for large memory blocks that allocated directly from OS)
437 size_t nSysFreeCount ; ///< Count of \p free and \p free_aligned function call (for large memory blocks that allocated directly from OS)
438 atomic64u_t nSysBytesAllocated ; ///< Count of allocated bytes (for large memory blocks that allocated directly from OS)
// NOTE(review): declared atomic64_t while its allocated counterpart above is
// atomic64u_t (unsigned) - looks like a signed/unsigned inconsistency; verify.
439 atomic64_t nSysBytesDeallocated; ///< Count of deallocated bytes (for large memory blocks that allocated directly from OS)
441 // Internal contention indicators
// NOTE(review): the four doc comments below appear copy-pasted - the field
// named in each ("active"/"anchor") does not always match the counter name
// (Desc vs Anchor); verify against the Heap internals before relying on them.
442 /// CAS failure counter for updating active field of active block of \p alloc_from_active Heap internal function
444 Contention indicator. The lower the value, the better
446 size_t nActiveDescCASFailureCount;
447 /// CAS failure counter for updating active field of active block of \p alloc_from_active Heap internal function
449 Contention indicator. The lower the value, the better
451 size_t nActiveAnchorCASFailureCount;
452 /// CAS failure counter for updating anchor field of partial block of \p alloc_from_partial Heap internal function
454 Contention indicator. The lower the value, the better
456 size_t nPartialDescCASFailureCount;
457 /// CAS failure counter for updating anchor field of partial block of \p alloc_from_partial Heap internal function
459 Contention indicator. The lower the value, the better
461 size_t nPartialAnchorCASFailureCount;
465 /// Constructs empty statistics. All counters are zero.
471 /// Difference statistics
473 This operator computes the difference between \p *this and \p stat and places the difference into \p *this.
476 summary_stat& operator -=( const summary_stat& stat )
478 nAllocFromActive -= stat.nAllocFromActive;
479 nAllocFromPartial -= stat.nAllocFromPartial;
480 nAllocFromNew -= stat.nAllocFromNew;
481 nFreeCount -= stat.nFreeCount;
482 nPageAllocCount -= stat.nPageAllocCount;
483 nPageDeallocCount -= stat.nPageDeallocCount;
484 nDescAllocCount -= stat.nDescAllocCount;
485 nDescFull -= stat.nDescFull;
486 nBytesAllocated -= stat.nBytesAllocated;
487 nBytesDeallocated -= stat.nBytesDeallocated;
489 nSysAllocCount -= stat.nSysAllocCount;
490 nSysFreeCount -= stat.nSysFreeCount;
491 nSysBytesAllocated -= stat.nSysBytesAllocated;
492 nSysBytesDeallocated -= stat.nSysBytesDeallocated;
494 nActiveDescCASFailureCount -= stat.nActiveDescCASFailureCount;
495 nActiveAnchorCASFailureCount -= stat.nActiveAnchorCASFailureCount;
496 nPartialDescCASFailureCount -= stat.nPartialDescCASFailureCount;
497 nPartialAnchorCASFailureCount -= stat.nPartialAnchorCASFailureCount;
502 /// Clears statistics
504 All counters are set to zero.
// NOTE(review): memset-based reset requires the struct to stay trivially
// copyable; keep that in mind if non-trivial members are ever added.
508 memset( this, 0, sizeof(*this));
// Internal helper: accumulates per-processor-heap statistics into the summary.
512 template <typename Stat>
513 summary_stat& add_procheap_stat( const Stat& stat )
515 nAllocFromActive += stat.allocFromActive();
516 nAllocFromPartial += stat.allocFromPartial();
517 nAllocFromNew += stat.allocFromNew();
518 nFreeCount += stat.freeCount();
519 nPageAllocCount += stat.blockAllocated();
520 nPageDeallocCount += stat.blockDeallocated();
521 nDescAllocCount += stat.descAllocCount();
522 nDescFull += stat.descFull();
523 nBytesAllocated += stat.allocatedBytes();
524 nBytesDeallocated += stat.deallocatedBytes();
526 nActiveDescCASFailureCount += stat.activeDescCASFailureCount();
527 nActiveAnchorCASFailureCount += stat.activeAnchorCASFailureCount();
528 nPartialDescCASFailureCount += stat.partialDescCASFailureCount();
529 nPartialAnchorCASFailureCount += stat.partialAnchorCASFailureCount();
// Internal helper: accumulates OS-allocated (large block) statistics.
534 template <typename Stat>
535 summary_stat& add_heap_stat( const Stat& stat )
537 nSysAllocCount += stat.allocCount();
538 nSysFreeCount += stat.freeCount();
540 nSysBytesAllocated += stat.allocatedBytes();
541 nSysBytesDeallocated+= stat.deallocatedBytes();
548 /// Michael's allocator
550 This class provides base functionality for Michael's allocator. It does not provide
551 the interface described by \p std::allocator, therefore, we name it as a heap, not as an allocator.
552 The heap interface is closer to semantics of \p malloc / \p free system functions.
553 The heap supports allocation of aligned and unaligned data.
555 The algorithm is based on simplified version of
556 \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
558 that, in turn, is concurrent version of well-known Hoard allocator developed by Emery Berger, see
559 \li [2002] Emery Berger "Memory Management for High-Performance Application", PhD thesis
561 This is powerful, scalable, fully customizable heap with fast-path without any locks
562 that has been developed specifically for multi-threading.
563 With opt::sys_topology you can set as many allocation arenas ("processor heaps") as you need.
564 You can manually bound any your thread to any arena ("processor"). With opt::sizeclass_selector option you can manage
565 allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation
566 like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that helps you to tune your application.
567 The opt::check_bounds feature can help you to find a memory buffer overflow.
569 Brief algorithm description from Michael's work:
571 Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
572 the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
573 Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
574 heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
575 An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
576 that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
577 block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
578 structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
579 in a lock-free manner.
581 Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
582 the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
583 atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
584 pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
585 available blocks of its original superblock by atomically updating its descriptor.
587 <b>Constraint</b>: one superblock may contain up to 2048 blocks. This imposes a restriction on the maximum
590 Available \p Options:
591 - \ref opt::sys_topology - class that describes system topology needed for allocator.
592 Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
593 - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
594 Default is \ref malloc_heap.
595 - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
596 Default is \ref aligned_malloc_heap
597 - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
598 Default is \ref page_cached_allocator
599 - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
600 for incoming allocation request.
601 Default is \ref default_sizeclass_selector
602 - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
603 Default is \ref free_list_locked
604 - \ref opt::partial_list - option setter for a class to manage a list of partial filled superblocks
605 Default is \ref partial_list_locked
606 - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
607 that is maintained by the heap.
608 Default is \ref procheap_empty_stat
609 - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large block
610 allocation. Term "large block" is specified by the size-class selector (see \ref opt::sizeclass_selector)
611 and it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated from the
612 OS directly. The \p os_allocated_stat option sets a class to gather statistics for large blocks.
613 Default is \ref os_allocated_empty
614 - \ref opt::check_bounds - a bound checker.
615 Default is no bound checker (cds::opt::none)
618 The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
621 #include <cds/memory/michael/allocator.h>
623 // Heap with explicitly defined options:
624 cds::memory::michael::Heap<
625 opt::aligned_heap< aligned_malloc_heap >,
626 opt::page_heap< page_cached_allocator<16, malloc_heap> >
629 // Heap with default options:
630 cds::memory::michael::Heap<> myDefHeap;
633 \par How to make std-like allocator
635 There are serious differences between the heap and the <tt>std::allocator</tt> interface:
636 - Heap is stateful, and \p std::allocator is stateless.
637 - Heap has much more template parameters than \p std::allocator
638 - Heap has low-level interface for memory allocating only unlike the allocator
639 interface that can construct/destroy objects of any type T.
641 To convert heap interface into \p std::allocator -like interface you should:
642 - Declare object of class cds::memory::michael::Heap specifying the necessary
643 template parameters; this is usually static object
644 - Create a class with \p std::allocator interface that uses the function of heap.
646 #include <cds/memory/michael/allocator.h>
649 class MichaelAllocator
651 typedef std::allocator<T> std_allocator;
652 typedef cds::memory::michael::Heap<> michael_heap;
654 // Michael heap static object
655 static michael_heap s_Heap;
657 // Declare typedefs from std::allocator
658 typedef typename std_allocator::const_pointer const_pointer;
659 typedef typename std_allocator::pointer pointer;
660 typedef typename std_allocator::const_reference const_reference;
661 typedef typename std_allocator::reference reference;
662 typedef typename std_allocator::difference_type difference_type;
663 typedef typename std_allocator::size_type size_type;
664 typedef typename std_allocator::value_type value_type;
666 // Allocation function
667 pointer allocate( size_type _Count, const void* _Hint )
669 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
672 // Deallocation function
673 void deallocate( pointer _Ptr, size_type _Count )
678 // Other std::allocator specific functions: address, construct, destroy, etc.
681 // Rebinding allocator to other type
682 template <class _Other>
684 typedef MichaelAllocator<_Other> other;
689 MichaelAllocator::michael_heap MichaelAllocator::s_Heap;
693 template <typename... Options>
698 static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
699 static const unsigned int c_nDefaultBlockAlignment = 8;
701 struct default_options {
702 typedef cds::OS::topology sys_topology;
703 typedef malloc_heap system_heap;
704 typedef page_cached_allocator<> page_heap;
705 typedef aligned_malloc_heap aligned_heap;
706 typedef default_sizeclass_selector sizeclass_selector;
707 typedef free_list_locked<cds::sync::spin> free_list;
708 typedef partial_list_locked<cds::sync::spin> partial_list;
709 typedef procheap_empty_stat procheap_stat;
710 typedef os_allocated_empty os_allocated_stat;
711 typedef cds::opt::none check_bounds;
717 typedef typename opt::make_options<default_options, Options...>::type options;
721 typedef unsigned char byte;
724 typedef typename options::sys_topology sys_topology ; ///< effective system topology
725 typedef typename options::system_heap system_heap ; ///< effective system heap
726 typedef typename options::aligned_heap aligned_heap ; ///< effective aligned heap
727 typedef typename options::sizeclass_selector sizeclass_selector ; ///< effective sizeclass selector
728 typedef typename options::page_heap page_heap ; ///< effective page heap
729 typedef typename options::procheap_stat procheap_stat ; ///< effective processor heap statistics
730 typedef typename options::os_allocated_stat os_allocated_stat ; ///< effective OS-allocated memory statistics
731 typedef details::bound_checker_selector< typename options::check_bounds > bound_checker ; ///< effective bound checker
733 // forward declarations
735 struct superblock_desc;
736 struct processor_heap_base;
737 struct processor_desc;
740 /// Superblock states
742 A superblock can be in one of four states: \p ACTIVE, \p FULL,
743 \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
744 superblock in a heap, or if a thread intends to try to install it
745 as such. A superblock is \p FULL if all its blocks are either allocated
746 or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
747 and contains unreserved available blocks. A superblock is
748 \p EMPTY if all its blocks are free and it is not \p ACTIVE.
750 enum superblock_state {
751 SBSTATE_ACTIVE = 0, ///< superblock is active
752 SBSTATE_FULL = 1, ///< superblock is full
753 SBSTATE_PARTIAL = 2, ///< superblock is partially allocated
754 SBSTATE_EMPTY = 3 ///< superblock is empty and may be freed
757 static const size_t c_nMaxBlockInSuperBlock = 1024 * 2 ; ///< Max count of blocks in superblock (2 ** 11)
759 /// Anchor of the superblock descriptor. Updated by CAS
761 unsigned long long avail:11 ; ///< index of first available block in the superblock
762 unsigned long long count:11 ; ///< number of unreserved blocks in the superblock
763 unsigned long long state: 2 ; ///< state of the superblock (see \ref superblock_state enum)
764 unsigned long long tag:40 ; ///< ABA prevention tag
767 /// Superblock descriptor
768 struct superblock_desc
769 : public options::free_list::item_hook
770 , public options::partial_list::item_hook
772 atomics::atomic<anchor_tag> anchor ; ///< anchor, see \ref anchor_tag
773 byte * pSB ; ///< ptr to superblock
774 processor_heap_base * pProcHeap ; ///< pointer to owner processor heap
775 unsigned int nBlockSize ; ///< block size in bytes
776 unsigned int nCapacity ; ///< superblock size/block size
781 , pProcHeap( nullptr )
787 typedef typename options::free_list::template rebind<superblock_desc>::other free_list;
788 typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
791 #if CDS_BUILD_BITS == 32
792 /// Allocated block header
794 Each allocated block has 8-byte header.
795 The header contains pointer to owner superblock descriptor and the redirection flag.
796 If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
799 | blockHeader | [8 byte] pointer to owner superblock (flag=0)
801 | | <- memory allocated
806 If the block has been allocated by \ref alloc_aligned, then it is possible that pointer returned must be aligned.
807 In this case the redirection flag is 1 and the block's structure is:
810 +-> | blockHeader | [8 byte] pointer to owner superblock (flag=0)
816 +-- | blockHeader | [8 byte] pointer to block head (flag=1)
818 | | <- memory allocated
833 superblock_desc * pDesc ; // pointer to superblock descriptor
834 uint32_t nSize ; // block size (allocated form OS)
839 void set( superblock_desc * pdesc, uint32_t isAligned )
842 nFlags = isAligned ? bitAligned : 0;
845 superblock_desc * desc()
847 assert( (nFlags & bitOSAllocated) == 0 );
848 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
851 block_header * begin()
853 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
856 bool isAligned() const
858 return (nFlags & bitAligned) != 0;
861 bool isOSAllocated() const
863 return (nFlags & bitOSAllocated) != 0;
866 void setOSAllocated( size_t sz )
869 nFlags = bitOSAllocated;
872 size_t getOSAllocSize() const
874 assert( isOSAllocated() );
880 #elif CDS_BUILD_BITS == 64
888 typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
889 // If bitOSAllocated is set the pDesc contains size of memory block
891 marked_desc_ptr pDesc;
893 void set( superblock_desc * pdesc, uint32_t isAligned )
895 pDesc = marked_desc_ptr( pdesc, isAligned );
898 superblock_desc * desc()
900 assert( !isOSAllocated() );
901 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
904 block_header * begin()
906 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
909 bool isAligned() const
911 return (pDesc.bits() & bitAligned) != 0;
914 bool isOSAllocated() const
916 return (pDesc.bits() & bitOSAllocated) != 0;
919 void setOSAllocated( size_t nSize )
922 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
925 size_t getOSAllocSize() const
927 assert( isOSAllocated() );
928 return reinterpret_cast<uintptr_t>( pDesc.ptr() ) >> 2;
934 # error "Unexpected value of CDS_BUILD_BITS"
935 #endif // CDS_BUILD_BITS
938 struct free_block_header: block_header {
939 unsigned int nNextFree;
943 #if CDS_BUILD_BITS == 32
944 /// Processor heap's \p active field
946 The \p active field in the processor heap structure is primarily a pointer to the descriptor
947 of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
948 guaranteed that the active superblock has at least one block available for reservation.
949 Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
950 of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
951 of blocks available for reservation in the active superblock less one. That is, if the value
952 of credits is n, then the active superblock contains n+1 blocks available for reservation
953 through the \p active field. Note that the number of blocks in a superblock is not limited
954 to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
955 (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
956 atomically decrements credits while validating that the active superblock is still valid.
960 superblock_desc * pDesc;
964 static const unsigned int c_nMaxCredits = 0 - 1;
967 CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
972 active_tag( active_tag const& ) CDS_NOEXCEPT = default;
973 ~active_tag() CDS_NOEXCEPT = default;
974 active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
975 # if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
976 active_tag( active_tag&& ) CDS_NOEXCEPT = default;
977 active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
980 /// Returns pointer to superblock descriptor
981 superblock_desc * ptr() const
986 /// Sets superblock descriptor
987 void ptr( superblock_desc * p )
992 unsigned int credits() const
997 void credits( unsigned int n )
1008 void set( superblock_desc * pSB, unsigned int n )
1015 #elif CDS_BUILD_BITS == 64
1020 static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1022 typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1023 marked_desc_ptr pDesc;
1026 active_tag() CDS_NOEXCEPT
1029 // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1030 //active_tag() CDS_NOEXCEPT = default;
1031 active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1032 ~active_tag() CDS_NOEXCEPT = default;
1033 active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1034 # if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1035 active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1036 active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1038 superblock_desc * ptr() const
1043 void ptr( superblock_desc * p )
1045 assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
1046 pDesc = marked_desc_ptr( p, pDesc.bits());
1049 unsigned int credits()
1051 return (unsigned int) pDesc.bits();
1054 void credits( unsigned int n )
1056 assert( n <= c_nMaxCredits );
1057 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1062 pDesc = marked_desc_ptr();
1065 void set( superblock_desc * pSB, unsigned int n )
1067 assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
1068 pDesc = marked_desc_ptr( pSB, n );
1074 # error "Unexpected value of CDS_BUILD_BITS"
1075 #endif // CDS_BUILD_BITS
// Per-processor, per-size-class heap state. One instance exists for every
// (processor, size class) pair; see new_processor_desc() for the layout.
1079 struct processor_heap_base
1081 CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active; ///< pointer to the descriptor of active superblock owned by processor heap
1082 processor_desc * pProcDesc ; ///< pointer to parent processor descriptor
1083 const size_class * pSizeClass ; ///< pointer to size class
1084 atomics::atomic<superblock_desc *> pPartial ; ///< pointer to partial filled superblock (may be \p nullptr)
1085 partial_list partialList ; ///< list of partial filled superblocks owned by the processor heap
1086 unsigned int nPageIdx ; ///< page size-class index, \ref c_nPageSelfAllocation - "small page"
1088 /// Small page marker
1090 If page is small and can be allocated by the Heap, the \p nPageIdx value is \p c_nPageSelfAllocation.
1091 This optimization allows to allocate system memory more regularly, in blocks of 1M that leads
1092 to less memory fragmentation.
1094 static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1096 procheap_stat stat ; ///< heap statistics
1097 //processor_heap_statistics stat;
// Default constructor: all pointers null; the instance must be placed at a
// c_nAlignment-aligned address (checked below) because active_tag packs
// credits into low pointer bits.
1100 processor_heap_base() CDS_NOEXCEPT
1101 : pProcDesc( nullptr )
1102 , pSizeClass( nullptr )
1103 , pPartial( nullptr )
1105 assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
1109 /// Get partial superblock owned by the processor heap
// First tries to detach pPartial with CAS; on failure (slot already empty
// or contended) falls back to popping from partialList (loop header elided).
1110 superblock_desc * get_partial()
1112 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1115 pDesc = partialList.pop();
1118 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1120 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1121 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1125 /// Add partial superblock \p pDesc to the list
// Prefers the single pPartial slot; if it is already occupied, pushes to
// the overflow list.
1126 void add_partial( superblock_desc * pDesc )
1128 assert( pPartial != pDesc );
1129 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1131 superblock_desc * pCur = nullptr;
1132 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1133 partialList.push( pDesc );
1137 /// Remove superblock \p pDesc from the list of partial superblock
// NOTE(review): only unlinks from partialList, not from the pPartial slot —
// presumably callers handle that case separately; confirm against full source.
1138 bool unlink_partial( superblock_desc * pDesc )
1140 return partialList.unlink( pDesc );
1144 /// Aligned superblock descriptor
1145 typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1147 /// Processor descriptor
// Owns, per processor: the array of per-size-class heaps, the free list of
// superblock descriptors, and one page heap per page size. Arrays are
// carved out of a single aligned allocation in new_processor_desc().
1148 struct processor_desc
1150 processor_heap * arrProcHeap ; ///< array of processor heap
1151 free_list listSBDescFree ; ///< List of free superblock descriptors
1152 page_heap * pageHeaps ; ///< array of page heap (one for each page size)
// Constructor fragment (signature line elided): pointers start null and are
// wired up by new_processor_desc().
1156 : arrProcHeap( nullptr )
1157 , pageHeaps( nullptr )
// Heap data members (enclosing class begins above this excerpt).
1164 sys_topology m_Topology ; ///< System topology
1165 system_heap m_LargeHeap ; ///< Heap for large block
1166 aligned_heap m_AlignedHeap ; ///< Internal aligned heap
1167 sizeclass_selector m_SizeClassSelector ; ///< Size-class selector
1168 atomics::atomic<processor_desc *> * m_arrProcDesc ; ///< array of pointers to the processor descriptors
1169 unsigned int m_nProcessorCount ; ///< Processor count
1170 bound_checker m_BoundChecker ; ///< Bound checker
1172 os_allocated_stat m_OSAllocStat ; ///< OS-allocated memory statistics
1177 /// Allocates large block from system memory
// Fallback path for sizes with no size class: delegates to m_LargeHeap and
// tags the block header as OS-allocated so free() can route it back.
// NOTE(review): p is dereferenced without a null check — behavior on
// allocation failure depends on m_LargeHeap's contract; confirm it throws
// or aborts rather than returning nullptr.
1178 block_header * alloc_from_OS( size_t nSize )
1180 block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1181 m_OSAllocStat.incBytesAllocated( nSize );
1182 p->setOSAllocated( nSize );
1186 /// Allocates from the active superblock if it possible
// Lock-free fast path of Michael's algorithm: (1) atomically decrement the
// credit count in the processor heap's active tag (CAS loop; loop headers
// are elided in this excerpt); (2) pop one free block off the superblock's
// anchor free list with a second CAS loop. Returns nullptr if there is no
// active superblock.
1187 block_header * alloc_from_active( processor_heap * pProcHeap )
1189 active_tag oldActive;
1190 int nCollision = -1;
// --- Step 1: reserve a block by taking one credit from the active tag ---
1195 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1196 if ( !oldActive.ptr() )
1198 unsigned int nCredits = oldActive.credits();
1199 active_tag newActive ; // default = 0
// If credits remain, keep the superblock active with one credit fewer;
// otherwise newActive stays null (superblock is being drained).
1200 if ( nCredits != 0 ) {
1201 newActive = oldActive;
1202 newActive.credits( nCredits - 1 );
1204 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1209 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
// --- Step 2: pop the reserved block from the superblock's free list ---
1212 superblock_desc * pDesc = oldActive.ptr();
1214 anchor_tag oldAnchor;
1215 anchor_tag newAnchor;
1217 unsigned int nMoreCredits = 0;
1222 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1224 assert( oldAnchor.avail < pDesc->nCapacity );
// Block address = superblock base + avail-index * block size; the popped
// block's header stores the next free index.
1225 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1226 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
// We took the last credit: either mark the superblock FULL, or move up to
// c_nMaxCredits blocks from the anchor count into new credits.
1229 if ( oldActive.credits() == 0 ) {
1230 // state must be ACTIVE
1231 if ( oldAnchor.count == 0 )
1232 newAnchor.state = SBSTATE_FULL;
1234 nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1235 newAnchor.count -= nMoreCredits;
1238 } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1241 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1243 assert( newAnchor.state != SBSTATE_EMPTY );
1245 if ( newAnchor.state == SBSTATE_FULL )
1246 pProcHeap->stat.incDescFull();
// Reinstall the superblock as active with the credits harvested above.
1247 if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1248 update_active( pProcHeap, pDesc, nMoreCredits );
1250 pProcHeap->stat.incAllocFromActive();
1252 // block_header fields is not needed to setup
1253 // It was set in alloc_from_new_superblock
1254 assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1255 assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1256 assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1258 return reinterpret_cast<block_header *>( pAddr );
1261 /// Allocates from a partial filled superblock if it possible
// Medium path: take a partially-filled superblock from the processor heap,
// reserve 1 + nMoreCredits blocks from its anchor count (first CAS loop,
// loop headers elided), then pop the reserved block (second CAS loop).
1262 block_header * alloc_from_partial( processor_heap * pProcHeap )
1265 superblock_desc * pDesc = pProcHeap->get_partial();
// --- Step 1: reserve blocks by decrementing the anchor count ---
1270 anchor_tag oldAnchor;
1271 anchor_tag newAnchor;
1273 unsigned int nMoreCredits = 0;
1275 int nCollision = -1;
1279 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
// A superblock that became empty while on the partial list is recycled
// here (retry logic around this is elided in this excerpt).
1280 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1281 free_superblock( pDesc );
// Take one block for the caller plus up to c_nMaxCredits extra credits.
1285 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1286 newAnchor.count -= nMoreCredits + 1;
1287 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1289 } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1292 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1294 if ( newAnchor.state == SBSTATE_FULL )
1295 pProcHeap->stat.incDescFull();
1297 // Now, the thread is guaranteed to have reserved one or more blocks
1298 // pop reserved block
1304 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1306 assert( oldAnchor.avail < pDesc->nCapacity );
1307 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1308 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1310 } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1313 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1315 assert( newAnchor.state != SBSTATE_EMPTY );
1317 pProcHeap->stat.incAllocFromPartial();
// Hand the harvested credits back via the active slot.
1319 if ( nMoreCredits > 0 )
1320 update_active( pProcHeap, pDesc, nMoreCredits );
1322 // block_header fields is not needed to setup
1323 // It was set in alloc_from_new_superblock
1324 assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1325 assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1326 assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1328 return reinterpret_cast<block_header *>( pAddr );
1331 /// Allocates from the new superblock
// Slow path: create a fresh superblock, thread all its blocks into a
// singly-linked free list, and try to install it as the active superblock.
// On CAS failure (another thread installed one first) the new superblock
// is released again; retry logic is elided in this excerpt.
1332 block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1334 superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1335 assert( pDesc != nullptr );
1336 pDesc->pSB = new_superblock_buffer( pProcHeap );
1338 anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1341 // Make single-linked list of free blocks in superblock
// Each block gets its header pointing back at pDesc; nNextFree indices run
// 1..capacity-1, and the last block's link is set to 0 below.
1342 byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1343 unsigned int nNext = 0;
1344 const unsigned int nBlockSize = pDesc->nBlockSize;
1345 for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1346 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1347 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1349 reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
// Block 0 goes to the caller; min(capacity-1, maxCredits)-1 credits go into
// the active tag; the remainder stays in anchor.count.
1351 active_tag newActive;
1352 newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1354 anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1355 anchor.state = SBSTATE_ACTIVE;
1356 pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
// Install as active only if the slot is still empty (curActive == null tag).
1358 active_tag curActive;
1359 if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1360 pProcHeap->stat.incAllocFromNew();
1361 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1362 return reinterpret_cast<block_header *>( pDesc->pSB );
1365 free_superblock( pDesc );
1369 /// Find appropriate processor heap based on size-class selected
// Lazily creates the processor_desc for the current CPU on first use; a
// lost CAS race means another thread created it, so our copy is freed
// (intermediate reload of pDesc is elided in this excerpt).
1370 processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1372 assert( nSizeClassIndex < m_SizeClassSelector.size() );
1374 unsigned int nProcessorId = m_Topology.current_processor();
1375 assert( nProcessorId < m_nProcessorCount );
// Defensive clamp path (body elided): out-of-range processor id is handled
// before indexing m_arrProcDesc.
1377 if ( nProcessorId >= m_nProcessorCount )
1380 processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1383 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1384 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1388 free_processor_desc( pNewDesc );
1391 return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1394 /// Updates active field of processor heap \p pProcHeap
// Tries to make pDesc the active superblock carrying nCredits-1 credits.
// If another superblock was installed meanwhile, the credits are returned
// to pDesc's anchor and the superblock is re-listed as partial.
1395 void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1397 assert( pProcHeap == pDesc->pProcHeap );
1399 active_tag nullActive;
1400 active_tag newActive;
1401 newActive.set( pDesc, nCredits - 1 );
// Succeeds only when the active slot is empty (null tag).
1403 if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1406 // Someone installed another active superblock.
1407 // Return credits to superblock and make it partial
1409 anchor_tag oldAnchor;
1410 anchor_tag newAnchor;
1413 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1414 newAnchor.count += nCredits;
1415 newAnchor.state = SBSTATE_PARTIAL;
1416 } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1418 pDesc->pProcHeap->add_partial( pDesc );
1421 /// Allocates new processor descriptor
// Carves the descriptor, its page heaps, and its processor heaps out of a
// single c_nAlignment-aligned allocation (layout documented below), then
// placement-constructs each sub-object.
1422 processor_desc * new_processor_desc( unsigned int nProcessorId )
1424 CDS_UNUSED( nProcessorId );
1425 processor_desc * pDesc;
1426 const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1429 Processor descriptor layout
1431 proc_desc - 64-byte alignment
1432 page_heap[0] 64-byte alignment
1433 page_heap[1] 64-byte alignment
1435 page_heap[P] 64-byte alignment
1437 proc_heap[0] 64-byte alignment
1438 proc_heap[1] 64-byte alignment
1440 proc_heap[N] 64-byte alignment
// szDesc: header + page-heap array, rounded up (rounding lines elided).
1443 const size_t szDesc =
1444 ( sizeof(processor_desc)
1445 + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1450 const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1452 static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1454 // TSan false positive: a new descriptor will be linked further with release fence
1455 CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1457 pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
// Page heaps live immediately after the descriptor header.
1459 pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1460 for ( size_t i = 0; i < nPageHeapCount; ++i )
1461 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1463 // initialize processor heaps
// Processor-heap array starts after the page heaps, rounded up to the next
// c_nAlignment boundary.
1464 pDesc->arrProcHeap =
1465 reinterpret_cast<processor_heap *>(
1466 reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1467 & ~(uintptr_t(c_nAlignment) - 1)
1470 processor_heap * pProcHeap = pDesc->arrProcHeap;
1471 processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1472 for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1473 new (pProcHeap) processor_heap();
1474 pProcHeap->pProcDesc = pDesc;
1475 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
// Superblocks whose page size itself fits a size class are self-allocated
// by this Heap ("small page" optimization, see c_nPageSelfAllocation).
1476 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1477 pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1479 pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1481 CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
// Releases all superblocks still owned by one processor heap at Heap
// destruction: partial-list entries, the pPartial slot, and the active
// superblock. Self-allocated ("small page") superblock buffers are freed
// via this Heap's free(); page-heap-allocated ones go back to the page heap.
// Descriptors themselves always come from m_AlignedHeap.
1486 void free_processor_heap( processor_heap * pProcHeap )
1488 if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1489 superblock_desc * pDesc;
// (free( pDesc->pSB ) line for this branch is elided in this excerpt)
1491 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1493 m_AlignedHeap.free( pDesc );
1496 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1498 free( pPartial->pSB );
1499 m_AlignedHeap.free( pPartial );
1502 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1505 m_AlignedHeap.free( pDesc );
// Page-heap branch: superblock buffers are returned to the owning page heap.
1509 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1510 superblock_desc * pDesc;
1512 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1513 pageHeap.free( pDesc->pSB );
1514 m_AlignedHeap.free( pDesc );
1517 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1519 pageHeap.free( pPartial->pSB );
1520 m_AlignedHeap.free( pPartial );
1523 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1525 pageHeap.free( pDesc->pSB );
1526 m_AlignedHeap.free( pDesc );
// Explicit destructor call: the heap was placement-constructed inside the
// processor_desc's single allocation (see new_processor_desc()).
1529 pProcHeap->~processor_heap();
1532 /// Frees processor descriptor
// Tears down everything placement-constructed by new_processor_desc():
// each processor heap, the free superblock descriptors, each page heap,
// then the descriptor itself — all in the reverse of construction order.
1533 void free_processor_desc( processor_desc * pDesc )
1535 const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1537 for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1538 free_processor_heap( pDesc->arrProcHeap + j );
1540 for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1541 m_AlignedHeap.free( pSBDesc );
1543 for (size_t i = 0; i < nPageHeapCount; ++i )
1544 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1546 //m_IntHeap.free( pDesc->pageHeaps );
1547 pDesc->pageHeaps = nullptr;
// pageHeaps/arrProcHeap live inside the same aligned allocation as pDesc,
// so a single m_AlignedHeap.free releases the whole layout.
1549 pDesc->processor_desc::~processor_desc();
1550 m_AlignedHeap.free( pDesc );
1553 /// Allocates new superblock descriptor
// Reuses a descriptor from the per-processor free list when possible;
// otherwise placement-news one from the aligned heap (alignment matters
// because active_tag stores credits in the pointer's low bits). In both
// cases the anchor is reinitialized (intervening lines elided).
1554 superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1557 superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1558 if ( pDesc == nullptr ) {
1559 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1560 assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );
1562 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1564 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1566 pProcHeap->stat.incDescAllocCount();
// Bind the descriptor to this heap's size class.
1568 pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1569 pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1570 assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1571 pDesc->pProcHeap = pProcHeap;
1573 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1575 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1580 /// Allocates superblock page
// "Small page" superblocks are allocated through this Heap itself (the 2011
// optimization noted in the file header); larger ones come from the
// processor's page heap for the configured page index.
1581 byte * new_superblock_buffer( processor_heap * pProcHeap )
1583 pProcHeap->stat.incBlockAllocated();
1584 if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1585 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1588 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1592 /// Frees superblock descriptor and its page
// Returns the superblock buffer to wherever it came from (this Heap for
// self-allocated "small pages" — that free() call is elided in this
// excerpt — or the owning page heap), then recycles the descriptor onto
// the processor's free-descriptor list for reuse.
1593 void free_superblock( superblock_desc * pDesc )
1595 pDesc->pProcHeap->stat.incBlockDeallocated();
1596 processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1598 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1602 pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1605 pProcDesc->listSBDescFree.push( pDesc );
1608 /// Allocate memory block
// Internal allocation driver: route to the OS for oversized requests,
// otherwise try the three paths in cost order — active superblock,
// partial superblock, new superblock (the surrounding retry loop and
// break statements are elided in this excerpt).
1609 block_header * int_alloc(
1610 size_t nSize ///< Size of memory block to allocate in bytes
1613 typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1614 if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1615 return alloc_from_OS( nSize );
1617 assert( nSizeClassIndex < m_SizeClassSelector.size() );
1619 block_header * pBlock;
1620 processor_heap * pProcHeap;
1622 pProcHeap = find_heap( nSizeClassIndex );
// No heap available for this processor — fall back to the OS.
1624 return alloc_from_OS( nSize );
1626 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1628 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1630 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1634 pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1636 assert( pBlock != nullptr );
1642 /// Heap constructor
// Constructor fragment (signature elided): sizes the processor-descriptor
// pointer array from the system topology and zero-fills it; descriptors
// themselves are created lazily in find_heap().
1645 // Explicit libcds initialization is needed since a static object may be constructed
1648 m_nProcessorCount = m_Topology.processor_count();
1649 m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1650 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
1651 memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount ) ; // ?? memset for atomic<>
1656 The destructor frees all memory allocated by the heap.
// Destructor fragment: frees every lazily-created processor descriptor,
// then the descriptor array itself.
1660 for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1661 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1663 free_processor_desc( pDesc );
1666 m_AlignedHeap.free( m_arrProcDesc );
1668 // Explicit termination of libcds
1672 /// Allocate memory block
// Public entry point (signature line elided): requests room for the user
// payload plus the block header and the bound-checker trailer, then writes
// the trailer for blocks served from our own superblocks. Returns the
// payload pointer (return line elided in this excerpt).
1674 size_t nSize ///< Size of memory block to allocate in bytes
1677 block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1679 // Bound checking is only for our blocks
1680 if ( !pBlock->isOSAllocated() ) {
1681 // the block is allocated from our heap - bound checker is applicable
1682 m_BoundChecker.make_trailer(
1683 reinterpret_cast<byte *>(pBlock + 1),
1684 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1692 /// Free previously allocated memory block
// Resolves the real block header through the redirect (aligned blocks store
// a redirect header just before the payload), routes OS-allocated blocks to
// m_LargeHeap, and pushes heap blocks back onto their superblock's anchor
// free list with a CAS loop (loop header elided). A superblock that becomes
// EMPTY is released; one that was FULL becomes partial again.
1694 void * pMemory ///< Pointer to memory block to free
1700 block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1701 block_header * pBlock = pRedirect->begin();
1703 if ( pBlock->isOSAllocated() ) {
1704 // Block has been allocated from OS
1705 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1706 m_LargeHeap.free( pBlock );
1710 assert( !pBlock->isAligned() );
1711 superblock_desc * pDesc = pBlock->desc();
// Verify the bound-checker trailer written by alloc() before recycling.
1713 m_BoundChecker.check_bounds(
1715 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1720 anchor_tag oldAnchor;
1721 anchor_tag newAnchor;
1722 processor_heap_base * pProcHeap = pDesc->pProcHeap;
1724 pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1726 oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1728 newAnchor = oldAnchor;
// Push this block onto the head of the superblock's free list: the freed
// block links to the previous head, and avail becomes this block's index.
1729 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1730 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1733 assert( oldAnchor.state != SBSTATE_EMPTY );
1735 if ( oldAnchor.state == SBSTATE_FULL )
1736 newAnchor.state = SBSTATE_PARTIAL;
// Freeing the last in-use block makes the superblock EMPTY.
1738 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1739 //pProcHeap = pDesc->pProcHeap;
1740 //CDS_COMPILER_RW_BARRIER ; // instruction fence is needed?..
1741 newAnchor.state = SBSTATE_EMPTY;
1744 newAnchor.count += 1;
1745 } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1747 pProcHeap->stat.incFreeCount();
1749 if ( newAnchor.state == SBSTATE_EMPTY ) {
// Only the thread that successfully unlinks it may free the superblock.
1750 if ( pProcHeap->unlink_partial( pDesc ))
1751 free_superblock( pDesc );
1753 else if (oldAnchor.state == SBSTATE_FULL ) {
1754 assert( pProcHeap != nullptr );
1755 pProcHeap->stat.decDescFull();
1756 pProcHeap->add_partial( pDesc );
1760 /// Reallocate memory block
1762 If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1763 the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1765 If there is not enough available memory to expand the block to the given size,
1766 the original block is left unchanged, and \p nullptr is returned.
1768 Aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
1769 then the return value is \p nullptr and the original block is left unchanged.
// (signature line elided; parameters documented below)
1772 void * pMemory, ///< Pointer to previously allocated memory block
1773 size_t nNewSize ///< New size of memory block, in bytes
// nNewSize == 0 => free-and-return-nullptr path (body elided).
1776 if ( nNewSize == 0 ) {
// Account for header + bound-checker trailer in the internal size.
1781 const size_t nOrigSize = nNewSize;
1782 nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1784 block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1786 // Reallocation of aligned block is not possible
1787 if ( pBlock->isAligned() ) {
1792 if ( pBlock->isOSAllocated() ) {
1793 // The block has been allocated from OS
1794 size_t nCurSize = pBlock->getOSAllocSize();
// Shrinking (or equal) fits in place for OS blocks; growing copies into a
// freshly allocated block (free of the old block is elided).
1796 if ( nCurSize >= nNewSize )
1800 void * pNewBuf = alloc( nOrigSize );
1802 memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
// Heap-block path: in-place when the size class still fits, otherwise
// alloc + copy (cleanup/return lines elided).
1808 superblock_desc * pDesc = pBlock->desc();
1809 if ( pDesc->nBlockSize <= nNewSize ) {
1810 // In-place reallocation
1811 m_BoundChecker.make_trailer(
1812 reinterpret_cast<byte *>(pBlock + 1),
1813 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1820 void * pNew = alloc( nNewSize );
1822 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1830 /// Allocate aligned memory block
// For alignments within the default block alignment, a plain alloc()
// already satisfies the request. Otherwise over-allocate by nAlignment,
// place a redirect header just before the first aligned payload address,
// and mark it so free() can recover the real block header.
1831 void * alloc_aligned(
1832 size_t nSize, ///< Size of memory block to allocate in bytes
1833 size_t nAlignment ///< Alignment
1836 if ( nAlignment <= c_nDefaultBlockAlignment ) {
1837 void * p = alloc( nSize );
1838 assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
1842 block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1844 block_header * pRedirect;
1845 if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
// Redirect header sits immediately before the aligned payload and stores
// the real block address (tagged as aligned via the second argument).
1846 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1847 assert( pRedirect != pBlock );
1848 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1850 assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1856 // Bound checking is only for our blocks
1857 if ( !pBlock->isOSAllocated() ) {
1858 // the block is allocated from our heap - bound checker is applicable
1859 m_BoundChecker.make_trailer(
1860 reinterpret_cast<byte *>(pRedirect + 1),
1861 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1866 return pRedirect + 1;
1869 /// Free aligned memory block previously allocated by \ref alloc_aligned
// Fragment: signature and body are elided in this excerpt; presumably
// delegates to free(), which already follows the redirect header.
1871 void * pMemory ///< Pointer to memory block to free
1880 void summaryStat( summary_stat& st )
1882 size_t nProcHeapCount = m_SizeClassSelector.size();
1883 for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1884 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1886 for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1887 processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1889 st.add_procheap_stat( pProcHeap->stat );
1895 st.add_heap_stat( m_OSAllocStat );
1899 }}} // namespace cds::memory::michael
1901 #endif // CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H