3 #ifndef CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
7 Michael allocator implementation
9 [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
2011.09.07 khizmax Optimization: small pages (about 64K) are allocated by the Heap::alloc call.
                   This optimization allows allocating system memory more regularly,
                   in blocks of 1M, which leads to less memory fragmentation.
2011.01.02 khizmax Created
19 #include <mutex> // unique_lock
21 #include <cds/memory/michael/options.h>
22 #include <cds/memory/michael/bound_check.h>
23 #include <cds/memory/michael/procheap_stat.h>
24 #include <cds/memory/michael/osalloc_stat.h>
26 #include <cds/os/topology.h>
27 #include <cds/os/alloc_aligned.h>
28 #include <cds/sync/spinlock.h>
29 #include <cds/details/type_padding.h>
30 #include <cds/details/marked_ptr.h>
31 #include <cds/container/vyukov_mpmc_cycle_queue.h>
32 #include <cds/user_setup/cache_line.h>
33 #include <cds/details/lib.h>
35 #include <boost/intrusive/list.hpp>
38 /// Memory-related algorithms: allocators etc.
40 /// Michael's allocator (class Heap)
43 \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
This namespace declares the main class Heap and a number of helper classes.
51 unsigned int nBlockSize ; ///< block size in bytes
52 unsigned int nSBSize ; ///< superblock size (64K or 1M)
53 unsigned int nCapacity ; ///< superblock capacity (nSBSize / nBlockSize)
54 unsigned int nSBSizeIdx ; ///< internal superblock size index (page index)
57 /// %Heap based on system \p malloc and \p free functions
60 /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
61 static void * alloc( size_t nSize )
void * p = ::malloc( nSize );
return p;
/// Returns memory block to the system (\p free wrapper)
67 static void free( void * p )
73 /// %Heap based on system provided aligned \p malloc and \p free functions
74 struct aligned_malloc_heap
76 /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
77 static void * alloc( size_t nSize, size_t nAlignment )
79 return cds::OS::aligned_malloc( nSize, nAlignment );
/// Frees aligned memory block \p p that has been previously allocated by an \ref alloc call
82 static void free( void * p )
84 cds::OS::aligned_free( p );
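/*
    Example (illustrative sketch, not part of the library): any class with the same
    static interface as \ref malloc_heap can be plugged in via \ref opt::system_heap.
    The counting wrapper below and its counter are hypothetical; it needs <atomic>.

    \code
    struct counting_malloc_heap {
        static std::atomic<size_t> s_nAllocCount;   // hypothetical allocation counter

        static void * alloc( size_t nSize )
        {
            s_nAllocCount.fetch_add( 1, std::memory_order_relaxed );
            return ::malloc( nSize );
        }
        static void free( void * p )
        {
            ::free( p );
        }
    };
    \endcode
*/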
88 /// Page heap based on \p Heap
A page heap can allocate memory only in page-sized blocks.
\p Heap may be any heap that provides an interface like \ref malloc_heap.

This class is one of the available implementations of the opt::page_heap option.
95 template <class Heap = malloc_heap>
96 class page_allocator: public Heap
99 typedef Heap base_class;
106 size_t nPageSize ///< page size in bytes
108 : m_nPageSize( nPageSize )
111 /// Allocate new page
114 return base_class::alloc( m_nPageSize );
117 /// Free page \p pPage
118 void free( void * pPage )
120 base_class::free( pPage );
124 /// Page cacheable heap
To improve performance this allocator maintains a small list of free pages.
A page heap can allocate memory only in page-sized blocks.

Template parameters:
\li \p FreeListCapacity - capacity of the free-list; default value is 64 pages
\li \p Heap may be any heap that provides an interface like \ref malloc_heap.

This class is one of the available implementations of the opt::page_heap option.
135 template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
136 class page_cached_allocator: public page_allocator<Heap>
139 typedef page_allocator<Heap> base_class;
142 struct make_null_ptr {
143 void operator ()(void *& p)
149 struct free_list_traits : public cds::container::vyukov_queue::traits
151 typedef opt::v::static_buffer<void *, FreeListCapacity> buffer;
153 typedef make_null_ptr value_cleaner;
156 typedef container::VyukovMPMCCycleQueue< void *, free_list_traits > free_list;
158 free_list m_FreeList;
163 page_cached_allocator(
164 size_t nPageSize ///< page size in bytes
166 : base_class( nPageSize )
167 , m_FreeList( FreeListCapacity )
171 ~page_cached_allocator()
174 while ( m_FreeList.pop(pPage) )
175 base_class::free( pPage );
179 /// Allocate new page
183 if ( !m_FreeList.pop( pPage ) )
184 pPage = base_class::alloc();
188 /// Free page \p pPage
189 void free( void * pPage )
191 if ( !m_FreeList.push( pPage ))
192 base_class::free( pPage );
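/*
    Usage sketch (illustrative): a page allocator is constructed with a fixed page size
    and then hands out blocks of exactly that size. Freed pages go back to the free-list
    if it has room; otherwise they are returned to the underlying heap.

    \code
    cds::memory::michael::page_cached_allocator<> pageHeap( 64 * 1024 );
    void * pPage = pageHeap.alloc();   // a 64K page
    pageHeap.free( pPage );            // cached in the free-list for reuse
    \endcode
*/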
196 /// Implementation of opt::sizeclass_selector option
The default size-class selector can manage memory blocks up to 64K in size.
200 class CDS_EXPORT_API default_sizeclass_selector
203 /// Count of different size-classes
204 static const size_t c_nSizeClassCount = 63;
207 static const size_t c_nMaxBlockSize = 64 * 1024;
209 /// Page size of type 0 (64K)
210 static const unsigned int c_nPage64K = 64 * 1024 - 32;
212 /// Page size of type 1 (1M)
213 static const unsigned int c_nPage1M = 1024 * 1024;
215 static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
216 static size_class const m_szClass[c_nSizeClassCount];
217 static unsigned char const m_szClassMap[];
220 /// Type of size-class index
221 typedef unsigned int sizeclass_index;
224 default_sizeclass_selector();
227 /// "No size class" index
228 static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
230 /// Returns size-class count
231 static sizeclass_index size()
233 return c_nSizeClassCount;
236 /// Returns page size in bytes for given page type \p nPageType
237 static size_t page_size(size_t nPageType )
switch ( nPageType ) {
case 0: return c_nPage64K;
case 1: return c_nPage1M;
}
assert(false) ; // anything forgotten?..
return 0; // unreachable
/// Returns the count of page types
252 This class supports pages of two types: 64K page for small objects and 1M page for other objects.
static size_t pageTypeCount()
{
    return 2;
}
259 /// Returns size-class index for \p nSize
For large blocks that cannot be allocated by Michael's allocator
the function must return \p c_nNoSizeClass.
264 static sizeclass_index find( size_t nSize )
266 if ( nSize > c_nMaxBlockSize ) {
267 // Too large block - allocate from system
268 return c_nNoSizeClass;
270 sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
271 assert( nSize <= m_szClassBounds[ szClass ] );
272 assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
277 /// Gets details::size_class struct for size-class index \p nIndex
278 static const size_class * at( sizeclass_index nIndex )
280 assert( nIndex < size() );
281 return m_szClass + nIndex;
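/*
    Example (illustrative): mapping request sizes to size-class indices.

    \code
    // A 100-byte request falls into some size-class whose block size covers it
    auto idx = default_sizeclass_selector::find( 100 );
    assert( idx != default_sizeclass_selector::c_nNoSizeClass );
    assert( default_sizeclass_selector::at( idx )->nBlockSize >= 100 );

    // Anything above 64K has no size-class and is allocated directly from the OS
    assert( default_sizeclass_selector::find( 100 * 1024 ) == default_sizeclass_selector::c_nNoSizeClass );
    \endcode
*/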
287 struct free_list_tag;
288 typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > > free_list_locked_hook;
290 struct partial_list_tag;
291 typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > > partial_list_locked_hook;
293 struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
/// List of free superblock descriptors

This class is an implementation of the \ref opt::free_list option.
302 template <class Lock, class T = details::intrusive_superblock_desc>
303 class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
306 typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
308 typedef details::free_list_locked_hook item_hook;
309 typedef Lock lock_type;
311 typedef std::unique_lock<lock_type> auto_lock;
313 mutable lock_type m_access;
317 /// Rebinds to other item type \p T2
320 typedef free_list_locked<Lock, T2> other ; ///< rebind result
324 /// Push superblock descriptor to free-list
325 void push( T * pDesc )
327 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
328 auto_lock al(m_access);
329 base_class::push_back( *pDesc );
332 /// Pop superblock descriptor from free-list
335 auto_lock al(m_access);
336 if ( base_class::empty() )
338 T& rDesc = base_class::front();
339 base_class::pop_front();
340 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
344 /// Returns current count of superblocks in free-list
347 auto_lock al(m_access);
348 return base_class::size();
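/*
    The \p Lock parameter is pluggable. For example, a heap whose free-list is guarded
    by \p std::mutex instead of a spin-lock could be declared like this (sketch; the
    typedef name is hypothetical and <mutex> must be included):

    \code
    typedef cds::memory::michael::Heap<
        cds::memory::michael::opt::free_list<
            cds::memory::michael::free_list_locked< std::mutex >
        >
    > mutex_freelist_heap;
    \endcode
*/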
/// List of partially filled superblock descriptors

This class is an implementation of the \ref opt::partial_list option.
356 template <class Lock, class T = details::intrusive_superblock_desc>
357 class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
360 typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
362 typedef details::partial_list_locked_hook item_hook;
363 typedef Lock lock_type;
365 typedef std::unique_lock<lock_type> auto_lock;
367 mutable lock_type m_access;
371 /// Rebinds to other item type \p T2
374 typedef partial_list_locked<Lock, T2> other ; ///< rebind result
378 /// Push a superblock \p pDesc to the list
379 void push( T * pDesc )
381 auto_lock al( m_access );
382 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
383 base_class::push_back( *pDesc );
386 /// Pop superblock from the list
389 auto_lock al( m_access );
390 if ( base_class::empty() )
392 T& rDesc = base_class::front();
393 base_class::pop_front();
394 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
/// Removes the descriptor \p pDesc from the partial list
399 bool unlink( T * pDesc )
401 assert(pDesc != nullptr);
402 auto_lock al( m_access );
// !inited(pDesc) means that pDesc is currently linked into the partial list
404 if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
405 base_class::erase( base_class::iterator_to( *pDesc ) );
/// Count of elements in the list
414 auto_lock al( m_access );
415 return base_class::size();
419 /// Summary processor heap statistics
Summary heap statistics for use with the \p Heap::summaryStat function.
425 size_t nAllocFromActive ; ///< Event count of allocation from active superblock
426 size_t nAllocFromPartial ; ///< Event count of allocation from partial superblock
427 size_t nAllocFromNew ; ///< Event count of allocation from new superblock
size_t nFreeCount ; ///< Count of \p free function calls
size_t nPageAllocCount ; ///< Count of pages (superblocks) allocated
size_t nPageDeallocCount ; ///< Count of pages (superblocks) deallocated
size_t nDescAllocCount ; ///< Count of superblock descriptors allocated
size_t nDescFull ; ///< Count of full superblocks
atomic64u_t nBytesAllocated ; ///< Count of allocated bytes (for heap-managed memory blocks)
atomic64u_t nBytesDeallocated ; ///< Count of deallocated bytes (for heap-managed memory blocks)

size_t nSysAllocCount ; ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
size_t nSysFreeCount ; ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesAllocated ; ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesDeallocated; ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
// Internal contention indicators
/// CAS failure counter for updating the \p active field of the active superblock in the \p alloc_from_active internal function
Contention indicator: the lower the value, the better.
size_t nActiveDescCASFailureCount;
/// CAS failure counter for updating the \p anchor field of the active superblock in the \p alloc_from_active internal function
Contention indicator: the lower the value, the better.
size_t nActiveAnchorCASFailureCount;
/// CAS failure counter for updating the \p anchor field during block reservation in the \p alloc_from_partial internal function
Contention indicator: the lower the value, the better.
size_t nPartialDescCASFailureCount;
/// CAS failure counter for updating the \p anchor field while popping a reserved block in the \p alloc_from_partial internal function
Contention indicator: the lower the value, the better.
size_t nPartialAnchorCASFailureCount;
465 /// Constructs empty statistics. All counters are zero.
471 /// Difference statistics
This operator computes the difference between \p *this and \p stat and stores the result in \p *this.
476 summary_stat& operator -=( const summary_stat& stat )
478 nAllocFromActive -= stat.nAllocFromActive;
479 nAllocFromPartial -= stat.nAllocFromPartial;
480 nAllocFromNew -= stat.nAllocFromNew;
481 nFreeCount -= stat.nFreeCount;
482 nPageAllocCount -= stat.nPageAllocCount;
483 nPageDeallocCount -= stat.nPageDeallocCount;
484 nDescAllocCount -= stat.nDescAllocCount;
485 nDescFull -= stat.nDescFull;
486 nBytesAllocated -= stat.nBytesAllocated;
487 nBytesDeallocated -= stat.nBytesDeallocated;
489 nSysAllocCount -= stat.nSysAllocCount;
490 nSysFreeCount -= stat.nSysFreeCount;
491 nSysBytesAllocated -= stat.nSysBytesAllocated;
492 nSysBytesDeallocated -= stat.nSysBytesDeallocated;
494 nActiveDescCASFailureCount -= stat.nActiveDescCASFailureCount;
495 nActiveAnchorCASFailureCount -= stat.nActiveAnchorCASFailureCount;
496 nPartialDescCASFailureCount -= stat.nPartialDescCASFailureCount;
497 nPartialAnchorCASFailureCount -= stat.nPartialAnchorCASFailureCount;
502 /// Clears statistics
504 All counters are set to zero.
508 memset( this, 0, sizeof(*this));
512 template <typename Stat>
513 summary_stat& add_procheap_stat( const Stat& stat )
515 nAllocFromActive += stat.allocFromActive();
516 nAllocFromPartial += stat.allocFromPartial();
517 nAllocFromNew += stat.allocFromNew();
518 nFreeCount += stat.freeCount();
519 nPageAllocCount += stat.blockAllocated();
520 nPageDeallocCount += stat.blockDeallocated();
521 nDescAllocCount += stat.descAllocCount();
522 nDescFull += stat.descFull();
523 nBytesAllocated += stat.allocatedBytes();
524 nBytesDeallocated += stat.deallocatedBytes();
526 nActiveDescCASFailureCount += stat.activeDescCASFailureCount();
527 nActiveAnchorCASFailureCount += stat.activeAnchorCASFailureCount();
528 nPartialDescCASFailureCount += stat.partialDescCASFailureCount();
529 nPartialAnchorCASFailureCount += stat.partialAnchorCASFailureCount();
534 template <typename Stat>
535 summary_stat& add_heap_stat( const Stat& stat )
537 nSysAllocCount += stat.allocCount();
538 nSysFreeCount += stat.freeCount();
540 nSysBytesAllocated += stat.allocatedBytes();
541 nSysBytesDeallocated+= stat.deallocatedBytes();
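/*
    Typical usage sketch: take a snapshot of the statistics before and after a workload
    and subtract to obtain per-phase counters.

    \code
    cds::memory::michael::Heap<> heap;
    cds::memory::michael::summary_stat stBegin, stEnd;

    heap.summaryStat( stBegin );
    // ... workload that calls heap.alloc() / heap.free() ...
    heap.summaryStat( stEnd );

    stEnd -= stBegin;   // stEnd now holds the counters accumulated by the workload
    \endcode
*/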
548 /// Michael's allocator
This class provides the base functionality of Michael's allocator. It does not provide
the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
The heap interface is closer to the semantics of the \p malloc / \p free system functions.
The heap supports allocation of aligned and unaligned data.
555 The algorithm is based on simplified version of
556 \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger, see
\li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis
This is a powerful, scalable, fully customizable heap with a lock-free fast path
that has been developed specifically for multi-threaded applications.
With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
You can manually bind any of your threads to any arena ("processor"). With the opt::sizeclass_selector option you can control
allocation granularity. With opt::page_heap you can utilize any OS-provided facility for page allocation,
such as \p mmap or \p VirtualAlloc. The heap can gather internal statistics that help you tune your application.
The opt::check_bounds feature can help you find memory buffer overflows.
569 Brief algorithm description from Michael's work:
571 Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
572 the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
573 Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
574 heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
575 An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
576 that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
577 block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
578 structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
579 in a lock-free manner.
581 Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
582 the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
583 atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
584 pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
585 available blocks of its original superblock by atomically updating its descriptor.
<b>Constraint</b>: one superblock may contain up to 2048 blocks. This restriction limits the maximum
superblock capacity.
590 Available \p Options:
591 - \ref opt::sys_topology - class that describes system topology needed for allocator.
592 Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
- \ref opt::system_heap - option setter for the allocator used for large blocks that are allocated directly from the OS.
594 Default is \ref malloc_heap.
595 - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
596 Default is \ref aligned_malloc_heap
597 - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
598 Default is \ref page_cached_allocator
599 - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
600 for incoming allocation request.
601 Default is \ref default_sizeclass_selector
602 - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
603 Default is \ref free_list_locked
- \ref opt::partial_list - option setter for a class to manage a list of partially filled superblocks
605 Default is \ref partial_list_locked
606 - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
607 that is maintained by the heap.
608 Default is \ref procheap_empty_stat
- \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large-block
  allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector)
  and is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated
  directly from the OS; the \p os_allocated_stat option sets a class to gather statistics for such blocks.
  Default is \ref os_allocated_empty
614 - \ref opt::check_bounds - a bound checker.
615 Default is no bound checker (cds::opt::none)
The heap is the basic building block for your allocator or <tt>operator new</tt> implementation.
621 #include <cds/memory/michael/allocator.h>
623 // Heap with explicitly defined options:
624 cds::memory::michael::Heap<
625 opt::aligned_heap< aligned_malloc_heap >,
626 opt::page_heap< page_cached_allocator<16, malloc_heap> >
629 // Heap with default options:
630 cds::memory::michael::Heap<> myDefHeap;
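Basic use is malloc/free-like (sketch):
\code
void * p = myDefHeap.alloc( 128 );              // unaligned block
void * q = myDefHeap.alloc_aligned( 256, 64 );  // 64-byte aligned block
myDefHeap.free( p );
myDefHeap.free_aligned( q );
\endcode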
\par How to make a std-like allocator

There are serious differences between the heap and the <tt>std::allocator</tt> interface:
- The heap is stateful, while \p std::allocator is stateless.
- The heap has many more template parameters than \p std::allocator.
- The heap has a low-level interface for memory allocation only, unlike the allocator
  interface, which can construct/destroy objects of any type \p T.
To convert the heap interface into a \p std::allocator -like interface you should:
- Declare an object of class cds::memory::michael::Heap specifying the necessary
  template parameters; this is usually a static object
- Create a class with the \p std::allocator interface that uses the functions of the heap.
646 #include <cds/memory/michael/allocator.h>
649 class MichaelAllocator
651 typedef std::allocator<T> std_allocator;
652 typedef cds::memory::michael::Heap<> michael_heap;
654 // Michael heap static object
655 static michael_heap s_Heap;
657 // Declare typedefs from std::allocator
658 typedef typename std_allocator::const_pointer const_pointer;
659 typedef typename std_allocator::pointer pointer;
660 typedef typename std_allocator::const_reference const_reference;
661 typedef typename std_allocator::reference reference;
662 typedef typename std_allocator::difference_type difference_type;
663 typedef typename std_allocator::size_type size_type;
664 typedef typename std_allocator::value_type value_type;
666 // Allocation function
667 pointer allocate( size_type _Count, const void* _Hint )
669 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
672 // Deallocation function
void deallocate( pointer _Ptr, size_type _Count )
{
    s_Heap.free( _Ptr );
}
678 // Other std::allocator specific functions: address, construct, destroy, etc.
681 // Rebinding allocator to other type
682 template <class _Other>
684 typedef MichaelAllocator<_Other> other;
689 MichaelAllocator::michael_heap MichaelAllocator::s_Heap;
693 template <typename... Options>
698 static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
699 static const unsigned int c_nDefaultBlockAlignment = 8;
701 struct default_options {
702 typedef cds::OS::topology sys_topology;
703 typedef malloc_heap system_heap;
704 typedef page_cached_allocator<> page_heap;
705 typedef aligned_malloc_heap aligned_heap;
706 typedef default_sizeclass_selector sizeclass_selector;
707 typedef free_list_locked<cds::sync::spin> free_list;
708 typedef partial_list_locked<cds::sync::spin> partial_list;
709 typedef procheap_empty_stat procheap_stat;
710 typedef os_allocated_empty os_allocated_stat;
711 typedef cds::opt::none check_bounds;
717 typedef typename opt::make_options<default_options, Options...>::type options;
721 typedef unsigned char byte;
724 typedef typename options::sys_topology sys_topology ; ///< effective system topology
725 typedef typename options::system_heap system_heap ; ///< effective system heap
726 typedef typename options::aligned_heap aligned_heap ; ///< effective aligned heap
727 typedef typename options::sizeclass_selector sizeclass_selector ; ///< effective sizeclass selector
728 typedef typename options::page_heap page_heap ; ///< effective page heap
729 typedef typename options::procheap_stat procheap_stat ; ///< effective processor heap statistics
730 typedef typename options::os_allocated_stat os_allocated_stat ; ///< effective OS-allocated memory statistics
731 typedef details::bound_checker_selector< typename options::check_bounds > bound_checker ; ///< effective bound checker
733 // forward declarations
735 struct superblock_desc;
736 struct processor_heap_base;
737 struct processor_desc;
740 /// Superblock states
742 A superblock can be in one of four states: \p ACTIVE, \p FULL,
743 \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
744 superblock in a heap, or if a thread intends to try to install it
745 as such. A superblock is \p FULL if all its blocks are either allocated
746 or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
747 and contains unreserved available blocks. A superblock is
748 \p EMPTY if all its blocks are free and it is not \p ACTIVE.
750 enum superblock_state {
751 SBSTATE_ACTIVE = 0, ///< superblock is active
752 SBSTATE_FULL = 1, ///< superblock is full
753 SBSTATE_PARTIAL = 2, ///< superblock is partially allocated
754 SBSTATE_EMPTY = 3 ///< superblock is empty and may be freed
757 static const size_t c_nMaxBlockInSuperBlock = 1024 * 2 ; ///< Max count of blocks in superblock (2 ** 11)
759 /// Anchor of the superblock descriptor. Updated by CAS
761 unsigned long long avail:11 ; ///< index of first available block in the superblock
762 unsigned long long count:11 ; ///< number of unreserved blocks in the superblock
763 unsigned long long state: 2 ; ///< state of the superblock (see \ref superblock_state enum)
764 unsigned long long tag:40 ; ///< ABA prevention tag
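// The four bitfields pack into a single 64-bit word that is updated by one CAS:
// 11 (avail) + 11 (count) + 2 (state) + 40 (tag) = 64 bits. The 11-bit fields are
// what limits a superblock to c_nMaxBlockInSuperBlock = 2048 blocks, e.g.:
//
//   static_assert( sizeof(anchor_tag) == sizeof(unsigned long long),
//       "anchor must fit into one CAS-able word" );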
767 /// Superblock descriptor
768 struct superblock_desc
769 : public options::free_list::item_hook
770 , public options::partial_list::item_hook
772 atomics::atomic<anchor_tag> anchor ; ///< anchor, see \ref anchor_tag
773 byte * pSB ; ///< ptr to superblock
774 processor_heap_base * pProcHeap ; ///< pointer to owner processor heap
775 unsigned int nBlockSize ; ///< block size in bytes
776 unsigned int nCapacity ; ///< superblock size/block size
781 , pProcHeap( nullptr )
787 typedef typename options::free_list::template rebind<superblock_desc>::other free_list;
788 typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
791 #if CDS_BUILD_BITS == 32
792 /// Allocated block header
Each allocated block has an 8-byte header.
The header contains a pointer to the owner superblock descriptor and the redirection flag.
If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
        +---------------+
        | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
        +---------------+
        |               |   <- memory allocated
        |    memory     |
        +---------------+
If the block has been allocated by \ref alloc_aligned, the returned pointer may have to be shifted to meet the alignment.
In this case the redirection flag is 1 and the block's structure is:
        +-> +---------------+
        |   | blockHeader   |   [8 byte] pointer to owner superblock (flag=0)
        |   +---------------+
        |   |   padding     |
        |   +---------------+
        +-- | blockHeader   |   [8 byte] pointer to block head (flag=1)
            +---------------+
            |               |   <- memory allocated
            |    memory     |
            +---------------+
833 superblock_desc * pDesc ; // pointer to superblock descriptor
uint32_t nSize ; // block size (allocated from OS)
839 void set( superblock_desc * pdesc, uint32_t isAligned )
842 nFlags = isAligned ? bitAligned : 0;
845 superblock_desc * desc()
847 assert( (nFlags & bitOSAllocated) == 0 );
848 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
851 block_header * begin()
853 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
856 bool isAligned() const
858 return (nFlags & bitAligned) != 0;
861 bool isOSAllocated() const
863 return (nFlags & bitOSAllocated) != 0;
866 void setOSAllocated( size_t sz )
869 nFlags = bitOSAllocated;
872 size_t getOSAllocSize() const
874 assert( isOSAllocated() );
880 #elif CDS_BUILD_BITS == 64
888 typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
// If bitOSAllocated is set, pDesc contains the size of the memory block
891 marked_desc_ptr pDesc;
893 void set( superblock_desc * pdesc, uint32_t isAligned )
895 pDesc = marked_desc_ptr( pdesc, isAligned );
898 superblock_desc * desc()
900 assert( !isOSAllocated() );
901 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
904 block_header * begin()
906 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
909 bool isAligned() const
911 return (pDesc.bits() & bitAligned) != 0;
914 bool isOSAllocated() const
916 return (pDesc.bits() & bitOSAllocated) != 0;
919 void setOSAllocated( size_t nSize )
922 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
925 size_t getOSAllocSize() const
927 assert( isOSAllocated() );
928 return reinterpret_cast<uintptr_t>( pDesc.ptr() ) >> 2;
934 # error "Unexpected value of CDS_BUILD_BITS"
935 #endif // CDS_BUILD_BITS
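/*
    Size-encoding sketch for the 64-bit header: \p setOSAllocated stores the block size
    shifted left by 2 so the two low bits stay free for the \p bitAligned / \p bitOSAllocated
    flags, and \p getOSAllocSize undoes the shift:

    \code
    block_header h;
    h.setOSAllocated( 1024 );   // stores (1024 << 2) | bitOSAllocated in the pointer word
    assert( h.isOSAllocated() );
    assert( h.getOSAllocSize() == 1024 );
    \endcode
*/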
938 struct free_block_header: block_header {
939 unsigned int nNextFree;
943 #if CDS_BUILD_BITS == 32
944 /// Processor heap's \p active field
946 The \p active field in the processor heap structure is primarily a pointer to the descriptor
947 of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
948 guaranteed that the active superblock has at least one block available for reservation.
949 Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
950 of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
951 of blocks available for reservation in the active superblock less one. That is, if the value
952 of credits is n, then the active superblock contains n+1 blocks available for reservation
953 through the \p active field. Note that the number of blocks in a superblock is not limited
954 to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
955 (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
956 atomically decrements credits while validating that the active superblock is still valid.
960 superblock_desc * pDesc;
964 static const unsigned int c_nMaxCredits = 0 - 1;
967 CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
972 active_tag( active_tag const& ) CDS_NOEXCEPT = default;
973 ~active_tag() CDS_NOEXCEPT = default;
974 active_tag& operator=(active_tag const& ) CDS_NOEXCEPT = default;
975 # if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
976 active_tag( active_tag&& ) CDS_NOEXCEPT = default;
977 active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
980 /// Returns pointer to superblock descriptor
981 superblock_desc * ptr() const
986 /// Sets superblock descriptor
987 void ptr( superblock_desc * p )
992 unsigned int credits() const
997 void credits( unsigned int n )
1008 void set( superblock_desc * pSB, unsigned int n )
1015 #elif CDS_BUILD_BITS == 64
1020 static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1022 typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1023 marked_desc_ptr pDesc;
1026 active_tag() CDS_NOEXCEPT
1029 // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1030 //active_tag() CDS_NOEXCEPT = default;
1031 active_tag( active_tag const& ) CDS_NOEXCEPT = default;
1032 ~active_tag() CDS_NOEXCEPT = default;
1033 active_tag& operator=(active_tag const&) CDS_NOEXCEPT = default;
1034 # if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1035 active_tag( active_tag&& ) CDS_NOEXCEPT = default;
1036 active_tag& operator=(active_tag&&) CDS_NOEXCEPT = default;
1038 superblock_desc * ptr() const
1043 void ptr( superblock_desc * p )
1045 assert( (reinterpret_cast<uintptr_t>(p) & c_nMaxCredits) == 0 );
1046 pDesc = marked_desc_ptr( p, pDesc.bits());
1049 unsigned int credits()
1051 return (unsigned int) pDesc.bits();
1054 void credits( unsigned int n )
1056 assert( n <= c_nMaxCredits );
1057 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1062 pDesc = marked_desc_ptr();
1065 void set( superblock_desc * pSB, unsigned int n )
1067 assert( (reinterpret_cast<uintptr_t>(pSB) & c_nMaxCredits) == 0 );
1068 pDesc = marked_desc_ptr( pSB, n );
1074 # error "Unexpected value of CDS_BUILD_BITS"
1075 #endif // CDS_BUILD_BITS
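/*
    Credits-encoding sketch (64-bit variant): the low bits of the descriptor pointer
    carry the "credits" value, i.e. the number of reservable blocks minus one.
    \p pDesc below stands for a hypothetical 64-byte aligned descriptor pointer.

    \code
    active_tag a;
    a.set( pDesc, 5 );          // 6 blocks available for reservation
    assert( a.ptr() == pDesc ); // pointer recovered with credit bits masked off
    assert( a.credits() == 5 );
    \endcode
*/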
1079 struct processor_heap_base
1081 CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active; ///< pointer to the descriptor of active superblock owned by processor heap
1082 processor_desc * pProcDesc ; ///< pointer to parent processor descriptor
1083 const size_class * pSizeClass ; ///< pointer to size class
1084 atomics::atomic<superblock_desc *> pPartial ; ///< pointer to partial filled superblock (may be \p nullptr)
1085 partial_list partialList ; ///< list of partial filled superblocks owned by the processor heap
unsigned int nPageIdx ; ///< page size-class index; equal to \ref c_nPageSelfAllocation for a "small page"
/// Small page marker

If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
This optimization allows allocating system memory more regularly, in blocks of 1M, which leads
to less memory fragmentation.
1094 static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1096 procheap_stat stat ; ///< heap statistics
1097 //processor_heap_statistics stat;
1100 processor_heap_base() CDS_NOEXCEPT
1101 : pProcDesc( nullptr )
1102 , pSizeClass( nullptr )
1103 , pPartial( nullptr )
1105 assert( (reinterpret_cast<uintptr_t>(this) & (c_nAlignment - 1)) == 0 );
1109 /// Get partial superblock owned by the processor heap
1110 superblock_desc * get_partial()
1112 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1115 pDesc = partialList.pop();
1118 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1120 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1121 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1125 /// Add partial superblock \p pDesc to the list
1126 void add_partial( superblock_desc * pDesc )
1128 assert( pPartial != pDesc );
1129 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1131 superblock_desc * pCur = nullptr;
1132 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1133 partialList.push( pDesc );
/// Removes superblock \p pDesc from the list of partial superblocks
1138 bool unlink_partial( superblock_desc * pDesc )
1140 return partialList.unlink( pDesc );
/// Processor heap structure padded to the \p c_nAlignment boundary
1145 typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1147 /// Processor descriptor
1148 struct processor_desc
1150 processor_heap * arrProcHeap ; ///< array of processor heap
1151 free_list listSBDescFree ; ///< List of free superblock descriptors
1152 page_heap * pageHeaps ; ///< array of page heap (one for each page size)
1156 : arrProcHeap( nullptr )
1157 , pageHeaps( nullptr )
1164 sys_topology m_Topology ; ///< System topology
1165 system_heap m_LargeHeap ; ///< Heap for large block
1166 aligned_heap m_AlignedHeap ; ///< Internal aligned heap
1167 sizeclass_selector m_SizeClassSelector ; ///< Size-class selector
1168 atomics::atomic<processor_desc *> * m_arrProcDesc ; ///< array of pointers to the processor descriptors
1169 unsigned int m_nProcessorCount ; ///< Processor count
1170 bound_checker m_BoundChecker ; ///< Bound checker
1172 os_allocated_stat m_OSAllocStat ; ///< OS-allocated memory statistics
1177 /// Allocates large block from system memory
1178 block_header * alloc_from_OS( size_t nSize )
1180 block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1181 m_OSAllocStat.incBytesAllocated( nSize );
1182 p->setOSAllocated( nSize );
/// Allocates from the active superblock, if possible
1187 block_header * alloc_from_active( processor_heap * pProcHeap )
1189 active_tag oldActive;
1190 int nCollision = -1;
1195 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1196 if ( !oldActive.ptr() )
1198 unsigned int nCredits = oldActive.credits();
1199 active_tag newActive ; // default = 0
1200 if ( nCredits != 0 ) {
1201 newActive = oldActive;
1202 newActive.credits( nCredits - 1 );
1204 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1209 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
1212 superblock_desc * pDesc = oldActive.ptr();
1214 anchor_tag oldAnchor;
1215 anchor_tag newAnchor;
1217 unsigned int nMoreCredits = 0;
1222 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1224 assert( oldAnchor.avail < pDesc->nCapacity );
1225 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
1227 // TSan reports data race if the block contained atomic ops before
1228 CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1229 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1230 CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1233 if ( oldActive.credits() == 0 ) {
1234 // state must be ACTIVE
1235 if ( oldAnchor.count == 0 )
1236 newAnchor.state = SBSTATE_FULL;
1238 nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1239 newAnchor.count -= nMoreCredits;
1242 } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1245 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1247 assert( newAnchor.state != SBSTATE_EMPTY );
1249 if ( newAnchor.state == SBSTATE_FULL )
1250 pProcHeap->stat.incDescFull();
1251 if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1252 update_active( pProcHeap, pDesc, nMoreCredits );
1254 pProcHeap->stat.incAllocFromActive();
// block_header fields need not be set up here:
// they were initialized in alloc_from_new_superblock
1258 assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1259 assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1260 assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1262 return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a partially filled superblock, if possible
1266 block_header * alloc_from_partial( processor_heap * pProcHeap )
1269 superblock_desc * pDesc = pProcHeap->get_partial();
1274 anchor_tag oldAnchor;
1275 anchor_tag newAnchor;
1277 unsigned int nMoreCredits = 0;
1279 int nCollision = -1;
1283 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1284 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1285 free_superblock( pDesc );
1289 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1290 newAnchor.count -= nMoreCredits + 1;
1291 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1293 } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1296 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1298 if ( newAnchor.state == SBSTATE_FULL )
1299 pProcHeap->stat.incDescFull();
1301 // Now, the thread is guaranteed to have reserved one or more blocks
1302 // pop reserved block
1308 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1310 assert( oldAnchor.avail < pDesc->nCapacity );
1311 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1312 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1314 } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1317 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1319 assert( newAnchor.state != SBSTATE_EMPTY );
1321 pProcHeap->stat.incAllocFromPartial();
1323 if ( nMoreCredits > 0 )
1324 update_active( pProcHeap, pDesc, nMoreCredits );
// block_header fields need not be set up here:
// they were initialized in alloc_from_new_superblock
1328 assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1329 assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1330 assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1332 return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a new superblock
1336 block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1338 superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1339 assert( pDesc != nullptr );
1340 pDesc->pSB = new_superblock_buffer( pProcHeap );
1342 anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
// Make a singly-linked list of free blocks in the superblock
1346 byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1347 unsigned int nNext = 0;
1348 const unsigned int nBlockSize = pDesc->nBlockSize;
1349 for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
1350 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1351 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
1353 reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
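// Example: with capacity 4 the loop above writes nNextFree = 1,2,3,4 into blocks
// 0..3, and the line above rewrites the last block's nNextFree to 0, producing the
// index-linked free list 0 -> 1 -> 2 -> 3 that anchor.avail walks from index 0.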
1355 active_tag newActive;
1356 newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
1358 anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1359 anchor.state = SBSTATE_ACTIVE;
1360 pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1362 active_tag curActive;
1363 if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1364 pProcHeap->stat.incAllocFromNew();
1365 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1366 return reinterpret_cast<block_header *>( pDesc->pSB );
1369 free_superblock( pDesc );
/// Finds the appropriate processor heap for the selected size-class index
1374 processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1376 assert( nSizeClassIndex < m_SizeClassSelector.size() );
1378 unsigned int nProcessorId = m_Topology.current_processor();
1379 assert( nProcessorId < m_nProcessorCount );
1381 if ( nProcessorId >= m_nProcessorCount )
1384 processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1387 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1388 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
1392 free_processor_desc( pNewDesc );
1395 return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
/// Updates the \p active field of processor heap \p pProcHeap
1399 void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1401 assert( pProcHeap == pDesc->pProcHeap );
1403 active_tag nullActive;
1404 active_tag newActive;
1405 newActive.set( pDesc, nCredits - 1 );
1407 if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1410 // Someone installed another active superblock.
1411 // Return credits to superblock and make it partial
1413 anchor_tag oldAnchor;
1414 anchor_tag newAnchor;
1417 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1418 newAnchor.count += nCredits;
1419 newAnchor.state = SBSTATE_PARTIAL;
1420 } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1422 pDesc->pProcHeap->add_partial( pDesc );
1425 /// Allocates new processor descriptor
1426 processor_desc * new_processor_desc( unsigned int nProcessorId )
1428 CDS_UNUSED( nProcessorId );
1429 processor_desc * pDesc;
1430 const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1433 Processor descriptor layout
1435 proc_desc - 64-byte alignment
1436 page_heap[0] 64-byte alignment
1437 page_heap[1] 64-byte alignment
1439 page_heap[P] 64-byte alignment
1441 proc_heap[0] 64-byte alignment
1442 proc_heap[1] 64-byte alignment
1444 proc_heap[N] 64-byte alignment
1447 const size_t szDesc =
1448 ( sizeof(processor_desc)
1449 + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
1454 const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1456 static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1458 // TSan false positive: a new descriptor will be linked further with release fence
1459 CDS_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN;
1461 pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
1463 pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1464 for ( size_t i = 0; i < nPageHeapCount; ++i )
1465 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1467 // initialize processor heaps
1468 pDesc->arrProcHeap =
1469 reinterpret_cast<processor_heap *>(
1470 reinterpret_cast<uintptr_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1471 & ~(uintptr_t(c_nAlignment) - 1)
1474 processor_heap * pProcHeap = pDesc->arrProcHeap;
1475 processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1476 for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1477 new (pProcHeap) processor_heap();
1478 pProcHeap->pProcDesc = pDesc;
1479 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
1480 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1481 pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1483 pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
1485 CDS_TSAN_ANNOTATE_IGNORE_WRITES_END;
1490 void free_processor_heap( processor_heap * pProcHeap )
1492 if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1493 superblock_desc * pDesc;
1495 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1497 m_AlignedHeap.free( pDesc );
1500 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1502 free( pPartial->pSB );
1503 m_AlignedHeap.free( pPartial );
1506 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1509 m_AlignedHeap.free( pDesc );
1513 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1514 superblock_desc * pDesc;
1516 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1517 pageHeap.free( pDesc->pSB );
1518 m_AlignedHeap.free( pDesc );
1521 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1523 pageHeap.free( pPartial->pSB );
1524 m_AlignedHeap.free( pPartial );
1527 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1529 pageHeap.free( pDesc->pSB );
1530 m_AlignedHeap.free( pDesc );
1533 pProcHeap->~processor_heap();
1536 /// Frees processor descriptor
1537 void free_processor_desc( processor_desc * pDesc )
1539 const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1541 for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1542 free_processor_heap( pDesc->arrProcHeap + j );
1544 for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1545 m_AlignedHeap.free( pSBDesc );
1547 for (size_t i = 0; i < nPageHeapCount; ++i )
1548 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1550 //m_IntHeap.free( pDesc->pageHeaps );
1551 pDesc->pageHeaps = nullptr;
1553 pDesc->processor_desc::~processor_desc();
1554 m_AlignedHeap.free( pDesc );
1557 /// Allocates new superblock descriptor
1558 superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1561 superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1562 if ( pDesc == nullptr ) {
1563 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
1564 assert( (uintptr_t(pDesc) & (c_nAlignment - 1)) == 0 );
1566 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1568 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1570 pProcHeap->stat.incDescAllocCount();
1572 pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1573 pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1574 assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1575 pDesc->pProcHeap = pProcHeap;
1577 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1579 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1584 /// Allocates superblock page
1585 byte * new_superblock_buffer( processor_heap * pProcHeap )
1587 pProcHeap->stat.incBlockAllocated();
1588 if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1589 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1592 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1596 /// Frees superblock descriptor and its page
1597 void free_superblock( superblock_desc * pDesc )
1599 pDesc->pProcHeap->stat.incBlockDeallocated();
1600 processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1602 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1606 pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1609 pProcDesc->listSBDescFree.push( pDesc );
1612 /// Allocate memory block
1613 block_header * int_alloc(
1614 size_t nSize ///< Size of memory block to allocate in bytes
1617 typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1618 if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1619 return alloc_from_OS( nSize );
1621 assert( nSizeClassIndex < m_SizeClassSelector.size() );
1623 block_header * pBlock;
1624 processor_heap * pProcHeap;
1626 pProcHeap = find_heap( nSizeClassIndex );
1628 return alloc_from_OS( nSize );
1630 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1632 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1634 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1638 pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1640 assert( pBlock != nullptr );
1646 /// Heap constructor
1649 // Explicit libcds initialization is needed since a static object may be constructed
1652 m_nProcessorCount = m_Topology.processor_count();
1653 m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1654 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount ); // zero-fill the atomic<processor_desc *> array; relies on nullptr having an all-zero representation
1660 The destructor frees all memory allocated by the heap.
1664 for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1665 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1667 free_processor_desc( pDesc );
1670 m_AlignedHeap.free( m_arrProcDesc );
1672 // Explicit termination of libcds
1676 /// Allocate memory block
1678 size_t nSize ///< Size of memory block to allocate in bytes
1681 block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1683 // Bound checking is only for our blocks
1684 if ( !pBlock->isOSAllocated() ) {
1685 // the block is allocated from our heap - bound checker is applicable
1686 m_BoundChecker.make_trailer(
1687 reinterpret_cast<byte *>(pBlock + 1),
1688 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
CDS_TSAN_ANNOTATE_NEW_MEMORY( pBlock + 1, nSize );
return pBlock + 1;
1697 /// Free previously allocated memory block
1699 void * pMemory ///< Pointer to memory block to free
1705 block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
1706 block_header * pBlock = pRedirect->begin();
1708 if ( pBlock->isOSAllocated() ) {
1709 // Block has been allocated from OS
1710 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1711 m_LargeHeap.free( pBlock );
1715 assert( !pBlock->isAligned() );
1716 superblock_desc * pDesc = pBlock->desc();
1718 m_BoundChecker.check_bounds(
1720 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1725 anchor_tag oldAnchor;
1726 anchor_tag newAnchor;
1727 processor_heap_base * pProcHeap = pDesc->pProcHeap;
1729 pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1731 oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1733 newAnchor = oldAnchor;
1734 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1735 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1738 assert( oldAnchor.state != SBSTATE_EMPTY );
1740 if ( oldAnchor.state == SBSTATE_FULL )
1741 newAnchor.state = SBSTATE_PARTIAL;
1743 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1744 //pProcHeap = pDesc->pProcHeap;
1745 //CDS_COMPILER_RW_BARRIER ; // instruction fence is needed?..
1746 newAnchor.state = SBSTATE_EMPTY;
1749 newAnchor.count += 1;
1750 } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1752 pProcHeap->stat.incFreeCount();
1754 if ( newAnchor.state == SBSTATE_EMPTY ) {
1755 if ( pProcHeap->unlink_partial( pDesc ))
1756 free_superblock( pDesc );
1758 else if (oldAnchor.state == SBSTATE_FULL ) {
1759 assert( pProcHeap != nullptr );
1760 pProcHeap->stat.decDescFull();
1761 pProcHeap->add_partial( pDesc );
1765 /// Reallocate memory block
1767 If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1768 the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1770 If there is not enough available memory to expand the block to the given size,
1771 the original block is left unchanged, and \p nullptr is returned.
An aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
then the return value is \p nullptr and the original block is left unchanged.
1777 void * pMemory, ///< Pointer to previously allocated memory block
1778 size_t nNewSize ///< New size of memory block, in bytes
1781 if ( nNewSize == 0 ) {
1786 const size_t nOrigSize = nNewSize;
1787 nNewSize += sizeof(block_header) + bound_checker::trailer_size;
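// Example: realloc( p, 100 ) looks for room for 100 + sizeof(block_header) bytes
// plus the bound-checker trailer (0 bytes for the default cds::opt::none checker).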
1789 block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1791 // Reallocation of aligned block is not possible
1792 if ( pBlock->isAligned() ) {
1797 if ( pBlock->isOSAllocated() ) {
1798 // The block has been allocated from OS
1799 size_t nCurSize = pBlock->getOSAllocSize();
1801 if ( nCurSize >= nNewSize )
1805 void * pNewBuf = alloc( nOrigSize );
1807 memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1813 superblock_desc * pDesc = pBlock->desc();
if ( nNewSize <= pDesc->nBlockSize ) {
1815 // In-place reallocation
1816 m_BoundChecker.make_trailer(
1817 reinterpret_cast<byte *>(pBlock + 1),
1818 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1825 void * pNew = alloc( nNewSize );
1827 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1835 /// Allocate aligned memory block
1836 void * alloc_aligned(
1837 size_t nSize, ///< Size of memory block to allocate in bytes
1838 size_t nAlignment ///< Alignment
1841 if ( nAlignment <= c_nDefaultBlockAlignment ) {
1842 void * p = alloc( nSize );
assert( (reinterpret_cast<uintptr_t>(p) & (nAlignment - 1)) == 0 );
return p;
1847 block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1849 block_header * pRedirect;
1850 if ( (reinterpret_cast<uintptr_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
1851 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uintptr_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1852 assert( pRedirect != pBlock );
1853 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1855 assert( (reinterpret_cast<uintptr_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
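// The redirect header sits immediately before the aligned user area and points back
// (flag = 1) to the real block header at pBlock; see the block_header layout diagram above.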
1861 // Bound checking is only for our blocks
1862 if ( !pBlock->isOSAllocated() ) {
1863 // the block is allocated from our heap - bound checker is applicable
1864 m_BoundChecker.make_trailer(
1865 reinterpret_cast<byte *>(pRedirect + 1),
1866 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1871 return pRedirect + 1;
1874 /// Free aligned memory block previously allocated by \ref alloc_aligned
1876 void * pMemory ///< Pointer to memory block to free
1884 /// Get instant summary statistics
1885 void summaryStat( summary_stat& st )
1887 size_t nProcHeapCount = m_SizeClassSelector.size();
1888 for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1889 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1891 for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1892 processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1894 st.add_procheap_stat( pProcHeap->stat );
1900 st.add_heap_stat( m_OSAllocStat );
1904 }}} // namespace cds::memory::michael
1906 #endif // CDSLIB_MEMORY_MICHAEL_ALLOCATOR_TMPL_H