3 #ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
4 #define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
7 Michael allocator implementation
9 [2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"
12 2011.09.07 khizmax Optimization: small page (about 64K) is allocated by Heap::alloc call.
13 This optimization allows to allocate system memory more regularly,
14 in blocks of 1M that leads to less memory fragmentation.
15 2011.01.02 khizmax Created
19 #include <cds/memory/michael/options.h>
20 #include <cds/memory/michael/bound_check.h>
21 #include <cds/memory/michael/procheap_stat.h>
22 #include <cds/memory/michael/osalloc_stat.h>
24 #include <cds/os/topology.h>
25 #include <cds/os/alloc_aligned.h>
26 #include <cds/lock/spinlock.h>
27 #include <cds/details/type_padding.h>
28 #include <cds/details/marked_ptr.h>
29 #include <cds/container/vyukov_mpmc_cycle_queue.h>
30 #include <cds/user_setup/cache_line.h>
31 #include <cds/details/lib.h>
34 #include <boost/intrusive/list.hpp>
37 /// Memory-related algorithms: allocators etc.
39 /// Michael's allocator (class Heap)
42 \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
44 This namespace declares the main class Heap and a lot of helper classes.
/// Size class descriptor: one entry of the size-class table
struct size_class {
    unsigned int    nBlockSize  ;   ///< block size in bytes
    unsigned int    nSBSize     ;   ///< superblock size (64K or 1M)
    unsigned int    nCapacity   ;   ///< superblock capacity (nSBSize / nBlockSize)
    unsigned int    nSBSizeIdx  ;   ///< internal superblock size index (page index)
};
/// %Heap based on system \p malloc and \p free functions
struct malloc_heap
{
    /// Allocates memory block of \p nSize bytes (\p malloc wrapper)
    static void * alloc( size_t nSize )
    {
        return ::malloc( nSize );
    }

    /// Returning memory block \p p to the system (\p free wrapper)
    static void free( void * p )
    {
        ::free( p );
    }
};
71 /// %Heap based on system provided aligned \p malloc and \p free functions
72 struct aligned_malloc_heap
74 /// Allocates aligned memory block of \p nSize bytes with alignment \p nAlignment
75 static void * alloc( size_t nSize, size_t nAlignment )
77 return cds::OS::aligned_malloc( nSize, nAlignment );
79 /// Frees aligned memory block \p p that has been previosly allocated by \ref alloc call
80 static void free( void * p )
82 cds::OS::aligned_free( p );
86 /// Page heap based on \p Heap
88 Page heap can allocate memory by page-sized block only.
89 \p Heap may be any heap that provides interface like \ref malloc_heap.
91 This class is one of available implementation of opt::page_heap option.
93 template <class Heap = malloc_heap>
94 class page_allocator: public Heap
97 typedef Heap base_class;
104 size_t nPageSize ///< page size in bytes
106 : m_nPageSize( nPageSize )
109 /// Allocate new page
112 return base_class::alloc( m_nPageSize );
115 /// Free page \p pPage
116 void free( void * pPage )
118 base_class::free( pPage );
122 /// Page cacheable heap
124 To improve performance this allocator maintains small list of free pages.
125 Page heap can allocate memory by page-sized block only.
128 \li \p FreeListCapacity - capacity of free-list, default value is 64 page
129 \li \p Heap may be any heap that provides interface like \ref malloc_heap.
131 This class is one of available implementation of opt::page_heap option.
133 template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
134 class page_cached_allocator: public page_allocator<Heap>
137 typedef page_allocator<Heap> base_class;
140 struct make_null_ptr {
141 void operator ()(void *& p)
148 typedef container::VyukovMPMCCycleQueue<
150 opt::buffer< opt::v::static_buffer<void *, FreeListCapacity> >
152 , opt::value_cleaner< make_null_ptr >
156 free_list m_FreeList;
161 page_cached_allocator(
162 size_t nPageSize ///< page size in bytes
164 : base_class( nPageSize )
165 , m_FreeList( FreeListCapacity )
169 ~page_cached_allocator()
172 while ( m_FreeList.pop(pPage) )
173 base_class::free( pPage );
177 /// Allocate new page
181 if ( !m_FreeList.pop( pPage ) )
182 pPage = base_class::alloc();
186 /// Free page \p pPage
187 void free( void * pPage )
189 if ( !m_FreeList.push( pPage ))
190 base_class::free( pPage );
194 /// Implementation of opt::sizeclass_selector option
196 Default size-class selector can manage memory blocks up to 64K.
198 class CDS_EXPORT_API default_sizeclass_selector
201 /// Count of different size-classes
202 static const size_t c_nSizeClassCount = 63;
205 static const size_t c_nMaxBlockSize = 64 * 1024;
207 /// Page size of type 0 (64K)
208 static const unsigned int c_nPage64K = 64 * 1024 - 32;
210 /// Page size of type 1 (1M)
211 static const unsigned int c_nPage1M = 1024 * 1024;
213 static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
214 static size_class const m_szClass[c_nSizeClassCount];
215 static unsigned char const m_szClassMap[];
218 /// Type of size-class index
219 typedef unsigned int sizeclass_index;
222 default_sizeclass_selector();
225 /// "No size class" index
226 static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
228 /// Returns size-class count
229 static sizeclass_index size()
231 return c_nSizeClassCount;
234 /// Returns page size in bytes for given page type \p nPageType
235 static size_t page_size(size_t nPageType )
243 assert(false) ; // anything forgotten?..
248 /// Returns count of page size-class
250 This class supports pages of two types: 64K page for small objects and 1M page for other objects.
252 static size_t pageTypeCount()
257 /// Returns size-class index for \p nSize
259 For large blocks that cannot be allocated by Michael's allocator
260 the function must return -1.
262 static sizeclass_index find( size_t nSize )
264 if ( nSize > c_nMaxBlockSize ) {
265 // Too large block - allocate from system
266 return c_nNoSizeClass;
268 sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
269 assert( nSize <= m_szClassBounds[ szClass ] );
270 assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
275 /// Gets details::size_class struct for size-class index \p nIndex
276 static const size_class * at( sizeclass_index nIndex )
278 assert( nIndex < size() );
279 return m_szClass + nIndex;
285 struct free_list_tag;
286 typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > > free_list_locked_hook;
288 struct partial_list_tag;
289 typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > > partial_list_locked_hook;
291 struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook
296 /// List of free superblock descriptor
298 This class is a implementation of \ref opt::free_list option
300 template <class Lock, class T = details::intrusive_superblock_desc>
301 class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
304 typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
306 typedef details::free_list_locked_hook item_hook;
307 typedef Lock lock_type;
309 typedef cds::lock::scoped_lock<lock_type> auto_lock;
311 mutable lock_type m_access;
315 /// Rebinds to other item type \p T2
318 typedef free_list_locked<Lock, T2> other ; ///< rebind result
322 /// Push superblock descriptor to free-list
323 void push( T * pDesc )
325 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
326 auto_lock al(m_access);
327 base_class::push_back( *pDesc );
330 /// Pop superblock descriptor from free-list
333 auto_lock al(m_access);
334 if ( base_class::empty() )
336 T& rDesc = base_class::front();
337 base_class::pop_front();
338 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
342 /// Returns current count of superblocks in free-list
345 auto_lock al(m_access);
346 return base_class::size();
350 /// List of partial filled superblock descriptor
352 This class is a implementation of \ref opt::partial_list option
354 template <class Lock, class T = details::intrusive_superblock_desc>
355 class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
358 typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
360 typedef details::partial_list_locked_hook item_hook;
361 typedef Lock lock_type;
363 typedef cds::lock::scoped_lock<lock_type> auto_lock;
365 mutable lock_type m_access;
369 /// Rebinds to other item type \p T2
372 typedef partial_list_locked<Lock, T2> other ; ///< rebind result
376 /// Push a superblock \p pDesc to the list
377 void push( T * pDesc )
379 auto_lock al( m_access );
380 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
381 base_class::push_back( *pDesc );
384 /// Pop superblock from the list
387 auto_lock al( m_access );
388 if ( base_class::empty() )
390 T& rDesc = base_class::front();
391 base_class::pop_front();
392 assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
396 /// Removes \p pDesc descriptor from the free-list
397 bool unlink( T * pDesc )
399 assert(pDesc != nullptr);
400 auto_lock al( m_access );
401 // !inited(pDesc) is equal to "pDesc is being linked to partial list"
402 if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
403 base_class::erase( base_class::iterator_to( *pDesc ) );
409 /// Count of element in the list
412 auto_lock al( m_access );
413 return base_class::size();
417 /// Summary processor heap statistics
419 Summary heap statistics for use with Heap::summaryStat function.
423 size_t nAllocFromActive ; ///< Event count of allocation from active superblock
424 size_t nAllocFromPartial ; ///< Event count of allocation from partial superblock
425 size_t nAllocFromNew ; ///< Event count of allocation from new superblock
426 size_t nFreeCount ; ///< Count of \p free function call
427 size_t nPageAllocCount ; ///< Count of page (superblock) allocated
428 size_t nPageDeallocCount ; ///< Count of page (superblock) deallocated
429 size_t nDescAllocCount ; ///< Count of superblock descriptors
430 size_t nDescFull ; ///< Count of full superblock
431 atomic64u_t nBytesAllocated ; ///< Count of allocated bytes (for heap managed memory blocks)
432 atomic64u_t nBytesDeallocated ; ///< Count of deallocated bytes (for heap managed memory blocks)
434 size_t nSysAllocCount ; ///< Count of \p alloc and \p alloc_aligned function call (for large memory blocks that allocated directly from OS)
435 size_t nSysFreeCount ; ///< Count of \p free and \p free_aligned function call (for large memory blocks that allocated directly from OS)
436 atomic64u_t nSysBytesAllocated ; ///< Count of allocated bytes (for large memory blocks that allocated directly from OS)
437 atomic64_t nSysBytesDeallocated; ///< Count of deallocated bytes (for large memory blocks that allocated directly from OS)
439 // Internal contention indicators
440 /// CAS failure counter for updating active field of active block of \p alloc_from_active Heap internal function
442 Contention indicator. The less value is better
444 size_t nActiveDescCASFailureCount;
445 /// CAS failure counter for updating active field of active block of \p alloc_from_active Heap internal function
447 Contention indicator. The less value is better
449 size_t nActiveAnchorCASFailureCount;
450 /// CAS failure counter for updating anchor field of partial block of \p alloc_from_partial Heap internal function
452 Contention indicator. The less value is better
454 size_t nPartialDescCASFailureCount;
455 /// CAS failure counter for updating anchor field of partial block of \p alloc_from_partial Heap internal function
457 Contention indicator. The less value is better
459 size_t nPartialAnchorCASFailureCount;
463 /// Constructs empty statistics. All counters are zero.
469 /// Difference statistics
471 This operator computes difference between \p *this and \p stat and places the difference to \p this.
474 summary_stat& operator -=( const summary_stat& stat )
476 nAllocFromActive -= stat.nAllocFromActive;
477 nAllocFromPartial -= stat.nAllocFromPartial;
478 nAllocFromNew -= stat.nAllocFromNew;
479 nFreeCount -= stat.nFreeCount;
480 nPageAllocCount -= stat.nPageAllocCount;
481 nPageDeallocCount -= stat.nPageDeallocCount;
482 nDescAllocCount -= stat.nDescAllocCount;
483 nDescFull -= stat.nDescFull;
484 nBytesAllocated -= stat.nBytesAllocated;
485 nBytesDeallocated -= stat.nBytesDeallocated;
487 nSysAllocCount -= stat.nSysAllocCount;
488 nSysFreeCount -= stat.nSysFreeCount;
489 nSysBytesAllocated -= stat.nSysBytesAllocated;
490 nSysBytesDeallocated -= stat.nSysBytesDeallocated;
492 nActiveDescCASFailureCount -= stat.nActiveDescCASFailureCount;
493 nActiveAnchorCASFailureCount -= stat.nActiveAnchorCASFailureCount;
494 nPartialDescCASFailureCount -= stat.nPartialDescCASFailureCount;
495 nPartialAnchorCASFailureCount -= stat.nPartialAnchorCASFailureCount;
500 /// Clears statistics
502 All counters are set to zero.
506 memset( this, 0, sizeof(*this));
510 template <typename Stat>
511 summary_stat& add_procheap_stat( const Stat& stat )
513 nAllocFromActive += stat.allocFromActive();
514 nAllocFromPartial += stat.allocFromPartial();
515 nAllocFromNew += stat.allocFromNew();
516 nFreeCount += stat.freeCount();
517 nPageAllocCount += stat.blockAllocated();
518 nPageDeallocCount += stat.blockDeallocated();
519 nDescAllocCount += stat.descAllocCount();
520 nDescFull += stat.descFull();
521 nBytesAllocated += stat.allocatedBytes();
522 nBytesDeallocated += stat.deallocatedBytes();
524 nActiveDescCASFailureCount += stat.activeDescCASFailureCount();
525 nActiveAnchorCASFailureCount += stat.activeAnchorCASFailureCount();
526 nPartialDescCASFailureCount += stat.partialDescCASFailureCount();
527 nPartialAnchorCASFailureCount += stat.partialAnchorCASFailureCount();
532 template <typename Stat>
533 summary_stat& add_heap_stat( const Stat& stat )
535 nSysAllocCount += stat.allocCount();
536 nSysFreeCount += stat.freeCount();
538 nSysBytesAllocated += stat.allocatedBytes();
539 nSysBytesDeallocated+= stat.deallocatedBytes();
546 /// Michael's allocator
548 This class provides base functionality for Michael's allocator. It does not provide
549 the interface described by \p std::allocator, therefore, we name it as a heap, not as an allocator.
550 The heap interface is closer to semantics of \p malloc / \p free system functions.
551 The heap supports allocation of aligned and unaligned data.
553 The algorithm is based on simplified version of
554 \li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
556 that, in turn, is concurrent version of well-known Hoard allocator developed by Emery Berger, see
557 \li [2002] Emery Berger "Memory Management for High-Performance Application", PhD thesis
559 This is powerful, scalable, fully customizable heap with fast-path without any locks
560 that has been developed specifically for multi-threading.
With opt::sys_topology you can set as many allocation arenas ("processor heaps") as you need.
562 You can manually bound any your thread to any arena ("processor"). With opt::sizeclass_selector option you can manage
563 allocation granularity. With opt::page_heap you can utilize any OS-provided features for page allocation
564 like \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that helps you to tune your application.
565 The opt::check_bounds feature can help you to find a memory buffer overflow.
567 Brief algorithm description from Michael's work:
569 Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
570 the heap is composed of large superblocks (64 KB or 1MB size). Each superblock is divided into multiple equal-sized blocks.
571 Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
572 heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
573 An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
574 that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
575 block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
576 structures for the size classes and processor heaps (about 16 KB for a 16 processor machine) are allocated and initialized
577 in a lock-free manner.
579 Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
580 the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
581 atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
582 pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
583 available blocks of its original superblock by atomically updating its descriptor.
<b>Constraint</b>: one superblock may contain up to 2048 blocks. This constraint imposes a restriction on the maximum
588 Available \p Options:
589 - \ref opt::sys_topology - class that describes system topology needed for allocator.
590 Default is \p cds::OS::topology (see cds::OS::Win32::topology for interface description)
591 - \ref opt::system_heap - option setter for an allocator for large blocks that is used for direct allocation from OS.
592 Default is \ref malloc_heap.
593 - \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
594 Default is \ref aligned_malloc_heap
595 - \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
596 Default is \ref page_cached_allocator
597 - \ref opt::sizeclass_selector - option setter for a class used to select appropriate size-class
598 for incoming allocation request.
599 Default is \ref default_sizeclass_selector
600 - \ref opt::free_list - option setter for a class to manage a list of free superblock descriptors
601 Default is \ref free_list_locked
602 - \ref opt::partial_list - option setter for a class to manage a list of partial filled superblocks
603 Default is \ref partial_list_locked
604 - \ref opt::procheap_stat - option setter for a class to gather internal statistics for memory allocation
605 that is maintained by the heap.
606 Default is \ref procheap_empty_stat
607 - \ref opt::os_allocated_stat - option setter for a class to gather internal statistics for large block
608 allocation. Term "large block" is specified by the size-class selector (see \ref opt::sizeclass_selector)
and it is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated from
610 OS directly. \p os_allocated_stat option is set a class to gather statistics for large blocks.
611 Default is \ref os_allocated_empty
612 - \ref opt::check_bounds - a bound checker.
613 Default is no bound checker (cds::opt::none)
616 The heap is the basic building block for your allocator or <tt> operator new</tt> implementation.
619 #include <cds/memory/michael/allocator.h>
621 // Heap with explicitly defined options:
622 cds::memory::michael::Heap<
623 opt::aligned_heap< aligned_malloc_heap >,
624 opt::page_heap< page_cached_allocator<16, malloc_heap> >
627 // Heap with default options:
628 cds::memory::michael::Heap<> myDefHeap;
631 \par How to make std-like allocator
There are serious differences between the heap and the <tt>std::allocator</tt> interface:
634 - Heap is stateful, and \p std::allocator is stateless.
635 - Heap has much more template parameters than \p std::allocator
636 - Heap has low-level interface for memory allocating only unlike the allocator
637 interface that can construct/destroy objects of any type T.
639 To convert heap interface into \p std::allocator -like interface you should:
640 - Declare object of class cds::memory::michael::Heap specifying the necessary
641 template parameters; this is usually static object
642 - Create a class with \p std::allocator interface that uses the function of heap.
644 #include <cds/memory/michael/allocator.h>
647 class MichaelAllocator
649 typedef std::allocator<T> std_allocator;
650 typedef cds::memory::michael::Heap<> michael_heap;
652 // Michael heap static object
653 static michael_heap s_Heap;
655 // Declare typedefs from std::allocator
656 typedef typename std_allocator::const_pointer const_pointer;
657 typedef typename std_allocator::pointer pointer;
658 typedef typename std_allocator::const_reference const_reference;
659 typedef typename std_allocator::reference reference;
660 typedef typename std_allocator::difference_type difference_type;
661 typedef typename std_allocator::size_type size_type;
662 typedef typename std_allocator::value_type value_type;
664 // Allocation function
665 pointer allocate( size_type _Count, const void* _Hint )
667 return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
670 // Deallocation function
671 void deallocate( pointer _Ptr, size_type _Count )
676 // Other std::allocator specific functions: address, construct, destroy, etc.
679 // Rebinding allocator to other type
680 template <class _Other>
682 typedef MichaelAllocator<_Other> other;
687 MichaelAllocator::michael_heap MichaelAllocator::s_Heap;
691 template <typename... Options>
696 static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
697 static const unsigned int c_nDefaultBlockAlignment = 8;
699 struct default_options {
700 typedef cds::OS::topology sys_topology;
701 typedef malloc_heap system_heap;
702 typedef page_cached_allocator<> page_heap;
703 typedef aligned_malloc_heap aligned_heap;
704 typedef default_sizeclass_selector sizeclass_selector;
705 typedef free_list_locked<cds::lock::Spin> free_list;
706 typedef partial_list_locked<cds::lock::Spin> partial_list;
707 typedef procheap_empty_stat procheap_stat;
708 typedef os_allocated_empty os_allocated_stat;
709 typedef cds::opt::none check_bounds;
715 typedef typename opt::make_options<default_options, Options...>::type options;
719 typedef unsigned char byte;
722 typedef typename options::sys_topology sys_topology ; ///< effective system topology
723 typedef typename options::system_heap system_heap ; ///< effective system heap
724 typedef typename options::aligned_heap aligned_heap ; ///< effective aligned heap
725 typedef typename options::sizeclass_selector sizeclass_selector ; ///< effective sizeclass selector
726 typedef typename options::page_heap page_heap ; ///< effective page heap
727 typedef typename options::procheap_stat procheap_stat ; ///< effective processor heap statistics
728 typedef typename options::os_allocated_stat os_allocated_stat ; ///< effective OS-allocated memory statistics
729 typedef details::bound_checker_selector< typename options::check_bounds > bound_checker ; ///< effective bound checker
731 // forward declarations
733 struct superblock_desc;
734 struct processor_heap_base;
735 struct processor_desc;
/// Superblock states
/**
    A superblock can be in one of four states: \p ACTIVE, \p FULL,
    \p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
    superblock in a heap, or if a thread intends to try to install it
    as such. A superblock is \p FULL if all its blocks are either allocated
    or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
    and contains unreserved available blocks. A superblock is
    \p EMPTY if all its blocks are free and it is not \p ACTIVE.
*/
enum superblock_state {
    SBSTATE_ACTIVE  = 0,    ///< superblock is active
    SBSTATE_FULL    = 1,    ///< superblock is full
    SBSTATE_PARTIAL = 2,    ///< superblock is partially allocated
    SBSTATE_EMPTY   = 3     ///< superblock is empty and may be freed
};

static const size_t c_nMaxBlockInSuperBlock = 1024 * 2  ;   ///< Max count of blocks in superblock (2 ** 11)
/// Anchor of the superblock descriptor. Updated by CAS
/**
    The four bit-fields pack into a single 64-bit word so the whole
    anchor can be replaced by one atomic compare-and-swap.
*/
struct anchor_tag {
    unsigned long long  avail:11    ;   ///< index of first available block in the superblock
    unsigned long long  count:11    ;   ///< number of unreserved blocks in the superblock
    unsigned long long  state: 2    ;   ///< state of the superblock (see \ref superblock_state enum)
    unsigned long long    tag:40    ;   ///< ABA prevention tag
};
765 /// Superblock descriptor
766 struct superblock_desc
767 : public options::free_list::item_hook
768 , public options::partial_list::item_hook
770 atomics::atomic<anchor_tag> anchor ; ///< anchor, see \ref anchor_tag
771 byte * pSB ; ///< ptr to superblock
772 processor_heap_base * pProcHeap ; ///< pointer to owner processor heap
773 unsigned int nBlockSize ; ///< block size in bytes
774 unsigned int nCapacity ; ///< superblock size/block size
779 , pProcHeap( nullptr )
785 typedef typename options::free_list::template rebind<superblock_desc>::other free_list;
786 typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
789 #if CDS_BUILD_BITS == 32
790 /// Allocated block header
792 Each allocated block has 8-byte header.
793 The header contains pointer to owner superblock descriptor and the redirection flag.
794 If the block has been allocated by \ref alloc, then the redirection flag is 0 and the block's structure is:
797 | blockHeader | [8 byte] pointer to owner superblock (flag=0)
799 | | <- memory allocated
804 If the block has been allocated by \ref alloc_aligned, then it is possible that pointer returned must be aligned.
805 In this case the redirection flag is 1 and the block's structure is:
808 +-> | blockHeader | [8 byte] pointer to owner superblock (flag=0)
814 +-- | blockHeader | [8 byte] pointer to block head (flag=1)
816 | | <- memory allocated
831 superblock_desc * pDesc ; // pointer to superblock descriptor
832 atomic32u_t nSize ; // block size (allocated form OS)
837 void set( superblock_desc * pdesc, atomic32u_t isAligned )
840 nFlags = isAligned ? bitAligned : 0;
843 superblock_desc * desc()
845 assert( (nFlags & bitOSAllocated) == 0 );
846 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
849 block_header * begin()
851 return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
854 bool isAligned() const
856 return (nFlags & bitAligned) != 0;
859 bool isOSAllocated() const
861 return (nFlags & bitOSAllocated) != 0;
864 void setOSAllocated( size_t sz )
867 nFlags = bitOSAllocated;
870 size_t getOSAllocSize() const
872 assert( isOSAllocated() );
878 #elif CDS_BUILD_BITS == 64
886 typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
887 // If bitOSAllocated is set the pDesc contains size of memory block
889 marked_desc_ptr pDesc;
891 void set( superblock_desc * pdesc, atomic32u_t isAligned )
893 pDesc = marked_desc_ptr( pdesc, isAligned );
896 superblock_desc * desc()
898 assert( !isOSAllocated() );
899 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
902 block_header * begin()
904 return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
907 bool isAligned() const
909 return (pDesc.bits() & bitAligned) != 0;
912 bool isOSAllocated() const
914 return (pDesc.bits() & bitOSAllocated) != 0;
917 void setOSAllocated( size_t nSize )
920 pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
923 size_t getOSAllocSize() const
925 assert( isOSAllocated() );
926 return reinterpret_cast<uptr_atomic_t>( pDesc.ptr() ) >> 2;
932 # error "Unexpected value of CDS_BUILD_BITS"
933 #endif // CDS_BUILD_BITS
936 struct free_block_header: block_header {
937 unsigned int nNextFree;
941 #if CDS_BUILD_BITS == 32
942 /// Processor heap's \p active field
944 The \p active field in the processor heap structure is primarily a pointer to the descriptor
945 of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
946 guaranteed that the active superblock has at least one block available for reservation.
947 Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
948 of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
949 of blocks available for reservation in the active superblock less one. That is, if the value
950 of credits is n, then the active superblock contains n+1 blocks available for reservation
951 through the \p active field. Note that the number of blocks in a superblock is not limited
952 to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
953 (i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
954 atomically decrements credits while validating that the active superblock is still valid.
958 superblock_desc * pDesc;
959 atomic32u_t nCredits;
962 static const unsigned int c_nMaxCredits = 0 - 1;
965 CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
970 # ifdef CDS_CXX11_EXPLICITLY_DEFAULTED_FUNCTION_SUPPORT
971 active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
972 ~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
973 active_tag& operator=(active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
974 # if defined(CDS_MOVE_SEMANTICS_SUPPORT) && !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
975 active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
976 active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
980 /// Returns pointer to superblock descriptor
981 superblock_desc * ptr() const
986 /// Sets superblock descriptor
987 void ptr( superblock_desc * p )
992 unsigned int credits() const
997 void credits( unsigned int n )
1008 void set( superblock_desc * pSB, unsigned int n )
1015 #elif CDS_BUILD_BITS == 64
1020 static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
1022 typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
1023 marked_desc_ptr pDesc;
1026 active_tag() CDS_NOEXCEPT
1029 # ifdef CDS_CXX11_EXPLICITLY_DEFAULTED_FUNCTION_SUPPORT
1030 // Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
1031 //active_tag() CDS_NOEXCEPT_DEFAULTED = default;
1032 active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
1033 ~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
1034 active_tag& operator=(active_tag const&) CDS_NOEXCEPT_DEFAULTED = default;
1035 # if defined(CDS_MOVE_SEMANTICS_SUPPORT) && !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
1036 active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
1037 active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
1040 superblock_desc * ptr() const
1045 void ptr( superblock_desc * p )
1047 assert( (reinterpret_cast<uptr_atomic_t>(p) & c_nMaxCredits) == 0 );
1048 pDesc = marked_desc_ptr( p, pDesc.bits());
1051 unsigned int credits()
1053 return (unsigned int) pDesc.bits();
1056 void credits( unsigned int n )
1058 assert( n <= c_nMaxCredits );
1059 pDesc = marked_desc_ptr( pDesc.ptr(), n );
1064 pDesc = marked_desc_ptr();
1067 void set( superblock_desc * pSB, unsigned int n )
1069 assert( (reinterpret_cast<uptr_atomic_t>(pSB) & c_nMaxCredits) == 0 );
1070 pDesc = marked_desc_ptr( pSB, n );
1076 # error "Unexpected value of CDS_BUILD_BITS"
1077 #endif // CDS_BUILD_BITS
// Per-(processor, size-class) heap: owns one "active" superblock (tagged with
// credits), an optional single cached partial superblock (pPartial), and an
// intrusive list of further partial superblocks (partialList).
1081 struct processor_heap_base
1083 CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active; ///< pointer to the descriptor of active superblock owned by processor heap
1084 processor_desc * pProcDesc ; ///< pointer to parent processor descriptor
1085 const size_class * pSizeClass ; ///< pointer to size class
1086 atomics::atomic<superblock_desc *> pPartial ; ///< pointer to partial filled superblock (may be \p nullptr)
1087 partial_list partialList ; ///< list of partial filled superblocks owned by the processor heap
1088 unsigned int nPageIdx ; ///< page size-class index, \ref c_nPageSelfAllocation - "small page"
1090 /// Small page marker
1092 If page is small and can be allocated by the Heap, the \p nPageIdx value is \p c_nPageSelfAllocation.
1093 This optimization allows to allocate system memory more regularly, in blocks of 1M that leads
1094 to less memory fragmentation.
1096 static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
1098 procheap_stat stat ; ///< heap statistics
1099 //processor_heap_statistics stat;
// Default ctor: zeroes the owning/navigation pointers; the object must already
// be placed on a c_nAlignment boundary (see assert) because the active_tag
// trick steals low pointer bits.
1102 processor_heap_base() CDS_NOEXCEPT
1103 : pProcDesc( nullptr )
1104 , pSizeClass( nullptr )
1105 , pPartial( nullptr )
1107 assert( (reinterpret_cast<uptr_atomic_t>(this) & (c_nAlignment - 1)) == 0 );
1111 /// Get partial superblock owned by the processor heap
// Atomically claims the cached pPartial slot (CAS to nullptr); if the slot is
// empty, falls back to popping from partialList inside the retry loop.
1112 superblock_desc * get_partial()
1114 superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
1117 pDesc = partialList.pop();
1120 } while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1122 //assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
1123 //assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1127 /// Add partial superblock \p pDesc to the list
// Tries to install pDesc into the fast one-slot cache first; only if the slot
// is occupied does it pay for a push onto the intrusive list.
1128 void add_partial( superblock_desc * pDesc )
1130 assert( pPartial != pDesc );
1131 //assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
1133 superblock_desc * pCur = nullptr;
1134 if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
1135 partialList.push( pDesc );
1139 /// Remove superblock \p pDesc from the list of partial superblock
// NOTE(review): only unlinks from partialList; visible lines do not clear the
// pPartial cache slot — presumably callers handle that case; confirm.
1140 bool unlink_partial( superblock_desc * pDesc )
1142 return partialList.unlink( pDesc );
1146 /// Aligned superblock descriptor
1147 typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
1149 /// Processor descriptor
// One per logical processor: its per-size-class heaps, a free list that recycles
// superblock descriptors, and the page heaps used to carve superblock pages.
1150 struct processor_desc
1152 processor_heap * arrProcHeap ; ///< array of processor heap
1153 free_list listSBDescFree ; ///< List of free superblock descriptors
1154 page_heap * pageHeaps ; ///< array of page heap (one for each page size)
// Default ctor: arrays are wired up later by new_processor_desc().
1158 : arrProcHeap( nullptr )
1159 , pageHeaps( nullptr )
// Heap instance state: topology + the two internal heaps (large-block and
// aligned), the size-class selector, and the per-processor descriptor table.
1166 sys_topology m_Topology ; ///< System topology
1167 system_heap m_LargeHeap ; ///< Heap for large block
1168 aligned_heap m_AlignedHeap ; ///< Internal aligned heap
1169 sizeclass_selector m_SizeClassSelector ; ///< Size-class selector
1170 atomics::atomic<processor_desc *> * m_arrProcDesc ; ///< array of pointers to the processor descriptors
1171 unsigned int m_nProcessorCount ; ///< Processor count
1172 bound_checker m_BoundChecker ; ///< Bound checker
1174 os_allocated_stat m_OSAllocStat ; ///< OS-allocated memory statistics
1179 /// Allocates large block from system memory
// Used when the request has no matching size class: the block comes from
// m_LargeHeap and its header is branded OS-allocated so free() routes it back.
// NOTE(review): visible lines dereference p without a null check after
// m_LargeHeap.alloc() — confirm the large heap cannot return nullptr.
1180 block_header * alloc_from_OS( size_t nSize )
1182 block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
1183 m_OSAllocStat.incBytesAllocated( nSize );
1184 p->setOSAllocated( nSize );
1188 /// Allocates from the active superblock if it possible
// Michael-allocator fast path, two lock-free phases:
//  1) CAS on pProcHeap->active: take one credit off the active tag
//     (a zero-credit tag means we take the superblock itself).
//  2) CAS on the superblock anchor: pop the block at index `avail` off the
//     in-superblock free list.
// Returns nullptr (in elided lines) when there is no active superblock.
1189 block_header * alloc_from_active( processor_heap * pProcHeap )
1190 {
1191 active_tag oldActive;
// nCollision counts CAS retries for statistics (starts at -1 so the first,
// successful iteration contributes zero).
1192 int nCollision = -1;
1197 oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
1198 if ( !oldActive.ptr() )
1200 unsigned int nCredits = oldActive.credits();
1201 active_tag newActive ; // default = 0
1202 if ( nCredits != 0 ) {
1203 newActive = oldActive;
1204 newActive.credits( nCredits - 1 );
1206 if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
1211 pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
// Phase 2: we own either one credit or the whole superblock — pop a block.
1214 superblock_desc * pDesc = oldActive.ptr();
1216 anchor_tag oldAnchor;
1217 anchor_tag newAnchor;
1219 unsigned int nMoreCredits = 0;
1224 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1226 assert( oldAnchor.avail < pDesc->nCapacity );
// Block address = superblock base + slot index * block size (widened to avoid
// 32-bit overflow of the byte offset).
1227 pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
// The free block itself stores the index of the next free block.
1228 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1231 if ( oldActive.credits() == 0 ) {
1232 // state must be ACTIVE
1233 if ( oldAnchor.count == 0 )
1234 newAnchor.state = SBSTATE_FULL;
// Otherwise reserve up to c_nMaxCredits blocks to re-arm the active tag.
1236 nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
1237 newAnchor.count -= nMoreCredits;
1240 } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1243 pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
1245 assert( newAnchor.state != SBSTATE_EMPTY );
1247 if ( newAnchor.state == SBSTATE_FULL )
1248 pProcHeap->stat.incDescFull();
// If we consumed the last credit and reserved more, reinstall as active.
1249 if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
1250 update_active( pProcHeap, pDesc, nMoreCredits );
1252 pProcHeap->stat.incAllocFromActive();
1254 // block_header fields is not needed to setup
1255 // It was set in alloc_from_new_superblock
1256 assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1257 assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1258 assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1260 return reinterpret_cast<block_header *>( pAddr );
1263 /// Allocates from a partial filled superblock if it possible
// Medium path: claim a partial superblock from the processor heap, reserve
// all its remaining credits in one anchor CAS, then pop one block in a second
// anchor CAS. Returns nullptr (in elided lines) when no partial block exists.
1264 block_header * alloc_from_partial( processor_heap * pProcHeap )
1267 superblock_desc * pDesc = pProcHeap->get_partial();
1272 anchor_tag oldAnchor;
1273 anchor_tag newAnchor;
1275 unsigned int nMoreCredits = 0;
1277 int nCollision = -1;
// Reservation loop: take (count-1) blocks as credits, keep 1 for ourselves.
1281 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
// A superblock that drained to EMPTY while sitting in the partial list is
// recycled here instead of being allocated from.
1282 if ( oldAnchor.state == SBSTATE_EMPTY ) {
1283 free_superblock( pDesc );
1287 nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
1288 newAnchor.count -= nMoreCredits + 1;
1289 newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
1291 } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1294 pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
1296 if ( newAnchor.state == SBSTATE_FULL )
1297 pProcHeap->stat.incDescFull();
1299 // Now, the thread is guaranteed to have reserved one or more blocks
1300 // pop reserved block
1306 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1308 assert( oldAnchor.avail < pDesc->nCapacity );
// NOTE(review): unlike alloc_from_active (line 1227) this offset is not widened
// to unsigned long long before the multiply — confirm avail*nBlockSize cannot
// overflow unsigned int for the largest configured superblock.
1309 pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
1310 newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
1312 } while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
1315 pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
1317 assert( newAnchor.state != SBSTATE_EMPTY );
1319 pProcHeap->stat.incAllocFromPartial();
// Hand the reserved credits to the active tag so later allocs take the fast path.
1321 if ( nMoreCredits > 0 )
1322 update_active( pProcHeap, pDesc, nMoreCredits );
1324 // block_header fields is not needed to setup
1325 // It was set in alloc_from_new_superblock
1326 assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
1327 assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
1328 assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
1330 return reinterpret_cast<block_header *>( pAddr );
1333 /// Allocates from the new superblock
// Slow path: builds a brand-new superblock (descriptor + page), threads all
// blocks into a free list, takes block 0 for the caller, and tries to install
// the rest as the active superblock. If another thread won the active slot,
// the fresh superblock is released (in elided lines the function presumably
// retries/returns — confirm against the full file).
1334 block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
1336 superblock_desc * pDesc = new_superblock_desc( pProcHeap );
1337 assert( pDesc != nullptr );
1338 pDesc->pSB = new_superblock_buffer( pProcHeap );
1340 anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
1343 // Make single-linked list of free blocks in superblock
1344 byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
1345 unsigned int nNext = 0;
1346 const unsigned int nBlockSize = pDesc->nBlockSize;
1347 for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
// Every block header is stamped with its descriptor once, here; the alloc
// fast paths rely on this and never re-stamp (see asserts at 1256/1326).
1348 reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
1349 reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
// Last block terminates the free list by pointing back to index 0.
1351 reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
1353 active_tag newActive;
// Credits = min(capacity-1, c_nMaxCredits) - 1: block 0 goes to the caller,
// the tag stores "credits - 1" by convention.
1354 newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
// Whatever does not fit into the credits stays behind in anchor.count.
1356 anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
1357 anchor.state = SBSTATE_ACTIVE;
1358 pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
1360 active_tag curActive;
1361 if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
1362 pProcHeap->stat.incAllocFromNew();
1363 //reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
1364 return reinterpret_cast<block_header *>( pDesc->pSB );
// Lost the race for the active slot: give the superblock back.
1367 free_superblock( pDesc );
1371 /// Find appropriate processor heap based on size-class selected
// Maps (current processor, size class) -> processor_heap, lazily creating the
// processor descriptor on first use. Creation is raced via CAS; the loser
// frees its freshly built descriptor.
1372 processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
1374 assert( nSizeClassIndex < m_SizeClassSelector.size() );
1376 unsigned int nProcessorId = m_Topology.current_processor();
1377 assert( nProcessorId < m_nProcessorCount );
// Defensive guard for topologies reporting an out-of-range id (the elided
// branch presumably bails out — confirm against the full file).
1379 if ( nProcessorId >= m_nProcessorCount )
1382 processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
1385 processor_desc * pNewDesc = new_processor_desc( nProcessorId );
1386 if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
// CAS failed: another thread published a descriptor first; discard ours.
1390 free_processor_desc( pNewDesc );
1393 return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
1396 /// Updates active field of processor heap \p pProcHeap
// Tries to make pDesc the active superblock carrying nCredits reserved blocks.
// If another superblock is already active, the reserved credits are returned
// to pDesc's anchor and the superblock is demoted to the partial list.
1397 void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
1399 assert( pProcHeap == pDesc->pProcHeap );
1401 active_tag nullActive;
1402 active_tag newActive;
// Tag stores "credits - 1" by convention (the zero-credit tag still grants one block).
1403 newActive.set( pDesc, nCredits - 1 );
1405 if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
1408 // Someone installed another active superblock.
1409 // Return credits to superblock and make it partial
1411 anchor_tag oldAnchor;
1412 anchor_tag newAnchor;
1415 newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
1416 newAnchor.count += nCredits;
1417 newAnchor.state = SBSTATE_PARTIAL;
1418 } while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
1420 pDesc->pProcHeap->add_partial( pDesc );
1423 /// Allocates new processor descriptor
// Builds the whole per-processor structure in one aligned allocation:
// the descriptor itself, then the page_heap array, then (re-aligned to
// c_nAlignment) the processor_heap array — see the layout sketch below.
1424 processor_desc * new_processor_desc( unsigned int nProcessorId )
1426 processor_desc * pDesc;
1427 const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1430 Processor descriptor layout
1432 proc_desc - 64-byte alignment
1433 page_heap[0] 64-byte alignment
1434 page_heap[1] 64-byte alignment
1436 page_heap[P] 64-byte alignment
1438 proc_heap[0] 64-byte alignment
1439 proc_heap[1] 64-byte alignment
1441 proc_heap[N] 64-byte alignment
1444 const size_t szDesc =
1445 ( sizeof(processor_desc)
1446 + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
// szDesc was rounded to alignment units (elided lines); convert back to bytes
// and append the processor_heap array.
1451 const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
1453 static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
1455 pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
// page_heap array lives immediately after the descriptor; placement-new each entry.
1457 pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
1458 for ( size_t i = 0; i < nPageHeapCount; ++i )
1459 new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
1461 // initialize processor heaps
// Round the address past the page heaps up to the next c_nAlignment boundary.
1462 pDesc->arrProcHeap =
1463 reinterpret_cast<processor_heap *>(
1464 reinterpret_cast<uptr_atomic_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
1465 & ~(uptr_atomic_t(c_nAlignment) - 1)
1468 processor_heap * pProcHeap = pDesc->arrProcHeap;
1469 processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
1470 for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
1471 new (pProcHeap) processor_heap();
1472 pProcHeap->pProcDesc = pDesc;
1473 pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
// If the superblock size itself fits a size class, the Heap can self-allocate
// the page ("small page" optimization); otherwise use the matching page heap.
1474 if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
1475 pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
1477 pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
// Tears down one processor heap: releases every superblock it still owns
// (partial list, cached partial slot, active) and runs the heap's destructor.
// The two branches differ only in who frees the superblock page: self-allocated
// "small pages" go back through free()/the Heap, others to the page heap.
1483 void free_processor_heap( processor_heap * pProcHeap )
1485 if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1486 superblock_desc * pDesc;
1488 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
// NOTE(review): the companion loop below frees pDesc->pSB first; the elided
// line here presumably does free( pDesc->pSB ) too — confirm.
1490 m_AlignedHeap.free( pDesc );
1493 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1495 free( pPartial->pSB );
1496 m_AlignedHeap.free( pPartial );
1499 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1502 m_AlignedHeap.free( pDesc );
// Page-heap-backed variant: same teardown, pages returned to the page heap.
1506 page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
1507 superblock_desc * pDesc;
1509 for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
1510 pageHeap.free( pDesc->pSB );
1511 m_AlignedHeap.free( pDesc );
1514 superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
1516 pageHeap.free( pPartial->pSB );
1517 m_AlignedHeap.free( pPartial );
1520 pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
1522 pageHeap.free( pDesc->pSB );
1523 m_AlignedHeap.free( pDesc );
1526 pProcHeap->~processor_heap();
1529 /// Frees processor descriptor
// Reverses new_processor_desc: tears down every processor heap, drains the
// recycled-descriptor free list, destroys the placement-new'd page heaps,
// then destroys and frees the single aligned allocation.
1530 void free_processor_desc( processor_desc * pDesc )
1532 const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
1534 for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
1535 free_processor_heap( pDesc->arrProcHeap + j );
1537 for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
1538 m_AlignedHeap.free( pSBDesc );
// Explicit destructor calls: the page heaps were placement-new'd into the
// descriptor's trailing storage, so delete[] is not applicable.
1540 for (size_t i = 0; i < nPageHeapCount; ++i )
1541 (pDesc->pageHeaps + i)->page_heap::~page_heap();
1543 //m_IntHeap.free( pDesc->pageHeaps );
1544 pDesc->pageHeaps = nullptr;
1546 pDesc->processor_desc::~processor_desc();
1547 m_AlignedHeap.free( pDesc );
1550 /// Allocates new superblock descriptor
// Prefers recycling a descriptor from the processor's free list; otherwise
// placement-news a fresh aligned one. Either way the descriptor is stamped
// with the heap's size-class parameters and the anchor is (re)initialized
// with relaxed ordering — publication happens later via the active/partial CAS.
1551 superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
1554 superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
1555 if ( pDesc == nullptr ) {
1556 pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
// Alignment requirement: active_tag steals the low bits of this pointer.
1557 assert( (uptr_atomic_t(pDesc) & (c_nAlignment - 1)) == 0 );
1559 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1561 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1563 pProcHeap->stat.incDescAllocCount();
1565 pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
1566 pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
1567 assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
1568 pDesc->pProcHeap = pProcHeap;
1570 anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
1572 pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
1577 /// Allocates superblock page
// "Small pages" (c_nPageSelfAllocation) are allocated through the Heap's own
// alloc() — the 2011 optimization from the file header; everything else comes
// from the per-processor page heap matching nPageIdx.
1578 byte * new_superblock_buffer( processor_heap * pProcHeap )
1580 pProcHeap->stat.incBlockAllocated();
1581 if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1582 return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
1585 return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
1589 /// Frees superblock descriptor and its page
// Returns the superblock page to whichever source produced it (self-allocated
// branch is elided here) and recycles the descriptor onto the processor's
// free list for new_superblock_desc to reuse.
1590 void free_superblock( superblock_desc * pDesc )
1592 pDesc->pProcHeap->stat.incBlockDeallocated();
1593 processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
1595 if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
1599 pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
1602 pProcDesc->listSBDescFree.push( pDesc );
1605 /// Allocate memory block
// Central allocation dispatcher: size-class lookup, then the three-tier
// fallback chain — active superblock, partial superblock, new superblock —
// with an OS fallback when no size class (or no heap) applies.
1606 block_header * int_alloc(
1607 size_t nSize ///< Size of memory block to allocate in bytes
1610 typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
1611 if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
1612 return alloc_from_OS( nSize );
1614 assert( nSizeClassIndex < m_SizeClassSelector.size() );
1616 block_header * pBlock;
1617 processor_heap * pProcHeap;
1619 pProcHeap = find_heap( nSizeClassIndex );
// No heap available for this processor — degrade to an OS allocation.
1621 return alloc_from_OS( nSize );
1623 if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
1625 if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
1627 if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
1631 pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
1633 assert( pBlock != nullptr );
1639 /// Heap constructor
// Sizes the per-processor descriptor table from the system topology and
// zero-fills it; descriptors themselves are created lazily in find_heap().
1642 // Explicit libcds initialization is needed since a static object may be constructed
1645 m_nProcessorCount = m_Topology.processor_count();
1646 m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
1647 atomics::atomic<processor_desc *>[ m_nProcessorCount ];
// Relies on zero bytes being a valid "nullptr" representation for atomic<T*>
// (the original author flagged the same doubt below).
1648 memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount ) ; // ?? memset for atomic<>
1653 The destructor frees all memory allocated by the heap.
// Destructor: frees every lazily-created processor descriptor, then the table.
1657 for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
1658 processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
1660 free_processor_desc( pDesc );
1663 m_AlignedHeap.free( m_arrProcDesc );
1665 // Explicit termination of libcds
1669 /// Allocate memory block
// Public entry point: pads the request with the block header and the bound
// checker's trailer, allocates via int_alloc, writes the guard trailer for
// heap-managed blocks (OS blocks are not bound-checked), and (in elided
// lines) returns the user pointer pBlock + 1.
1671 size_t nSize ///< Size of memory block to allocate in bytes
1674 block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
1676 // Bound checking is only for our blocks
1677 if ( !pBlock->isOSAllocated() ) {
1678 // the block is allocated from our heap - bound checker is applicable
1679 m_BoundChecker.make_trailer(
1680 reinterpret_cast<byte *>(pBlock + 1),
1681 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1689 /// Free previously allocated memory block
// Public deallocation: resolves the real header via the redirect mechanism
// (aligned allocations plant a redirect header just before the user pointer),
// short-circuits OS blocks, verifies the guard trailer, then pushes the block
// back onto its superblock's lock-free free list with one anchor CAS loop.
1691 void * pMemory ///< Pointer to memory block to free
1697 block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
// begin() follows the redirect to the actual block header (no-op for
// ordinary blocks).
1698 block_header * pBlock = pRedirect->begin();
1700 if ( pBlock->isOSAllocated() ) {
1701 // Block has been allocated from OS
1702 m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
1703 m_LargeHeap.free( pBlock );
1707 assert( !pBlock->isAligned() );
1708 superblock_desc * pDesc = pBlock->desc();
1710 m_BoundChecker.check_bounds(
1712 reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
1717 anchor_tag oldAnchor;
1718 anchor_tag newAnchor;
1719 processor_heap_base * pProcHeap = pDesc->pProcHeap;
1721 pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
1723 oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
// CAS loop: thread this block onto the head of the superblock free list.
1725 newAnchor = oldAnchor;
1726 reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
1727 newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
1730 assert( oldAnchor.state != SBSTATE_EMPTY );
// State transitions: FULL -> PARTIAL on first free; last free -> EMPTY.
1732 if ( oldAnchor.state == SBSTATE_FULL )
1733 newAnchor.state = SBSTATE_PARTIAL;
1735 if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
1736 //pProcHeap = pDesc->pProcHeap;
1737 //CDS_COMPILER_RW_BARRIER ; // instruction fence is needed?..
1738 newAnchor.state = SBSTATE_EMPTY;
1741 newAnchor.count += 1;
1742 } while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
1744 pProcHeap->stat.incFreeCount();
1746 if ( newAnchor.state == SBSTATE_EMPTY ) {
// Superblock fully drained: reclaim it, but only if we can unlink it from
// the partial list (another thread may still be using it as active).
1747 if ( pProcHeap->unlink_partial( pDesc ))
1748 free_superblock( pDesc );
1750 else if (oldAnchor.state == SBSTATE_FULL ) {
1751 assert( pProcHeap != nullptr );
1752 pProcHeap->stat.decDescFull();
1753 pProcHeap->add_partial( pDesc );
1757 /// Reallocate memory block
1759 If \p nNewSize is zero, then the block pointed to by \p pMemory is freed;
1760 the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
1762 If there is not enough available memory to expand the block to the given size,
1763 the original block is left unchanged, and \p nullptr is returned.
1765 Aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
1766 then the return value is \p nullptr and the original block is left unchanged.
1769 void * pMemory, ///< Pointer to previously allocated memory block
1770 size_t nNewSize ///< New size of memory block, in bytes
1773 if ( nNewSize == 0 ) {
// Keep the caller's requested size before padding it with header + trailer.
1778 const size_t nOrigSize = nNewSize;
1779 nNewSize += sizeof(block_header) + bound_checker::trailer_size;
1781 block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
1783 // Reallocation of aligned block is not possible
1784 if ( pBlock->isAligned() ) {
1789 if ( pBlock->isOSAllocated() ) {
1790 // The block has been allocated from OS
1791 size_t nCurSize = pBlock->getOSAllocSize();
// Shrinking (or equal) fits in place for OS blocks.
1793 if ( nCurSize >= nNewSize )
// Growing: allocate fresh, copy the old payload (header excluded), then
// (in elided lines) free the old block.
1797 void * pNewBuf = alloc( nOrigSize );
1799 memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
1805 superblock_desc * pDesc = pBlock->desc();
// Heap block whose size class already covers the new size: refresh the
// bound-check trailer and return the same pointer.
1806 if ( pDesc->nBlockSize <= nNewSize ) {
1807 // In-place reallocation
1808 m_BoundChecker.make_trailer(
1809 reinterpret_cast<byte *>(pBlock + 1),
1810 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
// Otherwise: allocate a larger block and move the payload across.
1817 void * pNew = alloc( nNewSize );
1819 memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
1827 /// Allocate aligned memory block
// For alignments within the heap's natural block alignment, plain alloc()
// already suffices. Larger alignments over-allocate by nAlignment and plant a
// one-word "redirect" header just before the aligned user pointer; free()
// follows it back to the real block header via begin().
1828 void * alloc_aligned(
1829 size_t nSize, ///< Size of memory block to allocate in bytes
1830 size_t nAlignment ///< Alignment
1833 if ( nAlignment <= c_nDefaultBlockAlignment ) {
1834 void * p = alloc( nSize );
1835 assert( (reinterpret_cast<uptr_atomic_t>(p) & (nAlignment - 1)) == 0 );
1839 block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
1841 block_header * pRedirect;
1842 if ( (reinterpret_cast<uptr_atomic_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
// Round the block address up to the next nAlignment boundary; the redirect
// header sits immediately below that boundary.
1843 pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uptr_atomic_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
1844 assert( pRedirect != pBlock );
// The redirect stores the real header's address (tag bit 1 marks it as such).
1845 pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
1847 assert( (reinterpret_cast<uptr_atomic_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
1853 // Bound checking is only for our blocks
1854 if ( !pBlock->isOSAllocated() ) {
1855 // the block is allocated from our heap - bound checker is applicable
1856 m_BoundChecker.make_trailer(
1857 reinterpret_cast<byte *>(pRedirect + 1),
1858 reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
1863 return pRedirect + 1;
1866 /// Free aligned memory block previously allocated by \ref alloc_aligned
// Body elided in this excerpt; presumably delegates to free(), which already
// resolves redirect headers — confirm against the full file.
1868 void * pMemory ///< Pointer to memory block to free
1876 /// Get instant summary statistics
// Aggregates per-heap statistics across every created processor descriptor,
// plus the OS-allocation counters. Relaxed loads: the snapshot is advisory,
// not a synchronization point.
1877 void summaryStat( summary_stat& st )
1879 size_t nProcHeapCount = m_SizeClassSelector.size();
1880 for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
1881 processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
1883 for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
1884 processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
1886 st.add_procheap_stat( pProcHeap->stat );
1892 st.add_heap_stat( m_OSAllocStat );
1896 }}} // namespace cds::memory::michael
1898 #endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H