#ifndef __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H
#define __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H

Michael allocator implementation
[2004] Maged Michael "Scalable Lock-Free Dynamic Memory Allocation"

2011.09.07 khizmax Optimization: a small page (about 64K) is allocated by a Heap::alloc call.
                   This optimization lets the allocator request system memory more regularly,
                   in blocks of 1M, which leads to less memory fragmentation.
2011.01.02 khizmax Created

#include <cds/memory/michael/options.h>
#include <cds/memory/michael/bound_check.h>
#include <cds/memory/michael/procheap_stat.h>
#include <cds/memory/michael/osalloc_stat.h>

#include <cds/os/topology.h>
#include <cds/os/alloc_aligned.h>
#include <cds/lock/spinlock.h>
#include <cds/details/type_padding.h>
#include <cds/details/marked_ptr.h>
#include <cds/container/vyukov_mpmc_cycle_queue.h>
#include <cds/user_setup/cache_line.h>
#include <cds/details/lib.h>

#include <boost/intrusive/list.hpp>
/// Memory-related algorithms: allocators etc.

/// Michael's allocator (class Heap)
\li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"

This namespace declares the main class Heap and a number of helper classes.

unsigned int nBlockSize ; ///< block size in bytes
unsigned int nSBSize ; ///< superblock size (64K or 1M)
unsigned int nCapacity ; ///< superblock capacity (nSBSize / nBlockSize)
unsigned int nSBSizeIdx ; ///< internal superblock size index (page index)

/// %Heap based on system \p malloc and \p free functions

/// Allocates a memory block of \p nSize bytes (\p malloc wrapper)
static void * alloc( size_t nSize )
return ::malloc( nSize );
/// Returns a memory block to the system (\p free wrapper)
static void free( void * p )
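// Any class exposing the same static alloc/free contract can stand in for
// malloc_heap, e.g. via the opt::system_heap option described below. A minimal
// sketch under that assumption (zeroing_malloc_heap is illustrative only and
// not part of the library):
struct zeroing_malloc_heap
{
    /// Allocates a zero-initialized memory block of \p nSize bytes
    static void * alloc( size_t nSize )
    {
        return ::calloc( 1, nSize );
    }
    /// Returns the memory block to the system
    static void free( void * p )
    {
        ::free( p );
    }
};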
/// %Heap based on system-provided aligned \p malloc and \p free functions
struct aligned_malloc_heap
/// Allocates an aligned memory block of \p nSize bytes with alignment \p nAlignment
static void * alloc( size_t nSize, size_t nAlignment )
return cds::OS::aligned_malloc( nSize, nAlignment );
/// Frees an aligned memory block \p p that has been previously allocated by an \ref alloc call
static void free( void * p )
cds::OS::aligned_free( p );

/// Page heap based on \p Heap
A page heap can allocate memory in page-sized blocks only.
\p Heap may be any heap that provides an interface like \ref malloc_heap.
This class is one of the available implementations of the opt::page_heap option.
template <class Heap = malloc_heap>
class page_allocator: public Heap
typedef Heap base_class;
size_t nPageSize ///< page size in bytes
: m_nPageSize( nPageSize )
/// Allocates a new page
return base_class::alloc( m_nPageSize );
/// Frees page \p pPage
void free( void * pPage )
base_class::free( pPage );
/// Page-cached heap
To improve performance this allocator maintains a small list of free pages.
A page heap can allocate memory in page-sized blocks only.
\li \p FreeListCapacity - capacity of the free list; the default value is 64 pages
\li \p Heap may be any heap that provides an interface like \ref malloc_heap.
This class is one of the available implementations of the opt::page_heap option.
template <size_t FreeListCapacity = 64, class Heap = malloc_heap>
class page_cached_allocator: public page_allocator<Heap>
typedef page_allocator<Heap> base_class;
struct make_null_ptr {
void operator ()(void *& p)
typedef container::VyukovMPMCCycleQueue<
opt::buffer< opt::v::static_buffer<void *, FreeListCapacity> >
, opt::value_cleaner< make_null_ptr >
free_list m_FreeList;
page_cached_allocator(
size_t nPageSize ///< page size in bytes
: base_class( nPageSize )
, m_FreeList( FreeListCapacity )
~page_cached_allocator()
while ( m_FreeList.pop(pPage) )
base_class::free( pPage );
/// Allocates a new page
if ( !m_FreeList.pop( pPage ) )
pPage = base_class::alloc();
/// Frees page \p pPage
void free( void * pPage )
if ( !m_FreeList.push( pPage ))
base_class::free( pPage );
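// Configuration sketch: both the cache capacity and the underlying heap are
// template parameters, so a Heap (declared below) could be configured as, e.g.:
//      typedef cds::memory::michael::Heap<
//          opt::page_heap< page_cached_allocator<128, malloc_heap> >
//      > heap_with_larger_page_cache;  // illustrative name, not part of the library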
/// Implementation of the opt::sizeclass_selector option
The default size-class selector can manage memory blocks up to 64K.
class CDS_EXPORT_API default_sizeclass_selector
/// Count of different size classes
static const size_t c_nSizeClassCount = 63;
static const size_t c_nMaxBlockSize = 64 * 1024;
/// Page size of type 0 (64K)
static const unsigned int c_nPage64K = 64 * 1024 - 32;
/// Page size of type 1 (1M)
static const unsigned int c_nPage1M = 1024 * 1024;
static CDS_DATA_ALIGNMENT(128) unsigned int const m_szClassBounds[c_nSizeClassCount];
static size_class const m_szClass[c_nSizeClassCount];
static unsigned char const m_szClassMap[];
/// Type of size-class index
typedef unsigned int sizeclass_index;
default_sizeclass_selector();
/// "No size class" index
static const sizeclass_index c_nNoSizeClass = (unsigned int) (0 - 1);
/// Returns the size-class count
static sizeclass_index size()
return c_nSizeClassCount;
/// Returns the page size in bytes for the given page type \p nPageType
static size_t page_size(size_t nPageType )
assert(false) ; // anything forgotten?..
/// Returns the count of page size classes
This class supports pages of two types: 64K pages for small objects and 1M pages for other objects.
static size_t pageTypeCount()
/// Returns the size-class index for \p nSize
For large blocks that cannot be allocated by Michael's allocator
the function returns \p c_nNoSizeClass (i.e. -1).
static sizeclass_index find( size_t nSize )
if ( nSize > c_nMaxBlockSize ) {
// Too large a block - allocate from the system
return c_nNoSizeClass;
sizeclass_index szClass = m_szClassMap[ (nSize + 15) / 16 ];
assert( nSize <= m_szClassBounds[ szClass ] );
assert( szClass == 0 || m_szClassBounds[ szClass - 1] < nSize );
/// Gets the details::size_class struct for size-class index \p nIndex
static const size_class * at( sizeclass_index nIndex )
assert( nIndex < size() );
return m_szClass + nIndex;
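// Worked example (sketch): a 100-byte request maps through m_szClassMap to the
// first size class whose bound covers it; the asserts restate the postconditions
// of find() above. Illustrative code, not part of the library:
//      default_sizeclass_selector::sizeclass_index idx = default_sizeclass_selector::find( 100 );
//      assert( idx != default_sizeclass_selector::c_nNoSizeClass );
//      assert( default_sizeclass_selector::at( idx )->nBlockSize >= 100 );
//      assert( default_sizeclass_selector::find( 64 * 1024 + 1 )
//              == default_sizeclass_selector::c_nNoSizeClass );  // too large: goes to the OS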
struct free_list_tag;
typedef boost::intrusive::list_base_hook< boost::intrusive::tag< free_list_tag > > free_list_locked_hook;
struct partial_list_tag;
typedef boost::intrusive::list_base_hook< boost::intrusive::tag< partial_list_tag > > partial_list_locked_hook;
struct intrusive_superblock_desc: public free_list_locked_hook, partial_list_locked_hook

/// List of free superblock descriptors
This class is an implementation of the \ref opt::free_list option
template <class Lock, class T = details::intrusive_superblock_desc>
class free_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> >
typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::free_list_locked_hook> > base_class;
typedef details::free_list_locked_hook item_hook;
typedef Lock lock_type;
typedef cds::lock::scoped_lock<lock_type> auto_lock;
mutable lock_type m_access;
/// Rebinds to another item type \p T2
typedef free_list_locked<Lock, T2> other ; ///< rebind result
/// Pushes superblock descriptor \p pDesc to the free list
void push( T * pDesc )
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
auto_lock al(m_access);
base_class::push_back( *pDesc );
/// Pops a superblock descriptor from the free list
auto_lock al(m_access);
if ( base_class::empty() )
T& rDesc = base_class::front();
base_class::pop_front();
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
/// Returns the current count of superblocks in the free list
auto_lock al(m_access);
return base_class::size();
/// List of partially filled superblock descriptors
This class is an implementation of the \ref opt::partial_list option
template <class Lock, class T = details::intrusive_superblock_desc>
class partial_list_locked: public boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> >
typedef boost::intrusive::list<T, boost::intrusive::base_hook<details::partial_list_locked_hook> > base_class;
typedef details::partial_list_locked_hook item_hook;
typedef Lock lock_type;
typedef cds::lock::scoped_lock<lock_type> auto_lock;
mutable lock_type m_access;
/// Rebinds to another item type \p T2
typedef partial_list_locked<Lock, T2> other ; ///< rebind result
/// Pushes superblock \p pDesc to the list
void push( T * pDesc )
auto_lock al( m_access );
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) );
base_class::push_back( *pDesc );
/// Pops a superblock from the list
auto_lock al( m_access );
if ( base_class::empty() )
T& rDesc = base_class::front();
base_class::pop_front();
assert( base_class::node_algorithms::inited( static_cast<item_hook *>(&rDesc) ) );
/// Removes descriptor \p pDesc from the partial list
bool unlink( T * pDesc )
assert(pDesc != nullptr);
auto_lock al( m_access );
// !inited(pDesc) means that pDesc is currently linked into the partial list
if ( !base_class::node_algorithms::inited( static_cast<item_hook *>(pDesc) ) ) {
base_class::erase( base_class::iterator_to( *pDesc ) );
/// Count of elements in the list
auto_lock al( m_access );
return base_class::size();
/// Summary processor heap statistics
Summary heap statistics for use with the Heap::summaryStat function.
size_t nAllocFromActive ; ///< Count of allocations from the active superblock
size_t nAllocFromPartial ; ///< Count of allocations from a partial superblock
size_t nAllocFromNew ; ///< Count of allocations from a new superblock
size_t nFreeCount ; ///< Count of \p free function calls
size_t nPageAllocCount ; ///< Count of pages (superblocks) allocated
size_t nPageDeallocCount ; ///< Count of pages (superblocks) deallocated
size_t nDescAllocCount ; ///< Count of superblock descriptors
size_t nDescFull ; ///< Count of full superblocks
atomic64u_t nBytesAllocated ; ///< Count of allocated bytes (for heap-managed memory blocks)
atomic64u_t nBytesDeallocated ; ///< Count of deallocated bytes (for heap-managed memory blocks)
size_t nSysAllocCount ; ///< Count of \p alloc and \p alloc_aligned function calls (for large memory blocks allocated directly from the OS)
size_t nSysFreeCount ; ///< Count of \p free and \p free_aligned function calls (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesAllocated ; ///< Count of allocated bytes (for large memory blocks allocated directly from the OS)
atomic64u_t nSysBytesDeallocated; ///< Count of deallocated bytes (for large memory blocks allocated directly from the OS)
// Internal contention indicators
/// CAS failure counter for updating the \p active field of the active block in the \p alloc_from_active Heap internal function
Contention indicator: the lower the value, the better
size_t nActiveDescCASFailureCount;
/// CAS failure counter for updating the \p anchor field of the active block in the \p alloc_from_active Heap internal function
Contention indicator: the lower the value, the better
size_t nActiveAnchorCASFailureCount;
/// CAS failure counter for the credit-reserving update of the \p anchor field of a partial block in the \p alloc_from_partial Heap internal function
Contention indicator: the lower the value, the better
size_t nPartialDescCASFailureCount;
/// CAS failure counter for the block-popping update of the \p anchor field of a partial block in the \p alloc_from_partial Heap internal function
Contention indicator: the lower the value, the better
size_t nPartialAnchorCASFailureCount;
/// Constructs empty statistics. All counters are zero.
/// Difference statistics
This operator computes the difference between \p *this and \p stat and stores the difference in \p *this.
summary_stat& operator -=( const summary_stat& stat )
nAllocFromActive -= stat.nAllocFromActive;
nAllocFromPartial -= stat.nAllocFromPartial;
nAllocFromNew -= stat.nAllocFromNew;
nFreeCount -= stat.nFreeCount;
nPageAllocCount -= stat.nPageAllocCount;
nPageDeallocCount -= stat.nPageDeallocCount;
nDescAllocCount -= stat.nDescAllocCount;
nDescFull -= stat.nDescFull;
nBytesAllocated -= stat.nBytesAllocated;
nBytesDeallocated -= stat.nBytesDeallocated;
nSysAllocCount -= stat.nSysAllocCount;
nSysFreeCount -= stat.nSysFreeCount;
nSysBytesAllocated -= stat.nSysBytesAllocated;
nSysBytesDeallocated -= stat.nSysBytesDeallocated;
nActiveDescCASFailureCount -= stat.nActiveDescCASFailureCount;
nActiveAnchorCASFailureCount -= stat.nActiveAnchorCASFailureCount;
nPartialDescCASFailureCount -= stat.nPartialDescCASFailureCount;
nPartialAnchorCASFailureCount -= stat.nPartialAnchorCASFailureCount;
/// Clears statistics
All counters are set to zero.
memset( this, 0, sizeof(*this));
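// Typical usage (sketch; myHeap is an application-defined Heap<> object):
//      summary_stat stBegin, stEnd;
//      myHeap.summaryStat( stBegin );
//      // ... workload ...
//      myHeap.summaryStat( stEnd );
//      stEnd -= stBegin;   // stEnd now holds the per-interval counters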
template <typename Stat>
summary_stat& add_procheap_stat( const Stat& stat )
nAllocFromActive += stat.allocFromActive();
nAllocFromPartial += stat.allocFromPartial();
nAllocFromNew += stat.allocFromNew();
nFreeCount += stat.freeCount();
nPageAllocCount += stat.blockAllocated();
nPageDeallocCount += stat.blockDeallocated();
nDescAllocCount += stat.descAllocCount();
nDescFull += stat.descFull();
nBytesAllocated += stat.allocatedBytes();
nBytesDeallocated += stat.deallocatedBytes();
nActiveDescCASFailureCount += stat.activeDescCASFailureCount();
nActiveAnchorCASFailureCount += stat.activeAnchorCASFailureCount();
nPartialDescCASFailureCount += stat.partialDescCASFailureCount();
nPartialAnchorCASFailureCount += stat.partialAnchorCASFailureCount();
template <typename Stat>
summary_stat& add_heap_stat( const Stat& stat )
nSysAllocCount += stat.allocCount();
nSysFreeCount += stat.freeCount();
nSysBytesAllocated += stat.allocatedBytes();
nSysBytesDeallocated += stat.deallocatedBytes();
/// Michael's allocator
This class provides the base functionality for Michael's allocator. It does not provide
the interface described by \p std::allocator; therefore, we call it a heap rather than an allocator.
The heap interface is closer to the semantics of the \p malloc / \p free system functions.
The heap supports allocation of aligned and unaligned data.
The algorithm is based on a simplified version of
\li [2004] M.Michael "Scalable Lock-free Dynamic Memory Allocation"
which, in turn, is a concurrent version of the well-known Hoard allocator developed by Emery Berger; see
\li [2002] Emery Berger "Memory Management for High-Performance Applications", PhD thesis
This is a powerful, scalable, fully customizable heap with a lock-free fast path,
developed specifically for multi-threaded applications.
With opt::sys_topology you can set up as many allocation arenas ("processor heaps") as you need.
You can manually bind any of your threads to any arena ("processor"). With the opt::sizeclass_selector option you can manage
allocation granularity. With opt::page_heap you can utilize any OS-provided feature for page allocation,
such as \p mmap, \p VirtualAlloc etc. The heap can gather internal statistics that help you tune your application.
The opt::check_bounds feature can help you find memory buffer overflows.
Brief algorithm description from Michael's work:
Large blocks (greater than 64K) are allocated directly from the OS and freed directly to the OS. For smaller block sizes,
the heap is composed of large superblocks (64K or 1M in size). Each superblock is divided into multiple equal-sized blocks.
Superblocks are distributed among size classes based on their block sizes. Each size class contains multiple processor
heaps proportional to the number of processors in the system. A processor heap contains at most one active superblock.
An active superblock contains one or more blocks available for reservation that are guaranteed to be available to threads
that reach them through the header of the processor heap. Each superblock is associated with a descriptor. Each allocated
block contains a prefix (8 bytes) that points to the descriptor of its superblock. On the first call to malloc, the static
structures for the size classes and processor heaps (about 16 KB for a 16-processor machine) are allocated and initialized
in a lock-free manner.
Malloc starts by identifying the appropriate processor heap, based on the requested block size and the identity of
the calling thread. Typically, the heap already has an active superblock with blocks available for reservation. The thread
atomically reads a pointer to the descriptor of the active superblock and reserves a block. Next, the thread atomically
pops a block from that superblock and updates its descriptor. A typical free pushes the freed block into the list of
available blocks of its original superblock by atomically updating its descriptor.
<b>Constraint</b>: one superblock may contain up to 2048 blocks, i.e. nSBSize / nBlockSize <= 2048
(for example, blocks in a 1M superblock cannot be smaller than 512 bytes).
Available \p Options:
- \ref opt::sys_topology - class that describes the system topology needed for the allocator.
Default is \p cds::OS::topology (see cds::OS::Win32::topology for the interface description)
- \ref opt::system_heap - option setter for an allocator for large blocks that are allocated directly from the OS.
Default is \ref malloc_heap.
- \ref opt::aligned_heap - option setter for a heap used for internal aligned memory management.
Default is \ref aligned_malloc_heap
- \ref opt::page_heap - option setter for a heap used for page (superblock) allocation of 64K/1M size.
Default is \ref page_cached_allocator
- \ref opt::sizeclass_selector - option setter for a class used to select the appropriate size class
for an incoming allocation request.
Default is \ref default_sizeclass_selector
- \ref opt::free_list - option setter for a class that manages the list of free superblock descriptors.
Default is \ref free_list_locked
- \ref opt::partial_list - option setter for a class that manages the list of partially filled superblocks.
Default is \ref partial_list_locked
- \ref opt::procheap_stat - option setter for a class that gathers internal statistics for memory allocation
maintained by the heap.
Default is \ref procheap_empty_stat
- \ref opt::os_allocated_stat - option setter for a class that gathers internal statistics for large-block
allocation. The term "large block" is defined by the size-class selector (see \ref opt::sizeclass_selector)
and is 64K for \ref default_sizeclass_selector. Any block larger than 64K is allocated from the
OS directly. The \p os_allocated_stat option sets a class that gathers statistics for such large blocks.
Default is \ref os_allocated_empty
- \ref opt::check_bounds - a bound checker.
Default is no bound checker (cds::opt::none)
The heap is the basic building block for your allocator or <tt>operator new</tt> implementation.
#include <cds/memory/michael/allocator.h>
// Heap with explicitly defined options:
cds::memory::michael::Heap<
opt::aligned_heap< aligned_malloc_heap >,
opt::page_heap< page_cached_allocator<16, malloc_heap> >
// Heap with default options:
cds::memory::michael::Heap<> myDefHeap;
\par How to make a std-like allocator

There are serious differences between the heap and the <tt>std::allocator</tt> interface:
- The heap is stateful, while \p std::allocator is stateless.
- The heap has many more template parameters than \p std::allocator
- The heap has a low-level interface for memory allocation only, unlike the allocator
interface, which can construct/destroy objects of any type T.
To convert the heap interface into a \p std::allocator -like interface you should:
- Declare an object of class cds::memory::michael::Heap specifying the necessary
template parameters; this is usually a static object
- Create a class with the \p std::allocator interface that uses the functions of the heap.
#include <cds/memory/michael/allocator.h>
class MichaelAllocator
typedef std::allocator<T> std_allocator;
typedef cds::memory::michael::Heap<> michael_heap;
// Michael heap static object
static michael_heap s_Heap;
// Declare typedefs from std::allocator
typedef typename std_allocator::const_pointer const_pointer;
typedef typename std_allocator::pointer pointer;
typedef typename std_allocator::const_reference const_reference;
typedef typename std_allocator::reference reference;
typedef typename std_allocator::difference_type difference_type;
typedef typename std_allocator::size_type size_type;
typedef typename std_allocator::value_type value_type;
// Allocation function
pointer allocate( size_type _Count, const void* _Hint )
return reinterpret_cast<pointer>( s_Heap.alloc( sizeof(T) * _Count ));
// Deallocation function
void deallocate( pointer _Ptr, size_type _Count )
// Other std::allocator specific functions: address, construct, destroy, etc.
// Rebinding the allocator to another type
template <class _Other>
typedef MichaelAllocator<_Other> other;
template <typename T>
typename MichaelAllocator<T>::michael_heap MichaelAllocator<T>::s_Heap;
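// The allocator can then be used with standard containers (assuming the
// remaining std::allocator members noted above are implemented):
std::vector<int, MichaelAllocator<int> > v;
v.push_back( 42 );  // storage comes from MichaelAllocator<int>::s_Heap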
template <typename... Options>
static const unsigned int c_nAlignment = cds::c_nCacheLineSize;
static const unsigned int c_nDefaultBlockAlignment = 8;
struct default_options {
typedef cds::OS::topology sys_topology;
typedef malloc_heap system_heap;
typedef page_cached_allocator<> page_heap;
typedef aligned_malloc_heap aligned_heap;
typedef default_sizeclass_selector sizeclass_selector;
typedef free_list_locked<cds::lock::Spin> free_list;
typedef partial_list_locked<cds::lock::Spin> partial_list;
typedef procheap_empty_stat procheap_stat;
typedef os_allocated_empty os_allocated_stat;
typedef cds::opt::none check_bounds;
typedef typename opt::make_options<default_options, Options...>::type options;
typedef unsigned char byte;
typedef typename options::sys_topology sys_topology ; ///< effective system topology
typedef typename options::system_heap system_heap ; ///< effective system heap
typedef typename options::aligned_heap aligned_heap ; ///< effective aligned heap
typedef typename options::sizeclass_selector sizeclass_selector ; ///< effective size-class selector
typedef typename options::page_heap page_heap ; ///< effective page heap
typedef typename options::procheap_stat procheap_stat ; ///< effective processor heap statistics
typedef typename options::os_allocated_stat os_allocated_stat ; ///< effective OS-allocated memory statistics
typedef details::bound_checker_selector< typename options::check_bounds > bound_checker ; ///< effective bound checker
// forward declarations
struct superblock_desc;
struct processor_heap_base;
struct processor_desc;
/// Superblock states
A superblock can be in one of four states: \p ACTIVE, \p FULL,
\p PARTIAL, or \p EMPTY. A superblock is \p ACTIVE if it is the active
superblock in a heap, or if a thread intends to try to install it
as such. A superblock is \p FULL if all its blocks are either allocated
or reserved. A superblock is \p PARTIAL if it is not \p ACTIVE
and contains unreserved available blocks. A superblock is
\p EMPTY if all its blocks are free and it is not \p ACTIVE.
enum superblock_state {
SBSTATE_ACTIVE = 0, ///< superblock is active
SBSTATE_FULL = 1, ///< superblock is full
SBSTATE_PARTIAL = 2, ///< superblock is partially allocated
SBSTATE_EMPTY = 3 ///< superblock is empty and may be freed
static const size_t c_nMaxBlockInSuperBlock = 1024 * 2 ; ///< Max count of blocks in a superblock (2**11)
/// Anchor of the superblock descriptor. Updated by CAS
unsigned long long avail:11 ; ///< index of the first available block in the superblock
unsigned long long count:11 ; ///< number of unreserved blocks in the superblock
unsigned long long state: 2 ; ///< state of the superblock (see \ref superblock_state enum)
unsigned long long tag:40 ; ///< ABA prevention tag
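// Sketch of the invariant behind the 2048-block constraint documented above:
// the 11-bit avail/count fields must be wide enough to index every block.
static_assert( c_nMaxBlockInSuperBlock <= (1 << 11),
    "anchor_tag::avail/count (11 bits) must cover c_nMaxBlockInSuperBlock" );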
/// Superblock descriptor
struct superblock_desc
: public options::free_list::item_hook
, public options::partial_list::item_hook
atomics::atomic<anchor_tag> anchor ; ///< anchor, see \ref anchor_tag
byte * pSB ; ///< pointer to the superblock
processor_heap_base * pProcHeap ; ///< pointer to the owner processor heap
unsigned int nBlockSize ; ///< block size in bytes
unsigned int nCapacity ; ///< superblock size / block size
, pProcHeap( nullptr )
typedef typename options::free_list::template rebind<superblock_desc>::other free_list;
typedef typename options::partial_list::template rebind<superblock_desc>::other partial_list;
#if CDS_BUILD_BITS == 32
/// Allocated block header
Each allocated block has an 8-byte header.
The header contains a pointer to the owner superblock descriptor and the redirection flag.
If the block has been allocated by \ref alloc, the redirection flag is 0 and the block's structure is:
| blockHeader | [8 bytes] pointer to the owner superblock (flag=0)
| | <- memory allocated
If the block has been allocated by \ref alloc_aligned, the returned pointer may have to be shifted
to satisfy the alignment. In this case the redirection flag is 1 and the block's structure is:
+-> | blockHeader | [8 bytes] pointer to the owner superblock (flag=0)
+-- | blockHeader | [8 bytes] pointer to the block head (flag=1)
| | <- memory allocated
superblock_desc * pDesc ; // pointer to superblock descriptor
atomic32u_t nSize ; // block size (allocated from OS)
void set( superblock_desc * pdesc, atomic32u_t isAligned )
nFlags = isAligned ? bitAligned : 0;
superblock_desc * desc()
assert( (nFlags & bitOSAllocated) == 0 );
return (nFlags & bitAligned) ? reinterpret_cast<block_header *>( pDesc )->desc() : pDesc;
block_header * begin()
return (nFlags & bitAligned) ? reinterpret_cast<block_header *>(pDesc) : this;
bool isAligned() const
return (nFlags & bitAligned) != 0;
bool isOSAllocated() const
return (nFlags & bitOSAllocated) != 0;
void setOSAllocated( size_t sz )
nFlags = bitOSAllocated;
size_t getOSAllocSize() const
assert( isOSAllocated() );
#elif CDS_BUILD_BITS == 64
typedef cds::details::marked_ptr<superblock_desc, bitAligned|bitOSAllocated> marked_desc_ptr;
// If bitOSAllocated is set, pDesc contains the size of the memory block
marked_desc_ptr pDesc;
void set( superblock_desc * pdesc, atomic32u_t isAligned )
pDesc = marked_desc_ptr( pdesc, isAligned );
superblock_desc * desc()
assert( !isOSAllocated() );
return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() )->desc() : pDesc.ptr();
block_header * begin()
return (pDesc.bits() & bitAligned) ? reinterpret_cast<block_header *>( pDesc.ptr() ) : this;
bool isAligned() const
return (pDesc.bits() & bitAligned) != 0;
bool isOSAllocated() const
return (pDesc.bits() & bitOSAllocated) != 0;
void setOSAllocated( size_t nSize )
pDesc = marked_desc_ptr( reinterpret_cast<superblock_desc *>(nSize << 2), bitOSAllocated );
size_t getOSAllocSize() const
assert( isOSAllocated() );
return reinterpret_cast<uptr_atomic_t>( pDesc.ptr() ) >> 2;
# error "Unexpected value of CDS_BUILD_BITS"
#endif // CDS_BUILD_BITS
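// Round-trip sketch of the OS-allocated size encoding above (either build):
//      block_header h;
//      h.setOSAllocated( nSize );   // the size is stored shifted left by 2;
//                                   // the two low bits are reserved for the flags
//      assert( h.isOSAllocated() );
//      assert( h.getOSAllocSize() == nSize );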
struct free_block_header: block_header {
unsigned int nNextFree;

#if CDS_BUILD_BITS == 32
/// Processor heap's \p active field
The \p active field in the processor heap structure is primarily a pointer to the descriptor
of the active superblock owned by the processor heap. If the value of \p active is not \p nullptr, it is
guaranteed that the active superblock has at least one block available for reservation.
Since the addresses of superblock descriptors can be guaranteed to be aligned to some power
of 2 (e.g., 64), as an optimization, we can carve a credits subfield to hold the number
of blocks available for reservation in the active superblock less one. That is, if the value
of credits is n, then the active superblock contains n+1 blocks available for reservation
through the \p active field. Note that the number of blocks in a superblock is not limited
to the maximum reservations that can be held in the credits subfield. In a typical malloc operation
(i.e., when \p active != \p nullptr and \p credits > 0), the thread reads \p active and then
atomically decrements credits while validating that the active superblock is still valid.
superblock_desc * pDesc;
atomic32u_t nCredits;
static const unsigned int c_nMaxCredits = 0 - 1;
CDS_CONSTEXPR active_tag() CDS_NOEXCEPT
active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
active_tag& operator=(active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
# if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
/// Returns pointer to the superblock descriptor
superblock_desc * ptr() const
/// Sets the superblock descriptor
void ptr( superblock_desc * p )
unsigned int credits() const
void credits( unsigned int n )
void set( superblock_desc * pSB, unsigned int n )
#elif CDS_BUILD_BITS == 64
static const unsigned int c_nMaxCredits = c_nAlignment - 1 ; // 0x003F;
typedef cds::details::marked_ptr<superblock_desc, c_nMaxCredits> marked_desc_ptr;
marked_desc_ptr pDesc;
active_tag() CDS_NOEXCEPT
// Clang 3.1: error: first argument to atomic operation must be a pointer to a trivially-copyable type
//active_tag() CDS_NOEXCEPT_DEFAULTED = default;
active_tag( active_tag const& ) CDS_NOEXCEPT_DEFAULTED = default;
~active_tag() CDS_NOEXCEPT_DEFAULTED = default;
active_tag& operator=(active_tag const&) CDS_NOEXCEPT_DEFAULTED = default;
# if !defined(CDS_DISABLE_DEFAULT_MOVE_CTOR)
active_tag( active_tag&& ) CDS_NOEXCEPT_DEFAULTED = default;
active_tag& operator=(active_tag&&) CDS_NOEXCEPT_DEFAULTED = default;
superblock_desc * ptr() const
void ptr( superblock_desc * p )
assert( (reinterpret_cast<uptr_atomic_t>(p) & c_nMaxCredits) == 0 );
pDesc = marked_desc_ptr( p, pDesc.bits());
unsigned int credits()
return (unsigned int) pDesc.bits();
void credits( unsigned int n )
assert( n <= c_nMaxCredits );
pDesc = marked_desc_ptr( pDesc.ptr(), n );
pDesc = marked_desc_ptr();
void set( superblock_desc * pSB, unsigned int n )
assert( (reinterpret_cast<uptr_atomic_t>(pSB) & c_nMaxCredits) == 0 );
pDesc = marked_desc_ptr( pSB, n );
# error "Unexpected value of CDS_BUILD_BITS"
#endif // CDS_BUILD_BITS
struct processor_heap_base
CDS_DATA_ALIGNMENT(8) atomics::atomic<active_tag> active; ///< pointer to the descriptor of the active superblock owned by the processor heap
processor_desc * pProcDesc ; ///< pointer to the parent processor descriptor
const size_class * pSizeClass ; ///< pointer to the size class
atomics::atomic<superblock_desc *> pPartial ; ///< pointer to a partially filled superblock (may be \p nullptr)
partial_list partialList ; ///< list of partially filled superblocks owned by the processor heap
unsigned int nPageIdx ; ///< page size-class index; \ref c_nPageSelfAllocation means "small page"
/// Small page marker
If the page is small and can be allocated by the Heap itself, the \p nPageIdx value is \p c_nPageSelfAllocation.
This optimization lets the allocator request system memory more regularly, in blocks of 1M, which leads
to less memory fragmentation.
static const unsigned int c_nPageSelfAllocation = (unsigned int) -1;
procheap_stat stat ; ///< heap statistics
//processor_heap_statistics stat;
processor_heap_base() CDS_NOEXCEPT
: pProcDesc( nullptr )
, pSizeClass( nullptr )
, pPartial( nullptr )
assert( (reinterpret_cast<uptr_atomic_t>(this) & (c_nAlignment - 1)) == 0 );
/// Gets a partial superblock owned by the processor heap
superblock_desc * get_partial()
superblock_desc * pDesc = pPartial.load(atomics::memory_order_acquire);
pDesc = partialList.pop();
} while ( !pPartial.compare_exchange_weak( pDesc, nullptr, atomics::memory_order_release, atomics::memory_order_relaxed ) );
//assert( pDesc == nullptr || free_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_free_list_hook *>(pDesc) ));
//assert( pDesc == nullptr || partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
/// Adds partial superblock \p pDesc to the list
void add_partial( superblock_desc * pDesc )
assert( pPartial != pDesc );
//assert( partial_desc_list<superblock_desc>::node_algorithms::inited( static_cast<sb_partial_list_hook *>(pDesc) ) );
superblock_desc * pCur = nullptr;
if ( !pPartial.compare_exchange_strong(pCur, pDesc, atomics::memory_order_acq_rel, atomics::memory_order_relaxed) )
partialList.push( pDesc );
/// Removes superblock \p pDesc from the list of partial superblocks
bool unlink_partial( superblock_desc * pDesc )
return partialList.unlink( pDesc );
/// Aligned processor heap structure
typedef typename cds::details::type_padding<processor_heap_base, c_nAlignment>::type processor_heap;
/// Processor descriptor
struct processor_desc
processor_heap * arrProcHeap ; ///< array of processor heaps
free_list listSBDescFree ; ///< list of free superblock descriptors
page_heap * pageHeaps ; ///< array of page heaps (one for each page size)
: arrProcHeap( nullptr )
, pageHeaps( nullptr )
sys_topology m_Topology ; ///< System topology
system_heap m_LargeHeap ; ///< Heap for large blocks
aligned_heap m_AlignedHeap ; ///< Internal aligned heap
sizeclass_selector m_SizeClassSelector ; ///< Size-class selector
atomics::atomic<processor_desc *> * m_arrProcDesc ; ///< array of pointers to the processor descriptors
unsigned int m_nProcessorCount ; ///< Processor count
bound_checker m_BoundChecker ; ///< Bound checker
os_allocated_stat m_OSAllocStat ; ///< OS-allocated memory statistics
/// Allocates a large block from system memory
block_header * alloc_from_OS( size_t nSize )
block_header * p = reinterpret_cast<block_header *>( m_LargeHeap.alloc( nSize ) );
m_OSAllocStat.incBytesAllocated( nSize );
p->setOSAllocated( nSize );
/// Allocates from the active superblock if possible
block_header * alloc_from_active( processor_heap * pProcHeap )
active_tag oldActive;
int nCollision = -1;
oldActive = pProcHeap->active.load(atomics::memory_order_acquire);
if ( !oldActive.ptr() )
unsigned int nCredits = oldActive.credits();
active_tag newActive ; // default = 0
if ( nCredits != 0 ) {
newActive = oldActive;
newActive.credits( nCredits - 1 );
if ( pProcHeap->active.compare_exchange_strong( oldActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed ))
pProcHeap->stat.incActiveDescCASFailureCount( nCollision );
superblock_desc * pDesc = oldActive.ptr();
anchor_tag oldAnchor;
anchor_tag newAnchor;
unsigned int nMoreCredits = 0;
newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
assert( oldAnchor.avail < pDesc->nCapacity );
pAddr = pDesc->pSB + oldAnchor.avail * (unsigned long long) pDesc->nBlockSize;
newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
if ( oldActive.credits() == 0 ) {
// state must be ACTIVE
if ( oldAnchor.count == 0 )
newAnchor.state = SBSTATE_FULL;
nMoreCredits = oldAnchor.count < active_tag::c_nMaxCredits ? ((unsigned int) oldAnchor.count) : active_tag::c_nMaxCredits;
newAnchor.count -= nMoreCredits;
} while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
pProcHeap->stat.incActiveAnchorCASFailureCount( nCollision );
assert( newAnchor.state != SBSTATE_EMPTY );
if ( newAnchor.state == SBSTATE_FULL )
pProcHeap->stat.incDescFull();
if ( oldActive.credits() == 0 && oldAnchor.count > 0 )
update_active( pProcHeap, pDesc, nMoreCredits );
pProcHeap->stat.incAllocFromActive();
// The block_header fields need not be set up here:
// they were initialized in alloc_from_new_superblock
assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a partially filled superblock if possible
block_header * alloc_from_partial( processor_heap * pProcHeap )
superblock_desc * pDesc = pProcHeap->get_partial();
anchor_tag oldAnchor;
anchor_tag newAnchor;
unsigned int nMoreCredits = 0;
int nCollision = -1;
newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
if ( oldAnchor.state == SBSTATE_EMPTY ) {
free_superblock( pDesc );
nMoreCredits = ((unsigned int)(oldAnchor.count - 1)) < active_tag::c_nMaxCredits ? (unsigned int)(oldAnchor.count - 1) : active_tag::c_nMaxCredits;
newAnchor.count -= nMoreCredits + 1;
newAnchor.state = (nMoreCredits > 0) ? SBSTATE_ACTIVE : SBSTATE_FULL;
} while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
pProcHeap->stat.incPartialDescCASFailureCount( nCollision );
if ( newAnchor.state == SBSTATE_FULL )
pProcHeap->stat.incDescFull();
// Now the thread is guaranteed to have reserved one or more blocks;
// pop a reserved block
newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
assert( oldAnchor.avail < pDesc->nCapacity );
pAddr = pDesc->pSB + oldAnchor.avail * pDesc->nBlockSize;
newAnchor.avail = reinterpret_cast<free_block_header *>( pAddr )->nNextFree;
} while ( !pDesc->anchor.compare_exchange_strong(oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed) );
pProcHeap->stat.incPartialAnchorCASFailureCount( nCollision );
assert( newAnchor.state != SBSTATE_EMPTY );
pProcHeap->stat.incAllocFromPartial();
if ( nMoreCredits > 0 )
update_active( pProcHeap, pDesc, nMoreCredits );
// The block_header fields need not be set up here:
// they were initialized in alloc_from_new_superblock
assert( reinterpret_cast<block_header *>( pAddr )->desc() == pDesc );
assert( !reinterpret_cast<block_header *>( pAddr )->isAligned() );
assert( !reinterpret_cast<block_header *>( pAddr )->isOSAllocated() );
return reinterpret_cast<block_header *>( pAddr );
/// Allocates from a new superblock
block_header * alloc_from_new_superblock( processor_heap * pProcHeap )
superblock_desc * pDesc = new_superblock_desc( pProcHeap );
assert( pDesc != nullptr );
pDesc->pSB = new_superblock_buffer( pProcHeap );
anchor_tag anchor = pDesc->anchor.load(atomics::memory_order_relaxed);
// Build a singly-linked list of free blocks in the superblock
byte * pEnd = pDesc->pSB + pDesc->nCapacity * pDesc->nBlockSize;
unsigned int nNext = 0;
const unsigned int nBlockSize = pDesc->nBlockSize;
for ( byte * p = pDesc->pSB; p < pEnd; p += nBlockSize ) {
reinterpret_cast<block_header *>( p )->set( pDesc, 0 );
reinterpret_cast<free_block_header *>( p )->nNextFree = ++nNext;
reinterpret_cast<free_block_header *>( pEnd - nBlockSize )->nNextFree = 0;
active_tag newActive;
newActive.set( pDesc, ( (pDesc->nCapacity - 1 < active_tag::c_nMaxCredits) ? pDesc->nCapacity - 1 : active_tag::c_nMaxCredits ) - 1 );
anchor.count = pDesc->nCapacity - 1 - (newActive.credits() + 1);
anchor.state = SBSTATE_ACTIVE;
pDesc->anchor.store(anchor, atomics::memory_order_relaxed);
active_tag curActive;
if ( pProcHeap->active.compare_exchange_strong( curActive, newActive, atomics::memory_order_release, atomics::memory_order_relaxed )) {
pProcHeap->stat.incAllocFromNew();
//reinterpret_cast<block_header *>( pDesc->pSB )->set( pDesc, 0 );
return reinterpret_cast<block_header *>( pDesc->pSB );
free_superblock( pDesc );
/// Finds the appropriate processor heap for the selected size class
processor_heap * find_heap( typename sizeclass_selector::sizeclass_index nSizeClassIndex )
assert( nSizeClassIndex < m_SizeClassSelector.size() );
unsigned int nProcessorId = m_Topology.current_processor();
assert( nProcessorId < m_nProcessorCount );
if ( nProcessorId >= m_nProcessorCount )
processor_desc * pDesc = m_arrProcDesc[ nProcessorId ].load( atomics::memory_order_relaxed );
processor_desc * pNewDesc = new_processor_desc( nProcessorId );
if ( m_arrProcDesc[nProcessorId].compare_exchange_strong( pDesc, pNewDesc, atomics::memory_order_release, atomics::memory_order_relaxed ) ) {
free_processor_desc( pNewDesc );
return &( pDesc->arrProcHeap[ nSizeClassIndex ] );
/// Updates the \p active field of processor heap \p pProcHeap
void update_active( processor_heap * pProcHeap, superblock_desc * pDesc, unsigned int nCredits )
assert( pProcHeap == pDesc->pProcHeap );
active_tag nullActive;
active_tag newActive;
newActive.set( pDesc, nCredits - 1 );
if ( pProcHeap->active.compare_exchange_strong( nullActive, newActive, atomics::memory_order_seq_cst, atomics::memory_order_relaxed ) )
// Someone has installed another active superblock.
// Return the credits to the superblock and make it partial
anchor_tag oldAnchor;
anchor_tag newAnchor;
newAnchor = oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
newAnchor.count += nCredits;
newAnchor.state = SBSTATE_PARTIAL;
} while ( !pDesc->anchor.compare_exchange_weak( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ));
pDesc->pProcHeap->add_partial( pDesc );
/// Allocates a new processor descriptor
processor_desc * new_processor_desc( unsigned int nProcessorId )
processor_desc * pDesc;
const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
Processor descriptor layout

proc_desc    - 64-byte alignment
page_heap[0]   64-byte alignment
page_heap[1]   64-byte alignment
page_heap[P]   64-byte alignment

proc_heap[0]   64-byte alignment
proc_heap[1]   64-byte alignment
proc_heap[N]   64-byte alignment

const size_t szDesc =
( sizeof(processor_desc)
+ sizeof(pDesc->pageHeaps[0]) * nPageHeapCount
const size_t szTotal = szDesc * c_nAlignment + sizeof(processor_heap) * m_SizeClassSelector.size();
static_assert( (sizeof(processor_heap) % c_nAlignment) == 0, "sizeof(processor_heap) error" );
pDesc = new( m_AlignedHeap.alloc( szTotal, c_nAlignment ) ) processor_desc;
pDesc->pageHeaps = reinterpret_cast<page_heap *>( pDesc + 1 );
for ( size_t i = 0; i < nPageHeapCount; ++i )
new (pDesc->pageHeaps + i) page_heap( m_SizeClassSelector.page_size(i));
// initialize processor heaps
pDesc->arrProcHeap =
reinterpret_cast<processor_heap *>(
reinterpret_cast<uptr_atomic_t>(reinterpret_cast<byte *>(pDesc + 1) + sizeof(pDesc->pageHeaps[0]) * nPageHeapCount + c_nAlignment - 1)
& ~(uptr_atomic_t(c_nAlignment) - 1)
processor_heap * pProcHeap = pDesc->arrProcHeap;
processor_heap * pProcHeapEnd = pDesc->arrProcHeap + m_SizeClassSelector.size();
for ( unsigned int i = 0; pProcHeap != pProcHeapEnd; ++pProcHeap, ++i ) {
new (pProcHeap) processor_heap();
pProcHeap->pProcDesc = pDesc;
pProcHeap->pSizeClass = m_SizeClassSelector.at(i);
if ( m_SizeClassSelector.find( pProcHeap->pSizeClass->nSBSize ) != sizeclass_selector::c_nNoSizeClass )
pProcHeap->nPageIdx = processor_heap::c_nPageSelfAllocation;
pProcHeap->nPageIdx = pProcHeap->pSizeClass->nSBSizeIdx;
void free_processor_heap( processor_heap * pProcHeap )
if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
superblock_desc * pDesc;
for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
m_AlignedHeap.free( pDesc );
superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
free( pPartial->pSB );
m_AlignedHeap.free( pPartial );
pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
m_AlignedHeap.free( pDesc );
page_heap& pageHeap = pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx];
superblock_desc * pDesc;
for ( pDesc = pProcHeap->partialList.pop(); pDesc; pDesc = pProcHeap->partialList.pop()) {
pageHeap.free( pDesc->pSB );
m_AlignedHeap.free( pDesc );
superblock_desc * pPartial = pProcHeap->pPartial.load(atomics::memory_order_relaxed);
pageHeap.free( pPartial->pSB );
m_AlignedHeap.free( pPartial );
pDesc = pProcHeap->active.load(atomics::memory_order_relaxed).ptr();
pageHeap.free( pDesc->pSB );
m_AlignedHeap.free( pDesc );
pProcHeap->~processor_heap();
/// Frees processor descriptor
void free_processor_desc( processor_desc * pDesc )
const size_t nPageHeapCount = m_SizeClassSelector.pageTypeCount();
for (unsigned int j = 0; j < m_SizeClassSelector.size(); ++j )
free_processor_heap( pDesc->arrProcHeap + j );
for ( superblock_desc * pSBDesc = pDesc->listSBDescFree.pop(); pSBDesc; pSBDesc = pDesc->listSBDescFree.pop())
m_AlignedHeap.free( pSBDesc );
for (size_t i = 0; i < nPageHeapCount; ++i )
(pDesc->pageHeaps + i)->page_heap::~page_heap();
//m_IntHeap.free( pDesc->pageHeaps );
pDesc->pageHeaps = nullptr;
pDesc->processor_desc::~processor_desc();
m_AlignedHeap.free( pDesc );
/// Allocates a new superblock descriptor
superblock_desc * new_superblock_desc( processor_heap * pProcHeap )
superblock_desc * pDesc = pProcHeap->pProcDesc->listSBDescFree.pop();
if ( pDesc == nullptr ) {
pDesc = new( m_AlignedHeap.alloc(sizeof(superblock_desc), c_nAlignment ) ) superblock_desc;
assert( (uptr_atomic_t(pDesc) & (c_nAlignment - 1)) == 0 );
anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
pProcHeap->stat.incDescAllocCount();
pDesc->nBlockSize = pProcHeap->pSizeClass->nBlockSize;
pDesc->nCapacity = pProcHeap->pSizeClass->nCapacity;
assert( pDesc->nCapacity <= c_nMaxBlockInSuperBlock );
pDesc->pProcHeap = pProcHeap;
anchor = pDesc->anchor.load( atomics::memory_order_relaxed );
pDesc->anchor.store( anchor, atomics::memory_order_relaxed );
/// Allocates a superblock page
byte * new_superblock_buffer( processor_heap * pProcHeap )
pProcHeap->stat.incBlockAllocated();
if ( pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
return (byte *) alloc( pProcHeap->pSizeClass->nSBSize );
return (byte *) pProcHeap->pProcDesc->pageHeaps[pProcHeap->nPageIdx].alloc();
/// Frees a superblock descriptor and its page
void free_superblock( superblock_desc * pDesc )
pDesc->pProcHeap->stat.incBlockDeallocated();
processor_desc * pProcDesc = pDesc->pProcHeap->pProcDesc;
if ( pDesc->pProcHeap->nPageIdx == processor_heap::c_nPageSelfAllocation ) {
pProcDesc->pageHeaps[pDesc->pProcHeap->nPageIdx].free( pDesc->pSB );
pProcDesc->listSBDescFree.push( pDesc );
/// Allocates a memory block
block_header * int_alloc(
size_t nSize ///< Size of memory block to allocate in bytes
typename sizeclass_selector::sizeclass_index nSizeClassIndex = m_SizeClassSelector.find( nSize );
if ( nSizeClassIndex == sizeclass_selector::c_nNoSizeClass ) {
return alloc_from_OS( nSize );
assert( nSizeClassIndex < m_SizeClassSelector.size() );
block_header * pBlock;
processor_heap * pProcHeap;
pProcHeap = find_heap( nSizeClassIndex );
return alloc_from_OS( nSize );
if ( (pBlock = alloc_from_active( pProcHeap )) != nullptr )
if ( (pBlock = alloc_from_partial( pProcHeap )) != nullptr )
if ( (pBlock = alloc_from_new_superblock( pProcHeap )) != nullptr )
pProcHeap->stat.incAllocatedBytes( pProcHeap->pSizeClass->nBlockSize );
assert( pBlock != nullptr );
/// Heap constructor
// Explicit libcds initialization is needed since a static object may be constructed
m_nProcessorCount = m_Topology.processor_count();
m_arrProcDesc = new( m_AlignedHeap.alloc(sizeof(processor_desc *) * m_nProcessorCount, c_nAlignment ))
atomics::atomic<processor_desc *>[ m_nProcessorCount ];
memset( m_arrProcDesc, 0, sizeof(processor_desc *) * m_nProcessorCount ) ; // ?? memset for atomic<>
The destructor frees all memory allocated by the heap.
for ( unsigned int i = 0; i < m_nProcessorCount; ++i ) {
processor_desc * pDesc = m_arrProcDesc[i].load(atomics::memory_order_relaxed);
free_processor_desc( pDesc );
m_AlignedHeap.free( m_arrProcDesc );
// Explicit termination of libcds
/// Allocates a memory block
size_t nSize ///< Size of memory block to allocate in bytes
block_header * pBlock = int_alloc( nSize + sizeof(block_header) + bound_checker::trailer_size );
// Bound checking is only for our own blocks
if ( !pBlock->isOSAllocated() ) {
// the block is allocated from our heap - the bound checker is applicable
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pBlock + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
/// Frees a previously allocated memory block
void * pMemory ///< Pointer to memory block to free
block_header * pRedirect = (reinterpret_cast<block_header *>( pMemory ) - 1);
block_header * pBlock = pRedirect->begin();
if ( pBlock->isOSAllocated() ) {
// The block has been allocated from the OS
m_OSAllocStat.incBytesDeallocated( pBlock->getOSAllocSize() );
m_LargeHeap.free( pBlock );
assert( !pBlock->isAligned() );
superblock_desc * pDesc = pBlock->desc();
m_BoundChecker.check_bounds(
reinterpret_cast<byte *>( pBlock ) + pDesc->nBlockSize,
anchor_tag oldAnchor;
anchor_tag newAnchor;
processor_heap_base * pProcHeap = pDesc->pProcHeap;
pProcHeap->stat.incDeallocatedBytes( pDesc->nBlockSize );
oldAnchor = pDesc->anchor.load(atomics::memory_order_acquire);
newAnchor = oldAnchor;
reinterpret_cast<free_block_header *>( pBlock )->nNextFree = oldAnchor.avail;
newAnchor.avail = (reinterpret_cast<byte *>( pBlock ) - pDesc->pSB) / pDesc->nBlockSize;
assert( oldAnchor.state != SBSTATE_EMPTY );
if ( oldAnchor.state == SBSTATE_FULL )
newAnchor.state = SBSTATE_PARTIAL;
if ( oldAnchor.count == pDesc->nCapacity - 1 ) {
//pProcHeap = pDesc->pProcHeap;
//CDS_COMPILER_RW_BARRIER ; // is an instruction fence needed here?..
newAnchor.state = SBSTATE_EMPTY;
newAnchor.count += 1;
} while ( !pDesc->anchor.compare_exchange_strong( oldAnchor, newAnchor, atomics::memory_order_release, atomics::memory_order_relaxed ) );
pProcHeap->stat.incFreeCount();
if ( newAnchor.state == SBSTATE_EMPTY ) {
if ( pProcHeap->unlink_partial( pDesc ))
free_superblock( pDesc );
else if (oldAnchor.state == SBSTATE_FULL ) {
assert( pProcHeap != nullptr );
pProcHeap->stat.decDescFull();
pProcHeap->add_partial( pDesc );
/// Reallocates a memory block
If \p nNewSize is zero, the block pointed to by \p pMemory is freed;
the return value is \p nullptr, and \p pMemory is left pointing at a freed block.
If there is not enough available memory to expand the block to the given size,
the original block is left unchanged and \p nullptr is returned.
An aligned memory block cannot be realloc'ed: if \p pMemory has been allocated by \ref alloc_aligned,
the return value is \p nullptr and the original block is left unchanged.
void * pMemory, ///< Pointer to previously allocated memory block
size_t nNewSize ///< New size of memory block, in bytes
if ( nNewSize == 0 ) {
const size_t nOrigSize = nNewSize;
nNewSize += sizeof(block_header) + bound_checker::trailer_size;
block_header * pBlock = reinterpret_cast<block_header *>( pMemory ) - 1;
// Reallocation of an aligned block is not possible
if ( pBlock->isAligned() ) {
if ( pBlock->isOSAllocated() ) {
// The block has been allocated from the OS
size_t nCurSize = pBlock->getOSAllocSize();
if ( nCurSize >= nNewSize )
void * pNewBuf = alloc( nOrigSize );
memcpy( pNewBuf, pMemory, nCurSize - sizeof(block_header) );
superblock_desc * pDesc = pBlock->desc();
if ( nNewSize <= pDesc->nBlockSize ) {   // the existing block is large enough
// In-place reallocation
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pBlock + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
void * pNew = alloc( nNewSize );
memcpy( pNew, pMemory, pDesc->nBlockSize - sizeof(block_header) );
/// Allocates an aligned memory block
void * alloc_aligned(
size_t nSize, ///< Size of memory block to allocate in bytes
size_t nAlignment ///< Alignment
if ( nAlignment <= c_nDefaultBlockAlignment ) {
void * p = alloc( nSize );
assert( (reinterpret_cast<uptr_atomic_t>(p) & (nAlignment - 1)) == 0 );
block_header * pBlock = int_alloc( nSize + nAlignment + sizeof(block_header) + bound_checker::trailer_size );
block_header * pRedirect;
if ( (reinterpret_cast<uptr_atomic_t>( pBlock + 1) & (nAlignment - 1)) != 0 ) {
pRedirect = reinterpret_cast<block_header *>( (reinterpret_cast<uptr_atomic_t>( pBlock ) & ~(nAlignment - 1)) + nAlignment ) - 1;
assert( pRedirect != pBlock );
pRedirect->set( reinterpret_cast<superblock_desc *>(pBlock), 1 );
assert( (reinterpret_cast<uptr_atomic_t>(pRedirect + 1) & (nAlignment - 1)) == 0 );
// Bound checking is only for our own blocks
if ( !pBlock->isOSAllocated() ) {
// the block is allocated from our heap - the bound checker is applicable
m_BoundChecker.make_trailer(
reinterpret_cast<byte *>(pRedirect + 1),
reinterpret_cast<byte *>(pBlock) + pBlock->desc()->nBlockSize,
return pRedirect + 1;
/// Frees an aligned memory block previously allocated by \ref alloc_aligned
void * pMemory ///< Pointer to memory block to free
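// Overall usage sketch of the public interface (application code):
//      cds::memory::michael::Heap<> myHeap;
//      void * p = myHeap.alloc( 100 );
//      void * q = myHeap.alloc_aligned( 256, 64 );  // 256 bytes, 64-byte aligned
//      p = myHeap.realloc( p, 200 );                // may move the block
//      myHeap.free( p );
//      myHeap.free_aligned( q );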
/// Get instant summary statistics
void summaryStat( summary_stat& st )
size_t nProcHeapCount = m_SizeClassSelector.size();
for ( unsigned int nProcessor = 0; nProcessor < m_nProcessorCount; ++nProcessor ) {
processor_desc * pProcDesc = m_arrProcDesc[nProcessor].load(atomics::memory_order_relaxed);
for ( unsigned int i = 0; i < nProcHeapCount; ++i ) {
processor_heap_base * pProcHeap = pProcDesc->arrProcHeap + i;
st.add_procheap_stat( pProcHeap->stat );
st.add_heap_stat( m_OSAllocStat );

}}} // namespace cds::memory::michael

#endif // __CDS_MEMORY_MICHAEL_ALLOCATOR_TMPL_H