From 0099707af905cddaab3d51a5639a1a2ae21ecf3c Mon Sep 17 00:00:00 2001 From: daan Date: Wed, 15 Jan 2020 17:19:01 -0800 Subject: [PATCH] use delayed free for all pages; reduce size of the page structure for improved address calculation --- include/mimalloc-internal.h | 45 +++++++-- include/mimalloc-types.h | 57 ++++++----- src/alloc.c | 131 +++++++++++++------------ src/heap.c | 67 +++++++------ src/init.c | 11 +-- src/page-queue.c | 44 ++++----- src/page.c | 190 +++++++++++++++--------------------- src/segment.c | 19 ++-- 8 files changed, 296 insertions(+), 268 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index d5ce9f59..a9391a40 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -308,7 +308,7 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const // Quick page start for initialized pages static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - const size_t bsize = page->block_size; + const size_t bsize = page->xblock_size; mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); return _mi_segment_page_start(segment, page, bsize, page_size, NULL); } @@ -318,7 +318,40 @@ static inline mi_page_t* _mi_ptr_page(void* p) { return _mi_segment_page_of(_mi_ptr_segment(p), p); } +// Get the block size of a page (special cased for huge objects) +static inline size_t mi_page_block_size(const mi_page_t* page) { + const size_t bsize = page->xblock_size; + mi_assert_internal(bsize > 0); + if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) { + return bsize; + } + else { + size_t psize; + _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL); + return psize; + } +} + // Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3); +} + +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { + return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3); +} + +// Heap access +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap)); +} + +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); + mi_atomic_write(&page->xheap,(uintptr_t)heap); +} + +// Thread free flag helpers static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { return (mi_block_t*)(tf & ~0x03); } @@ -338,7 +371,7 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* // are all blocks in a page freed? static inline bool mi_page_all_free(const mi_page_t* page) { mi_assert_internal(page != NULL); - return (page->used - page->thread_freed == 0); + return (page->used == 0); } // are there immediately available blocks @@ -349,8 +382,8 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) { // are there free blocks in this page? 
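// [Editor's sketch, not part of the patch] The accessors introduced above read a
// single atomic word, `xthread_free`, that packs the head of the cross-thread free
// list together with a 2-bit delayed-free flag. Because blocks are at least
// pointer-aligned, the low two bits of the pointer are always zero and can carry
// the flag. A minimal standalone model of that encoding (the names `tf_*`,
// `block_t`, and `delayed_t` are illustrative, not the mimalloc API):
#include <stdint.h>
#include <assert.h>

typedef struct block_s { struct block_s* next; } block_t;
typedef enum { USE_DELAYED = 0, DELAYED_FREEING = 1, NO_DELAYED = 2, NEVER_DELAYED = 3 } delayed_t;
typedef uintptr_t tf_t;   // block pointer | 2-bit delayed-free flag

static inline tf_t      tf_make (block_t* b, delayed_t d) { return (uintptr_t)b | (uintptr_t)d; }
static inline block_t*  tf_block(tf_t tf)                 { return (block_t*)(tf & ~(uintptr_t)3); }
static inline delayed_t tf_flag (tf_t tf)                 { return (delayed_t)(tf & 3); }

int main(void) {
  block_t b;
  tf_t tf = tf_make(&b, NO_DELAYED);
  assert(tf_block(tf) == &b && tf_flag(tf) == NO_DELAYED);
  return 0;
}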
static inline bool mi_page_has_free(mi_page_t* page) { mi_assert_internal(page != NULL); - bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL)); - mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity); + bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL)); + mi_assert_internal(hasfree || page->used == page->capacity); return hasfree; } @@ -364,7 +397,7 @@ static inline bool mi_page_all_used(mi_page_t* page) { static inline bool mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; - return (page->reserved - page->used + page->thread_freed <= frac); + return (page->reserved - page->used <= frac); } static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { @@ -467,7 +500,7 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { - _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next); + _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } return next; diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index da9bfbac..bf288d60 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -124,6 +124,9 @@ terms of the MIT license. A copy of the license can be found in the file #error "define more bins" #endif +// Used as a special value to encode block sizes in 32 bits. +#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) + // The free lists use encoded next fields // (Only actually encodes when MI_ENCODED_FREELIST is defined.) typedef uintptr_t mi_encoded_t; @@ -136,10 +139,10 @@ typedef struct mi_block_s { // The delayed flags are used for efficient multi-threaded free-ing typedef enum mi_delayed_e { - MI_NO_DELAYED_FREE = 0, - MI_USE_DELAYED_FREE = 1, - MI_DELAYED_FREEING = 2, - MI_NEVER_DELAYED_FREE = 3 + MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list + MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap + MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list + MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim } mi_delayed_t; @@ -167,14 +170,28 @@ typedef uintptr_t mi_thread_free_t; // implement a monotonic heartbeat. The `thread_free` list is needed for // avoiding atomic operations in the common case. // -// `used - thread_freed` == actual blocks that are in use (alive) -// `used - thread_freed + |free| + |local_free| == capacity` // -// note: we don't count `freed` (as |free|) instead of `used` to reduce -// the number of memory accesses in the `mi_page_all_free` function(s). 
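// [Editor's sketch, not part of the patch] The redefined mi_delayed_e above decides
// where a free coming from another thread ends up: only MI_USE_DELAYED_FREE routes
// the block to the owning heap's `thread_delayed_free` list (passing through
// MI_DELAYED_FREEING while the heap is touched); every other state pushes the block
// onto the page-local `thread_free` list. A small standalone model of that routing
// (names are illustrative, not the mimalloc API):
typedef enum {
  USE_DELAYED_FREE   = 0,  // first concurrent free goes to the heap delayed list
  DELAYED_FREEING    = 1,  // transient: another thread is accessing the owning heap
  NO_DELAYED_FREE    = 2,  // a block is already on the heap delayed list
  NEVER_DELAYED_FREE = 3   // sticky; only reset when the page is reclaimed
} delayed_state_t;

// Does a cross-thread free take the heap `thread_delayed_free` path?
static inline int concurrent_free_uses_heap_list(delayed_state_t flag) {
  // all other states (including DELAYED_FREEING) push the block onto the
  // page-local `thread_free` list, preserving the current flag bits
  return (flag == USE_DELAYED_FREE);
}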
-// note: the funny layout here is due to: -// - access is optimized for `mi_free` and `mi_page_alloc` -// - using `uint16_t` does not seem to slow things down +// `used - |thread_free|` == actual blocks that are in use (alive) +// `used - |thread_free| + |free| + |local_free| == capacity` +// +// We don't count `freed` (as |free|) but use `used` to reduce +// the number of memory accesses in the `mi_page_all_free` function(s). +// +// Notes: +// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) +// - Using `uint16_t` does not seem to slow things down +// - The size is 8 words on 64-bit which helps the page index calculations +// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 +// and 12 are still good for address calculation) +// - To limit the structure size, the `xblock_size` is 32-bits only; for +// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size +// - `thread_free` uses the bottom bits as a delayed-free flags to optimize +// concurrent frees where only the first concurrent free adds to the owning +// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). +// The invariant is that no-delayed-free is only set if there is +// at least one block that will be added, or as already been added, to +// the owning heap `thread_delayed_free` list. This guarantees that pages +// will be freed correctly even if only other threads free blocks. typedef struct mi_page_s { // "owned" by the segment uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` @@ -194,23 +211,15 @@ typedef struct mi_page_s { #ifdef MI_ENCODE_FREELIST uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif - size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) - volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free` - volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads - - // less accessed info - size_t block_size; // size available in each block (always `>0`) - mi_heap_t* heap; // the owning heap + volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads + volatile _Atomic(uintptr_t) xheap; + struct mi_page_s* next; // next page owned by this thread with the same `block_size` struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` - - // improve page index calculation - // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds two words - #if (MI_INTPTR_SIZE==4) - void* padding[1]; // 12/14 words on 32-bit plain - #endif } mi_page_t; diff --git a/src/alloc.c b/src/alloc.c index bd81aba0..621fb0db 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. 
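// [Editor's sketch, not part of the patch] `_mi_page_malloc` below is the hot path:
// pop the head of the page-local free list and fall back to the generic allocator
// only when the list is empty. A standalone sketch of the pattern (names are
// illustrative; in the real code the free-list links may also be encoded):
#include <stddef.h>

typedef struct block_s { struct block_s* next; } block_t;
typedef struct page_s  { block_t* free; size_t used; } page_t;

void* slow_path_alloc(page_t* page, size_t size);   // assumed fallback (extend/collect/new page)

static inline void* fast_path_alloc(page_t* page, size_t size) {
  block_t* block = page->free;
  if (block == NULL) {
    return slow_path_alloc(page, size);  // free list exhausted: take the generic path
  }
  page->free = block->next;              // pop the head
  page->used++;
  return block;
}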
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { - mi_assert_internal(page->block_size==0||page->block_size >= size); + mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* block = page->free; if (mi_unlikely(block == NULL)) { return _mi_malloc_generic(heap, size); // slow path @@ -94,16 +94,16 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) UNUSED(size); mi_assert_internal(p != NULL); - mi_assert_internal(size > 0 && page->block_size >= size); + mi_assert_internal(size > 0 && mi_page_block_size(page) >= size); mi_assert_internal(_mi_ptr_page(p)==page); if (page->is_zero) { // already zero initialized memory? ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p,page->block_size)); + mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page))); } else { // otherwise memset - memset(p, 0, page->block_size); + memset(p, 0, mi_page_block_size(page)); } } @@ -141,13 +141,12 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { // The decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed - mi_thread_free_t tf = (mi_thread_free_t)mi_atomic_read_relaxed(mi_atomic_cast(uintptr_t, &page->thread_free)); + // Walk the free lists to verify positively if it is already freed if (mi_list_contains(page, page->free, block) || mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, mi_tf_block(tf), block)) + mi_list_contains(page, mi_page_thread_free(page), block)) { - _mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); + _mi_fatal_error("double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); return true; } return false; @@ -177,44 +176,50 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Free // ------------------------------------------------------ +// free huge block from another thread +static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + // huge page segments are always abandoned and can be freed immediately + mi_assert_internal(segment->page_kind==MI_PAGE_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); + + // claim it and free + mi_heap_t* heap = mi_get_default_heap(); + // paranoia: if this it the last reference, the cas should always succeed + if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) { + mi_block_set_next(page, block, page->free); + page->free = block; + page->used--; + page->is_zero = false; + mi_assert(page->used == 0); + mi_tld_t* tld = heap->tld; + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&tld->stats.giant, bsize); + } + else { + _mi_stat_decrease(&tld->stats.huge, bsize); + } + _mi_segment_page_free(page, true, &tld->segments); + } +} + // multi-threaded free static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { - mi_thread_free_t tfree; - mi_thread_free_t tfreex; - bool use_delayed; - + // huge page segments are always abandoned and can be freed immediately mi_segment_t* 
segment = _mi_page_segment(page); if (segment->page_kind==MI_PAGE_HUGE) { - // huge page segments are always abandoned and can be freed immediately - mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); - mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL); - // claim it and free - mi_heap_t* heap = mi_get_default_heap(); - // paranoia: if this it the last reference, the cas should always succeed - if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) { - mi_block_set_next(page, block, page->free); - page->free = block; - page->used--; - page->is_zero = false; - mi_assert(page->used == 0); - mi_tld_t* tld = heap->tld; - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&tld->stats.giant, page->block_size); - } - else { - _mi_stat_decrease(&tld->stats.huge, page->block_size); - } - _mi_segment_page_free(page,true,&tld->segments); - } + mi_free_huge_block_mt(segment, page, block); return; } + mi_thread_free_t tfree; + mi_thread_free_t tfreex; + bool use_delayed; do { - tfree = page->thread_free; - use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE || - (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page - ); + tfree = mi_atomic_read_relaxed(&page->xthread_free); + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); if (mi_unlikely(use_delayed)) { // unlikely: this only happens on the first concurrent free in a page that is in the full list tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); @@ -224,15 +229,11 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_block_set_next(page, block, mi_tf_block(tfree)); tfreex = mi_tf_set_block(tfree,block); } - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); - if (mi_likely(!use_delayed)) { - // increment the thread free count and return - mi_atomic_increment(&page->thread_freed); - } - else { + if (mi_unlikely(use_delayed)) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL); if (heap != NULL) { // add to the delayed free list of this heap. 
(do this atomically as the lock only protects heap memory validity) @@ -245,10 +246,10 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // and reset the MI_DELAYED_FREEING flag do { - tfreex = tfree = page->thread_free; - mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); - if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free); + mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } } @@ -257,7 +258,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { #if (MI_DEBUG) - memset(block, MI_DEBUG_FREED, page->block_size); + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif // and push it on the free list @@ -284,7 +285,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_assert_internal(page!=NULL && p!=NULL); size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - size_t adjust = (diff % page->block_size); + size_t adjust = (diff % mi_page_block_size(page)); return (mi_block_t*)((uintptr_t)p - adjust); } @@ -329,8 +330,8 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_STAT>1) mi_heap_t* heap = mi_heap_get_default(); mi_heap_stat_decrease(heap, malloc, mi_usable_size(p)); - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1); + if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1); } // huge page stat is accounted for in `_mi_page_retire` #endif @@ -342,7 +343,9 @@ void mi_free(void* p) mi_attr_noexcept mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; - if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } + if (mi_unlikely(mi_page_all_free(page))) { + _mi_page_retire(page); + } } else { // non-local, aligned blocks, or a full page; use the more generic path @@ -356,13 +359,19 @@ bool _mi_free_delayed_block(mi_block_t* block) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_thread_id() == segment->thread_id); mi_page_t* page = _mi_segment_page_of(segment, block); - if (mi_tf_delayed(page->thread_free) == MI_DELAYED_FREEING) { - // we might already start delayed freeing while another thread has not yet - // reset the delayed_freeing flag; in that case don't free it quite yet if - // this is the last block remaining. - if (page->used - page->thread_freed == 1) return false; - } - _mi_free_block(page,true,block); + + // Clear the no-delayed flag so delayed freeing is used again for this page. + // This must be done before collecting the free lists on this page -- otherwise + // some blocks may end up in the page `thread_free` list with no blocks in the + // heap `thread_delayed_free` list which may cause the page to be never freed! 
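// [Editor's sketch, not part of the patch] The collect step referenced here
// (`_mi_page_free_collect`) atomically takes the whole cross-thread free list,
// splices it into the page-local list, and subtracts the count from `used`.
// A minimal standalone model using C11 atomics (field and type names are
// illustrative, not the mimalloc API):
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

typedef struct block_s { struct block_s* next; } block_t;
typedef struct page_s {
  _Atomic(uintptr_t) xthread_free;   // low 2 bits: delayed-free flag
  block_t*           local_free;
  uint32_t           used;
} page_t;

static void collect_thread_free(page_t* page) {
  // atomically take the list head, keeping the 2-bit flag in place
  uintptr_t tf = atomic_load_explicit(&page->xthread_free, memory_order_relaxed);
  uintptr_t tfx;
  do {
    tfx = tf & 3;                      // keep the flag, clear the block pointer
  } while (!atomic_compare_exchange_weak(&page->xthread_free, &tf, tfx));

  block_t* head = (block_t*)(tf & ~(uintptr_t)3);
  if (head == NULL) return;

  // the detached list is now private: count it, find the tail, and splice it
  // into the local free list
  uint32_t count = 1;
  block_t* tail = head;
  while (tail->next != NULL) { tail = tail->next; count++; }
  tail->next = page->local_free;
  page->local_free = head;
  page->used -= count;
}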
+ // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */); + + // collect all other non-local frees to ensure up-to-date `used` count + _mi_page_free_collect(page, false); + + // and free the block (possibly freeing the page as well since used is updated) + _mi_free_block(page, true, block); return true; } @@ -371,7 +380,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { if (p==NULL) return 0; const mi_segment_t* segment = _mi_ptr_segment(p); const mi_page_t* page = _mi_segment_page_of(segment,p); - size_t size = page->block_size; + size_t size = mi_page_block_size(page); if (mi_unlikely(mi_page_has_aligned(page))) { ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); diff --git a/src/heap.c b/src/heap.c index 4a589e5c..9f2a4457 100644 --- a/src/heap.c +++ b/src/heap.c @@ -34,7 +34,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); count++; if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue @@ -50,7 +50,7 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ UNUSED(arg1); UNUSED(arg2); UNUSED(pq); - mi_assert_internal(page->heap == heap); + mi_assert_internal(mi_page_heap(page) == heap); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->thread_id == heap->thread_id); mi_assert_expensive(_mi_page_is_valid(page)); @@ -118,13 +118,18 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // this may free some segments (but also take ownership of abandoned pages) _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); } - #if MI_DEBUG - else if (collect == ABANDON && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + else if ( + #ifdef NDEBUG + collect == FORCE + #else + collect >= FORCE + #endif + && _mi_is_main_thread() && mi_heap_is_backing(heap)) + { // the main thread is abandoned, try to free all abandoned segments. // if all memory is freed by now, all segments should be freed. 
_mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); } - #endif } // if abandoning, mark all pages to no longer add to delayed_free @@ -245,25 +250,27 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); // stats - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&heap->tld->stats.giant,page->block_size); + const size_t bsize = mi_page_block_size(page); + if (bsize > MI_LARGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { + _mi_stat_decrease(&heap->tld->stats.giant, bsize); } else { - _mi_stat_decrease(&heap->tld->stats.huge, page->block_size); + _mi_stat_decrease(&heap->tld->stats.huge, bsize); } } - #if (MI_STAT>1) - size_t inuse = page->used - page->thread_freed; - if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); +#if (MI_STAT>1) + _mi_page_free_collect(page, false); // update used count + const size_t inuse = page->used; + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], inuse); } - mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... - #endif + mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... +#endif - // pretend it is all free now - mi_assert_internal(page->thread_freed<=0xFFFF); - page->used = (uint16_t)page->thread_freed; + /// pretend it is all free now + mi_assert_internal(mi_page_thread_free(page) == NULL); + page->used = 0; // and free the page _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); @@ -374,7 +381,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); if (mi_unlikely(!valid)) return NULL; - return _mi_segment_page_of(segment,p)->heap; + return mi_page_heap(_mi_segment_page_of(segment,p)); } bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { @@ -390,7 +397,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa bool* found = (bool*)vfound; mi_segment_t* segment = _mi_page_segment(page); void* start = _mi_page_start(segment, page, NULL); - void* end = (uint8_t*)start + (page->capacity * page->block_size); + void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); *found = (p >= start && p < end); return (!*found); // continue if not found } @@ -432,13 +439,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal(page->local_free == NULL); if (page->used == 0) return true; + const size_t bsize = mi_page_block_size(page); size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(page->heap, area, pstart, page->block_size, arg); + return visitor(mi_page_heap(page), area, pstart, bsize, arg); } // create a bitmap of free blocks. @@ -451,8 +459,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v free_count++; mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; - mi_assert_internal(offset % page->block_size == 0); - size_t blockidx = offset / page->block_size; // Todo: avoid division? 
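// [Editor's sketch, not part of the patch] The visitor above first marks every block
// on the free list in a bitmap (bit index = byte offset / block size) and then
// reports only blocks whose bit is clear. A standalone sketch of that idea, assuming
// the page holds at most MAX_BLOCKS blocks (names are illustrative):
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>

#define MAX_BLOCKS 1024
typedef struct block_s { struct block_s* next; } block_t;
typedef bool (block_visit_fun)(void* block, size_t block_size, void* arg);

static bool visit_used_blocks(uint8_t* page_start, size_t block_size, size_t capacity,
                              block_t* free_list, block_visit_fun* visitor, void* arg)
{
  uintptr_t free_map[MAX_BLOCKS / (8 * sizeof(uintptr_t))] = { 0 };
  // 1. mark the free blocks
  for (block_t* b = free_list; b != NULL; b = b->next) {
    size_t idx = (size_t)((uint8_t*)b - page_start) / block_size;
    free_map[idx / (8 * sizeof(uintptr_t))] |= ((uintptr_t)1 << (idx % (8 * sizeof(uintptr_t))));
  }
  // 2. visit every block that is not marked free
  for (size_t i = 0; i < capacity; i++) {
    bool is_free = (free_map[i / (8 * sizeof(uintptr_t))] >> (i % (8 * sizeof(uintptr_t)))) & 1;
    if (!is_free) {
      if (!visitor(page_start + i * block_size, block_size, arg)) return false;
    }
  }
  return true;
}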
+ mi_assert_internal(offset % bsize == 0); + size_t blockidx = offset / bsize; // Todo: avoid division? mi_assert_internal( blockidx < MI_MAX_BLOCKS); size_t bitidx = (blockidx / sizeof(uintptr_t)); size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); @@ -471,8 +479,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v } else if ((m & ((uintptr_t)1 << bit)) == 0) { used_count++; - uint8_t* block = pstart + (i * page->block_size); - if (!visitor(page->heap, area, block, page->block_size, arg)) return false; + uint8_t* block = pstart + (i * bsize); + if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false; } } mi_assert_internal(page->used == used_count); @@ -487,12 +495,13 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; + const size_t bsize = mi_page_block_size(page); xarea.page = page; - xarea.area.reserved = page->reserved * page->block_size; - xarea.area.committed = page->capacity * page->block_size; + xarea.area.reserved = page->reserved * bsize; + xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used - page->thread_freed; // race is ok - xarea.area.block_size = page->block_size; + xarea.area.used = page->used; + xarea.area.block_size = bsize; return fun(heap, &xarea, arg); } diff --git a/src/init.c b/src/init.c index 79e1e044..d81d7459 100644 --- a/src/init.c +++ b/src/init.c @@ -23,12 +23,11 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, #endif 0, // used - NULL, - ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), - 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) - , { NULL } // padding - #endif + 0, // xblock_size + NULL, // local_free + ATOMIC_VAR_INIT(0), // xthread_free + ATOMIC_VAR_INIT(0), // xheap + NULL, NULL }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) diff --git a/src/page-queue.c b/src/page-queue.c index 95443a69..68e2aaa4 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -178,20 +178,20 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* #endif static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); - mi_heap_t* heap = page->heap; + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size)); + mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(bin >= MI_BIN_HUGE || page->block_size == pq->block_size); + mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); mi_assert_expensive(mi_page_queue_contains(pq, page)); return pq; } static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); + uint8_t bin = (mi_page_is_in_full(page) ? 
MI_BIN_FULL : _mi_bin(page->xblock_size)); mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size); + mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); return pq; } @@ -246,35 +246,35 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == queue->last) queue->last = page->prev; if (page == queue->first) { queue->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, queue)); mi_heap_queue_first_update(heap,queue); } - page->heap->page_count--; + heap->page_count--; page->next = NULL; page->prev = NULL; - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_in_full(page,false); } static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(page->block_size == queue->block_size || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || + mi_assert_internal(page->xblock_size == queue->block_size || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); page->next = queue->first; page->prev = NULL; if (queue->first != NULL) { @@ -296,19 +296,19 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || - (page->block_size == to->block_size && mi_page_queue_is_full(from)) || - (page->block_size == from->block_size && mi_page_queue_is_full(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || + (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || + (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || + (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || + (page->xblock_size > 
MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == from->last) from->last = page->prev; if (page == from->first) { from->first = page->next; // update first - mi_heap_t* heap = page->heap; mi_assert_internal(mi_heap_contains_queue(heap, from)); mi_heap_queue_first_update(heap, from); } @@ -316,14 +316,14 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro page->prev = to->last; page->next = NULL; if (to->last != NULL) { - mi_assert_internal(page->heap == to->last->heap); + mi_assert_internal(heap == mi_page_heap(to->last)); to->last->next = page; to->last = page; } else { to->first = page; to->last = page; - mi_heap_queue_first_update(page->heap, to); + mi_heap_queue_first_update(heap, to); } mi_page_set_in_full(page, mi_page_queue_is_full(to)); @@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue // set append pages to new heap and count size_t count = 0; for (mi_page_t* page = append->first; page != NULL; page = page->next) { - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); + mi_page_set_heap(page,heap); count++; } diff --git a/src/page.c b/src/page.c index 6a6e09d6..40aec0c6 100644 --- a/src/page.c +++ b/src/page.c @@ -29,10 +29,11 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ // Index a block in a page -static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t i) { +static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { + UNUSED(page); mi_assert_internal(page != NULL); mi_assert_internal(i <= page->reserved); - return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); + return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); @@ -69,13 +70,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { } static bool mi_page_is_valid_init(mi_page_t* page) { - mi_assert_internal(page->block_size > 0); + mi_assert_internal(page->xblock_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); + const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL,NULL)); + mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -89,10 +91,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif - mi_block_t* tfree = mi_tf_block(page->thread_free); + mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); - size_t tfree_count = mi_page_list_count(page, tfree); - mi_assert_internal(tfree_count <= page->thread_freed + 1); + //size_t tfree_count = mi_page_list_count(page, tfree); + //mi_assert_internal(tfree_count <= page->thread_freed + 1); size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == 
page->capacity); @@ -105,14 +107,14 @@ bool _mi_page_is_valid(mi_page_t* page) { #if MI_SECURE mi_assert_internal(page->key != 0); #endif - if (page->heap!=NULL) { + if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); - mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0); + mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); if (segment->page_kind != MI_PAGE_HUGE) { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); - mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); - mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); + mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); + mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq)); } } return true; @@ -124,20 +126,20 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid mi_thread_free_t tfreex; mi_delayed_t old_delay; do { - tfree = mi_atomic_read_relaxed(&page->thread_free); + tfree = mi_atomic_read(&page->xthread_free); tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { - mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + // mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. + tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } else if (delay == old_delay) { break; // avoid atomic operation if already equal } else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) { - break; // leave never set + break; // leave never-delayed flag set } - } while ((old_delay == MI_DELAYED_FREEING) || - !mi_atomic_cas_weak(mi_atomic_cast(uintptr_t, &page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); } /* ----------------------------------------------------------- @@ -154,17 +156,17 @@ static void _mi_page_thread_free_collect(mi_page_t* page) mi_thread_free_t tfree; mi_thread_free_t tfreex; do { - tfree = page->thread_free; + tfree = mi_atomic_read_relaxed(&page->xthread_free); head = mi_tf_block(tfree); tfreex = mi_tf_set_block(tfree,NULL); - } while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); + } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree)); // return if the list is empty if (head == NULL) return; // find the tail -- also to get a proper count (without data races) - uintptr_t max_count = page->capacity; // cannot collect more than capacity - uintptr_t count = 1; + uint32_t max_count = page->capacity; // cannot collect more than capacity + uint32_t count = 1; mi_block_t* tail = head; mi_block_t* next; while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { @@ -182,7 +184,6 @@ static void _mi_page_thread_free_collect(mi_page_t* page) page->local_free = head; // update counts now - mi_atomic_subu(&page->thread_freed, count); page->used -= count; } @@ -190,7 +191,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_assert_internal(page!=NULL); // collect the thread free list - if (force || mi_tf_block(page->thread_free) != NULL) { // quick test to avoid an atomic operation + if (force || mi_page_thread_free(page) != NULL) { 
// quick test to avoid an atomic operation _mi_page_thread_free_collect(page); } @@ -228,15 +229,16 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // called from segments when reclaiming abandoned pages void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == NULL); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(!page->is_reset); - mi_assert_internal(mi_tf_delayed(page->thread_free) == MI_NEVER_DELAYED_FREE); - _mi_page_free_collect(page,false); - mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); + mi_assert_internal(mi_page_thread_free_flag(page) == MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, heap); + mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); - mi_assert_internal(page->heap != NULL); - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, true); // override never (after push so heap is set) + _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) + // _mi_page_free_collect(page,false); // no need, as it is just done before reclaim + mi_assert_internal(mi_page_heap(page)!= NULL); mi_assert_expensive(_mi_page_is_valid(page)); } @@ -270,8 +272,8 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { // otherwise allocate the page page = mi_page_fresh_alloc(heap, pq, pq->block_size); if (page==NULL) return NULL; - mi_assert_internal(pq->block_size==page->block_size); - mi_assert_internal(pq==mi_page_queue(heap,page->block_size)); + mi_assert_internal(pq->block_size==mi_page_block_size(page)); + mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); return page; } @@ -312,11 +314,9 @@ void _mi_page_unfull(mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_is_in_full(page)); - - _mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE, false); if (!mi_page_is_in_full(page)) return; - mi_heap_t* heap = page->heap; + mi_heap_t* heap = mi_page_heap(page); mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); @@ -329,10 +329,8 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page)); - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false); if (mi_page_is_in_full(page)) return; - - mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page); + mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page); _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set } @@ -345,18 +343,17 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); - mi_assert_internal(page->heap != NULL); + mi_assert_internal(mi_page_heap(page) != NULL); -#if MI_DEBUG > 1 - mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); -#endif + mi_heap_t* pheap = mi_page_heap(page); // remove from our page list - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_segments_tld_t* segments_tld = 
&pheap->tld->segments; mi_page_queue_remove(pq, page); // page is no longer associated with our heap - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_page_set_heap(page, NULL); #if MI_DEBUG>1 // check there are no references left.. @@ -366,7 +363,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { #endif // and abandon it - mi_assert_internal(page->heap == NULL); + mi_assert_internal(mi_page_heap(page) == NULL); _mi_segment_page_abandon(page,segments_tld); } @@ -377,33 +374,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(mi_page_all_free(page)); - #if MI_DEBUG>1 - // check if we can safely free - mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE); - free = mi_atomic_exchange(&page->thread_free, free); - mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING); - #endif + mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING); + // no more aligned blocks in here mi_page_set_has_aligned(page, false); - // account for huge pages here - // (note: no longer necessary as huge pages are always abandoned) - if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size); - } - else { - _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); - } - } - // remove from the page list // (no need to do _mi_heap_delayed_free first as all blocks are already free) - mi_segments_tld_t* segments_tld = &page->heap->tld->segments; + mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments; mi_page_queue_remove(pq, page); // and free it - mi_assert_internal(page->heap == NULL); + mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } @@ -427,7 +409,7 @@ void _mi_page_retire(mi_page_t* page) { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) { if (pq->last==page && pq->first==page) { // the only page in the queue? 
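// [Editor's sketch, not part of the patch] `_mi_page_retire` delays freeing a
// completely free page when it is the only page of its size class: it sets an
// expiration counter instead, and a later `_mi_heap_collect_retired` pass frees the
// page once the counter runs out (or cancels if allocations came back). A minimal
// model of that policy (names, `page_free`, and the counter value are illustrative):
#include <stdbool.h>
#include <stdint.h>

typedef struct page_s {
  uint32_t used;           // live blocks on the page
  uint8_t  retire_expire;  // >0: retirement postponed for this many collect passes
  bool     only_page_in_queue;
} page_t;

static void page_free(page_t* page);   // assumed: actually release the page

// called when the last block on `page` was just freed
static void page_retire(page_t* page) {
  if (page->only_page_in_queue) {
    page->retire_expire = 4;           // keep it around; likely to be reused soon
    return;
  }
  page_free(page);
}

// called periodically (e.g. on each heap "heartbeat" collection)
static void collect_retired(page_t* page, bool force) {
  if (page->retire_expire == 0) return;
  if (page->used != 0) { page->retire_expire = 0; return; }  // page got reused
  page->retire_expire--;
  if (force || page->retire_expire == 0) page_free(page);
}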
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 4; @@ -469,15 +451,15 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) -static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { +static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); - const size_t bsize = page->block_size; // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -491,7 +473,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice for (size_t i = 0; i < slice_count; i++) { - blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend); counts[i] = slice_extend; } counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) @@ -526,7 +508,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co page->free = free_start; } -static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) +static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { UNUSED(stats); #if (MI_SECURE <= 2) @@ -534,12 +516,13 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); + mi_assert_internal(bsize == mi_page_block_size(page)); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); - const size_t bsize = page->block_size; - mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); + + mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); // initialize a sequential free list - mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1); mi_block_t* block = start; while(block <= last) { mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); @@ -581,8 +564,9 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count + const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size); size_t extend = page->reserved - page->capacity; - size_t max_extend = (page->block_size >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)page->block_size); + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? 
MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; if (extend > max_extend) { @@ -596,20 +580,20 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // commit on-demand for large and huge pages? if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - uint8_t* start = page_start + (page->capacity * page->block_size); - _mi_mem_commit(start, extend * page->block_size, NULL, &tld->os); + uint8_t* start = page_start + (page->capacity * bsize); + _mi_mem_commit(start, extend * bsize, NULL, &tld->os); } // and append the extend the free list if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(page, extend, &tld->stats ); + mi_page_free_list_extend(page, bsize, extend, &tld->stats ); } else { - mi_page_free_list_extend_secure(heap, page, extend, &tld->stats); + mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats); } // enable the new free list page->capacity += (uint16_t)extend; - mi_stat_increase(tld->stats.page_committed, extend * page->block_size); + mi_stat_increase(tld->stats.page_committed, extend * bsize); // extension into zero initialized memory preserves the zero'd free list if (!page->is_zero_init) { @@ -625,9 +609,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert(segment != NULL); mi_assert_internal(block_size > 0); // set fields + mi_page_set_heap(page, heap); size_t page_size; _mi_segment_page_start(segment, page, block_size, &page_size, NULL); - page->block_size = block_size; + page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); #ifdef MI_ENCODE_FREELIST @@ -639,14 +624,14 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); - mi_assert_internal(page->thread_free == 0); - mi_assert_internal(page->thread_freed == 0); + mi_assert_internal(page->xthread_free == 0); mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); #if (MI_ENCODE_FREELIST) - mi_assert_internal(page->key != 0); + mi_assert_internal(page->key[1] != 0); + mi_assert_internal(page->key[2] != 0); #endif mi_assert_expensive(mi_page_is_valid_init(page)); @@ -664,34 +649,19 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) { // search through the pages in "next fit" order - mi_page_t* rpage = NULL; size_t count = 0; - size_t page_free_count = 0; mi_page_t* page = pq->first; - while( page != NULL) + while (page != NULL) { mi_page_t* next = page->next; // remember next count++; // 0. collect freed blocks by us and other threads - _mi_page_free_collect(page,false); + _mi_page_free_collect(page, false); // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { - // If all blocks are free, we might retire this page instead. - // do this at most 8 times to bound allocation time. 
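// [Editor's sketch, not part of the patch] `mi_page_init` above stores the block
// size in the 32-bit `xblock_size` field; anything at or above MI_HUGE_BLOCK_SIZE is
// clamped to that sentinel, and `mi_page_block_size` recovers the real size from the
// segment's page size instead. A standalone model of the encoding (the threshold
// value and the page-size parameter are stand-ins, not mimalloc's definitions):
#include <stdint.h>
#include <stddef.h>

#define HUGE_BLOCK_SIZE  ((uint32_t)(2u << 20))   // illustrative threshold only

static uint32_t encode_block_size(size_t block_size) {
  return (block_size < HUGE_BLOCK_SIZE) ? (uint32_t)block_size : HUGE_BLOCK_SIZE;
}

static size_t decode_block_size(uint32_t xblock_size, size_t segment_page_size) {
  // small/large pages: the field holds the exact size;
  // huge pages: the page holds a single block covering the whole page
  return (xblock_size < HUGE_BLOCK_SIZE) ? (size_t)xblock_size : segment_page_size;
}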
- // (note: this can happen if a page was earlier not retired due - // to having neighbours that were mostly full or due to concurrent frees) - if (page_free_count < 8 && mi_page_all_free(page)) { - page_free_count++; - if (rpage != NULL) _mi_page_free(rpage,pq,false); - rpage = page; - page = next; - continue; // and keep looking - } - else { - break; // pick this one - } + break; // pick this one } // 2. Try to extend @@ -704,20 +674,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // 3. If the page is completely full, move it to the `mi_pages_full` // queue so we don't visit long-lived pages too often. mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); - mi_page_to_full(page,pq); + mi_page_to_full(page, pq); page = next; } // for each page - mi_stat_counter_increase(heap->tld->stats.searches,count); - - if (page == NULL) { - page = rpage; - rpage = NULL; - } - if (rpage != NULL) { - _mi_page_free(rpage,pq,false); - } + mi_stat_counter_increase(heap->tld->stats.searches, count); if (page == NULL) { page = mi_page_fresh(heap, pq); @@ -729,11 +691,12 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p mi_assert_internal(page == NULL || mi_page_immediate_available(page)); // finally collect retired pages - _mi_heap_collect_retired(heap,false); + _mi_heap_collect_retired(heap, false); return page; } + // Find a page with free blocks of `size`. static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); @@ -794,14 +757,15 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); if (page != NULL) { + const size_t bsize = mi_page_block_size(page); mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(page->block_size == block_size); + mi_assert_internal(bsize >= size); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue - mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); + mi_page_set_heap(page, NULL); - if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { + if (bsize > MI_HUGE_OBJ_SIZE_MAX) { _mi_stat_increase(&heap->tld->stats.giant, block_size); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); } @@ -849,7 +813,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept if (page == NULL) return NULL; // out of memory mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(page->block_size >= size); + mi_assert_internal(mi_page_block_size(page) >= size); // and try again, this time succeeding! (i.e. this should never recurse) return _mi_page_malloc(heap, page, size); diff --git a/src/segment.c b/src/segment.c index 97859fa9..4fb3e28b 100644 --- a/src/segment.c +++ b/src/segment.c @@ -208,8 +208,8 @@ static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, m mi_assert_internal(size <= psize); size_t reset_size = (size == 0 || size > psize ? 
psize : size); if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - mi_assert_internal(page->block_size > 0); - reset_size = page->capacity * page->block_size; + mi_assert_internal(page->xblock_size > 0); + reset_size = page->capacity * mi_page_block_size(page); } _mi_mem_reset(start, reset_size, tld->os); } @@ -223,8 +223,8 @@ static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); size_t unreset_size = (size == 0 || size > psize ? psize : size); if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) { - mi_assert_internal(page->block_size > 0); - unreset_size = page->capacity * page->block_size; + mi_assert_internal(page->xblock_size > 0); + unreset_size = page->capacity * mi_page_block_size(page); } bool is_zero = false; _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); @@ -255,7 +255,7 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -278,7 +278,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->block_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->xblock_size==0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } @@ -605,7 +605,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(page->is_committed); - size_t inuse = page->capacity * page->block_size; + size_t inuse = page->capacity * mi_page_block_size(page); _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); @@ -707,6 +707,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert(page != NULL); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment)); segment->abandoned++; @@ -765,9 +767,12 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen if (page->segment_in_use) { mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); + mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); + mi_assert_internal(mi_page_heap(page) == NULL); segment->abandoned--; mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); + _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { // if everything free by now, free the page mi_segment_page_clear(segment,page,tld);
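// [Editor's sketch, not part of the patch] With `thread_freed` gone, `used` is only
// accurate after collecting the cross-thread free list, which is why the reclaim
// path above now calls `_mi_page_free_collect` before testing `mi_page_all_free`.
// A model of the reclaim decision (the helper names are illustrative stand-ins for
// the mimalloc functions they mirror):
#include <stdbool.h>

typedef struct page_s page_t;
typedef struct heap_s heap_t;

void collect_thread_free(page_t* page);              // splice xthread_free into local_free, fix `used`
bool page_all_free(const page_t* page);              // `used == 0` after collection
void segment_page_clear(page_t* page);               // release the page back to the segment
void heap_reclaim_page(heap_t* heap, page_t* page);  // re-attach: set heap, push on queue,
                                                     // re-enable delayed freeing

static void reclaim_abandoned_page(heap_t* heap, page_t* page) {
  collect_thread_free(page);         // bring `used` up to date first
  if (page_all_free(page)) {
    segment_page_clear(page);        // everything was freed while abandoned
  }
  else {
    heap_reclaim_page(heap, page);   // page still has live blocks: adopt it
  }
}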