From 70115d8b8c0e52d8f196622901639fffed41ff9c Mon Sep 17 00:00:00 2001
From: daanx
Date: Fri, 6 Dec 2024 23:25:53 -0800
Subject: [PATCH] small fixes

---
 include/mimalloc/internal.h | 41 +++++---------------
 src/arena.c                 |  8 ++--
 src/free.c                  | 12 +++---
 src/heap.c                  | 15 +-------
 src/os.c                    |  6 +--
 src/page-queue.c            | 26 ++++---------
 src/page.c                  | 77 ++-----------------------------------
 7 files changed, 34 insertions(+), 151 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index ad7c41c6..28eca4bb 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -540,30 +540,16 @@ static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
 
 static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
   if (heap != NULL) {
-    // mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
     page->heap = heap;
     page->heap_tag = heap->tag;
     mi_atomic_store_release(&page->xthread_id, heap->thread_id);
   }
   else {
-    // mi_atomic_store_release(&page->xheap, (uintptr_t)heap->tld->subproc);
     page->heap = NULL;
     mi_atomic_store_release(&page->xthread_id,0);
   }
 }
 
-//static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
-//  mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
-//  if (heap != NULL) {
-//    mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
-//    page->heap_tag = heap->tag;
-//    mi_atomic_store_release(&page->xthread_id, heap->thread_id);
-//  }
-//  else {
-//    mi_atomic_store_release(&page->xheap, (uintptr_t)mi_page_heap(page)->tld->subproc);
-//    mi_atomic_store_release(&page->xthread_id,0);
-//  }
-//}
 
 // Thread free flag helpers
 static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
@@ -650,24 +636,24 @@ static inline bool mi_page_is_used_at_frac(const mi_page_t* page, uint16_t n) {
 
 static inline bool mi_page_is_abandoned(const mi_page_t* page) {
   // note: the xheap field of an abandoned heap is set to the subproc (for fast reclaim-on-free)
-  return (mi_atomic_load_acquire(&page->xthread_id) <= 1);
+  return (mi_atomic_load_relaxed(&page->xthread_id) <= 1);
 }
 
 static inline bool mi_page_is_abandoned_mapped(const mi_page_t* page) {
-  return (mi_atomic_load_acquire(&page->xthread_id) == 1);
+  return (mi_atomic_load_relaxed(&page->xthread_id) == 1);
 }
 
 static inline void mi_page_set_abandoned_mapped(mi_page_t* page) {
-  mi_atomic_or_acq_rel(&page->xthread_id, (uintptr_t)1);
+  mi_atomic_or_relaxed(&page->xthread_id, (uintptr_t)1);
 }
 
 static inline void mi_page_clear_abandoned_mapped(mi_page_t* page) {
-  mi_atomic_and_acq_rel(&page->xthread_id, ~(uintptr_t)1);
+  mi_atomic_and_relaxed(&page->xthread_id, ~(uintptr_t)1);
 }
 
 
 static inline bool mi_page_is_huge(const mi_page_t* page) {
-  return (page->block_size > MI_LARGE_MAX_OBJ_SIZE || 
+  return (page->block_size > MI_LARGE_MAX_OBJ_SIZE ||
           (mi_memkind_is_os(page->memid.memkind) && page->memid.mem.os.base < (void*)page));
 }
 
@@ -683,15 +669,6 @@ static inline void _mi_page_unown_unconditional(mi_page_t* page) {
   mi_assert_internal(mi_page_thread_id(page)==0);
   const uintptr_t old = mi_atomic_and_acq_rel(&page->xthread_free, ~((uintptr_t)1));
   mi_assert_internal((old&1)==1); MI_UNUSED(old);
-  /*
-  mi_thread_free_t tf_new;
-  mi_thread_free_t tf_old;
-  do {
-    tf_old = mi_atomic_load_relaxed(&page->xthread_free);
-    mi_assert_internal(mi_tf_is_owned(tf_old));
-    tf_new = mi_tf_create(mi_tf_block(tf_old), false);
-  } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf_old, tf_new));
-  */
 }
 
 
@@ -721,7 +698,7 @@ static inline bool _mi_page_unown(mi_page_t* page) {
     }
     mi_assert_internal(mi_tf_block(tf_old)==NULL);
     tf_new = mi_tf_create(NULL, false);
-  } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf_old, tf_new));
+  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));
   return false;
 }
 
@@ -729,15 +706,15 @@ static inline bool _mi_page_unown(mi_page_t* page) {
 // Page flags
 //-----------------------------------------------------------
 static inline mi_page_flags_t mi_page_flags(const mi_page_t* page) {
-  return mi_atomic_load_acquire(&page->xflags);
+  return mi_atomic_load_relaxed(&page->xflags);
 }
 
 static inline void mi_page_flags_set(mi_page_t* page, bool set, mi_page_flags_t newflag) {
   if (set) {
-    mi_atomic_or_acq_rel(&page->xflags, newflag);
+    mi_atomic_or_relaxed(&page->xflags, newflag);
   }
   else {
-    mi_atomic_and_acq_rel(&page->xflags, ~newflag);
+    mi_atomic_and_relaxed(&page->xflags, ~newflag);
   }
 }
 
diff --git a/src/arena.c b/src/arena.c
index 45697081..8362a31f 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -944,7 +944,7 @@ void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) {
 bool _mi_arena_contains(const void* p) {
   const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
   for (size_t i = 0; i < max_arena; i++) {
-    mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
     if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) > (const uint8_t*)p) {
       return true;
     }
@@ -1140,7 +1140,7 @@ static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bi
     if (i<10) { buf[k++] = ('0' + (char)i); buf[k++] = ' '; buf[k++] = ' '; }
     else if (i<100) { buf[k++] = ('0' + (char)(i/10)); buf[k++] = ('0' + (char)(i%10)); buf[k++] = ' '; }
     else if (i<1000) { buf[k++] = ('0' + (char)(i/100)); buf[k++] = ('0' + (char)((i%100)/10)); buf[k++] = ('0' + (char)(i%10)); }
-    
+
     for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
       if (j > 0 && (j % 4) == 0) {
         buf[k++] = '\n'; _mi_memset(buf+k,' ',5); k += 5;
@@ -1174,7 +1174,7 @@ void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge)
   //size_t abandoned_total = 0;
   size_t purge_total = 0;
   for (size_t i = 0; i < max_arenas; i++) {
-    mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
     if (arena == NULL) break;
     slice_total += arena->slice_count;
     _mi_output_message("arena %zu: %zu slices (%zu MiB)%s\n", i, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""));
@@ -1324,7 +1324,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_
 static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats) {
   if (_mi_preloading() || mi_arena_purge_delay() <= 0) return;  // nothing will be scheduled
 
-  const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count);
+  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
   if (max_arena == 0) return;
 
   // _mi_error_message(EFAULT, "purging not yet implemented\n");
diff --git a/src/free.c b/src/free.c
index afb23838..ece55599 100644
--- a/src/free.c
+++ b/src/free.c
@@ -70,7 +70,7 @@ static inline void mi_free_block_mt(mi_page_t* page, mi_block_t* block)
   do {
     mi_block_set_next(page, block, mi_tf_block(tf_old));
    tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
-  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));
+  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));  // todo: release is enough?
 
   // and atomically try to collect the page if it was abandoned
   const bool is_owned_now = !mi_tf_is_owned(tf_old);
@@ -207,17 +207,17 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) {
   #endif
 
   // 1. free if the page is free now
-  if (mi_page_all_free(page)) 
+  if (mi_page_all_free(page))
   {
     // first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish)
-    _mi_arena_page_unabandon(page); 
+    _mi_arena_page_unabandon(page);
     // we can free the page directly
     _mi_arena_page_free(page);
     return;
   }
-  
+
   // 2. if the page is not too full, we can try to reclaim it for ourselves
-  if (_mi_option_get_fast(mi_option_reclaim_on_free) != 0 && 
+  if (_mi_option_get_fast(mi_option_reclaim_on_free) != 0 &&
       !mi_page_is_used_at_frac(page,8))
   {
     // the page has still some blocks in use (but not too many)
@@ -234,7 +234,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) {
         (page->subproc == tagheap->tld->subproc) &&  // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
         (_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id))  // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
        )
-    { 
+    {
      if (mi_page_queue(tagheap, page->block_size)->first != NULL) {  // don't reclaim for an block_size we don't use
        // first remove it from the abandoned pages in the arena -- this waits for any readers to finish
        _mi_arena_page_unabandon(page);
diff --git a/src/heap.c b/src/heap.c
index 2ff40930..d687f25e 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -136,24 +136,11 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
     _mi_arena_reclaim_all_abandoned(heap);
   }
 
-  // if abandoning, mark all pages to no longer add to delayed_free
-  //if (collect == MI_ABANDON) {
-  //  mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
-  //}
-
-  // free all current thread delayed blocks.
-  // (if abandoning, after this there are no more thread-delayed references into the pages.)
-  // _mi_heap_delayed_free_all(heap);
-
   // collect retired pages
   _mi_heap_collect_retired(heap, force);
 
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
-  // mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
-
-  // collect segments (purge pages, this can be expensive so don't force on abandonment)
-  // _mi_segments_collect(collect == MI_FORCE, &heap->tld->segments);
 
   // if forced, collect thread data cache on program-exit (or shared library unload)
   if (force && is_main_thread && mi_heap_is_backing(heap)) {
@@ -219,7 +206,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool
     if (poolData != NULL) {
       heap->no_reclaim = true;
     }
-  } 
+  }
   #endif
 
   if (heap == tld->heap_backing) {
diff --git a/src/os.c b/src/os.c
index 156a655b..b05068fd 100644
--- a/src/os.c
+++ b/src/os.c
@@ -113,8 +113,8 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st
   if (err != 0) {
     _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr);
   }
-  if (still_committed) { 
-    _mi_stat_decrease(&stats->committed, size); 
+  if (still_committed) {
+    _mi_stat_decrease(&stats->committed, size);
   }
   _mi_stat_decrease(&stats->reserved, size);
 }
@@ -556,7 +556,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
       #endif
     }
     end = start + size;
-  } while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
+  } while (!mi_atomic_cas_weak_acq_rel(&mi_huge_start, &huge_start, end));
 
   if (total_size != NULL) *total_size = size;
   return (uint8_t*)start;
diff --git a/src/page-queue.c b/src/page-queue.c
index ad616b1d..9e3aaacc 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file
 
 #ifndef MI_IN_PAGE_C
 #error "this file should be included from 'page.c'" // include to help an IDE
-#include "mimalloc.h" 
+#include "mimalloc.h"
 #include "mimalloc/internal.h"
 #include "mimalloc/atomic.h"
 #endif
@@ -83,10 +83,10 @@ static inline uint8_t mi_bin(size_t size) {
     #if defined(MI_ALIGN4W)
     if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
     #endif
-    wsize--; 
+    wsize--;
    mi_assert_internal(wsize!=0);
    // find the highest bit position
-    uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize)); 
+    uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
    // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
    // - adjust with 3 because we use do not round the first 8 sizes
    //   which each get an exact bin
@@ -211,8 +211,8 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
 static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_expensive(mi_page_queue_contains(queue, page));
-  mi_assert_internal(mi_page_block_size(page) == queue->block_size || 
-                     (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || 
+  mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
+                     (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
                      (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
   mi_heap_t* heap = mi_page_heap(page);
   if (page->prev != NULL) page->prev->next = page->next;
@@ -227,7 +227,6 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
   heap->page_count--;
   page->next = NULL;
   page->prev = NULL;
-  // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL);
   mi_page_set_in_full(page,false);
 }
 
@@ -243,7 +242,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
                      (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
 
   mi_page_set_in_full(page, mi_page_queue_is_full(queue));
-  // mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap);
+
   page->next = queue->first;
   page->prev = NULL;
   if (queue->first != NULL) {
@@ -346,8 +345,8 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t*
     page->prev = to->first;
     page->next = next;
     to->first->next = page;
-    if (next != NULL) { 
-      next->prev = page; 
+    if (next != NULL) {
+      next->prev = page;
     }
     else {
      to->last = page;
@@ -385,15 +384,6 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
   // set append pages to new heap and count
   size_t count = 0;
   for (mi_page_t* page = append->first; page != NULL; page = page->next) {
-    /*
-    // inline `mi_page_set_heap` to avoid wrong assertion during absorption;
-    // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive.
-    mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
-    // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a
-    // side effect that it spins until any DELAYED_FREEING is finished. This ensures
-    // that after appending only the new heap will be used for delayed free operations.
-    _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false);
-    */
     mi_page_set_heap(page, heap);
     count++;
   }
diff --git a/src/page.c b/src/page.c
index 056c9506..54e7b539 100644
--- a/src/page.c
+++ b/src/page.c
@@ -132,40 +132,6 @@ bool _mi_page_is_valid(mi_page_t* page) {
 }
 #endif
 
-/*
-void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
-  while (!_mi_page_try_use_delayed_free(page, delay, override_never)) {
-    mi_atomic_yield();
-  }
-}
-
-bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
-  mi_thread_free_t tfreex;
-  mi_delayed_t old_delay;
-  mi_thread_free_t tfree;
-  size_t yield_count = 0;
-  do {
-    tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
-    tfreex = mi_tf_set_delayed(tfree, delay);
-    old_delay = mi_tf_delayed(tfree);
-    if mi_unlikely(old_delay == MI_DELAYED_FREEING) {
-      if (yield_count >= 4) return false;  // give up after 4 tries
-      yield_count++;
-      mi_atomic_yield();  // delay until outstanding MI_DELAYED_FREEING are done.
-      // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
-    }
-    else if (delay == old_delay) {
-      break; // avoid atomic operation if already equal
-    }
-    else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
-      break; // leave never-delayed flag set
-    }
-  } while ((old_delay == MI_DELAYED_FREEING) ||
-           !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
-
-  return true; // success
-}
-*/
 /* -----------------------------------------------------------
   Page collect the `local_free` and `thread_free` lists
 ----------------------------------------------------------- */
@@ -181,7 +147,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
     head = mi_tf_block(tfree);
     if (head == NULL) return; // return if the list is empty
     tfreex = mi_tf_create(NULL,mi_tf_is_owned(tfree)); // set the thread free list to NULL
-  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));
+  } while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));  // release is enough?
   mi_assert_internal(head != NULL);
 
   // find the tail -- also to get a proper count (without data races)
@@ -334,43 +300,6 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
   return page;
 }
 
-/* -----------------------------------------------------------
-  Do any delayed frees
-  (put there by other threads if they deallocated in a full page)
------------------------------------------------------------ */
-/*
-void _mi_heap_delayed_free_all(mi_heap_t* heap) {
-  while (!_mi_heap_delayed_free_partial(heap)) {
-    mi_atomic_yield();
-  }
-}
-
-// returns true if all delayed frees were processed
-bool _mi_heap_delayed_free_partial(mi_heap_t* heap) {
-  // take over the list (note: no atomic exchange since it is often NULL)
-  mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
-  while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { };
-  bool all_freed = true;
-
-  // and free them all
-  while(block != NULL) {
-    mi_block_t* next = mi_block_nextx(heap,block, heap->keys);
-    // use internal free instead of regular one to keep stats etc correct
-    if (!_mi_free_delayed_block(block)) {
-      // we might already start delayed freeing while another thread has not yet
-      // reset the delayed_freeing flag; in that case delay it further by reinserting the current block
-      // into the delayed free list
-      all_freed = false;
-      mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
-      do {
-        mi_block_set_nextx(heap, block, dfree, heap->keys);
-      } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
-    }
-    block = next;
-  }
-  return all_freed;
-}
-*/
 
 /* -----------------------------------------------------------
   Unfull, abandon, free and retire
@@ -765,7 +694,7 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m
   #if MI_STAT
   size_t count = 0;
   #endif
-  long candidate_limit = 0;          // we reset this on the first candidate to limit the search 
+  long candidate_limit = 0;          // we reset this on the first candidate to limit the search
   long full_page_retain = _mi_option_get_fast(mi_option_full_page_retain);
   mi_page_t* page_candidate = NULL;  // a page with free space
   mi_page_t* page = pq->first;
@@ -777,7 +706,7 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m
     count++;
     #endif
     candidate_limit--;
-    
+
     // collect freed blocks by us and other threads
     _mi_page_free_collect(page, false);