diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index 672cbb87..fd88cd8e 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -279,8 +279,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index ab1e161d..0c7fafe3 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -417,6 +417,8 @@ static inline void mi_atomic_yield(void) { #if defined(_WIN32) +#if 0 + #define mi_lock_t CRITICAL_SECTION static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -436,7 +438,8 @@ static inline void mi_lock_done(mi_lock_t* lock) { DeleteCriticalSection(lock); } -#if 0 +#else + #define mi_lock_t SRWLOCK // slim reader-writer lock static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -455,6 +458,7 @@ static inline void mi_lock_init(mi_lock_t* lock) { static inline void mi_lock_done(mi_lock_t* lock) { (void)(lock); } + #endif #elif defined(MI_USE_PTHREADS) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 24792f8c..7774b378 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -90,7 +90,6 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 4d43e887..ca3913ad 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -293,7 +293,7 @@ typedef struct mi_page_s { uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary #endif - mi_heap_t* heap; // heap this threads belong to. 
+ mi_heap_t* heap; // the heap owning this page (or NULL for abandoned pages) struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` mi_memid_t memid; // provenance of the page memory @@ -394,7 +394,7 @@ typedef struct mi_padding_s { // A heap owns a set of pages. struct mi_heap_s { mi_tld_t* tld; // thread-local data - mi_arena_t* exclusive_arena; // if the heap belongs to a specific arena (or NULL) + mi_arena_t* exclusive_arena; // if the heap should only allocate from a specific arena (or NULL) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation @@ -444,18 +444,18 @@ typedef struct mi_stat_counter_s { } mi_stat_counter_t; typedef struct mi_stats_s { - mi_stat_count_t pages; - mi_stat_count_t reserved; - mi_stat_count_t committed; - mi_stat_count_t reset; - mi_stat_count_t purged; - mi_stat_count_t page_committed; - mi_stat_count_t pages_abandoned; - mi_stat_count_t threads; - mi_stat_count_t normal; - mi_stat_count_t huge; - mi_stat_count_t giant; - mi_stat_count_t malloc; + mi_stat_count_t pages; + mi_stat_count_t reserved; + mi_stat_count_t committed; + mi_stat_count_t reset; + mi_stat_count_t purged; + mi_stat_count_t page_committed; + mi_stat_count_t pages_abandoned; + mi_stat_count_t threads; + mi_stat_count_t normal; + mi_stat_count_t huge; + mi_stat_count_t giant; + mi_stat_count_t malloc; mi_stat_counter_t pages_extended; mi_stat_counter_t pages_reclaim_on_alloc; mi_stat_counter_t pages_reclaim_on_free; @@ -479,37 +479,72 @@ typedef struct mi_stats_s { // add to stat keeping track of the peak -void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); -void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_increase(mi_stat_count_t* 
stat, size_t amount); +void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount); // adjust stat in special cases to compensate for double counting -void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc); -void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_free); +void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc); +void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_free); +void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc); +void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount, bool on_free); // counters can just be increased -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); +void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); +void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount); #if (MI_STAT) -#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) -#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) -#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) -#define mi_stat_adjust_increase(stat,amnt,b) _mi_stat_adjust_increase( &(stat), amnt, b) -#define mi_stat_adjust_decrease(stat,amnt,b) _mi_stat_adjust_decrease( &(stat), amnt, b) +#define mi_debug_stat_increase(stat,amount) __mi_stat_increase( &(stat), amount) +#define mi_debug_stat_decrease(stat,amount) __mi_stat_decrease( &(stat), amount) +#define mi_debug_stat_counter_increase(stat,amount) __mi_stat_counter_increase( &(stat), amount) +#define mi_debug_stat_increase_mt(stat,amount) __mi_stat_increase_mt( &(stat), amount) +#define mi_debug_stat_decrease_mt(stat,amount) __mi_stat_decrease_mt( &(stat), amount) +#define mi_debug_stat_counter_increase_mt(stat,amount)
__mi_stat_counter_increase_mt( &(stat), amount) +#define mi_debug_stat_adjust_increase_mt(stat,amnt,b) __mi_stat_adjust_increase_mt( &(stat), amnt, b) +#define mi_debug_stat_adjust_decrease_mt(stat,amnt,b) __mi_stat_adjust_decrease_mt( &(stat), amnt, b) #else -#define mi_stat_increase(stat,amount) ((void)0) -#define mi_stat_decrease(stat,amount) ((void)0) -#define mi_stat_counter_increase(stat,amount) ((void)0) -#define mi_stat_adjuct_increase(stat,amnt,b) ((void)0) -#define mi_stat_adjust_decrease(stat,amnt,b) ((void)0) +#define mi_debug_stat_increase(stat,amount) ((void)0) +#define mi_debug_stat_decrease(stat,amount) ((void)0) +#define mi_debug_stat_counter_increase(stat,amount) ((void)0) +#define mi_debug_stat_increase_mt(stat,amount) ((void)0) +#define mi_debug_stat_decrease_mt(stat,amount) ((void)0) +#define mi_debug_stat_counter_increase_mt(stat,amount) ((void)0) +#define mi_debug_stat_adjust_increase_mt(stat,amnt,b) ((void)0) // no-op; same name as in the MI_STAT branch +#define mi_debug_stat_adjust_decrease_mt(stat,amnt,b) ((void)0) // no-op; same name as in the MI_STAT branch #endif -#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) -#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) -#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) +#define mi_subproc_stat_counter_increase(subproc,stat,amount) __mi_stat_counter_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_increase(subproc,stat,amount) __mi_stat_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_decrease(subproc,stat,amount) __mi_stat_decrease_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_adjust_increase(subproc,stat,amnt,b) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amnt, b) +#define mi_subproc_stat_adjust_decrease(subproc,stat,amnt,b) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amnt, b) + +#define mi_os_stat_counter_increase(stat,amount)
mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount) + +#define mi_tld_stat_counter_increase(tld,stat,amount) __mi_stat_counter_increase( &(tld)->stats.stat, amount) +#define mi_tld_stat_increase(tld,stat,amount) __mi_stat_increase( &(tld)->stats.stat, amount) +#define mi_tld_stat_decrease(tld,stat,amount) __mi_stat_decrease( &(tld)->stats.stat, amount) + +#define mi_debug_tld_stat_counter_increase(tld,stat,amount) mi_debug_stat_counter_increase( (tld)->stats.stat, amount) +#define mi_debug_tld_stat_increase(tld,stat,amount) mi_debug_stat_increase( (tld)->stats.stat, amount) +#define mi_debug_tld_stat_decrease(tld,stat,amount) mi_debug_stat_decrease( (tld)->stats.stat, amount) + +#define mi_heap_stat_counter_increase(heap,stat,amount) mi_tld_stat_counter_increase((heap)->tld, stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) mi_tld_stat_increase( (heap)->tld, stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) mi_tld_stat_decrease( (heap)->tld, stat, amount) + +#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_tld_stat_counter_increase((heap)->tld, stat, amount) +#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_tld_stat_increase( (heap)->tld, stat, amount) +#define mi_debug_heap_stat_decrease(heap,stat,amount) mi_debug_tld_stat_decrease( (heap)->tld, stat, amount) // ------------------------------------------------------ // Sub processes use separate arena's and no heaps/pages/blocks // are shared between sub processes. 
-// Each thread should also belong to one sub-process only +// The subprocess structure contains essentially all static variables (except per subprocess :-)) +// +// Each thread should belong to one sub-process only // ------------------------------------------------------ #define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`) @@ -519,10 +554,13 @@ typedef struct mi_subproc_s { _Atomic(size_t) arena_count; // current count of arena's _Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time - _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process + + _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process mi_page_queue_t os_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on) mi_lock_t os_pages_lock; // lock for the os pages list (this lock protects list operations) + mi_memid_t memid; // provenance of this memory block (meta or OS) + mi_stats_t stats; // sub-process statistics (tld stats are merged in on thread termination) } mi_subproc_t; @@ -535,16 +573,16 @@ typedef int64_t mi_msecs_t; // Thread local data struct mi_tld_s { - mi_threadid_t thread_id; // thread id of this thread - size_t thread_seq; // thread sequence id (linear count of created threads) - mi_subproc_t* subproc; // sub-process this thread belongs to. - mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) - mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) - unsigned long long heartbeat; // monotonic heartbeat count - bool recurse; // true if deferred was called; used to prevent infinite recursion. 
- bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) - mi_stats_t stats; // statistics - mi_memid_t memid; // provenance of the tld memory itself (meta or OS) + mi_threadid_t thread_id; // thread id of this thread + size_t thread_seq; // thread sequence id (linear count of created threads) + mi_subproc_t* subproc; // sub-process this thread belongs to. + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) + unsigned long long heartbeat; // monotonic heartbeat count + bool recurse; // true if deferred was called; used to prevent infinite recursion. + bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) + mi_stats_t stats; // statistics + mi_memid_t memid; // provenance of the tld memory itself (meta or OS) }; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 14cbee45..5da9fc0c 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -193,9 +193,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; if mi_likely(is_aligned) { - #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc, size); - #endif + mi_debug_heap_stat_increase(heap, malloc, size); void* p = (zero ? 
_mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); diff --git a/src/arena.c b/src/arena.c index fd914f43..dcff8920 100644 --- a/src/arena.c +++ b/src/arena.c @@ -69,10 +69,6 @@ typedef struct mi_purge_info_s { Arena id's ----------------------------------------------------------- */ -static mi_arena_id_t mi_arena_id_create(mi_arena_t* arena) { - return arena; -} - mi_arena_id_t _mi_arena_id_none(void) { return NULL; } @@ -222,14 +218,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count); // adjust the stats so we don't double count the commits if (already_committed_count > 0) { - _mi_stat_adjust_decrease(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count), true /* on alloc */); + mi_subproc_stat_adjust_decrease(arena->subproc, committed, mi_size_of_slices(already_committed_count), true /* on alloc */); } // now actually commit bool commit_zero = false; if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero)) { // failed to commit (todo: give warning?) 
if (already_committed_count > 0) { - _mi_stat_increase(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); + mi_subproc_stat_increase(arena->subproc, committed, mi_size_of_slices(already_committed_count)); } memid->initially_committed = false; } @@ -251,7 +247,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( // if the OS has overcommit, and this is the first time we access these pages, then // count the commit now (as at arena reserve we didn't count those commits as these are on-demand) if (_mi_os_has_overcommit() && touched_slices > 0) { - _mi_stat_increase(&_mi_stats_main.committed, mi_size_of_slices(touched_slices)); + mi_subproc_stat_increase( arena->subproc, committed, mi_size_of_slices(touched_slices)); } } // tool support @@ -325,18 +321,18 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_ // on an OS with overcommit (Linux) we don't count the commit yet as it is on-demand. Once a slice // is actually allocated for the first time it will be counted. const bool adjust = (overcommit && arena_commit); - if (adjust) { _mi_stat_adjust_decrease(&_mi_stats_main.committed, arena_reserve, true /* on alloc */); } + if (adjust) { mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve, true /* on alloc */); } // and try to reserve the arena int err = mi_reserve_os_memory_ex2(subproc, arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); if (err != 0) { - if (adjust) { _mi_stat_adjust_increase(&_mi_stats_main.committed, arena_reserve, true); } // roll back + if (adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve, true); } // roll back // failed, try a smaller size? const size_t small_arena_reserve = (MI_SIZE_BITS == 32 ? 
128*MI_MiB : 1*MI_GiB); - if (adjust) { _mi_stat_adjust_decrease(&_mi_stats_main.committed, arena_reserve, true); } + if (adjust) { mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve, true); } if (arena_reserve > small_arena_reserve) { // try again err = mi_reserve_os_memory_ex(small_arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); - if (err != 0 && adjust) { _mi_stat_adjust_increase(&_mi_stats_main.committed, arena_reserve, true); } // roll back + if (err != 0 && adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve, true); } // roll back } } return (err==0); @@ -579,8 +575,8 @@ static mi_page_t* mi_arena_page_try_find_abandoned(mi_subproc_t* subproc, size_t mi_assert_internal(mi_page_is_abandoned(page)); mi_assert_internal(mi_arena_has_page(arena,page)); mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]); - _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1); - _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1); + mi_subproc_stat_decrease( arena->subproc, pages_abandoned, 1); + mi_subproc_stat_counter_increase(arena->subproc, pages_reclaim_on_alloc, 1); _mi_page_free_collect(page, false); // update `used` count mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count)); @@ -828,12 +824,13 @@ void _mi_arena_page_abandon(mi_page_t* page) { const bool wasclear = mi_bitmap_set(arena->pages_abandoned[bin], slice_index); MI_UNUSED(wasclear); mi_assert_internal(wasclear); mi_atomic_increment_relaxed(&arena->subproc->abandoned_count[bin]); + mi_subproc_stat_increase(arena->subproc, pages_abandoned, 1); } else { // page is full (or a singleton), page is OS/externally allocated // leave as is; it will be reclaimed when an object is free'd in the page - } - _mi_stat_increase(&_mi_stats_main.pages_abandoned, 1); + mi_subproc_stat_increase(_mi_subproc(), pages_abandoned, 1); + } _mi_page_unown(page); } @@ -850,8 +847,9 @@ bool 
_mi_arena_page_try_reabandon_to_mapped(mi_page_t* page) { return false; } else { - _mi_stat_counter_increase(&_mi_stats_main.pages_reabandon_full, 1); - _mi_stat_adjust_decrease(&_mi_stats_main.pages_abandoned, 1, true /* on alloc */); // adjust as we are not abandoning fresh + mi_subproc_t* subproc = _mi_subproc(); + mi_subproc_stat_counter_increase( subproc, pages_reabandon_full, 1); + mi_subproc_stat_adjust_decrease( subproc, pages_abandoned, 1, true /* on alloc */); // adjust as we are not abandoning fresh _mi_arena_page_abandon(page); return true; } @@ -879,13 +877,14 @@ void _mi_arena_page_unabandon(mi_page_t* page) { mi_bitmap_clear_once_set(arena->pages_abandoned[bin], slice_index); mi_page_clear_abandoned_mapped(page); mi_atomic_decrement_relaxed(&arena->subproc->abandoned_count[bin]); + mi_subproc_stat_decrease(arena->subproc, pages_abandoned, 1); } else { - // page is full (or a singleton), page is OS/nly allocated + // page is full (or a singleton), page is OS allocated // nothing to do // TODO: maintain count of these as well? - } - _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1); + mi_subproc_stat_decrease(_mi_subproc(), pages_abandoned, 1); + } } void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) { @@ -1016,7 +1015,7 @@ void _mi_arena_unsafe_destroy_all(void) { Add an arena. 
----------------------------------------------------------- */ -static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { +static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal(arena->slice_count > 0); if (arena_id != NULL) { *arena_id = NULL; } @@ -1043,7 +1042,7 @@ static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t return false; } - _mi_stat_counter_increase(&stats->arena_count,1); + mi_subproc_stat_counter_increase(arena->subproc, arena_count, 1); mi_atomic_store_ptr_release(mi_arena_t,&subproc->arenas[i], arena); if (arena_id != NULL) { *arena_id = arena; } return true; @@ -1149,7 +1148,7 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s mi_bitmap_setN(arena->slices_dirty, 0, info_slices, NULL); } - return mi_arena_add(subproc, arena, arena_id, &_mi_stats_main); + return mi_arena_add(subproc, arena, arena_id); } @@ -1414,7 +1413,7 @@ static bool mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_c // update committed bitmap if (needs_recommit) { - _mi_stat_adjust_decrease(&_mi_stats_main.committed, mi_size_of_slices(slice_count - already_committed), false /* on freed */); + mi_subproc_stat_adjust_decrease( arena->subproc, committed, mi_size_of_slices(slice_count - already_committed), false /* on freed */); mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count); } return needs_recommit; @@ -1506,7 +1505,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire_base, (mi_msecs_t)0)) { mi_atomic_storei64_release(&arena->purge_expire_extend, (mi_msecs_t)0); // and also reset the extend } - _mi_stat_counter_increase(&_mi_stats_main.arena_purges, 1); + mi_subproc_stat_counter_increase(arena->subproc, arena_purges, 1); // go through all 
purge info's (with max MI_BFIELD_BITS ranges at a time) // this also clears those ranges atomically (so any newly freed blocks will get purged next @@ -1647,7 +1646,7 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, arena->is_exclusive = true; arena->is_large = is_large; arena->subproc = NULL; - if (!mi_arena_add(_mi_subproc(), arena, arena_id, &_mi_stats_main)) { + if (!mi_arena_add(_mi_subproc(), arena, arena_id)) { return false; } mi_arena_pages_reregister(arena); diff --git a/src/bitmap.c b/src/bitmap.c index 6352e4ea..e4a4cc2d 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -106,7 +106,9 @@ static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_ do { if mi_unlikely((old&mask) == 0) { old = mi_atomic_load_acquire(b); - if ((old&mask)==0) { _mi_stat_counter_increase(&_mi_stats_main.pages_unabandon_busy_wait, 1); } + if ((old&mask)==0) { + mi_subproc_stat_counter_increase(_mi_subproc(), pages_unabandon_busy_wait, 1); + } while ((old&mask)==0) { // busy wait mi_atomic_yield(); old = mi_atomic_load_acquire(b); diff --git a/src/free.c b/src/free.c index 770856da..88f784c7 100644 --- a/src/free.c +++ b/src/free.c @@ -242,7 +242,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { // first remove it from the abandoned pages in the arena -- this waits for any readers to finish _mi_arena_page_unabandon(page); _mi_heap_page_reclaim(tagheap, page); - _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_free, 1); + mi_heap_stat_counter_increase(tagheap, pages_reclaim_on_free, 1); return; } } diff --git a/src/heap.c b/src/heap.c index e8743691..d82b383f 100644 --- a/src/heap.c +++ b/src/heap.c @@ -141,7 +141,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - + // collect arenas (this is program wide so don't force purges on abandonment of threads) 
_mi_arenas_collect(collect == MI_FORCE /* force purge? */); } @@ -183,9 +183,9 @@ mi_heap_t* mi_heap_get_backing(void) { } // todo: make order of parameters consistent (but would that break compat with CPython?) -void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t heap_tag, mi_tld_t* tld) +void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t heap_tag, mi_tld_t* tld) { - mi_assert_internal(heap!=NULL); + mi_assert_internal(heap!=NULL); mi_memid_t memid = heap->memid; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->memid = memid; @@ -204,7 +204,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint heap->full_page_retain = heap->full_page_retain / 4; } } - + if (heap->tld->heap_backing == NULL) { heap->tld->heap_backing = heap; // first heap becomes the backing heap _mi_random_init(&heap->random); @@ -240,7 +240,7 @@ mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_assert_internal(bheap != NULL); - return _mi_heap_create(heap_tag, allow_destroy, arena_id, bheap->tld); + return _mi_heap_create(heap_tag, allow_destroy, arena_id, bheap->tld); } mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { @@ -333,17 +333,17 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ if (bsize > MI_LARGE_MAX_OBJ_SIZE) { mi_heap_stat_decrease(heap, huge, bsize); } -#if (MI_STAT) + #if (MI_STAT) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_MAX_OBJ_SIZE) { mi_heap_stat_decrease(heap, normal, bsize * inuse); -#if (MI_STAT>1) + #if (MI_STAT>1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], inuse); -#endif + #endif } mi_heap_stat_decrease(heap, malloc, bsize * inuse); // 
todo: off for aligned blocks... -#endif + #endif /// pretend it is all free now mi_assert_internal(mi_page_thread_free(page) == NULL); @@ -460,7 +460,7 @@ void mi_heap_delete(mi_heap_t* heap) // transfer still used pages to the backing heap mi_heap_absorb(bheap, heap); } - else + else */ { // abandon all pages diff --git a/src/init.c b/src/init.c index 177ca2bd..5159941a 100644 --- a/src/init.c +++ b/src/init.c @@ -34,7 +34,7 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, // keys #endif NULL, // xheap - NULL, NULL, // next, prev + NULL, NULL, // next, prev MI_MEMID_STATIC // memid }; @@ -103,7 +103,7 @@ static mi_decl_cache_align mi_tld_t tld_empty = { 0, // thread_seq &subproc_main, // subproc NULL, // heap_backing - NULL, // heaps list + NULL, // heaps list 0, // heartbeat false, // recurse false, // is_in_threadpool @@ -139,7 +139,7 @@ static mi_decl_cache_align mi_tld_t tld_main = { 0, // thread_seq &subproc_main, // subproc &heap_main, // heap_backing - &heap_main, // heaps list + &heap_main, // heaps list 0, // heartbeat false, // recurse false, // is_in_threadpool @@ -165,7 +165,7 @@ mi_decl_cache_align mi_heap_t heap_main = { #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - MI_MEMID_STATIC + MI_MEMID_STATIC }; @@ -237,7 +237,7 @@ static void mi_tld_main_init(void) { // Initialization of the (statically allocated) main heap, and the main tld and subproc. 
static void mi_heap_main_init(void) { - if (heap_main.cookie == 0) { + if (heap_main.cookie == 0) { mi_subproc_main_init(); mi_tld_main_init(); // heap @@ -249,7 +249,7 @@ static void mi_heap_main_init(void) { #endif heap_main.cookie = _mi_heap_random_next(&heap_main); heap_main.keys[0] = _mi_heap_random_next(&heap_main); - heap_main.keys[1] = _mi_heap_random_next(&heap_main); + heap_main.keys[1] = _mi_heap_random_next(&heap_main); _mi_heap_guarded_init(&heap_main); heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); @@ -266,14 +266,21 @@ mi_heap_t* heap_main_get(void) { Thread local data ----------------------------------------------------------- */ -// Thread sequence number -static _Atomic(size_t) mi_tcount; +// Count current and total created threads +static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); +static _Atomic(size_t) thread_total_count; + +size_t _mi_current_thread_count(void) { + return mi_atomic_load_relaxed(&thread_count); +} + // The mimalloc thread local data -mi_decl_thread mi_tld_t* mi_tld; +mi_decl_thread mi_tld_t* thread_tld = &tld_empty; // Allocate fresh tld static mi_tld_t* mi_tld_alloc(void) { + mi_atomic_increment_relaxed(&thread_count); if (_mi_is_main_thread()) { return &tld_main; } @@ -292,7 +299,7 @@ static mi_tld_t* mi_tld_alloc(void) { tld->heaps = NULL; tld->subproc = &subproc_main; tld->thread_id = _mi_prim_thread_id(); - tld->thread_seq = mi_atomic_add_acq_rel(&mi_tcount, 1); + tld->thread_seq = mi_atomic_add_acq_rel(&thread_total_count, 1); tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); return tld; } @@ -301,28 +308,38 @@ static mi_tld_t* mi_tld_alloc(void) { #define MI_TLD_INVALID ((mi_tld_t*)1) mi_decl_noinline static void mi_tld_free(void) { - mi_tld_t* tld = _mi_tld(); - mi_tld = MI_TLD_INVALID; - _mi_meta_free(tld, sizeof(mi_tld_t), tld->memid); + mi_tld_t* tld = _mi_tld(); + if (tld != 
NULL && tld != MI_TLD_INVALID) { + _mi_stats_done(&tld->stats); + _mi_meta_free(tld, sizeof(mi_tld_t), tld->memid); + } + thread_tld = MI_TLD_INVALID; // invalidate the thread-local itself (not just the local copy) so a later access is detected in `_mi_tld` + mi_atomic_decrement_relaxed(&thread_count); } mi_decl_noinline mi_tld_t* _mi_tld(void) { - if (mi_tld == MI_TLD_INVALID) { - _mi_error_message(EFAULT, "internal error: tld accessed after the thread terminated\n"); - mi_tld = NULL; + mi_tld_t* tld = thread_tld; + if (tld == MI_TLD_INVALID) { + _mi_error_message(EFAULT, "internal error: tld is accessed after the thread terminated\n"); + thread_tld = tld = &tld_empty; // also reset the local copy so a fresh tld is allocated below instead of returning the invalid marker } - if (mi_tld==NULL) { - mi_tld = mi_tld_alloc(); + if (tld==&tld_empty) { + thread_tld = tld = mi_tld_alloc(); } - return mi_tld; + return tld; } mi_subproc_t* _mi_subproc(void) { - if (_mi_is_main_thread()) { // during initialization we should not recurse over reading the _mi_tld - return &subproc_main; + // should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()`) + // todo: this will still fail on OS systems where the first access to a thread-local causes allocation. + // on such systems we can check for this with the _mi_prim_get_default_heap as those are protected (by being + // stored in a TLS slot for example) + mi_heap_t* heap = mi_prim_get_default_heap(); + if (heap == NULL || heap == &_mi_heap_empty) { + return _mi_subproc_main(); } else { - return _mi_tld()->subproc; + return thread_tld->subproc; // don't call `_mi_tld()` } } @@ -396,11 +413,11 @@ static bool _mi_thread_heap_init(void) { //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { - // allocates tld data - // note: we cannot access thread-locals yet as that can cause (recursive) allocation + // allocates tld data + // note: we cannot access thread-locals yet as that can cause (recursive) allocation // (on macOS <= 14 for example where the loader allocates thread-local data on demand).
- mi_tld_t* tld = mi_tld_alloc(); - + mi_tld_t* tld = mi_tld_alloc(); + // allocate and initialize the heap mi_heap_t* heap = _mi_heap_create(0 /* default tag */, false /* allow destroy? */, _mi_arena_id_none(), tld); @@ -409,7 +426,7 @@ static bool _mi_thread_heap_init(void) { _mi_heap_set_default_direct(heap); // now that the heap is set for this thread, we can set the thread-local tld. - mi_tld = tld; + thread_tld = tld; } return false; } @@ -444,9 +461,6 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { _mi_heap_collect_abandon(heap); } - // merge stats - _mi_stats_done(&heap->tld->stats); - // free heap meta data _mi_meta_free(heap, sizeof(mi_heap_t), heap->memid); @@ -494,11 +508,6 @@ bool _mi_is_main_thread(void) { return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id()); } -static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); - -size_t _mi_current_thread_count(void) { - return mi_atomic_load_relaxed(&thread_count); -} // This is called from the `mi_malloc_generic` void mi_thread_init(void) mi_attr_noexcept @@ -511,8 +520,7 @@ void mi_thread_init(void) mi_attr_noexcept // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) if (_mi_thread_heap_init()) return; // returns true if already initialized - _mi_stat_increase(&_mi_stats_main.threads, 1); - mi_atomic_increment_relaxed(&thread_count); + mi_subproc_stat_increase(_mi_subproc_main(), threads, 1); //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); } @@ -534,15 +542,14 @@ void _mi_thread_done(mi_heap_t* heap) } // adjust stats - mi_atomic_decrement_relaxed(&thread_count); - _mi_stat_decrease(&_mi_stats_main.threads, 1); + mi_subproc_stat_decrease(_mi_subproc_main(), threads, 1); // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... 
if (heap->tld->thread_id != _mi_prim_thread_id()) return; // abandon the thread local heap _mi_thread_heap_done(heap); // returns true if already ran - + // free thread local data mi_tld_free(); } @@ -654,7 +661,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_prim_thread_associate_default_heap(NULL); #endif - mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) mi_track_init(); if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { diff --git a/src/os.c b/src/os.c index 86ecb16b..53e8f571 100644 --- a/src/os.c +++ b/src/os.c @@ -114,9 +114,9 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } if (still_committed) { - _mi_stat_decrease(&os_stats->committed, size); + mi_os_stat_decrease(committed, size); } - _mi_stat_decrease(&os_stats->reserved, size); + mi_os_stat_decrease(reserved, size); } void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { @@ -171,11 +171,11 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large); } - _mi_stat_counter_increase(&os_stats->mmap_calls, 1); + mi_os_stat_counter_increase(mmap_calls, 1); if (p != NULL) { - _mi_stat_increase(&os_stats->reserved, size); + mi_os_stat_increase(reserved, size); if (commit) { - _mi_stat_increase(&os_stats->committed, size); + mi_os_stat_increase(committed, size); // seems needed for asan (or `mimalloc-test-api` fails) #ifdef MI_TRACK_ASAN if (*is_zero) { mi_track_mem_defined(p,size); } @@ -290,7 +290,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t 
alignment, bool commit, bool allo if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); - + bool os_is_large = false; bool os_is_zero = false; void* os_base = NULL; @@ -379,8 +379,8 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* bool _mi_os_commit(void* addr, size_t size, bool* is_zero) { if (is_zero != NULL) { *is_zero = false; } - _mi_stat_increase(&os_stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&os_stats->commit_calls, 1); + mi_os_stat_increase(committed, size); // use size for precise commit vs. decommit + mi_os_stat_counter_increase(commit_calls, 1); // page align range size_t csize; @@ -408,7 +408,7 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero) { static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit) { mi_assert_internal(needs_recommit!=NULL); - _mi_stat_decrease(&os_stats->committed, size); + mi_os_stat_decrease(committed, size); // page align size_t csize; @@ -440,8 +440,8 @@ bool _mi_os_reset(void* addr, size_t size) { size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - _mi_stat_increase(&os_stats->reset, csize); - _mi_stat_counter_increase(&os_stats->reset_calls, 1); + mi_os_stat_increase(reset, csize); + mi_os_stat_counter_increase(reset_calls, 1); #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN memset(start, 0, csize); // pretend it is eagerly reset @@ -460,8 +460,8 @@ bool _mi_os_reset(void* addr, size_t size) { bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset) { if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? 
- _mi_stat_counter_increase(&os_stats->purge_calls, 1); - _mi_stat_increase(&os_stats->purged, size); + mi_os_stat_counter_increase(purge_calls, 1); + mi_os_stat_increase(purged, size); if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? !_mi_preloading()) // don't decommit during preloading (unsafe) @@ -595,8 +595,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // success, record it page++; // increase before timeout check (see issue #711) - _mi_stat_increase(&os_stats->committed, MI_HUGE_OS_PAGE_SIZE); - _mi_stat_increase(&os_stats->reserved, MI_HUGE_OS_PAGE_SIZE); + mi_os_stat_increase(committed, MI_HUGE_OS_PAGE_SIZE); + mi_os_stat_increase(reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout if (max_msecs > 0) { diff --git a/src/page.c b/src/page.c index 0444b47e..31dbcc7d 100644 --- a/src/page.c +++ b/src/page.c @@ -387,9 +387,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { const size_t bsize = mi_page_block_size(page); if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? - mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); + mi_debug_heap_stat_counter_increase(heap, page_no_retire, 1); + page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? 
MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE); @@ -554,7 +554,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) { size_t page_size; //uint8_t* page_start = mi_page_area(page, &page_size); - mi_heap_stat_counter_increase(heap, pages_extended, 1); + mi_debug_heap_stat_counter_increase(heap, pages_extended, 1); // calculate the extend count const size_t bsize = mi_page_block_size(page); @@ -583,7 +583,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) { } // enable the new free list page->capacity += (uint16_t)extend; - mi_heap_stat_increase(heap, page_committed, extend * bsize); + mi_debug_heap_stat_increase(heap, page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -709,8 +709,8 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m page = next; } // for each page - mi_heap_stat_counter_increase(heap, searches, count); - + mi_debug_heap_stat_counter_increase(heap, searches, count); + // set the page to the best candidate if (page_candidate != NULL) { page = page_candidate; diff --git a/src/stats.c b/src/stats.c index bb17b936..2a395ed5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -19,88 +19,93 @@ terms of the MIT license. 
A copy of the license can be found in the file Statistics operations ----------------------------------------------------------- */ -static bool mi_is_in_main(void* stat) { - return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main - && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); +static void mi_stat_update_mt(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + // add atomically + int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_maxi64_relaxed(&stat->peak, current + amount); + if (amount > 0) { + mi_atomic_addi64_relaxed(&stat->allocated, amount); + } + else { + mi_atomic_addi64_relaxed(&stat->freed, -amount); + } } static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (amount == 0) return; - if mi_unlikely(mi_is_in_main(stat)) - { - // add atomically (for abandoned pages) - int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); - mi_atomic_maxi64_relaxed(&stat->peak, current + amount); - if (amount > 0) { - mi_atomic_addi64_relaxed(&stat->allocated,amount); - } - else { - mi_atomic_addi64_relaxed(&stat->freed, -amount); - } + // add thread local + stat->current += amount; + if (stat->current > stat->peak) stat->peak = stat->current; + if (amount > 0) { + stat->allocated += amount; } else { - // add thread local - stat->current += amount; - if (stat->current > stat->peak) stat->peak = stat->current; - if (amount > 0) { - stat->allocated += amount; - } - else { - stat->freed += -amount; - } + stat->freed += -amount; } } + // Adjust stats to compensate; for example before committing a range, // first adjust downwards with parts that were already committed so // we avoid double counting. +static void mi_stat_adjust_mt(mi_stat_count_t* stat, int64_t amount, bool on_alloc) { + if (amount == 0) return; + // adjust atomically + mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_addi64_relaxed((on_alloc ? 
&stat->allocated : &stat->freed), amount); +} + static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount, bool on_alloc) { if (amount == 0) return; - if mi_unlikely(mi_is_in_main(stat)) - { - // adjust atomically - mi_atomic_addi64_relaxed(&stat->current, amount); - mi_atomic_addi64_relaxed((on_alloc ? &stat->allocated : &stat->freed), amount); + stat->current += amount; + if (on_alloc) { + stat->allocated += amount; } else { - // don't affect the peak - stat->current += amount; - if (on_alloc) { - stat->allocated += amount; - } - else { - stat->freed += amount; - } + stat->freed += amount; } } -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { - if (mi_is_in_main(stat)) { - mi_atomic_addi64_relaxed( &stat->count, 1 ); - mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); - } - else { - stat->count++; - stat->total += amount; - } +void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount) { + mi_atomic_addi64_relaxed(&stat->count, 1); + mi_atomic_addi64_relaxed(&stat->total, (int64_t)amount); } -void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) { +void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { + stat->count++; + stat->total += amount; +} + +void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount) { + mi_stat_update_mt(stat, (int64_t)amount); +} +void __mi_stat_increase(mi_stat_count_t* stat, size_t amount) { mi_stat_update(stat, (int64_t)amount); } -void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { +void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount) { + mi_stat_update_mt(stat, -((int64_t)amount)); +} +void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { mi_stat_update(stat, -((int64_t)amount)); } -void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc) { +void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc) { + mi_stat_adjust_mt(stat, (int64_t)amount, on_alloc); +} 
+void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc) { mi_stat_adjust(stat, (int64_t)amount, on_alloc); } -void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_alloc) { +void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc) { + mi_stat_adjust_mt(stat, -((int64_t)amount), on_alloc); +} +void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_alloc) { mi_stat_adjust(stat, -((int64_t)amount), on_alloc); } + // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; @@ -401,27 +406,29 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) static mi_msecs_t mi_process_start; // = 0 -static mi_stats_t* mi_stats_get_default(void) { - mi_heap_t* heap = mi_heap_get_default(); - return &heap->tld->stats; +// return thread local stats +static mi_stats_t* mi_get_tld_stats(void) { + return &_mi_tld()->stats; } static void mi_stats_merge_from(mi_stats_t* stats) { - if (stats != &_mi_stats_main) { - mi_stats_add(&_mi_stats_main, stats); - memset(stats, 0, sizeof(mi_stats_t)); + mi_subproc_t* subproc = _mi_subproc(); + if (stats != &subproc->stats) { + mi_stats_add(&subproc->stats, stats); + _mi_memzero(stats, sizeof(mi_stats_t)); } } void mi_stats_reset(void) mi_attr_noexcept { - mi_stats_t* stats = mi_stats_get_default(); - if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } - memset(&_mi_stats_main, 0, sizeof(mi_stats_t)); + mi_stats_t* stats = mi_get_tld_stats(); + mi_subproc_t* subproc = _mi_subproc(); + if (stats != &subproc->stats) { _mi_memzero(stats, sizeof(mi_stats_t)); } + _mi_memzero(&subproc->stats, sizeof(mi_stats_t)); if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); }; } void mi_stats_merge(void) mi_attr_noexcept { - mi_stats_merge_from( mi_stats_get_default() ); + mi_stats_merge_from( 
mi_get_tld_stats() ); } void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` @@ -429,8 +436,8 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { - mi_stats_merge_from(mi_stats_get_default()); - _mi_stats_print(&_mi_stats_main, out, arg); + mi_stats_merge_from(mi_get_tld_stats()); + _mi_stats_print(&_mi_subproc()->stats, out, arg); } void mi_stats_print(void* out) mi_attr_noexcept { @@ -439,7 +446,7 @@ void mi_stats_print(void* out) mi_attr_noexcept { } void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { - _mi_stats_print(mi_stats_get_default(), out, arg); + _mi_stats_print(mi_get_tld_stats(), out, arg); } @@ -473,11 +480,12 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { + mi_subproc_t* subproc = _mi_subproc(); mi_process_info_t pinfo; _mi_memzero_var(pinfo); pinfo.elapsed = _mi_clock_end(mi_process_start); - pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.current))); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.peak))); pinfo.current_rss = pinfo.current_commit; pinfo.peak_rss = pinfo.peak_commit; pinfo.utime = 0; diff --git a/test/test-stress.c b/test/test-stress.c index b35743df..0920a02e 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -48,10 +48,10 @@ static int ITER = 20; static int THREADS = 32; static int SCALE = 50; static int ITER = 50; -#elif 
0 -static int THREADS = 64; -static int SCALE = 400; -static int ITER = 10; +#elif 1 +static int THREADS = 32; +static int SCALE = 25; +static int ITER = 50; #define ALLOW_LARGE true #else static int THREADS = 32; // more repeatable if THREADS <= #processors