From 9a35bca55645a131092a91797f851794423175f6 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 16:12:17 -0700 Subject: [PATCH 01/20] possible fix for wrong accounting of committed bytes (issue #1035) --- src/arena.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index 9d40a271..1f6f6d9d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -648,15 +648,16 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); + const bool decommitted_size = (committed_size <= size ? size - committed_size : 0); // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) mi_track_mem_undefined(p,size); if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through - if (!all_committed && committed_size > 0) { - // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) - _mi_stat_decrease(&_mi_stats_main.committed, committed_size); + if (!all_committed && decommitted_size > 0) { + // if partially committed, adjust the committed stats (as `_mi_os_free` will decrease commit by the full size) + _mi_stat_increase(&_mi_stats_main.committed, decommitted_size); } _mi_os_free(p, size, memid); } @@ -695,7 +696,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_track_mem_noaccess(p,size); if (committed_size > 0) { // if partially committed, adjust the committed stats (is it will be recommitted when re-using) - // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. + // in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed. _mi_stat_decrease(&_mi_stats_main.committed, committed_size); } // note: if not all committed, it may be that the purge will reset/decommit the entire range From 26fa8be42759ac39f7b4869b4e0936bd35a8be17 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 18:50:53 -0700 Subject: [PATCH 02/20] improved accounting of committed bytes (issue #1035) --- include/mimalloc/internal.h | 16 +++++++++++ src/arena.c | 39 ++++++++++++++++--------- src/bitmap.c | 28 +++++++++++------- src/bitmap.h | 4 +-- src/libc.c | 57 +++++++++++++++++++++++++++++++++++++ src/stats.c | 1 + 6 files changed, 119 insertions(+), 26 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 106da0d1..5b3e7e23 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -127,6 +127,7 @@ bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); bool _mi_os_commit(void* p, size_t size, bool* is_zero); +bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); @@ -947,6 +948,21 @@ static inline size_t mi_bsr(size_t x) { return (x==0 ? 
MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x)); } +size_t _mi_popcount_generic(size_t x); + +static inline size_t mi_popcount(size_t x) { + if (x<=1) return x; + if (x==SIZE_MAX) return MI_SIZE_BITS; + #if defined(__GNUC__) + #if (SIZE_MAX == ULONG_MAX) + return __builtin_popcountl(x); + #else + return __builtin_popcountll(x); + #endif + #else + return _mi_popcount_generic(x); + #endif +} // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. diff --git a/src/arena.c b/src/arena.c index 1f6f6d9d..a7c20764 100644 --- a/src/arena.c +++ b/src/arena.c @@ -255,7 +255,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar // set the dirty bits (todo: no need for an atomic op here?) if (arena->memid.initially_zero && arena->blocks_dirty != NULL) { - memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL); } // set commit state @@ -267,10 +267,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; bool any_uncommitted; - _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); + size_t already_committed = 0; + _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed); if (any_uncommitted) { + mi_assert_internal(already_committed < needed_bcount); + const size_t commit_size = mi_arena_block_size(needed_bcount); + const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed); bool commit_zero = false; - if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) { + if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) { memid->initially_committed = false; } else { @@ -280,7 +284,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar } else { // no need to commit, but check if already fully committed - memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + size_t already_committed = 0; + memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed); + if (!memid->initially_committed && already_committed > 0) { + // partially committed: as it will be committed at some time, adjust the stats and pretend the range is fully uncommitted. 
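+      // (the commit bitmap for this range is cleared below, so a future commit will
+      // increase the committed stat by the full size; decreasing the stat here by the
+      // already committed part keeps the counter balanced)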
+ mi_assert_internal(already_committed < needed_bcount); + _mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed)); + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + } } return p; @@ -464,17 +475,19 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks) const size_t size = mi_arena_block_size(blocks); void* const p = mi_arena_block_start(arena, bitmap_idx); bool needs_recommit; - if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { + size_t already_committed = 0; + if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) { // all blocks are committed, we can purge freely + mi_assert_internal(already_committed == blocks); needs_recommit = _mi_os_purge(p, size); } else { // some blocks are not committed -- this can happen when a partially committed block is freed // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge - // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), - // and also undo the decommit stats (as it was already adjusted) + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory). + mi_assert_internal(already_committed < blocks); mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed)); } // clear the purged blocks @@ -508,7 +521,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t else { // already an expiration was set } - _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); + _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL); } } @@ -648,7 +661,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); - const bool decommitted_size = (committed_size <= size ? size - committed_size : 0); + const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0); // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) mi_track_mem_undefined(p,size); @@ -691,14 +704,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_assert_internal(arena->blocks_purge != NULL); if (!all_committed) { - // mark the entire range as no longer committed (so we recommit the full range when re-using) + // mark the entire range as no longer committed (so we will recommit the full range when re-using) _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); mi_track_mem_noaccess(p,size); - if (committed_size > 0) { + //if (committed_size > 0) { // if partially committed, adjust the committed stats (is it will be recommitted when re-using) // in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed. _mi_stat_decrease(&_mi_stats_main.committed, committed_size); - } + //} // note: if not all committed, it may be that the purge will reset/decommit the entire range // that contains already decommitted parts. 
Since purge consistently uses reset or decommit that // works (as we should never reset decommitted parts). diff --git a/src/bitmap.c b/src/bitmap.c index 9ef784d6..50f4df2b 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -351,7 +351,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. -bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; @@ -359,28 +359,31 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_zero = true; bool any_zero = false; + size_t one_count = 0; _Atomic(size_t)*field = &bitmap[idx]; size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); - if ((prev & pre_mask) != 0) all_zero = false; + if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); } if ((prev & pre_mask) != pre_mask) any_zero = true; while (mid_count-- > 0) { prev = mi_atomic_or_acq_rel(field++, mid_mask); - if ((prev & mid_mask) != 0) all_zero = false; + if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); } if ((prev & mid_mask) != mid_mask) any_zero = true; } if (post_mask!=0) { prev = mi_atomic_or_acq_rel(field, post_mask); - if ((prev & post_mask) != 0) all_zero = false; + if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); } if ((prev & post_mask) != post_mask) any_zero = true; } if (pany_zero != NULL) { *pany_zero = any_zero; } + if (already_set != NULL) { *already_set = one_count; }; + mi_assert_internal(all_zero ? one_count == 0 : one_count <= count); return all_zero; } // Returns `true` if all `count` bits were 1. // `any_ones` is `true` if there was at least one bit set to one. 
-static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { +static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) { size_t idx = mi_bitmap_index_field(bitmap_idx); size_t pre_mask; size_t mid_mask; @@ -388,30 +391,33 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_ones = true; bool any_ones = false; + size_t one_count = 0; mi_bitmap_field_t* field = &bitmap[idx]; size_t prev = mi_atomic_load_relaxed(field++); if ((prev & pre_mask) != pre_mask) all_ones = false; - if ((prev & pre_mask) != 0) any_ones = true; + if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); } while (mid_count-- > 0) { prev = mi_atomic_load_relaxed(field++); if ((prev & mid_mask) != mid_mask) all_ones = false; - if ((prev & mid_mask) != 0) any_ones = true; + if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); } } if (post_mask!=0) { prev = mi_atomic_load_relaxed(field); if ((prev & post_mask) != post_mask) all_ones = false; - if ((prev & post_mask) != 0) any_ones = true; + if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); } } if (pany_ones != NULL) { *pany_ones = any_ones; } + if (already_set != NULL) { *already_set = one_count; } + mi_assert_internal(all_ones ? one_count == count : one_count < count); return all_ones; } -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { - return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL); +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) { + return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set); } bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { bool any_ones; - mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones); + mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL); return any_ones; } diff --git a/src/bitmap.h b/src/bitmap.h index d60668cb..60b38815 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -102,9 +102,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero); +bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set); -bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set); bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); #endif diff --git a/src/libc.c b/src/libc.c index 1bd97aa3..52d095eb 100644 --- a/src/libc.c +++ b/src/libc.c @@ -275,3 +275,60 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) { va_end(args); return written; } + + +#if MI_SIZE_SIZE == 4 +#define mi_mask_even_bits32 (0x55555555) +#define mi_mask_even_pairs32 (0x33333333) +#define mi_mask_even_nibbles32 (0x0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! +static size_t mi_byte_sum32(uint32_t x) { + // perform `x * 0x01010101`: the highest byte contains the sum of all bytes. + x += (x << 8); + x += (x << 16); + return (size_t)(x >> 24); +} + +static size_t mi_popcount_generic32(uint32_t x) { + // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10 + // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair + // into the lower bit-pair: + x = x - ((x >> 1) & mi_mask_even_bits32); + // add the 2-bit pair results + x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32); + // add the 4-bit nibble results + x = (x + (x >> 4)) & mi_mask_even_nibbles32; + // each byte now has a count of its bits, we can sum them now: + return mi_byte_sum32(x); +} + +mi_decl_noinline size_t _mi_popcount_generic(size_t x) { + return mi_popcount_generic32(x); +} + +#else +#define mi_mask_even_bits64 (0x5555555555555555) +#define mi_mask_even_pairs64 (0x3333333333333333) +#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F) + +// sum of all the bytes in `x` if it is guaranteed that the sum < 256! 
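+// (when called from the popcount below, each byte of `x` holds the bit count of one
+// source byte, at most 8, so the total is at most 64 and the sum indeed fits)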
+static size_t mi_byte_sum64(uint64_t x) { + x += (x << 8); + x += (x << 16); + x += (x << 32); + return (size_t)(x >> 56); +} + +static size_t mi_popcount_generic64(uint64_t x) { + x = x - ((x >> 1) & mi_mask_even_bits64); + x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64); + x = (x + (x >> 4)) & mi_mask_even_nibbles64; + return mi_byte_sum64(x); +} + +mi_decl_noinline size_t _mi_popcount_generic(size_t x) { + return mi_popcount_generic64(x); +} +#endif + diff --git a/src/stats.c b/src/stats.c index 1cfc3104..6a480816 100644 --- a/src/stats.c +++ b/src/stats.c @@ -30,6 +30,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { { // add atomically (for abandoned pages) int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + // if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); }; mi_atomic_maxi64_relaxed(&stat->peak, current + amount); if (amount > 0) { mi_atomic_addi64_relaxed(&stat->total,amount); From 47bf3a5b1b5dd1f85a1ff75bb046f9f8e6dfcdb1 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:06:21 -0700 Subject: [PATCH 03/20] potential fix for sporadic assertion failure on random returning 0 (issue #1039) --- src/random.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/random.c b/src/random.c index 4fc8b2f8..f17698ba 100644 --- a/src/random.c +++ b/src/random.c @@ -143,13 +143,17 @@ void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) { uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { mi_assert_internal(mi_random_is_initialized(ctx)); - #if MI_INTPTR_SIZE <= 4 - return chacha_next32(ctx); - #elif MI_INTPTR_SIZE == 8 - return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); - #else - # error "define mi_random_next for this platform" - #endif + uintptr_t r; + do { + #if MI_INTPTR_SIZE <= 4 + r = chacha_next32(ctx); + #elif MI_INTPTR_SIZE == 8 + r = (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx)); + #else + # error "define mi_random_next for this platform" + #endif + } while (r==0); + return r; } @@ -163,7 +167,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; - for (uintptr_t i = 0; i < max; i++) { + for (uintptr_t i = 0; i < max || x==0; i++, x++) { x = _mi_random_shuffle(x); } mi_assert_internal(x != 0); @@ -179,7 +183,7 @@ static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); } #endif uintptr_t x = _mi_os_random_weak(0); - for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. + for (size_t i = 0; i < 8; i++, x++) { // key is eight 32-bit words. 
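+    // advancing x between rounds ensures that a shuffle result of 0 cannot
+    // repeat itself into all remaining key words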
x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } From 1aa88e0d9ad631ce7ed737a41aca873a61534939 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:11:38 -0700 Subject: [PATCH 04/20] try to fix pipeline trigger --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a803cd15..c4dc1627 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -7,9 +7,9 @@ trigger: branches: include: - master - - dev - - dev2 - dev3 + - dev2 + - dev tags: include: - v* From afbc581f8dfdc92f69faa2ec57e18128c54fcd44 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 19:16:10 -0700 Subject: [PATCH 05/20] add Windows x86 to the build pipeline --- azure-pipelines.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c4dc1627..25d4a6e0 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -34,6 +34,14 @@ jobs: BuildType: secure cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON MSBuildConfiguration: Release + Debug x86: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -A Win32 + MSBuildConfiguration: Debug + Release x86: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 + MSBuildConfiguration: Release steps: - task: CMake@1 inputs: From b2dcab58f7d1696795bae0e5bf33ffc229662ee9 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:20:36 -0700 Subject: [PATCH 06/20] fix assertion failure (issue #1031) --- src/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index e2730b7f..75f8dacb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -523,7 +523,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se void _mi_segments_collect(bool force, mi_segments_tld_t* tld) { mi_pages_try_purge(force,tld); #if MI_DEBUG>=2 - if (!_mi_is_main_thread()) { + if (!_mi_is_main_thread() && force) { mi_assert_internal(tld->pages_purge.first == NULL); mi_assert_internal(tld->pages_purge.last == NULL); } From 7eafaa968598fc6b1261103f0f53b0db2bc56139 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:22:02 -0700 Subject: [PATCH 07/20] fix visibility warning (issue #1031) --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index 215eed20..8a48ae5e 100644 --- a/src/init.c +++ b/src/init.c @@ -95,7 +95,7 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- -mi_decl_hidden mi_decl_cache_align const mi_heap_t _mi_heap_empty = { +mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, MI_ATOMIC_VAR_INIT(NULL), 0, // tid From 660d749d77822e54b77acecf82f1aa8f348625ae Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 19 Mar 2025 20:29:29 -0700 Subject: [PATCH 08/20] do not default to MI_DEBUG=2 in release mode builds even when NDEBUG is not defined by defininig MI_BUILD_RELEASE (issue #1037) --- CMakeLists.txt | 4 +++- include/mimalloc/types.h | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d780fa1..2b1292cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -532,7 +532,9 @@ if(MI_TRACK_ASAN) endif() string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC) list(APPEND mi_defines "MI_CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE_LC}") #todo: multi-config project needs $ ? 
-if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$")) +if(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$") + list(APPEND mi_defines MI_BUILD_RELEASE) +else() set(mi_libname "${mi_libname}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version endif() diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 9f743149..5bcdb07f 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -66,10 +66,10 @@ terms of the MIT license. A copy of the license can be found in the file // #define MI_DEBUG 2 // + internal assertion checks // #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON) #if !defined(MI_DEBUG) -#if !defined(NDEBUG) || defined(_DEBUG) -#define MI_DEBUG 2 -#else +#if defined(MI_BUILD_RELEASE) || defined(NDEBUG) #define MI_DEBUG 0 +#else +#define MI_DEBUG 2 #endif #endif From cf08c27d2b6b82232dcfc0882642b5983efd95f9 Mon Sep 17 00:00:00 2001 From: Jo Bates <29763794+jbatez@users.noreply.github.com> Date: Thu, 20 Mar 2025 11:24:59 -0700 Subject: [PATCH 09/20] support MI_OPT_ARCH when using CMAKE_OSX_ARCHITECTURES with non-Apple Clang --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b1292cc..b7154b20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,7 +434,7 @@ endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") if(MI_OPT_ARCH) - if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) + if(APPLE AND CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a") endif() From 01ee3568c1a2d82779887577e4427b8d65df47ce Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 16:19:54 -0700 Subject: [PATCH 10/20] name anonymous mmap address ranges for debugging on Linux (based on PR #1032 by @zhuker) --- src/prim/unix/prim.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 8e3180e6..994dbb93 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -205,14 +205,24 @@ static int unix_madvise(void* addr, size_t size, int advice) { return (res==0 ? 
0 : errno); } -static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { +static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { + void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); + #if (defined(__linux__) || defined(__ANDROID__)) + if (p!=MAP_FAILED && p!=NULL) { + prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); + } + #endif + return p; +} + +static void* unix_mmap_prim_aligned(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { MI_UNUSED(try_alignment); void* p = NULL; #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + p = unix_mmap_prim(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { int err = errno; _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); @@ -223,7 +233,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p } #elif defined(MAP_ALIGN) // Solaris if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + p = unix_mmap_prim((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd); // addr parameter is the required alignment if (p!=MAP_FAILED) return p; // fall back to regular mmap } @@ -233,7 +243,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p if (addr == NULL) { void* hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { - p = mmap(hint, size, protect_flags, flags, fd, 0); + p = unix_mmap_prim(hint, size, protect_flags, flags, fd); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? 
int err = 0; @@ -248,7 +258,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p } #endif // regular mmap - p = mmap(addr, size, protect_flags, flags, fd, 0); + p = unix_mmap_prim(addr, size, protect_flags, flags, fd); if (p!=MAP_FAILED) return p; // failed to allocate return NULL; @@ -319,7 +329,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec if (large_only || lflags != flags) { // try large OS page allocation *is_large = true; - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); #ifdef MAP_HUGE_1GB if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) { mi_huge_pages_available = false; // don't try huge 1GiB pages again @@ -327,7 +337,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); } lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, lflags, lfd); } #endif if (large_only) return p; @@ -340,7 +350,7 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec // regular allocation if (p == NULL) { *is_large = false; - p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); + p = unix_mmap_prim_aligned(addr, size, try_alignment, protect_flags, flags, fd); if (p != NULL) { #if defined(MADV_HUGEPAGE) // Many Linux systems don't allow MAP_HUGETLB but they support instead From 6ed451c555da0725bd660440a584188370f46b8b Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 16:48:50 -0700 Subject: [PATCH 11/20] fix linux compile by including linux/prctl.h --- src/prim/unix/prim.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 994dbb93..32004fe4 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -31,11 +31,12 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(__linux__) #include + #include // PR_SET_VMA //#if defined(MI_NO_THP) - #include // THP disable + #include // THP disable //#endif #if defined(__GLIBC__) - #include // linux mmap flags + #include // linux mmap flags #else #include #endif @@ -207,7 +208,7 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); - #if (defined(__linux__) || defined(__ANDROID__)) + #if (defined(__linux__) && defined(PR_SET_VMA)) if (p!=MAP_FAILED && p!=NULL) { prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); } From 02607f2b8d6fa70dfa632d3851930dadeeb5079f Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 17:22:36 -0700 Subject: [PATCH 12/20] reduce test sizes for 32-bit --- test/test-api.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test-api.c b/test/test-api.c index 15484544..6f5d6722 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -203,7 +203,11 @@ int main(void) { CHECK_BODY("malloc-aligned9") { // test large alignments bool ok = true; void* p[8]; - size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 }; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, + #if SIZE_MAX > UINT32_MAX + 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, + #endif + 0 }; for (int i = 0; i < 28 && ok; i++) { int align = (1 << i); for (int j = 0; j < 8 && ok; j++) { From 1fa591cc891c1631fd475ee4ccc695c912eda4f7 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 17:38:39 -0700 Subject: [PATCH 13/20] attempt to fix test pipeline on x86 --- test/test-api.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/test-api.c b/test/test-api.c index cef9c293..20d85314 100644 --- a/test/test-api.c +++ b/test/test-api.c @@ -203,12 +203,15 @@ int main(void) { CHECK_BODY("malloc-aligned9") { // test large alignments bool ok = true; void* p[8]; - size_t sizes[8] = { 8, 512, 1024 * 1024, MI_PAGE_MAX_OVERALLOC_ALIGN, MI_PAGE_MAX_OVERALLOC_ALIGN + 1, + const int max_align_shift = #if SIZE_MAX > UINT32_MAX - 2 * MI_PAGE_MAX_OVERALLOC_ALIGN, 8 * MI_PAGE_MAX_OVERALLOC_ALIGN, + 28 + #else + 20 #endif - 0 }; - for (int i = 0; i < 28 && ok; i++) { + ; + size_t sizes[8] = { 8, 512, 1024 * 1024, MI_PAGE_MAX_OVERALLOC_ALIGN, MI_PAGE_MAX_OVERALLOC_ALIGN + 1, 2 * MI_PAGE_MAX_OVERALLOC_ALIGN, 8 * MI_PAGE_MAX_OVERALLOC_ALIGN, 0 }; + for (int i = 0; i < max_align_shift && ok; i++) { int align = (1 << i); for (int j = 0; j < 8 && ok; j++) { p[j] = mi_zalloc_aligned(sizes[j], align); From d48bafe2bb63120c1327fe61a13aafd893c97760 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 19:21:41 -0700 Subject: [PATCH 14/20] print statistics nicer --- src/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index 6a480816..70f16ef3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -215,7 +215,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "current ", "unit ", "total# "); + _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s\n", "heap stats", "peak ", 
"total ", "current ", "block ", "total# "); } #if MI_STAT>1 @@ -284,10 +284,10 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) // and print using that mi_print_header(out,arg); #if MI_STAT>1 - mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "normal",out,arg); + mi_stats_print_bins(stats->malloc_bins, MI_BIN_HUGE, "bin",out,arg); #endif #if MI_STAT - mi_stat_print(&stats->malloc_normal, "normal", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); + mi_stat_print(&stats->malloc_normal, "binned", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg); mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 1 : -1), out, arg); mi_stat_count_t total = { 0,0,0 }; mi_stat_count_add_mt(&total, &stats->malloc_normal); @@ -295,7 +295,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print_ex(&total, "total", 1, out, arg, ""); #endif #if MI_STAT>1 - mi_stat_print_ex(&stats->malloc_requested, "malloc req", 1, out, arg, ""); + mi_stat_peak_print(&stats->malloc_requested, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); From a077311a5ec418e2e11c5cb99b82a41c188045b3 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 19:40:44 -0700 Subject: [PATCH 15/20] improve tracking of malloc_requested count --- include/mimalloc/types.h | 5 ++++- src/alloc-aligned.c | 3 ++- src/alloc.c | 17 ++++------------- src/free.c | 14 ++++++-------- src/heap.c | 10 +++++----- src/stats.c | 28 +++++++++++++++++++++++++++- 6 files changed, 48 insertions(+), 29 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 5bcdb07f..ab697f23 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -625,22 +625,25 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line // add to stat keeping track of the peak void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount); // counters can just be increased void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #if (MI_STAT) #define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) #define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) +#define mi_stat_adjust_decrease(stat,amount) _mi_stat_adjust_decrease( &(stat), amount) #define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) #else #define mi_stat_increase(stat,amount) ((void)0) #define mi_stat_decrease(stat,amount) ((void)0) +#define mi_stat_adjust_decrease(stat,amount) ((void)0) #define mi_stat_counter_increase(stat,amount) ((void)0) #endif #define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) - +#define mi_heap_stat_adjust_decrease(heap,stat,amount) mi_stat_adjust_decrease( (heap)->tld->stats.stat, amount) #endif diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index d0e691b3..e28cb0de 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -191,10 +191,11 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const bool is_aligned = 
(((uintptr_t)page->free + offset) & align_mask)==0; if mi_likely(is_aligned) { + void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen #if MI_STAT>1 + mi_heap_stat_adjust_decrease(heap, malloc_requested, padsize); mi_heap_stat_increase(heap, malloc_requested, size); #endif - void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_track_malloc(p,size,zero); diff --git a/src/alloc.c b/src/alloc.c index 15867315..0c4e4391 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -30,6 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file // Note: in release mode the (inlined) routine is about 7 instructions with a single test. extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { + mi_assert_internal(size >= MI_PADDING_SIZE); mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); // check the free list @@ -88,6 +89,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_ #if (MI_STAT>1) const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap, malloc_bins[bin], 1); + mi_heap_stat_increase(heap, malloc_requested, size - MI_PADDING_SIZE); #endif } #endif @@ -146,12 +148,6 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); mi_track_malloc(p,size,zero); - #if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); - } - #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); @@ -188,12 +184,6 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_track_malloc(p,size,zero); - #if MI_STAT>1 - if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); - } - #endif #if MI_DEBUG>3 if (p != NULL && zero) { mi_assert_expensive(mi_mem_is_zero(p, size)); @@ -666,7 +656,8 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo if (p != NULL) { if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p)); + mi_heap_stat_adjust_decrease(heap, malloc_requested, req_size); + mi_heap_stat_increase(heap, malloc_requested, size); #endif _mi_stat_counter_increase(&heap->tld->stats.malloc_guarded_count, 1); } diff --git a/src/free.c b/src/free.c index a1732e8c..7e529530 100644 --- a/src/free.c +++ b/src/free.c @@ -514,20 +514,18 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { // only maintain stats for smaller objects if requested #if (MI_STAT>0) static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) MI_UNUSED(block); -#endif mi_heap_t* const heap = mi_heap_get_default(); const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) - const size_t 
usize = mi_page_usable_size_of(page, block); - mi_heap_stat_decrease(heap, malloc_requested, usize); -#endif + // #if (MI_STAT>1) + // const size_t usize = mi_page_usable_size_of(page, block); + // mi_heap_stat_decrease(heap, malloc_requested, usize); + // #endif if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_normal, bsize); -#if (MI_STAT > 1) + #if (MI_STAT > 1) mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], 1); -#endif + #endif } else { const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc diff --git a/src/heap.c b/src/heap.c index 7c235a7b..0ea9a2ff 100644 --- a/src/heap.c +++ b/src/heap.c @@ -331,17 +331,17 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ if (bsize > MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_huge, bsize); } -#if (MI_STAT) + #if (MI_STAT>0) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_decrease(heap, malloc_normal, bsize * inuse); -#if (MI_STAT>1) + #if (MI_STAT>1) mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], inuse); -#endif + #endif } - mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... -#endif + // mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks... + #endif /// pretend it is all free now mi_assert_internal(mi_page_thread_free(page) == NULL); diff --git a/src/stats.c b/src/stats.c index 70f16ef3..07ce7d16 100644 --- a/src/stats.c +++ b/src/stats.c @@ -62,6 +62,25 @@ void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { } +static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + if mi_unlikely(mi_is_in_main(stat)) + { + // adjust atomically + mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_addi64_relaxed(&stat->total,amount); + } + else { + // adjust local + stat->current += amount; + stat->total += amount; + } +} + +void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) { + mi_stat_adjust(stat, -((int64_t)amount)); +} + // must be thread safe as it is called from stats_merge static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) { @@ -199,6 +218,13 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int _mi_fprintf(out, arg, "\n"); } +static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + _mi_fprintf(out, arg, "%10s:", msg); + _mi_fprintf(out, arg, "%12s", " "); // no peak + mi_print_amount(stat->total, unit, out, arg); + _mi_fprintf(out, arg, "\n"); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); @@ -295,7 +321,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print_ex(&total, "total", 1, out, arg, ""); #endif #if MI_STAT>1 - mi_stat_peak_print(&stats->malloc_requested, "malloc req", 1, out, arg); + mi_stat_total_print(&stats->malloc_requested, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); From a15a999881b9044d151496e723045d8dea8dd8b1 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 19:57:23 -0700 Subject: [PATCH 16/20] add chunk bin output to json stats --- 
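Example of the resulting JSON fragment (values are illustrative, following the
format string in mi_heap_buf_print_count_cbin below):

  "chunk_bins": [
    { "total": 140, "peak": 111, "current": 63, "bin": "S" },
    { "total": 24, "peak": 20, "current": 12, "bin": "M" },
    { "total": 2, "peak": 2, "current": 1, "bin": "L" },
    { "total": 0, "peak": 0, "current": 0, "bin": "X" }
  ]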
src/stats.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/stats.c b/src/stats.c index 95eefac8..7a045386 100644 --- a/src/stats.c +++ b/src/stats.c @@ -551,6 +551,21 @@ static void mi_heap_buf_print_count_bin(mi_heap_buf_t* hbuf, const char* prefix, mi_heap_buf_print(hbuf, buf); } +static void mi_heap_buf_print_count_cbin(mi_heap_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, mi_chunkbin_t bin, bool add_comma) { + const char* cbin = " "; + switch(bin) { + case MI_CBIN_SMALL: cbin = "S"; break; + case MI_CBIN_MEDIUM: cbin = "M"; break; + case MI_CBIN_LARGE: cbin = "L"; break; + case MI_CBIN_OTHER: cbin = "X"; break; + default: cbin = " "; break; + } + char buf[128]; + _mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld, \"bin\": \"%s\" }%s\n", prefix, stat->total, stat->peak, stat->current, cbin, (add_comma ? "," : "")); + buf[127] = 0; + mi_heap_buf_print(hbuf, buf); +} + static void mi_heap_buf_print_count(mi_heap_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, bool add_comma) { char buf[128]; _mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld }%s\n", prefix, stat->total, stat->peak, stat->current, (add_comma ? "," : "")); @@ -637,6 +652,11 @@ char* mi_stats_get_json(size_t output_size, char* output_buf) mi_attr_noexcept { for (size_t i = 0; i <= MI_BIN_HUGE; i++) { mi_heap_buf_print_count_bin(&hbuf, " ", &stats->page_bins[i], i, i!=MI_BIN_HUGE); } + mi_heap_buf_print(&hbuf, " ],\n"); + mi_heap_buf_print(&hbuf, " \"chunk_bins\": [\n"); + for (size_t i = 0; i < MI_CBIN_COUNT; i++) { + mi_heap_buf_print_count_cbin(&hbuf, " ", &stats->chunk_bins[i], (mi_chunkbin_t)i, i!=MI_CBIN_COUNT-1); + } mi_heap_buf_print(&hbuf, " ]\n"); mi_heap_buf_print(&hbuf, "}\n"); return hbuf.buf; From 26b792d93b4e8f389a5c724feeabb86038b39e53 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 21 Mar 2025 20:07:16 -0700 Subject: [PATCH 17/20] fix aligned malloc_requested statistic --- src/alloc-aligned.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index e28cb0de..8d2bde74 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -192,10 +192,6 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t if mi_likely(is_aligned) { void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen - #if MI_STAT>1 - mi_heap_stat_adjust_decrease(heap, malloc_requested, padsize); - mi_heap_stat_increase(heap, malloc_requested, size); - #endif mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); mi_track_malloc(p,size,zero); From 34cc5c8fd9e84fbfdfa45ed5db5b09f74a448a3b Mon Sep 17 00:00:00 2001 From: Peiyuan Song Date: Mon, 24 Mar 2025 09:39:42 +0800 Subject: [PATCH 18/20] remove the `lib` prefix when enabling mimalloc-redirect for mingw --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7154b20..283af66d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -594,6 +594,9 @@ if(MI_BUILD_SHARED) # install(FILES "$/${mi_libname}.dll.pdb" DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() if(WIN32 AND MI_WIN_REDIRECT) + if(MINGW) + set_property(TARGET mimalloc PROPERTY PREFIX "") + endif() # On windows, link and copy the mimalloc redirection dll too. 
if(CMAKE_GENERATOR_PLATFORM STREQUAL "arm64ec") set(MIMALLOC_REDIRECT_SUFFIX "-arm64ec") From 797ca19ba93bb92f9c7c97923aa3e43485cbb3de Mon Sep 17 00:00:00 2001 From: Maksim Bondarenkov <119937608+ognevny@users.noreply.github.com> Date: Mon, 24 Mar 2025 08:35:15 +0300 Subject: [PATCH 19/20] cmake: don't change properties of import lib on Windows/MinGW CMake handles import lib for it automatically, and using `.dll.lib` extension is MSVC-specific hack --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7154b20..46435eca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -584,7 +584,7 @@ if(MI_BUILD_SHARED) install(TARGETS mimalloc EXPORT mimalloc ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) - if(WIN32) + if(WIN32 AND NOT MINGW) # On windows, the import library name for the dll would clash with the static mimalloc.lib library # so we postfix the dll import library with `.dll.lib` (and also the .pdb debug file) set_property(TARGET mimalloc PROPERTY ARCHIVE_OUTPUT_NAME "${mi_libname}.dll" ) From 632eab958bb91fb8bd273efe58995023e5087aaa Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 25 Mar 2025 16:02:29 -0700 Subject: [PATCH 20/20] fix for atomic_yield on arm 32-bit, issue #1046 --- include/mimalloc/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 2984f50f..c0425f67 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -380,7 +380,7 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("wfe"); } -#elif (defined(__arm__) && __ARM_ARCH__ >= 7) +#elif (defined(__arm__) && __ARM_ARCH >= 7) static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); }
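
Addendum: two small standalone sketches follow; they are illustrative only and not
part of the patch series.

The generic SWAR popcount added to src/libc.c in patch 02 can be sanity checked
against the GCC/Clang builtin. The function below copies the 64-bit algorithm from
the patch; the harness name, seed, and LCG constants are our own choices:

  #include <stdint.h>
  #include <stdio.h>
  #include <assert.h>

  // copy of the 64-bit SWAR popcount from patch 02 (src/libc.c)
  static size_t popcount_generic64(uint64_t x) {
    x = x - ((x >> 1) & 0x5555555555555555ULL);                           // count per 2-bit pair
    x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); // count per nibble
    x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                           // count per byte
    x += (x << 8); x += (x << 16); x += (x << 32);                        // byte sum ends up in the top byte
    return (size_t)(x >> 56);
  }

  int main(void) {
    uint64_t x = 0x9E3779B97F4A7C15ULL;                             // arbitrary seed
    for (int i = 0; i < 1000000; i++) {
      assert(popcount_generic64(x) == (size_t)__builtin_popcountll(x));
      x = x * 6364136223846793005ULL + 1442695040888963407ULL;      // LCG step to vary the input
    }
    printf("ok\n");
    return 0;
  }

The mmap naming from patches 10 and 11 can also be observed in isolation. This
sketch assumes a Linux kernel built with CONFIG_ANON_VMA_NAME (5.17 or later); as
in the patch, the prctl call is best effort and its failure is ignored:

  #define _GNU_SOURCE
  #include <sys/mman.h>
  #include <sys/prctl.h>
  #include <linux/prctl.h>   // PR_SET_VMA, as in patch 11
  #include <stdio.h>

  int main(void) {
    const size_t size = 1 << 20;
    void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED) { perror("mmap"); return 1; }
  #if defined(PR_SET_VMA)
    // the range now shows up as `[anon:demo]` in /proc/self/maps
    (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "demo");
  #endif
    getchar(); // pause: inspect /proc/<pid>/maps from another terminal
    return 0;
  }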