improved accounting of committed bytes (issue #1035)

Daan 2025-03-19 18:50:53 -07:00
parent 9a35bca556
commit 26fa8be427
6 changed files with 119 additions and 26 deletions

View file

@@ -127,6 +127,7 @@ bool _mi_os_has_virtual_reserve(void);
 bool _mi_os_reset(void* addr, size_t size);
 bool _mi_os_commit(void* p, size_t size, bool* is_zero);
+bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size);
 bool _mi_os_decommit(void* addr, size_t size);
 bool _mi_os_protect(void* addr, size_t size);
 bool _mi_os_unprotect(void* addr, size_t size);
@@ -947,6 +948,21 @@ static inline size_t mi_bsr(size_t x) {
   return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
 }

+size_t _mi_popcount_generic(size_t x);
+
+static inline size_t mi_popcount(size_t x) {
+  if (x<=1) return x;
+  if (x==SIZE_MAX) return MI_SIZE_BITS;
+  #if defined(__GNUC__)
+  #if (SIZE_MAX == ULONG_MAX)
+  return __builtin_popcountl(x);
+  #else
+  return __builtin_popcountll(x);
+  #endif
+  #else
+  return _mi_popcount_generic(x);
+  #endif
+}

 // ---------------------------------------------------------------------------------
 // Provide our own `_mi_memcpy` for potential performance optimizations.
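The new `mi_popcount` is what lets the bitmap code below report how many bits in a claimed range were already set. A standalone sketch of that use (not part of the commit; the stand-in `popcount`, the GCC/Clang builtin, and the sample values are illustrative assumptions):

  // Deriving an "already set" count from a previous bitmap field value
  // and a claim mask, as the bitmap changes in this commit do.
  #include <stdint.h>
  #include <stdio.h>

  static size_t popcount(size_t x) {  // stand-in for the internal mi_popcount
    return (size_t)__builtin_popcountll((unsigned long long)x);
  }

  int main(void) {
    size_t prev = 0xF0F0;  // field contents before the atomic-or
    size_t mask = 0x00FF;  // the bit range being claimed
    printf("already set: %zu\n", popcount(prev & mask));  // prints 4
    return 0;
  }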

View file

@@ -255,7 +255,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
   // set the dirty bits (todo: no need for an atomic op here?)
   if (arena->memid.initially_zero && arena->blocks_dirty != NULL) {
-    memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
+    memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL);
   }

   // set commit state
@@ -267,10 +267,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
     // commit requested, but the range may not be committed as a whole: ensure it is committed now
     memid->initially_committed = true;
     bool any_uncommitted;
-    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
+    size_t already_committed = 0;
+    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed);
     if (any_uncommitted) {
+      mi_assert_internal(already_committed < needed_bcount);
+      const size_t commit_size = mi_arena_block_size(needed_bcount);
+      const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed);
       bool commit_zero = false;
-      if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) {
+      if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) {
         memid->initially_committed = false;
       }
       else {
@@ -280,7 +284,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
     }
   }
   else {
     // no need to commit, but check if already fully committed
-    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
+    size_t already_committed = 0;
+    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed);
+    if (!memid->initially_committed && already_committed > 0) {
+      // partially committed: as it will be committed at some time, adjust the stats and pretend the range is fully uncommitted.
+      mi_assert_internal(already_committed < needed_bcount);
+      _mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed));
+      _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
+    }
   }

   return p;
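A concrete walk-through of the commit path above (the 64 KiB block size is an assumption for illustration, not taken from the diff): if 4 blocks are needed and 1 is already marked committed, the OS call still covers the whole range, but the committed statistic only grows by the newly committed part — this is the double-count the old `_mi_os_commit` call introduced.

  #include <stdio.h>

  int main(void) {
    size_t block_size        = 64 * 1024;  // hypothetical arena block size
    size_t needed_bcount     = 4;          // blocks the allocation needs
    size_t already_committed = 1;          // blocks already counted as committed
    size_t commit_size       = needed_bcount * block_size;                    // 262144: passed to the OS
    size_t stat_commit_size  = commit_size - already_committed * block_size;  // 196608: added to the stats
    printf("commit %zu bytes, account %zu bytes\n", commit_size, stat_commit_size);
    return 0;
  }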
@@ -464,17 +475,19 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks)
   const size_t size = mi_arena_block_size(blocks);
   void* const p = mi_arena_block_start(arena, bitmap_idx);
   bool needs_recommit;
-  if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
+  size_t already_committed = 0;
+  if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) {
     // all blocks are committed, we can purge freely
+    mi_assert_internal(already_committed == blocks);
     needs_recommit = _mi_os_purge(p, size);
   }
   else {
     // some blocks are not committed -- this can happen when a partially committed block is freed
     // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
-    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
-    // and also undo the decommit stats (as it was already adjusted)
+    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory).
+    mi_assert_internal(already_committed < blocks);
     mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
-    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0);
+    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed));
   }

   // clear the purged blocks
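The last argument of `_mi_os_purge_ex` is the size used for statistics accounting: the old code passed 0 (no adjustment at all), while the new code subtracts exactly the still-committed portion. A compact sketch of the resulting bookkeeping (simplified; the 64 KiB block size is again an assumption):

  #include <stdio.h>

  int main(void) {
    size_t block_size = 64 * 1024;             // assumed block size
    size_t blocks = 4, already_committed = 2;  // purge a partially committed range
    size_t os_size   = blocks * block_size;             // 262144: decommitted at the OS
    size_t stat_size = already_committed * block_size;  // 131072: subtracted from the stats
    printf("decommit %zu bytes, account %zu bytes\n", os_size, stat_size);
    return 0;
  }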
@@ -508,7 +521,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
     else {
       // already an expiration was set
     }
-    _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
+    _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL);
   }
 }
@@ -648,7 +661,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
   if (p==NULL) return;
   if (size==0) return;
   const bool all_committed = (committed_size == size);
-  const bool decommitted_size = (committed_size <= size ? size - committed_size : 0);
+  const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0);

   // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
   mi_track_mem_undefined(p,size);
@@ -691,14 +704,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     mi_assert_internal(arena->blocks_purge != NULL);

     if (!all_committed) {
-      // mark the entire range as no longer committed (so we recommit the full range when re-using)
+      // mark the entire range as no longer committed (so we will recommit the full range when re-using)
       _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
       mi_track_mem_noaccess(p,size);
-      if (committed_size > 0) {
+      //if (committed_size > 0) {
         // if partially committed, adjust the committed stats (as it will be recommitted when re-using)
+        // in the delayed purge, we no longer decrease the commit stat if the range is not marked entirely as committed.
         _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
-      }
+      //}
     }
     // note: if not all committed, it may be that the purge will reset/decommit the entire range
     // that contains already decommitted parts. Since purge consistently uses reset or decommit that
     // works (as we should never reset decommitted parts).

View file

@@ -351,7 +351,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
 // Set `count` bits at `bitmap_idx` to 1 atomically
 // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
   size_t pre_mask;
   size_t mid_mask;
@@ -359,28 +359,31 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
   bool all_zero = true;
   bool any_zero = false;
+  size_t one_count = 0;
   _Atomic(size_t)*field = &bitmap[idx];
   size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
-  if ((prev & pre_mask) != 0) all_zero = false;
+  if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); }
   if ((prev & pre_mask) != pre_mask) any_zero = true;
   while (mid_count-- > 0) {
     prev = mi_atomic_or_acq_rel(field++, mid_mask);
-    if ((prev & mid_mask) != 0) all_zero = false;
+    if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); }
     if ((prev & mid_mask) != mid_mask) any_zero = true;
   }
   if (post_mask!=0) {
     prev = mi_atomic_or_acq_rel(field, post_mask);
-    if ((prev & post_mask) != 0) all_zero = false;
+    if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); }
     if ((prev & post_mask) != post_mask) any_zero = true;
   }
   if (pany_zero != NULL) { *pany_zero = any_zero; }
+  if (already_set != NULL) { *already_set = one_count; }
+  mi_assert_internal(all_zero ? one_count == 0 : one_count <= count);
   return all_zero;
 }

 // Returns `true` if all `count` bits were 1.
 // `any_ones` is `true` if there was at least one bit set to one.
-static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
+static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
   size_t pre_mask;
   size_t mid_mask;
@@ -388,30 +391,33 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
   bool all_ones = true;
   bool any_ones = false;
+  size_t one_count = 0;
   mi_bitmap_field_t* field = &bitmap[idx];
   size_t prev = mi_atomic_load_relaxed(field++);
   if ((prev & pre_mask) != pre_mask) all_ones = false;
-  if ((prev & pre_mask) != 0) any_ones = true;
+  if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); }
   while (mid_count-- > 0) {
     prev = mi_atomic_load_relaxed(field++);
     if ((prev & mid_mask) != mid_mask) all_ones = false;
-    if ((prev & mid_mask) != 0) any_ones = true;
+    if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); }
   }
   if (post_mask!=0) {
     prev = mi_atomic_load_relaxed(field);
     if ((prev & post_mask) != post_mask) all_ones = false;
-    if ((prev & post_mask) != 0) any_ones = true;
+    if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); }
   }
   if (pany_ones != NULL) { *pany_ones = any_ones; }
+  if (already_set != NULL) { *already_set = one_count; }
+  mi_assert_internal(all_ones ? one_count == count : one_count < count);
   return all_ones;
 }

-bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
-  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) {
+  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set);
 }

 bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
   bool any_ones;
-  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
+  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL);
   return any_ones;
 }
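A standalone model of the counting logic above (a deliberate simplification: one field, no atomics, GCC/Clang builtin as a popcount stand-in) shows how a single or-operation can report both whether any bit was still zero and how many bits were already set:

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  // Simplified, non-atomic model of _mi_bitmap_claim_across for one field.
  static bool claim(uint64_t* field, uint64_t mask, bool* any_zero, size_t* already_set) {
    uint64_t prev = *field;  // models the value returned by mi_atomic_or_acq_rel
    *field |= mask;
    *any_zero    = ((prev & mask) != mask);
    *already_set = (size_t)__builtin_popcountll(prev & mask);
    return (prev & mask) == 0;  // all bits in the range were zero before
  }

  int main(void) {
    uint64_t field = 0x0F;  // low four bits already claimed
    bool any_zero; size_t already_set;
    bool all_zero = claim(&field, 0xFF, &any_zero, &already_set);
    printf("all_zero=%d any_zero=%d already_set=%zu\n", all_zero, any_zero, already_set);
    // prints: all_zero=0 any_zero=1 already_set=4
    return 0;
  }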

View file

@@ -102,9 +102,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
 // Set `count` bits at `bitmap_idx` to 1 atomically
 // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set);

-bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set);
 bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);

 #endif

View file

@@ -275,3 +275,60 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
   va_end(args);
   return written;
 }
+
+#if MI_SIZE_SIZE == 4
+#define mi_mask_even_bits32     (0x55555555)
+#define mi_mask_even_pairs32    (0x33333333)
+#define mi_mask_even_nibbles32  (0x0F0F0F0F)
+
+// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
+static size_t mi_byte_sum32(uint32_t x) {
+  // perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
+  x += (x << 8);
+  x += (x << 16);
+  return (size_t)(x >> 24);
+}
+
+static size_t mi_popcount_generic32(uint32_t x) {
+  // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
+  // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
+  // into the lower bit-pair:
+  x = x - ((x >> 1) & mi_mask_even_bits32);
+  // add the 2-bit pair results
+  x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
+  // add the 4-bit nibble results
+  x = (x + (x >> 4)) & mi_mask_even_nibbles32;
+  // each byte now has a count of its bits, we can sum them now:
+  return mi_byte_sum32(x);
+}
+
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
+  return mi_popcount_generic32(x);
+}
+
+#else
+#define mi_mask_even_bits64     (0x5555555555555555)
+#define mi_mask_even_pairs64    (0x3333333333333333)
+#define mi_mask_even_nibbles64  (0x0F0F0F0F0F0F0F0F)
+
+// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
+static size_t mi_byte_sum64(uint64_t x) {
+  x += (x << 8);
+  x += (x << 16);
+  x += (x << 32);
+  return (size_t)(x >> 56);
+}
+
+static size_t mi_popcount_generic64(uint64_t x) {
+  x = x - ((x >> 1) & mi_mask_even_bits64);
+  x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
+  x = (x + (x >> 4)) & mi_mask_even_nibbles64;
+  return mi_byte_sum64(x);
+}
+
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
+  return mi_popcount_generic64(x);
+}
+#endif
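To see the bit trick at work, take x = 0xFF: the first step leaves 0xAA (each 2-bit pair holds a count of 2), the second gives 0x44 (each nibble holds 4), the third gives 0x08 in the low byte, and the byte sum yields 8. A quick standalone check of these SWAR steps against the compiler builtin (an illustrative test, not part of the commit; it uses a multiply for the byte sum where the library uses shifts):

  #include <assert.h>
  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  static unsigned swar_popcount32(uint32_t x) {
    x = x - ((x >> 1) & 0x55555555u);                  // 2-bit pair counts
    x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // nibble counts
    x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // byte counts
    return (x * 0x01010101u) >> 24;                    // sum of the bytes
  }

  int main(void) {
    uint32_t samples[] = { 0u, 1u, 0xFFu, 0xF0F0F0F0u, 0xFFFFFFFFu };
    for (size_t i = 0; i < sizeof(samples)/sizeof(*samples); i++) {
      assert(swar_popcount32(samples[i]) == (unsigned)__builtin_popcount(samples[i]));
    }
    printf("ok\n");
    return 0;
  }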

View file

@@ -30,6 +30,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
 {
   // add atomically (for abandoned pages)
   int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
+  // if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); };
   mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
   if (amount > 0) {
     mi_atomic_addi64_relaxed(&stat->total,amount);
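The commented-out assertion documents the invariant this commit restores: the committed statistic should never go negative, which is what happens when the same bytes are decreased twice. A simplified single-threaded model of the update (an assumption-level sketch: the atomic add returns the previous value, so `current + amount` is the new value fed to the peak; the 64 KiB amount is hypothetical):

  #include <stdint.h>

  typedef struct { int64_t current, peak, total; } stat_count_t;

  static void stat_update(stat_count_t* s, int64_t amount) {
    int64_t prev = s->current;  // models mi_atomic_addi64_relaxed (returns the old value)
    s->current += amount;
    if (prev + amount > s->peak) { s->peak = prev + amount; }  // models mi_atomic_maxi64_relaxed
    if (amount > 0) { s->total += amount; }
  }

  int main(void) {
    stat_count_t committed = {0, 0, 0};
    stat_update(&committed, 65536);   // commit one block
    stat_update(&committed, -65536);  // decrease once at free
    stat_update(&committed, -65536);  // a second decrease drives it negative:
    // committed.current == -65536, the condition the assertion above guards against
    return 0;
  }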