Merge branch 'dev' into dev2

Daan 2025-03-19 18:50:59 -07:00
commit 992ebd4eae
6 changed files with 119 additions and 26 deletions

View file

@@ -127,6 +127,7 @@ bool _mi_os_has_virtual_reserve(void);
 bool _mi_os_reset(void* addr, size_t size);
 bool _mi_os_commit(void* p, size_t size, bool* is_zero);
+bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size);
 bool _mi_os_decommit(void* addr, size_t size);
 bool _mi_os_protect(void* addr, size_t size);
 bool _mi_os_unprotect(void* addr, size_t size);
@@ -1030,6 +1031,21 @@ static inline size_t mi_bsr(size_t x) {
   return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
 }
 
+size_t _mi_popcount_generic(size_t x);
+
+static inline size_t mi_popcount(size_t x) {
+  if (x<=1) return x;
+  if (x==SIZE_MAX) return MI_SIZE_BITS;
+  #if defined(__GNUC__)
+  #if (SIZE_MAX == ULONG_MAX)
+  return __builtin_popcountl(x);
+  #else
+  return __builtin_popcountll(x);
+  #endif
+  #else
+  return _mi_popcount_generic(x);
+  #endif
+}
+
 // ---------------------------------------------------------------------------------
 // Provide our own `_mi_memcpy` for potential performance optimizations.
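
The new `mi_popcount` prefers the compiler builtin when available and only falls back to `_mi_popcount_generic` otherwise. A minimal standalone sketch of that dispatch, checked against a portable bit-clearing loop (hypothetical test harness, not part of this commit):

#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>

// portable reference: clear the lowest set bit until none remain
static size_t popcount_reference(size_t x) {
  size_t n = 0;
  while (x != 0) { x &= (x - 1); n++; }
  return n;
}

static size_t popcount_dispatch(size_t x) {
#if defined(__GNUC__)
  #if (SIZE_MAX == ULONG_MAX)
  return (size_t)__builtin_popcountl(x);   // `size_t` is `unsigned long`
  #else
  return (size_t)__builtin_popcountll(x);  // e.g. 64-bit Windows
  #endif
#else
  return popcount_reference(x);            // stand-in for _mi_popcount_generic
#endif
}

int main(void) {
  const size_t samples[] = { 0, 1, 2, 0xF0, SIZE_MAX, SIZE_MAX - 1 };
  for (size_t i = 0; i < sizeof(samples)/sizeof(samples[0]); i++) {
    assert(popcount_dispatch(samples[i]) == popcount_reference(samples[i]));
  }
  return 0;
}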

View file

@@ -259,7 +259,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
   // set the dirty bits (todo: no need for an atomic op here?)
   if (arena->memid.initially_zero && arena->blocks_dirty != NULL) {
-    memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
+    memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL);
   }
 
   // set commit state
@@ -271,10 +271,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
     // commit requested, but the range may not be committed as a whole: ensure it is committed now
     memid->initially_committed = true;
     bool any_uncommitted;
-    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
+    size_t already_committed = 0;
+    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed);
     if (any_uncommitted) {
+      mi_assert_internal(already_committed < needed_bcount);
+      const size_t commit_size = mi_arena_block_size(needed_bcount);
+      const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed);
       bool commit_zero = false;
-      if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) {
+      if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) {
        memid->initially_committed = false;
       }
       else {
@@ -284,7 +288,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
   }
   else {
     // no need to commit, but check if already fully committed
-    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
+    size_t already_committed = 0;
+    memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed);
+    if (!memid->initially_committed && already_committed > 0) {
+      // partially committed: since it will be committed at some point, adjust the stats and pretend the range is fully uncommitted.
+      mi_assert_internal(already_committed < needed_bcount);
+      _mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed));
+      _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
+    }
   }
 
   return p;
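
In the hunks above, the OS commit covers the whole claimed range, but the statistics should only grow by the bytes that were not already counted; that is why `stat_commit_size` subtracts the already-committed blocks. A small sketch of this arithmetic (illustrative block size and counts, not mimalloc's API):

#include <assert.h>
#include <stddef.h>

#define ARENA_BLOCK_SIZE ((size_t)(32 * 1024 * 1024))  // illustrative only

static size_t arena_block_size(size_t blocks) { return blocks * ARENA_BLOCK_SIZE; }

int main(void) {
  size_t needed_bcount     = 8;  // blocks being claimed
  size_t already_committed = 3;  // reported by the bitmap claim

  size_t commit_size      = arena_block_size(needed_bcount);   // OS commit spans it all
  size_t stat_commit_size = commit_size - arena_block_size(already_committed);

  // only the newly committed blocks are charged to the commit statistic
  assert(stat_commit_size == arena_block_size(needed_bcount - already_committed));
  return 0;
}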
@@ -468,17 +479,19 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks)
   const size_t size = mi_arena_block_size(blocks);
   void* const p = mi_arena_block_start(arena, bitmap_idx);
   bool needs_recommit;
-  if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
+  size_t already_committed = 0;
+  if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) {
     // all blocks are committed, we can purge freely
+    mi_assert_internal(already_committed == blocks);
     needs_recommit = _mi_os_purge(p, size);
   }
   else {
     // some blocks are not committed -- this can happen when a partially committed block is freed
     // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
-    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
-    // and also undo the decommit stats (as it was already adjusted)
+    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory).
+    mi_assert_internal(already_committed < blocks);
     mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
-    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0);
+    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed));
   }
 
   // clear the purged blocks
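
The purge path mirrors the commit-side accounting: when only part of the range was committed, `_mi_os_purge_ex` gets the committed portion as its stat size so the commit statistic is reduced by exactly the bytes that were counted, and reset is disallowed since resetting uncommitted memory is invalid. A simplified decision model (hypothetical helper, not mimalloc's API):

#include <stdbool.h>
#include <stddef.h>

typedef struct {
  size_t committed_blocks;  // blocks marked committed in the bitmap
  size_t total_blocks;      // blocks in the range being purged
  size_t block_size;
} purge_range_t;

static size_t purge_stat_size(const purge_range_t* r, bool* allow_reset) {
  if (r->committed_blocks == r->total_blocks) {
    *allow_reset = true;                        // fully committed: purge freely
    return r->total_blocks * r->block_size;     // the full range leaves the stats
  }
  *allow_reset = false;                         // partially committed: decommit only
  return r->committed_blocks * r->block_size;   // only committed bytes leave the stats
}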
@@ -512,7 +525,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
     else {
       // already an expiration was set
     }
-    _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
+    _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL);
   }
 }
@@ -652,7 +665,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
   if (p==NULL) return;
   if (size==0) return;
   const bool all_committed = (committed_size == size);
-  const bool decommitted_size = (committed_size <= size ? size - committed_size : 0);
+  const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0);
 
   // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
   mi_track_mem_undefined(p,size);
@@ -695,14 +708,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     mi_assert_internal(arena->blocks_purge != NULL);
     if (!all_committed) {
-      // mark the entire range as no longer committed (so we recommit the full range when re-using)
+      // mark the entire range as no longer committed (so we will recommit the full range when re-using)
       _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
       mi_track_mem_noaccess(p,size);
-      if (committed_size > 0) {
+      //if (committed_size > 0) {
        // if partially committed, adjust the committed stats (as it will be recommitted when re-using)
        // in the delayed purge, we no longer decrease the commit if the range is not marked entirely as committed.
        _mi_stat_decrease(&_mi_stats_main.committed, committed_size);
-      }
+      //}
       // note: if not all committed, it may be that the purge will reset/decommit the entire range
       // that contains already decommitted parts. Since purge consistently uses reset or decommit that
       // works (as we should never reset decommitted parts).
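
Note that the earlier hunk in this file also fixes the type of `decommitted_size` from `bool` to `size_t`: with `bool`, any nonzero byte count collapses to 1. A quick standalone illustration of that conversion:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

int main(void) {
  size_t size = 4096, committed_size = 1024;

  bool   as_bool   = (committed_size <= size ? size - committed_size : 0);
  size_t as_size_t = (committed_size <= size ? size - committed_size : 0);

  assert(as_bool == 1);        // 3072 collapses to `true`
  assert(as_size_t == 3072);   // the actual number of decommitted bytes
  return 0;
}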

View file

@@ -369,7 +369,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
 
 // Set `count` bits at `bitmap_idx` to 1 atomically
 // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
   size_t pre_mask;
   size_t mid_mask;
@@ -377,28 +377,31 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
   bool all_zero = true;
   bool any_zero = false;
+  size_t one_count = 0;
   _Atomic(size_t)*field = &bitmap[idx];
   size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
-  if ((prev & pre_mask) != 0) all_zero = false;
+  if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); }
   if ((prev & pre_mask) != pre_mask) any_zero = true;
   while (mid_count-- > 0) {
     prev = mi_atomic_or_acq_rel(field++, mid_mask);
-    if ((prev & mid_mask) != 0) all_zero = false;
+    if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); }
     if ((prev & mid_mask) != mid_mask) any_zero = true;
   }
   if (post_mask!=0) {
     prev = mi_atomic_or_acq_rel(field, post_mask);
-    if ((prev & post_mask) != 0) all_zero = false;
+    if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); }
     if ((prev & post_mask) != post_mask) any_zero = true;
   }
   if (pany_zero != NULL) { *pany_zero = any_zero; }
+  if (already_set != NULL) { *already_set = one_count; }
+  mi_assert_internal(all_zero ? one_count == 0 : one_count <= count);
   return all_zero;
 }
 
 // Returns `true` if all `count` bits were 1.
 // `any_ones` is `true` if there was at least one bit set to one.
-static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
+static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) {
   size_t idx = mi_bitmap_index_field(bitmap_idx);
   size_t pre_mask;
   size_t mid_mask;
@@ -406,30 +409,33 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field
   size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
   bool all_ones = true;
   bool any_ones = false;
+  size_t one_count = 0;
   mi_bitmap_field_t* field = &bitmap[idx];
   size_t prev = mi_atomic_load_relaxed(field++);
   if ((prev & pre_mask) != pre_mask) all_ones = false;
-  if ((prev & pre_mask) != 0) any_ones = true;
+  if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); }
   while (mid_count-- > 0) {
     prev = mi_atomic_load_relaxed(field++);
     if ((prev & mid_mask) != mid_mask) all_ones = false;
-    if ((prev & mid_mask) != 0) any_ones = true;
+    if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); }
   }
   if (post_mask!=0) {
     prev = mi_atomic_load_relaxed(field);
     if ((prev & post_mask) != post_mask) all_ones = false;
-    if ((prev & post_mask) != 0) any_ones = true;
+    if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); }
   }
   if (pany_ones != NULL) { *pany_ones = any_ones; }
+  if (already_set != NULL) { *already_set = one_count; }
+  mi_assert_internal(all_ones ? one_count == count : one_count < count);
   return all_ones;
 }
 
-bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
-  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) {
+  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set);
 }
 
 bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
   bool any_ones;
-  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
+  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL);
   return any_ones;
 }
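
The new `already_set`/`one_count` logic counts, via `mi_popcount`, how many of the touched bits were set before the operation. A single-field model of that behavior (the real functions split `count` bits across fields with pre/mid/post masks and use atomic operations; this sketch uses one plain 64-bit field and a GCC/Clang builtin):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool claim_across_model(uint64_t* field, uint64_t mask,
                               bool* any_zero, size_t* already_set) {
  uint64_t prev = *field;  // mi_atomic_or_acq_rel returns the previous value
  *field |= mask;
  size_t one_count = (size_t)__builtin_popcountll(prev & mask);
  if (any_zero != NULL)    { *any_zero    = ((prev & mask) != mask); }
  if (already_set != NULL) { *already_set = one_count; }
  return (one_count == 0);  // true if all claimed bits were previously zero
}

int main(void) {
  uint64_t field = 0x0F;  // bits 0..3 already set
  bool any_zero; size_t already_set;
  bool all_zero = claim_across_model(&field, 0xFF, &any_zero, &already_set);
  assert(!all_zero && any_zero && already_set == 4 && field == 0xFF);
  return 0;
}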

View file

@@ -111,9 +111,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
 
 // Set `count` bits at `bitmap_idx` to 1 atomically
 // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
-bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
+bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set);
 
-bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
+bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set);
 bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
 
 #endif

View file

@@ -275,3 +275,60 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
   va_end(args);
   return written;
 }
+
+#if MI_SIZE_SIZE == 4
+
+#define mi_mask_even_bits32     (0x55555555)
+#define mi_mask_even_pairs32    (0x33333333)
+#define mi_mask_even_nibbles32  (0x0F0F0F0F)
+
+// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
+static size_t mi_byte_sum32(uint32_t x) {
+  // perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
+  x += (x << 8);
+  x += (x << 16);
+  return (size_t)(x >> 24);
+}
+
+static size_t mi_popcount_generic32(uint32_t x) {
+  // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
+  // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
+  // into the lower bit-pair:
+  x = x - ((x >> 1) & mi_mask_even_bits32);
+  // add the 2-bit pair results
+  x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
+  // add the 4-bit nibble results
+  x = (x + (x >> 4)) & mi_mask_even_nibbles32;
+  // each byte now has a count of its bits, we can sum them now:
+  return mi_byte_sum32(x);
+}
+
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
+  return mi_popcount_generic32(x);
+}
+
+#else
+
+#define mi_mask_even_bits64     (0x5555555555555555)
+#define mi_mask_even_pairs64    (0x3333333333333333)
+#define mi_mask_even_nibbles64  (0x0F0F0F0F0F0F0F0F)
+
+// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
+static size_t mi_byte_sum64(uint64_t x) {
+  x += (x << 8);
+  x += (x << 16);
+  x += (x << 32);
+  return (size_t)(x >> 56);
+}
+
+static size_t mi_popcount_generic64(uint64_t x) {
+  x = x - ((x >> 1) & mi_mask_even_bits64);
+  x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
+  x = (x + (x >> 4)) & mi_mask_even_nibbles64;
+  return mi_byte_sum64(x);
+}
+
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
+  return mi_popcount_generic64(x);
+}
+
+#endif
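
The generic popcount is the classic SWAR reduction: count bit pairs, then nibbles, then sum the bytes via the multiply trick. A worked trace of the 32-bit steps for x = 0xF0 (four bits set), with the intermediate values computed by hand:

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint32_t x = 0xF0;                               // bits 4..7 set
  x = x - ((x >> 1) & 0x55555555);                 // each 2-bit pair: 11 -> 10
  assert(x == 0xA0);                               // pairs hold counts 2,2,0,0
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);  // add adjacent pairs
  assert(x == 0x40);                               // nibbles hold counts 4,0
  x = (x + (x >> 4)) & 0x0F0F0F0F;                 // add adjacent nibbles
  assert(x == 0x04);                               // each byte holds its bit count
  assert(((x * 0x01010101u) >> 24) == 4);          // byte sum via the multiply trick
  return 0;
}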

View file

@@ -30,6 +30,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
 {
   // add atomically (for abandoned pages)
   int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
+  // if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); };
   mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
   if (amount > 0) {
     mi_atomic_addi64_relaxed(&stat->total,amount);
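
For context, `mi_stat_update` keeps three numbers per statistic: `current` (signed running sum), `peak` (maximum of `current` over time), and `total` (sum of increases only). A single-threaded model of the update, where fetch-and-add returns the value before the addition as in the atomic code above:

#include <assert.h>
#include <stdint.h>

typedef struct { int64_t current, peak, total; } stat_count_t;

static void stat_update(stat_count_t* stat, int64_t amount) {
  int64_t current = stat->current;  // mi_atomic_addi64_relaxed returns the old value
  stat->current += amount;
  if (current + amount > stat->peak) stat->peak = current + amount;  // mi_atomic_maxi64_relaxed
  if (amount > 0) stat->total += amount;
}

int main(void) {
  stat_count_t committed = { 0, 0, 0 };
  stat_update(&committed, 4096);   // commit
  stat_update(&committed, -1024);  // partial decommit
  assert(committed.current == 3072 && committed.peak == 4096 && committed.total == 4096);
  return 0;
}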