mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-03 14:09:31 +03:00
improved accounting of committed bytes (issue #1035)
This commit is contained in:
parent
9a35bca556
commit
26fa8be427
6 changed files with 119 additions and 26 deletions
|
@ -127,6 +127,7 @@ bool _mi_os_has_virtual_reserve(void);
|
|||
|
||||
bool _mi_os_reset(void* addr, size_t size);
|
||||
bool _mi_os_commit(void* p, size_t size, bool* is_zero);
|
||||
bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size);
|
||||
bool _mi_os_decommit(void* addr, size_t size);
|
||||
bool _mi_os_protect(void* addr, size_t size);
|
||||
bool _mi_os_unprotect(void* addr, size_t size);
|
||||
|
@ -947,6 +948,21 @@ static inline size_t mi_bsr(size_t x) {
|
|||
return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
|
||||
}
|
||||
|
||||
size_t _mi_popcount_generic(size_t x);
|
||||
|
||||
static inline size_t mi_popcount(size_t x) {
|
||||
if (x<=1) return x;
|
||||
if (x==SIZE_MAX) return MI_SIZE_BITS;
|
||||
#if defined(__GNUC__)
|
||||
#if (SIZE_MAX == ULONG_MAX)
|
||||
return __builtin_popcountl(x);
|
||||
#else
|
||||
return __builtin_popcountll(x);
|
||||
#endif
|
||||
#else
|
||||
return _mi_popcount_generic(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------
|
||||
// Provide our own `_mi_memcpy` for potential performance optimizations.
|
||||
|
|
39
src/arena.c
39
src/arena.c
|
@ -255,7 +255,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
|
|||
|
||||
// set the dirty bits (todo: no need for an atomic op here?)
|
||||
if (arena->memid.initially_zero && arena->blocks_dirty != NULL) {
|
||||
memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
|
||||
memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL);
|
||||
}
|
||||
|
||||
// set commit state
|
||||
|
@ -267,10 +267,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
|
|||
// commit requested, but the range may not be committed as a whole: ensure it is committed now
|
||||
memid->initially_committed = true;
|
||||
bool any_uncommitted;
|
||||
_mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
|
||||
size_t already_committed = 0;
|
||||
_mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed);
|
||||
if (any_uncommitted) {
|
||||
mi_assert_internal(already_committed < needed_bcount);
|
||||
const size_t commit_size = mi_arena_block_size(needed_bcount);
|
||||
const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed);
|
||||
bool commit_zero = false;
|
||||
if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) {
|
||||
if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) {
|
||||
memid->initially_committed = false;
|
||||
}
|
||||
else {
|
||||
|
@ -280,7 +284,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
|
|||
}
|
||||
else {
|
||||
// no need to commit, but check if already fully committed
|
||||
memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
|
||||
size_t already_committed = 0;
|
||||
memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed);
|
||||
if (!memid->initially_committed && already_committed > 0) {
|
||||
// partially committed: as it will be committed at some time, adjust the stats and pretend the range is fully uncommitted.
|
||||
mi_assert_internal(already_committed < needed_bcount);
|
||||
_mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed));
|
||||
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
|
||||
}
|
||||
}
|
||||
|
||||
return p;
|
||||
|
@ -464,17 +475,19 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks)
|
|||
const size_t size = mi_arena_block_size(blocks);
|
||||
void* const p = mi_arena_block_start(arena, bitmap_idx);
|
||||
bool needs_recommit;
|
||||
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
|
||||
size_t already_committed = 0;
|
||||
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) {
|
||||
// all blocks are committed, we can purge freely
|
||||
mi_assert_internal(already_committed == blocks);
|
||||
needs_recommit = _mi_os_purge(p, size);
|
||||
}
|
||||
else {
|
||||
// some blocks are not committed -- this can happen when a partially committed block is freed
|
||||
// in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
|
||||
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
|
||||
// and also undo the decommit stats (as it was already adjusted)
|
||||
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory).
|
||||
mi_assert_internal(already_committed < blocks);
|
||||
mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
|
||||
needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0);
|
||||
needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed));
|
||||
}
|
||||
|
||||
// clear the purged blocks
|
||||
|
@ -508,7 +521,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
|
|||
else {
|
||||
// already an expiration was set
|
||||
}
|
||||
_mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
|
||||
_mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -648,7 +661,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
|
|||
if (p==NULL) return;
|
||||
if (size==0) return;
|
||||
const bool all_committed = (committed_size == size);
|
||||
const bool decommitted_size = (committed_size <= size ? size - committed_size : 0);
|
||||
const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0);
|
||||
|
||||
// need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
|
||||
mi_track_mem_undefined(p,size);
|
||||
|
@ -691,14 +704,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
|
|||
mi_assert_internal(arena->blocks_purge != NULL);
|
||||
|
||||
if (!all_committed) {
|
||||
// mark the entire range as no longer committed (so we recommit the full range when re-using)
|
||||
// mark the entire range as no longer committed (so we will recommit the full range when re-using)
|
||||
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
|
||||
mi_track_mem_noaccess(p,size);
|
||||
if (committed_size > 0) {
|
||||
//if (committed_size > 0) {
|
||||
// if partially committed, adjust the committed stats (is it will be recommitted when re-using)
|
||||
// in the delayed purge, we do no longer decrease the commit if the range is not marked entirely as committed.
|
||||
_mi_stat_decrease(&_mi_stats_main.committed, committed_size);
|
||||
}
|
||||
//}
|
||||
// note: if not all committed, it may be that the purge will reset/decommit the entire range
|
||||
// that contains already decommitted parts. Since purge consistently uses reset or decommit that
|
||||
// works (as we should never reset decommitted parts).
|
||||
|
|
28
src/bitmap.c
28
src/bitmap.c
|
@ -351,7 +351,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
|
|||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
|
||||
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
|
||||
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
|
||||
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) {
|
||||
size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
size_t pre_mask;
|
||||
size_t mid_mask;
|
||||
|
@ -359,28 +359,31 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
|
|||
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
|
||||
bool all_zero = true;
|
||||
bool any_zero = false;
|
||||
size_t one_count = 0;
|
||||
_Atomic(size_t)*field = &bitmap[idx];
|
||||
size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
|
||||
if ((prev & pre_mask) != 0) all_zero = false;
|
||||
if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); }
|
||||
if ((prev & pre_mask) != pre_mask) any_zero = true;
|
||||
while (mid_count-- > 0) {
|
||||
prev = mi_atomic_or_acq_rel(field++, mid_mask);
|
||||
if ((prev & mid_mask) != 0) all_zero = false;
|
||||
if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); }
|
||||
if ((prev & mid_mask) != mid_mask) any_zero = true;
|
||||
}
|
||||
if (post_mask!=0) {
|
||||
prev = mi_atomic_or_acq_rel(field, post_mask);
|
||||
if ((prev & post_mask) != 0) all_zero = false;
|
||||
if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); }
|
||||
if ((prev & post_mask) != post_mask) any_zero = true;
|
||||
}
|
||||
if (pany_zero != NULL) { *pany_zero = any_zero; }
|
||||
if (already_set != NULL) { *already_set = one_count; };
|
||||
mi_assert_internal(all_zero ? one_count == 0 : one_count <= count);
|
||||
return all_zero;
|
||||
}
|
||||
|
||||
|
||||
// Returns `true` if all `count` bits were 1.
|
||||
// `any_ones` is `true` if there was at least one bit set to one.
|
||||
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
|
||||
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) {
|
||||
size_t idx = mi_bitmap_index_field(bitmap_idx);
|
||||
size_t pre_mask;
|
||||
size_t mid_mask;
|
||||
|
@ -388,30 +391,33 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field
|
|||
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
|
||||
bool all_ones = true;
|
||||
bool any_ones = false;
|
||||
size_t one_count = 0;
|
||||
mi_bitmap_field_t* field = &bitmap[idx];
|
||||
size_t prev = mi_atomic_load_relaxed(field++);
|
||||
if ((prev & pre_mask) != pre_mask) all_ones = false;
|
||||
if ((prev & pre_mask) != 0) any_ones = true;
|
||||
if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); }
|
||||
while (mid_count-- > 0) {
|
||||
prev = mi_atomic_load_relaxed(field++);
|
||||
if ((prev & mid_mask) != mid_mask) all_ones = false;
|
||||
if ((prev & mid_mask) != 0) any_ones = true;
|
||||
if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); }
|
||||
}
|
||||
if (post_mask!=0) {
|
||||
prev = mi_atomic_load_relaxed(field);
|
||||
if ((prev & post_mask) != post_mask) all_ones = false;
|
||||
if ((prev & post_mask) != 0) any_ones = true;
|
||||
if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); }
|
||||
}
|
||||
if (pany_ones != NULL) { *pany_ones = any_ones; }
|
||||
if (already_set != NULL) { *already_set = one_count; }
|
||||
mi_assert_internal(all_ones ? one_count == count : one_count < count);
|
||||
return all_ones;
|
||||
}
|
||||
|
||||
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL);
|
||||
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) {
|
||||
return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set);
|
||||
}
|
||||
|
||||
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
|
||||
bool any_ones;
|
||||
mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
|
||||
mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL);
|
||||
return any_ones;
|
||||
}
|
||||
|
|
|
@ -102,9 +102,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
|
|||
|
||||
// Set `count` bits at `bitmap_idx` to 1 atomically
|
||||
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
|
||||
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
|
||||
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set);
|
||||
|
||||
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set);
|
||||
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
|
||||
|
||||
#endif
|
||||
|
|
57
src/libc.c
57
src/libc.c
|
@ -275,3 +275,60 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
|
|||
va_end(args);
|
||||
return written;
|
||||
}
|
||||
|
||||
|
||||
#if MI_SIZE_SIZE == 4
|
||||
#define mi_mask_even_bits32 (0x55555555)
|
||||
#define mi_mask_even_pairs32 (0x33333333)
|
||||
#define mi_mask_even_nibbles32 (0x0F0F0F0F)
|
||||
|
||||
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
|
||||
static size_t mi_byte_sum32(uint32_t x) {
|
||||
// perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
|
||||
x += (x << 8);
|
||||
x += (x << 16);
|
||||
return (size_t)(x >> 24);
|
||||
}
|
||||
|
||||
static size_t mi_popcount_generic32(uint32_t x) {
|
||||
// first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
|
||||
// in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
|
||||
// into the lower bit-pair:
|
||||
x = x - ((x >> 1) & mi_mask_even_bits32);
|
||||
// add the 2-bit pair results
|
||||
x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
|
||||
// add the 4-bit nibble results
|
||||
x = (x + (x >> 4)) & mi_mask_even_nibbles32;
|
||||
// each byte now has a count of its bits, we can sum them now:
|
||||
return mi_byte_sum32(x);
|
||||
}
|
||||
|
||||
mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
|
||||
return mi_popcount_generic32(x);
|
||||
}
|
||||
|
||||
#else
|
||||
#define mi_mask_even_bits64 (0x5555555555555555)
|
||||
#define mi_mask_even_pairs64 (0x3333333333333333)
|
||||
#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F)
|
||||
|
||||
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
|
||||
static size_t mi_byte_sum64(uint64_t x) {
|
||||
x += (x << 8);
|
||||
x += (x << 16);
|
||||
x += (x << 32);
|
||||
return (size_t)(x >> 56);
|
||||
}
|
||||
|
||||
static size_t mi_popcount_generic64(uint64_t x) {
|
||||
x = x - ((x >> 1) & mi_mask_even_bits64);
|
||||
x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
|
||||
x = (x + (x >> 4)) & mi_mask_even_nibbles64;
|
||||
return mi_byte_sum64(x);
|
||||
}
|
||||
|
||||
mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
|
||||
return mi_popcount_generic64(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
|
|||
{
|
||||
// add atomically (for abandoned pages)
|
||||
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
|
||||
// if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); };
|
||||
mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
|
||||
if (amount > 0) {
|
||||
mi_atomic_addi64_relaxed(&stat->total,amount);
|
||||
|
|
Loading…
Add table
Reference in a new issue