record max_clear bit

daanx 2024-12-06 21:03:33 -08:00
parent 61436a92b9
commit 5a5943ad33
4 changed files with 70 additions and 25 deletions

View file

@@ -476,23 +476,30 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t
Arena page allocation
----------------------------------------------------------- */
static bool mi_arena_claim_abandoned(size_t slice_index, void* arg1, void* arg2, bool* keep_abandoned) {
static bool mi_arena_try_claim_abandoned(size_t slice_index, void* arg1, void* arg2, bool* keep_abandoned) {
// found an abandoned page of the right size
mi_arena_t* const arena = (mi_arena_t*)arg1;
mi_subproc_t* const subproc = (mi_subproc_t*)arg2;
mi_page_t* const page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
// can we claim ownership?
if (!mi_page_try_claim_ownership(page)) {
// there was a concurrent free ..
// we need to keep it in the abandoned map as the free will call `mi_arena_page_unabandon`,
// and wait for readers (us!) to finish. This is why it is very important to set the abandoned
// bit again (or otherwise the unabandon will never stop waiting).
*keep_abandoned = true;
return false;
}
if (subproc != page->subproc) {
// wrong sub-process.. we need to unown again, and perhaps not keep it abandoned
// wrong sub-process.. we need to unown again
// (an unown might free the page, and depending on that we can keep it in the abandoned map or not)
// note: a minor wrinkle: the page will still be mapped but the abandoned map entry is (temporarily) clear at this point.
// so we cannot check in `mi_arena_free` for this invariant to hold.
const bool freed = _mi_page_unown(page);
*keep_abandoned = !freed;
return false;
}
// yes, we can reclaim it
// yes, we can reclaim it, keep the abandoned map entry clear
*keep_abandoned = false;
return true;
}
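The `keep_abandoned` out-parameter is the crux of this protocol: the caller has already cleared the abandoned-map bit before invoking the claim function, and on failure the flag decides whether that bit gets set again. Below is a minimal standalone sketch of that contract with plain stand-in types instead of the real mimalloc structures (all names here are illustrative, not part of the API):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

// Stand-in for mi_claim_fun_t: invoked after the caller has cleared the abandoned bit.
// Return true when the page was claimed; on failure, *keep_abandoned says whether the
// caller must set the bit back (e.g. because a concurrent free still needs to find it).
static bool try_claim_demo(size_t slice_index, void* arg1, void* arg2, bool* keep_abandoned) {
  (void)slice_index; (void)arg2;
  const bool got_ownership = *(const bool*)arg1;  // stand-in for mi_page_try_claim_ownership
  if (!got_ownership) {
    *keep_abandoned = true;   // concurrent free in progress: restore the bit so unabandon can finish
    return false;
  }
  *keep_abandoned = false;    // claimed: the already-cleared bit stays clear
  return true;
}

int main(void) {
  bool abandoned_bit = true;  // pretend abandoned-map entry, currently set
  bool got_ownership = false; // simulate losing the ownership race to a concurrent free
  abandoned_bit = false;      // the caller clears the bit before calling the claim function
  bool keep = false;
  const bool claimed = try_claim_demo(0, &got_ownership, NULL, &keep);
  if (!claimed && keep) {
    abandoned_bit = true;     // claim failed and the entry must remain abandoned
  }
  printf("claimed=%d abandoned_bit=%d\n", claimed, abandoned_bit);
  return 0;
}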
@@ -515,7 +522,7 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
size_t slice_index;
mi_bitmap_t* const bitmap = arena->pages_abandoned[bin];
if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_claim_abandoned, arena, subproc)) {
if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, subproc)) {
// found an abandoned page of the right size
// and claimed ownership.
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
@@ -703,6 +710,9 @@ void _mi_arena_page_free(mi_page_t* page) {
mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(arena->pages_abandoned[bin], slice_index, 1));
// note: we cannot check for `!mi_page_is_abandoned_and_mapped` since that may
// be (temporarily) not true if the free happens while trying to reclaim
// see `mi_arena_try_claim_abandoned`
}
#endif
@@ -1087,10 +1097,11 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc
return ENOMEM;
}
_mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : "");
// mi_debug_show_arenas(true, true, false);
return 0;
}
// Manage a range of regular OS memory
bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept {
return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL);
@@ -1121,13 +1132,22 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
size_t bit_set_count = 0;
for (size_t i = 0; i < mi_bitmap_chunk_count(bitmap) && bit_count < slice_count; i++) {
char buf[MI_BCHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf));
size_t k = 0;
mi_bchunk_t* chunk = &bitmap->chunks[i];
for (size_t j = 0, k = 0; j < MI_BCHUNK_FIELDS; j++) {
if (i<10) { buf[k++] = ' '; }
if (i<100) { itoa((int)i, buf+k, 10); k += (i < 10 ? 1 : 2); }
buf[k++] = ' ';
for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
if (j > 0 && (j % 4) == 0) {
buf[k++] = '\n';
_mi_memcpy(buf+k, prefix, strlen(prefix)); k += strlen(prefix);
buf[k++] = ' ';
buf[k++] = ' ';
buf[k++] = ' ';
buf[k++] = ' ';
buf[k++] = ' ';
}
if (bit_count < slice_count) {
mi_bfield_t bfield = chunk->bfields[j];

View file

@@ -87,7 +87,7 @@ static inline bool mi_bfield_atomic_clear(_Atomic(mi_bfield_t)*b, size_t idx, bo
}
// Clear a bit but only when/once it is set. This is used by concurrent frees while
// the page is abandoned and mapped.
static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal(idx < MI_BFIELD_BITS);
const mi_bfield_t mask = mi_bfield_one()<<idx;
@@ -101,7 +101,7 @@ static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_
old = mi_atomic_load_acquire(b);
}
}
} while (!mi_atomic_cas_weak_acq_rel(b,&old, (old&~mask)));
mi_assert_internal((old&mask)==mask); // we should only clear when it was set
}
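The same wait-until-set-then-clear shape can be written against a plain C11 atomic; this is only a simplified illustration of the pattern above (the helper name is made up), not the mimalloc code itself:

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

// Spin until bit `idx` of *b is observed set, then clear exactly that bit.
static void clear_once_set_demo(_Atomic uint64_t* b, size_t idx) {
  const uint64_t mask = (uint64_t)1 << idx;
  uint64_t old = atomic_load_explicit(b, memory_order_acquire);
  do {
    while ((old & mask) == 0) {   // not set yet: the setter has not published the bit
      old = atomic_load_explicit(b, memory_order_acquire);
    }
  } while (!atomic_compare_exchange_weak_explicit(b, &old, old & ~mask,
                                                  memory_order_acq_rel, memory_order_acquire));
}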
@@ -187,10 +187,10 @@ static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bf
mi_assert_internal(mask != 0);
mi_bfield_t old = mi_atomic_load_relaxed(b);
do {
if ((old&mask) != mask) {
// the mask bits are no longer set
if (all_clear != NULL) { *all_clear = (old==0); }
return false;
}
} while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask)); // try to atomically clear the mask bits
if (all_clear != NULL) { *all_clear = ((old&~mask) == 0); }
@@ -708,19 +708,27 @@ static bool mi_bchunk_find_and_try_clearN_(mi_bchunk_t* chunk, size_t n, size_t*
const size_t field_count = _mi_divide_up(n, MI_BFIELD_BITS); // we need this many fields
for (size_t i = 0; i <= MI_BCHUNK_FIELDS - field_count; i++)
{
// first pre-scan for a range of fields that are all set
// first pre-scan for a range of fields that are all set (up to the last one)
bool allset = true;
size_t j = 0;
size_t m = n;
do {
mi_assert_internal(i + j < MI_BCHUNK_FIELDS);
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i+j]);
if (~b != 0) {
allset = false;
i += j; // no need to look again at the previous fields
break;
size_t idx;
if (mi_bfield_find_least_bit(~b,&idx)) {
if (m > idx) {
allset = false;
i += j; // no need to look again at the previous fields
break;
}
}
else {
// all bits in b were set
m -= MI_BFIELD_BITS; // note: can underflow
}
} while (++j < field_count);
// if all set, we can try to atomically clear them
if (allset) {
const size_t cidx = i*MI_BFIELD_BITS;
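The point of the new `m` counter is that only the last field in the candidate range needs its lowest `m` bits set; clear bits above that position are irrelevant. A self-contained sketch of that pre-scan test over plain 64-bit words, using GCC/Clang's `__builtin_ctzll` as a stand-in for `mi_bfield_find_least_bit` (names are illustrative):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Return true if bits [0, n) of the field array `w` are all set,
// where bit i lives in w[i/64] at position i%64.
static bool first_n_bits_set(const uint64_t* w, size_t field_count, size_t n) {
  size_t m = n;                                          // bits still required
  for (size_t j = 0; j < field_count; j++) {
    const uint64_t b = w[j];
    if (~b != 0) {
      const size_t idx = (size_t)__builtin_ctzll(~b);    // least clear bit in this field
      if (m > idx) return false;                         // a bit we still need is clear
    }
    if (m <= 64) return true;                            // this field covered the remaining bits
    m -= 64;                                             // continue with the next field
  }
  return true;
}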
@@ -796,6 +804,11 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
return false;
}
// record the max clear
size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_clear);
do {
if mi_likely(chunk_idx <= oldmax) break;
} while (!mi_atomic_cas_weak_acq_rel(&bitmap->chunk_max_clear, &oldmax, chunk_idx));
return true;
}
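The `chunk_max_clear` update is the standard lock-free monotonic-maximum pattern: load, bail out if our index is not larger, otherwise CAS and retry on interference. The same idea in isolation with C11 atomics (the function name is illustrative):

#include <stdatomic.h>
#include <stddef.h>

// Raise *max to at least `value`; never lowers it, and needs no lock.
static void atomic_record_max(_Atomic size_t* max, size_t value) {
  size_t old = atomic_load_explicit(max, memory_order_relaxed);
  do {
    if (value <= old) break;            // an index at least this large was already recorded
  } while (!atomic_compare_exchange_weak_explicit(max, &old, value,
                                                  memory_order_acq_rel, memory_order_acquire));
}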
@@ -853,6 +866,7 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
if ((chunk_idx % MI_BFIELD_BITS) == 0 && (chunk_idx + MI_BFIELD_BITS <= end_chunk)) {
// optimize: we can set a full bfield in the chunkmap
mi_atomic_store_relaxed( &bitmap->chunkmap.bfields[chunk_idx/MI_BFIELD_BITS], mi_bfield_all_set());
mi_bitmap_chunkmap_set(bitmap, chunk_idx + MI_BFIELD_BITS - 1); // track the max set
chunk_idx += MI_BFIELD_BITS;
}
else {
@@ -1032,20 +1046,24 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
{ \
/* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
MI_UNUSED(tseq); \
const size_t chunk_start = 0; /* tseq % (1 + mi_bitmap_find_hi_chunk(bitmap)); */ \
const size_t chunk_max = mi_atomic_load_acquire(&bitmap->chunk_max_clear); /* mi_bitmap_chunk_count(bitmap) */ \
const size_t chunk_start = 0; /* (chunk_max <= 1 ? 0 : (tseq % chunk_max)); */ /* space out threads */ \
const size_t chunkmap_max_bfield = _mi_divide_up( mi_bitmap_chunk_count(bitmap), MI_BCHUNK_BITS ); \
const size_t chunkmap_start = chunk_start / MI_BFIELD_BITS; \
const size_t chunkmap_start_idx = chunk_start % MI_BFIELD_BITS; \
/* for each chunkmap entry `i` */ \
for (size_t _i = 0; _i < chunkmap_max_bfield; _i++) { \
size_t i = (_i + chunkmap_start); \
if (i >= chunkmap_max_bfield) { i -= chunkmap_max_bfield; } /* adjust for the start position */ \
\
if (i >= chunkmap_max_bfield) { \
i -= chunkmap_max_bfield; /* adjust for the start position */ \
} \
const size_t chunk_idx0 = i*MI_BFIELD_BITS; \
mi_bfield_t cmap = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); \
size_t cmap_idx_shift = 0; /* shift through the cmap */ \
if (_i == 0) { cmap = mi_bfield_rotate_right(cmap, chunkmap_start_idx); cmap_idx_shift = chunkmap_start_idx; } /* rotate right for the start position (on the first iteration) */ \
\
if (_i == 0) { \
cmap = mi_bfield_rotate_right(cmap, chunkmap_start_idx); /* rotate right for the start position (on the first iteration) */ \
cmap_idx_shift = chunkmap_start_idx; \
} \
size_t cmap_idx; \
while (mi_bfield_find_least_bit(cmap, &cmap_idx)) { \
/* set the chunk idx */ \
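Rotating the chunkmap word right by the start offset is what lets the scan begin at `chunk_start` and wrap around: bit `i` of the rotated word corresponds to bit `(i + start) % 64` of the original, so the usual least-bit loop visits set bits in cyclic order from the start position. A minimal sketch over a single 64-bit word (helper names are illustrative, not the macro's internals):

#include <stdint.h>
#include <stdio.h>

static uint64_t rotr64(uint64_t x, unsigned r) {
  r &= 63;
  return (r == 0) ? x : ((x >> r) | (x << (64 - r)));
}

// Visit every set bit of `w`, starting at bit `start` and wrapping around to the beginning.
static void forall_set_bits_from(uint64_t w, unsigned start) {
  start &= 63;
  uint64_t cmap = rotr64(w, start);     // bit i of cmap is bit (i + start) % 64 of w
  while (cmap != 0) {
    const unsigned idx = (unsigned)__builtin_ctzll(cmap);
    printf("set bit %u\n", (idx + start) % 64);
    cmap &= cmap - 1;                   // clear the least significant set bit and continue
  }
}

int main(void) {
  forall_set_bits_from(0x8000000000000005ULL, 2);  // visits bit 2, then 63, then 0
  return 0;
}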
@@ -1065,6 +1083,7 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
// (Used to find fresh free slices.)
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
{
mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
@@ -1087,6 +1106,8 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t
}
// Find a set bit in the bitmap and try to atomically clear it and claim it.
// (Used to find pages in the pages_abandoned bitmaps.)
mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx,
mi_claim_fun_t* claim, void* arg1, void* arg2)
{
@@ -1108,7 +1129,7 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t
if (keep_set) {
const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx);
mi_assert_internal(wasclear); MI_UNUSED(wasclear);
}
// continue
}
}

View file

@@ -91,8 +91,9 @@ typedef mi_bchunk_t mi_bchunkmap_t;
// An atomic bitmap
typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
_Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1]; // suppress warning on msvc
_Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
_Atomic(size_t) chunk_max_clear; // max chunk index that was once cleared
size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc
mi_bchunkmap_t chunkmap;
mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
} mi_bitmap_t;

View file

@@ -13,7 +13,10 @@ mi_decl_cache_align uint8_t* _mi_page_map = NULL;
static bool mi_page_map_all_committed = false;
static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
static mi_memid_t mi_page_map_memid;
static mi_bitmap_t mi_page_map_commit = { MI_BITMAP_DEFAULT_CHUNK_COUNT, { 0 }, { 0 }, { { 0 } } };
// (note: we need to initialize statically, or otherwise C++ may run a default constructor after process initialization)
static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0),
{ 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} };
bool _mi_page_map_init(void) {
size_t vbits = _mi_os_virtual_address_bits();