record max_clear bit

daanx 2024-12-06 21:03:33 -08:00
parent 61436a92b9
commit 5a5943ad33
4 changed files with 70 additions and 25 deletions

src/arena.c

@@ -476,23 +476,30 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t
   Arena page allocation
 ----------------------------------------------------------- */

-static bool mi_arena_claim_abandoned(size_t slice_index, void* arg1, void* arg2, bool* keep_abandoned) {
+static bool mi_arena_try_claim_abandoned(size_t slice_index, void* arg1, void* arg2, bool* keep_abandoned) {
   // found an abandoned page of the right size
   mi_arena_t* const arena = (mi_arena_t*)arg1;
   mi_subproc_t* const subproc = (mi_subproc_t*)arg2;
   mi_page_t* const page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
   // can we claim ownership?
   if (!mi_page_try_claim_ownership(page)) {
+    // there was a concurrent free ..
+    // we need to keep it in the abandoned map as the free will call `mi_arena_page_unabandon`,
+    // and wait for readers (us!) to finish. This is why it is very important to set the abandoned
+    // bit again (or otherwise the unabandon will never stop waiting).
     *keep_abandoned = true;
     return false;
   }
   if (subproc != page->subproc) {
-    // wrong sub-process.. we need to unown again, and perhaps not keep it abandoned
+    // wrong sub-process.. we need to unown again
+    // (an unown might free the page, and depending on that we can keep it in the abandoned map or not)
+    // note: a minor wrinkle: the page will still be mapped but the abandoned map entry is (temporarily) clear at this point.
+    // so we cannot check in `mi_arena_free` for this invariant to hold.
     const bool freed = _mi_page_unown(page);
     *keep_abandoned = !freed;
     return false;
   }
-  // yes, we can reclaim it
+  // yes, we can reclaim it, keep the abandoned map entry clear
   *keep_abandoned = false;
   return true;
 }
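A note on the comments added to mi_arena_try_claim_abandoned: the handshake only works because a reclaimer that loses the ownership race restores the abandoned bit, since the concurrent free spins in `mi_arena_page_unabandon` until that bit reappears. Below is a minimal standalone model of that rule using C11 atomics; the `model_*` names are illustrative and not part of the mimalloc API.

#include <stdatomic.h>
#include <stdbool.h>

// Toy model of the claim handshake: a page has an "owned" flag and an
// entry in an abandoned map. If the ownership CAS fails, a concurrent
// free holds the page and will spin until the abandoned bit is set
// again, so the reclaimer must ask to keep it set.
typedef struct model_page_s {
  atomic_bool owned;      // stands in for mi_page_try_claim_ownership
  atomic_bool abandoned;  // stands in for the pages_abandoned bitmap bit
} model_page_t;

static bool model_try_claim(model_page_t* page, bool* keep_abandoned) {
  bool expected = false;
  if (!atomic_compare_exchange_strong(&page->owned, &expected, true)) {
    *keep_abandoned = true;   // let the freeing thread's unabandon finish
    return false;
  }
  *keep_abandoned = false;    // claimed; keep the abandoned-map entry clear
  return true;
}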
@@ -515,7 +522,7 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
     size_t slice_index;
     mi_bitmap_t* const bitmap = arena->pages_abandoned[bin];

-    if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_claim_abandoned, arena, subproc)) {
+    if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, subproc)) {
       // found an abandoned page of the right size
       // and claimed ownership.
       mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
@@ -703,6 +710,9 @@ void _mi_arena_page_free(mi_page_t* page) {
   mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
   mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
   mi_assert_internal(mi_bitmap_is_clearN(arena->pages_abandoned[bin], slice_index, 1));
+  // note: we cannot check for `!mi_page_is_abandoned_and_mapped` since that may
+  // be (temporarily) not true if the free happens while trying to reclaim
+  // see `mi_arena_try_claim_abandoned`
 }
 #endif
@@ -1087,10 +1097,11 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc
     return ENOMEM;
   }
   _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : "");
+  // mi_debug_show_arenas(true, true, false);
   return 0;
 }

 // Manage a range of regular OS memory
 bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept {
   return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL);
@@ -1121,13 +1132,22 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
   size_t bit_set_count = 0;
   for (size_t i = 0; i < mi_bitmap_chunk_count(bitmap) && bit_count < slice_count; i++) {
     char buf[MI_BCHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf));
+    size_t k = 0;
     mi_bchunk_t* chunk = &bitmap->chunks[i];
-    for (size_t j = 0, k = 0; j < MI_BCHUNK_FIELDS; j++) {
+
+    if (i<10)  { buf[k++] = ' '; }
+    if (i<100) { itoa((int)i, buf+k, 10); k += (i < 10 ? 1 : 2); }
+    buf[k++] = ' ';
+
+    for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
       if (j > 0 && (j % 4) == 0) {
         buf[k++] = '\n';
         _mi_memcpy(buf+k, prefix, strlen(prefix)); k += strlen(prefix);
         buf[k++] = ' ';
         buf[k++] = ' ';
+        buf[k++] = ' ';
+        buf[k++] = ' ';
+        buf[k++] = ' ';
       }
       if (bit_count < slice_count) {
         mi_bfield_t bfield = chunk->bfields[j];
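One portability note on the chunk numbering added above: `itoa` is not standard C (it is a common vendor extension), so a portable build might swap in a small snprintf-based helper. A sketch, with the hypothetical name mi_itoa10:

#include <stdio.h>

// Hypothetical portable stand-in for the non-standard itoa(v, buf, 10):
// formats v in decimal at buf and returns the number of characters.
// The terminating NUL that snprintf writes is harmless here because the
// caller immediately overwrites it with buf[k++] = ' '.
static int mi_itoa10(int v, char* buf, size_t bufsize) {
  int n = snprintf(buf, bufsize, "%d", v);
  return (n < 0 ? 0 : n);
}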

src/bitmap.c

@@ -87,7 +87,7 @@ static inline bool mi_bfield_atomic_clear(_Atomic(mi_bfield_t)*b, size_t idx, bo
 }

 // Clear a bit but only when/once it is set. This is used by concurrent free's while
 // the page is abandoned and mapped.
 static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_t idx) {
   mi_assert_internal(idx < MI_BFIELD_BITS);
   const mi_bfield_t mask = mi_bfield_one()<<idx;
@@ -101,7 +101,7 @@ static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_
         old = mi_atomic_load_acquire(b);
       }
     }
   } while (!mi_atomic_cas_weak_acq_rel(b,&old, (old&~mask)));
   mi_assert_internal((old&mask)==mask);  // we should only clear when it was set
 }
@@ -187,10 +187,10 @@ static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bf
   mi_assert_internal(mask != 0);
   mi_bfield_t old = mi_atomic_load_relaxed(b);
   do {
     if ((old&mask) != mask) {
       // the mask bits are no longer set
       if (all_clear != NULL) { *all_clear = (old==0); }
       return false;
     }
   } while (!mi_atomic_cas_weak_acq_rel(b, &old, old&~mask));  // try to atomically clear the mask bits
   if (all_clear != NULL) { *all_clear = ((old&~mask) == 0); }
@@ -708,19 +708,27 @@ static bool mi_bchunk_find_and_try_clearN_(mi_bchunk_t* chunk, size_t n, size_t*
   const size_t field_count = _mi_divide_up(n, MI_BFIELD_BITS);  // we need this many fields
   for (size_t i = 0; i <= MI_BCHUNK_FIELDS - field_count; i++)
   {
-    // first pre-scan for a range of fields that are all set
+    // first pre-scan for a range of fields that are all set (up to the last one)
     bool allset = true;
     size_t j = 0;
+    size_t m = n;
     do {
       mi_assert_internal(i + j < MI_BCHUNK_FIELDS);
       mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i+j]);
-      if (~b != 0) {
-        allset = false;
-        i += j;  // no need to look again at the previous fields
-        break;
+      size_t idx;
+      if (mi_bfield_find_least_bit(~b,&idx)) {
+        if (m > idx) {
+          allset = false;
+          i += j;  // no need to look again at the previous fields
+          break;
+        }
+      }
+      else {
+        // all bits in b were set
+        m -= MI_BFIELD_BITS;  // note: can underflow
       }
     } while (++j < field_count);

     // if all set, we can try to atomically clear them
     if (allset) {
       const size_t cidx = i*MI_BFIELD_BITS;
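The new m countdown is the point of this hunk: every field before the last must be fully set, while the last field only needs its low m remaining bits, so a clear bit at idx >= m no longer disqualifies the range. The same pre-scan rendered standalone over plain uint64_t fields (an illustrative sketch, assuming GCC/Clang __builtin_ctzll; not the mimalloc code itself):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// True when fields[0..field_count-1] provide at least n set bits starting
// at bit 0: all fields fully set except possibly the last, which needs
// only its low m remaining bits.
static bool prescan_allset(const uint64_t* fields, size_t field_count, size_t n) {
  size_t m = n;  // bits still required
  for (size_t j = 0; j < field_count; j++) {
    const uint64_t zeros = ~fields[j];
    if (zeros != 0) {
      const size_t idx = (size_t)__builtin_ctzll(zeros);  // lowest clear bit
      if (m > idx) return false;  // a bit we still need is clear
      // otherwise the clear bit lies past the required range (last field)
    }
    else {
      m -= 64;  // field fully set; may underflow on the last field, as above
    }
  }
  return true;
}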
@@ -796,6 +804,11 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
     mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
     return false;
   }
+  // record the max clear
+  size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_clear);
+  do {
+    if mi_likely(chunk_idx <= oldmax) break;
+  } while (!mi_atomic_cas_weak_acq_rel(&bitmap->chunk_max_clear, &oldmax, chunk_idx));
   return true;
 }
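The added loop is the standard lock-free monotonic-maximum update: retry the CAS until either another thread has already recorded a larger index or our store lands. The same pattern in isolation with C11 atomics (a sketch, not the mimalloc atomic macros):

#include <stdatomic.h>
#include <stddef.h>

// Raise *max to at least value, never lowering it, without locks.
// Concurrent callers may race, but the stored value only ever grows.
static void atomic_update_max(_Atomic(size_t)* max, size_t value) {
  size_t oldmax = atomic_load_explicit(max, memory_order_relaxed);
  do {
    if (value <= oldmax) break;  // a larger (or equal) index is already recorded
  } while (!atomic_compare_exchange_weak_explicit(
               max, &oldmax, value,
               memory_order_acq_rel, memory_order_acquire));
}

A stale (too small) chunk_max_clear costs at most some extra scanning, never correctness, since the chunkmap loop below still visits every entry; that is why the racy update is fine here.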
@@ -853,6 +866,7 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
     if ((chunk_idx % MI_BFIELD_BITS) == 0 && (chunk_idx + MI_BFIELD_BITS <= end_chunk)) {
       // optimize: we can set a full bfield in the chunkmap
       mi_atomic_store_relaxed( &bitmap->chunkmap.bfields[chunk_idx/MI_BFIELD_BITS], mi_bfield_all_set());
+      mi_bitmap_chunkmap_set(bitmap, chunk_idx + MI_BFIELD_BITS - 1);  // track the max set
       chunk_idx += MI_BFIELD_BITS;
     }
     else {
@@ -1032,20 +1046,24 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
   { \
     /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
     MI_UNUSED(tseq); \
-    const size_t chunk_start = 0; /* tseq % (1 + mi_bitmap_find_hi_chunk(bitmap)); */ \
+    const size_t chunk_max = mi_atomic_load_acquire(&bitmap->chunk_max_clear); /* mi_bitmap_chunk_count(bitmap) */ \
+    const size_t chunk_start = 0; /* (chunk_max <= 1 ? 0 : (tseq % chunk_max)); */ /* space out threads */ \
     const size_t chunkmap_max_bfield = _mi_divide_up( mi_bitmap_chunk_count(bitmap), MI_BCHUNK_BITS ); \
     const size_t chunkmap_start = chunk_start / MI_BFIELD_BITS; \
     const size_t chunkmap_start_idx = chunk_start % MI_BFIELD_BITS; \
     /* for each chunkmap entry `i` */ \
     for (size_t _i = 0; _i < chunkmap_max_bfield; _i++) { \
       size_t i = (_i + chunkmap_start); \
-      if (i >= chunkmap_max_bfield) { i -= chunkmap_max_bfield; } /* adjust for the start position */ \
-      \
+      if (i >= chunkmap_max_bfield) { \
+        i -= chunkmap_max_bfield; /* adjust for the start position */ \
+      } \
       const size_t chunk_idx0 = i*MI_BFIELD_BITS; \
       mi_bfield_t cmap = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); \
       size_t cmap_idx_shift = 0; /* shift through the cmap */ \
-      if (_i == 0) { cmap = mi_bfield_rotate_right(cmap, chunkmap_start_idx); cmap_idx_shift = chunkmap_start_idx; } /* rotate right for the start position (on the first iteration) */ \
-      \
+      if (_i == 0) { \
+        cmap = mi_bfield_rotate_right(cmap, chunkmap_start_idx); /* rotate right for the start position (on the first iteration) */ \
+        cmap_idx_shift = chunkmap_start_idx; \
+      } \
       size_t cmap_idx; \
       while (mi_bfield_find_least_bit(cmap, &cmap_idx)) { \
         /* set the chunk idx */ \
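The start-position machinery in this macro is easier to see without the line continuations: rotate the chunkmap word right by the start index so the scan logically begins at chunk_start, then undo the rotation when converting a found bit back to a chunk index. A minimal sketch over a single 64-bit word (hypothetical helper names, assuming GCC/Clang __builtin_ctzll):

#include <stdbool.h>
#include <stdint.h>

static inline uint64_t rotr64(uint64_t x, unsigned r) {
  return (r == 0 ? x : ((x >> r) | (x << (64 - r))));
}

// Find the first set bit in cmap at or after position start, wrapping
// around; returns false when the word is zero.
static bool find_set_bit_from(uint64_t cmap, unsigned start, unsigned* pos) {
  const uint64_t rot = rotr64(cmap, start);   // bit `start` becomes bit 0
  if (rot == 0) return false;
  const unsigned idx = (unsigned)__builtin_ctzll(rot);
  *pos = (idx + start) % 64;                  // undo the rotation
  return true;
}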
@@ -1065,6 +1083,7 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n

 // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
 // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
+// (Used to find fresh free slices.)
 mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
 {
   mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
@@ -1087,6 +1106,8 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t
 }

+// Find a set bit in the bitmap and try to atomically clear it and claim it.
+// (Used to find pages in the pages_abandoned bitmaps.)
 mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx,
                                                     mi_claim_fun_t* claim, void* arg1, void* arg2)
 {
@@ -1108,7 +1129,7 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t
       if (keep_set) {
         const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx);
         mi_assert_internal(wasclear); MI_UNUSED(wasclear);
       }
       // continue
     }
   }

src/bitmap.h

@@ -91,8 +91,9 @@ typedef mi_bchunk_t mi_bchunkmap_t;

 // An atomic bitmap
 typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
   _Atomic(size_t) chunk_count;        // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
-  size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1];   // suppress warning on msvc
+  _Atomic(size_t) chunk_max_clear;    // max chunk index that was once cleared
+  size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2];   // suppress warning on msvc
   mi_bchunkmap_t chunkmap;
   mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT];  // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bitmap_t;
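The `- 2` keeps the layout arithmetic intact: the two header words (chunk_count plus the new chunk_max_clear) and the padding must still fill exactly one MI_BCHUNK_SIZE block so that chunkmap stays chunk-aligned. A compile-time check one could add (a sketch, assuming the definitions above are in scope):

#include <assert.h>
#include <stddef.h>

// Two size_t header fields plus the padding fill exactly one chunk, so
// chunkmap (and every chunk after it) stays MI_BCHUNK_SIZE-aligned.
static_assert(2*MI_SIZE_SIZE + (MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2)*MI_SIZE_SIZE == MI_BCHUNK_SIZE,
              "bitmap header must fill exactly one chunk");
static_assert(offsetof(mi_bitmap_t, chunkmap) == MI_BCHUNK_SIZE,
              "chunkmap must start at the second chunk");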

src/page-map.c

@@ -13,7 +13,10 @@ mi_decl_cache_align uint8_t* _mi_page_map = NULL;
 static bool mi_page_map_all_committed = false;
 static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
 static mi_memid_t mi_page_map_memid;
-static mi_bitmap_t mi_page_map_commit = { MI_BITMAP_DEFAULT_CHUNK_COUNT, { 0 }, { 0 }, { { 0 } } };
+
+// (note: we need to initialize statically or otherwise C++ may run a default constructor after process initialization)
+static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0),
+                                          { 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} };

 bool _mi_page_map_init(void) {
   size_t vbits = _mi_os_virtual_address_bits();
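The new comment deserves a short gloss: if mi_page_map_commit needed dynamic initialization, C++ could run that initializer after the allocator is already live (cross-translation-unit initialization order is unspecified) and wipe state in use. Brace-initializing every _Atomic field keeps the object constant-initialized, baked into the binary image before any code runs. A stripped-down illustration with a hypothetical commit_map_t type (C17 allows plain initializers for static-storage atomics; C11 used ATOMIC_VAR_INIT, hence mimalloc's MI_ATOMIC_VAR_INIT portability macro):

#include <stdatomic.h>
#include <stddef.h>

typedef struct commit_map_s {
  _Atomic(size_t) chunk_count;
  _Atomic(size_t) chunk_max_clear;   // counterpart of the field added above
  _Atomic(size_t) bits[4];
} commit_map_t;

// Constant (static) initialization: the object lives in the image at load
// time, so no late-running constructor can clobber state already in use.
static commit_map_t commit_map = { 8, 0, { 0, 0, 0, 0 } };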