From 5a5943ad33551c4fcd84d62be1564445f25d52d4 Mon Sep 17 00:00:00 2001
From: daanx
Date: Fri, 6 Dec 2024 21:03:33 -0800
Subject: [PATCH] record max_clear bit

---
 src/arena.c    | 32 ++++++++++++++++++++++++------
 src/bitmap.c   | 53 +++++++++++++++++++++++++++++++++++---------------
 src/bitmap.h   |  5 +++--
 src/page-map.c |  5 ++++-
 4 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index fd609fe0..2c215264 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -476,23 +476,30 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t
   Arena page allocation
 ----------------------------------------------------------- */
 
-static bool mi_arena_claim_abandoned(size_t slice_index, void* arg1, void* arg2, bool* keep_abandoned) {
+static bool mi_arena_try_claim_abandoned(size_t slice_index, void* arg1, void* arg2, bool* keep_abandoned) {
   // found an abandoned page of the right size
   mi_arena_t* const arena = (mi_arena_t*)arg1;
   mi_subproc_t* const subproc = (mi_subproc_t*)arg2;
   mi_page_t* const page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
   // can we claim ownership?
   if (!mi_page_try_claim_ownership(page)) {
+    // there was a concurrent free: we need to keep the page in the abandoned map as the
+    // free will call `mi_arena_page_unabandon` and wait for readers (us!) to finish.
+    // This is why it is very important to set the abandoned bit again, as otherwise
+    // the unabandon will never stop waiting.
     *keep_abandoned = true;
     return false;
   }
   if (subproc != page->subproc) {
-    // wrong sub-process.. we need to unown again, and perhaps not keep it abandoned
+    // wrong sub-process: we need to unown again
+    // (an unown might free the page, and depending on that we may or may not keep it in the abandoned map)
+    // note: a minor wrinkle: the page is still mapped, but the abandoned map entry is (temporarily)
+    // clear at this point, so we cannot check this invariant in `mi_arena_free`.
     const bool freed = _mi_page_unown(page);
     *keep_abandoned = !freed;
     return false;
   }
-  // yes, we can reclaim it
+  // yes, we can reclaim it; keep the abandoned map entry clear
   *keep_abandoned = false;
   return true;
 }
@@ -515,7 +522,7 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
     size_t slice_index;
     mi_bitmap_t* const bitmap = arena->pages_abandoned[bin];
 
-    if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_claim_abandoned, arena, subproc)) {
+    if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, subproc)) {
       // found an abandoned page of the right size
       // and claimed ownership.
       mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
@@ -703,6 +710,9 @@ void _mi_arena_page_free(mi_page_t* page) {
   mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
   mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
   mi_assert_internal(mi_bitmap_is_clearN(arena->pages_abandoned[bin], slice_index, 1));
+  // note: we cannot check for `!mi_page_is_abandoned_and_mapped` since that may be
+  // (temporarily) false if the free happens while another thread tries to reclaim;
+  // see `mi_arena_try_claim_abandoned`
 }
 #endif
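The `keep_abandoned` protocol above is easy to get wrong, so here is a self-contained sketch of the idea (illustrative only, not part of the patch: `page_t`, `try_claim_ownership`, and the plain `atomic_bool` ownership flag are simplified stand-ins for mimalloc's thread-id-based ownership and `_mi_page_unown`):

```c
#include <stdatomic.h>
#include <stdbool.h>

typedef struct page_s {
  atomic_bool owned;   // stand-in for the page ownership flag
  int subproc;         // stand-in for page->subproc
} page_t;

// Exactly one thread wins ownership; a concurrent free may have won already.
static bool try_claim_ownership(page_t* page) {
  bool expected = false;
  return atomic_compare_exchange_strong(&page->owned, &expected, true);
}

// Same contract as mi_arena_try_claim_abandoned: return true iff the page was
// claimed; *keep_abandoned tells the caller whether to re-set the abandoned bit.
static bool try_claim_abandoned(page_t* page, int my_subproc, bool* keep_abandoned) {
  if (!try_claim_ownership(page)) {
    // a concurrent free won: it will unabandon the page and busy-wait until
    // the abandoned bit is set again -- so the bit must be restored
    *keep_abandoned = true;
    return false;
  }
  if (page->subproc != my_subproc) {
    // wrong sub-process: release ownership again (the real _mi_page_unown may
    // free the page, which decides whether the entry is kept)
    atomic_store(&page->owned, false);
    *keep_abandoned = true;
    return false;
  }
  *keep_abandoned = false;  // claimed: the abandoned map entry stays clear
  return true;
}
```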
" (in large os pages)" : ""); + // mi_debug_show_arenas(true, true, false); + return 0; } - // Manage a range of regular OS memory bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); @@ -1121,13 +1132,22 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_ size_t bit_set_count = 0; for (size_t i = 0; i < mi_bitmap_chunk_count(bitmap) && bit_count < slice_count; i++) { char buf[MI_BCHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf)); + size_t k = 0; mi_bchunk_t* chunk = &bitmap->chunks[i]; - for (size_t j = 0, k = 0; j < MI_BCHUNK_FIELDS; j++) { + + if (i<10) { buf[k++] = ' '; } + if (i<100) { itoa((int)i, buf+k, 10); k += (i < 10 ? 1 : 2); } + buf[k++] = ' '; + + for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { if (j > 0 && (j % 4) == 0) { buf[k++] = '\n'; _mi_memcpy(buf+k, prefix, strlen(prefix)); k += strlen(prefix); buf[k++] = ' '; buf[k++] = ' '; + buf[k++] = ' '; + buf[k++] = ' '; + buf[k++] = ' '; } if (bit_count < slice_count) { mi_bfield_t bfield = chunk->bfields[j]; diff --git a/src/bitmap.c b/src/bitmap.c index 0916aaae..15401d8d 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -87,7 +87,7 @@ static inline bool mi_bfield_atomic_clear(_Atomic(mi_bfield_t)*b, size_t idx, bo } // Clear a bit but only when/once it is set. This is used by concurrent free's while -// the page is abandoned and mapped. +// the page is abandoned and mapped. static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_t idx) { mi_assert_internal(idx < MI_BFIELD_BITS); const mi_bfield_t mask = mi_bfield_one()<bfields[i+j]); - if (~b != 0) { - allset = false; - i += j; // no need to look again at the previous fields - break; + size_t idx; + if (mi_bfield_find_least_bit(~b,&idx)) { + if (m > idx) { + allset = false; + i += j; // no need to look again at the previous fields + break; + } + } + else { + // all bits in b were set + m -= MI_BFIELD_BITS; // note: can underflow } } while (++j < field_count); - + // if all set, we can try to atomically clear them if (allset) { const size_t cidx = i*MI_BFIELD_BITS; @@ -796,6 +804,11 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) mi_bchunk_set(&bitmap->chunkmap, chunk_idx); return false; } + // record the max clear + size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_clear); + do { + if mi_likely(chunk_idx <= oldmax) break; + } while (!mi_atomic_cas_weak_acq_rel(&bitmap->chunk_max_clear, &oldmax, chunk_idx)); return true; } @@ -853,6 +866,7 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) { if ((chunk_idx % MI_BFIELD_BITS) == 0 && (chunk_idx + MI_BFIELD_BITS <= end_chunk)) { // optimize: we can set a full bfield in the chunkmap mi_atomic_store_relaxed( &bitmap->chunkmap.bfields[chunk_idx/MI_BFIELD_BITS], mi_bfield_all_set()); + mi_bitmap_chunkmap_set(bitmap, chunk_idx + MI_BFIELD_BITS - 1); // track the max set chunk_idx += MI_BFIELD_BITS; } else { @@ -1032,20 +1046,24 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n { \ /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \ MI_UNUSED(tseq); \ - const size_t chunk_start = 0; /* tseq % (1 + mi_bitmap_find_hi_chunk(bitmap)); */ \ + const size_t chunk_max = mi_atomic_load_acquire(&bitmap->chunk_max_clear); /* mi_bitmap_chunk_count(bitmap) */ 
@@ -1032,20 +1046,24 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
 { \
   /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
   MI_UNUSED(tseq); \
-  const size_t chunk_start = 0; /* tseq % (1 + mi_bitmap_find_hi_chunk(bitmap)); */ \
+  const size_t chunk_max = mi_atomic_load_acquire(&bitmap->chunk_max_clear); /* mi_bitmap_chunk_count(bitmap) */ \
+  const size_t chunk_start = 0; /* (chunk_max <= 1 ? 0 : (tseq % chunk_max)); */ /* space out threads */ \
   const size_t chunkmap_max_bfield = _mi_divide_up( mi_bitmap_chunk_count(bitmap), MI_BCHUNK_BITS ); \
   const size_t chunkmap_start = chunk_start / MI_BFIELD_BITS; \
   const size_t chunkmap_start_idx = chunk_start % MI_BFIELD_BITS; \
   /* for each chunkmap entry `i` */ \
   for (size_t _i = 0; _i < chunkmap_max_bfield; _i++) { \
     size_t i = (_i + chunkmap_start); \
-    if (i >= chunkmap_max_bfield) { i -= chunkmap_max_bfield; } /* adjust for the start position */ \
-    \
+    if (i >= chunkmap_max_bfield) { \
+      i -= chunkmap_max_bfield; /* adjust for the start position */ \
+    } \
     const size_t chunk_idx0 = i*MI_BFIELD_BITS; \
     mi_bfield_t cmap = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); \
     size_t cmap_idx_shift = 0; /* shift through the cmap */ \
-    if (_i == 0) { cmap = mi_bfield_rotate_right(cmap, chunkmap_start_idx); cmap_idx_shift = chunkmap_start_idx; } /* rotate right for the start position (on the first iteration) */ \
-    \
+    if (_i == 0) { \
+      cmap = mi_bfield_rotate_right(cmap, chunkmap_start_idx); /* rotate right for the start position (on the first iteration) */ \
+      cmap_idx_shift = chunkmap_start_idx; \
+    } \
     size_t cmap_idx; \
     while (mi_bfield_find_least_bit(cmap, &cmap_idx)) { \
       /* set the chunk idx */ \
@@ -1065,6 +1083,7 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
 
 // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
 // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
+// (Used to find fresh free slices.)
 mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
 {
   mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
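To make the rotate-right scan in `mi_bitmap_forall_chunks` concrete, here is a runnable sketch of the same technique on a single 64-bit field (illustrative code, not from the patch; `__builtin_ctzll` stands in for `mi_bfield_find_least_bit`):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_BITS 64

static uint64_t rotr64(uint64_t x, unsigned r) {
  r &= (FIELD_BITS - 1);
  return (r == 0 ? x : (x >> r) | (x << (FIELD_BITS - r)));
}

static bool find_least_bit(uint64_t x, unsigned* idx) {
  if (x == 0) return false;
  *idx = (unsigned)__builtin_ctzll(x);  // gcc/clang; _BitScanForward64 on msvc
  return true;
}

int main(void) {
  uint64_t map = (1ull << 3) | (1ull << 40) | (1ull << 62);
  unsigned start = 8;                 // e.g. derived from tseq
  uint64_t rot = rotr64(map, start);  // bit `start` becomes bit 0
  unsigned idx;
  while (find_least_bit(rot, &idx)) {
    unsigned real = (idx + start) % FIELD_BITS;  // undo the rotation
    printf("visit bit %u\n", real);              // prints 40, 62, then 3
    rot &= rot - 1;                              // clear the lowest set bit
  }
  return 0;
}
```

Rotating once up front keeps the inner loop a plain least-bit scan; only the final index needs the `(idx + start) % 64` correction.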
@@ -1087,6 +1106,8 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t
 }
 
 
+// Find a set bit in the bitmap and try to atomically clear and claim it.
+// (Used to find pages in the `pages_abandoned` bitmaps.)
 mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx,
                                                     mi_claim_fun_t* claim, void* arg1, void* arg2)
 {
@@ -1108,7 +1129,7 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t
       if (keep_set) {
         const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx);
         mi_assert_internal(wasclear); MI_UNUSED(wasclear);
-      } 
+      }
       // continue
     }
   }

diff --git a/src/bitmap.h b/src/bitmap.h
index 7b6000cc..7938bfa0 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -91,8 +91,9 @@ typedef mi_bchunk_t mi_bchunkmap_t;
 
 // An atomic bitmap
 typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
-  _Atomic(size_t) chunk_count;       // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
-  size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1];   // suppress warning on msvc
+  _Atomic(size_t) chunk_count;       // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
+  _Atomic(size_t) chunk_max_clear;   // max chunk index that was ever cleared
+  size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2];   // suppress warning on msvc
   mi_bchunkmap_t chunkmap;
   mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT];  // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bitmap_t;

diff --git a/src/page-map.c b/src/page-map.c
index c292378b..ca0e2481 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -13,7 +13,10 @@ mi_decl_cache_align uint8_t* _mi_page_map = NULL;
 static bool mi_page_map_all_committed = false;
 static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
 static mi_memid_t mi_page_map_memid;
-static mi_bitmap_t mi_page_map_commit = { MI_BITMAP_DEFAULT_CHUNK_COUNT, { 0 }, { 0 }, { { 0 } } };
+
+// (note: we need to initialize statically, as otherwise C++ may run default constructors after process initialization)
+static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0),
+                                          { 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} };
 
 bool _mi_page_map_init(void) {
   size_t vbits = _mi_os_virtual_address_bits();
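The static-initialization note matters because mimalloc can be built as C++, where a non-constant initializer would run as a dynamic constructor, possibly after the allocator is already in use, and overwrite live state. A minimal sketch of the pattern (illustrative type and names; `ATOMIC_VAR_INIT` is the portable C11 spelling, and since C17 a plain initializer works as well):

```c
#include <stdatomic.h>
#include <stddef.h>

typedef struct bit_counter_s {
  atomic_size_t count;     // e.g. chunk_count
  atomic_size_t max_seen;  // e.g. chunk_max_clear
} bit_counter_t;

// constant-initialized: lives in the data segment and is valid from the very
// first call, with no C++ runtime constructor that could re-initialize it later
static bit_counter_t g_counter = { ATOMIC_VAR_INIT(16), ATOMIC_VAR_INIT(0) };
```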