From 216c04f8d91cd433897e5c2e46a4a24554558c5d Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 13 Dec 2024 18:39:03 -0800 Subject: [PATCH] clean up bitmap api --- include/mimalloc/types.h | 3 +- src/arena.c | 7 +- src/bitmap.c | 363 ++++++++++++++++++--------------------- src/bitmap.h | 54 +++--- src/init.c | 3 +- src/stats.c | 11 +- test/test-stress.c | 2 +- 7 files changed, 202 insertions(+), 241 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index bf1cb5c8..bf91a58a 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -471,13 +471,12 @@ typedef struct mi_stats_s { mi_stat_counter_t commit_calls; mi_stat_counter_t reset_calls; mi_stat_counter_t purge_calls; + mi_stat_counter_t arena_purges; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t normal_count; mi_stat_counter_t huge_count; mi_stat_counter_t arena_count; - mi_stat_counter_t arena_crossover_count; - mi_stat_counter_t arena_rollback_count; mi_stat_counter_t guarded_alloc_count; #if MI_STAT>1 mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; diff --git a/src/arena.c b/src/arena.c index 9f95a699..7aec429e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1225,7 +1225,7 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, mi_arena_t else if (slice_index + bit < arena->info_slices) { c = 'i'; } // else if (mi_bitmap_is_setN(arena->pages_purge, slice_index + bit, NULL)) { c = '*'; } else if (mi_bitmap_is_set(arena->slices_free, slice_index+bit)) { - if (mi_bitmap_is_set(arena->slices_purge, slice_index + bit)) { c = '!'; } + if (mi_bitmap_is_set(arena->slices_purge, slice_index + bit)) { c = '~'; } else if (mi_bitmap_is_setN(arena->slices_committed, slice_index + bit, 1)) { c = '_'; } else { c = '.'; } } @@ -1297,7 +1297,7 @@ void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) // purge_total += mi_debug_show_bitmap("purgeable slices", arena->slice_count, arena->slices_purge, false, NULL); //} if (show_pages) { - page_total += mi_debug_show_bitmap("pages (p:page, a:abandoned, f:full-abandoned, s:singleton-abandoned, i:arena-info, m:heap-meta-data, !:free-purgable, _:free-committed, .:free-reserved)", arena->slice_count, arena->pages, false, arena); + page_total += mi_debug_show_bitmap("pages (p:page, a:abandoned, f:full-abandoned, s:singleton-abandoned, i:arena-info, m:heap-meta-data, ~:free-purgable, _:free-committed, .:free-reserved)", arena->slice_count, arena->pages, false, arena); } } if (show_inuse) _mi_output_message("total inuse slices : %zu\n", slice_total - free_total); @@ -1470,8 +1470,6 @@ static bool mi_arena_try_purge_visitor(size_t slice_index, size_t slice_count, m mi_bitmap_clearN(arena->slices_purge, slice_index, slice_count); return true; // continue } - - // returns true if anything was purged static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) @@ -1486,6 +1484,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) // reset expire (if not already set concurrently) mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0); + _mi_stat_counter_increase(&_mi_stats_main.arena_purges, 1); // go through all purge info's // todo: instead of visiting per-bit, we should visit per range of bits diff --git a/src/bitmap.c b/src/bitmap.c index 88b45a5e..f689ee58 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -34,7 +34,6 @@ static inline mi_bfield_t mi_bfield_clear_least_bit(mi_bfield_t x) { return (x & (x-1)); } - // find the least 
significant bit that is set (i.e. count trailing zeros) // return false if `x==0` (with `*idx` undefined) and true otherwise, // with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). @@ -42,17 +41,13 @@ static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) { return mi_bsf(x,idx); } -// find each set bit in a bit field `x` until it becomes zero. +// find each set bit in a bit field `x` and clear it, until it becomes zero. static inline bool mi_bfield_foreach_bit(mi_bfield_t* x, size_t* idx) { const bool found = mi_bfield_find_least_bit(*x, idx); *x = mi_bfield_clear_least_bit(*x); return found; } -//static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) { -// return mi_rotr(x,r); -//} - static inline mi_bfield_t mi_bfield_zero(void) { return 0; } @@ -65,6 +60,7 @@ static inline mi_bfield_t mi_bfield_all_set(void) { return ~((mi_bfield_t)0); } +// mask of `bit_count` bits set shifted to the left by `shiftl` static inline mi_bfield_t mi_bfield_mask(size_t bit_count, size_t shiftl) { mi_assert_internal(bit_count > 0); mi_assert_internal(bit_count + shiftl <= MI_BFIELD_BITS); @@ -72,7 +68,10 @@ return (mask0 << shiftl); } + // ------- mi_bfield_atomic_set --------------------------------------- +// the `_set` functions also return the count of bits that were already set (for commit statistics) +// the `_clear` functions also return whether the new bfield is all clear or not (for the chunk_map) // Set a bit atomically. Returns `true` if the bit transitioned from 0 to 1 static inline bool mi_bfield_atomic_set(_Atomic(mi_bfield_t)*b, size_t idx) { @@ -93,7 +92,8 @@ } // Clear a bit but only when/once it is set. This is used by concurrent frees while -// the page is abandoned and mapped. 
This can incur a busy wait :-( but it should +happen almost never (and is accounted for in the stats) static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_t idx) { mi_assert_internal(idx < MI_BFIELD_BITS); const mi_bfield_t mask = mi_bfield_one()<bfields[i], idx); } -static inline bool mi_bchunk_clear(mi_bchunk_t* chunk, size_t cidx, bool* maybe_all_clear) { - mi_assert_internal(cidx < MI_BCHUNK_BITS); +static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); const size_t i = cidx / MI_BFIELD_BITS; - const size_t idx = cidx % MI_BFIELD_BITS; - return mi_bfield_atomic_clear(&chunk->bfields[i], idx, maybe_all_clear); + const size_t idx = cidx % MI_BFIELD_BITS; + const mi_bfield_t mask = mi_bfield_mask(n, idx); + return mi_bfield_atomic_set_mask(&chunk->bfields[i], mask, already_set); } -static inline bool mi_bchunk_set8(mi_bchunk_t* chunk, size_t byte_idx) { - mi_assert_internal(byte_idx < MI_BCHUNK_SIZE); - const size_t i = byte_idx / MI_BFIELD_SIZE; - const size_t bidx = byte_idx % MI_BFIELD_SIZE; - return mi_bfield_atomic_set8(&chunk->bfields[i], bidx); +static inline bool mi_bchunk_setX(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + mi_assert_internal((cidx%MI_BFIELD_BITS)==0); + const size_t i = cidx / MI_BFIELD_BITS; + return mi_bfield_atomic_setX(&chunk->bfields[i], already_set); } -static inline bool mi_bchunk_clear8(mi_bchunk_t* chunk, size_t byte_idx, bool* maybe_all_clear) { - mi_assert_internal(byte_idx < MI_BCHUNK_SIZE); - const size_t i = byte_idx / MI_BFIELD_SIZE; - const size_t bidx = byte_idx % MI_BFIELD_SIZE; - return mi_bfield_atomic_clear8(&chunk->bfields[i], bidx, maybe_all_clear); -} - -static inline bool mi_bchunk_setX(mi_bchunk_t* chunk, size_t field_idx) { - mi_assert_internal(field_idx < MI_BCHUNK_FIELDS); - return mi_bfield_atomic_setX(&chunk->bfields[field_idx]); -} - -static inline bool mi_bchunk_clearX(mi_bchunk_t* chunk, size_t field_idx, bool* maybe_all_clear) { - mi_assert_internal(field_idx < MI_BCHUNK_FIELDS); - if (maybe_all_clear != NULL) { *maybe_all_clear = true; } - return mi_bfield_atomic_clearX(&chunk->bfields[field_idx]); -} - -// Set/clear a sequence of `n` bits within a chunk. +// Set a sequence of `n` bits within a chunk. // Returns true if all bits transitioned from 0 to 1 (or 1 to 0). -static bool mi_bchunk_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* palready_xset) { +mi_decl_noinline static bool mi_bchunk_xsetN_(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* palready_set, bool* pmaybe_all_clear) { mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); mi_assert_internal(n>0); bool all_transition = true; - size_t total_already_xset = 0; + bool maybe_all_clear = true; + size_t total_already_set = 0; size_t idx = cidx % MI_BFIELD_BITS; size_t field = cidx / MI_BFIELD_BITS; while (n > 0) { @@ -326,28 +272,67 @@ static bool mi_bchunk_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size mi_assert_internal(idx + m <= MI_BFIELD_BITS); mi_assert_internal(field < MI_BCHUNK_FIELDS); const mi_bfield_t mask = mi_bfield_mask(m, idx); - size_t already_xset = 0; - const bool transition = mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset); - mi_assert_internal((transition && already_xset == 0) || (!transition && already_xset > 0)); + size_t already_set = 0; + bool all_clear = false; + const bool transition = (set ? 
mi_bfield_atomic_set_mask(&chunk->bfields[field], mask, &already_set) + : mi_bfield_atomic_clear_mask(&chunk->bfields[field], mask, &all_clear)); + mi_assert_internal((transition && already_set == 0) || (!transition && already_set > 0)); all_transition = all_transition && transition; - total_already_xset += already_xset; + total_already_set += already_set; + maybe_all_clear = maybe_all_clear && all_clear; // next field field++; idx = 0; n -= m; } - if (palready_xset!=NULL) { *palready_xset = total_already_xset; } + if (palready_set!=NULL) { *palready_set = total_already_set; } + if (pmaybe_all_clear!=NULL) { *pmaybe_all_clear = maybe_all_clear; } return all_transition; } static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { - return mi_bchunk_xsetN(MI_BIT_SET, chunk, cidx, n, already_set); + mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); + if (n==1) { + bool was_clear = mi_bchunk_set(chunk, cidx); + if (already_set != NULL) { *already_set = !was_clear; } + return was_clear; + } + if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set); + if (n bfields[i], idx, all_clear); } +static inline bool mi_bchunk_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* all_clear) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + const mi_bfield_t mask = mi_bfield_mask(n, idx); + return mi_bfield_atomic_clear_mask(&chunk->bfields[i], mask, all_clear); +} + +static inline bool mi_bchunk_clearX(mi_bchunk_t* chunk, size_t cidx, bool* all_clear) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + mi_assert_internal((cidx%MI_BFIELD_BITS)==0); + const size_t i = cidx / MI_BFIELD_BITS; + return mi_bfield_atomic_clearX(&chunk->bfields[i], all_clear); +} + +static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { + mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); + if (n==1) return mi_bchunk_clear(chunk, cidx, maybe_all_clear); + if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, maybe_all_clear); + if (n 0); if (n==0) return true; - size_t field = cidx / MI_BFIELD_BITS; - size_t idx = cidx % MI_BFIELD_BITS; + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; if mi_likely(n<=MI_BFIELD_BITS) { - return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mi_bfield_mask(n, idx)); + return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); } else { - return mi_bchunk_is_xsetN_(set, chunk, field, idx, n); + return mi_bchunk_is_xsetN_(set, chunk, i, idx, n); } } -// ------- mi_bchunk_try_xset --------------------------------------- +// ------- mi_bchunk_try_clear --------------------------------------- + +static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + mi_assert_internal(n <= MI_BFIELD_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + mi_assert_internal(idx + n <= MI_BFIELD_BITS); + const size_t mask = mi_bfield_mask(n, idx); + return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mask, pmaybe_all_clear); +} + +static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + mi_assert_internal((cidx%MI_BFIELD_BITS) == 0); + const size_t i = cidx / MI_BFIELD_BITS; + return 
mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear); +} // Try to atomically set/clear a sequence of `n` bits within a chunk. // Returns true if all bits transitioned from 0 to 1 (or 1 to 0), // and false otherwise leaving all bit fields as is. -static bool mi_bchunk_try_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { +// Note: this is a hard one as we need to unwind partial atomic operations +// if we fail halfway.. +mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); mi_assert_internal(n>0); if (n==0) return true; @@ -414,7 +418,7 @@ static bool mi_bchunk_try_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); mi_assert_internal(start_field < MI_BCHUNK_FIELDS); const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx); - if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start, &field_is_clear)) return false; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &field_is_clear)) return false; maybe_all_clear = maybe_all_clear && field_is_clear; // done? @@ -431,7 +435,7 @@ static bool mi_bchunk_try_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, field++; mi_assert_internal(field < MI_BCHUNK_FIELDS); mask_mid = mi_bfield_all_set(); - if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid, &field_is_clear)) goto restore; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_mid, &field_is_clear)) goto restore; maybe_all_clear = maybe_all_clear && field_is_clear; n -= MI_BFIELD_BITS; } @@ -443,7 +447,7 @@ static bool mi_bchunk_try_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, mi_assert_internal(field < MI_BCHUNK_FIELDS); end_field = field; mask_end = mi_bfield_mask(n, 0); - if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end, &field_is_clear)) goto restore; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_end, &field_is_clear)) goto restore; maybe_all_clear = maybe_all_clear && field_is_clear; } @@ -456,17 +460,17 @@ restore: while( field > start_field) { field--; const size_t mask = (field == start_field ? mask_start : (field == end_field ? 
mask_end : mask_mid)); - mi_bfield_atomic_xset_mask(!set, &chunk->bfields[field], mask, NULL); + mi_bfield_atomic_set_mask(&chunk->bfields[field], mask, NULL); } return false; } -// static inline bool mi_bchunk_try_setN(mi_bchunk_t* chunk, size_t cidx, size_t n) { -// return mi_bchunk_try_xsetN(MI_BIT_SET, chunk, cidx, n, NULL); -// } static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { - return mi_bchunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n, maybe_all_clear); + mi_assert_internal(n>0); + if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear); + if (nbfields[chunk_idx], byte_idx, NULL)) { // unset the byte atomically + mi_assert_internal((idx%8)==0); + if mi_likely(mi_bfield_atomic_try_clear8(&chunk->bfields[chunk_idx], idx, NULL)) { *pidx = (chunk_idx*MI_BFIELD_BITS) + idx; mi_assert_internal(*pidx + 8 <= MI_BCHUNK_BITS); return true; @@ -614,9 +617,9 @@ static mi_decl_noinline bool mi_bchunk_try_find_and_clear8(mi_bchunk_t* chunk, s if (mask==0) return false; const size_t bidx = _tzcnt_u64(mask); // byte-idx of the byte in the chunk const size_t chunk_idx = bidx / 8; - const size_t byte_idx = bidx % 8; // byte index of the byte in the bfield + const size_t idx = (bidx % 8)*8; mi_assert_internal(chunk_idx < MI_BCHUNK_FIELDS); - if mi_likely(mi_bfield_atomic_try_clear8(&chunk->bfields[chunk_idx], byte_idx, NULL)) { // clear it atomically + if mi_likely(mi_bfield_atomic_try_clear8(&chunk->bfields[chunk_idx], idx, NULL)) { *pidx = (chunk_idx*MI_BFIELD_BITS) + idx; mi_assert_internal(*pidx + 8 <= MI_BCHUNK_BITS); return true; @@ -672,7 +675,7 @@ static mi_decl_noinline bool mi_bchunk_try_find_and_clearX(mi_bchunk_t* chunk, #else for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { const mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); - if (~b==0 && mi_bfield_atomic_try_clearX(&chunk->bfields[i])) { + if (~b==0 && mi_bfield_atomic_try_clearX(&chunk->bfields[i], NULL)) { *pidx = i*MI_BFIELD_BITS; mi_assert_internal(*pidx + MI_BFIELD_BITS <= MI_BCHUNK_BITS); return true; @@ -691,7 +694,7 @@ static inline bool mi_bchunk_try_find_and_clear_X(mi_bchunk_t* chunk, size_t n, // and try to clear them atomically. // set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success. 
// (We do not cross bfield boundaries) -static mi_decl_noinline bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) { +mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) { if (n == 0 || n > MI_BFIELD_BITS) return false; const mi_bfield_t mask = mi_bfield_mask(n, 0); for(int i = 0; i < MI_BCHUNK_FIELDS; i++) { @@ -955,69 +958,31 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) { // ------- mi_bitmap_xset --------------------------------------- // Set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0) -bool mi_bitmap_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) { +bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx) { mi_assert_internal(idx < mi_bitmap_max_bits(bitmap)); const size_t chunk_idx = idx / MI_BCHUNK_BITS; const size_t cidx = idx % MI_BCHUNK_BITS; mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); - if (set) { - const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx); - mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards - return wasclear; - } - else { - bool maybe_all_clear; - const bool wasset = mi_bchunk_clear(&bitmap->chunks[chunk_idx], cidx, &maybe_all_clear); - if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } - return wasset; - } + const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx); + mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards + return wasclear; } -// Set/clear aligned 8-bits in the bitmap (with `(idx%8)==0`). -// Returns `true` if atomically transitioned from 0 to 1 (or 1 to 0) -static bool mi_bitmap_xset8(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) { +bool mi_bitmap_clear(mi_bitmap_t* bitmap, size_t idx) { mi_assert_internal(idx < mi_bitmap_max_bits(bitmap)); - mi_assert_internal((idx%8)==0); const size_t chunk_idx = idx / MI_BCHUNK_BITS; - const size_t byte_idx = (idx % MI_BCHUNK_BITS)/8; + const size_t cidx = idx % MI_BCHUNK_BITS; mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); - if (set) { - const bool wasclear = mi_bchunk_set8(&bitmap->chunks[chunk_idx], byte_idx); - mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards - return wasclear; - } - else { - bool maybe_all_clear; - const bool wasset = mi_bchunk_clear8(&bitmap->chunks[chunk_idx], byte_idx, &maybe_all_clear); - if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } - return wasset; - } + bool maybe_all_clear; + const bool wasset = mi_bchunk_clear(&bitmap->chunks[chunk_idx], cidx, &maybe_all_clear); + if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } + return wasset; } -// Set/clear a field of bits. 
-// Returns `true` if atomically transitioned from 0 to ~0 (or ~0 to 0) -static bool mi_bitmap_xsetX(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) { - mi_assert_internal(idx < mi_bitmap_max_bits(bitmap)); - mi_assert_internal((idx%MI_BFIELD_BITS)==0); - const size_t chunk_idx = idx / MI_BCHUNK_BITS; - const size_t field_idx = (idx % MI_BCHUNK_BITS)/MI_BFIELD_BITS; - mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); - if (set) { - const bool wasclear = mi_bchunk_setX(&bitmap->chunks[chunk_idx],field_idx); - mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards - return wasclear; - } - else { - bool maybe_all_clear; - const bool wasset = mi_bchunk_clearX(&bitmap->chunks[chunk_idx], field_idx, &maybe_all_clear); - if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } - return wasset; - } -} -// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). +// Set a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's. // `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)! -static bool mi_bitmap_xsetN_(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset ) { +bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set) { mi_assert_internal(n>0); mi_assert_internal(n<=MI_BCHUNK_BITS); @@ -1027,30 +992,30 @@ static bool mi_bitmap_xsetN_(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, siz mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); if (cidx + n > MI_BCHUNK_BITS) { n = MI_BCHUNK_BITS - cidx; } // paranoia - if (set) { - const bool allclear = mi_bchunk_setN(&bitmap->chunks[chunk_idx], cidx, n, already_xset); - mi_bitmap_chunkmap_set(bitmap,chunk_idx); // set afterwards - return allclear; - } - else { - size_t already_clear = 0; - const bool allset = mi_bchunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, &already_clear ); - if (already_xset != NULL) { *already_xset = already_clear; } - if (already_clear < n) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } - return allset; - } + const bool were_allclear = mi_bchunk_setN(&bitmap->chunks[chunk_idx], cidx, n, already_set); + mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards + return were_allclear; } -// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). +// Clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 1's to 0's. // `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)! 
-bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset) { - mi_assert_internal(n>0 && n<=MI_BCHUNK_BITS); - if (n==1) return mi_bitmap_xset(set, bitmap, idx); - if (n==8) return mi_bitmap_xset8(set, bitmap, idx); - if (n==MI_BFIELD_BITS) return mi_bitmap_xsetX(set, bitmap, idx); - return mi_bitmap_xsetN_(set, bitmap, idx, n, already_xset); +bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0); + mi_assert_internal(n<=MI_BCHUNK_BITS); + + const size_t chunk_idx = idx / MI_BCHUNK_BITS; + const size_t cidx = idx % MI_BCHUNK_BITS; + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); // don't cross chunks (for now) + mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); + if (cidx + n > MI_BCHUNK_BITS) { n = MI_BCHUNK_BITS - cidx; } // paranoia + + bool maybe_all_clear; + const bool were_allset = mi_bchunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, &maybe_all_clear); + if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } + return were_allset; } + // ------- mi_bitmap_try_clearN --------------------------------------- bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) { diff --git a/src/bitmap.h b/src/bitmap.h index 72ba69c1..4afcdaf1 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -13,7 +13,7 @@ Concurrent bitmap that can set/reset sequences of bits atomically #define MI_BITMAP_H /* -------------------------------------------------------------------------------- - Atomic bitmaps: + Atomic bitmaps with release/acquire guarantees: `mi_bfield_t`: is a single machine word that can efficiently be bit counted (usually `size_t`) each bit usually represents a single MI_ARENA_SLICE_SIZE in an arena (64 KiB). @@ -25,19 +25,25 @@ Concurrent bitmap that can set/reset sequences of bits atomically These chunks are cache-aligned and we can use AVX2/AVX512/NEON/SVE/SVE2/etc. instructions to scan for bits (perhaps) more efficiently. - `mi_bchunkmap_t` == `mi_bchunk_t`: for each chunk we track if it has (potentially) any bit set. + We allocate byte-sized ranges aligned to bytes in the bfield, and bfield-sized + ranges aligned to a bfield. + + Searching linearly through the chunks would be too slow (16K bits per GiB). + Instead we add a "chunkmap" to do a two-level search (more or less a btree of depth 2). + + `mi_bchunkmap_t` (== `mi_bchunk_t`): for each chunk we track if it has (potentially) any bit set. The chunkmap has 1 bit per chunk that is set if the chunk potentially has a bit set. This is used to avoid scanning every chunk. (and thus strictly an optimization) - It is conservative: it is fine to a bit in the chunk map even if the chunk turns out + It is conservative: it is fine to set a bit in the chunk map even if the chunk turns out to have no bits set. It is also allowed to briefly have a clear bit even if the - chunk has bits set, as long as we guarantee that we set the bit later on -- this - allows us to set the chunkmap bit after we set a bit in the corresponding chunk. + chunk has bits set -- as long as we guarantee that the bit will be set later on; + (this allows us to set the chunkmap bit right after we set a bit in the corresponding chunk). However, when we clear a bit in a chunk, and the chunk is indeed all clear, we cannot safely clear the bit corresponding to the chunk in the chunkmap since it may race with another thread setting a bit in the same chunk. 
Therefore, when clearing, we first test if a chunk is clear, then clear the chunkmap bit, and - then test again to catch any set bits that we missed. + then test again to catch any set bits that we may have missed. Since the chunkmap may thus be briefly out-of-sync, this means that we may sometimes not find a free page even though it's there (but we accept this as we avoid taking @@ -130,32 +136,22 @@ size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero); // Not atomic so only use if still local to a thread. void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n); +// Set a bit in the bitmap; returns `true` if it atomically transitioned from 0 to 1 +bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx); -// Set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0) -bool mi_bitmap_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx); -static inline bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx) { - return mi_bitmap_xset(MI_BIT_SET, bitmap, idx); -} -static inline bool mi_bitmap_clear(mi_bitmap_t* bitmap, size_t idx) { - return mi_bitmap_xset(MI_BIT_CLEAR, bitmap, idx); -} +// Clear a bit in the bitmap; returns `true` if it atomically transitioned from 1 to 0 +bool mi_bitmap_clear(mi_bitmap_t* bitmap, size_t idx); -// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's). +// Set a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's // `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)! -// If `already_xset` is not NULL, it is set to count of bits were already all set/cleared. +// If `already_set` is not NULL, it is set to the count of bits that were already set. // (this is used for correct statistics if committing over a partially committed area) -bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset); +bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set); -static inline bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set) { - return mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, n, already_set); -} -static inline bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) { - return mi_bitmap_xsetN(MI_BIT_CLEAR, bitmap, idx, n, NULL); -} +// Clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 1's to 0's +// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)! +bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n); // Is a sequence of n bits already all set/cleared? @@ -167,6 +163,7 @@ static inline bool mi_bitmap_is_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) { return mi_bitmap_is_xsetN(MI_BIT_SET, bitmap, idx, n); } +// Is a sequence of n bits already clear? static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) { return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n); } @@ -180,8 +177,11 @@ static inline bool mi_bitmap_is_clear(mi_bitmap_t* bitmap, size_t idx) { } +// Try to atomically transition `n` bits from all set to all clear. Returns `true` on success. +// `n` cannot cross chunk boundaries, where `n <= MI_BCHUNK_BITS`. bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n); +// Try to atomically transition a bit from set to clear. Returns `true` on success. 
static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) { return mi_bitmap_try_clearN(bitmap, idx, 1); } @@ -223,7 +223,7 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx); -// If a bit is set in the bitmap, return `true` and set `idx` to its index. +// If a bit is set in the bitmap, return `true` and set `idx` to the index of the highest bit. // Otherwise return `false` (and `*idx` is undefined). bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx); diff --git a/src/init.c b/src/init.c index 5c5186b9..8f1449a3 100644 --- a/src/init.c +++ b/src/init.c @@ -84,8 +84,7 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ - { 0, 0 } \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() // -------------------------------------------------------- diff --git a/src/stats.c b/src/stats.c index 2a793b59..860a69ef 100644 --- a/src/stats.c +++ b/src/stats.c @@ -338,12 +338,11 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); mi_stat_counter_print(&stats->arena_count, "arenas", out, arg); - mi_stat_counter_print(&stats->arena_crossover_count, "-crossover", out, arg); - mi_stat_counter_print(&stats->arena_rollback_count, "-rollback", out, arg); - mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); - mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); - mi_stat_counter_print(&stats->reset_calls, "resets", out, arg); - mi_stat_counter_print(&stats->purge_calls, "purges", out, arg); + mi_stat_counter_print(&stats->arena_purges, "-purges", out, arg); + mi_stat_counter_print(&stats->mmap_calls, "mmap calls", out, arg); + mi_stat_counter_print(&stats->commit_calls, " -commit", out, arg); + mi_stat_counter_print(&stats->reset_calls, "-reset", out, arg); + mi_stat_counter_print(&stats->purge_calls, "-purge", out, arg); mi_stat_counter_print(&stats->guarded_alloc_count, "guarded", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); diff --git a/test/test-stress.c b/test/test-stress.c index 126a7601..1996e52e 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -352,7 +352,7 @@ int main(int argc, char** argv) { mi_collect(true); mi_debug_show_arenas(true,false,false); #else - mi_collect(false); + //mi_collect(true); mi_debug_show_arenas(true,false,false); // mi_stats_print(NULL); #endif
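The two conventions this clean-up settles on (per the comments in src/bitmap.c above: the `_set` functions also return the count of bits that were already set, for commit statistics, and the `_clear` functions also return whether the new bfield is all clear, so the caller can attempt mi_bitmap_chunkmap_try_clear) can be illustrated with a minimal single-bfield sketch in portable C11. The helper names `bfield_set_mask` and `bfield_clear_mask` below are hypothetical stand-ins for illustration only, not the mimalloc helpers themselves:

// Illustrative single-word model of the set/clear return-value conventions
// used by the cleaned-up bitmap API (a sketch, not mimalloc code).
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t bfield_t;

// portable popcount stand-in: repeatedly clear the least significant set bit
static int popcount64(bfield_t x) {
  int count = 0;
  while (x != 0) { x &= (x - 1); count++; }
  return count;
}

// Atomically set the bits in `mask`; returns true iff every bit in `mask`
// transitioned from 0 to 1. `*already_set` receives the count of bits in
// `mask` that were set before (used for commit statistics).
static bool bfield_set_mask(_Atomic(bfield_t)* b, bfield_t mask, size_t* already_set) {
  const bfield_t old = atomic_fetch_or_explicit(b, mask, memory_order_acq_rel);
  if (already_set != NULL) { *already_set = (size_t)popcount64(old & mask); }
  return ((old & mask) == 0);
}

// Atomically clear the bits in `mask`; returns true iff every bit in `mask`
// transitioned from 1 to 0. `*all_clear` reports whether the whole bfield is
// now zero (the signal to try to clear the corresponding chunkmap bit).
static bool bfield_clear_mask(_Atomic(bfield_t)* b, bfield_t mask, bool* all_clear) {
  const bfield_t old = atomic_fetch_and_explicit(b, ~mask, memory_order_acq_rel);
  if (all_clear != NULL) { *all_clear = ((old & ~mask) == 0); }
  return ((old & mask) == mask);
}

int main(void) {
  _Atomic(bfield_t) field = 0;
  size_t already_set = 0;
  bool all_clear = false;

  bfield_set_mask(&field, 0x0F, &already_set);  // 4 fresh bits; already_set == 0
  const bool fresh = bfield_set_mask(&field, 0xFF, &already_set);  // overlaps: already_set == 4
  printf("all transitioned 0->1: %d, already set: %zu\n", fresh, already_set);  // 0, 4

  const bool were_set = bfield_clear_mask(&field, 0xFF, &all_clear);
  printf("all transitioned 1->0: %d, field now all clear: %d\n", were_set, all_clear);  // 1, 1
  return 0;
}

In this model, `bfield_set_mask` returning true with `*already_set == 0` corresponds to mi_bitmap_setN reporting that the whole range transitioned from 0's to 1's, and the `*all_clear` flag plays the role of the `maybe_all_clear` result that gates the call to mi_bitmap_chunkmap_try_clear.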