diff --git a/src/arena.c b/src/arena.c
index 70e1802b..62811720 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1384,7 +1384,7 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k,
   return bit_set_count;
 }
 
-static size_t mi_debug_show_chunks(const char* header1, const char* header2, const char* header3, size_t slice_count, size_t chunk_count, mi_bchunk_t* chunks, _Atomic(uint8_t)* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) {
+static size_t mi_debug_show_chunks(const char* header1, const char* header2, const char* header3, size_t slice_count, size_t chunk_count, mi_bchunk_t* chunks, mi_bchunkmap_t* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) {
   _mi_raw_message("\x1B[37m%s%s%s (use/commit: \x1B[31m0 - 25%%\x1B[33m - 50%%\x1B[36m - 75%%\x1B[32m - 100%%\x1B[0m)\n", header1, header2, header3);
   const size_t fields_per_line = (narrow ? 2 : 4);
   size_t bit_count = 0;
@@ -1400,11 +1400,12 @@ static size_t mi_debug_show_chunks(const char* header1, const char* header2, con
 
     char chunk_kind = ' ';
     if (chunk_bins != NULL) {
-      switch (mi_atomic_load_relaxed(&chunk_bins[i])) {
+      switch (mi_bbitmap_debug_get_bin(chunk_bins,i)) {
         case MI_BBIN_SMALL:  chunk_kind = 'S'; break;
         case MI_BBIN_MEDIUM: chunk_kind = 'M'; break;
         case MI_BBIN_LARGE:  chunk_kind = 'L'; break;
         case MI_BBIN_OTHER:  chunk_kind = 'X'; break;
+        default:             chunk_kind = ' '; break;  // suppress warning
         // case MI_BBIN_NONE: chunk_kind = 'N'; break;
       }
     }
@@ -1441,7 +1442,7 @@ static size_t mi_debug_show_chunks(const char* header1, const char* header2, con
   return bit_set_count;
 }
 
-static size_t mi_debug_show_bitmap_binned(const char* header1, const char* header2, const char* header3, size_t slice_count, mi_bitmap_t* bitmap, _Atomic(uint8_t)* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) {
+static size_t mi_debug_show_bitmap_binned(const char* header1, const char* header2, const char* header3, size_t slice_count, mi_bitmap_t* bitmap, mi_bchunkmap_t* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) {
   return mi_debug_show_chunks(header1, header2, header3, slice_count, mi_bitmap_chunk_count(bitmap), &bitmap->chunks[0], chunk_bins, invert, arena, narrow);
 }
 
@@ -1472,7 +1473,7 @@ static void mi_debug_show_arenas_ex(bool show_pages, bool narrow) mi_attr_noexce
       const char* header1 = "pages (p:page, f:full, s:singleton, P,F,S:not abandoned, i:arena-info, m:meta-data, ~:free-purgable, _:free-committed, .:free-reserved)";
       const char* header2 = (narrow ? "\n " : " ");
       const char* header3 = "(chunk bin: S:small, M:medium, L:large, X:other)";
-      page_total += mi_debug_show_bitmap_binned(header1, header2, header3, arena->slice_count, arena->pages, arena->slices_free->chunk_bins, false, arena, narrow);
+      page_total += mi_debug_show_bitmap_binned(header1, header2, header3, arena->slice_count, arena->pages, arena->slices_free->chunkmap_bins, false, arena, narrow);
     }
   }
   // if (show_inuse) _mi_raw_message("total inuse slices : %zu\n", slice_total - free_total);
diff --git a/src/bitmap.c b/src/bitmap.c
index 908562c0..a2e29645 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -218,39 +218,39 @@ static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b, bool* all
 // ------- mi_bfield_atomic_is_set ---------------------------------------
 
 // Check if a bit is set
-static inline bool mi_bfield_atomic_is_set(_Atomic(mi_bfield_t)*b, const size_t idx) {
+static inline bool mi_bfield_atomic_is_set(const _Atomic(mi_bfield_t)*b, const size_t idx) {
   const mi_bfield_t x = mi_atomic_load_relaxed(b);
   return ((x & mi_bfield_mask(1,idx)) != 0);
 }
 
 // Check if a bit is clear
-static inline bool mi_bfield_atomic_is_clear(_Atomic(mi_bfield_t)*b, const size_t idx) {
+static inline bool mi_bfield_atomic_is_clear(const _Atomic(mi_bfield_t)*b, const size_t idx) {
   const mi_bfield_t x = mi_atomic_load_relaxed(b);
   return ((x & mi_bfield_mask(1, idx)) == 0);
 }
 
 // Check if a bit is xset
-static inline bool mi_bfield_atomic_is_xset(mi_xset_t set, _Atomic(mi_bfield_t)*b, const size_t idx) {
+static inline bool mi_bfield_atomic_is_xset(mi_xset_t set, const _Atomic(mi_bfield_t)*b, const size_t idx) {
   if (set) return mi_bfield_atomic_is_set(b, idx);
   else return mi_bfield_atomic_is_clear(b, idx);
 }
 
 // Check if all bits corresponding to a mask are set.
-static inline bool mi_bfield_atomic_is_set_mask(_Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
+static inline bool mi_bfield_atomic_is_set_mask(const _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
   mi_assert_internal(mask != 0);
   const mi_bfield_t x = mi_atomic_load_relaxed(b);
   return ((x & mask) == mask);
 }
 
 // Check if all bits corresponding to a mask are clear.
-static inline bool mi_bfield_atomic_is_clear_mask(_Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
+static inline bool mi_bfield_atomic_is_clear_mask(const _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
   mi_assert_internal(mask != 0);
   const mi_bfield_t x = mi_atomic_load_relaxed(b);
   return ((x & mask) == 0);
 }
 
 // Check if all bits corresponding to a mask are set/cleared.
-static inline bool mi_bfield_atomic_is_xset_mask(mi_xset_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
+static inline bool mi_bfield_atomic_is_xset_mask(mi_xset_t set, const _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
   mi_assert_internal(mask != 0);
   if (set) return mi_bfield_atomic_is_set_mask(b, mask);
   else return mi_bfield_atomic_is_clear_mask(b, mask);
@@ -371,7 +371,7 @@ static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, b
 
 // Check if a sequence of `n` bits within a chunk are all set/cleared.
 // This can cross bfield's
-mi_decl_noinline static bool mi_bchunk_is_xsetN_(mi_xset_t set, mi_bchunk_t* chunk, size_t field_idx, size_t idx, size_t n) {
+mi_decl_noinline static bool mi_bchunk_is_xsetN_(mi_xset_t set, const mi_bchunk_t* chunk, size_t field_idx, size_t idx, size_t n) {
   mi_assert_internal((field_idx*MI_BFIELD_BITS) + idx + n <= MI_BCHUNK_BITS);
   while (n > 0) {
     size_t m = MI_BFIELD_BITS - idx;   // m is the bits to xset in this field
@@ -391,7 +391,7 @@ mi_decl_noinline static bool mi_bchunk_is_xsetN_(mi_xset_t set, mi_bchunk_t* chu
 }
 
 // Check if a sequence of `n` bits within a chunk are all set/cleared.
-static inline bool mi_bchunk_is_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n) {
+static inline bool mi_bchunk_is_xsetN(mi_xset_t set, const mi_bchunk_t* chunk, size_t cidx, size_t n) {
   mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);
   mi_assert_internal(n>0);
   if (n==0) return true;
@@ -1413,7 +1413,23 @@ void mi_bbitmap_unsafe_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) {
 // Assign a specific size bin to a chunk
 static void mi_bbitmap_set_chunk_bin(mi_bbitmap_t* bbitmap, size_t chunk_idx, mi_bbin_t bin) {
   mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
-  mi_atomic_store_release(&bbitmap->chunk_bins[chunk_idx], (uint8_t)bin);
+  for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) {
+    if (ibin == bin) {
+      mi_bchunk_set(&bbitmap->chunkmap_bins[ibin], chunk_idx, NULL);
+    }
+    else {
+      mi_bchunk_clear(&bbitmap->chunkmap_bins[ibin], chunk_idx, NULL);
+    }
+  }
+}
+
+mi_bbin_t mi_bbitmap_debug_get_bin(const mi_bchunkmap_t* chunkmap_bins, size_t chunk_idx) {
+  for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) {
+    if (mi_bchunk_is_xsetN(MI_BIT_SET, &chunkmap_bins[ibin], chunk_idx, 1)) {
+      return ibin;
+    }
+  }
+  return MI_BBIN_NONE;
 }
 
 // Track the index of the highest chunk that is accessed.
@@ -1542,62 +1558,59 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
   const mi_bfield_t cmap_mask = mi_bfield_mask(cmap_max_count,0);
   const size_t cmap_cycle = cmap_acc+1;
   const mi_bbin_t bbin = mi_bbin_of(n);
-  // visit bins from smallest to largest (to reduce fragmentation on the larger blocks)
-  for(mi_bbin_t bin = MI_BBIN_SMALL; bin <= bbin; bin = mi_bbin_inc(bin))  // no need to traverse for MI_BBIN_NONE as anyone can allocate in MI_BBIN_SMALL
-  // (int bin = bbin; bin >= MI_BBIN_SMALL; bin--)  // visit bins from largest size bin up to the NONE bin
+  // visit each cmap entry
+  size_t cmap_idx = 0;
+  mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
   {
-    size_t cmap_idx = 0;
-    mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
+    // and for each chunkmap entry we iterate over its bits to find the chunks
+    const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]);
+    const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
+    if (cmap_entry == 0) continue;
+
+    // get size bin masks
+    mi_bfield_t cmap_bins[MI_BBIN_COUNT] = { 0 };
+    cmap_bins[MI_BBIN_NONE] = cmap_entry;
+    for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) {
+      const mi_bfield_t cmap_bin = mi_atomic_load_relaxed(&bbitmap->chunkmap_bins[ibin].bfields[cmap_idx]);
+      cmap_bins[ibin] = cmap_bin & cmap_entry;
+      cmap_bins[MI_BBIN_NONE] &= ~cmap_bin;  // clear bits that are in an assigned size bin
+    }
+
+    // consider only chunks for a particular size bin at a time
+    for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin <= MI_BBIN_NONE;
+         // skip from bbin to NONE (so, say, a SMALL will never be placed in a OTHER, MEDIUM, or LARGE chunk to reduce fragmentation)
+         ibin = (ibin == bbin ? MI_BBIN_NONE : mi_bbin_inc(ibin)))
     {
-      // don't search into non-accessed memory until we tried other size bins as well
-      if (bin < bbin && cmap_idx > cmap_acc)
-      // (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc)  // large to small
-      {
-        break;
-      }
-
-      // and for each chunkmap entry we iterate over its bits to find the chunks
-      const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]);
-      const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
+      const mi_bfield_t cmap_bin = cmap_bins[ibin];
       size_t eidx = 0;
-      mi_bfield_cycle_iterate(cmap_entry, tseq, cmap_entry_cycle, eidx, Y)
+      mi_bfield_cycle_iterate(cmap_bin, tseq, cmap_entry_cycle, eidx, Y)
      {
-        mi_assert_internal(eidx <= MI_BFIELD_BITS);
-
-        // don't search into non-accessed memory until we tried other size bins as well
-        if (bin < bbin && eidx >= cmap_entry_cycle) break;
+        // assertion doesn't quite hold as the max_accessed may be out-of-date
+        // mi_assert_internal(cmap_entry_cycle > eidx || ibin == MI_BBIN_NONE);
 
-        // get the chunk idx
+        // get the chunk
        const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
-        mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
-        mi_assert_internal(bin >= bbin || chunk_idx <= chunk_acc);
-
-        // only in the current size class!
-        const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
-        if ((mi_bbin_t)bin == chunk_bin || (bin == bbin && chunk_bin == MI_BBIN_NONE))  // only allow NONE at the final run
-        // ((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) { largest to smallest
-        {
-          mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
-          size_t cidx;
-          if ((*on_find)(chunk, n, &cidx)) {
-            if (cidx==0 && chunk_bin == MI_BBIN_NONE) {  // only the first determines the size bin
-              // this chunk is now reserved for the `bbin` size class
-              mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, bbin);
-            }
-            *pidx = (chunk_idx * MI_BCHUNK_BITS) + cidx;
-            mi_assert_internal(*pidx + n <= mi_bbitmap_max_bits(bbitmap));
-            return true;
-          }
-          else {
-            /* we may find that all are cleared only on a second iteration but that is ok as the chunkmap is a conservative approximation.
-            */
-            mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx);
+        mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
+
+        size_t cidx;
+        if ((*on_find)(chunk, n, &cidx)) {
+          if (cidx==0 && ibin == MI_BBIN_NONE) {  // only the first block determines the size bin
+            // this chunk is now reserved for the `bbin` size class
+            mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, bbin);
          }
+          *pidx = (chunk_idx * MI_BCHUNK_BITS) + cidx;
+          mi_assert_internal(*pidx + n <= mi_bbitmap_max_bits(bbitmap));
+          return true;
+        }
+        else {
+          /* we may find that all are cleared only on a second iteration but that is ok as the chunkmap is a conservative approximation.
+          */
+          mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx);
        }
      }
      mi_bfield_cycle_iterate_end(Y);
    }
-    mi_bfield_cycle_iterate_end(X);
  }
+  mi_bfield_cycle_iterate_end(X);
  return false;
}
diff --git a/src/bitmap.h b/src/bitmap.h
index b17d83e5..e797bd8e 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -215,18 +215,24 @@ bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* vis
 // Size bins; larger bins are allowed to go into smaller bins.
 // SMALL can only be in small (and NONE), so they cannot fragment the larger bins.
 typedef enum mi_bbin_e {
-  MI_BBIN_NONE,    // no bin assigned yet (the chunk is completely free)
   MI_BBIN_SMALL,   // slice_count == 1
   MI_BBIN_OTHER,   // slice_count: any other from the other bins, and 1 <= slice_count <= MI_BCHUNK_BITS
   MI_BBIN_MEDIUM,  // slice_count == 8
   MI_BBIN_LARGE,   // slice_count == MI_BFIELD_BITS -- only used if MI_ENABLE_LARGE_PAGES is 1
+  MI_BBIN_NONE,    // no bin assigned yet (the chunk is completely free)
   MI_BBIN_COUNT
 } mi_bbin_t;
 
 static inline mi_bbin_t mi_bbin_inc(mi_bbin_t bbin) {
+  mi_assert_internal(bbin < MI_BBIN_COUNT);
   return (mi_bbin_t)((int)bbin + 1);
 }
 
+static inline mi_bbin_t mi_bbin_dec(mi_bbin_t bbin) {
+  mi_assert_internal(bbin > MI_BBIN_NONE);
+  return (mi_bbin_t)((int)bbin - 1);
+}
+
 static inline mi_bbin_t mi_bbin_of(size_t slice_count) {
   if (slice_count==1) return MI_BBIN_SMALL;
   if (slice_count==8) return MI_BBIN_MEDIUM;
@@ -241,8 +247,8 @@ typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bbitmap_s {
   _Atomic(size_t)  chunk_count;         // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
   _Atomic(size_t)  chunk_max_accessed;  // max chunk index that was once cleared or set
   size_t           _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2];  // suppress warning on msvc
-  mi_bchunkmap_t   chunkmap;
-  _Atomic(uint8_t) chunk_bins[MI_BITMAP_MAX_CHUNK_COUNT];  // 512b
+  mi_bchunkmap_t   chunkmap;
+  mi_bchunkmap_t   chunkmap_bins[MI_BBIN_COUNT - 1];  // chunkmaps with bit set if the chunk is in that size class (except MI_BBIN_NONE)
   mi_bchunk_t      chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT];  // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bbitmap_t;
 
@@ -255,6 +261,8 @@ static inline size_t mi_bbitmap_max_bits(const mi_bbitmap_t* bbitmap) {
   return (mi_bbitmap_chunk_count(bbitmap) * MI_BCHUNK_BITS);
 }
 
+mi_bbin_t mi_bbitmap_debug_get_bin(const mi_bchunk_t* chunkmap_bins, size_t chunk_idx);
+
 size_t mi_bbitmap_size(size_t bit_count, size_t* chunk_count);
 
diff --git a/src/options.c b/src/options.c
index 94cb8b67..e8eb85ad 100644
--- a/src/options.c
+++ b/src/options.c
@@ -202,7 +202,7 @@ void _mi_options_init(void) {
     }
   }
   #endif
-  if (!mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); }
+  if (mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); }
 }
 
 #define mi_stringifyx(str)  #str  // and stringify
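
---
Annotation (editor's note, not part of the patch): the central change above replaces the per-chunk `_Atomic(uint8_t) chunk_bins[]` array with one chunkmap per size bin (`chunkmap_bins`), so a chunk's bin is a set bit in exactly one bitmap, and `MI_BBIN_NONE` moves to the end of the enum so the assigned bins form a contiguous `MI_BBIN_SMALL..MI_BBIN_LARGE` range. The search loop can then load one bfield word per bin and derive all candidate masks for a whole word of chunks at once -- including the still-unassigned (`MI_BBIN_NONE`) chunks -- instead of loading an atomic byte per chunk in the inner loop. The sketch below is a minimal single-word model of that scheme, not mimalloc code: `model_t`, `bin_t`, and the helper names are made up for illustration, and a plain `uint64_t` stands in for the atomic bfields.

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

// Size bins, mirroring the reordered mi_bbin_t (NONE last, after the assigned bins).
typedef enum bin_e { BIN_SMALL, BIN_OTHER, BIN_MEDIUM, BIN_LARGE, BIN_NONE, BIN_COUNT } bin_t;

typedef struct model_s {
  uint64_t chunkmap;             // bit i set if chunk i may have free slices
  uint64_t bins[BIN_COUNT - 1];  // bit i set if chunk i is assigned to that bin
} model_t;

// Assign chunk `idx` to `bin`: set its bit in that bin's map, clear it in the
// others (as the patch's mi_bbitmap_set_chunk_bin does with mi_bchunk_set/clear).
static void set_chunk_bin(model_t* m, size_t idx, bin_t bin) {
  for (int b = BIN_SMALL; b < BIN_NONE; b++) {
    if (b == (int)bin) { m->bins[b] |=  ((uint64_t)1 << idx); }
    else               { m->bins[b] &= ~((uint64_t)1 << idx); }
  }
}

// Build the per-bin candidate masks for one chunkmap word, as the rewritten
// search loop does: cands[BIN_NONE] keeps only chunks with no assigned bin.
static void candidate_masks(const model_t* m, uint64_t cands[BIN_COUNT]) {
  cands[BIN_NONE] = m->chunkmap;
  for (int b = BIN_SMALL; b < BIN_NONE; b++) {
    cands[b] = m->bins[b] & m->chunkmap;
    cands[BIN_NONE] &= ~m->bins[b];  // clear bits that are in an assigned size bin
  }
}

int main(void) {
  model_t m = { .chunkmap = 0xFF, .bins = { 0 } };  // chunks 0..7 have free slices
  set_chunk_bin(&m, 0, BIN_SMALL);
  set_chunk_bin(&m, 3, BIN_MEDIUM);

  uint64_t cands[BIN_COUNT];
  candidate_masks(&m, cands);
  printf("small=%llx medium=%llx none=%llx\n",
         (unsigned long long)cands[BIN_SMALL],
         (unsigned long long)cands[BIN_MEDIUM],
         (unsigned long long)cands[BIN_NONE]);  // prints: small=1 medium=8 none=f6

  // Bin visit order for a MEDIUM request: smallest-first up to the request's
  // own bin, then jump straight to NONE, mirroring the patch's
  // `ibin = (ibin == bbin ? MI_BBIN_NONE : mi_bbin_inc(ibin))` step.
  const bin_t bbin = BIN_MEDIUM;
  for (bin_t b = BIN_SMALL; b <= BIN_NONE; b = (b == bbin ? BIN_NONE : (bin_t)(b + 1))) {
    printf("visit bin %d\n", (int)b);  // prints 0 (SMALL), 1 (OTHER), 2 (MEDIUM), 4 (NONE)
    if (b == BIN_NONE) break;          // NONE is the last bin visited
  }
  return 0;
}
```

The visit-order loop at the end shows why the skip expression reduces fragmentation: a request searches chunks already assigned to bins up to its own, then falls back only to unassigned (`NONE`) chunks, so, as the patch's comment says, a SMALL request is never placed in an OTHER, MEDIUM, or LARGE chunk.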