remove maxaccessed from general bitmaps

daanx 2024-12-15 19:15:00 -08:00
parent e24217e69c
commit 3330d4353a
8 changed files with 44 additions and 49 deletions


@@ -77,7 +77,7 @@ static mi_meta_page_t* mi_meta_page_zalloc(void) {
   // initialize the page
   mpage->memid = memid;
   mi_bbitmap_init(&mpage->blocks_free, MI_META_BLOCKS_PER_PAGE, true /* already_zero */);
-  const size_t mpage_size  = offsetof(mi_meta_page_t,blocks_free) + mi_bitmap_size(MI_META_BLOCKS_PER_PAGE, NULL);
+  const size_t mpage_size  = offsetof(mi_meta_page_t,blocks_free) + mi_bbitmap_size(MI_META_BLOCKS_PER_PAGE, NULL);
   const size_t info_blocks = _mi_divide_up(mpage_size,MI_META_BLOCK_SIZE);
   mi_assert_internal(info_blocks < MI_META_BLOCKS_PER_PAGE);
   mi_bbitmap_unsafe_setN(&mpage->blocks_free, info_blocks, MI_META_BLOCKS_PER_PAGE - info_blocks);
@@ -142,7 +142,7 @@ mi_decl_noinline void _mi_meta_free(void* p, size_t size, mi_memid_t memid) {
   mi_meta_page_t* mpage = (mi_meta_page_t*)memid.mem.meta.meta_page;
   mi_assert_internal(mi_meta_page_of_ptr(p,NULL) == mpage);
   mi_assert_internal(block_idx + block_count < MI_META_BLOCKS_PER_PAGE);
-  mi_assert_internal(mi_bitmap_is_clearN(&mpage->blocks_free, block_idx, block_count));
+  mi_assert_internal(mi_bbitmap_is_clearN(&mpage->blocks_free, block_idx, block_count));
   // we zero on free (and on the initial page allocation) so we don't need a "dirty" map
   _mi_memzero_aligned(mi_meta_block_start(mpage, block_idx), block_count*MI_META_BLOCK_SIZE);
   mi_bbitmap_setN(&mpage->blocks_free, block_idx, block_count);


@@ -1258,7 +1258,7 @@ static size_t mi_debug_show_chunks(const char* header, size_t slice_count, size_
     char chunk_kind = ' ';
     if (chunk_bins != NULL) {
-      switch (chunk_bins[i]) {
+      switch (mi_atomic_load_relaxed(&chunk_bins[i])) {
         // case MI_BBIN_SMALL: chunk_kind = 'S'; break;
         case MI_BBIN_MEDIUM: chunk_kind = 'M'; break;
         case MI_BBIN_LARGE:  chunk_kind = 'L'; break;
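
The switch above now reads each bin through mi_atomic_load_relaxed because chunk_bins[] is updated concurrently by other threads; a relaxed load is enough here since the value is only formatted for a debug display and may be momentarily stale. A minimal standalone sketch of that pattern with C11 atomics (the array and values are illustrative, not mimalloc's):

    #include <stdatomic.h>
    #include <stddef.h>

    // hypothetical shared debug state, written concurrently by other threads
    static _Atomic unsigned char chunk_bins[64];

    static char chunk_kind_of(size_t i) {
      // a relaxed load avoids the data race; no ordering is needed because
      // the result is only printed and may lag behind concurrent updates
      switch (atomic_load_explicit(&chunk_bins[i], memory_order_relaxed)) {
        case 1:  return 'M';   // medium
        case 2:  return 'L';   // large
        default: return ' ';
      }
    }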


@@ -45,6 +45,14 @@ static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
   return mi_bsf(x,idx);
 }

+// find the most significant bit that is set.
+// return false if `x==0` (with `*idx` undefined) and true otherwise,
+// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
+static inline bool mi_bfield_find_highest_bit(mi_bfield_t x, size_t* idx) {
+  return mi_bsr(x, idx);
+}
+
 // find each set bit in a bit field `x` and clear it, until it becomes zero.
 static inline bool mi_bfield_foreach_bit(mi_bfield_t* x, size_t* idx) {
@@ -873,17 +881,9 @@ static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) {
   bitmap chunkmap
 -------------------------------------------------------------------------------- */

-static void mi_bitmap_chunkmap_set_max(mi_bitmap_t* bitmap, size_t chunk_idx) {
-  size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_accessed);
-  if mi_unlikely(chunk_idx > oldmax) {
-    mi_atomic_cas_strong_relaxed(&bitmap->chunk_max_accessed, &oldmax, chunk_idx);
-  }
-}
-
 static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
   mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
   mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
-  mi_bitmap_chunkmap_set_max(bitmap, chunk_idx);
 }

 static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) {
@@ -898,13 +898,12 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
     mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
     return false;
   }
-  mi_bitmap_chunkmap_set_max(bitmap, chunk_idx);
   return true;
 }

 /* --------------------------------------------------------------------------------
   bitmap
 -------------------------------------------------------------------------------- */

 size_t mi_bitmap_size(size_t bit_count, size_t* pchunk_count) {
@@ -1107,33 +1106,24 @@ typedef bool (mi_bitmap_visit_fun_t)(mi_bitmap_t* bitmap, size_t chunk_idx, size
 // If it returns `true` stop the search.
 static inline bool mi_bitmap_find(mi_bitmap_t* bitmap, size_t tseq, size_t n, size_t* pidx, mi_bitmap_visit_fun_t* on_find, void* arg1, void* arg2)
 {
-  // we space out threads to reduce contention
-  const size_t cmap_max_count  = _mi_divide_up(mi_bitmap_chunk_count(bitmap),MI_BFIELD_BITS);
-  const size_t chunk_acc       = mi_atomic_load_relaxed(&bitmap->chunk_max_accessed);
-  const size_t cmap_acc        = chunk_acc / MI_BFIELD_BITS;
-  const size_t cmap_acc_bits   = 1 + (chunk_acc % MI_BFIELD_BITS);
-
-  // create a mask over the chunkmap entries to iterate over them efficiently
-  mi_assert_internal(MI_BFIELD_BITS >= MI_BCHUNK_FIELDS);
-  const mi_bfield_t cmap_mask  = mi_bfield_mask(cmap_max_count,0);
-  const size_t cmap_cycle      = cmap_acc+1;
-  mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
-  {
+  const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS);
+  for (size_t i = 0; i < chunkmap_max; i++) {
     // and for each chunkmap entry we iterate over its bits to find the chunks
-    mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[cmap_idx]);
-    size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
-    mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y)  // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
-    {
-      mi_assert_internal(eidx <= MI_BFIELD_BITS);
-      const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
-      mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
-      if ((*on_find)(bitmap, chunk_idx, n, pidx, arg1, arg2)) {
-        return true;
+    const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]);
+    size_t hi;
+    if (mi_bfield_find_highest_bit(cmap_entry, &hi)) {
+      mi_bfield_cycle_iterate(cmap_entry, tseq%8, hi+1, eidx, Y)  // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
+      {
+        mi_assert_internal(eidx <= MI_BFIELD_BITS);
+        const size_t chunk_idx = i*MI_BFIELD_BITS + eidx;
+        mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+        if ((*on_find)(bitmap, chunk_idx, n, pidx, arg1, arg2)) {
+          return true;
+        }
       }
+      mi_bfield_cycle_iterate_end(Y);
     }
-    mi_bfield_cycle_iterate_end(Y);
   }
-  mi_bfield_cycle_iterate_end(X);
   return false;
 }
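
This rewrite removes the dependence on the shared chunk_max_accessed high-water mark: rather than bounding the whole scan by a globally maintained maximum (which every set and clear had to keep updated with a CAS), each chunkmap word is now bounded by its own highest set bit, so trailing empty chunks are skipped with no extra bookkeeping. A simplified, single-threaded model of the new scan, with illustrative names and without the mi_bfield_cycle_iterate macro (which additionally rotates the start position by tseq%8 to spread threads):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define BFIELD_BITS 64

    // Visit each chunk whose bit is set in the chunkmap until the visitor
    // returns true. The highest set bit of every word bounds the inner loop,
    // so empty tails cost nothing without any separate "max accessed" counter.
    static bool bitmap_find(const uint64_t* chunkmap, size_t nwords,
                            bool (*visit)(size_t chunk_idx, void* arg), void* arg) {
      for (size_t i = 0; i < nwords; i++) {
        const uint64_t entry = chunkmap[i];
        if (entry == 0) continue;                           // skip empty words
        const size_t hi = 63 - (size_t)__builtin_clzll(entry);
        for (size_t eidx = 0; eidx <= hi; eidx++) {         // the real code rotates this by tseq%8
          if ((entry >> eidx) & 1) {
            if (visit(i*BFIELD_BITS + eidx, arg)) return true;
          }
        }
      }
      return false;
    }
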
@@ -1478,7 +1468,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
     // only in the current size class!
     const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_acquire(&bbitmap->chunk_bins[chunk_idx]);
     if // (bin >= chunk_bin) {
-       (bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {
+       ((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {
       mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
       size_t cidx;
       if ((*on_find)(chunk, n, &cidx)) {
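
The added (mi_bbin_t) cast keeps the comparison within the enum type, presumably to silence an implicit-conversion warning; the eligibility rule itself is unchanged: a chunk qualifies on an exact bin match, or when both the requested bin and the chunk's bin are small-or-below. The rule in isolation, with illustrative enum names:

    #include <stdbool.h>

    typedef enum bbin_e { BBIN_NONE, BBIN_SMALL, BBIN_MEDIUM, BBIN_LARGE } bbin_t;

    // exact match, or both small-or-below: small requests may be
    // served from any small chunk
    static bool bin_matches(bbin_t bin, bbin_t chunk_bin) {
      return bin == chunk_bin || (bin <= BBIN_SMALL && chunk_bin <= BBIN_SMALL);
    }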


@@ -98,8 +98,7 @@ typedef mi_bchunk_t mi_bchunkmap_t;

 // An atomic bitmap
 typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
   _Atomic(size_t) chunk_count;         // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
-  _Atomic(size_t) chunk_max_accessed;  // max chunk index that was once cleared or set
-  size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2];  // suppress warning on msvc
+  size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1];  // suppress warning on msvc
   mi_bchunkmap_t chunkmap;
   mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bitmap_t;
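
With chunk_max_accessed gone, only one size_t of real header (chunk_count) precedes the padding, so the pad array grows from MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2 entries to - 1 and the header still occupies exactly one chunk, keeping the chunkmap that follows chunk-aligned. A quick check of that arithmetic, assuming 64-byte (512-bit) chunks and an 8-byte size_t (illustrative stand-ins, not mimalloc's headers):

    #include <assert.h>
    #include <stddef.h>

    #define BCHUNK_SIZE 64   // assumed chunk size in bytes (512 bits)

    typedef struct bitmap_header_s {
      size_t chunk_count;                               // the one remaining header word
      size_t _padding[BCHUNK_SIZE/sizeof(size_t) - 1];  // pad out the rest of the chunk
    } bitmap_header_t;

    // the header fills exactly one chunk, so a chunkmap placed right after
    // it stays chunk-aligned whenever the struct itself is
    static_assert(sizeof(bitmap_header_t) == BCHUNK_SIZE, "header is one chunk");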


@@ -138,7 +138,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   // collect retired pages
   _mi_heap_collect_retired(heap, force);

+  // if (_mi_is_main_thread()) { mi_debug_show_arenas(true, false, false); }
+
   // collect all pages owned by this thread
   mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);


@@ -17,7 +17,7 @@ static mi_memid_t mi_page_map_memid;

 // (note: we need to initialize statically or otherwise C++ may run a default constructors after process initialization)
-static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0),
+static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT),
                                           { 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} };

 bool _mi_page_map_init(void) {


@@ -82,7 +82,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
   mi_assert_internal(mi_page_block_size(page) > 0);
   mi_assert_internal(page->used <= page->capacity);
   mi_assert_internal(page->capacity <= page->reserved);

   // const size_t bsize = mi_page_block_size(page);
   // uint8_t* start = mi_page_start(page);
   //mi_assert_internal(start + page->capacity*page->block_size == page->top);
@@ -623,7 +623,7 @@ void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
   #endif
   mi_assert_internal(page->block_size_shift == 0 || (mi_page_block_size(page) == ((size_t)1 << page->block_size_shift)));
   mi_assert_expensive(mi_page_is_valid_init(page));

   // initialize an initial free list
   mi_page_extend_free(heap,page);
   mi_assert(mi_page_immediate_available(page));
@@ -872,10 +872,14 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
   mi_assert_internal(mi_heap_is_initialized(heap));

   // call potential deferred free routines
-  // _mi_deferred_free(heap, false);
+  _mi_deferred_free(heap, false);

-  // free delayed frees from other threads (but skip contended ones)
-  // _mi_heap_delayed_free_partial(heap);
+  // collect every N generic mallocs
+  /*static long count = 0;
+  if (count++ > 100000) {
+    count = 0;
+    _mi_heap_collect_retired(heap,false);
+  }*/

   // find (or allocate) a page of the right size
   mi_page_t* page = mi_find_page(heap, size, huge_alignment);
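
The commented-out experiment above would collect retired pages every ~100000 generic allocations; as written, its function-local static long would be shared (and raced on) by every thread, which is harmless only while the code stays disabled. A sketch of the same idea with a thread-local counter, reusing _mi_heap_collect_retired from the heap.c hunk (the threshold and helper name are illustrative):

    #define COLLECT_EVERY 100000   // illustrative threshold

    static _Thread_local long generic_count = 0;

    // call on each generic allocation; collects retired pages every N calls
    static void maybe_collect_retired(mi_heap_t* heap) {
      if (++generic_count > COLLECT_EVERY) {
        generic_count = 0;
        _mi_heap_collect_retired(heap, false /* not forced */);
      }
    }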


@@ -262,7 +262,7 @@ static void test_stress(void) {
 #if !defined(NDEBUG) || defined(MI_TSAN)
     if ((n + 1) % 10 == 0) {
       printf("- iterations left: %3d\n", ITER - (n + 1));
-      //mi_debug_show_arenas(true, false, false);
+      mi_debug_show_arenas(true, false, false);
       //mi_collect(true);
       //mi_debug_show_arenas(true, false, false);
     }
@@ -352,7 +352,7 @@ int main(int argc, char** argv) {
   mi_collect(true);
   mi_debug_show_arenas(true,false,false);
 #else
-  //mi_collect(true);
+  mi_collect(false);
   mi_debug_show_arenas(true,false,false);
   // mi_stats_print(NULL);
 #endif