remove maxaccessed from general bitmaps

daanx 2024-12-15 19:15:00 -08:00
parent e24217e69c
commit 3330d4353a
8 changed files with 44 additions and 49 deletions


@@ -77,7 +77,7 @@ static mi_meta_page_t* mi_meta_page_zalloc(void) {
// initialize the page
mpage->memid = memid;
mi_bbitmap_init(&mpage->blocks_free, MI_META_BLOCKS_PER_PAGE, true /* already_zero */);
const size_t mpage_size = offsetof(mi_meta_page_t,blocks_free) + mi_bitmap_size(MI_META_BLOCKS_PER_PAGE, NULL);
const size_t mpage_size = offsetof(mi_meta_page_t,blocks_free) + mi_bbitmap_size(MI_META_BLOCKS_PER_PAGE, NULL);
const size_t info_blocks = _mi_divide_up(mpage_size,MI_META_BLOCK_SIZE);
mi_assert_internal(info_blocks < MI_META_BLOCKS_PER_PAGE);
mi_bbitmap_unsafe_setN(&mpage->blocks_free, info_blocks, MI_META_BLOCKS_PER_PAGE - info_blocks);
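
The info-block count above is a ceiling division of the meta-page header size by the meta block size. A minimal standalone sketch of that computation, assuming `_mi_divide_up` is the usual round-up division (the name and sizes below are illustrative only):

#include <stddef.h>

// Sketch: round-up division, i.e. the number of blocks needed to cover `size` bytes.
static inline size_t example_divide_up(size_t size, size_t block_size) {
  return (size + block_size - 1) / block_size;
}

// e.g. a 200-byte header with 128-byte meta blocks needs example_divide_up(200, 128) == 2 info blocks,
// after which the remaining blocks of the page are marked free.
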
@@ -142,7 +142,7 @@ mi_decl_noinline void _mi_meta_free(void* p, size_t size, mi_memid_t memid) {
mi_meta_page_t* mpage = (mi_meta_page_t*)memid.mem.meta.meta_page;
mi_assert_internal(mi_meta_page_of_ptr(p,NULL) == mpage);
mi_assert_internal(block_idx + block_count < MI_META_BLOCKS_PER_PAGE);
mi_assert_internal(mi_bitmap_is_clearN(&mpage->blocks_free, block_idx, block_count));
mi_assert_internal(mi_bbitmap_is_clearN(&mpage->blocks_free, block_idx, block_count));
// we zero on free (and on the initial page allocation) so we don't need a "dirty" map
_mi_memzero_aligned(mi_meta_block_start(mpage, block_idx), block_count*MI_META_BLOCK_SIZE);
mi_bbitmap_setN(&mpage->blocks_free, block_idx, block_count);


@@ -1258,7 +1258,7 @@ static size_t mi_debug_show_chunks(const char* header, size_t slice_count, size_
char chunk_kind = ' ';
if (chunk_bins != NULL) {
switch (chunk_bins[i]) {
switch (mi_atomic_load_relaxed(&chunk_bins[i])) {
// case MI_BBIN_SMALL: chunk_kind = 'S'; break;
case MI_BBIN_MEDIUM: chunk_kind = 'M'; break;
case MI_BBIN_LARGE: chunk_kind = 'L'; break;


@@ -45,6 +45,14 @@ static inline bool mi_bfield_find_least_bit(mi_bfield_t x, size_t* idx) {
return mi_bsf(x,idx);
}
// find the most significant bit that is set.
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bfield_find_highest_bit(mi_bfield_t x, size_t* idx) {
return mi_bsr(x, idx);  // bit-scan-reverse: returns the most significant set bit
}
// find each set bit in a bit field `x` and clear it, until it becomes zero.
static inline bool mi_bfield_foreach_bit(mi_bfield_t* x, size_t* idx) {
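
The new `mi_bfield_find_highest_bit` mirrors `mi_bfield_find_least_bit` but scans from the top via `mi_bsr` (bit-scan-reverse). A rough standalone sketch of the same operation, assuming a GCC/Clang-style count-leading-zeros builtin and a 64-bit field (the name below is illustrative, not mimalloc's):

#include <stdbool.h>
#include <stddef.h>

// Sketch: find the highest set bit of a 64-bit field.
// Returns false when x == 0; otherwise stores the bit index (0..63) in *idx.
static inline bool example_bsr64(unsigned long long x, size_t* idx) {
  if (x == 0) return false;
  *idx = 63 - (size_t)__builtin_clzll(x);   // leading-zero count mapped to a bit index
  return true;
}
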
@@ -873,17 +881,9 @@ static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) {
bitmap chunkmap
-------------------------------------------------------------------------------- */
static void mi_bitmap_chunkmap_set_max(mi_bitmap_t* bitmap, size_t chunk_idx) {
size_t oldmax = mi_atomic_load_relaxed(&bitmap->chunk_max_accessed);
if mi_unlikely(chunk_idx > oldmax) {
mi_atomic_cas_strong_relaxed(&bitmap->chunk_max_accessed, &oldmax, chunk_idx);
}
}
static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
mi_bitmap_chunkmap_set_max(bitmap, chunk_idx);
}
static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) {
@@ -898,7 +898,6 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
return false;
}
mi_bitmap_chunkmap_set_max(bitmap, chunk_idx);
return true;
}
@@ -1107,25 +1106,16 @@ typedef bool (mi_bitmap_visit_fun_t)(mi_bitmap_t* bitmap, size_t chunk_idx, size
// If it returns `true` stop the search.
static inline bool mi_bitmap_find(mi_bitmap_t* bitmap, size_t tseq, size_t n, size_t* pidx, mi_bitmap_visit_fun_t* on_find, void* arg1, void* arg2)
{
// we space out threads to reduce contention
const size_t cmap_max_count = _mi_divide_up(mi_bitmap_chunk_count(bitmap),MI_BFIELD_BITS);
const size_t chunk_acc = mi_atomic_load_relaxed(&bitmap->chunk_max_accessed);
const size_t cmap_acc = chunk_acc / MI_BFIELD_BITS;
const size_t cmap_acc_bits = 1 + (chunk_acc % MI_BFIELD_BITS);
// create a mask over the chunkmap entries to iterate over them efficiently
mi_assert_internal(MI_BFIELD_BITS >= MI_BCHUNK_FIELDS);
const mi_bfield_t cmap_mask = mi_bfield_mask(cmap_max_count,0);
const size_t cmap_cycle = cmap_acc+1;
mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
{
const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS);
for (size_t i = 0; i < chunkmap_max; i++) {
// and for each chunkmap entry we iterate over its bits to find the chunks
mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[cmap_idx]);
size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y) // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]);
size_t hi;
if (mi_bfield_find_highest_bit(cmap_entry, &hi)) {
mi_bfield_cycle_iterate(cmap_entry, tseq%8, hi+1, eidx, Y) // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
{
mi_assert_internal(eidx <= MI_BFIELD_BITS);
const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
const size_t chunk_idx = i*MI_BFIELD_BITS + eidx;
mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
if ((*on_find)(bitmap, chunk_idx, n, pidx, arg1, arg2)) {
return true;
@@ -1133,7 +1123,7 @@ static inline bool mi_bitmap_find(mi_bitmap_t* bitmap, size_t tseq, size_t n, si
}
mi_bfield_cycle_iterate_end(Y);
}
mi_bfield_cycle_iterate_end(X);
}
return false;
}
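
With `chunk_max_accessed` gone, the search no longer cycles over chunkmap words bounded by the last-accessed chunk; it walks every chunkmap word and bounds the inner bit scan by that word's own highest set bit. A rough standalone sketch of the resulting scan order, using illustrative names, fixed 64-bit words, and a plain loop in place of `mi_bfield_cycle_iterate` and the relaxed atomic loads:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Sketch: visit candidate chunks word by word; within a word, scan only bits
// 0..hi (hi = highest set bit), rotating the start position by a per-thread
// sequence number so threads spread out over the chunks.
static bool example_scan_chunkmap(const uint64_t* words, size_t word_count,
                                  size_t tseq, size_t* out_chunk_idx) {
  for (size_t i = 0; i < word_count; i++) {
    const uint64_t map = words[i];
    if (map == 0) continue;                               // no chunks marked in this word
    const size_t hi = 63 - (size_t)__builtin_clzll(map);  // highest set bit
    const size_t cycle = hi + 1;                          // only consider bits 0..hi
    const size_t start = (tseq % 8) % cycle;              // tseq reduced to 8 bins
    for (size_t k = 0; k < cycle; k++) {
      const size_t bit = (start + k) % cycle;             // rotated scan order
      if ((map >> bit) & 1) {
        *out_chunk_idx = i*64 + bit;                      // candidate chunk index
        return true;                                      // caller would try to claim it here
      }
    }
  }
  return false;
}
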
@@ -1478,7 +1468,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
// only in the current size class!
const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_acquire(&bbitmap->chunk_bins[chunk_idx]);
if // (bin >= chunk_bin) {
(bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {
((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {
mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
size_t cidx;
if ((*on_find)(chunk, n, &cidx)) {
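
The new line only adds a cast on `bin`; the matching rule itself is unchanged: a chunk is considered only for its exact size class, except that bins at or below the small bin are treated as interchangeable. A tiny standalone sketch of that predicate, with a hypothetical enum (mimalloc's actual `mi_bbin_t` values and ordering may differ):

#include <stdbool.h>

typedef enum { EX_BBIN_NONE, EX_BBIN_SMALL, EX_BBIN_MEDIUM, EX_BBIN_LARGE } ex_bbin_t;

// A chunk in `chunk_bin` may serve a request for `bin` only if the bins match
// exactly, or if both are at or below the small bin (assumed interchangeable here).
static bool ex_bin_matches(ex_bbin_t bin, ex_bbin_t chunk_bin) {
  return (bin == chunk_bin) || (bin <= EX_BBIN_SMALL && chunk_bin <= EX_BBIN_SMALL);
}
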


@@ -98,8 +98,7 @@ typedef mi_bchunk_t mi_bchunkmap_t;
// An atomic bitmap
typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
_Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
_Atomic(size_t) chunk_max_accessed; // max chunk index that was once cleared or set
size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc
size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1]; // suppress warning on msvc
mi_bchunkmap_t chunkmap;
mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
} mi_bitmap_t;
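
Removing the `chunk_max_accessed` word shrinks the header by one `size_t`, so the padding grows from `- 2` to `- 1`, keeping the fields before `chunkmap` at exactly one `MI_BCHUNK_SIZE` block (presumably so `chunkmap` stays chunk-aligned within the already chunk-aligned struct). A small standalone check of that layout math, with an illustrative chunk size rather than mimalloc's constants:

#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

#define EX_BCHUNK_SIZE  64   // assumed chunk size in bytes, for illustration only

typedef struct example_bitmap_s {
  _Atomic(size_t) chunk_count;                          // 1 word
  size_t _padding[EX_BCHUNK_SIZE/sizeof(size_t) - 1];   // pad the header to one chunk
  unsigned char chunkmap[EX_BCHUNK_SIZE];               // stand-in for mi_bchunkmap_t
} example_bitmap_t;

// With one atomic word removed and "- 1" padding, the chunkmap still starts at
// a chunk-size offset.
static_assert(offsetof(example_bitmap_t, chunkmap) == EX_BCHUNK_SIZE,
              "header must occupy exactly one chunk-sized block");
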


@@ -139,6 +139,8 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
// collect retired pages
_mi_heap_collect_retired(heap, force);
// if (_mi_is_main_thread()) { mi_debug_show_arenas(true, false, false); }
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);


@@ -17,7 +17,7 @@ static mi_memid_t mi_page_map_memid;
// (note: we need to initialize statically, as otherwise C++ may run default constructors after process initialization)
static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT), MI_ATOMIC_VAR_INIT(0),
static mi_bitmap_t mi_page_map_commit = { MI_ATOMIC_VAR_INIT(MI_BITMAP_DEFAULT_CHUNK_COUNT),
{ 0 }, { {MI_ATOMIC_VAR_INIT(0)} }, {{{ MI_ATOMIC_VAR_INIT(0) }}} };
bool _mi_page_map_init(void) {


@@ -872,10 +872,14 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
mi_assert_internal(mi_heap_is_initialized(heap));
// call potential deferred free routines
// _mi_deferred_free(heap, false);
_mi_deferred_free(heap, false);
// free delayed frees from other threads (but skip contended ones)
// _mi_heap_delayed_free_partial(heap);
// collect every N generic mallocs
/*static long count = 0;
if (count++ > 100000) {
count = 0;
_mi_heap_collect_retired(heap,false);
}*/
// find (or allocate) a page of the right size
mi_page_t* page = mi_find_page(heap, size, huge_alignment);


@@ -262,7 +262,7 @@ static void test_stress(void) {
#if !defined(NDEBUG) || defined(MI_TSAN)
if ((n + 1) % 10 == 0) {
printf("- iterations left: %3d\n", ITER - (n + 1));
//mi_debug_show_arenas(true, false, false);
mi_debug_show_arenas(true, false, false);
//mi_collect(true);
//mi_debug_show_arenas(true, false, false);
}
@@ -352,7 +352,7 @@ int main(int argc, char** argv) {
mi_collect(true);
mi_debug_show_arenas(true,false,false);
#else
//mi_collect(true);
mi_collect(false);
mi_debug_show_arenas(true,false,false);
// mi_stats_print(NULL);
#endif