From bc67be4d79ff03ef824efcecd0aae1066b068b16 Mon Sep 17 00:00:00 2001
From: daanx
Date: Wed, 4 Dec 2024 21:40:57 -0800
Subject: [PATCH] small adjustments

---
 include/mimalloc/bits.h | 13 ++++++
 src/arena.c             | 58 +-----------------------
 src/bitmap.c            | 98 +++++++++++------------------------------
 src/bitmap.h            |  2 +-
 src/init.c              |  2 +-
 test/test-stress.c      |  4 +-
 6 files changed, 43 insertions(+), 134 deletions(-)

diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h
index f3bbe3bc..e1951cf7 100644
--- a/include/mimalloc/bits.h
+++ b/include/mimalloc/bits.h
@@ -314,6 +314,19 @@ static inline bool mi_bsr(size_t x, size_t* idx) {
 #endif
 }
 
+// Bit scan reverse: find the most significant bit that is set
+// return false if `x==0` (with `*idx` undefined) and true otherwise,
+// with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
+static inline bool mi_bsr32(uint32_t x, uint32_t* idx) {
+#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  unsigned long i;
+  return (_BitScanReverse(&i, x) ? (*idx = i, true) : false);
+#else
+  const size_t r = mi_clz((size_t)x);
+  *idx = (~r & (MI_SIZE_BITS - 1)) - (MI_SIZE_SIZE - sizeof(uint32_t));
+  return (x!=0);
+#endif
+}
 
 /* --------------------------------------------------------------------------------
 
diff --git a/src/arena.c b/src/arena.c
index 19815616..79a52c4d 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -335,7 +335,7 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are
   size_t _start; \
   if (req_arena_id == _mi_arena_id_none()) { \
     _max_arena = mi_atomic_load_relaxed(&mi_arena_count); \
-    _start = (_max_arena <= 1 ? 0 : (tseq / MI_THREADS_PER_ARENA) % _max_arena); \
+    _start = (_max_arena <= 2 ? 0 : (tseq % (_max_arena-1))); \
   } \
   else { \
     _max_arena = 1; \
@@ -795,62 +795,6 @@ void _mi_arena_page_unabandon(mi_page_t* page) {
   _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
 }
 
-/*
-bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
-  if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_is_abandoned(page)); }
-  mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
-  mi_assert_internal(_mi_ptr_page(page)==page);
-  // if (!mi_page_is_abandoned(page)) return false; // it is not abandoned (anymore)
-
-  // note: we can access the page even it is in the meantime reclaimed by another thread since
-  // we only call this when on free (and thus there is still an object alive in the page)
-  mi_memid_t memid = page->memid;
-  if (!_mi_arena_memid_is_suitable(memid, heap->arena_id)) return false; // don't reclaim between exclusive and non-exclusive arena's
-  if (mi_atomic_load_acquire(&page->xheap) != (uintptr_t)heap->tld->subproc) return false;
-
-  if mi_likely(memid.memkind == MI_MEM_ARENA) {
-    size_t slice_index;
-    mi_arena_t* arena = mi_page_arena(page, &slice_index, NULL);
-    //if (arena->subproc != heap->tld->subproc) return false; // only reclaim within the same subprocess
-
-    // don't reclaim more from a `free` call than half the current segments
-    // this is to prevent a pure free-ing thread to start owning too many segments
-    // (but not for out-of-arena segments as that is the main way to be reclaimed for those)
-    // if (segment->memid.memkind == MI_MEM_ARENA && heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) {
-    //   return false;
-    // }
-    const size_t bin = _mi_bin(page->block_size);
-    if (mi_bitmap_try_clear(&arena->slices_abandoned[bin], slice_index)) {
-      // we got it atomically
-      _mi_page_reclaim(heap, page);
-      mi_assert_internal(!mi_page_is_abandoned(page));
-      return true;
-    }
-    else {
-      if (mi_page_is_abandoned(page)) {
-        // mi_assert(false);
-      }
-    }
-  }
-  else {
-    // A page in OS or external memory
-    if (mi_atomic_load_acquire(&page->xheap) != (uintptr_t)heap->tld->subproc) return false;
-
-    // we use the thread_id to atomically grab ownership
-    mi_threadid_t abandoned_thread_id = 0;
-    if (mi_atomic_cas_strong_acq_rel(&page->xthread_id, &abandoned_thread_id, heap->thread_id)) {
-      // we got it atomically
-      _mi_page_reclaim(heap, page);
-      mi_assert_internal(!mi_page_is_abandoned(page));
-      return true;
-    }
-  }
-
-
-  return false;
-}
-*/
-
 void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) {
   MI_UNUSED(heap);
   // TODO: implement this
diff --git a/src/bitmap.c b/src/bitmap.c
index 1aa0a822..d5578cfb 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -768,7 +768,7 @@ static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
 
 static void mi_chunkmap_split(mi_chunkmap_t es, mi_cmap_t* cmap, mi_epoch_t* epoch) {
   *cmap = (mi_cmap_t)es;
-  *epoch = (mi_epoch_t)(es >> 32);
+  if (epoch!=NULL) { *epoch = (mi_epoch_t)(es >> 32); }
 }
 
 static mi_chunkmap_t mi_chunkmap_join(mi_cmap_t cmap, mi_epoch_t epoch) {
@@ -1091,80 +1091,50 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
 /* --------------------------------------------------------------------------------
   bitmap try_find_and_clear
 -------------------------------------------------------------------------------- */
 
-/*
-typedef bool (mi_bitmap_find_fun_t)(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx);
-
-static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx, mi_bitmap_find_fun_t* find_fun)
-{
-  if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;
-
-  // start chunk index -- todo: can depend on the tseq to decrease contention between threads
-  MI_UNUSED(tseq);
-  const size_t chunk_start = 0;
-  const size_t chunk_map_start = chunk_start / MI_CHUNKMAP_BITS;
-  const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS;
-
-  // for each chunkmap entry `i`
-  for( size_t _i = 0; _i < bitmap->chunk_map_count; _i++)
-  {
-    size_t i = (_i + chunk_map_start);
-    if (i > bitmap->chunk_map_count) i -= bitmap->chunk_map_count; // adjust for the start position
-
-    const size_t chunk_idx0 = i*MI_CHUNKMAP_BITS;
-    mi_epoch_t epoch;
-    mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, chunk_idx0, &epoch);
-    if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); } // rotate right for the start position (on the first iteration)
-
-    uint32_t cmap_idx; // one bit set of each chunk that may have bits set
-    size_t cmap_idx_shift = 0; // shift through the cmap
-    while (mi_bsf32(cmap, &cmap_idx)) { // find least bit that is set
-      // adjust for the start position
-      if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; }
-      // set the chunk idx
-      const size_t chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift;
-
-      // try to find and clear N bits in that chunk
-      if (chunk_idx < mi_bitmap_chunk_count(bitmap)) { // we can have less chunks than in the chunkmap..
-        if ((*find_fun)(bitmap, n, chunk_idx, epoch, pidx)) {
-          return true;
-        }
-      }
-
-      // skip to the next bit
-      cmap_idx_shift += cmap_idx+1;
-      cmap >>= cmap_idx; // skip scanned bits (and avoid UB for `cmap_idx+1`)
-      cmap >>= 1;
+static inline size_t mi_bitmap_find_hi_chunk(mi_bitmap_t* bitmap) {
+  size_t hi_chunk_map_idx = 0;
+  mi_cmap_t hi_cmap = 0;
+  for (size_t i = 1; i < mi_bitmap_chunk_map_count(bitmap); i++) {
+    mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, i, NULL);
+    if (cmap != 0) {
+      hi_chunk_map_idx = i;
+      hi_cmap = cmap;
     }
   }
-
-  return false;
+  uint32_t cmap_idx;
+  if (mi_bsr32(hi_cmap, &cmap_idx)) {
+    const size_t hi = (hi_chunk_map_idx * MI_CHUNKMAP_BITS) + cmap_idx;
+    mi_assert_internal(hi < mi_bitmap_chunk_count(bitmap));
+    return hi;
+  }
+  else {
+    return 0;
+  }
 }
-*/
 
 #define mi_bitmap_forall_chunks(bitmap, tseq, name_epoch, name_chunk_idx) \
   { \
   /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
   MI_UNUSED(tseq); \
-  const size_t chunk_start = 0; \
+  const size_t chunk_start = 0; /* tseq % (1 + mi_bitmap_find_hi_chunk(bitmap)); */ \
   const size_t chunk_map_start = chunk_start / MI_CHUNKMAP_BITS; \
-  const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS; \
+  const uint32_t chunk_map_start_idx = (uint32_t)(chunk_start % MI_CHUNKMAP_BITS); \
   /* for each chunkmap entry `i` */ \
   for (size_t _i = 0; _i < bitmap->chunk_map_count; _i++) { \
     size_t i = (_i + chunk_map_start); \
-    if (i > bitmap->chunk_map_count) i -= bitmap->chunk_map_count; /* adjust for the start position */ \
+    if (i >= bitmap->chunk_map_count) { i -= bitmap->chunk_map_count; } /* adjust for the start position */ \
     \
     const size_t chunk_idx0 = i*MI_CHUNKMAP_BITS; \
     mi_epoch_t name_epoch; \
     mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, chunk_idx0, &name_epoch); \
-    if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); } /* rotate right for the start position (on the first iteration) */ \
+    uint32_t cmap_idx_shift = 0; /* shift through the cmap */ \
+    if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); cmap_idx_shift = chunk_map_start_idx; } /* rotate right for the start position (on the first iteration) */ \
     \
     uint32_t cmap_idx; /* one bit set of each chunk that may have bits set */ \
-    size_t cmap_idx_shift = 0; /* shift through the cmap */ \
     while (mi_bsf32(cmap, &cmap_idx)) { /* find least bit that is set */ \
-      /* adjust for the start position again */ \
-      if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; } \
      /* set the chunk idx */ \
-      const size_t name_chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift; \
+      size_t name_chunk_idx = chunk_idx0 + ((cmap_idx + cmap_idx_shift) % MI_CHUNKMAP_BITS); \
+      if (name_chunk_idx >= mi_bitmap_chunk_count(bitmap)) { name_chunk_idx -= mi_bitmap_chunk_count(bitmap); } \
      /* try to find and clear N bits in that chunk */ \
       if (name_chunk_idx < mi_bitmap_chunk_count(bitmap)) { /* we can have less chunks than in the chunkmap.. */
@@ -1177,28 +1147,10 @@ static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq
   } \
   }}
 
-//static bool mi_bitmap_try_find_and_clearN_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
-//  size_t cidx;
-//  if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
-//    *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
-//    mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
-//    return true;
-//  }
-//  else {
-//    // we may find that all are cleared only on a second iteration but that is ok as
-//    // the chunkmap is a conservative approximation.
-//    if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-//      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
-//    }
-//    return false;
-//  }
-//}
-
 // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
 // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
 mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) {
-  // return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_clearN_at);
   mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx) {
     size_t cidx;
diff --git a/src/bitmap.h b/src/bitmap.h
index ca62735b..78ee5380 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -91,7 +91,7 @@ typedef uint32_t mi_cmap_t;
 
 // An atomic bitmap
 typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_s {
-  _Atomic(size_t) chunk_map_count; // valid chunk_map's
+  _Atomic(size_t) chunk_map_count; // valid chunk_maps entries
   _Atomic(size_t) chunk_count; // total count of chunks
   size_t padding[MI_BITMAP_CHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc
   _Atomic(mi_chunkmap_t) chunk_maps[MI_BITMAP_MAX_CHUNKMAPS];
diff --git a/src/init.c b/src/init.c
index 3dcb68e3..353b0ce4 100644
--- a/src/init.c
+++ b/src/init.c
@@ -400,7 +400,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   tld->heap_backing = bheap;
   tld->heaps = NULL;
   tld->subproc = &mi_subproc_default;
-  tld->tseq = 0; // mi_atomic_add_acq_rel(&mi_tcount, 1);
+  tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1);
   tld->os.stats = &tld->stats;
 }
 
diff --git a/test/test-stress.c b/test/test-stress.c
index 0b1b6c8d..61891269 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -343,9 +343,9 @@ int main(int argc, char** argv) {
 
 #ifndef USE_STD_MALLOC
   #ifndef NDEBUG
-  // mi_debug_show_arenas(true, true, false);
+  mi_debug_show_arenas(true, true, false);
   mi_collect(true);
-  mi_debug_show_arenas(true,true,false);
+  // mi_debug_show_arenas(true,true,false);
   #endif
   // mi_stats_print(NULL);
 #else