From 61436a92b9ec623220a92d1f2c166d39a64067a9 Mon Sep 17 00:00:00 2001
From: daanx
Date: Fri, 6 Dec 2024 15:26:01 -0800
Subject: [PATCH] working simplified version without pairmaps and bitmap epoch

---
 src/bitmap.c       | 48 ++++++++++++++++++++++++------------------------
 src/bitmap.h       | 18 ++++++++++++------
 src/init.c         |  2 +-
 src/page-map.c     |  6 +++---
 test/test-stress.c |  8 ++++----
 5 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/src/bitmap.c b/src/bitmap.c
index 7df46070..0916aaae 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -42,9 +42,9 @@ static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) {
   return mi_rotr(x,r);
 }
 
-static inline mi_bfield_t mi_bfield_zero(void) {
-  return 0;
-}
+//static inline mi_bfield_t mi_bfield_zero(void) {
+//  return 0;
+//}
 
 static inline mi_bfield_t mi_bfield_one(void) {
   return 1;
@@ -147,10 +147,10 @@ static inline bool mi_bfield_atomic_xset_mask(mi_xset_t set, _Atomic(mi_bfield_t
 
 // Tries to set a bit atomically. Returns `true` if the bit transitioned from 0 to 1
 // and otherwise false (leaving the bit unchanged)
-static inline bool mi_bfield_atomic_try_set(_Atomic(mi_bfield_t)*b, size_t idx) {
-  mi_assert_internal(idx < MI_BFIELD_BITS);
-  return mi_bfield_atomic_set(b, idx);  // for a single bit there is no difference
-}
+//static inline bool mi_bfield_atomic_try_set(_Atomic(mi_bfield_t)*b, size_t idx) {
+//  mi_assert_internal(idx < MI_BFIELD_BITS);
+//  return mi_bfield_atomic_set(b, idx);  // for a single bit there is no difference
+//}
 
 // Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0.
 // `all_clear` is set to true if the new bfield is zero (and false otherwise)
@@ -237,17 +237,17 @@ static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t by
 
 // Try to set a full field of bits atomically, and return true all bits transitioned from all 0's to 1's.
 // and false otherwise leaving the bit field as-is.
-static inline bool mi_bfield_atomic_try_setX(_Atomic(mi_bfield_t)*b) {
-  mi_bfield_t old = 0;
-  return mi_atomic_cas_weak_acq_rel(b, &old, mi_bfield_all_set());
-}
+//static inline bool mi_bfield_atomic_try_setX(_Atomic(mi_bfield_t)*b) {
+//  mi_bfield_t old = 0;
+//  return mi_atomic_cas_weak_acq_rel(b, &old, mi_bfield_all_set());
+//}
 
 // Try to clear a full field of bits atomically, and return true all bits transitioned from all 1's to 0's.
 // and false otherwise leaving the bit field as-is.
-static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b) {
-  mi_bfield_t old = mi_bfield_all_set();
-  return mi_atomic_cas_weak_acq_rel(b, &old, mi_bfield_zero());
-}
+//static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b) {
+//  mi_bfield_t old = mi_bfield_all_set();
+//  return mi_atomic_cas_weak_acq_rel(b, &old, mi_bfield_zero());
+//}
 
 
 // Check if all bits corresponding to a mask are set.
@@ -328,7 +328,7 @@ static bool mi_bchunk_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size
     const mi_bfield_t mask = mi_bfield_mask(m, idx);
     size_t already_xset = 0;
     const bool transition = mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
-    mi_assert_internal((transition && already_xset == m) || (!transition && already_xset > 0));
+    mi_assert_internal((transition && already_xset == 0) || (!transition && already_xset > 0));
     all_transition = all_transition && transition;
     total_already_xset += already_xset;
     // next field
@@ -605,9 +605,9 @@ static inline bool mi_bchunk_find_and_try_clear(mi_bchunk_t* chunk, size_t* pidx
   return mi_bchunk_find_and_try_xset(MI_BIT_CLEAR, chunk, pidx);
 }
 
-static inline bool mi_bchunk_find_and_try_set(mi_bchunk_t* chunk, size_t* pidx) {
-  return mi_bchunk_find_and_try_xset(MI_BIT_SET, chunk, pidx);
-}
+//static inline bool mi_bchunk_find_and_try_set(mi_bchunk_t* chunk, size_t* pidx) {
+//  return mi_bchunk_find_and_try_xset(MI_BIT_SET, chunk, pidx);
+//}
 
 
 // find least byte in a chunk with all bits set, and try unset it atomically
@@ -763,7 +763,7 @@ static inline bool mi_bchunk_all_are_clear_relaxed(mi_bchunk_t* chunk) {
   // a 64b cache-line contains the entire chunk anyway so load both at once
   const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
   const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
-  return (mi_mm256_is_zero(_mm256_or_epi64(vec1,vec2)));
+  return (mi_mm256_is_zero(_mm256_or_si256(vec1,vec2)));
 #else
   return mi_bchunk_all_are_clear(chunk);
 #endif
@@ -810,7 +810,7 @@ size_t mi_bitmap_size(size_t bit_count, size_t* pchunk_count) {
   mi_assert_internal(bit_count > 0);
   const size_t chunk_count = bit_count / MI_BCHUNK_BITS;
   mi_assert_internal(chunk_count >= 1);
-  const size_t size = sizeof(mi_bitmap_t) + ((chunk_count - 1) * MI_BCHUNK_SIZE);
+  const size_t size = offsetof(mi_bitmap_t,chunks) + (chunk_count * MI_BCHUNK_SIZE);
   mi_assert_internal( (size%MI_BCHUNK_SIZE) == 0 );
   if (pchunk_count != NULL) { *pchunk_count = chunk_count; }
   return size;
@@ -1044,10 +1044,10 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
     const size_t chunk_idx0 = i*MI_BFIELD_BITS; \
     mi_bfield_t cmap = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); \
     size_t cmap_idx_shift = 0;   /* shift through the cmap */ \
-    if (_i == 0) { cmap = mi_rotr(cmap, chunkmap_start_idx); cmap_idx_shift = chunkmap_start_idx; }  /* rotate right for the start position (on the first iteration) */ \
+    if (_i == 0) { cmap = mi_bfield_rotate_right(cmap, chunkmap_start_idx); cmap_idx_shift = chunkmap_start_idx; }  /* rotate right for the start position (on the first iteration) */ \
     \
     size_t cmap_idx; \
-    while (mi_bsf(cmap, &cmap_idx)) { /* find least bit that is set */ \
+    while (mi_bfield_find_least_bit(cmap, &cmap_idx)) { \
       /* set the chunk idx */ \
       size_t name_chunk_idx = chunk_idx0 + ((cmap_idx + cmap_idx_shift) % MI_BFIELD_BITS); \
       mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap)); \
@@ -1130,4 +1130,4 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx) {
   const size_t cidx = idx % MI_BCHUNK_BITS;
   mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
   mi_bchunk_clear_once_set(&bitmap->chunks[chunk_idx], cidx);
-}
\ No newline at end of file
+}
diff --git a/src/bitmap.h b/src/bitmap.h
index 9ef97d2f..7b6000cc 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -75,12 +75,18 @@ typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bchunk_s {
 
 // The chunkmap is itself a chunk.
 typedef mi_bchunk_t mi_bchunkmap_t;
-#define MI_BCHUNKMAP_BITS MI_BCHUNK_BITS
+#define MI_BCHUNKMAP_BITS             MI_BCHUNK_BITS
 
-#define MI_BITMAP_MAX_CHUNK_COUNT (MI_BCHUNKMAP_BITS)
-#define MI_BITMAP_MIN_CHUNK_COUNT (1)
-#define MI_BITMAP_MAX_BIT_COUNT   (MI_BITMAP_MAX_CHUNK_COUNT * MI_BCHUNK_BITS)  // 16 GiB arena
-#define MI_BITMAP_MIN_BIT_COUNT   (MI_BITMAP_MIN_CHUNK_COUNT * MI_BCHUNK_BITS)  // 32 MiB arena
+#define MI_BITMAP_MAX_CHUNK_COUNT     (MI_BCHUNKMAP_BITS)
+#define MI_BITMAP_MIN_CHUNK_COUNT     (1)
+#if MI_SIZE_BITS > 32
+#define MI_BITMAP_DEFAULT_CHUNK_COUNT (64)  // 2 GiB on 64-bit -- this is for the page map
+#else
+#define MI_BITMAP_DEFAULT_CHUNK_COUNT (1)
+#endif
+#define MI_BITMAP_MAX_BIT_COUNT       (MI_BITMAP_MAX_CHUNK_COUNT * MI_BCHUNK_BITS)  // 16 GiB arena
+#define MI_BITMAP_MIN_BIT_COUNT       (MI_BITMAP_MIN_CHUNK_COUNT * MI_BCHUNK_BITS)  // 32 MiB arena
+#define MI_BITMAP_DEFAULT_BIT_COUNT   (MI_BITMAP_DEFAULT_CHUNK_COUNT * MI_BCHUNK_BITS)  // 2 GiB arena
 
 
 // An atomic bitmap
@@ -88,7 +94,7 @@ typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
   _Atomic(size_t) chunk_count;  // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
   size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1];  // suppress warning on msvc
   mi_bchunkmap_t chunkmap;
-  mi_bchunk_t chunks[1];  // or more, up to MI_BITMAP_MAX_CHUNK_COUNT
+  mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT];  // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bitmap_t;
 
 
diff --git a/src/init.c b/src/init.c
index 64b31e1b..5d4a775a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -400,7 +400,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   tld->heap_backing = bheap;
   tld->heaps = NULL;
   tld->subproc = &mi_subproc_default;
-  tld->tseq = 0; // mi_atomic_add_acq_rel(&mi_tcount, 1);
+  tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1);
   tld->os.stats = &tld->stats;
 }
 
diff --git a/src/page-map.c b/src/page-map.c
index 25693064..c292378b 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -13,7 +13,7 @@ mi_decl_cache_align uint8_t* _mi_page_map = NULL;
 static bool mi_page_map_all_committed = false;
 static size_t mi_page_map_entries_per_commit_bit = MI_ARENA_SLICE_SIZE;
 static mi_memid_t mi_page_map_memid;
-static mi_bitmap_t mi_page_map_commit = { 1, MI_BITMAP_MIN_CHUNK_COUNT };
+static mi_bitmap_t mi_page_map_commit = { MI_BITMAP_DEFAULT_CHUNK_COUNT, { 0 }, { 0 }, { { 0 } } };
 
 bool _mi_page_map_init(void) {
   size_t vbits = _mi_os_virtual_address_bits();
@@ -22,10 +22,10 @@ bool _mi_page_map_init(void) {
 
   // 64 KiB for 4 GiB address space (on 32-bit)
   const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT));
-  mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size, MI_BITMAP_MIN_BIT_COUNT);
+  mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size, MI_BITMAP_DEFAULT_BIT_COUNT);
   // mi_bitmap_init(&mi_page_map_commit, MI_BITMAP_MIN_BIT_COUNT, true);
-  mi_page_map_all_committed = false; // _mi_os_has_overcommit();  // commit on-access on Linux systems?
+  mi_page_map_all_committed = (page_map_size <= 1*MI_MiB); // _mi_os_has_overcommit();  // commit on-access on Linux systems?
 
   _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
   if (_mi_page_map==NULL) {
     _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
diff --git a/test/test-stress.c b/test/test-stress.c
index 61891269..d5f106d5 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -41,11 +41,11 @@ static int THREADS = 8;
 static int SCALE = 10;
 static int ITER = 10;
 #elif 0
-static int THREADS = 1;
+static int THREADS = 4;
 static int SCALE = 100;
 static int ITER = 10;
 #define ALLOW_LARGE false
-#elif 1
+#elif 0
 static int THREADS = 32;
 static int SCALE = 50;
 static int ITER = 50;
@@ -343,9 +343,9 @@ int main(int argc, char** argv) {
 
 #ifndef USE_STD_MALLOC
 #ifndef NDEBUG
-  mi_debug_show_arenas(true, true, false);
+  //mi_debug_show_arenas(true, true, false);
   mi_collect(true);
-  // mi_debug_show_arenas(true,true,false);
+  mi_debug_show_arenas(true,true,false);
 #endif
   // mi_stats_print(NULL);
 #else
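
The mi_bitmap_size change above switches from sizeof(mi_bitmap_t) + ((chunk_count - 1) * MI_BCHUNK_SIZE) to offsetof(mi_bitmap_t,chunks) + (chunk_count * MI_BCHUNK_SIZE). This matters now that chunks[] is declared with MI_BITMAP_DEFAULT_CHUNK_COUNT entries rather than 1: a sizeof-based formula would always charge for the declared default count instead of the actual chunk_count. The following standalone C sketch (not part of the patch; the demo_* names are illustrative stand-ins, not mimalloc's types) shows the offsetof-based sizing on a simplified struct:

/* Standalone sketch: sizing a struct with a trailing chunk array via offsetof()
   instead of sizeof(). demo_* names are illustrative, not mimalloc's. */
#include <stddef.h>
#include <stdio.h>

#define DEMO_CHUNK_SIZE   64
#define DEMO_CHUNK_COUNT  4   /* compile-time default, like MI_BITMAP_DEFAULT_CHUNK_COUNT */

typedef struct demo_chunk_s  { unsigned char bits[DEMO_CHUNK_SIZE]; } demo_chunk_t;

typedef struct demo_bitmap_s {
  size_t       chunk_count;               /* chunks actually in use */
  demo_chunk_t chunks[DEMO_CHUNK_COUNT];  /* declared with a default count */
} demo_bitmap_t;

/* Header size up to `chunks` plus the real payload. Using sizeof(demo_bitmap_t)
   here would always charge for DEMO_CHUNK_COUNT chunks, which is wrong once the
   declared count is no longer 1. */
static size_t demo_bitmap_size(size_t chunk_count) {
  return offsetof(demo_bitmap_t, chunks) + (chunk_count * sizeof(demo_chunk_t));
}

int main(void) {
  printf("2 chunks need %zu bytes; sizeof(demo_bitmap_t) is %zu\n",
         demo_bitmap_size(2), sizeof(demo_bitmap_t));
  return 0;
}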
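
The chunkmap iteration macro in bitmap.c rotates the chunkmap field right by the start index, repeatedly takes the least set bit, and maps each hit back with (cmap_idx + cmap_idx_shift) % MI_BFIELD_BITS. Below is a standalone sketch of that wrap-around scan; the portable helpers rotr64 and find_least_bit are illustrative stand-ins for mi_bfield_rotate_right and mi_bfield_find_least_bit, not mimalloc's implementations:

/* Standalone sketch: visiting all set bits of a word, starting at an arbitrary
   position and wrapping around, using a rotate-right plus least-bit scan. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_BITS 64

static inline uint64_t rotr64(uint64_t x, unsigned r) {
  r %= FIELD_BITS;
  return (r == 0 ? x : (x >> r) | (x << (FIELD_BITS - r)));
}

/* Find the least set bit; returns false if x is zero (portable stand-in for a ctz/bsf intrinsic). */
static inline bool find_least_bit(uint64_t x, unsigned* idx) {
  if (x == 0) return false;
  unsigned i = 0;
  while ((x & 1) == 0) { x >>= 1; i++; }
  *idx = i;
  return true;
}

/* Visit every set bit of `map`, starting the scan at `start_idx` and wrapping around. */
static void scan_from(uint64_t map, unsigned start_idx) {
  uint64_t rotated = rotr64(map, start_idx);
  unsigned idx;
  while (find_least_bit(rotated, &idx)) {
    unsigned original = (idx + start_idx) % FIELD_BITS;  /* undo the rotation */
    printf("set bit at %u\n", original);
    rotated &= rotated - 1;  /* clear the bit we just visited */
  }
}

int main(void) {
  scan_from((1ull << 3) | (1ull << 10) | (1ull << 62), 8);  /* visits 10, 62, then wraps to 3 */
  return 0;
}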