diff --git a/src/arena.c b/src/arena.c
index 1b891377..f8b6fca1 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -37,18 +37,20 @@ typedef struct mi_arena_s {
   mi_arena_id_t id;                    // arena id; 0 for non-specific
   size_t slice_count;                  // size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`)
+  size_t info_slices;                  // initial slices reserved for the arena bitmaps
   int    numa_node;                    // associated NUMA node
   bool   exclusive;                    // only allow allocations if specifically for this arena
   bool   is_large;                     // memory area consists of large- or huge OS pages (always committed)
   mi_lock_t abandoned_visit_lock;      // lock is only used when abandoned segments are being visited
   _Atomic(mi_msecs_t) purge_expire;    // expiration time when slices should be decommitted from `slices_purge`.
-  mi_bitmap_t slices_free;             // is the slice free?
-  mi_bitmap_t slices_committed;        // is the slice committed? (i.e. accessible)
-  mi_bitmap_t slices_purge;            // can the slice be purged? (slice in purge => slice in free)
-  mi_bitmap_t slices_dirty;            // is the slice potentially non-zero?
+  mi_bitmap_t* slices_free;            // is the slice free?
+  mi_bitmap_t* slices_committed;       // is the slice committed? (i.e. accessible)
+  mi_bitmap_t* slices_purge;           // can the slice be purged? (slice in purge => slice in free)
+  mi_bitmap_t* slices_dirty;           // is the slice potentially non-zero?
   mi_pairmap_t pages_abandoned[MI_BIN_COUNT];  // abandoned pages per size bin (a set bit means the start of the page)
                                        // the full queue contains abandoned full pages
+  // followed by the bitmaps (whose size depends on the arena size)
 } mi_arena_t;

 #define MI_MAX_ARENAS         (1024)   // Limited for now (and takes up .bss)
@@ -58,6 +60,7 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
 static mi_decl_cache_align _Atomic(size_t)      mi_arena_count; // = 0

+
 /* -----------------------------------------------------------
   Arena id's
   id = arena_index + 1
@@ -103,6 +106,11 @@ mi_arena_t* mi_arena_from_id(mi_arena_id_t id) {
   return mi_arena_from_index(mi_arena_id_index(id));
 }

+static size_t mi_arena_info_slices(mi_arena_t* arena) {
+  return arena->info_slices;
+}
+
+
 /* -----------------------------------------------------------
   Util
@@ -114,14 +122,6 @@ static size_t mi_arena_size(mi_arena_t* arena) {
   return mi_size_of_slices(arena->slice_count);
 }

-static size_t mi_arena_info_slices(void) {
-  const size_t os_page_size = _mi_os_page_size();
-  const size_t info_size    = _mi_align_up(sizeof(mi_arena_t), os_page_size) + os_page_size; // + guard page
-  const size_t info_slices  = mi_slice_count_of_size(info_size);
-  return info_slices;
-}
-
-
 // Start of the arena memory area
 static uint8_t* mi_arena_start(mi_arena_t* arena) {
   return ((uint8_t*)arena);
@@ -187,7 +187,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
   mi_arena_t* arena, size_t slice_count, bool commit, size_t tseq, mi_memid_t* memid)
 {
   size_t slice_index;
-  if (!mi_bitmap_try_find_and_clearN(&arena->slices_free, slice_count, tseq, &slice_index)) return NULL;
+  if (!mi_bitmap_try_find_and_clearN(arena->slices_free, slice_count, tseq, &slice_index)) return NULL;

   // claimed it!
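The allocation path hinges on `slices_free` being a map where a set bit means "free": claiming `slice_count` slices is one atomic find-and-clear of a run of set bits. A minimal standalone model of that claim step (one 64-bit word with C11 atomics; `try_claim_run` is an illustrative name, not mimalloc API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// A set bit means "free"; clearing a whole run atomically makes exactly one
// thread the owner of those slices (the compare-exchange retries on contention).
static bool try_claim_run(_Atomic(uint64_t)* freemap, int n, int* start) {
  const uint64_t mask = (n >= 64 ? ~UINT64_C(0) : ((UINT64_C(1) << n) - 1));
  uint64_t map = atomic_load_explicit(freemap, memory_order_relaxed);
  for (int i = 0; i + n <= 64; i++) {
    const uint64_t run = mask << i;
    if ((map & run) == run) {                          // found a run of free slices
      if (atomic_compare_exchange_strong(freemap, &map, map & ~run)) {
        *start = i;                                    // claimed it!
        return true;
      }
      i = -1;  // the map changed under us (CAS reloaded `map`): rescan from the start
    }
  }
  return false;
}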
   void* p = mi_arena_slice_start(arena, slice_index);
@@ -197,7 +197,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
   // set the dirty bits
   if (arena->memid.initially_zero) {
     // size_t dirty_count = 0;
-    memid->initially_zero = mi_bitmap_setN(&arena->slices_dirty, slice_index, slice_count, NULL);
+    memid->initially_zero = mi_bitmap_setN(arena->slices_dirty, slice_index, slice_count, NULL);
     //if (dirty_count>0) {
     //  if (memid->initially_zero) {
     //    _mi_error_message(EFAULT, "ouch1\n");
@@ -217,7 +217,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
     memid->initially_committed = true;

     // commit requested, but the range may not be committed as a whole: ensure it is committed now
-    if (!mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count)) {
+    if (!mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)) {
       // not fully committed: commit the full range and set the commit bits
       // (this may race and we may double-commit which is fine)
       bool commit_zero = false;
@@ -235,7 +235,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
       }
       #endif
       size_t already_committed_count = 0;
-      mi_bitmap_setN(&arena->slices_committed, slice_index, slice_count, &already_committed_count);
+      mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count);
       if (already_committed_count < slice_count) {
         // todo: also decrease total
         mi_stat_decrease(_mi_stats_main.committed, mi_size_of_slices(already_committed_count));
@@ -245,13 +245,13 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
   }
   else {
     // no need to commit, but check if already fully committed
-    memid->initially_committed = mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count);
+    memid->initially_committed = mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count);
   }

-  mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
-  if (commit) { mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count)); }
-  mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count));
-  // mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
+  mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
+  if (commit) { mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); }
+  mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));
+  // mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));

   return p;
 }
@@ -285,8 +285,8 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
   }

   // check arena bounds
-  const size_t min_reserve = mi_size_of_slices(mi_arena_info_slices() + 1);
-  const size_t max_reserve = MI_BITMAP_MAX_BITS * MI_ARENA_SLICE_SIZE;
+  const size_t min_reserve = 8 * MI_ARENA_SLICE_SIZE;  // hope that fits minimal bitmaps?
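For scale, a quick standalone check of what these clamps evaluate to; the two constants are assumptions for illustration (a 64 KiB arena slice and a 256 Ki-bit maximum bitmap, which yields the 16 GiB noted in the comment), the real values live in the mimalloc headers:

#include <stdio.h>

#define SLICE_SIZE     (64 * 1024ULL)    // assumed MI_ARENA_SLICE_SIZE
#define MAX_BIT_COUNT  (256 * 1024ULL)   // assumed MI_BITMAP_MAX_BIT_COUNT

int main(void) {
  unsigned long long min_reserve = 8 * SLICE_SIZE;              // minimal bitmaps must fit
  unsigned long long max_reserve = MAX_BIT_COUNT * SLICE_SIZE;  // per-arena addressing limit
  printf("min %llu KiB, max %llu GiB\n", min_reserve / 1024, max_reserve >> 30);
  return 0;  // prints: min 512 KiB, max 16 GiB
}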
+  const size_t max_reserve = MI_BITMAP_MAX_BIT_COUNT * MI_ARENA_SLICE_SIZE;  // 16 GiB
   if (arena_reserve < min_reserve) {
     arena_reserve = min_reserve;
   }
@@ -494,10 +494,10 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
       _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1);
       _mi_page_free_collect(page, false);  // update `used` count
-      mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
-      mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
-      mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count));
-      mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
+      mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
+      mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
+      mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));
+      mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
       mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
       mi_assert_internal(_mi_ptr_page(page)==page);
       mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page);
@@ -670,9 +670,9 @@ void _mi_arena_page_free(mi_page_t* page) {
     size_t slice_count;
     mi_arena_t* arena = mi_page_arena(page, &slice_index, &slice_count);

-    mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
-    mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
-    mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
     mi_assert_internal(mi_pairmap_is_clear(&arena->pages_abandoned[bin], slice_index));
   }
   #endif
@@ -701,10 +701,10 @@ static void mi_arena_page_abandon_no_stat(mi_page_t* page) {
     size_t slice_count;
     mi_arena_t* arena = mi_page_arena(page, &slice_index, &slice_count);
     mi_assert_internal(!mi_page_is_singleton(page));
-    mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
-    mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
-    mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
-    mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));

     mi_page_set_abandoned_mapped(page);
     bool were_zero = mi_pairmap_set(&arena->pages_abandoned[bin], slice_index);
@@ -757,9 +757,9 @@ void _mi_arena_page_unabandon(mi_page_t* page) {
     size_t slice_count;
     mi_arena_t* arena = mi_page_arena(page, &slice_index, &slice_count);

-    mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count));
-    mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count));
-    mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
+    mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));

     // this busy waits until a concurrent reader (from alloc_abandoned) is done
     mi_pairmap_clear_while_not_busy(&arena->pages_abandoned[bin], slice_index);
@@ -876,8 +876,8 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
     return;
   }
   mi_assert_internal(slice_index < arena->slice_count);
-  mi_assert_internal(slice_index >= mi_arena_info_slices());
-  if (slice_index < mi_arena_info_slices() || slice_index > arena->slice_count) {
+  mi_assert_internal(slice_index >= mi_arena_info_slices(arena));
+  if (slice_index < mi_arena_info_slices(arena) || slice_index > arena->slice_count) {
     _mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
     return;
   }
@@ -907,7 +907,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
   }

   // and make it available to others again
-  bool all_inuse = mi_bitmap_setN(&arena->slices_free, slice_index, slice_count, NULL);
+  bool all_inuse = mi_bitmap_setN(arena->slices_free, slice_index, slice_count, NULL);
   if (!all_inuse) {
     _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", mi_arena_slice_start(arena,slice_index), mi_size_of_slices(slice_count));
     return;
@@ -989,6 +989,29 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t*
   return true;
 }

+static size_t mi_arena_info_slices_needed(size_t slice_count, size_t* bitmap_base) {
+  if (slice_count == 0) slice_count = MI_BITMAP_CHUNK_BITS;
+  mi_assert_internal((slice_count % MI_BITMAP_CHUNK_BITS) == 0);
+  const size_t base_size     = _mi_align_up(sizeof(mi_arena_t), MI_BITMAP_CHUNK_SIZE);
+  const size_t bitmaps_size  = 4 * mi_bitmap_size(slice_count,NULL);
+  const size_t pairmaps_size = MI_BIN_COUNT * 2 * mi_bitmap_size(slice_count,NULL);
+  const size_t size          = base_size + bitmaps_size + pairmaps_size;
+
+  const size_t os_page_size = _mi_os_page_size();
+  const size_t info_size    = _mi_align_up(size, os_page_size) + os_page_size; // + guard page
+  const size_t info_slices  = mi_slice_count_of_size(info_size);
+
+  if (bitmap_base != NULL) *bitmap_base = base_size;
+  return info_slices;
+}
+
+static mi_bitmap_t* mi_arena_bitmap_init(size_t slice_count, uint8_t** base) {
+  mi_bitmap_t* bitmap = (mi_bitmap_t*)(*base);
+  *base = (*base) + mi_bitmap_init(bitmap, slice_count, true /* already zero */);
+  return bitmap;
+}
+
+
 static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept
 {
   mi_assert(!is_large || (memid.initially_committed && memid.is_pinned));
@@ -1003,23 +1026,25 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); }

-  const size_t info_slices = mi_arena_info_slices();
-  const size_t bcount      = size / MI_ARENA_SLICE_SIZE;  // divide down
-  if (bcount < info_slices+1) {
+  const size_t slice_count = _mi_align_down(size / MI_ARENA_SLICE_SIZE, MI_BITMAP_CHUNK_BITS);
+  if (slice_count > MI_BITMAP_MAX_BIT_COUNT) {  // 16 GiB for now
+    // todo: allow larger areas (either by splitting it up in arena's or having larger arena's)
+    _mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_slices(MI_BITMAP_MAX_BIT_COUNT)/MI_MiB);
+    return false;
+  }
+  size_t bitmap_base;
+  const size_t info_slices = mi_arena_info_slices_needed(slice_count, &bitmap_base);
+  if (slice_count < info_slices+1) {
     _mi_warning_message("cannot use OS memory since it is not large enough (size %zu KiB, minimum required is %zu KiB)", size/MI_KiB, mi_size_of_slices(info_slices+1)/MI_KiB);
     return false;
   }
-  if (bcount > MI_BITMAP_MAX_BITS) {
-    // todo: allow larger areas (either by splitting it up in arena's or having larger arena's)
-    _mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_slices(MI_BITMAP_MAX_BITS)/MI_MiB);
-    return false;
-  }
+
   mi_arena_t* arena = (mi_arena_t*)start;

   // commit & zero if needed
   bool is_zero = memid.initially_zero;
   if (!memid.initially_committed) {
-    _mi_os_commit(arena, mi_size_of_slices(info_slices), &is_zero, &_mi_stats_main);
+    _mi_os_commit(arena, mi_size_of_slices(info_slices), NULL, &_mi_stats_main);
   }
   if (!is_zero) {
     _mi_memzero(arena, mi_size_of_slices(info_slices));
@@ -1029,34 +1054,37 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->id           = _mi_arena_id_none();
   arena->memid        = memid;
   arena->exclusive    = exclusive;
-  arena->slice_count  = bcount;
+  arena->slice_count  = slice_count;
+  arena->info_slices  = info_slices;
   arena->numa_node    = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
   arena->is_large     = is_large;
   arena->purge_expire = 0;
   mi_lock_init(&arena->abandoned_visit_lock);

   // init bitmaps
-  mi_bitmap_init(&arena->slices_free,true);
-  mi_bitmap_init(&arena->slices_committed,true);
-  mi_bitmap_init(&arena->slices_dirty,true);
-  mi_bitmap_init(&arena->slices_purge,true);
+  uint8_t* base = mi_arena_start(arena) + bitmap_base;
+  arena->slices_free      = mi_arena_bitmap_init(slice_count,&base);
+  arena->slices_committed = mi_arena_bitmap_init(slice_count,&base);
+  arena->slices_dirty     = mi_arena_bitmap_init(slice_count,&base);
+  arena->slices_purge     = mi_arena_bitmap_init(slice_count,&base);
   for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) {
-    mi_pairmap_init(&arena->pages_abandoned[i],true);
+    mi_pairmap_init(&arena->pages_abandoned[i], mi_arena_bitmap_init(slice_count, &base), mi_arena_bitmap_init(slice_count, &base));
   }
+  mi_assert_internal(mi_size_of_slices(info_slices) >= (size_t)(base - mi_arena_start(arena)));

   // reserve our meta info (and reserve slices outside the memory area)
-  mi_bitmap_unsafe_setN(&arena->slices_free, info_slices /* start */, arena->slice_count - info_slices);
+  mi_bitmap_unsafe_setN(arena->slices_free, info_slices /* start */, arena->slice_count - info_slices);
   if (memid.initially_committed) {
-    mi_bitmap_unsafe_setN(&arena->slices_committed, 0, arena->slice_count);
+    mi_bitmap_unsafe_setN(arena->slices_committed, 0, arena->slice_count);
   }
   else {
-    mi_bitmap_setN(&arena->slices_committed, 0, info_slices, NULL);
+    mi_bitmap_setN(arena->slices_committed, 0, info_slices, NULL);
   }
   if (!memid.initially_zero) {
-    mi_bitmap_unsafe_setN(&arena->slices_dirty, 0, arena->slice_count);
+    mi_bitmap_unsafe_setN(arena->slices_dirty, 0, arena->slice_count);
   }
   else {
-    mi_bitmap_setN(&arena->slices_dirty, 0, info_slices, NULL);
+    mi_bitmap_setN(arena->slices_dirty, 0, info_slices, NULL);
   }

   return mi_arena_add(arena, arena_id, &_mi_stats_main);
@@ -1117,7 +1145,7 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t slice_count, mi_bitmap_t* bitmap, bool invert)
_mi_output_message("%s%s:\n", prefix, header); size_t bit_count = 0; size_t bit_set_count = 0; - for (int i = 0; i < MI_BITMAP_CHUNK_COUNT && bit_count < slice_count; i++) { + for (int i = 0; i < mi_bitmap_chunk_count(bitmap) && bit_count < slice_count; i++) { char buf[MI_BITMAP_CHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf)); mi_bitmap_chunk_t* chunk = &bitmap->chunks[i]; for (size_t j = 0, k = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) { @@ -1161,12 +1189,12 @@ void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) slice_total += arena->slice_count; _mi_output_message("arena %zu: %zu slices (%zu MiB)%s\n", i, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : "")); if (show_inuse) { - free_total += mi_debug_show_bitmap(" ", "in-use slices", arena->slice_count, &arena->slices_free, true); + free_total += mi_debug_show_bitmap(" ", "in-use slices", arena->slice_count, arena->slices_free, true); } - mi_debug_show_bitmap(" ", "committed slices", arena->slice_count, &arena->slices_committed, false); + mi_debug_show_bitmap(" ", "committed slices", arena->slice_count, arena->slices_committed, false); // todo: abandoned slices if (show_purge) { - purge_total += mi_debug_show_bitmap(" ", "purgeable slices", arena->slice_count, &arena->slices_purge, false); + purge_total += mi_debug_show_bitmap(" ", "purgeable slices", arena->slice_count, arena->slices_purge, false); } } if (show_inuse) _mi_output_message("total inuse slices : %zu\n", slice_total - free_total); @@ -1262,7 +1290,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices, const size_t size = mi_size_of_slices(slices); void* const p = mi_arena_slice_start(arena, slice_index); bool needs_recommit; - if (mi_bitmap_is_setN(&arena->slices_committed, slice_index, slices)) { + if (mi_bitmap_is_setN(arena->slices_committed, slice_index, slices)) { // all slices are committed, we can purge freely needs_recommit = _mi_os_purge(p, size, stats); } @@ -1277,11 +1305,11 @@ static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices, } // clear the purged slices - mi_bitmap_clearN(&arena->slices_purge, slices, slice_index); + mi_bitmap_clearN(arena->slices_purge, slices, slice_index); // update committed bitmap if (needs_recommit) { - mi_bitmap_clearN(&arena->slices_committed, slices, slice_index); + mi_bitmap_clearN(arena->slices_committed, slices, slice_index); } } diff --git a/src/bitmap.c b/src/bitmap.c index a6c9e879..4156cfd1 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -64,7 +64,7 @@ static inline mi_bfield_t mi_bfield_mask(size_t bit_count, size_t shiftl) { // Find the least significant bit that can be xset (0 for MI_BIT_SET, 1 for MI_BIT_CLEAR). // return false if `x==~0` (for MI_BIT_SET) or `x==0` for MI_BIT_CLEAR (with `*idx` undefined) and true otherwise, // with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). -static inline bool mi_bfield_find_least_to_xset(mi_bit_t set, mi_bfield_t x, size_t* idx) { +static inline bool mi_bfield_find_least_to_xset(mi_xset_t set, mi_bfield_t x, size_t* idx) { return mi_bfield_find_least_bit((set ? ~x : x), idx); } @@ -85,7 +85,7 @@ static inline bool mi_bfield_atomic_clear(_Atomic(mi_bfield_t)*b, size_t idx) { } // Set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0). 
-static inline bool mi_bfield_atomic_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) { +static inline bool mi_bfield_atomic_xset(mi_xset_t set, _Atomic(mi_bfield_t)*b, size_t idx) { if (set) { return mi_bfield_atomic_set(b, idx); } @@ -115,7 +115,7 @@ static inline bool mi_bfield_atomic_clear2(_Atomic(mi_bfield_t)*b, size_t idx, b } // Set/clear a pair of bits atomically, and return true of the mask bits transitioned from all 0's to 1's (or all 1's to 0's) -static inline bool mi_bfield_atomic_xset2(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx, bool* already_xset) { +static inline bool mi_bfield_atomic_xset2(mi_xset_t set, _Atomic(mi_bfield_t)*b, size_t idx, bool* already_xset) { if (set) { return mi_bfield_atomic_set2(b, idx, already_xset); } @@ -143,7 +143,7 @@ static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield } // Set/clear a mask set of bits atomically, and return true of the mask bits transitioned from all 0's to 1's (or all 1's to 0's) -static inline bool mi_bfield_atomic_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, size_t* already_xset) { +static inline bool mi_bfield_atomic_xset_mask(mi_xset_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask, size_t* already_xset) { mi_assert_internal(mask != 0); if (set) { return mi_bfield_atomic_set_mask(b, mask, already_xset); @@ -169,7 +169,7 @@ static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx } // Tries to set/clear a bit atomically, and returns true if the bit atomically transitioned from 0 to 1 (or 1 to 0) -static inline bool mi_bfield_atomic_try_xset( mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) { +static inline bool mi_bfield_atomic_try_xset( mi_xset_t set, _Atomic(mi_bfield_t)*b, size_t idx) { mi_assert_internal(idx < MI_BFIELD_BITS); // for a single bit, we can always just set/clear and test afterwards if it was actually us that changed it first return mi_bfield_atomic_xset(set, b, idx); @@ -201,7 +201,7 @@ static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bf // Tries to (un)set a mask atomically, and returns true if the mask bits atomically transitioned from 0 to mask (or mask to 0) // and false otherwise (leaving the bit field as is). -static inline bool mi_bfield_atomic_try_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) { +static inline bool mi_bfield_atomic_try_xset_mask(mi_xset_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask ) { mi_assert_internal(mask != 0); if (set) { return mi_bfield_atomic_try_set_mask(b, mask); @@ -228,7 +228,7 @@ static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t by // Tries to set/clear a byte atomically, and returns true if the byte atomically transitioned from 0 to 0xFF (or 0xFF to 0) // and false otherwise (leaving the bit field as is). -static inline bool mi_bfield_atomic_try_xset8(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t byte_idx) { +static inline bool mi_bfield_atomic_try_xset8(mi_xset_t set, _Atomic(mi_bfield_t)*b, size_t byte_idx) { mi_assert_internal(byte_idx < MI_BFIELD_SIZE); const mi_bfield_t mask = ((mi_bfield_t)0xFF)<<(byte_idx*8); return mi_bfield_atomic_try_xset_mask(set, b, mask); @@ -264,7 +264,7 @@ static inline bool mi_bfield_atomic_is_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfi // Check if all bits corresponding to a mask are set/cleared. 
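These mask primitives carry the whole protocol: a set/clear reports whether every bit in the mask transitioned, and optionally how many bits were already set so callers can adjust commit statistics. A standalone sketch of that contract with C11 atomics (simplified to one 64-bit field; `atomic_set_mask` is an illustrative stand-in, and `__builtin_popcountll` assumes GCC/Clang):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Returns true only if *all* mask bits transitioned 0 -> 1, and reports how
// many of them were already set before the operation.
static bool atomic_set_mask(_Atomic(uint64_t)* b, uint64_t mask, size_t* already_set) {
  const uint64_t old = atomic_fetch_or_explicit(b, mask, memory_order_acq_rel);
  if (already_set != NULL) {
    *already_set = (size_t)__builtin_popcountll(old & mask);  // bits set beforehand
  }
  return ((old & mask) == 0);  // true: we were the ones that set every bit
}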
-static inline bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
+static inline bool mi_bfield_atomic_is_xset_mask(mi_xset_t set, _Atomic(mi_bfield_t)*b, mi_bfield_t mask) {
   mi_assert_internal(mask != 0);
   if (set) {
     return mi_bfield_atomic_is_set_mask(b, mask);
@@ -276,7 +276,7 @@ static inline bool mi_bfield_atomic_is_xset_mask(mi_bit_t set, _Atomic(mi_bfield
 }

 // Check if a bit is set/clear
-// static inline bool mi_bfield_atomic_is_xset(mi_bit_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
+// static inline bool mi_bfield_atomic_is_xset(mi_xset_t set, _Atomic(mi_bfield_t)*b, size_t idx) {
 //   mi_assert_internal(idx < MI_BFIELD_BITS);
 //   const mi_bfield_t mask = mi_bfield_one()<<idx;
 //   return mi_bfield_atomic_is_xset_mask(set, b, mask);
 // }

-static inline bool mi_bitmap_chunk_xset2(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, bool* all_already_xset) {
+static inline bool mi_bitmap_chunk_xset2(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx, bool* all_already_xset) {
   mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
   const size_t i = cidx / MI_BFIELD_BITS;
   const size_t idx = cidx % MI_BFIELD_BITS;
   return mi_bfield_atomic_xset2(set, &chunk->bfields[i], idx, all_already_xset);
 }
@@ -309,7 +309,7 @@ static inline bool mi_bitmap_chunk_clear2(mi_bitmap_chunk_t* chunk, size_t cidx,

 // Set/clear a sequence of `n` bits within a chunk.
 // Returns true if all bits transitioned from 0 to 1 (or 1 to 0).
-static bool mi_bitmap_chunk_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, size_t* pall_already_xset) {
+static bool mi_bitmap_chunk_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, size_t* pall_already_xset) {
   mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
   mi_assert_internal(n>0);
   bool all_transition = true;
@@ -349,7 +349,7 @@ static inline bool mi_bitmap_chunk_clearN(mi_bitmap_chunk_t* chunk, size_t cidx,

 // check if a pair of bits is set/clear
-static inline bool mi_bitmap_chunk_is_xset2(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx) {
+static inline bool mi_bitmap_chunk_is_xset2(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx) {
   mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
   const size_t i = cidx / MI_BFIELD_BITS;
   const size_t idx = cidx % MI_BFIELD_BITS;
@@ -369,7 +369,7 @@ static inline bool mi_bitmap_chunk_is_clear2(mi_bitmap_chunk_t* chunk, size_t ci

 // Check if a sequence of `n` bits within a chunk are all set/cleared.
-static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
+static bool mi_bitmap_chunk_is_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
   mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
   mi_assert_internal(n>0);
   size_t idx = cidx % MI_BFIELD_BITS;
@@ -393,7 +393,7 @@ static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, siz

-static inline bool mi_bitmap_chunk_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx) {
+static inline bool mi_bitmap_chunk_try_xset(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx) {
   mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
   const size_t i = cidx / MI_BFIELD_BITS;
   const size_t idx = cidx % MI_BFIELD_BITS;
@@ -408,7 +408,7 @@ static inline bool mi_bitmap_chunk_try_clear(mi_bitmap_chunk_t* chunk, size_t ci
   return mi_bitmap_chunk_try_xset(MI_BIT_CLEAR, chunk, cidx);
 }

-static inline bool mi_bitmap_chunk_try_xset8(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx) {
+static inline bool mi_bitmap_chunk_try_xset8(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx) {
   mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS);
   const size_t i = byte_idx / MI_BFIELD_SIZE;
   const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE;
@@ -426,7 +426,7 @@ static inline bool mi_bitmap_chunk_try_clear8(mi_bitmap_chunk_t* chunk, size_t b

 // Try to atomically set/clear a sequence of `n` bits within a chunk.
 // Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
 // and false otherwise leaving all bit fields as is.
-static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
+static bool mi_bitmap_chunk_try_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) {
   mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
   mi_assert_internal(n>0);
   if (n==0) return true;
@@ -442,7 +442,7 @@ static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, si
   if (m > n) { m = n; }
   mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
   mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS);
-  const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx); 
+  const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx);
   if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false;

   // done?
@@ -509,7 +509,7 @@ static inline bool mi_mm256_is_zero( __m256i vec) {

 // find least 0/1-bit in a chunk and try to set/clear it atomically
 // set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success.
 // todo: try neon version
-static inline bool mi_bitmap_chunk_find_and_try_xset(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t* pidx) {
+static inline bool mi_bitmap_chunk_find_and_try_xset(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t* pidx) {
 #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
   while (true) {
     const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
@@ -644,7 +644,7 @@ static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk,
 // and try to clear them atomically.
 // set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
 static bool mi_bitmap_chunk_find_and_try_clearNX(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
-  if (n == 0 || n > MI_BFIELD_BITS) return false; 
+  if (n == 0 || n > MI_BFIELD_BITS) return false;
   const mi_bfield_t mask = mi_bfield_mask(n, 0);
   for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
     mi_bfield_t b = chunk->bfields[i];
@@ -683,14 +683,14 @@ static bool mi_bitmap_chunk_find_and_try_clearNX(mi_bitmap_chunk_t* chunk, size_
 // find a sequence of `n` bits in a chunk with `n < MI_BITMAP_CHUNK_BITS` with all bits set,
 // and try to clear them atomically.
 // set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
-static bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
+static bool mi_bitmap_chunk_find_and_try_clearN_(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
   if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;  // cannot be more than a chunk
-  if (n < MI_BFIELD_BITS) return mi_bitmap_chunk_find_and_try_clearNX(chunk, n, pidx);
+  // if (n < MI_BFIELD_BITS) return mi_bitmap_chunk_find_and_try_clearNX(chunk, n, pidx);

   // we align on a field, and require `field_count` consecutive fields to be all set
   // n >= MI_BFIELD_BITS; find a first field that is all set
   const size_t field_count = _mi_divide_up(n, MI_BFIELD_BITS);  // we need this many fields
-  for (size_t i = 0; i <= MI_BITMAP_CHUNK_FIELDS - field_count; i++)
+  for (size_t i = 0; i <= MI_BITMAP_CHUNK_FIELDS - field_count; i++)
   {
     // first pre-scan for a range of fields that are all set
     bool allset = true;
@@ -721,6 +721,14 @@ static bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t
 }

+static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
+  if (n==1) return mi_bitmap_chunk_find_and_try_clear(chunk, pidx);
+  if (n==8) return mi_bitmap_chunk_find_and_try_clear8(chunk, pidx);
+  if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;  // cannot be more than a chunk
+  if (n < MI_BFIELD_BITS) return mi_bitmap_chunk_find_and_try_clearNX(chunk, n, pidx);
+  return mi_bitmap_chunk_find_and_try_clearN_(chunk, n, pidx);
+}
+

 // are all bits in a bitmap chunk set?
 // static inline bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
 // #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
@@ -755,70 +763,76 @@ static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
 }

 /* --------------------------------------------------------------------------------
-  epochset (for now for 32-bit sets only)
+  chunkmap (for now for 32-bit sets only)
-------------------------------------------------------------------------------- */

-static void mi_epochset_split(mi_epochset_t es, uint32_t* bset, size_t* epoch) {
-  *bset = (uint32_t)es;
-  *epoch = (size_t)(es >> 32);
+static void mi_chunkmap_split(mi_chunkmap_t cm, mi_cmap_t* cmap, mi_epoch_t* epoch) {
+  *cmap = (mi_cmap_t)cm;
+  *epoch = (mi_epoch_t)(cm >> MI_CHUNKMAP_BITS);
 }

-static mi_epochset_t mi_epochset_join(uint32_t bset, size_t epoch) {
-  return ((uint64_t)epoch << 32) | bset;
+static mi_chunkmap_t mi_chunkmap_join(mi_cmap_t cmap, mi_epoch_t epoch) {
  return ((mi_chunkmap_t)epoch << MI_CHUNKMAP_BITS) | cmap;
 }

 // setting a bit increases the epoch
-static void mi_epochset_set(_Atomic(mi_epochset_t)*es, size_t idx) {
-  mi_assert(idx < 32);
-  size_t epoch;
-  uint32_t bset;
-  mi_epochset_t es_new;
-  mi_epochset_t es_old = mi_atomic_load_relaxed(es);
+static void mi_chunkmap_set(_Atomic(mi_chunkmap_t)* cm, size_t idx) {
+  mi_assert(idx < MI_CHUNKMAP_BITS);
+  mi_epoch_t epoch;
+  mi_cmap_t cmap;
+  mi_chunkmap_t cm_new;
+  mi_chunkmap_t cm_old = mi_atomic_load_relaxed(cm);
   do {
-    mi_epochset_split(es_old, &bset, &epoch);
-    es_new = mi_epochset_join(bset | (MI_ZU(1)<<idx), epoch+1);
-  } while (!mi_atomic_cas_weak_acq_rel(es, &es_old, es_new));
+    mi_chunkmap_split(cm_old, &cmap, &epoch);
+    cm_new = mi_chunkmap_join(cmap | (((mi_cmap_t)1)<<idx), epoch+1);
+  } while (!mi_atomic_cas_weak_acq_rel(cm, &cm_old, cm_new));
 }

 // clearing a bit only succeeds if the epoch is unchanged (so we never clear under a concurrent set)
-static bool mi_epochset_try_clear(_Atomic(mi_epochset_t)*es, size_t idx, size_t expected_epoch) {
-  mi_assert(idx < 32);
-  size_t epoch;
-  uint32_t bset;
-  mi_epochset_t es_new;
-  mi_epochset_t es_old = mi_atomic_load_relaxed(es);
-  do {
-    mi_epochset_split(es_old, &bset, &epoch);
-    if (epoch != expected_epoch) return false;
-    es_new = mi_epochset_join(bset & ~(MI_ZU(1)<<idx), epoch);  // clearing does not increase the epoch
-  } while (!mi_atomic_cas_weak_acq_rel(es, &es_old, es_new));
-  return true;
+static bool mi_chunkmap_try_clear(_Atomic(mi_chunkmap_t)* cm, size_t idx, mi_epoch_t expected_epoch) {
+  mi_assert(idx < MI_CHUNKMAP_BITS);
+  mi_epoch_t epoch;
+  mi_cmap_t cmap;
+  mi_chunkmap_t cm_new;
+  mi_chunkmap_t cm_old = mi_atomic_load_relaxed(cm);
+  do {
+    mi_chunkmap_split(cm_old, &cmap, &epoch);
+    if (epoch != expected_epoch) return false;
+    cm_new = mi_chunkmap_join(cmap & ~(((mi_cmap_t)1)<<idx), epoch);  // clearing does not increase the epoch
+  } while (!mi_atomic_cas_weak_acq_rel(cm, &cm_old, cm_new));
+  return true;
 }

 /* --------------------------------------------------------------------------------
  bitmap chunkmap
-------------------------------------------------------------------------------- */

-static void mi_bitmap_anyset_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
-  mi_assert(chunk_idx < MI_BITMAP_CHUNK_COUNT);
-  mi_epochset_set(&bitmap->any_set, chunk_idx);
+static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
+  mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  const size_t cmidx = chunk_idx / MI_CHUNKMAP_BITS;
+  const size_t idx = chunk_idx % MI_CHUNKMAP_BITS;
+  mi_chunkmap_set(&bitmap->chunk_maps[cmidx], idx);
 }

-static bool mi_bitmap_anyset_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx, size_t epoch) {
-  mi_assert(chunk_idx < MI_BITMAP_CHUNK_COUNT);
-  return mi_epochset_try_clear(&bitmap->any_set, chunk_idx, epoch);
+static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx, mi_epoch_t epoch) {
+  mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  const size_t cmidx = chunk_idx / MI_CHUNKMAP_BITS;
+  const size_t idx = chunk_idx % MI_CHUNKMAP_BITS;
+  return mi_chunkmap_try_clear(&bitmap->chunk_maps[cmidx], idx, epoch);
 }

-static uint32_t mi_bitmap_anyset(mi_bitmap_t* bitmap, size_t* epoch) {
-  uint32_t bset;
-  mi_epochset_split(mi_atomic_load_relaxed(&bitmap->any_set), &bset, epoch);
-  return bset;
+static mi_cmap_t mi_bitmap_chunkmap(mi_bitmap_t* bitmap, size_t chunk_idx, mi_epoch_t* epoch) {
+  mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  const size_t cmidx = chunk_idx / MI_CHUNKMAP_BITS;
+  mi_assert_internal(cmidx < bitmap->chunk_map_count);
+  mi_cmap_t cmap;
+  mi_chunkmap_split(mi_atomic_load_relaxed(&bitmap->chunk_maps[cmidx]), &cmap, epoch);
+  return cmap;
 }

-static size_t mi_bitmap_epoch(mi_bitmap_t* bitmap) {
-  size_t epoch;
-  uint32_t bset;
-  mi_epochset_split(mi_atomic_load_relaxed(&bitmap->any_set), &bset, &epoch);
+static mi_epoch_t mi_bitmap_chunkmap_epoch(mi_bitmap_t* bitmap, size_t chunk_idx) {
+  mi_epoch_t epoch;
+  mi_bitmap_chunkmap(bitmap, chunk_idx, &epoch);
   return epoch;
 }

@@ -826,17 +840,38 @@ static size_t mi_bitmap_epoch(mi_bitmap_t* bitmap) {
  bitmap
-------------------------------------------------------------------------------- */

+size_t mi_bitmap_size(size_t bit_count, size_t* pchunk_count) {
+  mi_assert_internal((bit_count % MI_BITMAP_CHUNK_BITS) == 0);
+  bit_count = _mi_align_up(bit_count, MI_BITMAP_CHUNK_BITS);
+  mi_assert_internal(bit_count <= MI_BITMAP_MAX_BIT_COUNT);
+  mi_assert_internal(bit_count > 0);
+  const size_t chunk_count = bit_count / MI_BITMAP_CHUNK_BITS;
+  mi_assert_internal(chunk_count >= 1);
+  const size_t size = offsetof(mi_bitmap_t,chunks) + (chunk_count * MI_BITMAP_CHUNK_SIZE);
+  mi_assert_internal( (size%MI_BITMAP_CHUNK_SIZE) == 0 );
+  if (pchunk_count != NULL) { *pchunk_count = chunk_count; }
+  return size;
+}
+
 // initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
-void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero) {
+// returns the size of the bitmap
+size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero) {
+  size_t chunk_count;
+  const size_t size = mi_bitmap_size(bit_count, &chunk_count);
   if (!already_zero) {
-    _mi_memzero_aligned(bitmap, sizeof(*bitmap));
+    _mi_memzero_aligned(bitmap, size);
   }
+  bitmap->chunk_map_count = _mi_divide_up(chunk_count, MI_CHUNKMAP_BITS);
+  mi_assert_internal(bitmap->chunk_map_count <= MI_BITMAP_MAX_CHUNKMAPS);
+  bitmap->chunk_count = chunk_count;
+  mi_assert_internal(bitmap->chunk_count <= MI_BITMAP_MAX_CHUNK_COUNT);
+  return size;
+}

 // Set a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
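Since a bitmap is now a variable-length object (a header followed by `chunk_count` chunks), its byte size is computed rather than fixed at compile time. A standalone sketch of the same layout arithmetic (the 256-bit chunk size and the field layout are assumptions for illustration):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CHUNK_BITS  256                     // bits per chunk (assumed)
#define CHUNK_SIZE  (CHUNK_BITS / 8)        // 32 bytes per chunk

typedef struct bitmap_s {
  size_t chunk_map_count;
  size_t chunk_count;
  _Alignas(CHUNK_SIZE) uint8_t chunks[1];   // `chunk_count` chunks actually follow
} bitmap_t;

// size in bytes of a bitmap covering `bit_count` bits (a multiple of CHUNK_BITS)
static size_t bitmap_size(size_t bit_count, size_t* pchunk_count) {
  assert(bit_count % CHUNK_BITS == 0);
  const size_t chunk_count = bit_count / CHUNK_BITS;
  if (pchunk_count != NULL) *pchunk_count = chunk_count;
  return offsetof(bitmap_t, chunks) + chunk_count * CHUNK_SIZE;
}

int main(void) {
  size_t chunks;
  // e.g. 1024 slices -> 4 chunks of 256 bits each
  printf("%zu bytes, %zu chunks\n", bitmap_size(1024, &chunks), chunks);
  return 0;
}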
 void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
   mi_assert_internal(n>0);
-  mi_assert_internal(idx + n<=MI_BITMAP_MAX_BITS);
+  mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap));

   // first chunk
   size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
@@ -844,17 +879,17 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
   size_t m = MI_BITMAP_CHUNK_BITS - cidx;
   if (m > n) { m = n; }
   mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, m, NULL);
-  mi_bitmap_anyset_set(bitmap, chunk_idx);
+  mi_bitmap_chunkmap_set(bitmap, chunk_idx);

   // n can be large so use memset for efficiency for all in-between chunks
   chunk_idx++;
   n -= m;
   const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS;
   if (mid_chunks > 0) {
-    _mi_memset(&bitmap->chunks[chunk_idx], ~0, mid_chunks * (MI_BITMAP_CHUNK_BITS/8));
+    _mi_memset(&bitmap->chunks[chunk_idx], ~0, mid_chunks * MI_BITMAP_CHUNK_SIZE);
     const size_t end_chunk = chunk_idx + mid_chunks;
     while (chunk_idx < end_chunk) {
-      mi_bitmap_anyset_set(bitmap, chunk_idx);
+      mi_bitmap_chunkmap_set(bitmap, chunk_idx);
       chunk_idx++;
     }
     n -= (mid_chunks * MI_BITMAP_CHUNK_BITS);
@@ -865,28 +900,29 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
   // last chunk
   if (n > 0) {
     mi_assert_internal(n < MI_BITMAP_CHUNK_BITS);
-    mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS);
+    mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
     mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], 0, n, NULL);
-    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    mi_bitmap_chunkmap_set(bitmap, chunk_idx);
   }
 }

 // Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0),
 // and false otherwise leaving the bitmask as is.
-static bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
-  mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
+static bool mi_bitmap_try_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
+  mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
   if (set) {
-    // first set the anyset since it is a conservative approximation (increases epoch)
-    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    // first set the chunkmap since it is a conservative approximation (increases epoch)
+    mi_bitmap_chunkmap_set(bitmap, chunk_idx);
    // then actually try to set it atomically
     return mi_bitmap_chunk_try_set(&bitmap->chunks[chunk_idx], cidx);
   }
   else {
-    const size_t epoch = mi_bitmap_epoch(bitmap);
+    const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap, chunk_idx);
     bool cleared = mi_bitmap_chunk_try_clear(&bitmap->chunks[chunk_idx], cidx);
-    if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-      mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
     }
     return cleared;
   }
@@ -894,22 +930,24 @@ static bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {

 // Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
 // and false otherwise leaving the bitmask as is.
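The clear path here can race with a concurrent set: after a chunk is emptied, a setter may set a bit in it just before we clear its chunkmap bit, which would hide a live chunk from the find loops. Packing an epoch next to the 32-bit map prevents this: every set bumps the epoch, and a clear of the chunkmap bit only succeeds if the epoch is unchanged. A standalone model of that packed word (the names and the 64-bit packing are illustrative, not mimalloc's exact helpers):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

// Low 32 bits: "any bit set in this chunk" map; high 32 bits: set-count epoch.
typedef uint64_t chunkmap_t;

static void cm_set(_Atomic(chunkmap_t)* cm, int idx) {  // idx < 32
  chunkmap_t old = atomic_load_explicit(cm, memory_order_relaxed);
  chunkmap_t desired;
  do {
    const uint64_t epoch = (old >> 32) + 1;             // setting bumps the epoch
    desired = (epoch << 32) | (uint32_t)(old | (UINT64_C(1) << idx));
  } while (!atomic_compare_exchange_weak(cm, &old, desired));
}

static bool cm_try_clear(_Atomic(chunkmap_t)* cm, int idx, uint64_t expected_epoch) {
  chunkmap_t old = atomic_load_explicit(cm, memory_order_relaxed);
  chunkmap_t desired;
  do {
    if ((old >> 32) != expected_epoch) return false;    // someone set a bit meanwhile
    desired = (old & ~(UINT64_C(1) << idx));            // clearing keeps the epoch
  } while (!atomic_compare_exchange_weak(cm, &old, desired));
  return true;
}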
-static bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {
-  mi_assert_internal(idx < MI_BITMAP_MAX_BITS);
+static bool mi_bitmap_try_xset8(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
+  mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
   mi_assert_internal(idx%8 == 0);
   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t byte_idx  = (idx % MI_BITMAP_CHUNK_BITS)/8;
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+
   if (set) {
-    // first set the anyset since it is a conservative approximation (increases epoch)
-    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    // first set the chunkmap since it is a conservative approximation (increases epoch)
+    mi_bitmap_chunkmap_set(bitmap, chunk_idx);
     // then actually try to set it atomically
     return mi_bitmap_chunk_try_set8(&bitmap->chunks[chunk_idx], byte_idx);
   }
   else {
-    const size_t epoch = mi_bitmap_epoch(bitmap);
+    const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap,chunk_idx);
     bool cleared = mi_bitmap_chunk_try_clear8(&bitmap->chunks[chunk_idx], byte_idx);
-    if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-      mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap,chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
     }
     return cleared;
   }
@@ -919,71 +957,63 @@ static bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) {

 // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
 // and false otherwise leaving the bitmask as is.
 // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
-static bool mi_bitmap_try_xsetN_(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
+static bool mi_bitmap_try_xsetN_(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
   mi_assert_internal(n>0);
-  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); 
-  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
-  if (n==0 || idx + n > MI_BITMAP_MAX_BITS) return false;
+  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
+  mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap));
+  if (n==0 || idx + n > mi_bitmap_max_bits(bitmap)) return false;

   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
   mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
-  mi_assert_internal(chunk_idx < MI_BFIELD_BITS);
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
   if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia

   if (set) {
-    // first set the anyset since it is a conservative approximation (increases epoch)
-    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    // first set the chunkmap since it is a conservative approximation (increases epoch)
+    mi_bitmap_chunkmap_set(bitmap, chunk_idx);
     // then actually try to set it atomically
     return mi_bitmap_chunk_try_setN(&bitmap->chunks[chunk_idx], cidx, n);
   }
   else {
-    const size_t epoch = mi_bitmap_epoch(bitmap);
+    const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap,chunk_idx);
     bool cleared = mi_bitmap_chunk_try_clearN(&bitmap->chunks[chunk_idx], cidx, n);
-    if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-      mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap,chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
     }
     return cleared;
   }
 }

-bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
+bool mi_bitmap_try_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
   mi_assert_internal(n>0 && n<=MI_BITMAP_CHUNK_BITS);
   if (n==1) return mi_bitmap_try_xset(set, bitmap, idx);
   if (n==8) return mi_bitmap_try_xset8(set, bitmap, idx);
+  // todo: add 32/64 for large pages
   return mi_bitmap_try_xsetN_(set, bitmap, idx, n);
 }

-
-// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
-// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
+// Set/clear a pair of bits at an even `idx` in the bitmap; returns `true` if both bits atomically transitioned from 0's to 1's (or 1's to 0's).
+// The pair cannot cross a chunk boundary.
-static bool mi_bitmap_xsetN_(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset ) {
-  mi_assert_internal(n>0);
-  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
-
-  //TODO: specialize?
-  //if (n==1) { return mi_bitmap_xset(set, bitmap, idx); }
-  //if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); }
-
+static bool mi_bitmap_xset_pair(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
+  mi_assert_internal((idx%2)==0);
   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
-  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
-  mi_assert_internal(chunk_idx < MI_BFIELD_BITS);
-  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
-
+  mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS);
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+
   if (set) {
-    // first set the anyset since it is a conservative approximation (increases epoch)
-    mi_bitmap_anyset_set(bitmap, chunk_idx);
+    // first set the chunkmap since it is a conservative approximation (increases epoch)
+    mi_bitmap_chunkmap_set(bitmap, chunk_idx);
     // then actually try to set it atomically
-    return mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, n, already_xset);
+    return mi_bitmap_chunk_set2(&bitmap->chunks[chunk_idx], cidx, NULL);
   }
   else {
-    const size_t epoch = mi_bitmap_epoch(bitmap);
-    size_t already_clear = 0;
-    const bool allset = mi_bitmap_chunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, &already_clear);
-    if (already_xset != NULL) { *already_xset = already_clear; }
-    if (already_clear < n && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-      mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap, chunk_idx);
+    bool already_clear = false;
+    const bool allset = mi_bitmap_chunk_clear2(&bitmap->chunks[chunk_idx], cidx, &already_clear);
+    if (!already_clear && epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
    }
     return allset;
   }
 }
@@ -991,25 +1021,67 @@

 // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
 // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
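All of these operations decompose a flat bit index the same way: chunk index first, then offset within the chunk (and inside a chunk, field and bit). A worked example with assumed 256-bit chunks and 64-bit fields:

#include <stddef.h>
#include <stdio.h>

#define CHUNK_BITS 256   // assumed chunk width
#define FIELD_BITS 64    // assumed bfield width

int main(void) {
  const size_t idx = 700;
  const size_t chunk_idx = idx / CHUNK_BITS;   // 2
  const size_t cidx      = idx % CHUNK_BITS;   // 188
  printf("chunk %zu, cidx %zu (field %zu, bit %zu)\n",
         chunk_idx, cidx, cidx / FIELD_BITS, cidx % FIELD_BITS);  // field 2, bit 60
  return 0;
}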
-bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset) {
+static bool mi_bitmap_xsetN_(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset ) {
+  mi_assert_internal(n>0);
+  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
+
+  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
+  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
+
+  if (set) {
+    // first set the chunkmap since it is a conservative approximation (increases epoch)
+    mi_bitmap_chunkmap_set(bitmap, chunk_idx);
+    // then actually try to set it atomically
+    return mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, n, already_xset);
+  }
+  else {
+    const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap,chunk_idx);
+    size_t already_clear = 0;
+    const bool allset = mi_bitmap_chunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, &already_clear);
+    if (already_xset != NULL) { *already_xset = already_clear; }
+    if (already_clear < n && epoch == mi_bitmap_chunkmap_epoch(bitmap,chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
+    }
+    return allset;
+  }
+}
+
+// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
+// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
+bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset) {
   mi_assert_internal(n>0 && n<=MI_BITMAP_CHUNK_BITS);

   //TODO: specialize?
   //if (n==1) return mi_bitmap_xset(set, bitmap, idx);
+  //if (n==2) return mi_bitmap_xset(set, bitmap, idx);
   //if (n==8) return mi_bitmap_xset8(set, bitmap, idx);
   return mi_bitmap_xsetN_(set, bitmap, idx, n, already_xset);
 }

+// Is a sequence of 2 bits already all set/cleared?
+static inline bool mi_bitmap_is_xset2(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
+  mi_assert_internal(idx + 2 <= mi_bitmap_max_bits(bitmap));
+  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
+  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+  mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS);
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  return mi_bitmap_chunk_is_xset2(set, &bitmap->chunks[chunk_idx], cidx);
+}
+
+
 // Is a sequence of n bits already all set/cleared?
-bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
+bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
   mi_assert_internal(n>0);
   mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
-  mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS);
+  mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap));

   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
   mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
-  mi_assert_internal(chunk_idx < MI_BFIELD_BITS);
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
   if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia

   return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
@@ -1020,185 +1092,121 @@ bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n)
  bitmap try_find_and_clear
-------------------------------------------------------------------------------- */

+typedef bool (mi_bitmap_find_fun_t)(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx);

-#define mi_bitmap_forall_set_chunks(bitmap,tseq,name_epoch,name_chunk_idx) \
-  { uint32_t _bit_idx; \
-    uint32_t _start = (uint32_t)(tseq % MI_EPOCHSET_BITS); \
-    size_t name_epoch; \
-    uint32_t _any_set = mi_bitmap_anyset(bitmap,&name_epoch); \
-    _any_set = mi_rotr32(_any_set, _start); \
-    while (mi_bsf32(_any_set,&_bit_idx)) { \
-      size_t name_chunk_idx = (_bit_idx + _start) % MI_BFIELD_BITS;
+static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx, mi_bitmap_find_fun_t* find_fun)
+{
+  if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;
+
+  // start chunk index -- todo: can depend on the tseq to decrease contention between threads
+  MI_UNUSED(tseq);
+  const size_t chunk_start = 0;
+  const size_t chunk_map_start = chunk_start / MI_CHUNKMAP_BITS;
+  const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS;

-#define mi_bitmap_forall_set_chunks_end() \
-      _start += _bit_idx+1;    /* so chunk_idx calculation stays valid */ \
-      _any_set >>= _bit_idx;   /* skip scanned bits (and avoid UB with (_bit_idx+1)) */ \
-      _any_set >>= 1; \
-    } \
-  }
-
-// Find a set bit in a bitmap and atomically unset it. Returns true on success,
-// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`.
-// The low `MI_BFIELD_BITS` of start are used to set the start point of the search
-// (to reduce thread contention).
-mi_decl_nodiscard static bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) {
-  mi_bitmap_forall_set_chunks(bitmap, tseq, epoch, chunk_idx)
+  // for each chunkmap entry `i`
+  for( size_t _i = 0; _i < bitmap->chunk_map_count; _i++)
   {
-    size_t cidx;
-    if mi_likely(mi_bitmap_chunk_find_and_try_clear(&bitmap->chunks[chunk_idx],&cidx)) {
-      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
-      mi_assert_internal(*pidx < MI_BITMAP_MAX_BITS);
-      return true;
-    }
-    else {
-      // we may find that all are unset only on a second iteration but that is ok as
-      // _any_set is a conservative approximation.
-      if (epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-        mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
+    size_t i = (_i + chunk_map_start);
+    if (i >= bitmap->chunk_map_count) i -= bitmap->chunk_map_count;  // adjust for the start position
+
+    const size_t chunk_idx0 = i*MI_CHUNKMAP_BITS;
+    mi_epoch_t epoch;
+    mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, chunk_idx0, &epoch);
+    if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); }  // rotate right for the start position (on the first iteration)
+
+    uint32_t cmap_idx;           // least bit of each chunk that may have bits set
+    size_t   cmap_idx_shift = 0; // shift through the cmap
+    while (mi_bsf32(cmap, &cmap_idx)) {  // find the least bit that is set
+      // adjust for the start position
+      if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; }
+      // set the chunk idx
+      const size_t chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift;
+
+      // try to find and clear N bits in that chunk
+      if (chunk_idx < mi_bitmap_chunk_count(bitmap)) {  // we can have fewer chunks than the chunkmap covers
+        if ((*find_fun)(bitmap, n, chunk_idx, epoch, pidx)) {
+          return true;
+        }
       }
+
+      // skip to the next bit
+      cmap_idx_shift += cmap_idx+1;
+      cmap >>= cmap_idx;  // skip scanned bits (and avoid UB for `cmap_idx+1`)
+      cmap >>= 1;
     }
   }
-  mi_bitmap_forall_set_chunks_end();
+
   return false;
 }

-// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero.
-// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`.
-mi_decl_nodiscard static bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx ) {
-  mi_bitmap_forall_set_chunks(bitmap,tseq, epoch, chunk_idx)
-  {
-    size_t cidx;
-    if mi_likely(mi_bitmap_chunk_find_and_try_clear8(&bitmap->chunks[chunk_idx],&cidx)) {
-      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
-      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-8);
-      mi_assert_internal((*pidx % 8) == 0);
-      return true;
-    }
-    else {
-      // we may find that all are unset only on a second iteration but that is ok as
-      // _any_set is a conservative approximation.
-      if (epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-        mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
-      }
-    }
-  }
-  mi_bitmap_forall_set_chunks_end();
-  return false;
-}
+static bool mi_bitmap_try_find_and_clearN_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
+  size_t cidx;
+  if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
+    *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
+    mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
+    return true;
+  }
+  else {
+    // we may find that all are cleared only on a second iteration but that is ok as
+    // the chunkmap is a conservative approximation.
+    if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
+    }
+    return false;
  }
}

 // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
 // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
-mi_decl_nodiscard static bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx ) {
-  // TODO: allow spanning across chunk boundaries?
-  if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;
-  mi_bitmap_forall_set_chunks(bitmap,tseq,epoch,chunk_idx)
-  {
-    size_t cidx;
-    if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx],n,&cidx)) {
-      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
-      mi_assert_internal(*pidx <= MI_BITMAP_MAX_BITS-n);
-      return true;
-    }
-    else {
-      // we may find that all are unset only on a second iteration but that is ok as
-      // _any_set is a conservative approximation.
-      if (epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-        mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch);
-      }
-    }
-  }
-  mi_bitmap_forall_set_chunks_end();
-  return false;
-}
-
-mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) {
-  if (n == 1) return mi_bitmap_try_find_and_clear(bitmap, tseq, pidx);
-  if (n == 8) return mi_bitmap_try_find_and_clear8(bitmap, tseq, pidx);
-  return mi_bitmap_try_find_and_clearN_(bitmap, n, tseq, pidx);
-}
-
-
-/* --------------------------------------------------------------------------------
-  pairmap epochset
--------------------------------------------------------------------------------- */
-
-static void mi_pairmap_anyset_set(mi_pairmap_t* pairmap, size_t chunk_idx) {
-  mi_assert(chunk_idx < MI_BITMAP_CHUNK_COUNT);
-  mi_epochset_set(&pairmap->any_set, chunk_idx);
-}
-
-static bool mi_pairmap_anyset_try_clear(mi_pairmap_t* pairmap, size_t chunk_idx, size_t epoch) {
-  mi_assert(chunk_idx < MI_BITMAP_CHUNK_COUNT);
-  return mi_epochset_try_clear(&pairmap->any_set, chunk_idx, epoch);
-}
-
-static uint32_t mi_pairmap_anyset(mi_pairmap_t* pairmap, size_t* epoch) {
-  uint32_t bset;
-  mi_epochset_split(mi_atomic_load_relaxed(&pairmap->any_set), &bset, epoch);
-  return bset;
-}
-
-static size_t mi_pairmap_epoch(mi_pairmap_t* pairmap) {
-  size_t epoch;
-  uint32_t bset;
-  mi_epochset_split(mi_atomic_load_relaxed(&pairmap->any_set), &bset, &epoch);
-  return epoch;
+mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
+{
+  return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_clearN_at);
 }

 /* --------------------------------------------------------------------------------
  pairmap
-------------------------------------------------------------------------------- */

-// initialize a pairmap to all clear; avoid a mem_zero if `already_zero` is true
-void mi_pairmap_init(mi_pairmap_t* pairmap, bool already_zero) {
-  if (!already_zero) {
-    _mi_memzero_aligned(pairmap, sizeof(*pairmap));
+void mi_pairmap_init(mi_pairmap_t* pairmap, mi_bitmap_t* bm1, mi_bitmap_t* bm2) {
+  mi_assert_internal(mi_bitmap_chunk_count(bm1)==mi_bitmap_chunk_count(bm2));
+  pairmap->bitmap1 = bm1;
+  pairmap->bitmap2 = bm2;
+}
+
+static void mi_pairmap_from_pair_idx(mi_pairmap_t* pairmap, size_t pair_idx, mi_bitmap_t** bitmap, size_t* pidx) {
+  const size_t idx = 2*pair_idx;
+  const size_t maxbits = mi_bitmap_max_bits(pairmap->bitmap1);
+  mi_assert_internal(pair_idx < maxbits);
+  if (idx < maxbits) {
+    *bitmap = pairmap->bitmap1;
+    *pidx = idx;
+  }
+  else {
+    *bitmap = pairmap->bitmap2;
+    *pidx = idx - maxbits;
   }
 }

-/* --------------------------------------------------------------------------------
-  pairmap set/clear unconditionally
--------------------------------------------------------------------------------- */
-
-// is a pairmap entry clear?
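A pairmap is no longer its own chunk array: it is two equal-sized bitmaps, and pair `pair_idx` lives at bit `2*pair_idx`, landing in `bitmap1` while it fits and spilling into `bitmap2` otherwise. A worked sketch of that mapping (the 512-bit size is an assumption for the example):

#include <stddef.h>

// With maxbits = 512 per bitmap:
//   pair_idx 100 -> idx 200 -> bitmap 1, bit 200
//   pair_idx 300 -> idx 600 -> bitmap 2, bit 600 - 512 = 88
static void pair_locate(size_t pair_idx, size_t maxbits, int* which, size_t* idx) {
  const size_t i = 2 * pair_idx;
  *which = (i < maxbits ? 1 : 2);
  *idx   = (i < maxbits ? i : i - maxbits);
}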
 
-/* --------------------------------------------------------------------------------
-  pairmap set/clear unconditionally
--------------------------------------------------------------------------------- */
-
-// is a pairmap entry clear?
-bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
-  const size_t idx = 2*pair_idx;
-  mi_assert_internal(idx < MI_PAIRMAP_MAX_BITS);
-  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
-  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
-  return mi_bitmap_chunk_is_clear2(&pairmap->chunks[chunk_idx], cidx);
-}
-
-// A reader can set from busy, or a new abandoned page can set from clear
 bool mi_pairmap_set(mi_pairmap_t* pairmap, size_t pair_idx) {
-  const size_t idx = 2*pair_idx;
-  mi_assert_internal(idx < MI_PAIRMAP_MAX_BITS);
-  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
-  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
-  // first set the anyset since it is a conservative approximation (increases epoch)
-  mi_pairmap_anyset_set(pairmap, chunk_idx/2);
-  return mi_bitmap_chunk_set2(&pairmap->chunks[chunk_idx], cidx, NULL);
+  mi_bitmap_t* bitmap;
+  size_t idx;
+  mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
+  return mi_bitmap_xset_pair(MI_BIT_SET, bitmap, idx);
 }
 
-// A busy reader can clear unconditionally
-void mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
-  const size_t idx = 2*pair_idx;
-  mi_assert_internal(idx < MI_PAIRMAP_MAX_BITS);
-  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
-  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
-  const size_t epoch = mi_pairmap_epoch(pairmap);
-  bool both_already_clear = false;
-  mi_bitmap_chunk_clear2(&pairmap->chunks[chunk_idx], cidx, &both_already_clear);
-  mi_assert_internal(!both_already_clear);  // in our use cases this should not happen
-  if (!both_already_clear && epoch == mi_pairmap_epoch(pairmap)) {
-    const size_t chunk_idx1 = 2*(chunk_idx/2);  // round down to even
-    mi_bitmap_chunk_t* chunk1 = &pairmap->chunks[chunk_idx1];
-    mi_bitmap_chunk_t* chunk2 = &pairmap->chunks[chunk_idx1 + 1];
-    if (mi_bitmap_chunk_all_are_clear(chunk1) && mi_bitmap_chunk_all_are_clear(chunk2)) {
-      mi_pairmap_anyset_try_clear(pairmap, chunk_idx1/2, epoch);
-    }
-  }
+bool mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
+  mi_bitmap_t* bitmap;
+  size_t idx;
+  mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
+  return mi_bitmap_xset_pair(MI_BIT_CLEAR, bitmap, idx);
+}
+
+bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
+  mi_bitmap_t* bitmap;
+  size_t idx;
+  mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
+  return mi_bitmap_is_xset2(MI_BIT_CLEAR, bitmap, idx);
 }
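
All of the pair operations rely on the same 2-bit encoding that the asserts below repeat: 00 (clear), 01 (busy), 11 (set), with pairs aligned on even bit positions so a pair never straddles a bitfield. A small standalone sketch of that encoding (illustrative names, non-atomic, not mimalloc API):

    #include <stdint.h>

    #define PAIR_CLEAR  0x0   // 00: no abandoned page here
    #define PAIR_BUSY   0x1   // 01: a reader is visiting the page
    #define PAIR_SET    0x3   // 11: an abandoned page is present

    // read the 2-bit pair at even bit position `idx`
    static inline unsigned pair_value(uint64_t bfield, unsigned idx) {
      return (unsigned)((bfield >> idx) & 0x3);
    }

    // return `bfield` with the pair at `idx` replaced by `v`
    static inline uint64_t pair_with(uint64_t bfield, unsigned idx, unsigned v) {
      return (bfield & ~((uint64_t)0x3 << idx)) | ((uint64_t)v << idx);
    }
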
@@ -1207,8 +1215,8 @@ void mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
   pairmap clear while not busy
 -------------------------------------------------------------------------------- */
 
-static inline bool mi_bfield_atomic_clear_while_not_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
-  mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
+static inline bool mi_bfield_atomic_clear2_while_not_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
+  mi_assert_internal((idx%2)==0);  // bit patterns are 00 (clear), 01 (busy), and 11 (set).
   mi_assert_internal(idx < MI_BFIELD_BITS-1);
   const mi_bfield_t mask = ((mi_bfield_t)0x03 << idx);
   const mi_bfield_t mask_busy = ((mi_bfield_t)MI_PAIR_BUSY << idx);
@@ -1221,41 +1229,44 @@ static inline bool mi_bfield_atomic_clear_while_not_busy(_Atomic(mi_bfield_t)*b,
       while ((old&mask)==mask_busy) {  // busy wait
         mi_atomic_yield();
         old = mi_atomic_load_acquire(b);
-      } 
+      }
     }
     bnew = (old & ~mask);  // clear
   } while (!mi_atomic_cas_weak_acq_rel(b, &old, bnew));
   mi_assert_internal((old&mask) != mask_busy);  // we should never clear a busy page
   mi_assert_internal((old&mask) == mask);  // in our case: we should only go from set to clear (when reclaiming an abandoned page from a free)
-  return true;
+  return ((old&mask) == mask);
 }
 
-static void mi_pairmap_chunk_clear_while_not_busy(mi_bitmap_chunk_t* chunk, size_t cidx) {
+static inline bool mi_bitmap_chunk_clear2_while_not_busy(mi_bitmap_chunk_t* chunk, size_t cidx) {
   mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
   const size_t i = cidx / MI_BFIELD_BITS;
   const size_t idx = cidx % MI_BFIELD_BITS;
-  mi_bfield_atomic_clear_while_not_busy(&chunk->bfields[i], idx);
+  return mi_bfield_atomic_clear2_while_not_busy(&chunk->bfields[i], idx);
 }
 
-// Used for a page about to be freed to clear itself from the abandoned map; it has to wait
-// for all readers to finish reading the page
-void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
-  const size_t idx = 2*pair_idx;
-  mi_assert_internal(idx < MI_PAIRMAP_MAX_BITS);
+static bool mi_bitmap_clear2_while_not_busy(mi_bitmap_t* bitmap, size_t idx) {
+  mi_assert_internal((idx%2)==0);
+  mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
   const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
   const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
-  const size_t epoch = mi_pairmap_epoch(pairmap);
-  mi_pairmap_chunk_clear_while_not_busy(&pairmap->chunks[chunk_idx], cidx);
-  if (epoch == mi_pairmap_epoch(pairmap)) {
-    const size_t chunk_idx1 = 2*(chunk_idx/2);  // round down to even
-    mi_bitmap_chunk_t* chunk1 = &pairmap->chunks[chunk_idx1];
-    mi_bitmap_chunk_t* chunk2 = &pairmap->chunks[chunk_idx1 + 1];
-    if (mi_bitmap_chunk_all_are_clear(chunk1) && mi_bitmap_chunk_all_are_clear(chunk2)) {
-      mi_pairmap_anyset_try_clear(pairmap, chunk_idx1/2, epoch);
-    }
-  }
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap, chunk_idx);
+  bool cleared = mi_bitmap_chunk_clear2_while_not_busy(&bitmap->chunks[chunk_idx], cidx);
+  if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
+    mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
+  }
+  return cleared;
 }
 
+void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
+  mi_bitmap_t* bitmap;
+  size_t idx;
+  mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
+  mi_bitmap_clear2_while_not_busy(bitmap, idx);
+}
+
+
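
The clear-while-not-busy protocol above can be expressed with plain C11 atomics. This is a standalone sketch under assumed names (it omits `mi_atomic_yield` and mimalloc's atomic wrappers): spin while the pair reads as busy, then CAS the two bits to clear, retrying if a reader slips in between the load and the CAS:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    // `idx` is the even bit position of the pair within the field
    static bool clear_pair_when_not_busy(_Atomic(uint64_t)* field, unsigned idx) {
      const uint64_t mask      = (uint64_t)0x3 << idx;
      const uint64_t mask_busy = (uint64_t)0x1 << idx;   // 01 == busy
      uint64_t old = atomic_load_explicit(field, memory_order_relaxed);
      uint64_t bnew;
      do {
        while ((old & mask) == mask_busy) {              // wait for the reader to finish
          old = atomic_load_explicit(field, memory_order_acquire);
        }
        bnew = old & ~mask;                              // 00 == clear
      } while (!atomic_compare_exchange_weak_explicit(field, &old, bnew,
                 memory_order_acq_rel, memory_order_acquire));
      return ((old & mask) == mask);                     // true if the pair was fully set
    }
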
 /* --------------------------------------------------------------------------------
   pairmap try and set busy
@@ -1263,7 +1274,7 @@ void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
 // Atomically go from set to busy, or return false otherwise and leave the bit field as-is.
 static inline bool mi_bfield_atomic_try_set_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
-  mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
+  mi_assert_internal((idx%2)==0);  // bit patterns are 00 (clear), 01 (busy), and 11 (set).
   mi_assert_internal(idx < MI_BFIELD_BITS-1);
   const mi_bfield_t mask = ((mi_bfield_t)0x03 << idx);
   const mi_bfield_t mask_busy = ((mi_bfield_t)MI_PAIR_BUSY << idx);
@@ -1277,11 +1288,11 @@ static inline bool mi_bfield_atomic_try_set_busy(_Atomic(mi_bfield_t)*b, size_t
   return true;
 }
 
-static inline bool mi_pairmap_chunk_find_and_set_busy(mi_bitmap_chunk_t* chunk, size_t* pidx) {
+static inline bool mi_bitmap_chunk_try_find_and_set_busy(mi_bitmap_chunk_t* chunk, size_t* pidx) {
   for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
     size_t idx;
     if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i], &idx)) {  // find least 1-bit, it may be set or busy
-      mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
+      mi_assert_internal((idx%2)==0);  // bit patterns are 00 (clear), 01 (busy), and 11 (set).
       if mi_likely(mi_bfield_atomic_try_set_busy(&chunk->bfields[i], idx)) {
         *pidx = (i*MI_BFIELD_BITS) + idx;
         mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS-1);
@@ -1292,41 +1303,36 @@ static inline bool mi_pairmap_chunk_find_and_set_busy(mi_bitmap_chunk_t* chunk,
   return false;
 }
 
+static bool mi_bitmap_try_find_and_set_busy_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
+  MI_UNUSED(epoch);
+  mi_assert_internal(n==2);
+  size_t cidx;
+  if mi_likely(mi_bitmap_chunk_try_find_and_set_busy(&bitmap->chunks[chunk_idx], &cidx)) {
+    *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
+    mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
+    return true;
+  }
+  else {
+    return false;
+  }
+}
+
+static bool mi_bitmap_try_find_and_set_busy(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) {
+  return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_set_busy_at);
+}
+
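
Both `mi_bitmap_try_find_and_clearN` and `mi_bitmap_try_find_and_set_busy` now share one chunkmap scanner and inject the per-chunk action as a function pointer. A self-contained toy version of that pattern (all names hypothetical):

    #include <stdbool.h>
    #include <stddef.h>

    #define NCHUNKS 8
    typedef struct toy_bitmap_s { unsigned chunks[NCHUNKS]; } toy_bitmap_t;

    typedef bool (toy_find_fun_t)(toy_bitmap_t* bm, size_t chunk_idx, size_t* pidx);

    // one generic scanner, parameterized by the per-chunk attempt
    static bool toy_try_find(toy_bitmap_t* bm, size_t tseq, size_t* pidx, toy_find_fun_t* at) {
      for (size_t i = 0; i < NCHUNKS; i++) {
        const size_t chunk_idx = (i + tseq) % NCHUNKS;   // per-thread start position
        if ((*at)(bm, chunk_idx, pidx)) return true;
      }
      return false;
    }

    // one possible per-chunk action: claim a whole non-empty chunk
    static bool toy_claim_any(toy_bitmap_t* bm, size_t chunk_idx, size_t* pidx) {
      if (bm->chunks[chunk_idx] == 0) return false;
      *pidx = chunk_idx;
      bm->chunks[chunk_idx] = 0;
      return true;
    }

    // usage: size_t idx; toy_try_find(&bm, tseq, &idx, &toy_claim_any);
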
 // Used to find an abandoned page, and transition from set to busy.
 mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx) {
-  uint32_t bit_idx;
-  uint32_t start = (uint32_t)(tseq % MI_EPOCHSET_BITS);
-  size_t epoch;
-  uint32_t any_set = mi_pairmap_anyset(pairmap,&epoch);
-  any_set = mi_rotr32(any_set, start);
-  while (mi_bsf32(any_set,&bit_idx)) { \
-    size_t chunk_idx = 2*((bit_idx + start) % MI_BFIELD_BITS);
-    {
-      // look at chunk_idx and chunck_idx+1
-      mi_bitmap_chunk_t* chunk1 = &pairmap->chunks[chunk_idx];
-      mi_bitmap_chunk_t* chunk2 = &pairmap->chunks[chunk_idx+1];
-      size_t cidx;
-      if (mi_pairmap_chunk_find_and_set_busy(chunk1, &cidx)) {
-        const size_t idx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
-        mi_assert_internal(idx < MI_PAIRMAP_MAX_BITS);
-        mi_assert_internal((idx%2)==0);
-        *pidx = idx/2;
-        return true;
-      }
-      else if (mi_pairmap_chunk_find_and_set_busy(chunk2, &cidx)) {
-        const size_t idx = ((chunk_idx+1) * MI_BITMAP_CHUNK_BITS) + cidx;
-        mi_assert_internal(idx < MI_PAIRMAP_MAX_BITS);
-        mi_assert_internal((idx%2)==0);
-        *pidx = idx/2;
-        return true;
-      }
-      else if (epoch == mi_pairmap_epoch(pairmap) && mi_bitmap_chunk_all_are_clear(chunk1) && mi_bitmap_chunk_all_are_clear(chunk1)) {
-        mi_pairmap_anyset_try_clear(pairmap, chunk_idx/2, epoch);
-      }
+  size_t idx = 0;
+  if (!mi_bitmap_try_find_and_set_busy(pairmap->bitmap1, 2, tseq, &idx)) {
+    if (!mi_bitmap_try_find_and_set_busy(pairmap->bitmap2, 2, tseq, &idx)) {
+      return false;
+    }
+    else {
+      idx += mi_bitmap_max_bits(pairmap->bitmap1);
     }
-    start += bit_idx+1;  /* so chunk_idx computation stays valid */
-    any_set >>= bit_idx; /* skip scanned bits (and avoid UB with (idx+1)) */
-    any_set >>= 1;
   }
-  return false;
+  mi_assert_internal((idx%2)==0);
+  *pidx = idx/2;
+  return true;
 }
 
diff --git a/src/bitmap.h b/src/bitmap.h
index 948bd1e3..9b931c95 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -34,30 +34,56 @@ typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_chunk_s {
   _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
 } mi_bitmap_chunk_t;
 
-// for now 32 (note: with ABA instructions we can make this 64)
-#define MI_EPOCHSET_BITS      (32)
-#define MI_BITMAP_CHUNK_COUNT MI_EPOCHSET_BITS
-typedef uint64_t mi_epochset_t;
+// for now a 32-bit epoch + a 32-bit bit-set (note: with ABA instructions we can double this)
+typedef uint64_t mi_chunkmap_t;
+typedef uint32_t mi_epoch_t;
+typedef uint32_t mi_cmap_t;
+
+#define MI_CHUNKMAP_BITS          (32)   // one chunkmap tracks 32 chunks
+
+#define MI_BITMAP_MAX_CHUNKMAPS   (16)
+#define MI_BITMAP_MAX_CHUNK_COUNT (MI_BITMAP_MAX_CHUNKMAPS * MI_CHUNKMAP_BITS)
+#define MI_BITMAP_MIN_CHUNK_COUNT (1 * MI_CHUNKMAP_BITS)                            // 1 GiB arena
+
+#define MI_BITMAP_MAX_BIT_COUNT (MI_BITMAP_MAX_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS)  // 16 GiB arena (with 64 KiB slices)
+#define MI_BITMAP_MIN_BIT_COUNT (MI_BITMAP_MIN_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS)  // 1 GiB arena
 
 typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_s {
-  mi_bitmap_chunk_t chunks[MI_BITMAP_CHUNK_COUNT];
-  _Atomic(mi_epochset_t) any_set;
+  _Atomic(size_t) chunk_map_count;
+  _Atomic(size_t) chunk_count;
+  _Atomic(mi_chunkmap_t) chunk_maps[MI_BITMAP_MAX_CHUNKMAPS];
+  // padding
+  mi_bitmap_chunk_t chunks[MI_BITMAP_MIN_CHUNK_COUNT];  // or more, up to MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bitmap_t;
 
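
A quick consistency check of the new limits, assuming `MI_BITMAP_CHUNK_BITS` is 512 and 64 KiB arena slices as the comments indicate: one chunkmap word covers 32 chunks x 512 bits = 16 Ki slices = 1 GiB, and 16 chunkmap words cover 16 GiB. Below is a sketch of the size computation that a `mi_bitmap_size`-style helper could perform; the header-padding detail is an assumption based on the `// padding` field above:

    #include <stddef.h>

    #define CHUNK_BITS     512                  // assumed MI_BITMAP_CHUNK_BITS
    #define CHUNK_SIZE     (CHUNK_BITS/8)       // 64 bytes per chunk
    #define CHUNKMAP_BITS  32                   // MI_CHUNKMAP_BITS

    static size_t bitmap_size_for(size_t bit_count, size_t* pchunk_count) {
      const size_t chunk_count = (bit_count + CHUNK_BITS - 1) / CHUNK_BITS;  // round up
      if (pchunk_count != NULL) { *pchunk_count = chunk_count; }
      // assumption: the counts + chunkmaps header is padded out to one chunk,
      // and the (variable number of) chunks follow
      return CHUNK_SIZE + (chunk_count * CHUNK_SIZE);
    }
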
-// 16k bits on 64bit, 8k bits on 32bit
-// with 64KiB slices, this can address a 1GiB arena
-#define MI_BITMAP_MAX_BITS (MI_BITMAP_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS)
+static inline size_t mi_bitmap_chunk_map_count(const mi_bitmap_t* bitmap) {
+  return mi_atomic_load_relaxed(&bitmap->chunk_map_count);
+}
+
+static inline size_t mi_bitmap_chunk_count(const mi_bitmap_t* bitmap) {
+  return mi_atomic_load_relaxed(&bitmap->chunk_count);
+}
+
+static inline size_t mi_bitmap_max_bits(const mi_bitmap_t* bitmap) {
+  return (mi_bitmap_chunk_count(bitmap) * MI_BITMAP_CHUNK_BITS);
+}
+
+
 
 /* --------------------------------------------------------------------------------
   Atomic bitmap
 -------------------------------------------------------------------------------- */
 
-typedef bool mi_bit_t;
+typedef bool mi_xset_t;
 #define MI_BIT_SET    (true)
 #define MI_BIT_CLEAR  (false)
 
+
+size_t mi_bitmap_size(size_t bit_count, size_t* chunk_count);
+
 // initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
-void mi_bitmap_init(mi_bitmap_t* bitmap, bool already_zero);
+// returns the size of the bitmap.
+size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero);
 
 // Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
 void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n);
@@ -65,7 +91,7 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n);
 // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
 // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
 // If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
-bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset);
+bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset);
 
 static inline bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set) {
   return mi_bitmap_xsetN(MI_BIT_SET, bitmap, idx, n, already_set);
@@ -77,7 +103,7 @@ static inline bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
 
 
 // Is a sequence of n bits already all set/cleared?
-bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
+bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
 
 static inline bool mi_bitmap_is_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
   return mi_bitmap_is_xsetN(MI_BIT_SET, bitmap, idx, n);
@@ -88,9 +114,29 @@ static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n
 }
 
 
+// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
+// and false otherwise leaving the bitmask as is.
+// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
+mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
+
+static inline bool mi_bitmap_try_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
+  return mi_bitmap_try_xsetN(MI_BIT_SET, bitmap, idx, n);
+}
+
+static inline bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
+  return mi_bitmap_try_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
+}
+
+// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
+// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
+mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx);
+
+
+
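
A usage sketch of the try-APIs declared above, in the style of the arena code in this patch (an illustrative fragment that assumes mimalloc's internal headers; `claim_slices` is a hypothetical name): `mi_bitmap_try_clearN` either atomically takes all `n` bits from 1 to 0 and returns true, or leaves the bitmap untouched, so there is never a partial claim.

    // claim `slice_count` slices at `*slice_index` if still free, else search anywhere
    static bool claim_slices(mi_arena_t* arena, size_t* slice_index, size_t slice_count, size_t tseq) {
      if (mi_bitmap_try_clearN(arena->slices_free, *slice_index, slice_count)) {
        return true;   // all bits were set and are now clear: the exact range is ours
      }
      // raced with another thread: fall back to finding any free range of that size
      return mi_bitmap_try_find_and_clearN(arena->slices_free, slice_count, tseq, slice_index);
    }
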
 // Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
 // and false otherwise leaving the bitmask as is.
-//mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
+//mi_decl_nodiscard bool mi_bitmap_try_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx);
 //
 //static inline bool mi_bitmap_try_set(mi_bitmap_t* bitmap, size_t idx) {
 //  return mi_bitmap_try_xset(MI_BIT_SET, bitmap, idx);
@@ -103,7 +149,7 @@ static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n
 
 // Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
 // and false otherwise leaving the bitmask as is.
-//mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx);
+//mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx);
 //
 //static inline bool mi_bitmap_try_set8(mi_bitmap_t* bitmap, size_t idx) {
 //  return mi_bitmap_try_xset8(MI_BIT_SET, bitmap, idx);
@@ -113,48 +159,28 @@ static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n
 //  return mi_bitmap_try_xset8(MI_BIT_CLEAR, bitmap, idx);
 //}
 
-// Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
-// and false otherwise leaving the bitmask as is.
-// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
-mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
-
-static inline bool mi_bitmap_try_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
-  return mi_bitmap_try_xsetN(MI_BIT_SET, bitmap, idx, n);
-}
-
-static inline bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
-  return mi_bitmap_try_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
-}
-
-// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
-// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
-mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx );
-
 /* --------------------------------------------------------------------------------
   Atomic bitmap for a pair of bits
 -------------------------------------------------------------------------------- */
 
-typedef mi_bfield_t mi_pair_t;
-
 #define MI_PAIR_CLEAR   (0)
 #define MI_PAIR_BUSY    (1)
 #define MI_PAIR_UNUSED  (2)   // should never occur
 #define MI_PAIR_SET     (3)
 
-typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_pairmap_s {
-  mi_bitmap_chunk_t chunks[2*MI_BITMAP_CHUNK_COUNT];
-  _Atomic(mi_epochset_t) any_set;
+typedef struct mi_pairmap_s {
+  mi_bitmap_t* bitmap1;
+  mi_bitmap_t* bitmap2;
 } mi_pairmap_t;
 
-#define MI_PAIRMAP_MAX_PAIRS  (MI_BITMAP_MAX_BITS)     // 16k pairs on 64bit, 8k pairs on 32bit
-#define MI_PAIRMAP_MAX_BITS   (2*MI_PAIRMAP_MAX_PAIRS)
+
 
-// initialize a pairmap to all unset; avoid a mem_zero if `already_zero` is true
-void mi_pairmap_init(mi_pairmap_t* pairmap, bool already_zero);
+// initialize a pairmap from two backing bitmaps (which must have an equal chunk count)
+void mi_pairmap_init(mi_pairmap_t* pairmap, mi_bitmap_t* bm1, mi_bitmap_t* bm2);
 bool mi_pairmap_set(mi_pairmap_t* pairmap, size_t pair_idx);
+bool mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx);
 bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx);
-void mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx);
 void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx);
 mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx);
 
diff --git a/src/page-map.c b/src/page-map.c
index 0e99a890..35a22d8d 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -22,7 +22,8 @@ static bool mi_page_map_init(void) {
   // 64 KiB for 4 GiB address space (on 32-bit)
   const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT));
 
-  mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS);
+  mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size, MI_BITMAP_MIN_BIT_COUNT);
+  mi_bitmap_init(&mi_page_map_commit, MI_BITMAP_MIN_BIT_COUNT, true);
 
   mi_page_map_all_committed = false; // _mi_os_has_overcommit(); // commit on-access on Linux systems?
   _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL);
diff --git a/test/test-stress.c b/test/test-stress.c
index 9e53e920..e49fde00 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -41,7 +41,7 @@ static int THREADS = 8;
 static int SCALE = 10;
 static int ITER = 10;
 #elif 0
-static int THREADS = 4;
+static int THREADS = 1;
 static int SCALE = 100;
 static int ITER = 10;
 #define ALLOW_LARGE false
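
Referring back to the page-map hunk above, a worked sizing example (assuming a 48-bit address space and `MI_ARENA_SLICE_SHIFT` of 16, i.e. 64 KiB slices): the page map has 2^(48-16) = 2^32 one-byte entries, and with `MI_BITMAP_MIN_BIT_COUNT` = 16384 commit bits that is 2^18 entries, i.e. 256 KiB of page map, per commit bit:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const unsigned vbits = 48;                 // assumed virtual address bits
      const unsigned slice_shift = 16;           // 64 KiB arena slices
      const uint64_t page_map_size = (uint64_t)1 << (vbits - slice_shift);  // one byte per slice
      const uint64_t min_bit_count = 32 * 512;   // MI_BITMAP_MIN_BIT_COUNT = 16384
      const uint64_t per_commit_bit = (page_map_size + min_bit_count - 1) / min_bit_count;
      printf("%llu KiB of page map per commit bit\n", (unsigned long long)(per_commit_bit / 1024));  // prints 256
      return 0;
    }
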