From ec9c61c066d46ad998028d83e984ff33a5fb5470 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 6 Dec 2024 14:53:24 -0800 Subject: [PATCH] initial no more pairmap --- include/mimalloc/internal.h | 7 +- include/mimalloc/types.h | 8 +- src/arena.c | 66 +-- src/bitmap.c | 937 ++++++++++++++---------------------- src/bitmap.h | 158 +++--- 5 files changed, 465 insertions(+), 711 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 3a8b272e..d9c2cd6e 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -700,7 +700,9 @@ static inline bool mi_page_try_claim_ownership(mi_page_t* page) { return ((old&1)==0); } -static inline void _mi_page_unown(mi_page_t* page) { +// release ownership of a page. This may free the page if all blocks were concurrently +// freed in the meantime. Returns true if the page was freed. +static inline bool _mi_page_unown(mi_page_t* page) { mi_assert_internal(mi_page_is_owned(page)); mi_assert_internal(mi_page_is_abandoned(page)); mi_thread_free_t tf_new; @@ -712,13 +714,14 @@ static inline void _mi_page_unown(mi_page_t* page) { if (mi_page_all_free(page)) { // it may become free just before unowning it _mi_arena_page_unabandon(page); _mi_arena_page_free(page); - return; + return true; } tf_old = mi_atomic_load_relaxed(&page->xthread_free); } mi_assert_internal(mi_tf_block(tf_old)==NULL); tf_new = mi_tf_create(NULL, false); } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tf_old, tf_new)); + return false; } //----------------------------------------------------------- diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index d4c37c37..d78dbc59 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -117,16 +117,16 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ARENA_SLICE_SHIFT (13 + MI_SIZE_SHIFT) // 64 KiB (32 KiB on 32-bit) #endif #endif -#ifndef MI_BITMAP_CHUNK_BITS_SHIFT -#define MI_BITMAP_CHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512) +#ifndef MI_BCHUNK_BITS_SHIFT +#define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512) #endif -#define MI_BITMAP_CHUNK_BITS (1 << MI_BITMAP_CHUNK_BITS_SHIFT) +#define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) #define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) #define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE) #define MI_ARENA_MIN_OBJ_SLICES (1) -#define MI_ARENA_MAX_OBJ_SLICES (MI_BITMAP_CHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries) +#define MI_ARENA_MAX_OBJ_SLICES (MI_BCHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE) #define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE) diff --git a/src/arena.c b/src/arena.c index 79a52c4d..fd609fe0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -48,7 +48,7 @@ typedef struct mi_arena_s { mi_bitmap_t* slices_committed; // is the slice committed? (i.e. accessible) mi_bitmap_t* slices_purge; // can the slice be purged? (slice in purge => slice in free) mi_bitmap_t* slices_dirty; // is the slice potentially non-zero? 
- mi_pairmap_t pages_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page) + mi_bitmap_t* pages_abandoned[MI_BIN_COUNT]; // abandoned pages per size bin (a set bit means the start of the page) // the full queue contains abandoned full pages // followed by the bitmaps (whose size depends on the arena size) } mi_arena_t; @@ -476,16 +476,24 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t Arena page allocation ----------------------------------------------------------- */ -static bool mi_arena_claim_abandoned(size_t slice_index, void* arg1, void* arg2) { - mi_arena_t* arena = (mi_arena_t*)arg1; - mi_subproc_t* subproc = (mi_subproc_t*)arg2; - +static bool mi_arena_claim_abandoned(size_t slice_index, void* arg1, void* arg2, bool* keep_abandoned) { // found an abandoned page of the right size - // it is set busy for now so we can read safely even with concurrent mi_free reclaiming - // try to claim ownership atomically - mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); - if (subproc != page->subproc) return false; - if (!mi_page_try_claim_ownership(page)) return false; + mi_arena_t* const arena = (mi_arena_t*)arg1; + mi_subproc_t* const subproc = (mi_subproc_t*)arg2; + mi_page_t* const page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); + // can we claim ownership? + if (!mi_page_try_claim_ownership(page)) { + *keep_abandoned = true; + return false; + } + if (subproc != page->subproc) { + // wrong sub-process.. we need to unown again, and perhaps not keep it abandoned + const bool freed = _mi_page_unown(page); + *keep_abandoned = !freed; + return false; + } + // yes, we can reclaim it + *keep_abandoned = false; return true; } @@ -505,10 +513,10 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl mi_forall_arenas(req_arena_id, allow_large, tseq, arena_id, arena) { size_t slice_index; - mi_pairmap_t* const pairmap = &arena->pages_abandoned[bin]; + mi_bitmap_t* const bitmap = arena->pages_abandoned[bin]; - if (mi_pairmap_try_find_and_set_busy(pairmap, tseq, &slice_index, &mi_arena_claim_abandoned, arena, subproc)) { - // found an abandoned page of the right size + if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_claim_abandoned, arena, subproc)) { + // found an abandoned page of the right size // and claimed ownership. 
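 /* note: the claim callback above has already (a) taken unique ownership of the page and (b) told the
    bitmap search to clear the abandoned bit (`keep_abandoned` was set to false), so no other thread
    can find or reclaim this page concurrently. */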
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
       mi_assert_internal(mi_page_is_owned(page));
@@ -528,7 +536,7 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
       mi_assert_internal(mi_page_block_size(page) == block_size);
       mi_assert_internal(!mi_page_is_full(page));
       return page;
-    }
+    }
   }
   mi_forall_arenas_end();
   return NULL;
@@ -694,7 +702,7 @@ void _mi_arena_page_free(mi_page_t* page) {
     mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
     mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
     mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
-    mi_assert_internal(mi_pairmap_is_clear(&arena->pages_abandoned[bin], slice_index));
+    mi_assert_internal(mi_bitmap_is_clearN(arena->pages_abandoned[bin], slice_index, 1));
   }
   #endif
@@ -728,8 +736,8 @@ static void mi_arena_page_abandon_no_stat(mi_page_t* page) {
     mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));
     mi_page_set_abandoned_mapped(page);
-    bool were_zero = mi_pairmap_set(&arena->pages_abandoned[bin], slice_index);
-    MI_UNUSED(were_zero); mi_assert_internal(were_zero);
+    const bool wasclear = mi_bitmap_set(arena->pages_abandoned[bin], slice_index);
+    MI_UNUSED(wasclear); mi_assert_internal(wasclear);
     mi_atomic_increment_relaxed(&subproc->abandoned_count[bin]);
   }
   else {
@@ -783,7 +791,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) {
     mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
     // this busy waits until a concurrent reader (from alloc_abandoned) is done
-    mi_pairmap_clear_once_not_busy(&arena->pages_abandoned[bin], slice_index);
+    mi_bitmap_clear_once_set(arena->pages_abandoned[bin], slice_index);
     mi_page_clear_abandoned_mapped(page);
     mi_atomic_decrement_relaxed(&page->subproc->abandoned_count[bin]);
   }
@@ -956,12 +964,12 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t*
 }
 
 static size_t mi_arena_info_slices_needed(size_t slice_count, size_t* bitmap_base) {
-  if (slice_count == 0) slice_count = MI_BITMAP_CHUNK_BITS;
-  mi_assert_internal((slice_count % MI_BITMAP_CHUNK_BITS) == 0);
-  const size_t base_size = _mi_align_up(sizeof(mi_arena_t), MI_BITMAP_CHUNK_SIZE);
-  const size_t bitmaps_size = 4 * mi_bitmap_size(slice_count,NULL);
-  const size_t pairmaps_size = MI_BIN_COUNT * 2 * mi_bitmap_size(slice_count,NULL);
-  const size_t size = base_size + bitmaps_size + pairmaps_size;
+  if (slice_count == 0) slice_count = MI_BCHUNK_BITS;
+  mi_assert_internal((slice_count % MI_BCHUNK_BITS) == 0);
+  const size_t base_size = _mi_align_up(sizeof(mi_arena_t), MI_BCHUNK_SIZE);
+  const size_t bitmaps_count = 4 + MI_BIN_COUNT;  // free, commit, dirty, purge, and abandoned
+  const size_t bitmaps_size = bitmaps_count * mi_bitmap_size(slice_count,NULL);
+  const size_t size = base_size + bitmaps_size;
   const size_t os_page_size = _mi_os_page_size();
   const size_t info_size = _mi_align_up(size, os_page_size) + os_page_size; // + guard page
@@ -992,7 +1000,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); }
 
-  const size_t slice_count = _mi_align_down(size / MI_ARENA_SLICE_SIZE, MI_BITMAP_CHUNK_BITS);
+  const size_t slice_count = _mi_align_down(size / MI_ARENA_SLICE_SIZE, MI_BCHUNK_BITS);
   if (slice_count > MI_BITMAP_MAX_BIT_COUNT) {  // 16 GiB for now
     // todo: allow larger areas (either by splitting it up in
arenas or having larger arenas)
     _mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_slices(MI_BITMAP_MAX_BIT_COUNT)/MI_MiB);
@@ -1034,7 +1042,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->slices_dirty = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_purge = mi_arena_bitmap_init(slice_count,&base);
   for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) {
-    mi_pairmap_init(&arena->pages_abandoned[i], mi_arena_bitmap_init(slice_count, &base), mi_arena_bitmap_init(slice_count, &base));
+    arena->pages_abandoned[i] = mi_arena_bitmap_init(slice_count,&base);
   }
   mi_assert_internal(mi_size_of_slices(info_slices) >= (size_t)(base - mi_arena_start(arena)));
@@ -1112,9 +1120,9 @@ static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_
   size_t bit_count = 0;
   size_t bit_set_count = 0;
   for (size_t i = 0; i < mi_bitmap_chunk_count(bitmap) && bit_count < slice_count; i++) {
-    char buf[MI_BITMAP_CHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf));
-    mi_bitmap_chunk_t* chunk = &bitmap->chunks[i];
-    for (size_t j = 0, k = 0; j < MI_BITMAP_CHUNK_FIELDS; j++) {
+    char buf[MI_BCHUNK_BITS + 64]; _mi_memzero(buf, sizeof(buf));
+    mi_bchunk_t* chunk = &bitmap->chunks[i];
+    for (size_t j = 0, k = 0; j < MI_BCHUNK_FIELDS; j++) {
       if (j > 0 && (j % 4) == 0) {
         buf[k++] = '\n';
         _mi_memcpy(buf+k, prefix, strlen(prefix)); k += strlen(prefix);
diff --git a/src/bitmap.c b/src/bitmap.c
index f25c91ac..7df46070 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -77,50 +77,41 @@ static inline bool mi_bfield_atomic_set(_Atomic(mi_bfield_t)*b, size_t idx) {
 }
 
 // Clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0.
-static inline bool mi_bfield_atomic_clear(_Atomic(mi_bfield_t)*b, size_t idx) {
+// `all_clear` is set if the new bfield is zero.
+static inline bool mi_bfield_atomic_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) {
   mi_assert_internal(idx < MI_BFIELD_BITS);
   const mi_bfield_t mask = mi_bfield_one()<<idx;
   const mi_bfield_t old = mi_atomic_and_acq_rel(b, ~mask);
+  if (all_clear != NULL) { *all_clear = ((old&~mask)==0); }
   return ((old&mask) == mask);
 }
 
+//static inline bool mi_bchunk_xset(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx) {
+//  mi_assert_internal(cidx < MI_BCHUNK_BITS);
+//  const size_t i = cidx / MI_BFIELD_BITS;
+//  const size_t idx = cidx % MI_BFIELD_BITS;
+//  return mi_bfield_atomic_xset(set, &chunk->bfields[i], idx);
+//}
+
-static inline bool mi_bitmap_chunk_xset2(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx, bool* all_already_xset) {
+static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx) {
+  mi_assert_internal(cidx < MI_BCHUNK_BITS);
   const size_t i = cidx / MI_BFIELD_BITS;
   const size_t idx = cidx % MI_BFIELD_BITS;
-  mi_assert_internal(idx < MI_BFIELD_BITS-1);
-  mi_assert_internal((idx%2)==0);
-  return mi_bfield_atomic_xset2(set, &chunk->bfields[i], idx, all_already_xset);
+  return mi_bfield_atomic_set(&chunk->bfields[i], idx);
 }
 
-static inline bool mi_bitmap_chunk_set2(mi_bitmap_chunk_t* chunk, size_t cidx, bool* all_already_set) {
-  return mi_bitmap_chunk_xset2(MI_BIT_SET, chunk, cidx, all_already_set);
-}
-
-static inline bool mi_bitmap_chunk_clear2(mi_bitmap_chunk_t* chunk, size_t cidx, bool* all_already_clear) {
-  return mi_bitmap_chunk_xset2(MI_BIT_CLEAR, chunk, cidx, all_already_clear);
+static inline bool mi_bchunk_clear(mi_bchunk_t* chunk, size_t cidx, bool* maybe_all_clear) {
+  mi_assert_internal(cidx < MI_BCHUNK_BITS);
+  const size_t i = cidx / MI_BFIELD_BITS;
+  const size_t idx = cidx % MI_BFIELD_BITS;
+  return mi_bfield_atomic_clear(&chunk->bfields[i], idx, maybe_all_clear);
 }
 
 // Set/clear a sequence of `n` bits within a chunk.
 // Returns true if all bits transitioned from 0 to 1 (or 1 to 0).
-static bool mi_bitmap_chunk_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, size_t* pall_already_xset) {
-  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);
+static bool mi_bchunk_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* palready_xset) {
+  mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);
   mi_assert_internal(n>0);
   bool all_transition = true;
-  size_t all_already_xset = 0;
+  size_t total_already_xset = 0;
   size_t idx   = cidx % MI_BFIELD_BITS;
   size_t field = cidx / MI_BFIELD_BITS;
   while (n > 0) {
     size_t m = MI_BFIELD_BITS - idx;   // m is the bits to xset in this field
     if (m > n) { m = n; }
     mi_assert_internal(idx + m <= MI_BFIELD_BITS);
-    mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS);
+    mi_assert_internal(field < MI_BCHUNK_FIELDS);
     const mi_bfield_t mask = mi_bfield_mask(m, idx);
     size_t already_xset = 0;
     const bool transition = mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset);
-    if (already_xset > 0 && transition) {
-      _mi_error_message(EFAULT, "ouch\n");
-    }
+    mi_assert_internal((transition && already_xset == 0) || (!transition && already_xset > 0));
     all_transition = all_transition && transition;
-    all_already_xset += already_xset;
+    total_already_xset += already_xset;
     // next field
     field++;
     idx = 0;
     n -= m;
   }
-  if (pall_already_xset!=NULL) { *pall_already_xset = all_already_xset; }
+  if (palready_xset!=NULL) { *palready_xset = total_already_xset; }
   return all_transition;
 }
 
-static inline bool mi_bitmap_chunk_setN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, size_t* already_set) {
-  return mi_bitmap_chunk_xsetN(MI_BIT_SET, chunk, cidx, n, already_set);
+static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) {
+  return mi_bchunk_xsetN(MI_BIT_SET, chunk, cidx, n, already_set);
 }
 
-static inline bool mi_bitmap_chunk_clearN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n, size_t* already_clear) {
-  return mi_bitmap_chunk_xsetN(MI_BIT_CLEAR, chunk, cidx, n, already_clear);
+static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_clear) {
+  return mi_bchunk_xsetN(MI_BIT_CLEAR, chunk, cidx, n, already_clear);
 }
 
-// check if a pair of bits is set/clear
-static inline bool mi_bitmap_chunk_is_xset2(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx) {
-  mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
-  const size_t i = cidx / MI_BFIELD_BITS;
-  const size_t idx = cidx % MI_BFIELD_BITS;
-  mi_assert_internal(idx < MI_BFIELD_BITS-1);
-  mi_assert_internal((idx%2)==0);
-  const size_t mask = (mi_bfield_t)0x03 << idx;
-  return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mask);
-}
-
-static inline bool mi_bitmap_chunk_is_set2(mi_bitmap_chunk_t* chunk, size_t cidx) {
-  return mi_bitmap_chunk_is_xset2(MI_BIT_SET, chunk, cidx);
-}
-
-static inline bool mi_bitmap_chunk_is_clear2(mi_bitmap_chunk_t* chunk, size_t cidx) {
-  return mi_bitmap_chunk_is_xset2(MI_BIT_CLEAR, chunk, cidx);
-}
+// ------ is_xset --------
 
 // Check if a sequence of `n` bits within a chunk are all set/cleared.
-static bool mi_bitmap_chunk_is_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { - mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); +static bool mi_bchunk_is_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n) { + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); mi_assert_internal(n>0); size_t idx = cidx % MI_BFIELD_BITS; size_t field = cidx / MI_BFIELD_BITS; @@ -378,7 +363,7 @@ static bool mi_bitmap_chunk_is_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, si size_t m = MI_BFIELD_BITS - idx; // m is the bits to xset in this field if (m > n) { m = n; } mi_assert_internal(idx + m <= MI_BFIELD_BITS); - mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); + mi_assert_internal(field < MI_BCHUNK_FIELDS); const size_t mask = mi_bfield_mask(m, idx); if (!mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask)) { return false; @@ -392,71 +377,91 @@ static bool mi_bitmap_chunk_is_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, si } +// ------ try_xset -------- -static inline bool mi_bitmap_chunk_try_xset(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx) { - mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS); +static inline bool mi_bchunk_try_xset(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); const size_t i = cidx / MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS; return mi_bfield_atomic_try_xset(set, &chunk->bfields[i], idx); } -static inline bool mi_bitmap_chunk_try_set(mi_bitmap_chunk_t* chunk, size_t cidx) { - return mi_bitmap_chunk_try_xset(MI_BIT_SET, chunk, cidx); +static inline bool mi_bchunk_try_set(mi_bchunk_t* chunk, size_t cidx) { + return mi_bchunk_try_xset(MI_BIT_SET, chunk, cidx); } -static inline bool mi_bitmap_chunk_try_clear(mi_bitmap_chunk_t* chunk, size_t cidx) { - return mi_bitmap_chunk_try_xset(MI_BIT_CLEAR, chunk, cidx); +static inline bool mi_bchunk_try_clear(mi_bchunk_t* chunk, size_t cidx, bool* maybe_all_clear) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + return mi_bfield_atomic_try_clear(&chunk->bfields[i], idx, maybe_all_clear); } -static inline bool mi_bitmap_chunk_try_xset8(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t byte_idx) { - mi_assert_internal(byte_idx*8 < MI_BITMAP_CHUNK_BITS); + +//static inline bool mi_bchunk_try_xset8(mi_xset_t set, mi_bchunk_t* chunk, size_t byte_idx) { +// mi_assert_internal(byte_idx*8 < MI_BCHUNK_BITS); +// const size_t i = byte_idx / MI_BFIELD_SIZE; +// const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE; +// return mi_bfield_atomic_try_xset8(set, &chunk->bfields[i], ibyte_idx); +//} + +static inline bool mi_bchunk_try_set8(mi_bchunk_t* chunk, size_t byte_idx) { + mi_assert_internal(byte_idx*8 < MI_BCHUNK_BITS); const size_t i = byte_idx / MI_BFIELD_SIZE; const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE; - return mi_bfield_atomic_try_xset8(set, &chunk->bfields[i], ibyte_idx); + return mi_bfield_atomic_try_set8(&chunk->bfields[i], ibyte_idx); } -static inline bool mi_bitmap_chunk_try_set8(mi_bitmap_chunk_t* chunk, size_t byte_idx) { - return mi_bitmap_chunk_try_xset8(MI_BIT_SET, chunk, byte_idx); +static inline bool mi_bchunk_try_clear8(mi_bchunk_t* chunk, size_t byte_idx, bool* maybe_all_clear) { + mi_assert_internal(byte_idx*8 < MI_BCHUNK_BITS); + const size_t i = byte_idx / MI_BFIELD_SIZE; + const size_t ibyte_idx = byte_idx % MI_BFIELD_SIZE; + return mi_bfield_atomic_try_clear8(&chunk->bfields[i], ibyte_idx, maybe_all_clear); } -static 
inline bool mi_bitmap_chunk_try_clear8(mi_bitmap_chunk_t* chunk, size_t byte_idx) { - return mi_bitmap_chunk_try_xset8(MI_BIT_CLEAR, chunk, byte_idx); -} // Try to atomically set/clear a sequence of `n` bits within a chunk. // Returns true if all bits transitioned from 0 to 1 (or 1 to 0), // and false otherwise leaving all bit fields as is. -static bool mi_bitmap_chunk_try_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { - mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); +static bool mi_bchunk_try_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); mi_assert_internal(n>0); if (n==0) return true; size_t start_idx = cidx % MI_BFIELD_BITS; size_t start_field = cidx / MI_BFIELD_BITS; - size_t end_field = MI_BITMAP_CHUNK_FIELDS; + size_t end_field = MI_BCHUNK_FIELDS; mi_bfield_t mask_mid = 0; mi_bfield_t mask_end = 0; + bool field_is_clear; + bool maybe_all_clear = true; + if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = false; } // first field size_t field = start_field; size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field if (m > n) { m = n; } mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); - mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS); + mi_assert_internal(start_field < MI_BCHUNK_FIELDS); const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx); - if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start)) return false; + if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_start, &field_is_clear)) return false; + maybe_all_clear = maybe_all_clear && field_is_clear; // done? n -= m; - if (n==0) return true; + if (n==0) { + if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = maybe_all_clear; } + return true; + } // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields // mid fields while (n >= MI_BFIELD_BITS) { field++; - mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); + mi_assert_internal(field < MI_BCHUNK_FIELDS); mask_mid = mi_bfield_all_set(); - if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore; + if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid, &field_is_clear)) goto restore; + maybe_all_clear = maybe_all_clear && field_is_clear; n -= MI_BFIELD_BITS; } @@ -464,12 +469,14 @@ static bool mi_bitmap_chunk_try_xsetN(mi_xset_t set, mi_bitmap_chunk_t* chunk, s if (n > 0) { mi_assert_internal(n < MI_BFIELD_BITS); field++; - mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); + mi_assert_internal(field < MI_BCHUNK_FIELDS); end_field = field; mask_end = mi_bfield_mask(n, 0); - if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end)) goto restore; + if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_end, &field_is_clear)) goto restore; + maybe_all_clear = maybe_all_clear && field_is_clear; } + if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = maybe_all_clear; } return true; restore: @@ -483,14 +490,23 @@ restore: return false; } -static inline bool mi_bitmap_chunk_try_setN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { - return mi_bitmap_chunk_try_xsetN(MI_BIT_SET, chunk, cidx, n); +static inline bool mi_bchunk_try_setN(mi_bchunk_t* chunk, size_t cidx, size_t n) { + return mi_bchunk_try_xsetN(MI_BIT_SET, chunk, cidx, n, NULL); } -static inline bool mi_bitmap_chunk_try_clearN(mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { - return 
mi_bitmap_chunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n); +static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { + return mi_bchunk_try_xsetN(MI_BIT_CLEAR, chunk, cidx, n, maybe_all_clear); } +static inline void mi_bchunk_clear_once_set(mi_bchunk_t* chunk, size_t cidx) { + mi_assert_internal(cidx < MI_BCHUNK_BITS); + const size_t i = cidx / MI_BFIELD_BITS; + const size_t idx = cidx % MI_BFIELD_BITS; + mi_bfield_atomic_clear_once_set(&chunk->bfields[i], idx); +} + +// ------ find_and_try_xset -------- + #if defined(__AVX2__) static inline __m256i mi_mm256_zero(void) { return _mm256_setzero_si256(); @@ -507,10 +523,10 @@ static inline bool mi_mm256_is_zero( __m256i vec) { #endif // find least 0/1-bit in a chunk and try to set/clear it atomically -// set `*pidx` to the bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. +// set `*pidx` to the bit index (0 <= *pidx < MI_BCHUNK_BITS) on success. // todo: try neon version -static inline bool mi_bitmap_chunk_find_and_try_xset(mi_xset_t set, mi_bitmap_chunk_t* chunk, size_t* pidx) { -#if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) +static inline bool mi_bchunk_find_and_try_xset(mi_xset_t set, mi_bchunk_t* chunk, size_t* pidx) { +#if defined(__AVX2__) && (MI_BCHUNK_BITS==256) while (true) { const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vcmp = _mm256_cmpeq_epi64(vec, (set ? mi_mm256_ones() : mi_mm256_zero())); // (elem64 == ~0 / 0 ? 0xFF : 0) @@ -519,18 +535,18 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_xset_t set, mi_bitmap_ch if (mask==0) return false; mi_assert_internal((_tzcnt_u32(mask)%8) == 0); // tzcnt == 0, 8, 16, or 24 const size_t chunk_idx = _tzcnt_u32(mask) / 8; - mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS); + mi_assert_internal(chunk_idx < MI_BCHUNK_FIELDS); size_t cidx; if (mi_bfield_find_least_to_xset(set, chunk->bfields[chunk_idx], &cidx)) { // find the bit-idx that is set/clear if mi_likely(mi_bfield_atomic_try_xset(set, &chunk->bfields[chunk_idx], cidx)) { // set/clear it atomically *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx; - mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(*pidx < MI_BCHUNK_BITS); return true; } } // try again } -#elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512) +#elif defined(__AVX2__) && (MI_BCHUNK_BITS==512) while (true) { size_t chunk_idx = 0; #if 1 @@ -559,24 +575,24 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_xset_t set, mi_bitmap_ch mi_assert_internal((_tzcnt_u64(mask)%8) == 0); // tzcnt == 0, 8, 16, 24 , .. 
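 /* note: `mask` holds one bit per byte of the 64-byte chunk; a matching 8-byte bfield contributes
    8 consecutive set bits, so dividing the trailing-zero count by 8 maps the first match to its
    bfield index (e.g. if only bfields[3] matched, the trailing-zero count is 24 and chunk_idx is 3). */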
const size_t chunk_idx = _tzcnt_u64(mask) / 8; #endif - mi_assert_internal(chunk_idx < MI_BITMAP_CHUNK_FIELDS); + mi_assert_internal(chunk_idx < MI_BCHUNK_FIELDS); size_t cidx; if (mi_bfield_find_least_to_xset(set, chunk->bfields[chunk_idx], &cidx)) { // find the bit-idx that is set/clear if mi_likely(mi_bfield_atomic_try_xset(set, &chunk->bfields[chunk_idx], cidx)) { // set/clear it atomically *pidx = (chunk_idx*MI_BFIELD_BITS) + cidx; - mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(*pidx < MI_BCHUNK_BITS); return true; } } // try again } #else - for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { + for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { size_t idx; if mi_unlikely(mi_bfield_find_least_to_xset(set, chunk->bfields[i], &idx)) { // find least 0-bit if mi_likely(mi_bfield_atomic_try_xset(set, &chunk->bfields[i], idx)) { // try to set it atomically *pidx = (i*MI_BFIELD_BITS + idx); - mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(*pidx < MI_BCHUNK_BITS); return true; } } @@ -585,38 +601,38 @@ static inline bool mi_bitmap_chunk_find_and_try_xset(mi_xset_t set, mi_bitmap_ch #endif } -static inline bool mi_bitmap_chunk_find_and_try_clear(mi_bitmap_chunk_t* chunk, size_t* pidx) { - return mi_bitmap_chunk_find_and_try_xset(MI_BIT_CLEAR, chunk, pidx); +static inline bool mi_bchunk_find_and_try_clear(mi_bchunk_t* chunk, size_t* pidx) { + return mi_bchunk_find_and_try_xset(MI_BIT_CLEAR, chunk, pidx); } -static inline bool mi_bitmap_chunk_find_and_try_set(mi_bitmap_chunk_t* chunk, size_t* pidx) { - return mi_bitmap_chunk_find_and_try_xset(MI_BIT_SET, chunk, pidx); +static inline bool mi_bchunk_find_and_try_set(mi_bchunk_t* chunk, size_t* pidx) { + return mi_bchunk_find_and_try_xset(MI_BIT_SET, chunk, pidx); } // find least byte in a chunk with all bits set, and try unset it atomically -// set `*pidx` to its bit index (0 <= *pidx < MI_BITMAP_CHUNK_BITS) on success. +// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success. // todo: try neon version -static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, size_t* pidx) { - #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256) +static inline bool mi_bchunk_find_and_try_clear8(mi_bchunk_t* chunk, size_t* pidx) { + #if defined(__AVX2__) && (MI_BCHUNK_BITS==256) while(true) { const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields); const __m256i vcmp = _mm256_cmpeq_epi8(vec, mi_mm256_ones()); // (byte == ~0 ? 
-1 : 0)
     const uint32_t mask = _mm256_movemask_epi8(vcmp);    // mask of most significant bit of each byte
     if (mask == 0) return false;
     const size_t i = _tzcnt_u32(mask);
-    mi_assert_internal(8*i < MI_BITMAP_CHUNK_BITS);
+    mi_assert_internal(8*i < MI_BCHUNK_BITS);
     const size_t chunk_idx = i / MI_BFIELD_SIZE;
     const size_t byte_idx  = i % MI_BFIELD_SIZE;
     if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[chunk_idx],byte_idx)) {  // try to unset atomically
       *pidx = (chunk_idx*MI_BFIELD_BITS) + (byte_idx*8);
-      mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
+      mi_assert_internal(*pidx < MI_BCHUNK_BITS);
       return true;
     }
     // try again
   }
   #else
-  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+  for(int i = 0; i < MI_BCHUNK_FIELDS; i++) {
     const mi_bfield_t x = chunk->bfields[i];
     // has_set8 has low bit in each byte set if the byte in x == 0xFF
     const mi_bfield_t has_set8 = ((~x - MI_BFIELD_LO_BIT8) &      // high bit set if byte in x is 0xFF or < 0x7F
                                   (x  & MI_BFIELD_HI_BIT8))       // high bit set if byte in x is >= 0x80
                                   >> 7;                           // shift the high bit to the low bit
     size_t idx;
     if mi_unlikely(mi_bfield_find_least_bit(has_set8,&idx)) { // find least byte that has all bits set
       mi_assert_internal(idx <= (MI_BFIELD_BITS - 8));
       mi_assert_internal((idx%8)==0);
       const size_t byte_idx = idx/8;
-      if mi_likely(mi_bfield_atomic_try_xset8(MI_BIT_CLEAR,&chunk->bfields[i],byte_idx)) {  // unset the byte atomically
+      if mi_likely(mi_bfield_atomic_try_clear8(&chunk->bfields[i],byte_idx,NULL)) {  // unset the byte atomically
         *pidx = (i*MI_BFIELD_BITS) + idx;
-        mi_assert_internal(*pidx + 8 <= MI_BITMAP_CHUNK_BITS);
+        mi_assert_internal(*pidx + 8 <= MI_BCHUNK_BITS);
         return true;
       }
       // else continue
@@ -642,11 +658,11 @@ static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk,
 
 // find a sequence of `n` bits in a chunk with `n < MI_BFIELD_BITS` with all bits set,
 // and try to clear them atomically.
-// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
-static bool mi_bitmap_chunk_find_and_try_clearNX(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
+// set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success.
+static bool mi_bchunk_find_and_try_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
   if (n == 0 || n > MI_BFIELD_BITS) return false;
   const mi_bfield_t mask = mi_bfield_mask(n, 0);
-  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
+  for(int i = 0; i < MI_BCHUNK_FIELDS; i++) {
     mi_bfield_t b = chunk->bfields[i];
     size_t bshift = 0;
     size_t idx;
@@ -657,10 +673,10 @@ static bool mi_bitmap_chunk_find_and_try_clearNX(mi_bitmap_chunk_t* chunk, size_
       if ((b&mask) == mask) { // found a match
         mi_assert_internal( ((mask << bshift) >> bshift) == mask );
-        if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i],mask<<bshift)) {
+        if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i],mask<<bshift,NULL)) {
           *pidx = (i*MI_BFIELD_BITS) + bshift + idx;
-          mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
-          mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
+          mi_assert_internal(*pidx < MI_BCHUNK_BITS);
+          mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS);
           return true;
         }
       }
     }
   }
   return false;
 }
 
 // find a sequence of `n` bits in a chunk with all bits set, and try to clear them atomically.
-// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success.
-static bool mi_bitmap_chunk_find_and_try_clearN_(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
-  if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;  // cannot be more than a chunk
-  // if (n < MI_BFIELD_BITS) return mi_bitmap_chunk_find_and_try_clearNX(chunk, n, pidx);
+// set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success.
+static bool mi_bchunk_find_and_try_clearN_(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
+  if (n == 0 || n > MI_BCHUNK_BITS) return false;  // cannot be more than a chunk
+  // if (n < MI_BFIELD_BITS) return mi_bchunk_find_and_try_clearNX(chunk, n, pidx);
 
   // we align on a field, and require `field_count` fields to be all set.
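  /* a worked example, assuming 64-bit bfields: clearing n=100 bits needs
     field_count = _mi_divide_up(100,64) = 2 adjacent all-set fields; the clear then starts at
     the first field of the run and ends 100 bits in, partway through the second field. */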
// n >= MI_BFIELD_BITS; scan for a first run of fields that are all set
   const size_t field_count = _mi_divide_up(n, MI_BFIELD_BITS);  // we need this many fields
-  for (size_t i = 0; i <= MI_BITMAP_CHUNK_FIELDS - field_count; i++)
+  for (size_t i = 0; i <= MI_BCHUNK_FIELDS - field_count; i++)
   {
     // first pre-scan for a range of fields that are all set
     bool allset = true;
     size_t j = 0;
     do {
-      mi_assert_internal(i + j < MI_BITMAP_CHUNK_FIELDS);
+      mi_assert_internal(i + j < MI_BCHUNK_FIELDS);
       mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i+j]);
       if (~b != 0) {
         allset = false;
@@ -708,11 +724,11 @@ static bool mi_bitmap_chunk_find_and_try_clearN_(mi_bitmap_chunk_t* chunk, size_
     // if all set, we can try to atomically clear them
     if (allset) {
       const size_t cidx = i*MI_BFIELD_BITS;
-      if (mi_bitmap_chunk_try_clearN(chunk, cidx, n)) {
+      if (mi_bchunk_try_clearN(chunk, cidx, n, NULL)) {
         // we cleared all atomically
         *pidx = cidx;
-        mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS);
-        mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS);
+        mi_assert_internal(*pidx < MI_BCHUNK_BITS);
+        mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS);
         return true;
       }
     }
@@ -721,87 +737,43 @@ static bool mi_bitmap_chunk_find_and_try_clearN_(mi_bitmap_chunk_t* chunk, size_
 }
 
-static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) {
-  if (n==1) return mi_bitmap_chunk_find_and_try_clear(chunk, pidx);
-  if (n==8) return mi_bitmap_chunk_find_and_try_clear8(chunk, pidx);
-  if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;  // cannot be more than a chunk
-  if (n < MI_BFIELD_BITS) return mi_bitmap_chunk_find_and_try_clearNX(chunk, n, pidx);
-  return mi_bitmap_chunk_find_and_try_clearN_(chunk, n, pidx);
+static inline bool mi_bchunk_find_and_try_clearN(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
+  if (n==1) return mi_bchunk_find_and_try_clear(chunk, pidx);
+  if (n==8) return mi_bchunk_find_and_try_clear8(chunk, pidx);
+  if (n == 0 || n > MI_BCHUNK_BITS) return false;  // cannot be more than a chunk
+  if (n < MI_BFIELD_BITS) return mi_bchunk_find_and_try_clearNX(chunk, n, pidx);
  return mi_bchunk_find_and_try_clearN_(chunk, n, pidx);
 }
 
-// are all bits in a bitmap chunk set?
-// static inline bool mi_bitmap_chunk_all_are_set(mi_bitmap_chunk_t* chunk) {
-// #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
-//   const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
-//   return _mm256_test_all_ones(vec);
-// #else
-//   // written like this for vectorization
-//   mi_bfield_t x = chunk->bfields[0];
-//   for(int i = 1; i < MI_BITMAP_CHUNK_FIELDS; i++) {
-//     x = x & chunk->bfields[i];
-//   }
-//   return (~x == 0);
-// #endif
-// }
 
-// are all bits in a bitmap chunk clear?
-static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
-  #if defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==256)
-  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
-  return mi_mm256_is_zero(vec);
-  #elif defined(__AVX2__) && (MI_BITMAP_CHUNK_BITS==512)
-  const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
-  if (!mi_mm256_is_zero(vec1)) return false;
-  const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
-  return (mi_mm256_is_zero(vec2));
-  #else
-  for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
-    if (chunk->bfields[i] != 0) return false;
-  }
-  return true;
-  #endif
-}
+// are all bits in a bitmap chunk clear? (this uses guaranteed atomic reads)
+static inline bool mi_bchunk_all_are_clear(mi_bchunk_t* chunk) {
+  for(int i = 0; i < MI_BCHUNK_FIELDS; i++) {
+    if (mi_atomic_load_relaxed(&chunk->bfields[i]) != 0) return false;
+  }
+  return true;
+}
+
+// are all bits in a bitmap chunk clear? (relaxed version: the vector reads are not guaranteed to be atomic)
+static inline bool mi_bchunk_all_are_clear_relaxed(mi_bchunk_t* chunk) {
+  #if defined(__AVX2__) && (MI_BCHUNK_BITS==256)
+  const __m256i vec = _mm256_load_si256((const __m256i*)chunk->bfields);
+  return mi_mm256_is_zero(vec);
+  #elif defined(__AVX2__) && (MI_BCHUNK_BITS==512)
+  // a 64-byte cache line contains the entire chunk anyway so load both halves at once
+  const __m256i vec1 = _mm256_load_si256((const __m256i*)chunk->bfields);
+  const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
+  return (mi_mm256_is_zero(_mm256_or_si256(vec1,vec2)));
  #else
+  return mi_bchunk_all_are_clear(chunk);
  #endif
}

/* --------------------------------------------------------------------------------
-  chunkmap (for now for 32-bit sets only)
+  chunkmap
-------------------------------------------------------------------------------- */

-static void mi_chunkmap_split(mi_chunkmap_t es, mi_cmap_t* cmap, mi_epoch_t* epoch) {
-  *cmap = (mi_cmap_t)es;
-  if (epoch!=NULL) { *epoch = (mi_epoch_t)(es >> 32); }
-}
-
-static mi_chunkmap_t mi_chunkmap_join(mi_cmap_t cmap, mi_epoch_t epoch) {
-  return ((mi_chunkmap_t)epoch << MI_CHUNKMAP_BITS) | cmap;
-}
-
-// setting a bit increases the epoch
-static void mi_chunkmap_set(_Atomic(mi_chunkmap_t)* cm, size_t idx) {
-  mi_assert(idx < MI_CHUNKMAP_BITS);
-  mi_epoch_t epoch;
-  mi_cmap_t cmap;
-  mi_chunkmap_t cm_new;
-  mi_chunkmap_t cm_old = mi_atomic_load_relaxed(cm);
-  do {
-    mi_chunkmap_split(cm_old, &cmap, &epoch);
-    cm_new = mi_chunkmap_join(cmap | (((mi_cmap_t)1)<<idx), epoch+1);
-  } while (!mi_atomic_cas_weak_release(cm, &cm_old, cm_new));
-}
 
 static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
   mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
-  const size_t cmidx = chunk_idx / MI_CHUNKMAP_BITS;
-  const size_t idx   = chunk_idx % MI_CHUNKMAP_BITS;
-  mi_chunkmap_set(&bitmap->chunk_maps[cmidx], idx);
+  mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
 }
 
-static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx, mi_epoch_t epoch) {
+static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) {
   mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
-  const size_t cmidx = chunk_idx / MI_CHUNKMAP_BITS;
-  const size_t idx   = chunk_idx % MI_CHUNKMAP_BITS;
-  return mi_chunkmap_try_clear(&bitmap->chunk_maps[cmidx], idx, epoch);
-}
-
-static mi_cmap_t mi_bitmap_chunkmap(mi_bitmap_t* bitmap, size_t chunk_idx, mi_epoch_t* epoch) {
-  mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
-  const size_t cmidx = chunk_idx / MI_CHUNKMAP_BITS;
-  mi_assert_internal(cmidx < bitmap->chunk_map_count);
-  mi_cmap_t cmap;
-  mi_chunkmap_split(mi_atomic_load_relaxed(&bitmap->chunk_maps[cmidx]), &cmap, epoch);
-  return cmap;
-}
-
-static mi_epoch_t mi_bitmap_chunkmap_epoch(mi_bitmap_t* bitmap, size_t chunk_idx) {
-  mi_epoch_t epoch;
-  mi_bitmap_chunkmap(bitmap, chunk_idx, &epoch);
-  return epoch;
+  // check if the corresponding chunk is all clear
+  if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) return false;
+  // clear the chunkmap bit
+  mi_bchunk_clear(&bitmap->chunkmap, chunk_idx, NULL);
+  // .. but a concurrent set may have happened in between our all-clear test and the clearing of the
+  // bit in the mask. We check again to catch this situation.
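+  // (if a set did race in, we cannot tell whether it happened before or after our clear, so we
+  //  conservatively restore the chunkmap bit below and report failure; a stale set bit is harmless
+  //  since searchers treat the chunkmap only as a conservative approximation and re-verify against
+  //  the chunk itself.)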
+ if (!mi_bchunk_all_are_clear(&bitmap->chunks[chunk_idx])) { + mi_bchunk_set(&bitmap->chunkmap, chunk_idx); + return false; + } + return true; } /* -------------------------------------------------------------------------------- @@ -841,14 +804,14 @@ static mi_epoch_t mi_bitmap_chunkmap_epoch(mi_bitmap_t* bitmap, size_t chunk_idx -------------------------------------------------------------------------------- */ size_t mi_bitmap_size(size_t bit_count, size_t* pchunk_count) { - mi_assert_internal((bit_count % MI_BITMAP_CHUNK_BITS) == 0); - bit_count = _mi_align_up(bit_count, MI_BITMAP_CHUNK_BITS); + mi_assert_internal((bit_count % MI_BCHUNK_BITS) == 0); + bit_count = _mi_align_up(bit_count, MI_BCHUNK_BITS); mi_assert_internal(bit_count <= MI_BITMAP_MAX_BIT_COUNT); mi_assert_internal(bit_count > 0); - const size_t chunk_count = bit_count / MI_BITMAP_CHUNK_BITS; + const size_t chunk_count = bit_count / MI_BCHUNK_BITS; mi_assert_internal(chunk_count >= 1); - const size_t size = offsetof(mi_bitmap_t,chunks) + (chunk_count * MI_BITMAP_CHUNK_SIZE); - mi_assert_internal( (size%MI_BITMAP_CHUNK_SIZE) == 0 ); + const size_t size = sizeof(mi_bitmap_t) + ((chunk_count - 1) * MI_BCHUNK_SIZE); + mi_assert_internal( (size%MI_BCHUNK_SIZE) == 0 ); if (pchunk_count != NULL) { *pchunk_count = chunk_count; } return size; } @@ -861,8 +824,6 @@ size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero) if (!already_zero) { _mi_memzero_aligned(bitmap, size); } - mi_atomic_store_release(&bitmap->chunk_map_count, _mi_divide_up(chunk_count, MI_CHUNKMAP_BITS)); - mi_assert_internal(mi_atomic_load_relaxed(&bitmap->chunk_map_count) <= MI_BITMAP_MAX_CHUNKMAPS); mi_atomic_store_release(&bitmap->chunk_count, chunk_count); mi_assert_internal(mi_atomic_load_relaxed(&bitmap->chunk_count) <= MI_BITMAP_MAX_CHUNK_COUNT); return size; @@ -874,32 +835,39 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) { mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap)); // first chunk - size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - size_t m = MI_BITMAP_CHUNK_BITS - cidx; + size_t chunk_idx = idx / MI_BCHUNK_BITS; + const size_t cidx = idx % MI_BCHUNK_BITS; + size_t m = MI_BCHUNK_BITS - cidx; if (m > n) { m = n; } - mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, m, NULL); + mi_bchunk_setN(&bitmap->chunks[chunk_idx], cidx, m, NULL); mi_bitmap_chunkmap_set(bitmap, chunk_idx); // n can be large so use memset for efficiency for all in-between chunks chunk_idx++; n -= m; - const size_t mid_chunks = n / MI_BITMAP_CHUNK_BITS; + const size_t mid_chunks = n / MI_BCHUNK_BITS; if (mid_chunks > 0) { - _mi_memset(&bitmap->chunks[chunk_idx], ~0, mid_chunks * MI_BITMAP_CHUNK_SIZE); + _mi_memset(&bitmap->chunks[chunk_idx], ~0, mid_chunks * MI_BCHUNK_SIZE); const size_t end_chunk = chunk_idx + mid_chunks; while (chunk_idx < end_chunk) { - mi_bitmap_chunkmap_set(bitmap, chunk_idx); - chunk_idx++; + if ((chunk_idx % MI_BFIELD_BITS) == 0 && (chunk_idx + MI_BFIELD_BITS <= end_chunk)) { + // optimize: we can set a full bfield in the chunkmap + mi_atomic_store_relaxed( &bitmap->chunkmap.bfields[chunk_idx/MI_BFIELD_BITS], mi_bfield_all_set()); + chunk_idx += MI_BFIELD_BITS; + } + else { + mi_bitmap_chunkmap_set(bitmap, chunk_idx); + chunk_idx++; + } } - n -= (mid_chunks * MI_BITMAP_CHUNK_BITS); + n -= (mid_chunks * MI_BCHUNK_BITS); } // last chunk if (n > 0) { - mi_assert_internal(n < MI_BITMAP_CHUNK_BITS); - mi_assert_internal(chunk_idx < 
MI_BITMAP_CHUNK_FIELDS);
-    mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], 0, n, NULL);
+    mi_assert_internal(n < MI_BCHUNK_BITS);
+    mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+    mi_bchunk_setN(&bitmap->chunks[chunk_idx], 0, n, NULL);
     mi_bitmap_chunkmap_set(bitmap, chunk_idx);
   }
 }
@@ -909,22 +877,19 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
 // and false otherwise leaving the bitmask as is.
 static bool mi_bitmap_try_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
   mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
-  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
-  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
+  const size_t chunk_idx = idx / MI_BCHUNK_BITS;
+  const size_t cidx = idx % MI_BCHUNK_BITS;
   mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
   if (set) {
-    // first set the chunkmap since it is a conservative approximation (increases epoch)
-    mi_bitmap_chunkmap_set(bitmap, chunk_idx);
-    // then actually try to set it atomically
-    return mi_bitmap_chunk_try_set(&bitmap->chunks[chunk_idx], cidx);
+    const bool ok = mi_bchunk_try_set(&bitmap->chunks[chunk_idx], cidx);
+    if (ok) { mi_bitmap_chunkmap_set(bitmap,chunk_idx); }   // set afterwards
+    return ok;
   }
   else {
-    const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap, chunk_idx);
-    bool cleared = mi_bitmap_chunk_try_clear(&bitmap->chunks[chunk_idx], cidx);
-    if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
-    }
-    return cleared;
+    bool maybe_all_clear;
+    const bool ok = mi_bchunk_try_clear(&bitmap->chunks[chunk_idx], cidx, &maybe_all_clear);
+    if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); }
+    return ok;
   }
 }
@@ -933,126 +898,107 @@ static bool mi_bitmap_try_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
 static bool mi_bitmap_try_xset8(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
   mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
   mi_assert_internal(idx%8 == 0);
-  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
-  const size_t byte_idx = (idx % MI_BITMAP_CHUNK_BITS)/8;
+  const size_t chunk_idx = idx / MI_BCHUNK_BITS;
+  const size_t byte_idx = (idx % MI_BCHUNK_BITS)/8;
   mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
-  if (set) {
-    // first set the anyset since it is a conservative approximation (increases epoch)
-    mi_bitmap_chunkmap_set(bitmap, chunk_idx);
-    // then actually try to set it atomically
-    return mi_bitmap_chunk_try_set8(&bitmap->chunks[chunk_idx], byte_idx);
+  if (set) {
+    const bool ok = mi_bchunk_try_set8(&bitmap->chunks[chunk_idx], byte_idx);
+    if (ok) { mi_bitmap_chunkmap_set(bitmap,chunk_idx); }   // set afterwards
+    return ok;
   }
   else {
-    const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap,chunk_idx);
-    bool cleared = mi_bitmap_chunk_try_clear8(&bitmap->chunks[chunk_idx], byte_idx);
-    if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap,chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
-    }
-    return cleared;
+    bool maybe_all_clear;
+    const bool ok = mi_bchunk_try_clear8(&bitmap->chunks[chunk_idx], byte_idx, &maybe_all_clear);
+    if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); }
+    return ok;
   }
 }
 
-
 // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
 // and false otherwise leaving the bitmask
as is. -// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! +// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)! static bool mi_bitmap_try_xsetN_(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { mi_assert_internal(n>0); - mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); + mi_assert_internal(n<=MI_BCHUNK_BITS); mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap)); if (n==0 || idx + n > mi_bitmap_max_bits(bitmap)) return false; - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) + const size_t chunk_idx = idx / MI_BCHUNK_BITS; + const size_t cidx = idx % MI_BCHUNK_BITS; + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); // don't cross chunks (for now) mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); - if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia - + if (cidx + n > MI_BCHUNK_BITS) { n = MI_BCHUNK_BITS - cidx; } // paranoia if (set) { - // first set the chunkmap since it is a conservative approximation (increases epoch) - mi_bitmap_chunkmap_set(bitmap, chunk_idx); - // then actually try to set it atomically - return mi_bitmap_chunk_try_setN(&bitmap->chunks[chunk_idx], cidx, n); + const bool ok = mi_bchunk_try_setN(&bitmap->chunks[chunk_idx], cidx, n); + if (ok) { mi_bitmap_chunkmap_set(bitmap,chunk_idx); } // set afterwards + return ok; } else { - const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap,chunk_idx); - bool cleared = mi_bitmap_chunk_try_clearN(&bitmap->chunks[chunk_idx], cidx, n); - if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap,chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { - mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch); - } - return cleared; + bool maybe_all_clear; + const bool ok = mi_bchunk_try_clearN(&bitmap->chunks[chunk_idx], cidx, n, &maybe_all_clear); + if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } + return ok; } } mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { - mi_assert_internal(n>0 && n<=MI_BITMAP_CHUNK_BITS); + mi_assert_internal(n>0 && n<=MI_BCHUNK_BITS); if (n==1) return mi_bitmap_try_xset(set, bitmap, idx); if (n==8) return mi_bitmap_try_xset8(set, bitmap, idx); - // todo: add 32/64 for large pages + // todo: add 32/64 for large pages ? return mi_bitmap_try_xsetN_(set, bitmap, idx, n); } -// Set/clear a sequence of 2 bits that were on an even `idx` in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). -// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! 
-static bool mi_bitmap_xset_pair(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) { - mi_assert_internal((idx%2)==0); - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS); - mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); +// Set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0) +bool mi_bitmap_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) { + mi_assert_internal(idx < mi_bitmap_max_bits(bitmap)); + const size_t chunk_idx = idx / MI_BCHUNK_BITS; + const size_t cidx = idx % MI_BCHUNK_BITS; + mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); if (set) { - // first set the chunkmap since it is a conservative approximation (increases epoch) - mi_bitmap_chunkmap_set(bitmap, chunk_idx); - // then actually try to set it atomically - return mi_bitmap_chunk_set2(&bitmap->chunks[chunk_idx], cidx, NULL); + const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx); + mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards + return wasclear; } else { - const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap, chunk_idx); - bool already_clear = false; - const bool allset = mi_bitmap_chunk_clear2(&bitmap->chunks[chunk_idx], cidx, &already_clear); - if (!already_clear && epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { - mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch); - } - return allset; + bool maybe_all_clear; + const bool wasset = mi_bchunk_clear(&bitmap->chunks[chunk_idx], cidx, &maybe_all_clear); + if (maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } + return wasset; } } // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). -// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! +// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)! 
static bool mi_bitmap_xsetN_(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset ) { mi_assert_internal(n>0); - mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); + mi_assert_internal(n<=MI_BCHUNK_BITS); - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) + const size_t chunk_idx = idx / MI_BCHUNK_BITS; + const size_t cidx = idx % MI_BCHUNK_BITS; + mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); // don't cross chunks (for now) mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); - if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; } // paranoia + if (cidx + n > MI_BCHUNK_BITS) { n = MI_BCHUNK_BITS - cidx; } // paranoia if (set) { - // first set the chunkmap since it is a conservative approximation (increases epoch) - mi_bitmap_chunkmap_set(bitmap, chunk_idx); - // then actually try to set it atomically - return mi_bitmap_chunk_setN(&bitmap->chunks[chunk_idx], cidx, n, already_xset); + const bool allclear = mi_bchunk_setN(&bitmap->chunks[chunk_idx], cidx, n, already_xset); + mi_bitmap_chunkmap_set(bitmap,chunk_idx); // set afterwards + return allclear; } else { - const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap,chunk_idx); size_t already_clear = 0; - const bool allset = mi_bitmap_chunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, &already_clear); + const bool allset = mi_bchunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, &already_clear ); if (already_xset != NULL) { *already_xset = already_clear; } - if (already_clear < n && epoch == mi_bitmap_chunkmap_epoch(bitmap,chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { - mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch); - } + if (already_clear < n) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); } return allset; } } // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). -// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! +// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)! bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset) { - mi_assert_internal(n>0 && n<=MI_BITMAP_CHUNK_BITS); + mi_assert_internal(n>0 && n<=MI_BCHUNK_BITS); //TODO: specialize? //if (n==1) return mi_bitmap_xset(set, bitmap, idx); //if (n==2) return mi_bitmap_xset(set, bitmap, idx); @@ -1061,82 +1007,52 @@ bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, s } -// Is a sequence of 2 bits already all set/cleared? -static inline bool mi_bitmap_is_xset2(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) { - mi_assert_internal(idx + 2 <= mi_bitmap_max_bits(bitmap)); - const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; - const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; - mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS); - mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); - return mi_bitmap_chunk_is_xset2(set, &bitmap->chunks[chunk_idx], cidx); -} - - // Is a sequence of n bits already all set/cleared? 
bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) {
   mi_assert_internal(n>0);
-  mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS);
+  mi_assert_internal(n<=MI_BCHUNK_BITS);
   mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap));
 
-  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
-  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
-  mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS);  // don't cross chunks (for now)
+  const size_t chunk_idx = idx / MI_BCHUNK_BITS;
+  const size_t cidx = idx % MI_BCHUNK_BITS;
+  mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);  // don't cross chunks (for now)
   mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
-  if (cidx + n > MI_BITMAP_CHUNK_BITS) { n = MI_BITMAP_CHUNK_BITS - cidx; }  // paranoia
+  if (cidx + n > MI_BCHUNK_BITS) { n = MI_BCHUNK_BITS - cidx; }  // paranoia
 
-  return mi_bitmap_chunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
+  return mi_bchunk_is_xsetN(set, &bitmap->chunks[chunk_idx], cidx, n);
 }
 
 
 /* --------------------------------------------------------------------------------
   bitmap try_find_and_clear
-------------------------------------------------------------------------------- */
 
-static inline size_t mi_bitmap_find_hi_chunk(mi_bitmap_t* bitmap) {
-  size_t hi_chunk_map_idx = 0;
-  mi_cmap_t hi_cmap = 0;
-  for (size_t i = 1; i < mi_bitmap_chunk_map_count(bitmap); i++) {
-    mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, i, NULL);
-    if (cmap != 0) {
-      hi_chunk_map_idx = i;
-      hi_cmap = cmap;
-    }
-  }
-  uint32_t cmap_idx;
-  if (mi_bsr32(hi_cmap, &cmap_idx)) {
-    const size_t hi = (hi_chunk_map_idx * MI_CHUNKMAP_BITS) + cmap_idx;
-    mi_assert_internal(hi < mi_bitmap_chunk_count(bitmap));
-    return hi;
-  }
-  else {
-    return 0;
-  }
-}
+
 
 #define mi_bitmap_forall_chunks(bitmap, tseq, name_epoch, name_chunk_idx) \
   { \
   /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
   MI_UNUSED(tseq); \
   const size_t chunk_start = 0; /* tseq % (1 + mi_bitmap_find_hi_chunk(bitmap)); */ \
-  const size_t chunk_map_start = chunk_start / MI_CHUNKMAP_BITS; \
-  const uint32_t chunk_map_start_idx = (uint32_t)(chunk_start % MI_CHUNKMAP_BITS); \
+  const size_t chunkmap_max_bfield = _mi_divide_up( mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS ); \
+  const size_t chunkmap_start = chunk_start / MI_BFIELD_BITS; \
+  const size_t chunkmap_start_idx = chunk_start % MI_BFIELD_BITS; \
  /* for each chunkmap entry `i` */ \
-  for (size_t _i = 0; _i < bitmap->chunk_map_count; _i++) { \
-    size_t i = (_i + chunk_map_start); \
-    if (i >= bitmap->chunk_map_count) { i -= bitmap->chunk_map_count; }  /* adjust for the start position */ \
+  for (size_t _i = 0; _i < chunkmap_max_bfield; _i++) { \
+    size_t i = (_i + chunkmap_start); \
+    if (i >= chunkmap_max_bfield) { i -= chunkmap_max_bfield; }  /* adjust for the start position */ \
    \
-    const size_t chunk_idx0 = i*MI_CHUNKMAP_BITS; \
-    mi_epoch_t name_epoch; \
-    mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, chunk_idx0, &name_epoch); \
-    uint32_t cmap_idx_shift = 0;   /* shift through the cmap */ \
-    if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); cmap_idx_shift = chunk_map_start_idx; }  /* rotate right for the start position (on the first iteration) */ \
+    const size_t chunk_idx0 = i*MI_BFIELD_BITS; \
+    mi_bfield_t cmap = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]); \
+    size_t cmap_idx_shift = 0;   /* shift through the cmap */ \
+    if (_i == 0) { cmap = mi_rotr(cmap, chunkmap_start_idx); cmap_idx_shift = chunkmap_start_idx; }  /* rotate right for the start
position (on the first iteration) */ \
    \
-    uint32_t cmap_idx;   /* one bit set of each chunk that may have bits set */ \
-    while (mi_bsf32(cmap, &cmap_idx)) {  /* find least bit that is set */ \
+    size_t cmap_idx; \
+    while (mi_bsf(cmap, &cmap_idx)) {  /* find least bit that is set */ \
      /* set the chunk idx */ \
-      size_t name_chunk_idx = chunk_idx0 + ((cmap_idx + cmap_idx_shift) % MI_CHUNKMAP_BITS); \
-      if (name_chunk_idx >= mi_bitmap_chunk_count(bitmap)) { name_chunk_idx -= mi_bitmap_chunk_count(bitmap); } \
+      size_t name_chunk_idx = chunk_idx0 + ((cmap_idx + cmap_idx_shift) % MI_BFIELD_BITS); \
+      mi_assert(name_chunk_idx < mi_bitmap_chunk_count(bitmap)); \
      /* try to find and clear N bits in that chunk */ \
-      if (name_chunk_idx < mi_bitmap_chunk_count(bitmap)) { /* we can have less chunks than in the chunkmap.. */
+      {
 
 #define mi_bitmap_forall_chunks_end() \
       } \
@@ -1146,7 +1062,7 @@ static inline size_t mi_bitmap_find_hi_chunk(mi_bitmap_t* bitmap) {
        cmap >>= 1; \
      } \
  }}
- 
+ 
 // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
 // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
 mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
 {
   mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
   {
     size_t cidx;
-    if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
-      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
+    if mi_likely(mi_bchunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
+      *pidx = (chunk_idx * MI_BCHUNK_BITS) + cidx;
       mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
       return true;
     }
     else {
       // we may find that all are cleared only on a second iteration but that is ok as
       // the chunkmap is a conservative approximation.
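      /* (a set chunkmap bit only means the chunk may contain set bits, so clearing it lazily here
         is just an optimization that lets later searches skip this chunk) */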
@@ -1154,17 +1070,15 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t
 {
   mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
   {
     size_t cidx;
-    if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
-      *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
+    if mi_likely(mi_bchunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
+      *pidx = (chunk_idx * MI_BCHUNK_BITS) + cidx;
       mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
       return true;
     }
     else {
       // we may find that all are cleared only on a second iteration but that is ok as
       // the chunkmap is a conservative approximation.
-      if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-        mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
-      }
+      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx);
       // continue
     }
   }
@@ -1172,183 +1086,48 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t
   return false;
 }
 
-/* --------------------------------------------------------------------------------
-  pairmap
--------------------------------------------------------------------------------- */
-
-void mi_pairmap_init(mi_pairmap_t* pairmap, mi_bitmap_t* bm1, mi_bitmap_t* bm2) {
-  mi_assert_internal(mi_bitmap_chunk_count(bm1)==mi_bitmap_chunk_count(bm2));
-  pairmap->bitmap1 = bm1;
-  pairmap->bitmap2 = bm2;
-}
-
-static void mi_pairmap_from_pair_idx(mi_pairmap_t* pairmap, size_t pair_idx, mi_bitmap_t** bitmap, size_t* pidx) {
-  const size_t idx = 2*pair_idx;
-  const size_t maxbits = mi_bitmap_max_bits(pairmap->bitmap1);
-  mi_assert_internal(pair_idx < maxbits);
-  if (idx < maxbits) {
-    *bitmap = pairmap->bitmap1;
-    *pidx = idx;
-  }
-  else {
-    *bitmap = pairmap->bitmap2;
-    *pidx = idx - maxbits;
-  }
-}
-
-bool mi_pairmap_set(mi_pairmap_t* pairmap, size_t pair_idx) {
-  mi_bitmap_t* bitmap;
-  size_t idx;
-  mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
-  return mi_bitmap_xset_pair(MI_BIT_SET, bitmap, idx);
-}
-
-bool mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
-  mi_bitmap_t* bitmap;
-  size_t idx;
-  mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
-  return mi_bitmap_xset_pair(MI_BIT_CLEAR, bitmap, idx);
-}
-
-bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
-  mi_bitmap_t* bitmap;
-  size_t idx;
-  mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
-  return mi_bitmap_is_xset2(MI_BIT_CLEAR, bitmap, idx);
-}
-
-
-
-/* --------------------------------------------------------------------------------
-  pairmap clear while not busy
--------------------------------------------------------------------------------- */
-
-static inline bool mi_bfield_atomic_clear2_once_not_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
-  mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 10 (busy), and 11 (set).
-  mi_assert_internal(idx < MI_BFIELD_BITS-1);
-  const mi_bfield_t mask = ((mi_bfield_t)MI_PAIR_SET << idx);
-  const mi_bfield_t mask_busy = ((mi_bfield_t)MI_PAIR_BUSY << idx);
-  mi_bfield_t bnew;
-  mi_bfield_t old = mi_atomic_load_relaxed(b);
-  do {
-    if mi_unlikely((old&mask)==mask_busy) {
-      old = mi_atomic_load_acquire(b);
-      if ((old&mask)==mask_busy) { _mi_stat_counter_increase(&_mi_stats_main.pages_unabandon_busy_wait, 1); }
-      while ((old&mask)==mask_busy) {  // busy wait
-        mi_atomic_yield();
-        old = mi_atomic_load_acquire(b);
-      }
-    }
-    bnew = (old & ~mask);  // clear
-  } while (!mi_atomic_cas_weak_acq_rel(b, &old, bnew));
-  mi_assert_internal((old&mask) != mask_busy);  // we should never clear a busy page
-  mi_assert_internal((old&mask) == mask);  // in our case: we should only go from set to clear (when reclaiming an abandoned page from a free)
-  return ((old&mask) == mask);
-}
-
-static inline bool mi_bitmap_chunk_clear2_once_not_busy(mi_bitmap_chunk_t* chunk, size_t cidx) {
-  mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
-  const size_t i = cidx / MI_BFIELD_BITS;
-  const size_t idx = cidx % MI_BFIELD_BITS;
-  return mi_bfield_atomic_clear2_once_not_busy(&chunk->bfields[i], idx);
-}
-
-static bool mi_bitmap_clear2_once_not_busy(mi_bitmap_t* bitmap, size_t idx) {
-  mi_assert_internal((idx%2)==0);
-  mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
-  const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
-  const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
-  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
-  const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap, chunk_idx);
-  bool cleared = mi_bitmap_chunk_clear2_once_not_busy(&bitmap->chunks[chunk_idx], cidx);
-  if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-    mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
-  }
-  return cleared;
-}
-
-void mi_pairmap_clear_once_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
-  mi_bitmap_t* bitmap;
-  size_t idx;
-  mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
-  mi_bitmap_clear2_once_not_busy(bitmap, idx);
-}
-
-
-
-/* --------------------------------------------------------------------------------
-  pairmap try and set busy
--------------------------------------------------------------------------------- */
-
-// Atomically go from set to busy, or return false otherwise and leave the bit field as-is.
-static inline bool mi_bfield_atomic_try_set_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
-  mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 10 (busy), and 11 (set).
-  mi_assert_internal(idx < MI_BFIELD_BITS-1);
-  const mi_bfield_t mask = ((mi_bfield_t)MI_PAIR_SET << idx);
-  const mi_bfield_t mask_busy = ((mi_bfield_t)MI_PAIR_BUSY << idx);
-  mi_bfield_t old;
-  mi_bfield_t bnew;
-  do {
-    old = mi_atomic_load_relaxed(b);
-    if ((old & mask) != mask) return false;  // no longer set
-    bnew = (old & ~mask) | mask_busy;
-  } while (!mi_atomic_cas_weak_acq_rel(b, &old, bnew));
-  return true;
-}
-
-static inline bool mi_bitmap_chunk_try_find_and_set_busy(mi_bitmap_chunk_t* chunk, size_t* pidx) {
-  for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
-    while (true) {
-      const mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]) & MI_BFIELD_LO_BIT2;  // only keep MI_PAIR_SET bits
-      size_t idx;
-      if (!mi_bfield_find_least_bit(b, &idx)) {  // find least 1-bit
-        break;  // not found: continue with the next field
-      }
-      else {
-        mi_assert_internal((idx%2)==0);
-        if mi_likely(mi_bfield_atomic_try_set_busy(&chunk->bfields[i], idx)) {
-          *pidx = (i*MI_BFIELD_BITS) + idx;
-          mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS-1);
-          return true;
-        }
-        // else: try this word once again
-      }
-    }
-  }
-  return false;
-}
-
-
-static bool mi_bitmap_try_find_and_set_busy(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t idx_offset, size_t* ppair_idx,
-                                            mi_bitmap_claim_while_busy_fun_t* claim, void* arg1, void* arg2)
+mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx,
+                                                    mi_claim_fun_t* claim, void* arg1, void* arg2)
 {
   mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
   {
-    MI_UNUSED(epoch); MI_UNUSED(n);
-    mi_assert_internal(n==2);
     size_t cidx;
-    if mi_likely(mi_bitmap_chunk_try_find_and_set_busy(&bitmap->chunks[chunk_idx], &cidx)) {
-      const size_t idx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
-      mi_assert_internal((idx%2)==0);
-      const size_t pair_idx = (idx + idx_offset)/2;
-      if (claim(pair_idx, arg1, arg2)) {  // while busy, the claim function can read from the page
-        mi_bitmap_xset_pair(MI_BIT_CLEAR, bitmap, idx);  // claimed, clear the entry
-        *ppair_idx = pair_idx;
+    if mi_likely(mi_bchunk_find_and_try_clear(&bitmap->chunks[chunk_idx], &cidx)) {
+      const size_t slice_index = (chunk_idx * MI_BCHUNK_BITS) + cidx;
+      mi_assert_internal(slice_index < mi_bitmap_max_bits(bitmap));
+      bool keep_set = true;
+      if ((*claim)(slice_index, arg1, arg2, &keep_set)) {
+        // success!
+        mi_assert_internal(!keep_set);
+        *pidx = slice_index;
         return true;
       }
       else {
-        mi_bitmap_xset_pair(MI_BIT_SET, bitmap, idx);  // not claimed, reset the entry
-        // and continue
+        // failed to claim it, set the abandoned mapping again (unless the page was freed)
+        if (keep_set) {
+          const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx);
+          mi_assert_internal(wasclear); MI_UNUSED(wasclear);
+        }
+        // continue
       }
     }
+    else {
+      // we may find that all are cleared only on a second iteration but that is ok as
+      // the chunkmap is a conservative approximation.
+      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx);
+      // continue
+    }
   }
   mi_bitmap_forall_chunks_end();
   return false;
 }
 
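The bit for a found entry is already cleared when the claim callback runs, so the new `keep_set` out-parameter is how the callback tells `mi_bitmap_try_find_and_claim` whether to restore the bit on a failed claim. A hypothetical callback matching the `mi_claim_fun_t` shape (the slot table and all names here are invented for illustration; they are not part of this patch):

  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stddef.h>

  typedef struct { _Atomic(bool) busy; } my_slot_t;   // hypothetical resource table

  static bool my_claim(size_t slice_index, void* arg1, void* arg2, bool* keep_set) {
    my_slot_t* const slots = (my_slot_t*)arg1;
    (void)arg2;
    bool expected = false;
    if (!atomic_compare_exchange_strong(&slots[slice_index].busy, &expected, true)) {
      *keep_set = true;   // claim failed: re-set the bit so other threads can retry
      return false;
    }
    *keep_set = false;    // claimed: the cleared bit stays cleared
    return true;
  }

A search would then be invoked as `mi_bitmap_try_find_and_claim(bitmap, tseq, &idx, &my_claim, slots, NULL)`.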
-// Used to find an abandoned page, and transition from set to busy.
-mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pair_idx,
-                                                        mi_bitmap_claim_while_busy_fun_t* claim, void* arg1, void* arg2 ) {
-  if (mi_bitmap_try_find_and_set_busy(pairmap->bitmap1, 2, tseq, 0, pair_idx, claim, arg1, arg2)) return true;
-  return mi_bitmap_try_find_and_set_busy(pairmap->bitmap2, 2, tseq, mi_bitmap_max_bits(pairmap->bitmap1), pair_idx, claim, arg1, arg2);
-}
+// Clear a bit once it is set.
+void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx) {
+  mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
+  const size_t chunk_idx = idx / MI_BCHUNK_BITS;
+  const size_t cidx = idx % MI_BCHUNK_BITS;
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  mi_bchunk_clear_once_set(&bitmap->chunks[chunk_idx], cidx);
+}
\ No newline at end of file
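`mi_bitmap_clear_once_set` (together with the `mi_bchunk_clear_once_set` primitive it forwards to, which is not shown in this hunk) replaces `mi_pairmap_clear_once_not_busy`: instead of busy-waiting on a BUSY pair, the caller now waits until the single bit is observed set and then clears it. A plausible single-word model of that primitive, an assumption based on its name and call site rather than on code shown here:

  #include <stdatomic.h>

  // Wait until bit `idx` of *b is set, then clear it atomically.
  // (Hypothetical stand-in for mi_bchunk_clear_once_set on one bfield.)
  static void clear_once_set(_Atomic(unsigned long long)* b, unsigned idx) {
    const unsigned long long mask = 1ull << idx;
    for (;;) {
      while ((atomic_load_explicit(b, memory_order_acquire) & mask) == 0) {
        // spin; real code would yield to the OS here
      }
      const unsigned long long old = atomic_fetch_and(b, ~mask);  // try to clear it
      if ((old & mask) != 0) return;  // we observed it set and cleared it ourselves
    }
  }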
diff --git a/src/bitmap.h b/src/bitmap.h
index 78ee5380..9ef97d2f 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -19,35 +19,34 @@ Concurrent bitmap that can set/reset sequences of bits atomically
   each bit usually represents a single MI_ARENA_SLICE_SIZE in an arena (64 KiB).
   We need 16K bits to represent a 1GiB arena.
 
-  `mi_bitmap_chunk_t`: a chunk of bfield's of a total of MI_BITMAP_CHUNK_BITS (= 512)
+  `mi_bchunk_t`: a chunk of bfield's of a total of MI_BCHUNK_BITS (= 512 on 64-bit, 256 on 32-bit)
   allocations never span across chunks -- so MI_ARENA_MAX_OBJ_SIZE is the number
   of bits in a chunk times the MI_ARENA_SLICE_SIZE (512 * 64KiB = 32 MiB).
-  These chunks are cache-aligned and we can use AVX2/AVX512/SVE/SVE2/etc. instructions
+  These chunks are cache-aligned and we can use AVX2/AVX512/NEON/SVE/SVE2/etc. instructions
   to scan for bits (perhaps) more efficiently.
 
-  `mi_chunkmap_t`: for each chunk we track if it has (potentially) any bit set.
+  `mi_bchunkmap_t` == `mi_bchunk_t`: for each chunk we track if it has (potentially) any bit set.
   The chunkmap has 1 bit per chunk that is set if the chunk potentially has a bit set.
   This is used to avoid scanning every chunk. (and thus strictly an optimization)
   It is conservative: it is fine to set a bit in the chunk map even if the chunk turns out
-  to have no bits set.
+  to have no bits set. It is also allowed to briefly have a clear bit even if the
+  chunk has bits set, as long as we guarantee that we set the bit later on -- this
+  allows us to set the chunkmap bit after we set a bit in the corresponding chunk.
 
-  When we (potentially) set a bit in a chunk, we first update the chunkmap.
   However, when we clear a bit in a chunk, and the chunk is indeed all clear, we
   cannot safely clear the bit corresponding to the chunk in the chunkmap since it
-  may race with another thread setting a bit in the same chunk (and we may clear the
-  bit even though a bit is set in the chunk which is not allowed).
+  may race with another thread setting a bit in the same chunk. Therefore, when
+  clearing, we first test if a chunk is clear, then clear the chunkmap bit, and
+  then test again to catch any set bits that we missed.
 
-  To fix this, the chunkmap contains 32-bits of bits for chunks, and a 32-bit "epoch"
-  counter that is increased everytime a bit is set. We only clear a bit if the epoch
-  stayed the same over our clear operation (so we know no other thread in the mean
-  time set a bit in any of the chunks corresponding to the chunkmap).
-  Since increasing the epoch and setting a bit must be atomic, we use only half-word
-  bits (32) (we could use 128-bit atomics if needed since modern hardware supports this)
+  Since the chunkmap may thus be briefly out-of-sync, this means that we may sometimes
+  not find a free page even though it's there (but we accept this as we avoid taking
+  full locks). (Another way to do this is to use an epoch but we'd like to avoid that
+  complexity for now.)
 
-  `mi_bitmap_t`: a bitmap with N chunks. A bitmap always has MI_BITMAP_MAX_CHUNK_FIELDS (=16)
-  and can support arena's from few chunks up to 16 chunkmap's = 16 * 32 chunks = 16 GiB
-  The `chunk_count` can be anything from 1 to the max supported by the chunkmap's but
-  each chunk is always complete (512 bits, so 512 * 64KiB = 32MiB memory area's).
+  `mi_bitmap_t`: a bitmap with N chunks. A bitmap has a chunkmap of MI_BCHUNK_BITS (512)
+  and thus has at most 512 chunks (=2^18 bits x 64 KiB slices = 16 GiB max arena size).
+  The minimum is 1 chunk which is a 32 MiB arena.
 
   For now, the implementation assumes MI_HAS_FAST_BITSCAN and uses trailing-zero-count
   and pop-count (but we think it can be adapted to work reasonably well on older hardware too)
@@ -56,60 +55,49 @@ Concurrent bitmap that can set/reset sequences of bits atomically
 // A word-size bit field.
 typedef size_t mi_bfield_t;
 
-#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
-#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
-#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
-#define MI_BFIELD_BITS_MOD_MASK (MI_BFIELD_BITS - 1)
-#define MI_BFIELD_LO_BIT8 (((~(mi_bfield_t)0))/0xFF) // 0x01010101 ..
-#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
+#define MI_BFIELD_BITS_SHIFT  (MI_SIZE_SHIFT+3)
+#define MI_BFIELD_BITS        (1 << MI_BFIELD_BITS_SHIFT)
+#define MI_BFIELD_SIZE        (MI_BFIELD_BITS/8)
+#define MI_BFIELD_LO_BIT8     (((~(mi_bfield_t)0))/0xFF)  // 0x01010101 ..
+#define MI_BFIELD_HI_BIT8     (MI_BFIELD_LO_BIT8 << 7)    // 0x80808080 ..
 
-#define MI_BITMAP_CHUNK_SIZE (MI_BITMAP_CHUNK_BITS / 8)
-#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
-#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
-
-// A bitmap chunk contains 512 bits of bfields on 64_bit (256 on 32-bit)
-typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_chunk_s {
-  _Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
-} mi_bitmap_chunk_t;
+#define MI_BCHUNK_SIZE        (MI_BCHUNK_BITS / 8)
+#define MI_BCHUNK_FIELDS      (MI_BCHUNK_BITS / MI_BFIELD_BITS)  // 8 on both 64- and 32-bit
 
-// for now 32-bit epoch + 32-bit bit-set (note: with ABA instructions we can double this)
-typedef uint64_t mi_chunkmap_t;
-typedef uint32_t mi_epoch_t;
-typedef uint32_t mi_cmap_t;
+// A bitmap chunk contains 512 bits on 64-bit (256 on 32-bit)
+typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bchunk_s {
+  _Atomic(mi_bfield_t) bfields[MI_BCHUNK_FIELDS];
+} mi_bchunk_t;
 
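As a quick check of the constants claimed in the comments above (512-bit chunks of eight word-sized bfields, at most 512 chunks since the chunkmap is itself one chunk, 64 KiB arena slices), a throwaway computation for the 64-bit case:

  #include <stddef.h>
  #include <stdio.h>

  int main(void) {
    const size_t bfield_bits = 8 * sizeof(size_t);   // 64 on 64-bit
    const size_t bchunk_bits = 8 * bfield_bits;      // 512 bits: 8 bfields per chunk
    const size_t max_chunks  = bchunk_bits;          // one chunkmap bit per chunk
    const size_t slice_size  = 64 * 1024;            // MI_ARENA_SLICE_SIZE on 64-bit
    printf("max arena: %zu GiB\n", (max_chunks * bchunk_bits * slice_size) >> 30);  // 16
    printf("min arena: %zu MiB\n", (bchunk_bits * slice_size) >> 20);               // 32
    return 0;
  }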
-#define MI_CHUNKMAP_BITS (32)   // 1 chunkmap tracks 32 chunks
+// The chunkmap has one bit per corresponding chunk that is set if the chunk potentially has bits set.
+// The chunkmap is itself a chunk.
+typedef mi_bchunk_t  mi_bchunkmap_t;
 
-#define MI_BITMAP_MAX_CHUNKMAPS (16)
-#define MI_BITMAP_MAX_CHUNK_COUNT (MI_BITMAP_MAX_CHUNKMAPS * MI_CHUNKMAP_BITS)
-#define MI_BITMAP_MIN_CHUNK_COUNT (1 * MI_CHUNKMAP_BITS) // 1 GiB arena
+#define MI_BCHUNKMAP_BITS     MI_BCHUNK_BITS
 
-#define MI_BITMAP_MAX_BIT_COUNT (MI_BITMAP_MAX_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS) // 16 GiB arena
-#define MI_BITMAP_MIN_BIT_COUNT (MI_BITMAP_MIN_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS) // 1 GiB arena
+#define MI_BITMAP_MAX_CHUNK_COUNT (MI_BCHUNKMAP_BITS)
+#define MI_BITMAP_MIN_CHUNK_COUNT (1)
+#define MI_BITMAP_MAX_BIT_COUNT   (MI_BITMAP_MAX_CHUNK_COUNT * MI_BCHUNK_BITS)  // 16 GiB arena
+#define MI_BITMAP_MIN_BIT_COUNT   (MI_BITMAP_MIN_CHUNK_COUNT * MI_BCHUNK_BITS)  // 32 MiB arena
 
 // An atomic bitmap
-typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_s {
-  _Atomic(size_t) chunk_map_count;   // valid chunk_maps entries
-  _Atomic(size_t) chunk_count;       // total count of chunks
-  size_t padding[MI_BITMAP_CHUNK_SIZE/MI_SIZE_SIZE - 2];  // suppress warning on msvc
-  _Atomic(mi_chunkmap_t) chunk_maps[MI_BITMAP_MAX_CHUNKMAPS];
-
-  mi_bitmap_chunk_t chunks[MI_BITMAP_MIN_BIT_COUNT];  // or more, up to MI_BITMAP_MAX_CHUNK_COUNT
+typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
+  _Atomic(size_t) chunk_count;  // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
+  size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1];  // suppress warning on msvc
+  mi_bchunkmap_t chunkmap;
+  mi_bchunk_t chunks[1];  // or more, up to MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bitmap_t;
 
-static inline size_t mi_bitmap_chunk_map_count(const mi_bitmap_t* bitmap) {
-  return mi_atomic_load_relaxed(&bitmap->chunk_map_count);
-}
-
 static inline size_t mi_bitmap_chunk_count(const mi_bitmap_t* bitmap) {
   return mi_atomic_load_relaxed(&bitmap->chunk_count);
 }
 
 static inline size_t mi_bitmap_max_bits(const mi_bitmap_t* bitmap) {
-  return (mi_bitmap_chunk_count(bitmap) * MI_BITMAP_CHUNK_BITS);
+  return (mi_bitmap_chunk_count(bitmap) * MI_BCHUNK_BITS);
 }
 
@@ -134,9 +122,22 @@ size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero);
 // Set/clear a sequence of `n` bits in the bitmap (and can cross chunks). Not atomic so only use if local to a thread.
 void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n);
 
+
+// Set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
+bool mi_bitmap_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx);
+
+static inline bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_xset(MI_BIT_SET, bitmap, idx);
+}
+
+static inline bool mi_bitmap_clear(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_xset(MI_BIT_CLEAR, bitmap, idx);
+}
+
+
 // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's (or all 1's to 0's).
-// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
-// If `already_xset` is not NULL, it is set to true if all the bits were already all set/cleared.
+// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
+// If `already_xset` is not NULL, it is set to the count of bits that were already set/cleared.
 bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset);
 
 static inline bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set) {
@@ -162,7 +163,7 @@ static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n
 
 // Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's)
 // and false otherwise leaving the bitmask as is.
-// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
+// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
 mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
 
 static inline bool mi_bitmap_try_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
@@ -177,48 +178,11 @@ static inline bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t
 // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
 mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx);
 
+typedef bool (mi_claim_fun_t)(size_t slice_index, void* arg1, void* arg2, bool* keep_set);
+mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx,
+                                                    mi_claim_fun_t* claim, void* arg1, void* arg2);
 
-/* --------------------------------------------------------------------------------
-  Atomic bitmap for a pair of bits.
-
-  The valid pairs are CLEAR (0), SET (3), or BUSY (2).
-
-  These bit pairs are used in the abandoned pages maps: when set, the entry has
-  an available page. When we scan for an available abandoned page and find an entry SET,
-  we first set it to BUSY, and try to claim the page atomically (since it can race
-  with a concurrent `mi_free` which also tries to claim the page). However, unlike `mi_free`,
-  we cannot be sure that a concurrent `mi_free` also didn't free (and decommit) the page
-  just when we got the entry. Therefore, a page can only be freed after `mi_arena_unabandon`
-  which (busy) waits until the BUSY flag is cleared to ensure all readers are done.
-  (and pair-bit operations must therefore be release_acquire).
--------------------------------------------------------------------------------- */
-
-#define MI_PAIR_CLEAR   (0)
-#define MI_PAIR_UNUSED  (1)   // should never occur
-#define MI_PAIR_BUSY    (2)
-#define MI_PAIR_SET     (3)
-
-// 0b....0101010101010101
-#define MI_BFIELD_LO_BIT2 ((MI_BFIELD_LO_BIT8 << 6)|(MI_BFIELD_LO_BIT8 << 4)|(MI_BFIELD_LO_BIT8 << 2)|MI_BFIELD_LO_BIT8)
-
-// A pairmap manipulates pairs of bits (and consists of 2 bitmaps)
-typedef struct mi_pairmap_s {
-  mi_bitmap_t* bitmap1;
-  mi_bitmap_t* bitmap2;
-} mi_pairmap_t;
-
-// initialize a pairmap to all clear; avoid a mem_zero if `already_zero` is true
-void mi_pairmap_init(mi_pairmap_t* pairmap, mi_bitmap_t* bm1, mi_bitmap_t* bm2);
-bool mi_pairmap_set(mi_pairmap_t* pairmap, size_t pair_idx);
-bool mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx);
-bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx);
-void mi_pairmap_clear_once_not_busy(mi_pairmap_t* pairmap, size_t pair_idx);
-
-typedef bool (mi_bitmap_claim_while_busy_fun_t)(size_t pair_index, void* arg1, void* arg2);
-mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx,
-                                                        mi_bitmap_claim_while_busy_fun_t* claim, void* arg1 ,void* arg2
-                                                        );
-
+void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx);
 
 #endif // MI_BITMAP_H
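Finally, the "test, clear, then re-test" rule that replaces the epoch counter can be modeled on a single chunkmap word. This is a sketch of the invariant only (hypothetical names, one word standing in for a whole chunk): setters update the chunk first and the chunkmap second, and the clearer re-checks after clearing, so the chunkmap always remains a conservative over-approximation:

  #include <stdatomic.h>

  static _Atomic(unsigned long long) chunk;     // stand-in for one mi_bchunk_t (one word here)
  static _Atomic(unsigned long long) chunkmap;  // bit 0 tracks whether `chunk` may have bits set

  static void bitmap_set(unsigned idx) {
    atomic_fetch_or(&chunk, 1ull << idx);  // 1. set the bit in the chunk
    atomic_fetch_or(&chunkmap, 1ull);      // 2. then mark the chunk in the chunkmap
  }

  static void chunkmap_try_clear(void) {
    if (atomic_load(&chunk) != 0) return;  // chunk not clear: keep the chunkmap bit
    atomic_fetch_and(&chunkmap, ~1ull);    // tentatively clear the chunkmap bit
    if (atomic_load(&chunk) != 0) {        // re-check: a setter may have raced us
      atomic_fetch_or(&chunkmap, 1ull);    // restore it; over-approximation is fine
    }
  }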