From 8d9c725482537a811b4eb9c982bfbfdf7680cbc1 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 3 Dec 2024 17:27:43 -0800 Subject: [PATCH] increase MAX_OBJ_SLICES to a full chunk (32MiB) --- include/mimalloc/internal.h | 15 +++ include/mimalloc/types.h | 3 +- src/arena.c | 65 ++++++++++--- src/bitmap.c | 185 +++++++++++++++++++++++++++--------- src/bitmap.h | 47 ++++----- src/os.c | 15 --- src/page-map.c | 2 +- 7 files changed, 230 insertions(+), 102 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 9fa27f31..34dbab07 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -339,6 +339,21 @@ static inline uint8_t* _mi_align_up_ptr(void* p, size_t alignment) { } +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return (sz & ~mask); + } + else { + return ((sz / alignment) * alignment); + } +} + +static inline void* mi_align_down_ptr(void* p, size_t alignment) { + return (void*)_mi_align_down((uintptr_t)p, alignment); +} + // Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { mi_assert_internal(divider != 0); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 4430cd6c..3d83e27a 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -128,8 +128,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE) #define MI_ARENA_MIN_OBJ_SLICES (1) -#define MI_ARENA_MAX_OBJ_SLICES (MI_SIZE_BITS) // for now, cannot cross bit field boundaries.. todo: make it at least MI_BITMAP_CHUNK_BITS ? (16 MiB) -// #define MI_ARENA_MAX_OBJ_BLOCKS (MI_BITMAP_CHUNK_BITS) // for now, cannot cross chunk boundaries +#define MI_ARENA_MAX_OBJ_SLICES (MI_BITMAP_CHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE) #define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE) diff --git a/src/arena.c b/src/arena.c index a2343674..1b891377 100644 --- a/src/arena.c +++ b/src/arena.c @@ -193,30 +193,55 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( void* p = mi_arena_slice_start(arena, slice_index); *memid = mi_memid_create_arena(arena->id, arena->exclusive, slice_index, slice_count); memid->is_pinned = arena->memid.is_pinned; - + // set the dirty bits if (arena->memid.initially_zero) { + // size_t dirty_count = 0; memid->initially_zero = mi_bitmap_setN(&arena->slices_dirty, slice_index, slice_count, NULL); + //if (dirty_count>0) { + // if (memid->initially_zero) { + // _mi_error_message(EFAULT, "ouch1\n"); + // } + // // memid->initially_zero = false; + //} + //else { + // if (!memid->initially_zero) { + // _mi_error_message(EFAULT, "ouch2\n"); + // } + // // memid->initially_zero = true; + //} } // set commit state if (commit) { - // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; - size_t already_committed_count = 0; - mi_bitmap_setN(&arena->slices_committed, slice_index, slice_count, &already_committed_count); - if (already_committed_count < slice_count) { - // recommit the full range + // commit requested, but the range may not be committed as a whole: ensure it is committed now + if (!mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count)) { + // not fully committed: commit the full range and set the commit bits + // (this may race and we may double-commit which is fine) bool commit_zero = false; - mi_stat_decrease(_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero, NULL)) { memid->initially_committed = false; } else { if (commit_zero) { memid->initially_zero = true; } + #if MI_DEBUG > 1 + if (memid->initially_zero) { + if (!mi_mem_is_zero(p, mi_size_of_slices(slice_count))) { + _mi_error_message(EFAULT, "arena allocation was not zero-initialized!\n"); + memid->initially_zero = false; + } + } + #endif + size_t already_committed_count = 0; + mi_bitmap_setN(&arena->slices_committed, slice_index, slice_count, &already_committed_count); + if (already_committed_count < slice_count) { + // todo: also decrease total + mi_stat_decrease(_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); + } } - } + } } else { // no need to commit, but check if already fully committed @@ -523,7 +548,18 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment)); // claimed free slices: initialize the page partly - if (!memid.initially_zero) { _mi_memzero_aligned(page, sizeof(*page)); } + if (!memid.initially_zero) { + _mi_memzero_aligned(page, sizeof(*page)); + } + #if MI_DEBUG > 1 + else { + if (!mi_mem_is_zero(page, mi_size_of_slices(slice_count))) { + _mi_error_message(EFAULT, "page memory was not zero initialized!\n"); + memid.initially_zero = false; + _mi_memzero_aligned(page, sizeof(*page)); + } + } + #endif mi_assert(MI_PAGE_INFO_SIZE >= _mi_align_up(sizeof(*page), MI_PAGE_MIN_BLOCK_ALIGN)); const size_t block_start = (os_align ? MI_PAGE_ALIGN : MI_PAGE_INFO_SIZE); const size_t reserved = (os_align ? 1 : (mi_size_of_slices(slice_count) - block_start) / block_size); @@ -668,7 +704,7 @@ static void mi_arena_page_abandon_no_stat(mi_page_t* page) { mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_free, slice_index, slice_count)); mi_assert_internal(mi_bitmap_is_setN(&arena->slices_committed, slice_index, slice_count)); mi_assert_internal(mi_bitmap_is_clearN(&arena->slices_purge, slice_index, slice_count)); - // mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count)); + mi_assert_internal(mi_bitmap_is_setN(&arena->slices_dirty, slice_index, slice_count)); mi_page_set_abandoned_mapped(page); bool were_zero = mi_pairmap_set(&arena->pages_abandoned[bin], slice_index); @@ -851,6 +887,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_assert_internal(all_committed); } else { + /* if (!all_committed) { // mark the entire range as no longer committed (so we recommit the full range when re-using) mi_bitmap_clearN(&arena->slices_committed, slice_index, slice_count); @@ -864,6 +901,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi // that contains already decommitted parts. Since purge consistently uses reset or decommit that // works (as we should never reset decommitted parts). } + */ // (delay) purge the entire range mi_arena_schedule_purge(arena, slice_index, slice_count, stats); } @@ -1014,7 +1052,12 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int else { mi_bitmap_setN(&arena->slices_committed, 0, info_slices, NULL); } - mi_bitmap_setN(&arena->slices_dirty, 0, info_slices, NULL); + if (!memid.initially_zero) { + mi_bitmap_unsafe_setN(&arena->slices_dirty, 0, arena->slice_count); + } + else { + mi_bitmap_setN(&arena->slices_dirty, 0, info_slices, NULL); + } return mi_arena_add(arena, arena_id, &_mi_stats_main); } diff --git a/src/bitmap.c b/src/bitmap.c index 4eadce80..a6c9e879 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -42,6 +42,25 @@ static inline mi_bfield_t mi_bfield_rotate_right(mi_bfield_t x, size_t r) { return mi_rotr(x,r); } +static inline mi_bfield_t mi_bfield_zero(void) { + return 0; +} + +static inline mi_bfield_t mi_bfield_one(void) { + return 1; +} + +static inline mi_bfield_t mi_bfield_all_set(void) { + return ~((mi_bfield_t)0); +} + +static inline mi_bfield_t mi_bfield_mask(size_t bit_count, size_t shiftl) { + mi_assert_internal(bit_count + shiftl <= MI_BFIELD_BITS); + const mi_bfield_t mask0 = (bit_count < MI_BFIELD_BITS ? (mi_bfield_one() << bit_count)-1 : mi_bfield_all_set()); + return (mask0 << shiftl); +} + + // Find the least significant bit that can be xset (0 for MI_BIT_SET, 1 for MI_BIT_CLEAR). // return false if `x==~0` (for MI_BIT_SET) or `x==0` for MI_BIT_CLEAR (with `*idx` undefined) and true otherwise, // with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). @@ -52,7 +71,7 @@ static inline bool mi_bfield_find_least_to_xset(mi_bit_t set, mi_bfield_t x, siz // Set a bit atomically. Returns `true` if the bit transitioned from 0 to 1 static inline bool mi_bfield_atomic_set(_Atomic(mi_bfield_t)*b, size_t idx) { mi_assert_internal(idx < MI_BFIELD_BITS); - const mi_bfield_t mask = ((mi_bfield_t)1)< n) { m = n; } mi_assert_internal(idx + m <= MI_BFIELD_BITS); mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); - const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask, &already_xset ); + const bool transition = mi_bfield_atomic_xset_mask(set, &chunk->bfields[field], mask, &already_xset); + if (already_xset > 0 && transition) { + _mi_error_message(EFAULT, "ouch\n"); + } + all_transition = all_transition && transition; all_already_xset += already_xset; // next field field++; @@ -335,7 +372,6 @@ static inline bool mi_bitmap_chunk_is_clear2(mi_bitmap_chunk_t* chunk, size_t ci static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); mi_assert_internal(n>0); - bool all_xset = true; size_t idx = cidx % MI_BFIELD_BITS; size_t field = cidx / MI_BFIELD_BITS; while (n > 0) { @@ -343,14 +379,16 @@ static bool mi_bitmap_chunk_is_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, siz if (m > n) { m = n; } mi_assert_internal(idx + m <= MI_BFIELD_BITS); mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); - const size_t mask = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask); + const size_t mask = mi_bfield_mask(m, idx); + if (!mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[field], mask)) { + return false; + } // next field field++; idx = 0; n -= m; } - return all_xset; + return true; } @@ -389,14 +427,14 @@ static inline bool mi_bitmap_chunk_try_clear8(mi_bitmap_chunk_t* chunk, size_t b // Returns true if all bits transitioned from 0 to 1 (or 1 to 0), // and false otherwise leaving all bit fields as is. static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, size_t cidx, size_t n) { - mi_assert_internal(cidx + n < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); mi_assert_internal(n>0); if (n==0) return true; size_t start_idx = cidx % MI_BFIELD_BITS; size_t start_field = cidx / MI_BFIELD_BITS; size_t end_field = MI_BITMAP_CHUNK_FIELDS; - size_t mask_mid = 0; - size_t mask_end = 0; + mi_bfield_t mask_mid = 0; + mi_bfield_t mask_end = 0; // first field size_t field = start_field; @@ -404,7 +442,7 @@ static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, si if (m > n) { m = n; } mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); mi_assert_internal(start_field < MI_BITMAP_CHUNK_FIELDS); - const size_t mask_start = (m == MI_BFIELD_BITS ? ~MI_ZU(0) : ((MI_ZU(1)<bfields[field], mask_start)) return false; // done? @@ -417,7 +455,7 @@ static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, si while (n >= MI_BFIELD_BITS) { field++; mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); - mask_mid = ~MI_ZU(0); + mask_mid = mi_bfield_all_set(); if (!mi_bfield_atomic_try_xset_mask(set, &chunk->bfields[field], mask_mid)) goto restore; n -= MI_BFIELD_BITS; } @@ -428,7 +466,7 @@ static bool mi_bitmap_chunk_try_xsetN(mi_bit_t set, mi_bitmap_chunk_t* chunk, si field++; mi_assert_internal(field < MI_BITMAP_CHUNK_FIELDS); end_field = field; - mask_end = (MI_ZU(1)<bfields[field], mask_end)) goto restore; } @@ -602,14 +640,12 @@ static inline bool mi_bitmap_chunk_find_and_try_clear8(mi_bitmap_chunk_t* chunk, } -// find a sequence of `n` bits in a chunk with all `n` (`< MI_BFIELD_BITS`!) bits set, -// and try unset it atomically +// find a sequence of `n` bits in a chunk with `n < MI_BFIELD_BITS` with all bits set, +// and try to clear them atomically. // set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success. -// todo: try avx2 and neon version -// todo: allow spanning across bfield boundaries? -static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) { - if (n == 0 || n > MI_BFIELD_BITS) return false; // TODO: allow larger? - const mi_bfield_t mask = (n==MI_BFIELD_BITS ? ~((mi_bfield_t)0) : (((mi_bfield_t)1) << n)-1); +static bool mi_bitmap_chunk_find_and_try_clearNX(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) { + if (n == 0 || n > MI_BFIELD_BITS) return false; + const mi_bfield_t mask = mi_bfield_mask(n, 0); for(int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) { mi_bfield_t b = chunk->bfields[i]; size_t bshift = 0; @@ -636,8 +672,48 @@ static inline bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, // advance const size_t ones = mi_bfield_ctz(~b); // skip all ones (since it didn't fit the mask) mi_assert_internal(ones>0); - bshift += ones; b >>= ones; + bshift += ones; + } + } + } + return false; +} + +// find a sequence of `n` bits in a chunk with `n < MI_BITMAP_CHUNK_BITS` with all bits set, +// and try to clear them atomically. +// set `*pidx` to its bit index (0 <= *pidx <= MI_BITMAP_CHUNK_BITS - n) on success. +static bool mi_bitmap_chunk_find_and_try_clearN(mi_bitmap_chunk_t* chunk, size_t n, size_t* pidx) { + if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false; // cannot be more than a chunk + if (n < MI_BFIELD_BITS) return mi_bitmap_chunk_find_and_try_clearNX(chunk, n, pidx); + + // we align an a field, and require `field_count` fields to be all clear. + // n >= MI_BFIELD_BITS; find a first field that is 0 + const size_t field_count = _mi_divide_up(n, MI_BFIELD_BITS); // we need this many fields + for (size_t i = 0; i <= MI_BITMAP_CHUNK_FIELDS - field_count; i++) + { + // first pre-scan for a range of fields that are all set + bool allset = true; + size_t j = 0; + do { + mi_assert_internal(i + j < MI_BITMAP_CHUNK_FIELDS); + mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i+j]); + if (~b != 0) { + allset = false; + i += j; // no need to look again at the previous fields + break; + } + } while (++j < field_count); + + // if all set, we can try to atomically clear them + if (allset) { + const size_t cidx = i*MI_BFIELD_BITS; + if (mi_bitmap_chunk_try_clearN(chunk, cidx, n)) { + // we cleared all atomically + *pidx = cidx; + mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS); + mi_assert_internal(*pidx + n <= MI_BITMAP_CHUNK_BITS); + return true; } } } @@ -796,7 +872,7 @@ void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) { // Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0), // and false otherwise leaving the bitmask as is. -bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { +static bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { mi_assert_internal(idx < MI_BITMAP_MAX_BITS); const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; @@ -816,12 +892,9 @@ bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { } } - - - // Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0) // and false otherwise leaving the bitmask as is. -bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { +static bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { mi_assert_internal(idx < MI_BITMAP_MAX_BITS); mi_assert_internal(idx%8 == 0); const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; @@ -846,13 +919,12 @@ bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx) { // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's) // and false otherwise leaving the bitmask as is. // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! -bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { +static bool mi_bitmap_try_xsetN_(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { mi_assert_internal(n>0); - mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); - if (n==1) { return mi_bitmap_try_xset(set,bitmap,idx); } - if (n==8) { return mi_bitmap_try_xset8(set,bitmap,idx); } - + mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); mi_assert_internal(idx + n <= MI_BITMAP_MAX_BITS); + if (n==0 || idx + n > MI_BITMAP_MAX_BITS) return false; + const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS; const size_t cidx = idx % MI_BITMAP_CHUNK_BITS; mi_assert_internal(cidx + n <= MI_BITMAP_CHUNK_BITS); // don't cross chunks (for now) @@ -875,13 +947,21 @@ bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n } } +bool mi_bitmap_try_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { + mi_assert_internal(n>0 && n<=MI_BITMAP_CHUNK_BITS); + if (n==1) return mi_bitmap_try_xset(set, bitmap, idx); + if (n==8) return mi_bitmap_try_xset8(set, bitmap, idx); + return mi_bitmap_try_xsetN_(set, bitmap, idx, n); +} + // Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). // `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! -bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset ) { +static bool mi_bitmap_xsetN_(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset ) { mi_assert_internal(n>0); mi_assert_internal(n<=MI_BITMAP_CHUNK_BITS); + //TODO: specialize? //if (n==1) { return mi_bitmap_xset(set, bitmap, idx); } //if (n==8) { return mi_bitmap_xset8(set, bitmap, idx); } @@ -899,14 +979,26 @@ bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, si } else { const size_t epoch = mi_bitmap_epoch(bitmap); - bool cleared = mi_bitmap_chunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, already_xset); - if (cleared && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { + size_t already_clear = 0; + const bool allset = mi_bitmap_chunk_clearN(&bitmap->chunks[chunk_idx], cidx, n, &already_clear); + if (already_xset != NULL) { *already_xset = already_clear; } + if (already_clear < n && epoch == mi_bitmap_epoch(bitmap) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) { mi_bitmap_anyset_try_clear(bitmap, chunk_idx, epoch); } - return cleared; + return allset; } } +// Set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's). +// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)! +bool mi_bitmap_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_xset) { + mi_assert_internal(n>0 && n<=MI_BITMAP_CHUNK_BITS); + //TODO: specialize? + //if (n==1) return mi_bitmap_xset(set, bitmap, idx); + //if (n==8) return mi_bitmap_xset8(set, bitmap, idx); + return mi_bitmap_xsetN_(set, bitmap, idx, n, already_xset); +} + // Is a sequence of n bits already all set/cleared? bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) { @@ -949,7 +1041,7 @@ bool mi_bitmap_is_xsetN(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx, size_t n) // and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`. // The low `MI_BFIELD_BITS` of start are used to set the start point of the search // (to reduce thread contention). -mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) { +mi_decl_nodiscard static bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) { mi_bitmap_forall_set_chunks(bitmap, tseq, epoch, chunk_idx) { size_t cidx; @@ -973,7 +1065,7 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t // Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero. // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`. -mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx ) { +mi_decl_nodiscard static bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx ) { mi_bitmap_forall_set_chunks(bitmap,tseq, epoch, chunk_idx) { size_t cidx; @@ -997,10 +1089,9 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all. // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. -mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx ) { - // TODO: allow at least MI_BITMAP_CHUNK_BITS and probably larger - // TODO: allow spanning across chunk boundaries - if (n == 0 || n > MI_BFIELD_BITS) return false; +mi_decl_nodiscard static bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx ) { + // TODO: allow spanning across chunk boundaries? + if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false; mi_bitmap_forall_set_chunks(bitmap,tseq,epoch,chunk_idx) { size_t cidx; @@ -1021,6 +1112,12 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t return false; } +mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) { + if (n == 1) return mi_bitmap_try_find_and_clear(bitmap, tseq, pidx); + if (n == 8) return mi_bitmap_try_find_and_clear8(bitmap, tseq, pidx); + return mi_bitmap_try_find_and_clearN_(bitmap, n, tseq, pidx); +} + /* -------------------------------------------------------------------------------- pairmap epochset diff --git a/src/bitmap.h b/src/bitmap.h index 8c961fe1..948bd1e3 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -90,28 +90,28 @@ static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n // Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0) // and false otherwise leaving the bitmask as is. -mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx); - -static inline bool mi_bitmap_try_set(mi_bitmap_t* bitmap, size_t idx) { - return mi_bitmap_try_xset(MI_BIT_SET, bitmap, idx); -} - -static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) { - return mi_bitmap_try_xset(MI_BIT_CLEAR, bitmap, idx); -} +//mi_decl_nodiscard bool mi_bitmap_try_xset(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx); +// +//static inline bool mi_bitmap_try_set(mi_bitmap_t* bitmap, size_t idx) { +// return mi_bitmap_try_xset(MI_BIT_SET, bitmap, idx); +//} +// +//static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) { +// return mi_bitmap_try_xset(MI_BIT_CLEAR, bitmap, idx); +//} // Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0) // and false otherwise leaving the bitmask as is. -mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx); - -static inline bool mi_bitmap_try_set8(mi_bitmap_t* bitmap, size_t idx) { - return mi_bitmap_try_xset8(MI_BIT_SET, bitmap, idx); -} - -static inline bool mi_bitmap_try_clear8(mi_bitmap_t* bitmap, size_t idx) { - return mi_bitmap_try_xset8(MI_BIT_CLEAR, bitmap, idx); -} +//mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_bit_t set, mi_bitmap_t* bitmap, size_t idx); +// +//static inline bool mi_bitmap_try_set8(mi_bitmap_t* bitmap, size_t idx) { +// return mi_bitmap_try_xset8(MI_BIT_SET, bitmap, idx); +//} +// +//static inline bool mi_bitmap_try_clear8(mi_bitmap_t* bitmap, size_t idx) { +// return mi_bitmap_try_xset8(MI_BIT_CLEAR, bitmap, idx); +//} // Try to set/clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's) // and false otherwise leaving the bitmask as is. @@ -126,17 +126,6 @@ static inline bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t return mi_bitmap_try_xsetN(MI_BIT_CLEAR, bitmap, idx, n); } - -// Find a set bit in a bitmap and atomically unset it. Returns true on success, -// and in that case sets the index: `0 <= *pidx < MI_BITMAP_MAX_BITS`. -// The low `MI_BFIELD_BITS` of start are used to set the start point of the search -// (to reduce thread contention). -mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); - -// Find a byte in the bitmap with all bits set (0xFF) and atomically unset it to zero. -// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-8`. -mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx ); - // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all. // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx ); diff --git a/src/os.c b/src/os.c index bac59437..c7f464c0 100644 --- a/src/os.c +++ b/src/os.c @@ -92,21 +92,6 @@ void _mi_os_init(void) { bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); -static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { - mi_assert_internal(alignment != 0); - uintptr_t mask = alignment - 1; - if ((alignment & mask) == 0) { // power of two? - return (sz & ~mask); - } - else { - return ((sz / alignment) * alignment); - } -} - -static void* mi_align_down_ptr(void* p, size_t alignment) { - return (void*)_mi_align_down((uintptr_t)p, alignment); -} - void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { MI_UNUSED(try_alignment); MI_UNUSED(size); return NULL; diff --git a/src/page-map.c b/src/page-map.c index 15578301..0e99a890 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -24,7 +24,7 @@ static bool mi_page_map_init(void) { mi_page_map_entries_per_commit_bit = _mi_divide_up(page_map_size,MI_BITMAP_MAX_BITS); - mi_page_map_all_committed = _mi_os_has_overcommit(); // commit on-access on Linux systems + mi_page_map_all_committed = false; // _mi_os_has_overcommit(); // commit on-access on Linux systems? _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL); if (_mi_page_map==NULL) { _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);