From 0d302cd1749ac8025893923b1c1d77f9246199e0 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 31 Dec 2024 15:11:09 -0800 Subject: [PATCH 1/3] add comments --- include/mimalloc/types.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 8b72140a..c5029a14 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -100,9 +100,10 @@ terms of the MIT license. A copy of the license can be found in the file #endif -// ------------------------------------------------------ +// -------------------------------------------------------------- // Sizes of internal data-structures -// ------------------------------------------------------ +// (comments specify sizes on 64-bit; on 32-bit they are usually halved) +// -------------------------------------------------------------- // Sizes are for 64-bit #ifndef MI_ARENA_SLICE_SHIFT @@ -116,19 +117,19 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512) #endif -#define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) -#define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) +#define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) // sub-bitmaps are "bchunks" of 512 bits +#define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) // arenas allocate in slices of 64 KiB #define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE) -#define MI_ARENA_MIN_OBJ_SLICES (1) -#define MI_ARENA_MAX_OBJ_SLICES (MI_BCHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries) +#define MI_ARENA_MIN_OBJ_SLICES (1) +#define MI_ARENA_MAX_OBJ_SLICES (MI_BCHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE) #define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE) -#define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE -#define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bitmap) -#define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bitmap) +#define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE // 64 KiB +#define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bchunk bitmap) +#define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bchunk bitmap) // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) @@ -272,7 +273,7 @@ typedef uint8_t mi_heaptag_t; // // Notes: // - Non-atomic fields can only be accessed when holding ownership (low bit of `xthread_free`). -// - If a page is not part of a heap it is called "abandoned" -- in +// - If a page is not part of a heap it is called "abandoned" (`heap==NULL`) -- in // that case the `xthread_id` is 0 or 1 (1 is for abandoned pages that // are in the abandoned page lists of an arena, these are called "mapped" abandoned pages).
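(Aside: the size comments above follow directly from the definitions. A minimal standalone sketch, not part of the patch, with the default 64-bit values hard-coded purely for illustration:)

#include <assert.h>
#include <stddef.h>

int main(void) {
  const size_t slice       = (size_t)1 << (13 + 3);   // MI_ARENA_SLICE_SIZE: 64 KiB
  const size_t bchunk_bits = 1u << (6 + 3);           // MI_BCHUNK_BITS: 512
  assert(bchunk_bits * slice == 32u * 1024 * 1024);   // MI_ARENA_MAX_OBJ_SIZE: 32 MiB
  assert(8 * slice == 512u * 1024);                   // MI_MEDIUM_PAGE_SIZE: one byte in the bchunk bitmap
  assert(8 * 8 * slice == 4u * 1024 * 1024);          // MI_LARGE_PAGE_SIZE: one 64-bit word in the bitmap
  return 0;
}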
// - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc` @@ -304,7 +305,7 @@ typedef struct mi_page_s { mi_heap_t* heap; // the heap owning this page (or NULL for abandoned pages) struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` - size_t slice_committed; // committed size relative to the first arena slice of the page data + size_t slice_committed; // committed size relative to the first arena slice of the page data (or 0 if the page is fully committed already) mi_memid_t memid; // provenance of the page memory } mi_page_t; @@ -315,7 +316,7 @@ typedef struct mi_page_s { #define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map. #define MI_PAGE_MIN_START_BLOCK_ALIGN MI_MAX_ALIGN_SIZE // minimal block alignment for the first block in a page (16b) -#define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks +#define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks (such that we guarantee natural alignment) #define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation #if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8 @@ -328,7 +329,7 @@ typedef struct mi_page_s { // (Except for large pages since huge objects are allocated in 4MiB chunks) #define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 8 KiB #define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 64 KiB -#define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/4) // <= 512 KiB // note: this must be a nice power of 2 or we get rounding issues with _mi_bin +#define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/4) // <= 512 KiB // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin` #define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE) From c507ee3d96a2146717d6ac5fe120d1dc2da545dd Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 2 Jan 2025 11:42:28 -0800 Subject: [PATCH 2/3] make bitmap scan cross bfields for NX; disable the use of large object pages --- src/arena.c | 6 +- src/bitmap.c | 287 +++++++++++++++++++++++++++++---------------- src/bitmap.h | 4 +- 3 files changed, 170 insertions(+), 127 deletions(-) diff --git a/src/arena.c b/src/arena.c index c8d4c9cd..11a4f82f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -773,9 +773,9 @@ mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t bloc else if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) { page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_MEDIUM_PAGE_SIZE), block_size); } - else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) { - page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size); - } + //else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) { + // page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size); + // } else { page = mi_arenas_page_singleton_alloc(heap, block_size, block_alignment); } diff --git a/src/bitmap.c b/src/bitmap.c index 03e21c89..5cecc606 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -130,6 +130,7 @@ static inline bool mi_bfield_atomic_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t } // Clear a mask set of bits atomically, and return true if the mask bits transitioned from all 1's to 0's +// `all_clear` is set to `true` if the new
bfield became zero. static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) { mi_assert_internal(mask != 0); mi_bfield_t old = mi_atomic_load_relaxed(b); @@ -155,6 +156,7 @@ static inline bool mi_bfield_atomic_clearX(_Atomic(mi_bfield_t)*b, bool* all_cle // Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0 // and false otherwise (leaving the bit field as is). +// `all_clear` is set to `true` if the new bfield became zero. static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) { mi_assert_internal(mask != 0); mi_bfield_t old = mi_atomic_load_relaxed(b); @@ -170,9 +172,9 @@ static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bf } -// Tries to set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0) +// Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0 // and `false` otherwise leaving the bfield `b` as-is. -// `all_clear` is set to true if the new bfield is zero (and false otherwise) +// `all_clear` is set to true if the new bfield became zero (and false otherwise) static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) { mi_assert_internal(idx < MI_BFIELD_BITS); const mi_bfield_t mask = mi_bfield_one()<<idx; return mi_bfield_atomic_try_clear_mask(b, mask, all_clear); } -static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx) { +static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) { mi_assert_internal(cidx < MI_BCHUNK_BITS); const size_t i = cidx / MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS; - return mi_bfield_atomic_set(&chunk->bfields[i], idx); + const bool was_clear = mi_bfield_atomic_set(&chunk->bfields[i], idx); + if (already_set != NULL) { *already_set = (was_clear ? 0 : 1); } + return was_clear; } +// Set `0 < n <= MI_BFIELD_BITS`, and return true if the mask bits transitioned from all 0's to 1's. +// `already_set` contains the count of bits that were already set (used when committing ranges to account +// for the statistics correctly). +// Can cross over two bfields. static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { mi_assert_internal(cidx < MI_BCHUNK_BITS); + mi_assert_internal(n > 0 && n <= MI_BFIELD_BITS); const size_t i = cidx / MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS; - const mi_bfield_t mask = mi_bfield_mask(n, idx); - return mi_bfield_atomic_set_mask(&chunk->bfields[i], mask, already_set); -} - -static inline bool mi_bchunk_setX(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) { - mi_assert_internal(cidx < MI_BCHUNK_BITS); - mi_assert_internal((cidx%MI_BFIELD_BITS)==0); - const size_t i = cidx / MI_BFIELD_BITS; - return mi_bfield_atomic_setX(&chunk->bfields[i], already_set); + if mi_likely(idx + n <= MI_BFIELD_BITS) { // within one field + return mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(n,idx), already_set); + } + else { + // spanning two fields + const size_t m = MI_BFIELD_BITS - idx; // bits to set in the first field + mi_assert_internal(m < n); + mi_assert_internal(i < MI_BCHUNK_FIELDS - 1); + size_t already_set1; + const bool all_set1 = mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &already_set1); + mi_assert_internal(n - m > 0); + mi_assert_internal(n - m < MI_BFIELD_BITS); + size_t already_set2; + const bool all_set2 = mi_bfield_atomic_set_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &already_set2); + if (already_set != NULL) { *already_set = already_set1 + already_set2; } + return (all_set1 && all_set2); + } } // Set a sequence of `n` bits within a chunk.
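(The cross-over case in `mi_bchunk_setNX` above is easiest to see with concrete masks. A small standalone illustration -- the `mask` helper below is a hypothetical stand-in for `mi_bfield_mask`, and the values are assumptions chosen for the example:)

#include <stdint.h>
#include <stdio.h>

// build a mask of n bits (1 <= n <= 64) starting at bit idx
static uint64_t mask(size_t n, size_t idx) {
  return (n >= 64 ? ~UINT64_C(0) : ((UINT64_C(1) << n) - 1)) << idx;
}

int main(void) {
  const size_t idx = 60, n = 8;   // an 8-bit run starting at bit 60 of field i
  const size_t m = 64 - idx;      // 4 bits land in field i, the rest in field i+1
  printf("field i  : %016llx\n", (unsigned long long)mask(m, idx));    // f000000000000000
  printf("field i+1: %016llx\n", (unsigned long long)mask(n - m, 0));  // 000000000000000f
  return 0;
}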
@@ -298,6 +319,7 @@ mi_decl_noinline static bool mi_bchunk_xsetN_(mi_xset_t set, mi_bchunk_t* chunk, // next field field++; idx = 0; + mi_assert_internal(m <= n); n -= m; } if (palready_set!=NULL) { *palready_set = total_already_set; } @@ -307,13 +329,10 @@ static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); - if (n==1) { bool was_clear = mi_bchunk_set(chunk, cidx); if (already_set != NULL) { *already_set = !was_clear; } return was_clear; } - if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set); - if (n<MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set); - return mi_bchunk_xsetN_(MI_BIT_SET, chunk, cidx, n, already_set); + if (n==1) return mi_bchunk_set(chunk, cidx, already_set); + if (n<=MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set); + return mi_bchunk_xsetN_(MI_BIT_SET, chunk, cidx, n, already_set); } static inline bool mi_bchunk_clear(mi_bchunk_t* chunk, size_t cidx, bool* all_clear) { mi_assert_internal(cidx < MI_BCHUNK_BITS); const size_t i = cidx / MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS; return mi_bfield_atomic_clear(&chunk->bfields[i], idx, all_clear); } -static inline bool mi_bchunk_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* all_clear) { - mi_assert_internal(cidx < MI_BCHUNK_BITS); - const size_t i = cidx / MI_BFIELD_BITS; - const size_t idx = cidx % MI_BFIELD_BITS; - const mi_bfield_t mask = mi_bfield_mask(n, idx); - return mi_bfield_atomic_clear_mask(&chunk->bfields[i], mask, all_clear); } -static inline bool mi_bchunk_clearX(mi_bchunk_t* chunk, size_t cidx, bool* all_clear) { - mi_assert_internal(cidx < MI_BCHUNK_BITS); - mi_assert_internal((cidx%MI_BFIELD_BITS)==0); - const size_t i = cidx / MI_BFIELD_BITS; - return mi_bfield_atomic_clearX(&chunk->bfields[i], all_clear); } - static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); if (n==1) return mi_bchunk_clear(chunk, cidx, maybe_all_clear); - if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, maybe_all_clear); - if (n<MI_BFIELD_BITS) return mi_bchunk_clearNX(chunk, cidx, n, maybe_all_clear); - return mi_bchunk_xsetN_(MI_BIT_CLEAR, chunk, cidx, n, maybe_all_clear); + return mi_bchunk_xsetN_(MI_BIT_CLEAR, chunk, cidx, n, maybe_all_clear); } static inline bool mi_bchunk_is_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n) { mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); const size_t i = cidx / MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS; - if (n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); } - if mi_likely(n<=MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); } + if (n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); } + if (idx + n <= MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); } return mi_bchunk_is_xsetN_(set, chunk, i, idx, n); } // ------- mi_bchunk_try_clear --------------------------------------- +// Clear `0 < n <= MI_BFIELD_BITS`. Can cross over a bfield boundary. static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(n <= MI_BFIELD_BITS); const size_t i = cidx / MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS; - mi_assert_internal(idx + n <= MI_BFIELD_BITS); - const size_t mask = mi_bfield_mask(n, idx); - return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mask, pmaybe_all_clear); + if mi_likely(idx + n <= MI_BFIELD_BITS) { + // within one field + return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(n, idx), pmaybe_all_clear); + } + else { + // spanning two fields (todo: use double-word atomic ops?)
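+ // note: the two per-field clears below do not form one atomic step; if the
+ // second clear fails the first mask is restored, but a concurrent reader may
+ // briefly observe the first field cleared on its own (hence the todo above)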
+ const size_t m = MI_BFIELD_BITS - idx; // bits to clear in the first field + mi_assert_internal(m < n); + mi_assert_internal(i < MI_BCHUNK_FIELDS - 1); + bool field1_is_clear; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &field1_is_clear)) return false; + // try the second field as well + mi_assert_internal(n - m > 0); + mi_assert_internal(n - m < MI_BFIELD_BITS); + bool field2_is_clear; + if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &field2_is_clear)) { + // we failed to clear the second field, restore the first one + mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), NULL); + return false; + } + if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = field1_is_clear && field2_is_clear; } + return true; + } } +// Clear a full, aligned bfield. static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) { mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal((cidx%MI_BFIELD_BITS) == 0); @@ -405,60 +432,51 @@ static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* p return mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear); } -// Try to atomically set/clear a sequence of `n` bits within a chunk. -// Returns true if all bits transitioned from 0 to 1 (or 1 to 0), +// Try to atomically clear a sequence of `n` bits within a chunk. +// Returns true if all bits transitioned from 1 to 0, and false otherwise leaving all bit fields as is. -// Note: this is a hard one as we need to unwind partial atomic operations -// if we fail halfway.. +// Note: this is the complex one as we need to unwind partial atomic operations if we fail halfway. +// `maybe_all_clear` is set to `true` if all the bfields involved become zero. mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); mi_assert_internal(n>0); + if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = true; } if (n==0) return true; - size_t start_idx = cidx % MI_BFIELD_BITS; - size_t start_field = cidx / MI_BFIELD_BITS; - size_t end_field = MI_BCHUNK_FIELDS; - mi_bfield_t mask_mid = 0; - mi_bfield_t mask_end = 0; - bool field_is_clear; - bool maybe_all_clear = true; - if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = false; } - + // first field + const size_t start_idx = cidx % MI_BFIELD_BITS; + const size_t start_field = cidx / MI_BFIELD_BITS; size_t field = start_field; size_t m = MI_BFIELD_BITS - start_idx; // m is the number of bits to clear in this field if (m > n) { m = n; } mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); mi_assert_internal(start_field < MI_BCHUNK_FIELDS); const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx); + bool maybe_all_clear; if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &maybe_all_clear)) return false; // done?
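+ // (from here on, a failed atomic clear must roll back: the bits already
+ // cleared in the first field -- and in any mid fields -- are set back
+ // again under the `restore` label below)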
+ mi_assert_internal(m <= n); n -= m; - if (n==0) { - if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = maybe_all_clear; } - return true; - } - - // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields - - // mid fields + + // continue with mid fields and last field: if these fail we need to recover by re-setting previous fields + // mid fields? while (n >= MI_BFIELD_BITS) { field++; mi_assert_internal(field < MI_BCHUNK_FIELDS); - mask_mid = mi_bfield_all_set(); - if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_mid, &field_is_clear)) goto restore; + bool field_is_clear; + if (!mi_bfield_atomic_try_clearX(&chunk->bfields[field], &field_is_clear)) goto restore; maybe_all_clear = maybe_all_clear && field_is_clear; n -= MI_BFIELD_BITS; } - // last field + // last field? if (n > 0) { mi_assert_internal(n < MI_BFIELD_BITS); field++; mi_assert_internal(field < MI_BCHUNK_FIELDS); - end_field = field; - mask_end = mi_bfield_mask(n, 0); + const mi_bfield_t mask_end = mi_bfield_mask(n, 0); + bool field_is_clear; if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_end, &field_is_clear)) goto restore; maybe_all_clear = maybe_all_clear && field_is_clear; } @@ -467,12 +485,16 @@ mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t ci return true; restore: - // field is on the field that failed to set atomically; we need to restore all previous fields + // `field` is the index of the field that failed to clear atomically; we need to restore all previous fields mi_assert_internal(field > start_field); while( field > start_field) { field--; - const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid)); - mi_bfield_atomic_set_mask(&chunk->bfields[field], mask, NULL); + if (field == start_field) { + mi_bfield_atomic_set_mask(&chunk->bfields[field], mask_start, NULL); + } + else { + mi_bfield_atomic_setX(&chunk->bfields[field], NULL); // mid-field: set all bits again + } } return false; } @@ -480,8 +502,8 @@ restore: static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { mi_assert_internal(n>0); - if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear); - if (n<MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear); - return mi_bchunk_try_clearN_(chunk, cidx, n, maybe_all_clear); + if (n<=MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear); + return mi_bchunk_try_clearN_(chunk, cidx, n, maybe_all_clear); } mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) { if (n == 0 || n > MI_BFIELD_BITS) return false; const mi_bfield_t mask = mi_bfield_mask(n, 0); + // for all fields in the chunk for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); size_t idx; + // is there a range inside the field?
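+ // (starting from the least-significant 1-bit, test whether the following
+ // `n` bits are all set by shifting an n-bit mask into that position)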
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit - if (idx + n > MI_BFIELD_BITS) break; + if (idx + n > MI_BFIELD_BITS) break; // too short, maybe cross over, or continue with the next field const size_t bmask = mask<<idx; mi_assert_internal(bmask>>idx == mask); - if ((b&bmask) == bmask) { // found a match + if ((b&bmask) == bmask) { // found a match with all bits set, try clearing atomically if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], bmask, NULL)) { *pidx = (i*MI_BFIELD_BITS) + idx; mi_assert_internal(*pidx < MI_BCHUNK_BITS); @@ -753,7 +776,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, return true; } else { - // if failed to atomically commit, reload b and try again from this position + // if we failed to atomically commit, reload b and try again from the start b = mi_atomic_load_acquire(&chunk->bfields[i]); } } @@ -764,6 +787,25 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, b = b & ~mi_bfield_mask(ones, idx); // clear the ones } } + + // check if we can cross into the next bfield + if (i < MI_BCHUNK_FIELDS-1) { + const size_t post = mi_bfield_clz(~b); + if (post > 0) { + const size_t pre = mi_bfield_ctz(~mi_atomic_load_relaxed(&chunk->bfields[i+1])); + if (post + pre >= n) { + // it fits -- try to claim it atomically + const size_t cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - post); + if (mi_bchunk_try_clearNX(chunk, cidx, n, NULL)) { + // we cleared all atomically + *pidx = cidx; + mi_assert_internal(*pidx < MI_BCHUNK_BITS); + mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS); + return true; + } + } + } + } } return false; } @@ -775,46 +817,47 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk, size_t n, size_t* pidx) { if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk - const size_t skip_count = n/MI_BFIELD_BITS; + // we first scan ahead to see if there is a range of `n` set bits, and only then try to clear atomically + mi_assert_internal(n>0); + const size_t skip_count = (n-1)/MI_BFIELD_BITS; size_t cidx; - for (size_t i = 0; i <= MI_BCHUNK_FIELDS - skip_count; i++) + for (size_t i = 0; i < MI_BCHUNK_FIELDS - skip_count; i++) { size_t m = n; // bits to go // first field mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); size_t ones = mi_bfield_clz(~b); - cidx = i*MI_BFIELD_BITS + (MI_BFIELD_BITS - ones); // start index + cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - ones); // start index if (ones >= m) { // we found enough bits! m = 0; } else { m -= ones; - mi_assert_internal(m>0); - } - - // keep scanning further fields? - size_t j = 1; // field count from i - while (i+j < MI_BCHUNK_FIELDS) { - mi_assert_internal(m > 0); - b = mi_atomic_load_relaxed(&chunk->bfields[i+j]); - ones = mi_bfield_ctz(~b); - if (ones >= m) { - // we found enough bits - m = 0; - break; - } - else if (ones == MI_BFIELD_BITS) { - // not enough yet, proceed to the next field - j++; - m -= MI_BFIELD_BITS; - } - else { - // the range was not enough, start from scratch - i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time) - mi_assert_internal(m>0); - break; + + // keep scanning further fields?
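+ // (the first field contributes its high bits, counted with clz above;
+ // each following field must contribute set bits from its low end, hence
+ // the ctz on the inverted field below)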
+ size_t j = 1; // field count from i + while (i+j < MI_BCHUNK_FIELDS) { + mi_assert_internal(m > 0); + b = mi_atomic_load_relaxed(&chunk->bfields[i+j]); + ones = mi_bfield_ctz(~b); + if (ones >= m) { + // we found enough bits + m = 0; + break; + } + else if (ones == MI_BFIELD_BITS) { + // not enough yet, proceed to the next field + j++; + m -= MI_BFIELD_BITS; + } + else { + // the range was not enough, start from scratch + i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time) + mi_assert_internal(m>0); + break; + } } } @@ -838,9 +881,9 @@ static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk, //static inline bool mi_bchunk_try_find_and_clearN(mi_bchunk_t* chunk, size_t n, size_t* pidx) { // if (n==1) return mi_bchunk_try_find_and_clear(chunk, pidx); // small pages // if (n==8) return mi_bchunk_try_find_and_clear8(chunk, pidx); // medium pages -// if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx); // large pages -// if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk -// if (n < MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx); +// // if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx); // large pages +// if (n==0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk +// if (n<=MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx); // return mi_bchunk_try_find_and_clearN_(chunk, n, pidx); //} @@ -909,7 +952,7 @@ static void mi_bitmap_chunkmap_set_max(mi_bitmap_t* bitmap, size_t chunk_idx) { static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) { mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap)); - mi_bchunk_set(&bitmap->chunkmap, chunk_idx); + mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL); mi_bitmap_chunkmap_set_max(bitmap, chunk_idx); } @@ -922,7 +965,7 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) // .. but a concurrent set may have happened in between our all-clear test and the clearing of the // bit in the mask. We check again to catch this situation. 
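+ // (a set chunkmap bit for an all-clear chunk is harmless -- presumably it
+ // only costs an extra scan -- but a clear chunkmap bit for a chunk that
+ // still has set bits would hide those bits; hence the conservative re-set)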
if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) { - mi_bchunk_set(&bitmap->chunkmap, chunk_idx); + mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL); return false; } mi_bitmap_chunkmap_set_max(bitmap, chunk_idx); @@ -1018,7 +1061,7 @@ bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx) { const size_t chunk_idx = idx / MI_BCHUNK_BITS; const size_t cidx = idx % MI_BCHUNK_BITS; mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); - const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx); + const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx, NULL); mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards return wasclear; } @@ -1235,9 +1278,9 @@ bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pid return mi_bitmap_try_find_and_clear_generic(bitmap, tseq, 8, pidx, &mi_bchunk_try_find_and_clear_8); } -bool mi_bitmap_try_find_and_clearX(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) { - return mi_bitmap_try_find_and_clear_generic(bitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X); -} +//bool mi_bitmap_try_find_and_clearX(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) { +// return mi_bitmap_try_find_and_clear_generic(bitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X); +//} bool mi_bitmap_try_find_and_clearNX(mi_bitmap_t* bitmap, size_t tseq, size_t n, size_t* pidx) { mi_assert_internal(n<=MI_BFIELD_BITS); @@ -1279,7 +1322,7 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk else { // failed to claim it, set abandoned mapping again (unless the page was freed) if (keep_set) { - const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx); + const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx, NULL); mi_assert_internal(wasclear); MI_UNUSED(wasclear); } } diff --git a/src/bitmap.h b/src/bitmap.h index 16ecea07..09967fb9 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -191,7 +191,7 @@ static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) { // Specialized versions for common bit sequence sizes mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // 1-bit mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // 8-bits -mi_decl_nodiscard bool mi_bitmap_try_find_and_clearX(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS +// mi_decl_nodiscard bool mi_bitmap_try_find_and_clearX(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS mi_decl_nodiscard bool mi_bitmap_try_find_and_clearNX(mi_bitmap_t* bitmap, size_t tseq, size_t n, size_t* pidx); // <= MI_BFIELD_BITS mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_t tseq, size_t n, size_t* pidx); // > MI_BFIELD_BITS <= MI_BCHUNK_BITS @@ -200,7 +200,7 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_ mi_decl_nodiscard static inline bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) { if (n==1) return mi_bitmap_try_find_and_clear(bitmap, tseq, pidx); // small pages if (n==8) return mi_bitmap_try_find_and_clear8(bitmap, tseq, pidx); // medium pages - if (n==MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearX(bitmap, tseq, pidx); // large pages + // if (n==MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearX(bitmap, tseq, pidx); // large pages if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk if (n <=
MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearNX(bitmap, tseq, n, pidx); return mi_bitmap_try_find_and_clearN_(bitmap, tseq, n, pidx); From 5e26ba6fe62e5624dd65564501ef8d2fd915e56d Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 2 Jan 2025 12:14:12 -0800 Subject: [PATCH 3/3] fix debug output --- src/arena.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/arena.c b/src/arena.c index 11a4f82f..4c363a57 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1319,10 +1319,10 @@ static int mi_page_commit_usage(mi_page_t* page) { return (int)(used_size * 100 / committed_size); } -static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k, mi_arena_t* arena, size_t slice_index) { +static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k, mi_arena_t* arena, size_t slice_index, long* pbit_of_page, mi_ansi_color_t* pcolor_of_page ) { size_t bit_set_count = 0; - long bit_of_page = 0; - mi_ansi_color_t color = MI_GRAY; + long bit_of_page = *pbit_of_page; + mi_ansi_color_t color = *pcolor_of_page; mi_ansi_color_t prev_color = MI_GRAY; for (int bit = 0; bit < MI_BFIELD_BITS; bit++, bit_of_page--) { bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0); @@ -1331,9 +1331,9 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k, if (is_set) { mi_assert_internal(bit_of_page <= 0); bit_set_count++; - mi_page_t* page = (mi_page_t*)start; c = 'p'; color = MI_GRAY; + mi_page_t* page = (mi_page_t*)start; if (mi_page_is_abandoned_mapped(page)) { c = 'a'; } else if (mi_page_is_abandoned(page)) { c = (mi_page_is_singleton(page) ? 's' : 'f'); } int commit_usage = mi_page_commit_usage(page); @@ -1362,7 +1362,9 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k, } buf[*k] = c; *k += 1; } - mi_debug_color(buf, k, MI_GRAY); + mi_debug_color(buf, k, MI_GRAY); + *pbit_of_page = bit_of_page; + *pcolor_of_page = color; return bit_set_count; } @@ -1381,6 +1383,8 @@ static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bi else if (i<100) { buf[k++] = ('0' + (char)(i/10)); buf[k++] = ('0' + (char)(i%10)); buf[k++] = ' '; } else if (i<1000) { buf[k++] = ('0' + (char)(i/100)); buf[k++] = ('0' + (char)((i%100)/10)); buf[k++] = ('0' + (char)(i%10)); } + long bit_of_page = 0; + mi_ansi_color_t color_of_page = MI_GRAY; for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { if (j > 0 && (j % MI_FIELDS_PER_LINE) == 0) { _mi_output_message(" %s\n\x1B[37m", buf); @@ -1390,7 +1394,7 @@ static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bi if (bit_count < slice_count) { mi_bfield_t bfield = chunk->bfields[j]; if (invert) bfield = ~bfield; - size_t xcount = (arena!=NULL ? mi_debug_show_page_bfield(bfield, buf, &k, arena, bit_count) + size_t xcount = (arena!=NULL ? mi_debug_show_page_bfield(bfield, buf, &k, arena, bit_count, &bit_of_page, &color_of_page) : mi_debug_show_bfield(bfield, buf, &k)); if (invert) xcount = MI_BFIELD_BITS - xcount; bit_set_count += xcount;
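(The key change in this last patch is that `bit_of_page` and `color_of_page` are now threaded through successive `mi_debug_show_page_bfield` calls, so a page whose slices span several bfields keeps its character and color across 64-bit boundaries instead of being reset to `MI_GRAY` at the start of every bfield. A standalone sketch of that pattern -- hypothetical names, not mimalloc code:)

#include <stdint.h>
#include <stdio.h>

// print one 8-bit block; `run` is the caller-owned state that survives
// from one block to the next (like bit_of_page/color_of_page in the patch)
static void show_block(uint8_t b, int* run) {
  for (int bit = 0; bit < 8; bit++, (*run)--) {
    const int is_set = (b >> bit) & 1;
    if (is_set) { *run = 4; }           // a set bit starts a 4-position run
    putchar(is_set ? 'p' : (*run > 0 ? '-' : '.'));
  }
}

int main(void) {
  const uint8_t blocks[2] = { 0x80, 0x00 };  // run starts at bit 7 of block 0
  int run = 0;                               // state is threaded across calls
  for (int i = 0; i < 2; i++) { show_block(blocks[i], &run); }
  putchar('\n');  // prints ".......p---....." : the run crosses the block boundary
  return 0;
}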