From 10b40f90fc12b1e6895555410561c07b0cba0344 Mon Sep 17 00:00:00 2001
From: daanx
Date: Thu, 2 Jan 2025 14:59:42 -0800
Subject: [PATCH 1/4] fix scan of NX

---
 src/bitmap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/bitmap.c b/src/bitmap.c
index 5cecc606..067faff0 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -281,6 +281,7 @@ static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, si
   const size_t m = MI_BFIELD_BITS - idx;   // bits to clear in the first field
   mi_assert_internal(m < n);
   mi_assert_internal(i < MI_BCHUNK_FIELDS - 1);
+  mi_assert_internal(idx + m <= MI_BFIELD_BITS);
   size_t already_set1;
   const bool all_set1 = mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &already_set1);
   mi_assert_internal(n - m > 0);
@@ -792,7 +793,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
     if (i < MI_BCHUNK_FIELDS-1) {
       const size_t post = mi_bfield_clz(~b);
       if (post > 0) {
-        const size_t pre = mi_bfield_ctz(mi_atomic_load_relaxed(&chunk->bfields[i+1]));
+        const size_t pre = mi_bfield_ctz(~mi_atomic_load_relaxed(&chunk->bfields[i+1]));
         if (post + pre <= n) {
           // it fits -- try to claim it atomically
           const size_t cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - post);

From 34e402e128402c4d534f0513b76f54ecfaa573dd Mon Sep 17 00:00:00 2001
From: daanx
Date: Thu, 2 Jan 2025 15:00:17 -0800
Subject: [PATCH 2/4] fix NX test in try_find_and_clearN

---
 src/bitmap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/bitmap.h b/src/bitmap.h
index 09967fb9..8ab06216 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -201,8 +201,8 @@ mi_decl_nodiscard static inline bool mi_bitmap_try_find_and_clearN(mi_bitmap_t*
   if (n==1) return mi_bitmap_try_find_and_clear(bitmap, tseq, pidx);   // small pages
   if (n==8) return mi_bitmap_try_find_and_clear8(bitmap, tseq, pidx);  // medium pages
   // if (n==MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearX(bitmap, tseq, pidx);  // large pages
-  if (n == 0 || n > MI_BCHUNK_BITS) return false;  // cannot be more than a chunk
-  if (n < MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearNX(bitmap, tseq, n, pidx);
+  if (n==0 || n>MI_BCHUNK_BITS) return false;  // cannot be more than a chunk
+  if (n <= MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearNX(bitmap, tseq, n, pidx);
   return mi_bitmap_try_find_and_clearN_(bitmap, tseq, n, pidx);
 }
 

From b28e2bdd93cb14d961aa4ff6e505a8293c2bc059 Mon Sep 17 00:00:00 2001
From: daanx
Date: Thu, 2 Jan 2025 15:02:40 -0800
Subject: [PATCH 3/4] fix pointer alignment for zero-sized large alignment case

---
 src/alloc-aligned.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index 6b0a33c1..7304eb1d 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -78,7 +78,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
   }
   else {
     // otherwise over-allocate
-    oversize = size + alignment - 1;
+    oversize = (size < MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : size) + alignment - 1;  // adjust for size <= 16; with size 0 and aligment 64k, we would allocate a 64k block and pointing just beyond that.
     p = mi_heap_malloc_zero_no_guarded(heap, oversize, zero);
     if (p == NULL) return NULL;
   }

From ab78d57a843476edd6e89139585a98011e107911 Mon Sep 17 00:00:00 2001
From: daanx
Date: Thu, 2 Jan 2025 15:19:08 -0800
Subject: [PATCH 4/4] search size bins from small to large

---
 src/bitmap.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/bitmap.c b/src/bitmap.c
index a03aef69..b9daf7c6 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -1552,14 +1552,16 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
   const mi_bfield_t cmap_mask = mi_bfield_mask(cmap_max_count,0);
   const size_t cmap_cycle = cmap_acc+1;
   const mi_bbin_t bbin = mi_bbin_of(n);
-  // visit bins from largest size bin up to the NONE bin
-  for(int bin = bbin; bin >= MI_BBIN_SMALL; bin--)  // no need to traverse for MI_BBIN_NONE as anyone can allocate in MI_BBIN_SMALL
-  // const mi_bbin_t bin = bbin;
+  // visit bins from smallest to largest (to reduce fragmentation on the larger blocks)
+  for(int bin = MI_BBIN_SMALL; bin <= bbin; bin++)  // no need to traverse for MI_BBIN_NONE as anyone can allocate in MI_BBIN_SMALL
+  // (int bin = bbin; bin >= MI_BBIN_SMALL; bin--)  // visit bins from largest size bin up to the NONE bin
   {
     mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
     {
       // don't search into non-accessed memory until we tried other size bins as well
-      if (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc) {
+      if (bin < bbin && cmap_idx > cmap_acc)
+      // (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc)  // large to small
+      {
         break;
       }
 
@@ -1573,8 +1575,10 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
       mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
       // only in the current size class!
       const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
-      if // (bin >= chunk_bin) {
-         ((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {
+      if ((mi_bbin_t)bin == chunk_bin || (bin == bbin && chunk_bin == MI_BBIN_NONE))  // only allow NONE at the final run
+      // ((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {  largest to smallest
+
+      {
         mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
         size_t cidx;
         if ((*on_find)(chunk, n, &cidx)) {
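Note on [PATCH 3/4]: with the old formula `size + alignment - 1`, a zero-sized request with a large alignment (say 64 KiB) can yield an over-allocated block whose aligned pointer lands exactly one past the end of the allocation. The standalone sketch below walks through that arithmetic; it is not mimalloc code: MAX_ALIGN_SIZE stands in for MI_MAX_ALIGN_SIZE (16 on most platforms), and the block placement is a worked example, not how the allocator actually places blocks.

/* sketch: why patch 3/4 rounds tiny sizes up before adding the alignment slack */
#include <stdint.h>
#include <stdio.h>

#define MAX_ALIGN_SIZE 16   /* assumed stand-in for MI_MAX_ALIGN_SIZE */

int main(void) {
  const size_t size = 0;
  const size_t alignment = 64 * 1024;   /* 64 KiB */

  /* old formula: 0 + 65536 - 1 = 65535 bytes */
  const size_t old_oversize = size + alignment - 1;
  /* patched formula: round tiny sizes up first, giving 16 + 65535 = 65551 bytes */
  const size_t new_oversize = (size < MAX_ALIGN_SIZE ? MAX_ALIGN_SIZE : size) + alignment - 1;

  /* suppose the over-allocated block happens to start one byte past a 64 KiB
     boundary; aligning up then lands 65535 bytes into the block */
  const uintptr_t block_start = 0x10001;
  const uintptr_t aligned = (block_start + alignment - 1) & ~(uintptr_t)(alignment - 1);

  printf("old block [%#zx, %#zx), aligned ptr %#zx -> one past the end\n",
         (size_t)block_start, (size_t)(block_start + old_oversize), (size_t)aligned);
  printf("new block [%#zx, %#zx), aligned ptr %#zx -> strictly inside\n",
         (size_t)block_start, (size_t)(block_start + new_oversize), (size_t)aligned);
  return 0;
}

With the patched formula the aligned pointer always falls strictly inside the over-allocated block, so a zero-sized aligned allocation still returns a pointer that belongs to its owning block rather than to whatever happens to follow it.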