Merge branch 'dev3-bin' of e:\dev\mimalloc3 into dev3-bin

Daan Leijen 2025-01-02 12:45:38 -08:00
commit 44264b3d8b
4 changed files with 214 additions and 169 deletions


@@ -100,9 +100,10 @@ terms of the MIT license. A copy of the license can be found in the file
#endif #endif
// ------------------------------------------------------ // --------------------------------------------------------------
// Sizes of internal data-structures // Sizes of internal data-structures
// ------------------------------------------------------ // (comments specify sizes on 64-bit, usually 32-bit is halved)
// --------------------------------------------------------------
// Sizes are for 64-bit // Sizes are for 64-bit
#ifndef MI_ARENA_SLICE_SHIFT #ifndef MI_ARENA_SLICE_SHIFT
@@ -116,19 +117,19 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512) #define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512)
#endif #endif
#define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) #define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) // sub-bitmaps are "bchunks" of 512 bits
#define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) #define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) // arenas allocate in slices of 64 KiB
#define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE) #define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE)
#define MI_ARENA_MIN_OBJ_SLICES (1) #define MI_ARENA_MIN_OBJ_SLICES (1)
#define MI_ARENA_MAX_OBJ_SLICES (MI_BCHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries) #define MI_ARENA_MAX_OBJ_SLICES (MI_BCHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries)
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE)
#define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE) #define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE)
#define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE #define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE // 64 KiB
#define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bitmap) #define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bchunk bitmap)
#define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bitmap) #define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bchunk bitmap)
// Maximum number of size classes. (spaced exponentially in 12.5% increments) // Maximum number of size classes. (spaced exponentially in 12.5% increments)
#define MI_BIN_HUGE (73U) #define MI_BIN_HUGE (73U)
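
For reference, the 64-bit sizes quoted in the comments above are consistent with each other; a minimal stand-alone check (assuming MI_SIZE_SHIFT is 3 and a 64 KiB arena slice, as the comments state; all names below are local to the sketch):

  // illustrative only: recompute the documented 64-bit sizes
  #include <assert.h>
  #include <stddef.h>
  int main(void) {
    const size_t KiB = 1024, MiB = 1024*KiB;
    const size_t slice      = 64*KiB;          // MI_ARENA_SLICE_SIZE (64 KiB)
    const size_t chunk_bits = 1u << (6 + 3);   // MI_BCHUNK_BITS: 512 bits per bchunk
    assert(chunk_bits*slice == 32*MiB);        // MI_ARENA_MAX_OBJ_SIZE: 32 MiB per chunk
    assert(8*slice == 512*KiB);                // MI_MEDIUM_PAGE_SIZE: one byte in a bchunk
    assert(8*8*slice == 4*MiB);                // MI_LARGE_PAGE_SIZE: one word in a bchunk
    return 0;
  }
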
@@ -272,7 +273,7 @@ typedef uint8_t mi_heaptag_t;
// //
// Notes: // Notes:
// - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`). // - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`).
// - If a page is not part of a heap it is called "abandoned" -- in // - If a page is not part of a heap it is called "abandoned" (`heap==NULL`) -- in
// that case the `xthreadid` is 0 or 1 (1 is for abandoned pages that // that case the `xthreadid` is 0 or 1 (1 is for abandoned pages that
// are in the abandoned page lists of an arena, these are called "mapped" abandoned pages). // are in the abandoned page lists of an arena, these are called "mapped" abandoned pages).
// - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc` // - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
@@ -304,7 +305,7 @@ typedef struct mi_page_s {
mi_heap_t* heap; // the heap owning this page (or NULL for abandoned pages) mi_heap_t* heap; // the heap owning this page (or NULL for abandoned pages)
struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
size_t slice_committed; // committed size relative to the first arena slice of the page data size_t slice_committed; // committed size relative to the first arena slice of the page data (or 0 if the page is fully committed already)
mi_memid_t memid; // provenance of the page memory mi_memid_t memid; // provenance of the page memory
} mi_page_t; } mi_page_t;
@@ -315,7 +316,7 @@ typedef struct mi_page_s {
#define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map. #define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map.
#define MI_PAGE_MIN_START_BLOCK_ALIGN MI_MAX_ALIGN_SIZE // minimal block alignment for the first block in a page (16b) #define MI_PAGE_MIN_START_BLOCK_ALIGN MI_MAX_ALIGN_SIZE // minimal block alignment for the first block in a page (16b)
#define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks #define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks (such that we guarantee natural alignment)
#define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation #define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation
#if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8 #if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8
@@ -328,7 +329,7 @@ typedef struct mi_page_s {
// (Except for large pages since huge objects are allocated in 4MiB chunks) // (Except for large pages since huge objects are allocated in 4MiB chunks)
#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 8 KiB #define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 8 KiB
#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 64 KiB #define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 64 KiB
#define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/4) // <= 512 KiB // note: this must be a nice power of 2 or we get rounding issues with _mi_bin #define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/4) // <= 512 KiB // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE) #define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)


@@ -773,9 +773,9 @@ mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t bloc
else if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) { else if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) {
page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_MEDIUM_PAGE_SIZE), block_size); page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_MEDIUM_PAGE_SIZE), block_size);
} }
else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) { //else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) {
page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size); // page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size);
} // }
else { else {
page = mi_arenas_page_singleton_alloc(heap, block_size, block_alignment); page = mi_arenas_page_singleton_alloc(heap, block_size, block_alignment);
} }
@@ -1325,10 +1325,10 @@ static int mi_page_commit_usage(mi_page_t* page) {
return (int)(used_size * 100 / committed_size); return (int)(used_size * 100 / committed_size);
} }
static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k, mi_arena_t* arena, size_t slice_index) { static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k, mi_arena_t* arena, size_t slice_index, long* pbit_of_page, mi_ansi_color_t* pcolor_of_page ) {
size_t bit_set_count = 0; size_t bit_set_count = 0;
long bit_of_page = 0; long bit_of_page = *pbit_of_page;
mi_ansi_color_t color = MI_GRAY; mi_ansi_color_t color = *pcolor_of_page;
mi_ansi_color_t prev_color = MI_GRAY; mi_ansi_color_t prev_color = MI_GRAY;
for (int bit = 0; bit < MI_BFIELD_BITS; bit++, bit_of_page--) { for (int bit = 0; bit < MI_BFIELD_BITS; bit++, bit_of_page--) {
bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0); bool is_set = ((((mi_bfield_t)1 << bit) & field) != 0);
@@ -1337,9 +1337,9 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k,
if (is_set) { if (is_set) {
mi_assert_internal(bit_of_page <= 0); mi_assert_internal(bit_of_page <= 0);
bit_set_count++; bit_set_count++;
mi_page_t* page = (mi_page_t*)start;
c = 'p'; c = 'p';
color = MI_GRAY; color = MI_GRAY;
mi_page_t* page = (mi_page_t*)start;
if (mi_page_is_abandoned_mapped(page)) { c = 'a'; } if (mi_page_is_abandoned_mapped(page)) { c = 'a'; }
else if (mi_page_is_abandoned(page)) { c = (mi_page_is_singleton(page) ? 's' : 'f'); } else if (mi_page_is_abandoned(page)) { c = (mi_page_is_singleton(page) ? 's' : 'f'); }
int commit_usage = mi_page_commit_usage(page); int commit_usage = mi_page_commit_usage(page);
@@ -1368,7 +1368,9 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k,
} }
buf[*k] = c; *k += 1; buf[*k] = c; *k += 1;
} }
mi_debug_color(buf, k, MI_GRAY); mi_debug_color(buf, k, MI_GRAY);
*pbit_of_page = bit_of_page;
*pcolor_of_page = color;
return bit_set_count; return bit_set_count;
} }
@@ -1390,16 +1392,18 @@ static size_t mi_debug_show_chunks(const char* header, size_t slice_count, size_
char chunk_kind = ' '; char chunk_kind = ' ';
if (chunk_bins != NULL) { if (chunk_bins != NULL) {
switch (mi_atomic_load_relaxed(&chunk_bins[i])) { switch (mi_atomic_load_relaxed(&chunk_bins[i])) {
// case MI_BBIN_SMALL: chunk_kind = 'S'; break; case MI_BBIN_SMALL: chunk_kind = 'S'; break;
case MI_BBIN_MEDIUM: chunk_kind = 'M'; break; case MI_BBIN_MEDIUM: chunk_kind = 'M'; break;
case MI_BBIN_LARGE: chunk_kind = 'L'; break; case MI_BBIN_LARGE: chunk_kind = 'L'; break;
case MI_BBIN_OTHER: chunk_kind = 'O'; break; case MI_BBIN_OTHER: chunk_kind = 'X'; break;
// case MI_BBIN_NONE: chunk_kind = 'N'; break; // case MI_BBIN_NONE: chunk_kind = 'N'; break;
} }
} }
buf[k++] = chunk_kind; buf[k++] = chunk_kind;
buf[k++] = ' '; buf[k++] = ' ';
long bit_of_page = 0;
mi_ansi_color_t color_of_page = MI_GRAY;
for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) { for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
if (j > 0 && (j % MI_FIELDS_PER_LINE) == 0) { if (j > 0 && (j % MI_FIELDS_PER_LINE) == 0) {
// buf[k++] = '\n'; _mi_memset(buf+k,' ',7); k += 7; // buf[k++] = '\n'; _mi_memset(buf+k,' ',7); k += 7;
@@ -1410,7 +1414,7 @@ static size_t mi_debug_show_chunks(const char* header, size_t slice_count, size_
if (bit_count < slice_count) { if (bit_count < slice_count) {
mi_bfield_t bfield = chunk->bfields[j]; mi_bfield_t bfield = chunk->bfields[j];
if (invert) bfield = ~bfield; if (invert) bfield = ~bfield;
size_t xcount = (arena!=NULL ? mi_debug_show_page_bfield(bfield, buf, &k, arena, bit_count) size_t xcount = (arena!=NULL ? mi_debug_show_page_bfield(bfield, buf, &k, arena, bit_count, &bit_of_page, &color_of_page)
: mi_debug_show_bfield(bfield, buf, &k)); : mi_debug_show_bfield(bfield, buf, &k));
if (invert) xcount = MI_BFIELD_BITS - xcount; if (invert) xcount = MI_BFIELD_BITS - xcount;
bit_set_count += xcount; bit_set_count += xcount;


@@ -114,8 +114,8 @@ static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_
do { do {
if mi_unlikely((old&mask) == 0) { if mi_unlikely((old&mask) == 0) {
old = mi_atomic_load_acquire(b); old = mi_atomic_load_acquire(b);
if ((old&mask)==0) { if ((old&mask)==0) {
mi_subproc_stat_counter_increase(_mi_subproc(), pages_unabandon_busy_wait, 1); mi_subproc_stat_counter_increase(_mi_subproc(), pages_unabandon_busy_wait, 1);
} }
while ((old&mask)==0) { // busy wait while ((old&mask)==0) { // busy wait
mi_atomic_yield(); mi_atomic_yield();
@@ -138,6 +138,7 @@ static inline bool mi_bfield_atomic_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t
} }
// Clear a mask set of bits atomically, and return true if the mask bits transitioned from all 1's to 0's // Clear a mask set of bits atomically, and return true if the mask bits transitioned from all 1's to 0's
// `all_clear` is set to `true` if the new bfield became zero.
static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) { static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) {
mi_assert_internal(mask != 0); mi_assert_internal(mask != 0);
mi_bfield_t old = mi_atomic_load_relaxed(b); mi_bfield_t old = mi_atomic_load_relaxed(b);
@@ -163,6 +164,7 @@ static inline bool mi_bfield_atomic_clearX(_Atomic(mi_bfield_t)*b, bool* all_cle
// Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0 // Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0
// and false otherwise (leaving the bit field as is). // and false otherwise (leaving the bit field as is).
// `all_clear` is set to `true` if the new bfield became zero.
static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) { static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) {
mi_assert_internal(mask != 0); mi_assert_internal(mask != 0);
mi_bfield_t old = mi_atomic_load_relaxed(b); mi_bfield_t old = mi_atomic_load_relaxed(b);
@@ -178,9 +180,9 @@ static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bf
} }
// Tries to set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0) // Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0
// and `false` otherwise leaving the bfield `b` as-is. // and `false` otherwise leaving the bfield `b` as-is.
// `all_clear` is set to true if the new bfield is zero (and false otherwise) // `all_clear` is set to true if the new bfield became zero (and false otherwise)
static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) { static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) {
mi_assert_internal(idx < MI_BFIELD_BITS); mi_assert_internal(idx < MI_BFIELD_BITS);
const mi_bfield_t mask = mi_bfield_one()<<idx; const mi_bfield_t mask = mi_bfield_one()<<idx;
@@ -189,6 +191,7 @@ static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx
// Tries to clear a byte atomically, and returns true if the byte atomically transitioned from 0xFF to 0 // Tries to clear a byte atomically, and returns true if the byte atomically transitioned from 0xFF to 0
// `all_clear` is set to true if the new bfield became zero (and false otherwise)
static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) { static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) {
mi_assert_internal(idx < MI_BFIELD_BITS); mi_assert_internal(idx < MI_BFIELD_BITS);
mi_assert_internal((idx%8)==0); mi_assert_internal((idx%8)==0);
@@ -198,6 +201,7 @@ static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t id
// Try to clear a full field of bits atomically, and return true if all bits transitioned from all 1's to 0's. // Try to clear a full field of bits atomically, and return true if all bits transitioned from all 1's to 0's.
// and false otherwise leaving the bit field as-is. // and false otherwise leaving the bit field as-is.
// `all_clear` is set to true if the new bfield became zero (which is always the case if successful).
static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b, bool* all_clear) { static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b, bool* all_clear) {
mi_bfield_t old = mi_bfield_all_set(); mi_bfield_t old = mi_bfield_all_set();
if (mi_atomic_cas_strong_acq_rel(b, &old, mi_bfield_zero())) { if (mi_atomic_cas_strong_acq_rel(b, &old, mi_bfield_zero())) {
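
The hunk above ends inside mi_bfield_atomic_try_clearX, which clears a whole bfield with a single compare-and-swap from the all-ones state to zero; a rough stand-alone equivalent using C11 atomics (illustrative only, not the mimalloc implementation):

  // illustrative: clear a 64-bit field only if it is currently all ones
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdint.h>
  static bool try_clearX(_Atomic uint64_t* b, bool* all_clear) {
    uint64_t expected = ~UINT64_C(0);                  // only succeed from the all-set state
    if (atomic_compare_exchange_strong(b, &expected, 0)) {
      if (all_clear != NULL) { *all_clear = true; }    // the field is zero by construction
      return true;
    }
    return false;                                      // not all ones; the field is left as-is
  }
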
@@ -257,26 +261,43 @@ static inline bool mi_bfield_atomic_is_xset_mask(mi_xset_t set, _Atomic(mi_bfiel
// ------- mi_bchunk_set --------------------------------------- // ------- mi_bchunk_set ---------------------------------------
static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx) { // Set a single bit
static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) {
mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(cidx < MI_BCHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS; const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_set(&chunk->bfields[i], idx); const bool was_clear = mi_bfield_atomic_set(&chunk->bfields[i], idx);
if (already_set != NULL) { *already_set = (was_clear ? 0 : 1); }
return was_clear;
} }
// Set `0 < n <= MI_BFIELD_BITS`, and return true if the mask bits transitioned from all 0's to 1's.
// `already_set` contains the count of bits that were already set (used when committing ranges to account
// statistics correctly).
// Can cross over two bfields.
static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) {
mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal(n > 0 && n <= MI_BFIELD_BITS);
const size_t i = cidx / MI_BFIELD_BITS; const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS;
const mi_bfield_t mask = mi_bfield_mask(n, idx); if mi_likely(idx + n <= MI_BFIELD_BITS) {
return mi_bfield_atomic_set_mask(&chunk->bfields[i], mask, already_set); // within one field
} return mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(n,idx), already_set);
}
static inline bool mi_bchunk_setX(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) { else {
mi_assert_internal(cidx < MI_BCHUNK_BITS); // spanning two fields
mi_assert_internal((cidx%MI_BFIELD_BITS)==0); const size_t m = MI_BFIELD_BITS - idx; // bits to clear in the first field
const size_t i = cidx / MI_BFIELD_BITS; mi_assert_internal(m < n);
return mi_bfield_atomic_setX(&chunk->bfields[i], already_set); mi_assert_internal(i < MI_BCHUNK_FIELDS - 1);
size_t already_set1;
const bool all_set1 = mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &already_set1);
mi_assert_internal(n - m > 0);
mi_assert_internal(n - m < MI_BFIELD_BITS);
size_t already_set2;
const bool all_set2 = mi_bfield_atomic_set_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &already_set2);
if (already_set != NULL) { *already_set = already_set1 + already_set2; }
return (all_set1 && all_set2);
}
} }
// Set a sequence of `n` bits within a chunk. // Set a sequence of `n` bits within a chunk.
@@ -306,6 +327,7 @@ mi_decl_noinline static bool mi_bchunk_xsetN_(mi_xset_t set, mi_bchunk_t* chunk,
// next field // next field
field++; field++;
idx = 0; idx = 0;
mi_assert_internal(m <= n);
n -= m; n -= m;
} }
if (palready_set!=NULL) { *palready_set = total_already_set; } if (palready_set!=NULL) { *palready_set = total_already_set; }
@@ -315,13 +337,10 @@ mi_decl_noinline static bool mi_bchunk_xsetN_(mi_xset_t set, mi_bchunk_t* chunk,
static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) {
mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS);
if (n==1) { if (n==1) return mi_bchunk_set(chunk, cidx, already_set);
bool was_clear = mi_bchunk_set(chunk, cidx); // if (n==8 && (cidx%8) == 0) return mi_bchunk_set8(chunk, cidx, already_set);
if (already_set != NULL) { *already_set = !was_clear; } // if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set);
return was_clear; if (n<=MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set);
}
if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set);
if (n <MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set);
return mi_bchunk_xsetN_(MI_BIT_SET, chunk, cidx, n, already_set, NULL); return mi_bchunk_xsetN_(MI_BIT_SET, chunk, cidx, n, already_set, NULL);
} }
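
The new mi_bchunk_setNX above splits a range that crosses a bfield boundary into two masks, one per field; a minimal non-atomic sketch of that mask arithmetic (helper names are local to the sketch, 64-bit bfields assumed):

  // illustrative: the two masks for n bits starting at bit idx when the range crosses a 64-bit field
  #include <stdint.h>
  #include <stdio.h>
  static uint64_t mask_of(size_t n, size_t idx) {      // in the spirit of mi_bfield_mask(n, idx)
    return (n >= 64 ? ~UINT64_C(0) : ((UINT64_C(1) << n) - 1)) << idx;
  }
  int main(void) {
    const size_t idx = 60, n = 8;                      // crosses the boundary: bits 60..63 and 0..3
    const size_t m = 64 - idx;                         // bits that fall in the first field
    printf("first field mask : %016llx\n", (unsigned long long)mask_of(m, idx));
    printf("second field mask: %016llx\n", (unsigned long long)mask_of(n - m, 0));
    return 0;
  }
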
@@ -334,27 +353,13 @@ static inline bool mi_bchunk_clear(mi_bchunk_t* chunk, size_t cidx, bool* all_cl
return mi_bfield_atomic_clear(&chunk->bfields[i], idx, all_clear); return mi_bfield_atomic_clear(&chunk->bfields[i], idx, all_clear);
} }
static inline bool mi_bchunk_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* all_clear) { static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
const mi_bfield_t mask = mi_bfield_mask(n, idx);
return mi_bfield_atomic_clear_mask(&chunk->bfields[i], mask, all_clear);
}
static inline bool mi_bchunk_clearX(mi_bchunk_t* chunk, size_t cidx, bool* all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal((cidx%MI_BFIELD_BITS)==0);
const size_t i = cidx / MI_BFIELD_BITS;
return mi_bfield_atomic_clearX(&chunk->bfields[i], all_clear);
}
static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) {
mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS);
if (n==1) return mi_bchunk_clear(chunk, cidx, pmaybe_all_clear); if (n==1) return mi_bchunk_clear(chunk, cidx, maybe_all_clear);
if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, pmaybe_all_clear); // if (n==8) return mi_bchunk_clear8(chunk, cidx, maybe_all_clear);
if (n <MI_BFIELD_BITS) return mi_bchunk_clearNX(chunk, cidx, n, pmaybe_all_clear); // if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, maybe_all_clear);
return mi_bchunk_xsetN_(MI_BIT_CLEAR, chunk, cidx, n, NULL, pmaybe_all_clear); // TODO: implement mi_bchunk_xsetNX instead of setNX
return mi_bchunk_xsetN_(MI_BIT_CLEAR, chunk, cidx, n, NULL, maybe_all_clear);
} }
@@ -388,24 +393,46 @@ static inline bool mi_bchunk_is_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t
if (n==0) return true; if (n==0) return true;
const size_t i = cidx / MI_BFIELD_BITS; const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS;
if mi_likely(n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); } if (n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); }
if mi_likely(n<=MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); } if (idx + n <= MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); }
return mi_bchunk_is_xsetN_(set, chunk, i, idx, n); return mi_bchunk_is_xsetN_(set, chunk, i, idx, n);
} }
// ------- mi_bchunk_try_clear --------------------------------------- // ------- mi_bchunk_try_clear ---------------------------------------
// Clear `0 < n <= MI_BFIELD_BITS`. Can cross over a bfield boundary.
static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal(n <= MI_BFIELD_BITS); mi_assert_internal(n <= MI_BFIELD_BITS);
const size_t i = cidx / MI_BFIELD_BITS; const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS;
mi_assert_internal(idx + n <= MI_BFIELD_BITS); if mi_likely(idx + n <= MI_BFIELD_BITS) {
const size_t mask = mi_bfield_mask(n, idx); // within one field
return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mask, pmaybe_all_clear); return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(n, idx), pmaybe_all_clear);
}
else {
// spanning two fields (todo: use double-word atomic ops?)
const size_t m = MI_BFIELD_BITS - idx; // bits to clear in the first field
mi_assert_internal(m < n);
mi_assert_internal(i < MI_BCHUNK_FIELDS - 1);
bool field1_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &field1_is_clear)) return false;
// try the second field as well
mi_assert_internal(n - m > 0);
mi_assert_internal(n - m < MI_BFIELD_BITS);
bool field2_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &field2_is_clear)) {
// we failed to clear the second field, restore the first one
mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), NULL);
return false;
}
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = field1_is_clear && field2_is_clear; }
return true;
}
} }
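
When the cleared range spans two bfields, the code above must restore the first field if the second one cannot be cleared; a simplified sequential model of that unwind (plain variables instead of atomics, purely illustrative):

  // illustrative: clear a range spanning two fields, restoring the first field on failure
  #include <stdbool.h>
  #include <stdint.h>
  static bool try_clear_mask(uint64_t* f, uint64_t mask) {  // succeeds only if all mask bits are set
    if ((*f & mask) != mask) return false;
    *f &= ~mask;
    return true;
  }
  static bool try_clear_spanning(uint64_t* f0, uint64_t m0, uint64_t* f1, uint64_t m1) {
    if (!try_clear_mask(f0, m0)) return false;              // first field failed: nothing to undo
    if (!try_clear_mask(f1, m1)) {
      *f0 |= m0;                                            // second field failed: restore the first
      return false;
    }
    return true;
  }
  int main(void) {
    uint64_t a = ~UINT64_C(0), b = 0;                       // second field is empty, so the clear must fail
    return try_clear_spanning(&a, UINT64_C(0xF) << 60, &b, UINT64_C(0xF)) ? 1 : 0;  // exits 0, with `a` restored
  }
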
// Clear a full aligned bfield.
static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) { static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal((cidx%MI_BFIELD_BITS) == 0); mi_assert_internal((cidx%MI_BFIELD_BITS) == 0);
@@ -413,60 +440,51 @@ static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* p
return mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear); return mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear);
} }
// Try to atomically set/clear a sequence of `n` bits within a chunk. // Try to atomically clear a sequence of `n` bits within a chunk.
// Returns true if all bits transitioned from 0 to 1 (or 1 to 0), // Returns true if all bits transitioned from 1 to 0,
// and false otherwise leaving all bit fields as is. // and false otherwise leaving all bit fields as is.
// Note: this is a hard one as we need to unwind partial atomic operations // Note: this is the complex one as we need to unwind partial atomic operations if we fail halfway..
// if we fail halfway.. // `maybe_all_clear` is set to `true` if all the bfields involved become zero.
mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) {
mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);
mi_assert_internal(n>0); mi_assert_internal(n>0);
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = true; }
if (n==0) return true; if (n==0) return true;
size_t start_idx = cidx % MI_BFIELD_BITS;
size_t start_field = cidx / MI_BFIELD_BITS;
size_t end_field = MI_BCHUNK_FIELDS;
mi_bfield_t mask_mid = 0;
mi_bfield_t mask_end = 0;
bool field_is_clear;
bool maybe_all_clear = true;
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = false; }
// first field // first field
const size_t start_idx = cidx % MI_BFIELD_BITS;
const size_t start_field = cidx / MI_BFIELD_BITS;
size_t field = start_field; size_t field = start_field;
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field size_t m = MI_BFIELD_BITS - start_idx; // m are the bits to clear in this field
if (m > n) { m = n; } if (m > n) { m = n; }
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
mi_assert_internal(start_field < MI_BCHUNK_FIELDS); mi_assert_internal(start_field < MI_BCHUNK_FIELDS);
const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx); const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx);
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &field_is_clear)) return false; bool maybe_all_clear;
maybe_all_clear = maybe_all_clear && field_is_clear; if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &maybe_all_clear)) return false;
// done? // done?
mi_assert_internal(m <= n);
n -= m; n -= m;
if (n==0) {
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = maybe_all_clear; }
return true;
}
// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
// mid fields?
// mid fields
while (n >= MI_BFIELD_BITS) { while (n >= MI_BFIELD_BITS) {
field++; field++;
mi_assert_internal(field < MI_BCHUNK_FIELDS); mi_assert_internal(field < MI_BCHUNK_FIELDS);
mask_mid = mi_bfield_all_set(); bool field_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_mid, &field_is_clear)) goto restore; if (!mi_bfield_atomic_try_clearX(&chunk->bfields[field], &field_is_clear)) goto restore;
maybe_all_clear = maybe_all_clear && field_is_clear; maybe_all_clear = maybe_all_clear && field_is_clear;
n -= MI_BFIELD_BITS; n -= MI_BFIELD_BITS;
} }
// last field // last field?
if (n > 0) { if (n > 0) {
mi_assert_internal(n < MI_BFIELD_BITS); mi_assert_internal(n < MI_BFIELD_BITS);
field++; field++;
mi_assert_internal(field < MI_BCHUNK_FIELDS); mi_assert_internal(field < MI_BCHUNK_FIELDS);
end_field = field; const mi_bfield_t mask_end = mi_bfield_mask(n, 0);
mask_end = mi_bfield_mask(n, 0); bool field_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_end, &field_is_clear)) goto restore; if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_end, &field_is_clear)) goto restore;
maybe_all_clear = maybe_all_clear && field_is_clear; maybe_all_clear = maybe_all_clear && field_is_clear;
} }
@@ -475,12 +493,16 @@ mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t ci
return true; return true;
restore: restore:
// field is on the field that failed to set atomically; we need to restore all previous fields // `field` is the index of the field that failed to set atomically; we need to restore all previous fields
mi_assert_internal(field > start_field); mi_assert_internal(field > start_field);
while( field > start_field) { while( field > start_field) {
field--; field--;
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid)); if (field == start_field) {
mi_bfield_atomic_set_mask(&chunk->bfields[field], mask, NULL); mi_bfield_atomic_set_mask(&chunk->bfields[field], mask_start, NULL);
}
else {
mi_bfield_atomic_setX(&chunk->bfields[field], NULL); // mid-field: set all bits again
}
} }
return false; return false;
} }
@@ -488,8 +510,8 @@ restore:
static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) {
mi_assert_internal(n>0); mi_assert_internal(n>0);
if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear); // if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear);
if (n<MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear); if (n<=MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear);
return mi_bchunk_try_clearN_(chunk, cidx, n, maybe_all_clear); return mi_bchunk_try_clearN_(chunk, cidx, n, maybe_all_clear);
} }
@@ -591,7 +613,7 @@ static inline bool mi_bchunk_try_find_and_clear(mi_bchunk_t* chunk, size_t* pidx
const uint32x4_t vzero1 = vuzp1q_u32(vreinterpretq_u32_u64(vzero1_lo),vreinterpretq_u32_u64(vzero1_hi)); // unzip even elements: narrow to 4x32 bit is_zero () const uint32x4_t vzero1 = vuzp1q_u32(vreinterpretq_u32_u64(vzero1_lo),vreinterpretq_u32_u64(vzero1_hi)); // unzip even elements: narrow to 4x32 bit is_zero ()
const uint32x4_t vzero2 = vuzp1q_u32(vreinterpretq_u32_u64(vzero2_lo),vreinterpretq_u32_u64(vzero2_hi)); // unzip even elements: narrow to 4x32 bit is_zero () const uint32x4_t vzero2 = vuzp1q_u32(vreinterpretq_u32_u64(vzero2_lo),vreinterpretq_u32_u64(vzero2_hi)); // unzip even elements: narrow to 4x32 bit is_zero ()
const uint32x4_t vzero1x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero1), 24)); // shift-right 2x32bit elem by 24: lo 16 bits contain the 2 lo bytes const uint32x4_t vzero1x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero1), 24)); // shift-right 2x32bit elem by 24: lo 16 bits contain the 2 lo bytes
const uint32x4_t vzero2x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero2), 24)); const uint32x4_t vzero2x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero2), 24));
const uint16x8_t vzero12 = vreinterpretq_u16_u32(vuzp1q_u32(vzero1x,vzero2x)); // unzip even 32-bit elements into one vector const uint16x8_t vzero12 = vreinterpretq_u16_u32(vuzp1q_u32(vzero1x,vzero2x)); // unzip even 32-bit elements into one vector
const uint8x8_t vzero = vmovn_u32(vzero12); // narrow the bottom 16-bits const uint8x8_t vzero = vmovn_u32(vzero12); // narrow the bottom 16-bits
const uint64_t mask = ~vget_lane_u64(vreinterpret_u64_u8(vzero), 0); // 1 byte for each bfield (0xFF => bfield has a bit set) const uint64_t mask = ~vget_lane_u64(vreinterpret_u64_u8(vzero), 0); // 1 byte for each bfield (0xFF => bfield has a bit set)
@@ -642,7 +664,7 @@ static inline bool mi_bchunk_try_find_and_clear8_at(mi_bchunk_t* chunk, size_t c
} }
#endif #endif
// find least byte in a chunk with all bits set, and try unset it atomically // find least aligned byte in a chunk with all bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success. // set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success.
// Used to find medium size pages in the free blocks. // Used to find medium size pages in the free blocks.
// todo: try neon version // todo: try neon version
@@ -690,7 +712,7 @@ static inline bool mi_bchunk_try_find_and_clear_8(mi_bchunk_t* chunk, size_t n,
} }
// find least bfield in a chunk with all bits set, and try unset it atomically // find least aligned bfield in a chunk with all bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success. // set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success.
// Used to find large size pages in the free blocks. // Used to find large size pages in the free blocks.
// todo: try neon version // todo: try neon version
@@ -737,23 +759,24 @@ static inline bool mi_bchunk_try_find_and_clear_X(mi_bchunk_t* chunk, size_t n,
return mi_bchunk_try_find_and_clearX(chunk, pidx); return mi_bchunk_try_find_and_clearX(chunk, pidx);
} }
// find a sequence of `n` bits in a chunk with `n < MI_BFIELD_BITS` with all bits set, // find a sequence of `n` bits in a chunk with `0 < n <= MI_BFIELD_BITS` with all bits set,
// and try to clear them atomically. // and try to clear them atomically.
// Currently does not cross bfield boundaries.
// set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success. // set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success.
// (We do not cross bfield boundaries) // will cross bfield boundaries.
mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) { mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BFIELD_BITS) return false; if (n == 0 || n > MI_BFIELD_BITS) return false;
const mi_bfield_t mask = mi_bfield_mask(n, 0); const mi_bfield_t mask = mi_bfield_mask(n, 0);
// for all fields in the chunk
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
size_t idx; size_t idx;
// is there a range inside the field?
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
if (idx + n > MI_BFIELD_BITS) break; if (idx + n > MI_BFIELD_BITS) break; // too short, maybe cross over, or continue with the next field
const size_t bmask = mask<<idx; const size_t bmask = mask<<idx;
mi_assert_internal(bmask>>idx == mask); mi_assert_internal(bmask>>idx == mask);
if ((b&bmask) == bmask) { // found a match if ((b&bmask) == bmask) { // found a match with all bits set, try clearing atomically
if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], bmask, NULL)) { if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], bmask, NULL)) {
*pidx = (i*MI_BFIELD_BITS) + idx; *pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx < MI_BCHUNK_BITS); mi_assert_internal(*pidx < MI_BCHUNK_BITS);
@@ -761,7 +784,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
return true; return true;
} }
else { else {
// if failed to atomically commit, reload b and try again from this position // if we failed to atomically commit, reload b and try again from the start
b = mi_atomic_load_acquire(&chunk->bfields[i]); b = mi_atomic_load_acquire(&chunk->bfields[i]);
} }
} }
@@ -772,6 +795,25 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
b = b & ~mi_bfield_mask(ones, idx); // clear the ones b = b & ~mi_bfield_mask(ones, idx); // clear the ones
} }
} }
// check if we can cross into the next bfield
if (i < MI_BCHUNK_FIELDS-1) {
const size_t post = mi_bfield_clz(~b);
if (post > 0) {
const size_t pre = mi_bfield_ctz(mi_atomic_load_relaxed(&chunk->bfields[i+1]));
if (post + pre <= n) {
// it fits -- try to claim it atomically
const size_t cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - post);
if (mi_bchunk_try_clearNX(chunk, cidx, n, NULL)) {
// we cleared all atomically
*pidx = cidx;
mi_assert_internal(*pidx < MI_BCHUNK_BITS);
mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS);
return true;
}
}
}
}
} }
return false; return false;
} }
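
The inner loop above scans one bfield for a run of `n` set bits by repeatedly taking the least set bit and skipping runs that are too short; a single-word, non-atomic version of that scan (assumes GCC/Clang style __builtin_ctzll, names local to the sketch):

  // illustrative: lowest start index of n consecutive 1-bits in a 64-bit word, or -1 if none
  #include <stdint.h>
  static int find_run(uint64_t b, unsigned n) {
    if (n == 0 || n > 64) return -1;
    const uint64_t mask = (n >= 64 ? ~UINT64_C(0) : ((UINT64_C(1) << n) - 1));
    while (b != 0) {
      const unsigned idx = (unsigned)__builtin_ctzll(b);             // least significant set bit
      if (idx + n > 64) break;                                       // a run of n can no longer fit
      if (((b >> idx) & mask) == mask) return (int)idx;              // found n consecutive ones
      const unsigned ones = (unsigned)__builtin_ctzll(~(b >> idx));  // length of this (too short) run
      b &= ~(((UINT64_C(1) << ones) - 1) << idx);                    // clear it and keep scanning
    }
    return -1;
  }
  int main(void) { return (find_run(UINT64_C(0xF0), 4) == 4) ? 0 : 1; }  // 0xF0 has four ones at bit 4
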
@@ -783,46 +825,47 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk, size_t n, size_t* pidx) { static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
const size_t skip_count = n/MI_BFIELD_BITS; // we first scan ahead to see if there is a range of `n` set bits, and only then try to clear atomically
mi_assert_internal(n>0);
const size_t skip_count = (n-1)/MI_BFIELD_BITS;
size_t cidx; size_t cidx;
for (size_t i = 0; i <= MI_BCHUNK_FIELDS - skip_count; i++) for (size_t i = 0; i < MI_BCHUNK_FIELDS - skip_count; i++)
{ {
size_t m = n; // bits to go size_t m = n; // bits to go
// first field // first field
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
size_t ones = mi_bfield_clz(~b); size_t ones = mi_bfield_clz(~b);
cidx = i*MI_BFIELD_BITS + (MI_BFIELD_BITS - ones); // start index cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - ones); // start index
if (ones >= m) { if (ones >= m) {
// we found enough bits! // we found enough bits!
m = 0; m = 0;
} }
else { else {
m -= ones; m -= ones;
mi_assert_internal(m>0);
}
// keep scanning further fields? // keep scanning further fields?
size_t j = 1; // field count from i size_t j = 1; // field count from i
while (i+j < MI_BCHUNK_FIELDS) { while (i+j < MI_BCHUNK_FIELDS) {
mi_assert_internal(m > 0); mi_assert_internal(m > 0);
b = mi_atomic_load_relaxed(&chunk->bfields[i+j]); b = mi_atomic_load_relaxed(&chunk->bfields[i+j]);
ones = mi_bfield_ctz(~b); ones = mi_bfield_ctz(~b);
if (ones >= m) { if (ones >= m) {
// we found enough bits // we found enough bits
m = 0; m = 0;
break; break;
} }
else if (ones == MI_BFIELD_BITS) { else if (ones == MI_BFIELD_BITS) {
// not enough yet, proceed to the next field // not enough yet, proceed to the next field
j++; j++;
m -= MI_BFIELD_BITS; m -= MI_BFIELD_BITS;
} }
else { else {
// the range was not enough, start from scratch // the range was not enough, start from scratch
i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time) i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time)
mi_assert_internal(m>0); mi_assert_internal(m>0);
break; break;
}
} }
} }
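
The rewritten scan-ahead above measures runs of set bits with clz/ctz over consecutive bfields before attempting any atomic clears; a small sketch of the two counts involved at a field boundary (again assuming GCC/Clang builtins, names local to the sketch):

  // illustrative: ones at the top of one field plus ones at the bottom of the next
  #include <stdint.h>
  static unsigned clz64(uint64_t x) { return (x == 0 ? 64u : (unsigned)__builtin_clzll(x)); }
  static unsigned ctz64(uint64_t x) { return (x == 0 ? 64u : (unsigned)__builtin_ctzll(x)); }
  static unsigned spanning_ones(uint64_t lo, uint64_t hi) {
    const unsigned post = clz64(~lo);   // set bits at the high end of `lo` (like mi_bfield_clz(~b))
    const unsigned pre  = ctz64(~hi);   // set bits at the low end of `hi`  (like mi_bfield_ctz(~b))
    return post + pre;                  // length of the run that crosses the field boundary
  }
  int main(void) {
    // four ones at the top of the first field and six at the bottom of the second: a run of 10
    return (spanning_ones(UINT64_C(0xF) << 60, UINT64_C(0x3F)) == 10) ? 0 : 1;
  }
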
@@ -846,9 +889,9 @@ static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk,
//static inline bool mi_bchunk_try_find_and_clearN(mi_bchunk_t* chunk, size_t n, size_t* pidx) { //static inline bool mi_bchunk_try_find_and_clearN(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
// if (n==1) return mi_bchunk_try_find_and_clear(chunk, pidx); // small pages // if (n==1) return mi_bchunk_try_find_and_clear(chunk, pidx); // small pages
// if (n==8) return mi_bchunk_try_find_and_clear8(chunk, pidx); // medium pages // if (n==8) return mi_bchunk_try_find_and_clear8(chunk, pidx); // medium pages
// if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx); // large pages // // if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx); // large pages
// if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk // if (n==0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
// if (n < MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx); // if (n<=MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx);
// return mi_bchunk_try_find_and_clearN_(chunk, n, pidx); // return mi_bchunk_try_find_and_clearN_(chunk, n, pidx);
//} //}
@@ -877,11 +920,11 @@ static inline bool mi_bchunk_all_are_clear_relaxed(mi_bchunk_t* chunk) {
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
return (mi_mm256_is_zero(_mm256_or_si256(vec1,vec2))); return (mi_mm256_is_zero(_mm256_or_si256(vec1,vec2)));
#elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64 #elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64
const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields); const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields);
const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2); const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2);
const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4); const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4);
const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6); const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6);
const uint64x2_t v = vorrq_u64(vorrq_u64(v0,v1),vorrq_u64(v2,v3)); const uint64x2_t v = vorrq_u64(vorrq_u64(v0,v1),vorrq_u64(v2,v3));
return (vmaxvq_u32(vreinterpretq_u32_u64(v)) == 0); return (vmaxvq_u32(vreinterpretq_u32_u64(v)) == 0);
#else #else
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
@@ -902,12 +945,12 @@ static inline bool mi_bchunk_all_are_set_relaxed(mi_bchunk_t* chunk) {
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1); const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
return (mi_mm256_is_ones(_mm256_and_si256(vec1, vec2))); return (mi_mm256_is_ones(_mm256_and_si256(vec1, vec2)));
#elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64 #elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64
const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields); const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields);
const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2); const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2);
const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4); const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4);
const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6); const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6);
const uint64x2_t v = vandq_u64(vandq_u64(v0,v1),vandq_u64(v2,v3)); const uint64x2_t v = vandq_u64(vandq_u64(v0,v1),vandq_u64(v2,v3));
return (vminvq_u32(vreinterpretq_u32_u64(v)) == 0xFFFFFFFFUL); return (vminvq_u32(vreinterpretq_u32_u64(v)) == 0xFFFFFFFFUL);
#else #else
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
if (~mi_atomic_load_relaxed(&chunk->bfields[i]) != 0) return false; if (~mi_atomic_load_relaxed(&chunk->bfields[i]) != 0) return false;
@@ -936,7 +979,7 @@ static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) {
static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) { static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap)); mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
mi_bchunk_set(&bitmap->chunkmap, chunk_idx); mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL);
} }
static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) { static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) {
@@ -948,7 +991,7 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
// .. but a concurrent set may have happened in between our all-clear test and the clearing of the // .. but a concurrent set may have happened in between our all-clear test and the clearing of the
// bit in the mask. We check again to catch this situation. // bit in the mask. We check again to catch this situation.
if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) { if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) {
mi_bchunk_set(&bitmap->chunkmap, chunk_idx); mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL);
return false; return false;
} }
return true; return true;
@@ -1210,7 +1253,7 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk
else { else {
// failed to claim it, set abandoned mapping again (unless the page was freed) // failed to claim it, set abandoned mapping again (unless the page was freed)
if (keep_set) { if (keep_set) {
const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx); const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx, NULL);
mi_assert_internal(wasclear); MI_UNUSED(wasclear); mi_assert_internal(wasclear); MI_UNUSED(wasclear);
} }
} }
@@ -1393,7 +1436,7 @@ static void mi_bbitmap_chunkmap_set(mi_bbitmap_t* bbitmap, size_t chunk_idx, boo
mi_atomic_store_release(&bbitmap->chunk_bins[chunk_idx], MI_BBIN_NONE); mi_atomic_store_release(&bbitmap->chunk_bins[chunk_idx], MI_BBIN_NONE);
} }
} }
mi_bchunk_set(&bbitmap->chunkmap, chunk_idx); mi_bchunk_set(&bbitmap->chunkmap, chunk_idx, NULL);
mi_bbitmap_chunkmap_set_max(bbitmap, chunk_idx); mi_bbitmap_chunkmap_set_max(bbitmap, chunk_idx);
} }
@@ -1406,7 +1449,7 @@ static bool mi_bbitmap_chunkmap_try_clear(mi_bbitmap_t* bbitmap, size_t chunk_id
// .. but a concurrent set may have happened in between our all-clear test and the clearing of the // .. but a concurrent set may have happened in between our all-clear test and the clearing of the
// bit in the mask. We check again to catch this situation. // bit in the mask. We check again to catch this situation.
if (!mi_bchunk_all_are_clear_relaxed(&bbitmap->chunks[chunk_idx])) { if (!mi_bchunk_all_are_clear_relaxed(&bbitmap->chunks[chunk_idx])) {
mi_bchunk_set(&bbitmap->chunkmap, chunk_idx); mi_bchunk_set(&bbitmap->chunkmap, chunk_idx, NULL);
return false; return false;
} }
mi_bbitmap_chunkmap_set_max(bbitmap, chunk_idx); mi_bbitmap_chunkmap_set_max(bbitmap, chunk_idx);
@@ -1569,9 +1612,9 @@ bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t*
return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, 8, pidx, &mi_bchunk_try_find_and_clear_8); return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, 8, pidx, &mi_bchunk_try_find_and_clear_8);
} }
bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx) { // bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx) {
return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X); // return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X);
} // }
bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx) { bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx) {
mi_assert_internal(n<=MI_BFIELD_BITS); mi_assert_internal(n<=MI_BFIELD_BITS);


@@ -214,21 +214,18 @@ bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* vis
---------------------------------------------------------------------------- */ ---------------------------------------------------------------------------- */
// Size bins; larger bins are allowed to go into smaller bins. // Size bins; larger bins are allowed to go into smaller bins.
// Since LARGE and MEDIUM are aligned (on word and byte boundaries respectively),
// they are larger than OTHER even though those can contain very large objects (but we
// don't want those in the MEDIUM or LARGE bins as these are variable size).
// SMALL can only be in small (and NONE), so they cannot fragment the larger bins. // SMALL can only be in small (and NONE), so they cannot fragment the larger bins.
typedef enum mi_bbin_e { typedef enum mi_bbin_e {
MI_BBIN_NONE, // no bin assigned yet (the chunk is completely free) MI_BBIN_NONE, // no bin assigned yet (the chunk is completely free)
MI_BBIN_SMALL, // slice_count == 1 MI_BBIN_SMALL, // slice_count == 1
MI_BBIN_OTHER, // slice_count: any other from the other bins, and 1 <= slice_count <= MI_BCHUNK_BITS MI_BBIN_OTHER, // slice_count: any other from the other bins, and 1 <= slice_count <= MI_BCHUNK_BITS
MI_BBIN_MEDIUM, // slice_count == 8 MI_BBIN_MEDIUM, // slice_count == 8
MI_BBIN_LARGE, // slice_count == MI_BFIELD_BITS MI_BBIN_LARGE, // slice_count == MI_BFIELD_BITS -- not used for now!
MI_BBIN_COUNT MI_BBIN_COUNT
} mi_bbin_t; } mi_bbin_t;
static inline mi_bbin_t mi_bbin_of(size_t n) { static inline mi_bbin_t mi_bbin_of(size_t n) {
return (n==1 ? MI_BBIN_SMALL : (n==8 ? MI_BBIN_MEDIUM : (n==64 ? MI_BBIN_LARGE : MI_BBIN_OTHER))); return (n==1 ? MI_BBIN_SMALL : (n==8 ? MI_BBIN_MEDIUM : MI_BBIN_OTHER)); // (n==64 ? MI_BBIN_LARGE : MI_BBIN_OTHER)));
} }
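
With the LARGE bin commented out, mi_bbin_of now maps a slice count to only three bins in practice; a stand-alone replica of the new mapping for illustration (local names, not the mimalloc types):

  // illustrative: bin selection by slice count after this change
  typedef enum { BBIN_NONE, BBIN_SMALL, BBIN_OTHER, BBIN_MEDIUM, BBIN_LARGE, BBIN_COUNT } bbin_t;
  static bbin_t bbin_of(unsigned n) {
    return (n == 1 ? BBIN_SMALL : (n == 8 ? BBIN_MEDIUM : BBIN_OTHER));  // n == 64 now falls into OTHER
  }
  int main(void) {
    return (bbin_of(1) == BBIN_SMALL && bbin_of(8) == BBIN_MEDIUM && bbin_of(64) == BBIN_OTHER) ? 0 : 1;
  }
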
// An atomic "binned" bitmap for the free slices where we keep chunks reserved for particular size classes
@@ -293,7 +290,7 @@ bool mi_bbitmap_try_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Specialized versions for common bit sequence sizes // Specialized versions for common bit sequence sizes
bool mi_bbitmap_try_find_and_clear(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 1-bit bool mi_bbitmap_try_find_and_clear(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 1-bit
bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 8-bits bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 8-bits
bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS // bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS
bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // < MI_BFIELD_BITS bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // < MI_BFIELD_BITS
bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // > MI_BFIELD_BITS <= MI_BCHUNK_BITS bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // > MI_BFIELD_BITS <= MI_BCHUNK_BITS
@@ -302,9 +299,9 @@ bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t n, size_t tse
mi_decl_nodiscard static inline bool mi_bbitmap_try_find_and_clearN(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx) { mi_decl_nodiscard static inline bool mi_bbitmap_try_find_and_clearN(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx) {
if (n==1) return mi_bbitmap_try_find_and_clear(bbitmap, tseq, pidx); // small pages if (n==1) return mi_bbitmap_try_find_and_clear(bbitmap, tseq, pidx); // small pages
if (n==8) return mi_bbitmap_try_find_and_clear8(bbitmap, tseq, pidx); // medium pages if (n==8) return mi_bbitmap_try_find_and_clear8(bbitmap, tseq, pidx); // medium pages
if (n==MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearX(bbitmap, tseq, pidx); // large pages // if (n==MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearX(bbitmap, tseq, pidx); // large pages
if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk if (n==0 || n>MI_BCHUNK_BITS) return false; // cannot be more than a chunk
if (n < MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearNX(bbitmap, tseq, n, pidx); if (n<=MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearNX(bbitmap, tseq, n, pidx);
return mi_bbitmap_try_find_and_clearN_(bbitmap, tseq, n, pidx); return mi_bbitmap_try_find_and_clearN_(bbitmap, tseq, n, pidx);
} }