merge from dev3

This commit is contained in:
daanx 2025-01-02 11:54:26 -08:00
commit 3933ac9a3f
3 changed files with 191 additions and 151 deletions

View file

@ -773,9 +773,9 @@ mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t bloc
else if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) {
page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_MEDIUM_PAGE_SIZE), block_size);
}
else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) {
page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size);
}
//else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) {
// page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size);
// }
else {
page = mi_arenas_page_singleton_alloc(heap, block_size, block_alignment);
}
@ -1390,10 +1390,10 @@ static size_t mi_debug_show_chunks(const char* header, size_t slice_count, size_
char chunk_kind = ' ';
if (chunk_bins != NULL) {
switch (mi_atomic_load_relaxed(&chunk_bins[i])) {
// case MI_BBIN_SMALL: chunk_kind = 'S'; break;
case MI_BBIN_SMALL: chunk_kind = 'S'; break;
case MI_BBIN_MEDIUM: chunk_kind = 'M'; break;
case MI_BBIN_LARGE: chunk_kind = 'L'; break;
case MI_BBIN_OTHER: chunk_kind = 'O'; break;
case MI_BBIN_LARGE: chunk_kind = 'L'; break;
case MI_BBIN_OTHER: chunk_kind = 'X'; break;
// case MI_BBIN_NONE: chunk_kind = 'N'; break;
}
}

View file

@ -114,8 +114,8 @@ static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_
do {
if mi_unlikely((old&mask) == 0) {
old = mi_atomic_load_acquire(b);
if ((old&mask)==0) {
mi_subproc_stat_counter_increase(_mi_subproc(), pages_unabandon_busy_wait, 1);
if ((old&mask)==0) {
mi_subproc_stat_counter_increase(_mi_subproc(), pages_unabandon_busy_wait, 1);
}
while ((old&mask)==0) { // busy wait
mi_atomic_yield();
@ -138,6 +138,7 @@ static inline bool mi_bfield_atomic_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t
}
// Clear a mask set of bits atomically, and return true of the mask bits transitioned from all 1's to 0's
// `all_clear` is set to `true` if the new bfield became zero.
static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) {
mi_assert_internal(mask != 0);
mi_bfield_t old = mi_atomic_load_relaxed(b);
@ -163,6 +164,7 @@ static inline bool mi_bfield_atomic_clearX(_Atomic(mi_bfield_t)*b, bool* all_cle
// Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0
// and false otherwise (leaving the bit field as is).
// `all_clear` is set to `true` if the new bfield became zero.
static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) {
mi_assert_internal(mask != 0);
mi_bfield_t old = mi_atomic_load_relaxed(b);
@ -178,9 +180,9 @@ static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bf
}
// Tries to set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0)
// Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0
// and `false` otherwise leaving the bfield `b` as-is.
// `all_clear` is set to true if the new bfield is zero (and false otherwise)
// `all_clear` is set to true if the new bfield became zero (and false otherwise)
static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) {
mi_assert_internal(idx < MI_BFIELD_BITS);
const mi_bfield_t mask = mi_bfield_one()<<idx;
@ -189,6 +191,7 @@ static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx
// Tries to clear a byte atomically, and returns true if the byte atomically transitioned from 0xFF to 0
// `all_clear` is set to true if the new bfield became zero (and false otherwise)
static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) {
mi_assert_internal(idx < MI_BFIELD_BITS);
mi_assert_internal((idx%8)==0);
@ -198,6 +201,7 @@ static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t id
// Try to clear a full field of bits atomically, and return true all bits transitioned from all 1's to 0's.
// and false otherwise leaving the bit field as-is.
// `all_clear` is set to true if the new bfield became zero (which is always the case if successful).
static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b, bool* all_clear) {
mi_bfield_t old = mi_bfield_all_set();
if (mi_atomic_cas_strong_acq_rel(b, &old, mi_bfield_zero())) {
@ -257,26 +261,43 @@ static inline bool mi_bfield_atomic_is_xset_mask(mi_xset_t set, _Atomic(mi_bfiel
// ------- mi_bchunk_set ---------------------------------------
static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx) {
// Set a single bit
static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_set(&chunk->bfields[i], idx);
const bool was_clear = mi_bfield_atomic_set(&chunk->bfields[i], idx);
if (already_set != NULL) { *already_set = (was_clear ? 0 : 1); }
return was_clear;
}
// Set `0 < n <= MI_BFIELD_BITS`, and return true of the mask bits transitioned from all 0's to 1's.
// `already_set` contains the count of bits that were already set (used when committing ranges to account
// statistics correctly).
// Can cross over two bfields.
static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal(n > 0 && n <= MI_BFIELD_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
const mi_bfield_t mask = mi_bfield_mask(n, idx);
return mi_bfield_atomic_set_mask(&chunk->bfields[i], mask, already_set);
}
static inline bool mi_bchunk_setX(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal((cidx%MI_BFIELD_BITS)==0);
const size_t i = cidx / MI_BFIELD_BITS;
return mi_bfield_atomic_setX(&chunk->bfields[i], already_set);
if mi_likely(idx + n <= MI_BFIELD_BITS) {
// within one field
return mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(n,idx), already_set);
}
else {
// spanning two fields
const size_t m = MI_BFIELD_BITS - idx; // bits to clear in the first field
mi_assert_internal(m < n);
mi_assert_internal(i < MI_BCHUNK_FIELDS - 1);
size_t already_set1;
const bool all_set1 = mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &already_set1);
mi_assert_internal(n - m > 0);
mi_assert_internal(n - m < MI_BFIELD_BITS);
size_t already_set2;
const bool all_set2 = mi_bfield_atomic_set_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &already_set2);
if (already_set != NULL) { *already_set = already_set1 + already_set2; }
return (all_set1 && all_set2);
}
}
// Set a sequence of `n` bits within a chunk.
@ -306,6 +327,7 @@ mi_decl_noinline static bool mi_bchunk_xsetN_(mi_xset_t set, mi_bchunk_t* chunk,
// next field
field++;
idx = 0;
mi_assert_internal(m <= n);
n -= m;
}
if (palready_set!=NULL) { *palready_set = total_already_set; }
@ -315,13 +337,10 @@ mi_decl_noinline static bool mi_bchunk_xsetN_(mi_xset_t set, mi_bchunk_t* chunk,
static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) {
mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS);
if (n==1) {
bool was_clear = mi_bchunk_set(chunk, cidx);
if (already_set != NULL) { *already_set = !was_clear; }
return was_clear;
}
if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set);
if (n <MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set);
if (n==1) return mi_bchunk_set(chunk, cidx, already_set);
// if (n==8 && (cidx%8) == 0) return mi_bchunk_set8(chunk, cidx, already_set);
// if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set);
if (n<=MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set);
return mi_bchunk_xsetN_(MI_BIT_SET, chunk, cidx, n, already_set, NULL);
}
@ -334,27 +353,13 @@ static inline bool mi_bchunk_clear(mi_bchunk_t* chunk, size_t cidx, bool* all_cl
return mi_bfield_atomic_clear(&chunk->bfields[i], idx, all_clear);
}
static inline bool mi_bchunk_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
const mi_bfield_t mask = mi_bfield_mask(n, idx);
return mi_bfield_atomic_clear_mask(&chunk->bfields[i], mask, all_clear);
}
static inline bool mi_bchunk_clearX(mi_bchunk_t* chunk, size_t cidx, bool* all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal((cidx%MI_BFIELD_BITS)==0);
const size_t i = cidx / MI_BFIELD_BITS;
return mi_bfield_atomic_clearX(&chunk->bfields[i], all_clear);
}
static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) {
static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) {
mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS);
if (n==1) return mi_bchunk_clear(chunk, cidx, pmaybe_all_clear);
if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, pmaybe_all_clear);
if (n <MI_BFIELD_BITS) return mi_bchunk_clearNX(chunk, cidx, n, pmaybe_all_clear);
return mi_bchunk_xsetN_(MI_BIT_CLEAR, chunk, cidx, n, NULL, pmaybe_all_clear);
if (n==1) return mi_bchunk_clear(chunk, cidx, maybe_all_clear);
// if (n==8) return mi_bchunk_clear8(chunk, cidx, maybe_all_clear);
// if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, maybe_all_clear);
// TODO: implement mi_bchunk_xsetNX instead of setNX
return mi_bchunk_xsetN_(MI_BIT_CLEAR, chunk, cidx, n, NULL, maybe_all_clear);
}
@ -388,24 +393,46 @@ static inline bool mi_bchunk_is_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t
if (n==0) return true;
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
if mi_likely(n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); }
if mi_likely(n<=MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); }
if (n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); }
if (idx + n <= MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); }
return mi_bchunk_is_xsetN_(set, chunk, i, idx, n);
}
// ------- mi_bchunk_try_clear ---------------------------------------
// Clear `0 < n <= MI_BITFIELD_BITS`. Can cross over a bfield boundary.
static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal(n <= MI_BFIELD_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
mi_assert_internal(idx + n <= MI_BFIELD_BITS);
const size_t mask = mi_bfield_mask(n, idx);
return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mask, pmaybe_all_clear);
if mi_likely(idx + n <= MI_BFIELD_BITS) {
// within one field
return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(n, idx), pmaybe_all_clear);
}
else {
// spanning two fields (todo: use double-word atomic ops?)
const size_t m = MI_BFIELD_BITS - idx; // bits to clear in the first field
mi_assert_internal(m < n);
mi_assert_internal(i < MI_BCHUNK_FIELDS - 1);
bool field1_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &field1_is_clear)) return false;
// try the second field as well
mi_assert_internal(n - m > 0);
mi_assert_internal(n - m < MI_BFIELD_BITS);
bool field2_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &field2_is_clear)) {
// we failed to clear the second field, restore the first one
mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), NULL);
return false;
}
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = field1_is_clear && field2_is_clear; }
return true;
}
}
// Clear a full aligned bfield.
static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal((cidx%MI_BFIELD_BITS) == 0);
@ -413,60 +440,51 @@ static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* p
return mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear);
}
// Try to atomically set/clear a sequence of `n` bits within a chunk.
// Returns true if all bits transitioned from 0 to 1 (or 1 to 0),
// Try to atomically clear a sequence of `n` bits within a chunk.
// Returns true if all bits transitioned from 1 to 0,
// and false otherwise leaving all bit fields as is.
// Note: this is a hard one as we need to unwind partial atomic operations
// if we fail halfway..
// Note: this is the complex one as we need to unwind partial atomic operations if we fail halfway..
// `maybe_all_clear` is set to `true` if all the bfields involved become zero.
mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) {
mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);
mi_assert_internal(n>0);
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = true; }
if (n==0) return true;
size_t start_idx = cidx % MI_BFIELD_BITS;
size_t start_field = cidx / MI_BFIELD_BITS;
size_t end_field = MI_BCHUNK_FIELDS;
mi_bfield_t mask_mid = 0;
mi_bfield_t mask_end = 0;
bool field_is_clear;
bool maybe_all_clear = true;
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = false; }
// first field
const size_t start_idx = cidx % MI_BFIELD_BITS;
const size_t start_field = cidx / MI_BFIELD_BITS;
size_t field = start_field;
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field
size_t m = MI_BFIELD_BITS - start_idx; // m are the bits to clear in this field
if (m > n) { m = n; }
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
mi_assert_internal(start_field < MI_BCHUNK_FIELDS);
const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx);
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &field_is_clear)) return false;
maybe_all_clear = maybe_all_clear && field_is_clear;
bool maybe_all_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &maybe_all_clear)) return false;
// done?
mi_assert_internal(m <= n);
n -= m;
if (n==0) {
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = maybe_all_clear; }
return true;
}
// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
// mid fields
// mid fields?
while (n >= MI_BFIELD_BITS) {
field++;
mi_assert_internal(field < MI_BCHUNK_FIELDS);
mask_mid = mi_bfield_all_set();
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_mid, &field_is_clear)) goto restore;
bool field_is_clear;
if (!mi_bfield_atomic_try_clearX(&chunk->bfields[field], &field_is_clear)) goto restore;
maybe_all_clear = maybe_all_clear && field_is_clear;
n -= MI_BFIELD_BITS;
}
// last field
// last field?
if (n > 0) {
mi_assert_internal(n < MI_BFIELD_BITS);
field++;
mi_assert_internal(field < MI_BCHUNK_FIELDS);
end_field = field;
mask_end = mi_bfield_mask(n, 0);
const mi_bfield_t mask_end = mi_bfield_mask(n, 0);
bool field_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_end, &field_is_clear)) goto restore;
maybe_all_clear = maybe_all_clear && field_is_clear;
}
@ -475,12 +493,16 @@ mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t ci
return true;
restore:
// field is on the field that failed to set atomically; we need to restore all previous fields
// `field` is the index of the field that failed to set atomically; we need to restore all previous fields
mi_assert_internal(field > start_field);
while( field > start_field) {
field--;
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid));
mi_bfield_atomic_set_mask(&chunk->bfields[field], mask, NULL);
if (field == start_field) {
mi_bfield_atomic_set_mask(&chunk->bfields[field], mask_start, NULL);
}
else {
mi_bfield_atomic_setX(&chunk->bfields[field], NULL); // mid-field: set all bits again
}
}
return false;
}
@ -488,8 +510,8 @@ restore:
static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) {
mi_assert_internal(n>0);
if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear);
if (n<MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear);
// if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear);
if (n<=MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear);
return mi_bchunk_try_clearN_(chunk, cidx, n, maybe_all_clear);
}
@ -591,7 +613,7 @@ static inline bool mi_bchunk_try_find_and_clear(mi_bchunk_t* chunk, size_t* pidx
const uint32x4_t vzero1 = vuzp1q_u32(vreinterpretq_u32_u64(vzero1_lo),vreinterpretq_u32_u64(vzero1_hi)); // unzip even elements: narrow to 4x32 bit is_zero ()
const uint32x4_t vzero2 = vuzp1q_u32(vreinterpretq_u32_u64(vzero2_lo),vreinterpretq_u32_u64(vzero2_hi)); // unzip even elements: narrow to 4x32 bit is_zero ()
const uint32x4_t vzero1x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero1), 24)); // shift-right 2x32bit elem by 24: lo 16 bits contain the 2 lo bytes
const uint32x4_t vzero2x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero2), 24));
const uint32x4_t vzero2x = vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(vzero2), 24));
const uint16x8_t vzero12 = vreinterpretq_u16_u32(vuzp1q_u32(vzero1x,vzero2x)); // unzip even 32-bit elements into one vector
const uint8x8_t vzero = vmovn_u32(vzero12); // narrow the bottom 16-bits
const uint64_t mask = ~vget_lane_u64(vreinterpret_u64_u8(vzero), 0); // 1 byte for each bfield (0xFF => bfield has a bit set)
@ -642,7 +664,7 @@ static inline bool mi_bchunk_try_find_and_clear8_at(mi_bchunk_t* chunk, size_t c
}
#endif
// find least byte in a chunk with all bits set, and try unset it atomically
// find least aligned byte in a chunk with all bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success.
// Used to find medium size pages in the free blocks.
// todo: try neon version
@ -690,7 +712,7 @@ static inline bool mi_bchunk_try_find_and_clear_8(mi_bchunk_t* chunk, size_t n,
}
// find least bfield in a chunk with all bits set, and try unset it atomically
// find least aligned bfield in a chunk with all bits set, and try unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success.
// Used to find large size pages in the free blocks.
// todo: try neon version
@ -737,23 +759,24 @@ static inline bool mi_bchunk_try_find_and_clear_X(mi_bchunk_t* chunk, size_t n,
return mi_bchunk_try_find_and_clearX(chunk, pidx);
}
// find a sequence of `n` bits in a chunk with `n < MI_BFIELD_BITS` with all bits set,
// find a sequence of `n` bits in a chunk with `0 < n <= MI_BFIELD_BITS` with all bits set,
// and try to clear them atomically.
// Currently does not cross bfield boundaries.
// set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success.
// (We do not cross bfield boundaries)
// will cross bfield boundaries.
mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BFIELD_BITS) return false;
const mi_bfield_t mask = mi_bfield_mask(n, 0);
// for all fields in the chunk
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
size_t idx;
// is there a range inside the field?
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
if (idx + n > MI_BFIELD_BITS) break;
if (idx + n > MI_BFIELD_BITS) break; // too short, maybe cross over, or continue with the next field
const size_t bmask = mask<<idx;
mi_assert_internal(bmask>>idx == mask);
if ((b&bmask) == bmask) { // found a match
if ((b&bmask) == bmask) { // found a match with all bits set, try clearing atomically
if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], bmask, NULL)) {
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx < MI_BCHUNK_BITS);
@ -761,7 +784,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
return true;
}
else {
// if failed to atomically commit, reload b and try again from this position
// if we failed to atomically commit, reload b and try again from the start
b = mi_atomic_load_acquire(&chunk->bfields[i]);
}
}
@ -772,6 +795,25 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
b = b & ~mi_bfield_mask(ones, idx); // clear the ones
}
}
// check if we can cross into the next bfield
if (i < MI_BCHUNK_FIELDS-1) {
const size_t post = mi_bfield_clz(~b);
if (post > 0) {
const size_t pre = mi_bfield_ctz(mi_atomic_load_relaxed(&chunk->bfields[i+1]));
if (post + pre <= n) {
// it fits -- try to claim it atomically
const size_t cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - post);
if (mi_bchunk_try_clearNX(chunk, cidx, n, NULL)) {
// we cleared all atomically
*pidx = cidx;
mi_assert_internal(*pidx < MI_BCHUNK_BITS);
mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS);
return true;
}
}
}
}
}
return false;
}
@ -783,46 +825,47 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
const size_t skip_count = n/MI_BFIELD_BITS;
// we first scan ahead to see if there is a range of `n` set bits, and only then try to clear atomically
mi_assert_internal(n>0);
const size_t skip_count = (n-1)/MI_BFIELD_BITS;
size_t cidx;
for (size_t i = 0; i <= MI_BCHUNK_FIELDS - skip_count; i++)
for (size_t i = 0; i < MI_BCHUNK_FIELDS - skip_count; i++)
{
size_t m = n; // bits to go
// first field
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
size_t ones = mi_bfield_clz(~b);
cidx = i*MI_BFIELD_BITS + (MI_BFIELD_BITS - ones); // start index
cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - ones); // start index
if (ones >= m) {
// we found enough bits!
m = 0;
}
else {
m -= ones;
mi_assert_internal(m>0);
}
// keep scanning further fields?
size_t j = 1; // field count from i
while (i+j < MI_BCHUNK_FIELDS) {
mi_assert_internal(m > 0);
b = mi_atomic_load_relaxed(&chunk->bfields[i+j]);
ones = mi_bfield_ctz(~b);
if (ones >= m) {
// we found enough bits
m = 0;
break;
}
else if (ones == MI_BFIELD_BITS) {
// not enough yet, proceed to the next field
j++;
m -= MI_BFIELD_BITS;
}
else {
// the range was not enough, start from scratch
i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time)
mi_assert_internal(m>0);
break;
// keep scanning further fields?
size_t j = 1; // field count from i
while (i+j < MI_BCHUNK_FIELDS) {
mi_assert_internal(m > 0);
b = mi_atomic_load_relaxed(&chunk->bfields[i+j]);
ones = mi_bfield_ctz(~b);
if (ones >= m) {
// we found enough bits
m = 0;
break;
}
else if (ones == MI_BFIELD_BITS) {
// not enough yet, proceed to the next field
j++;
m -= MI_BFIELD_BITS;
}
else {
// the range was not enough, start from scratch
i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time)
mi_assert_internal(m>0);
break;
}
}
}
@ -846,9 +889,9 @@ static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk,
//static inline bool mi_bchunk_try_find_and_clearN(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
// if (n==1) return mi_bchunk_try_find_and_clear(chunk, pidx); // small pages
// if (n==8) return mi_bchunk_try_find_and_clear8(chunk, pidx); // medium pages
// if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx); // large pages
// if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
// if (n < MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx);
// // if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx); // large pages
// if (n==0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
// if (n<=MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx);
// return mi_bchunk_try_find_and_clearN_(chunk, n, pidx);
//}
@ -877,11 +920,11 @@ static inline bool mi_bchunk_all_are_clear_relaxed(mi_bchunk_t* chunk) {
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
return (mi_mm256_is_zero(_mm256_or_si256(vec1,vec2)));
#elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64
const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields);
const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2);
const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4);
const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6);
const uint64x2_t v = vorrq_u64(vorrq_u64(v0,v1),vorrq_u64(v2,v3));
const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields);
const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2);
const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4);
const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6);
const uint64x2_t v = vorrq_u64(vorrq_u64(v0,v1),vorrq_u64(v2,v3));
return (vmaxvq_u32(vreinterpretq_u32_u64(v)) == 0);
#else
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
@ -902,12 +945,12 @@ static inline bool mi_bchunk_all_are_set_relaxed(mi_bchunk_t* chunk) {
const __m256i vec2 = _mm256_load_si256(((const __m256i*)chunk->bfields)+1);
return (mi_mm256_is_ones(_mm256_and_si256(vec1, vec2)));
#elif MI_OPT_SIMD && (MI_BCHUNK_BITS==512) && MI_ARCH_ARM64
const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields);
const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2);
const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4);
const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6);
const uint64x2_t v = vandq_u64(vandq_u64(v0,v1),vandq_u64(v2,v3));
return (vminvq_u32(vreinterpretq_u32_u64(v)) == 0xFFFFFFFFUL);
const uint64x2_t v0 = vld1q_u64((uint64_t*)chunk->bfields);
const uint64x2_t v1 = vld1q_u64((uint64_t*)chunk->bfields + 2);
const uint64x2_t v2 = vld1q_u64((uint64_t*)chunk->bfields + 4);
const uint64x2_t v3 = vld1q_u64((uint64_t*)chunk->bfields + 6);
const uint64x2_t v = vandq_u64(vandq_u64(v0,v1),vandq_u64(v2,v3));
return (vminvq_u32(vreinterpretq_u32_u64(v)) == 0xFFFFFFFFUL);
#else
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
if (~mi_atomic_load_relaxed(&chunk->bfields[i]) != 0) return false;
@ -936,7 +979,7 @@ static bool mi_bchunk_bsr(mi_bchunk_t* chunk, size_t* pidx) {
static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL);
}
static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx) {
@ -948,7 +991,7 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
// .. but a concurrent set may have happened in between our all-clear test and the clearing of the
// bit in the mask. We check again to catch this situation.
if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) {
mi_bchunk_set(&bitmap->chunkmap, chunk_idx);
mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL);
return false;
}
return true;
@ -1210,7 +1253,7 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk
else {
// failed to claim it, set abandoned mapping again (unless the page was freed)
if (keep_set) {
const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx);
const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx, NULL);
mi_assert_internal(wasclear); MI_UNUSED(wasclear);
}
}
@ -1393,7 +1436,7 @@ static void mi_bbitmap_chunkmap_set(mi_bbitmap_t* bbitmap, size_t chunk_idx, boo
mi_atomic_store_release(&bbitmap->chunk_bins[chunk_idx], MI_BBIN_NONE);
}
}
mi_bchunk_set(&bbitmap->chunkmap, chunk_idx);
mi_bchunk_set(&bbitmap->chunkmap, chunk_idx, NULL);
mi_bbitmap_chunkmap_set_max(bbitmap, chunk_idx);
}
@ -1406,7 +1449,7 @@ static bool mi_bbitmap_chunkmap_try_clear(mi_bbitmap_t* bbitmap, size_t chunk_id
// .. but a concurrent set may have happened in between our all-clear test and the clearing of the
// bit in the mask. We check again to catch this situation.
if (!mi_bchunk_all_are_clear_relaxed(&bbitmap->chunks[chunk_idx])) {
mi_bchunk_set(&bbitmap->chunkmap, chunk_idx);
mi_bchunk_set(&bbitmap->chunkmap, chunk_idx, NULL);
return false;
}
mi_bbitmap_chunkmap_set_max(bbitmap, chunk_idx);
@ -1569,9 +1612,9 @@ bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t*
return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, 8, pidx, &mi_bchunk_try_find_and_clear_8);
}
bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx) {
return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X);
}
// bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx) {
// return mi_bbitmap_try_find_and_clear_generic(bbitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X);
// }
bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx) {
mi_assert_internal(n<=MI_BFIELD_BITS);

View file

@ -214,21 +214,18 @@ bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* vis
---------------------------------------------------------------------------- */
// Size bins; larger bins are allowed to go into smaller bins.
// Since LARGE and MEDIUM are aligned (on word and byte boundaries respectively),
// they are larger than OTHER even though those can contain very large objects (but we
// don't want those in the MEDIUM or LARGE bins as these are variable size).
// SMALL can only be in small (and NONE), so they cannot fragment the larger bins.
typedef enum mi_bbin_e {
MI_BBIN_NONE, // no bin assigned yet (the chunk is completely free)
MI_BBIN_SMALL, // slice_count == 1
MI_BBIN_OTHER, // slice_count: any other from the other bins, and 1 <= slice_count <= MI_BCHUNK_BITS
MI_BBIN_MEDIUM, // slice_count == 8
MI_BBIN_LARGE, // slice_count == MI_BFIELD_BITS
MI_BBIN_LARGE, // slice_count == MI_BFIELD_BITS -- not used for now!
MI_BBIN_OTHER, // slice_count: any other from the other bins, and 1 <= slice_count <= MI_BCHUNK_BITS
MI_BBIN_COUNT
} mi_bbin_t;
static inline mi_bbin_t mi_bbin_of(size_t n) {
return (n==1 ? MI_BBIN_SMALL : (n==8 ? MI_BBIN_MEDIUM : (n==64 ? MI_BBIN_LARGE : MI_BBIN_OTHER)));
return (n==1 ? MI_BBIN_SMALL : (n==8 ? MI_BBIN_MEDIUM : MI_BBIN_OTHER)); // (n==64 ? MI_BBIN_LARGE : MI_BBIN_OTHER)));
}
// An atomic "binned" bitmap for the free slices where we keep chunks reserved for particalar size classes
@ -293,7 +290,7 @@ bool mi_bbitmap_try_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Specialized versions for common bit sequence sizes
bool mi_bbitmap_try_find_and_clear(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 1-bit
bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 8-bits
bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS
// bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS
bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // < MI_BFIELD_BITS
bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx); // > MI_BFIELD_BITS <= MI_BCHUNK_BITS
@ -302,9 +299,9 @@ bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t n, size_t tse
mi_decl_nodiscard static inline bool mi_bbitmap_try_find_and_clearN(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx) {
if (n==1) return mi_bbitmap_try_find_and_clear(bbitmap, tseq, pidx); // small pages
if (n==8) return mi_bbitmap_try_find_and_clear8(bbitmap, tseq, pidx); // medium pages
if (n==MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearX(bbitmap, tseq, pidx); // large pages
if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
if (n < MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearNX(bbitmap, tseq, n, pidx);
// if (n==MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearX(bbitmap, tseq, pidx); // large pages
if (n==0 || n>MI_BCHUNK_BITS) return false; // cannot be more than a chunk
if (n<=MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearNX(bbitmap, tseq, n, pidx);
return mi_bbitmap_try_find_and_clearN_(bbitmap, tseq, n, pidx);
}