make bitmap scan cross bfields for NX; disable the use of large object pages

This commit is contained in:
daanx 2025-01-02 11:42:28 -08:00
parent 0d302cd174
commit c507ee3d96
3 changed files with 170 additions and 127 deletions

View file

@ -773,9 +773,9 @@ mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t bloc
else if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) { else if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) {
page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_MEDIUM_PAGE_SIZE), block_size); page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_MEDIUM_PAGE_SIZE), block_size);
} }
else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) { //else if (block_size <= MI_LARGE_MAX_OBJ_SIZE) {
page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size); // page = mi_arenas_page_regular_alloc(heap, mi_slice_count_of_size(MI_LARGE_PAGE_SIZE), block_size);
} // }
else { else {
page = mi_arenas_page_singleton_alloc(heap, block_size, block_alignment); page = mi_arenas_page_singleton_alloc(heap, block_size, block_alignment);
} }

View file

@ -130,6 +130,7 @@ static inline bool mi_bfield_atomic_set_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t
} }
// Clear a mask set of bits atomically, and return true if the mask bits transitioned from all 1's to 0's // Clear a mask set of bits atomically, and return true if the mask bits transitioned from all 1's to 0's
// `all_clear` is set to `true` if the new bfield became zero.
static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) { static inline bool mi_bfield_atomic_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) {
mi_assert_internal(mask != 0); mi_assert_internal(mask != 0);
mi_bfield_t old = mi_atomic_load_relaxed(b); mi_bfield_t old = mi_atomic_load_relaxed(b);
@ -155,6 +156,7 @@ static inline bool mi_bfield_atomic_clearX(_Atomic(mi_bfield_t)*b, bool* all_cle
// Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0 // Tries to clear a mask atomically, and returns true if the mask bits atomically transitioned from mask to 0
// and false otherwise (leaving the bit field as is). // and false otherwise (leaving the bit field as is).
// `all_clear` is set to `true` if the new bfield became zero.
static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) { static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bfield_t mask, bool* all_clear) {
mi_assert_internal(mask != 0); mi_assert_internal(mask != 0);
mi_bfield_t old = mi_atomic_load_relaxed(b); mi_bfield_t old = mi_atomic_load_relaxed(b);
@ -170,9 +172,9 @@ static inline bool mi_bfield_atomic_try_clear_mask(_Atomic(mi_bfield_t)*b, mi_bf
} }
// Tries to set/clear a bit atomically. Returns `true` if the bit transitioned from 0 to 1 (or 1 to 0) // Tries to clear a bit atomically. Returns `true` if the bit transitioned from 1 to 0
// and `false` otherwise leaving the bfield `b` as-is. // and `false` otherwise leaving the bfield `b` as-is.
// `all_clear` is set to true if the new bfield is zero (and false otherwise) // `all_clear` is set to true if the new bfield became zero (and false otherwise)
static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) { static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) {
mi_assert_internal(idx < MI_BFIELD_BITS); mi_assert_internal(idx < MI_BFIELD_BITS);
const mi_bfield_t mask = mi_bfield_one()<<idx; const mi_bfield_t mask = mi_bfield_one()<<idx;
@ -181,6 +183,7 @@ static inline bool mi_bfield_atomic_try_clear(_Atomic(mi_bfield_t)*b, size_t idx
// Tries to clear a byte atomically, and returns true if the byte atomically transitioned from 0xFF to 0 // Tries to clear a byte atomically, and returns true if the byte atomically transitioned from 0xFF to 0
// `all_clear` is set to true if the new bfield became zero (and false otherwise)
static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) { static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t idx, bool* all_clear) {
mi_assert_internal(idx < MI_BFIELD_BITS); mi_assert_internal(idx < MI_BFIELD_BITS);
mi_assert_internal((idx%8)==0); mi_assert_internal((idx%8)==0);
@ -190,6 +193,7 @@ static inline bool mi_bfield_atomic_try_clear8(_Atomic(mi_bfield_t)*b, size_t id
// Try to clear a full field of bits atomically, and return true if all bits transitioned from all 1's to 0's. // Try to clear a full field of bits atomically, and return true if all bits transitioned from all 1's to 0's.
// and false otherwise leaving the bit field as-is. // and false otherwise leaving the bit field as-is.
// `all_clear` is set to true if the new bfield became zero (which is always the case if successful).
static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b, bool* all_clear) { static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b, bool* all_clear) {
mi_bfield_t old = mi_bfield_all_set(); mi_bfield_t old = mi_bfield_all_set();
if (mi_atomic_cas_strong_acq_rel(b, &old, mi_bfield_zero())) { if (mi_atomic_cas_strong_acq_rel(b, &old, mi_bfield_zero())) {
@ -249,26 +253,43 @@ static inline bool mi_bfield_atomic_is_xset_mask(mi_xset_t set, _Atomic(mi_bfiel
// ------- mi_bchunk_set --------------------------------------- // ------- mi_bchunk_set ---------------------------------------
static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx) { // Set a single bit
static inline bool mi_bchunk_set(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) {
mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(cidx < MI_BCHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS; const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_set(&chunk->bfields[i], idx); const bool was_clear = mi_bfield_atomic_set(&chunk->bfields[i], idx);
if (already_set != NULL) { *already_set = (was_clear ? 0 : 1); }
return was_clear;
} }
// Set `n` bits with `0 < n <= MI_BFIELD_BITS`, and return true if the mask bits transitioned from all 0's to 1's.
// `already_set` contains the count of bits that were already set (used when committing ranges to account
// statistics correctly).
// Can cross over two bfields.
static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { static inline bool mi_bchunk_setNX(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) {
mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal(n > 0 && n <= MI_BFIELD_BITS);
const size_t i = cidx / MI_BFIELD_BITS; const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS;
const mi_bfield_t mask = mi_bfield_mask(n, idx); if mi_likely(idx + n <= MI_BFIELD_BITS) {
return mi_bfield_atomic_set_mask(&chunk->bfields[i], mask, already_set); // within one field
} return mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(n,idx), already_set);
}
static inline bool mi_bchunk_setX(mi_bchunk_t* chunk, size_t cidx, size_t* already_set) { else {
mi_assert_internal(cidx < MI_BCHUNK_BITS); // spanning two fields
mi_assert_internal((cidx%MI_BFIELD_BITS)==0); const size_t m = MI_BFIELD_BITS - idx; // bits to set in the first field
const size_t i = cidx / MI_BFIELD_BITS; mi_assert_internal(m < n);
return mi_bfield_atomic_setX(&chunk->bfields[i], already_set); mi_assert_internal(i < MI_BCHUNK_FIELDS - 1);
size_t already_set1;
const bool all_set1 = mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &already_set1);
mi_assert_internal(n - m > 0);
mi_assert_internal(n - m < MI_BFIELD_BITS);
size_t already_set2;
const bool all_set2 = mi_bfield_atomic_set_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &already_set2);
if (already_set != NULL) { *already_set = already_set1 + already_set2; }
return (all_set1 && all_set2);
}
} }
// Set a sequence of `n` bits within a chunk. // Set a sequence of `n` bits within a chunk.
@ -298,6 +319,7 @@ mi_decl_noinline static bool mi_bchunk_xsetN_(mi_xset_t set, mi_bchunk_t* chunk,
// next field // next field
field++; field++;
idx = 0; idx = 0;
mi_assert_internal(m <= n);
n -= m; n -= m;
} }
if (palready_set!=NULL) { *palready_set = total_already_set; } if (palready_set!=NULL) { *palready_set = total_already_set; }
@ -307,13 +329,10 @@ mi_decl_noinline static bool mi_bchunk_xsetN_(mi_xset_t set, mi_bchunk_t* chunk,
static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) { static inline bool mi_bchunk_setN(mi_bchunk_t* chunk, size_t cidx, size_t n, size_t* already_set) {
mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS);
if (n==1) { if (n==1) return mi_bchunk_set(chunk, cidx, already_set);
bool was_clear = mi_bchunk_set(chunk, cidx); // if (n==8 && (cidx%8) == 0) return mi_bchunk_set8(chunk, cidx, already_set);
if (already_set != NULL) { *already_set = !was_clear; } // if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set);
return was_clear; if (n<=MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set);
}
if (n==MI_BFIELD_BITS) return mi_bchunk_setX(chunk, cidx, already_set);
if (n <MI_BFIELD_BITS) return mi_bchunk_setNX(chunk, cidx, n, already_set);
return mi_bchunk_xsetN_(MI_BIT_SET, chunk, cidx, n, already_set, NULL); return mi_bchunk_xsetN_(MI_BIT_SET, chunk, cidx, n, already_set, NULL);
} }
@ -326,27 +345,13 @@ static inline bool mi_bchunk_clear(mi_bchunk_t* chunk, size_t cidx, bool* all_cl
return mi_bfield_atomic_clear(&chunk->bfields[i], idx, all_clear); return mi_bfield_atomic_clear(&chunk->bfields[i], idx, all_clear);
} }
static inline bool mi_bchunk_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
const mi_bfield_t mask = mi_bfield_mask(n, idx);
return mi_bfield_atomic_clear_mask(&chunk->bfields[i], mask, all_clear);
}
static inline bool mi_bchunk_clearX(mi_bchunk_t* chunk, size_t cidx, bool* all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal((cidx%MI_BFIELD_BITS)==0);
const size_t i = cidx / MI_BFIELD_BITS;
return mi_bfield_atomic_clearX(&chunk->bfields[i], all_clear);
}
static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) {
mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS); mi_assert_internal(n>0 && n <= MI_BCHUNK_BITS);
if (n==1) return mi_bchunk_clear(chunk, cidx, maybe_all_clear); if (n==1) return mi_bchunk_clear(chunk, cidx, maybe_all_clear);
if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, maybe_all_clear); // if (n==8) return mi_bchunk_clear8(chunk, cidx, maybe_all_clear);
if (n <MI_BFIELD_BITS) return mi_bchunk_clearNX(chunk, cidx, n, maybe_all_clear); // if (n==MI_BFIELD_BITS) return mi_bchunk_clearX(chunk, cidx, maybe_all_clear);
return mi_bchunk_xsetN_(MI_BIT_CLEAR, chunk, cidx, n, NULL, maybe_all_clear); // TODO: implement mi_bchunk_xsetNX instead of setNX
return mi_bchunk_xsetN_(MI_BIT_CLEAR, chunk, cidx, n, NULL, maybe_all_clear);
} }
@ -380,24 +385,46 @@ static inline bool mi_bchunk_is_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t
if (n==0) return true; if (n==0) return true;
const size_t i = cidx / MI_BFIELD_BITS; const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS;
if mi_likely(n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); } if (n==1) { return mi_bfield_atomic_is_xset(set, &chunk->bfields[i], idx); }
if mi_likely(n<=MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); } if (idx + n <= MI_BFIELD_BITS) { return mi_bfield_atomic_is_xset_mask(set, &chunk->bfields[i], mi_bfield_mask(n, idx)); }
return mi_bchunk_is_xsetN_(set, chunk, i, idx, n); return mi_bchunk_is_xsetN_(set, chunk, i, idx, n);
} }
// ------- mi_bchunk_try_clear --------------------------------------- // ------- mi_bchunk_try_clear ---------------------------------------
// Try to clear `n` bits with `0 < n <= MI_BFIELD_BITS`. Can cross over a bfield boundary.
static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { static inline bool mi_bchunk_try_clearNX(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal(n <= MI_BFIELD_BITS); mi_assert_internal(n <= MI_BFIELD_BITS);
const size_t i = cidx / MI_BFIELD_BITS; const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS; const size_t idx = cidx % MI_BFIELD_BITS;
mi_assert_internal(idx + n <= MI_BFIELD_BITS); if mi_likely(idx + n <= MI_BFIELD_BITS) {
const size_t mask = mi_bfield_mask(n, idx); // within one field
return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mask, pmaybe_all_clear); return mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(n, idx), pmaybe_all_clear);
}
else {
// spanning two fields (todo: use double-word atomic ops?)
const size_t m = MI_BFIELD_BITS - idx; // bits to clear in the first field
mi_assert_internal(m < n);
mi_assert_internal(i < MI_BCHUNK_FIELDS - 1);
bool field1_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), &field1_is_clear)) return false;
// try the second field as well
mi_assert_internal(n - m > 0);
mi_assert_internal(n - m < MI_BFIELD_BITS);
bool field2_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[i+1], mi_bfield_mask(n - m, 0), &field2_is_clear)) {
// we failed to clear the second field, restore the first one
mi_bfield_atomic_set_mask(&chunk->bfields[i], mi_bfield_mask(m, idx), NULL);
return false;
}
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = field1_is_clear && field2_is_clear; }
return true;
}
} }
// Clear a full aligned bfield.
static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) { static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* pmaybe_all_clear) {
mi_assert_internal(cidx < MI_BCHUNK_BITS); mi_assert_internal(cidx < MI_BCHUNK_BITS);
mi_assert_internal((cidx%MI_BFIELD_BITS) == 0); mi_assert_internal((cidx%MI_BFIELD_BITS) == 0);
@ -405,60 +432,51 @@ static inline bool mi_bchunk_try_clearX(mi_bchunk_t* chunk, size_t cidx, bool* p
return mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear); return mi_bfield_atomic_try_clearX(&chunk->bfields[i], pmaybe_all_clear);
} }
// Try to atomically set/clear a sequence of `n` bits within a chunk. // Try to atomically clear a sequence of `n` bits within a chunk.
// Returns true if all bits transitioned from 0 to 1 (or 1 to 0), // Returns true if all bits transitioned from 1 to 0,
// and false otherwise leaving all bit fields as is. // and false otherwise leaving all bit fields as is.
// Note: this is a hard one as we need to unwind partial atomic operations // Note: this is the complex one as we need to unwind partial atomic operations if we fail halfway..
// if we fail halfway.. // `maybe_all_clear` is set to `true` if all the bfields involved become zero.
mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) { mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* pmaybe_all_clear) {
mi_assert_internal(cidx + n <= MI_BCHUNK_BITS); mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);
mi_assert_internal(n>0); mi_assert_internal(n>0);
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = true; }
if (n==0) return true; if (n==0) return true;
size_t start_idx = cidx % MI_BFIELD_BITS;
size_t start_field = cidx / MI_BFIELD_BITS;
size_t end_field = MI_BCHUNK_FIELDS;
mi_bfield_t mask_mid = 0;
mi_bfield_t mask_end = 0;
bool field_is_clear;
bool maybe_all_clear = true;
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = false; }
// first field // first field
const size_t start_idx = cidx % MI_BFIELD_BITS;
const size_t start_field = cidx / MI_BFIELD_BITS;
size_t field = start_field; size_t field = start_field;
size_t m = MI_BFIELD_BITS - start_idx; // m is the bits to xset in this field size_t m = MI_BFIELD_BITS - start_idx; // m are the bits to clear in this field
if (m > n) { m = n; } if (m > n) { m = n; }
mi_assert_internal(start_idx + m <= MI_BFIELD_BITS); mi_assert_internal(start_idx + m <= MI_BFIELD_BITS);
mi_assert_internal(start_field < MI_BCHUNK_FIELDS); mi_assert_internal(start_field < MI_BCHUNK_FIELDS);
const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx); const mi_bfield_t mask_start = mi_bfield_mask(m, start_idx);
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &field_is_clear)) return false; bool maybe_all_clear;
maybe_all_clear = maybe_all_clear && field_is_clear; if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_start, &maybe_all_clear)) return false;
// done? // done?
mi_assert_internal(m <= n);
n -= m; n -= m;
if (n==0) {
if (pmaybe_all_clear != NULL) { *pmaybe_all_clear = maybe_all_clear; } // continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
return true; // mid fields?
}
// continue with mid fields and last field: if these fail we need to recover by unsetting previous fields
// mid fields
while (n >= MI_BFIELD_BITS) { while (n >= MI_BFIELD_BITS) {
field++; field++;
mi_assert_internal(field < MI_BCHUNK_FIELDS); mi_assert_internal(field < MI_BCHUNK_FIELDS);
mask_mid = mi_bfield_all_set(); bool field_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_mid, &field_is_clear)) goto restore; if (!mi_bfield_atomic_try_clearX(&chunk->bfields[field], &field_is_clear)) goto restore;
maybe_all_clear = maybe_all_clear && field_is_clear; maybe_all_clear = maybe_all_clear && field_is_clear;
n -= MI_BFIELD_BITS; n -= MI_BFIELD_BITS;
} }
// last field // last field?
if (n > 0) { if (n > 0) {
mi_assert_internal(n < MI_BFIELD_BITS); mi_assert_internal(n < MI_BFIELD_BITS);
field++; field++;
mi_assert_internal(field < MI_BCHUNK_FIELDS); mi_assert_internal(field < MI_BCHUNK_FIELDS);
end_field = field; const mi_bfield_t mask_end = mi_bfield_mask(n, 0);
mask_end = mi_bfield_mask(n, 0); bool field_is_clear;
if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_end, &field_is_clear)) goto restore; if (!mi_bfield_atomic_try_clear_mask(&chunk->bfields[field], mask_end, &field_is_clear)) goto restore;
maybe_all_clear = maybe_all_clear && field_is_clear; maybe_all_clear = maybe_all_clear && field_is_clear;
} }
@ -467,12 +485,16 @@ mi_decl_noinline static bool mi_bchunk_try_clearN_(mi_bchunk_t* chunk, size_t ci
return true; return true;
restore: restore:
// field is on the field that failed to set atomically; we need to restore all previous fields // `field` is the index of the field that failed to clear atomically; we need to restore all previous fields
mi_assert_internal(field > start_field); mi_assert_internal(field > start_field);
while( field > start_field) { while( field > start_field) {
field--; field--;
const size_t mask = (field == start_field ? mask_start : (field == end_field ? mask_end : mask_mid)); if (field == start_field) {
mi_bfield_atomic_set_mask(&chunk->bfields[field], mask, NULL); mi_bfield_atomic_set_mask(&chunk->bfields[field], mask_start, NULL);
}
else {
mi_bfield_atomic_setX(&chunk->bfields[field], NULL); // mid-field: set all bits again
}
} }
return false; return false;
} }
@ -480,8 +502,8 @@ restore:
static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) { static inline bool mi_bchunk_try_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, bool* maybe_all_clear) {
mi_assert_internal(n>0); mi_assert_internal(n>0);
if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear); // if (n==MI_BFIELD_BITS) return mi_bchunk_try_clearX(chunk, cidx, maybe_all_clear);
if (n<MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear); if (n<=MI_BFIELD_BITS) return mi_bchunk_try_clearNX(chunk, cidx, n, maybe_all_clear);
return mi_bchunk_try_clearN_(chunk, cidx, n, maybe_all_clear); return mi_bchunk_try_clearN_(chunk, cidx, n, maybe_all_clear);
} }
@ -634,7 +656,7 @@ static inline bool mi_bchunk_try_find_and_clear8_at(mi_bchunk_t* chunk, size_t c
} }
#endif #endif
// find least byte in a chunk with all bits set, and try unset it atomically // find least aligned byte in a chunk with all bits set, and try to unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success. // set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success.
// Used to find medium size pages in the free blocks. // Used to find medium size pages in the free blocks.
// todo: try neon version // todo: try neon version
@ -682,7 +704,7 @@ static inline bool mi_bchunk_try_find_and_clear_8(mi_bchunk_t* chunk, size_t n,
} }
// find least bfield in a chunk with all bits set, and try unset it atomically // find least aligned bfield in a chunk with all bits set, and try to unset it atomically
// set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success. // set `*pidx` to its bit index (0 <= *pidx < MI_BCHUNK_BITS) on success.
// Used to find large size pages in the free blocks. // Used to find large size pages in the free blocks.
// todo: try neon version // todo: try neon version
@ -729,23 +751,24 @@ static inline bool mi_bchunk_try_find_and_clear_X(mi_bchunk_t* chunk, size_t n,
return mi_bchunk_try_find_and_clearX(chunk, pidx); return mi_bchunk_try_find_and_clearX(chunk, pidx);
} }
// find a sequence of `n` bits in a chunk with `n < MI_BFIELD_BITS` with all bits set, // find a sequence of `n` bits in a chunk with `0 < n <= MI_BFIELD_BITS` with all bits set,
// and try to clear them atomically. // and try to clear them atomically.
// Currently does not cross bfield boundaries.
// set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success. // set `*pidx` to its bit index (0 <= *pidx <= MI_BCHUNK_BITS - n) on success.
// (We do not cross bfield boundaries) // will cross bfield boundaries.
mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) { mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BFIELD_BITS) return false; if (n == 0 || n > MI_BFIELD_BITS) return false;
const mi_bfield_t mask = mi_bfield_mask(n, 0); const mi_bfield_t mask = mi_bfield_mask(n, 0);
// for all fields in the chunk
for (int i = 0; i < MI_BCHUNK_FIELDS; i++) { for (int i = 0; i < MI_BCHUNK_FIELDS; i++) {
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
size_t idx; size_t idx;
// is there a range inside the field?
while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit while (mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
if (idx + n > MI_BFIELD_BITS) break; if (idx + n > MI_BFIELD_BITS) break; // too short, maybe cross over, or continue with the next field
const size_t bmask = mask<<idx; const size_t bmask = mask<<idx;
mi_assert_internal(bmask>>idx == mask); mi_assert_internal(bmask>>idx == mask);
if ((b&bmask) == bmask) { // found a match if ((b&bmask) == bmask) { // found a match with all bits set, try clearing atomically
if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], bmask, NULL)) { if mi_likely(mi_bfield_atomic_try_clear_mask(&chunk->bfields[i], bmask, NULL)) {
*pidx = (i*MI_BFIELD_BITS) + idx; *pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx < MI_BCHUNK_BITS); mi_assert_internal(*pidx < MI_BCHUNK_BITS);
@ -753,7 +776,7 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
return true; return true;
} }
else { else {
// if failed to atomically commit, reload b and try again from this position // if we failed to atomically commit, reload b and try again from the start
b = mi_atomic_load_acquire(&chunk->bfields[i]); b = mi_atomic_load_acquire(&chunk->bfields[i]);
} }
} }
@ -764,6 +787,25 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
b = b & ~mi_bfield_mask(ones, idx); // clear the ones b = b & ~mi_bfield_mask(ones, idx); // clear the ones
} }
} }
// check if we can cross into the next bfield
if (i < MI_BCHUNK_FIELDS-1) {
const size_t post = mi_bfield_clz(~b);
if (post > 0) {
const size_t pre = mi_bfield_ctz(mi_atomic_load_relaxed(&chunk->bfields[i+1]));
if (post + pre <= n) {
// it fits -- try to claim it atomically
const size_t cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - post);
if (mi_bchunk_try_clearNX(chunk, cidx, n, NULL)) {
// we cleared all atomically
*pidx = cidx;
mi_assert_internal(*pidx < MI_BCHUNK_BITS);
mi_assert_internal(*pidx + n <= MI_BCHUNK_BITS);
return true;
}
}
}
}
} }
return false; return false;
} }
@ -775,46 +817,47 @@ mi_decl_noinline static bool mi_bchunk_try_find_and_clearNX(mi_bchunk_t* chunk,
static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk, size_t n, size_t* pidx) { static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
const size_t skip_count = n/MI_BFIELD_BITS; // we first scan ahead to see if there is a range of `n` set bits, and only then try to clear atomically
mi_assert_internal(n>0);
const size_t skip_count = (n-1)/MI_BFIELD_BITS;
size_t cidx; size_t cidx;
for (size_t i = 0; i <= MI_BCHUNK_FIELDS - skip_count; i++) for (size_t i = 0; i < MI_BCHUNK_FIELDS - skip_count; i++)
{ {
size_t m = n; // bits to go size_t m = n; // bits to go
// first field // first field
mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]); mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]);
size_t ones = mi_bfield_clz(~b); size_t ones = mi_bfield_clz(~b);
cidx = i*MI_BFIELD_BITS + (MI_BFIELD_BITS - ones); // start index cidx = (i*MI_BFIELD_BITS) + (MI_BFIELD_BITS - ones); // start index
if (ones >= m) { if (ones >= m) {
// we found enough bits! // we found enough bits!
m = 0; m = 0;
} }
else { else {
m -= ones; m -= ones;
mi_assert_internal(m>0);
} // keep scanning further fields?
size_t j = 1; // field count from i
// keep scanning further fields? while (i+j < MI_BCHUNK_FIELDS) {
size_t j = 1; // field count from i mi_assert_internal(m > 0);
while (i+j < MI_BCHUNK_FIELDS) { b = mi_atomic_load_relaxed(&chunk->bfields[i+j]);
mi_assert_internal(m > 0); ones = mi_bfield_ctz(~b);
b = mi_atomic_load_relaxed(&chunk->bfields[i+j]); if (ones >= m) {
ones = mi_bfield_ctz(~b); // we found enough bits
if (ones >= m) { m = 0;
// we found enough bits break;
m = 0; }
break; else if (ones == MI_BFIELD_BITS) {
} // not enough yet, proceed to the next field
else if (ones == MI_BFIELD_BITS) { j++;
// not enough yet, proceed to the next field m -= MI_BFIELD_BITS;
j++; }
m -= MI_BFIELD_BITS; else {
} // the range was not enough, start from scratch
else { i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time)
// the range was not enough, start from scratch mi_assert_internal(m>0);
i = i + j - 1; // no need to re-scan previous fields, except the last one (with clz this time) break;
mi_assert_internal(m>0); }
break;
} }
} }
@ -838,9 +881,9 @@ static mi_decl_noinline bool mi_bchunk_try_find_and_clearN_(mi_bchunk_t* chunk,
//static inline bool mi_bchunk_try_find_and_clearN(mi_bchunk_t* chunk, size_t n, size_t* pidx) { //static inline bool mi_bchunk_try_find_and_clearN(mi_bchunk_t* chunk, size_t n, size_t* pidx) {
// if (n==1) return mi_bchunk_try_find_and_clear(chunk, pidx); // small pages // if (n==1) return mi_bchunk_try_find_and_clear(chunk, pidx); // small pages
// if (n==8) return mi_bchunk_try_find_and_clear8(chunk, pidx); // medium pages // if (n==8) return mi_bchunk_try_find_and_clear8(chunk, pidx); // medium pages
// if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx); // large pages // // if (n==MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearX(chunk, pidx); // large pages
// if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk // if (n==0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
// if (n < MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx); // if (n<=MI_BFIELD_BITS) return mi_bchunk_try_find_and_clearNX(chunk, n, pidx);
// return mi_bchunk_try_find_and_clearN_(chunk, n, pidx); // return mi_bchunk_try_find_and_clearN_(chunk, n, pidx);
//} //}
@ -909,7 +952,7 @@ static void mi_bitmap_chunkmap_set_max(mi_bitmap_t* bitmap, size_t chunk_idx) {
static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) { static void mi_bitmap_chunkmap_set(mi_bitmap_t* bitmap, size_t chunk_idx) {
mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap)); mi_assert(chunk_idx < mi_bitmap_chunk_count(bitmap));
mi_bchunk_set(&bitmap->chunkmap, chunk_idx); mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL);
mi_bitmap_chunkmap_set_max(bitmap, chunk_idx); mi_bitmap_chunkmap_set_max(bitmap, chunk_idx);
} }
@ -922,7 +965,7 @@ static bool mi_bitmap_chunkmap_try_clear(mi_bitmap_t* bitmap, size_t chunk_idx)
// .. but a concurrent set may have happened in between our all-clear test and the clearing of the // .. but a concurrent set may have happened in between our all-clear test and the clearing of the
// bit in the mask. We check again to catch this situation. // bit in the mask. We check again to catch this situation.
if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) { if (!mi_bchunk_all_are_clear_relaxed(&bitmap->chunks[chunk_idx])) {
mi_bchunk_set(&bitmap->chunkmap, chunk_idx); mi_bchunk_set(&bitmap->chunkmap, chunk_idx, NULL);
return false; return false;
} }
mi_bitmap_chunkmap_set_max(bitmap, chunk_idx); mi_bitmap_chunkmap_set_max(bitmap, chunk_idx);
@ -1018,7 +1061,7 @@ bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx) {
const size_t chunk_idx = idx / MI_BCHUNK_BITS; const size_t chunk_idx = idx / MI_BCHUNK_BITS;
const size_t cidx = idx % MI_BCHUNK_BITS; const size_t cidx = idx % MI_BCHUNK_BITS;
mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap)); mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx); const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx, NULL);
mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards mi_bitmap_chunkmap_set(bitmap, chunk_idx); // set afterwards
return wasclear; return wasclear;
} }
@ -1235,9 +1278,9 @@ bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pid
return mi_bitmap_try_find_and_clear_generic(bitmap, tseq, 8, pidx, &mi_bchunk_try_find_and_clear_8); return mi_bitmap_try_find_and_clear_generic(bitmap, tseq, 8, pidx, &mi_bchunk_try_find_and_clear_8);
} }
bool mi_bitmap_try_find_and_clearX(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) { //bool mi_bitmap_try_find_and_clearX(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx) {
return mi_bitmap_try_find_and_clear_generic(bitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X); // return mi_bitmap_try_find_and_clear_generic(bitmap, tseq, MI_BFIELD_BITS, pidx, &mi_bchunk_try_find_and_clear_X);
} //}
bool mi_bitmap_try_find_and_clearNX(mi_bitmap_t* bitmap, size_t tseq, size_t n, size_t* pidx) { bool mi_bitmap_try_find_and_clearNX(mi_bitmap_t* bitmap, size_t tseq, size_t n, size_t* pidx) {
mi_assert_internal(n<=MI_BFIELD_BITS); mi_assert_internal(n<=MI_BFIELD_BITS);
@ -1279,7 +1322,7 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk
else { else {
// failed to claim it, set abandoned mapping again (unless the page was freed) // failed to claim it, set abandoned mapping again (unless the page was freed)
if (keep_set) { if (keep_set) {
const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx); const bool wasclear = mi_bchunk_set(&bitmap->chunks[chunk_idx], cidx, NULL);
mi_assert_internal(wasclear); MI_UNUSED(wasclear); mi_assert_internal(wasclear); MI_UNUSED(wasclear);
} }
} }

View file

@ -191,7 +191,7 @@ static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) {
// Specialized versions for common bit sequence sizes // Specialized versions for common bit sequence sizes
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // 1-bit mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // 1-bit
mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // 8-bits mi_decl_nodiscard bool mi_bitmap_try_find_and_clear8(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // 8-bits
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearX(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS // mi_decl_nodiscard bool mi_bitmap_try_find_and_clearX(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearNX(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx); // < MI_BFIELD_BITS mi_decl_nodiscard bool mi_bitmap_try_find_and_clearNX(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx); // < MI_BFIELD_BITS
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx); // > MI_BFIELD_BITS <= MI_BCHUNK_BITS mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx); // > MI_BFIELD_BITS <= MI_BCHUNK_BITS
@ -200,7 +200,7 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_
mi_decl_nodiscard static inline bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) { mi_decl_nodiscard static inline bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) {
if (n==1) return mi_bitmap_try_find_and_clear(bitmap, tseq, pidx); // small pages if (n==1) return mi_bitmap_try_find_and_clear(bitmap, tseq, pidx); // small pages
if (n==8) return mi_bitmap_try_find_and_clear8(bitmap, tseq, pidx); // medium pages if (n==8) return mi_bitmap_try_find_and_clear8(bitmap, tseq, pidx); // medium pages
if (n==MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearX(bitmap, tseq, pidx); // large pages // if (n==MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearX(bitmap, tseq, pidx); // large pages
if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk if (n == 0 || n > MI_BCHUNK_BITS) return false; // cannot be more than a chunk
if (n < MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearNX(bitmap, tseq, n, pidx); if (n < MI_BFIELD_BITS) return mi_bitmap_try_find_and_clearNX(bitmap, tseq, n, pidx);
return mi_bitmap_try_find_and_clearN_(bitmap, tseq, n, pidx); return mi_bitmap_try_find_and_clearN_(bitmap, tseq, n, pidx);