more documentation; better pairmap find_and_set_to_busy, busy flag is now 0x10

This commit is contained in:
daanx 2024-12-04 19:15:55 -08:00
parent 45f7fb559a
commit afe9089152
5 changed files with 296 additions and 393 deletions

View file

@ -199,7 +199,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
void* p = mi_arena_slice_start(arena, slice_index);
*memid = mi_memid_create_arena(arena->id, arena->exclusive, slice_index, slice_count);
memid->is_pinned = arena->memid.is_pinned;
// set the dirty bits
if (arena->memid.initially_zero) {
// size_t dirty_count = 0;
@ -239,7 +239,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
memid->initially_zero = false;
}
}
#endif
size_t already_committed_count = 0;
mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count);
if (already_committed_count < slice_count) {
@ -247,7 +247,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
mi_stat_decrease(_mi_stats_main.committed, mi_size_of_slices(already_committed_count));
}
}
}
}
else {
// no need to commit, but check if already fully committed
@ -282,8 +282,8 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_SLICE_SIZE);
if (arena_count >= 1 && arena_count <= 128) {
// scale up the arena sizes exponentially every 8 entries
const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16);
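// example: arena_count 1..7 gives multiplier 1, 8..15 gives 1<<1 = 2, 16..23 gives 1<<2 = 4,
// so the reserve size doubles for every 8 arenas (the multiplier is capped at 1<<16)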
size_t reserve = 0;
if (!mi_mul_overflow(multiplier, arena_reserve, &reserve)) {
arena_reserve = reserve;
@ -399,7 +399,7 @@ again:
if (mi_lock_try_acquire(&mi_arena_reserve_lock)) {
mi_arena_id_t arena_id = 0;
bool ok = mi_arena_reserve(mi_size_of_slices(slice_count), allow_large, req_arena_id, &arena_id);
mi_lock_release(&mi_arena_reserve_lock);
if (ok) {
// and try allocate in there
mi_assert_internal(req_arena_id == _mi_arena_id_none());
@ -476,6 +476,19 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t
Arena page allocation
----------------------------------------------------------- */
static bool mi_arena_claim_abandoned(size_t slice_index, void* arg1, void* arg2) {
mi_arena_t* arena = (mi_arena_t*)arg1;
mi_subproc_t* subproc = (mi_subproc_t*)arg2;
// found an abandoned page of the right size
// it is set busy for now so we can read safely even with concurrent mi_free reclaiming
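// (while the pair is busy, a concurrent `_mi_arena_page_unabandon` waits in
//  `mi_pairmap_clear_once_not_busy`, so the page memory stays valid to read here)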
// try to claim ownership atomically
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
if (subproc != page->subproc) return false;
if (!mi_page_try_claim_ownership(page)) return false;
return true;
}
static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t block_size, mi_arena_id_t req_arena_id, mi_tld_t* tld)
{
MI_UNUSED(slice_count);
@ -493,38 +506,29 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
{
size_t slice_index;
mi_pairmap_t* const pairmap = &arena->pages_abandoned[bin];
while (mi_pairmap_try_find_and_set_busy(pairmap, tseq, &slice_index)) { // todo: don't restart from scratch if we fail for some entry?
// found an abandoned page of the right size
// it is set busy for now so we can read safely even with concurrent mi_free reclaiming
// try to claim ownership atomically
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
if (!mi_page_try_claim_ownership(page)) {
// a concurrent free already grabbed the page.
// Restore the abandoned_map to make it available again (unblocking busy waiters)
mi_pairmap_set(pairmap, slice_index);
}
else {
// we got ownership, clear the abandoned entry (unblocking busy waiters)
mi_pairmap_clear(pairmap, slice_index);
mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
_mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
_mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1);
_mi_page_free_collect(page, false); // update `used` count
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page);
mi_assert_internal(mi_page_block_size(page) == block_size);
mi_assert_internal(mi_page_is_abandoned(page));
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(!mi_page_is_full(page));
return page;
}
}
if (mi_pairmap_try_find_and_set_busy(pairmap, tseq, &slice_index, &mi_arena_claim_abandoned, arena, subproc)) {
// found an abandoned page of the right size
// and claimed ownership.
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
_mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
_mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1);
_mi_page_free_collect(page, false); // update `used` count
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page);
mi_assert_internal(mi_page_block_size(page) == block_size);
mi_assert_internal(!mi_page_is_full(page));
return page;
}
}
mi_forall_arenas_end();
return NULL;
@ -565,8 +569,8 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment));
// claimed free slices: initialize the page partly
if (!memid.initially_zero) {
_mi_memzero_aligned(page, sizeof(*page));
}
#if MI_DEBUG > 1
else {
@ -779,7 +783,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) {
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
// this busy waits until a concurrent reader (from alloc_abandoned) is done
mi_pairmap_clear_while_not_busy(&arena->pages_abandoned[bin], slice_index);
mi_pairmap_clear_once_not_busy(&arena->pages_abandoned[bin], slice_index);
mi_page_clear_abandoned_mapped(page);
mi_atomic_decrement_relaxed(&page->subproc->abandoned_count[bin]);
}
@ -999,7 +1003,7 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t*
mi_atomic_decrement_acq_rel(&mi_arena_count);
return false;
}
_mi_stat_counter_increase(&stats->arena_count,1);
arena->id = mi_arena_id_create(i);
mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
@ -1049,7 +1053,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
// todo: allow larger areas (either by splitting it up in arena's or having larger arena's)
_mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_slices(MI_BITMAP_MAX_BIT_COUNT)/MI_MiB);
return false;
}
size_t bitmap_base;
const size_t info_slices = mi_arena_info_slices_needed(slice_count, &bitmap_base);
if (slice_count < info_slices+1) {

View file

@ -995,13 +995,13 @@ mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, s
// Set/clear a pair of bits starting at an even `idx` in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// The pair cannot cross a chunk boundary.
static bool mi_bitmap_xset_pair(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal((idx%2)==0);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
if (set) {
// first set the chunkmap since it is a conservative approximation (increases epoch)
mi_bitmap_chunkmap_set(bitmap, chunk_idx);
@ -1066,7 +1066,7 @@ static inline bool mi_bitmap_is_xset2(mi_xset_t set, mi_bitmap_t* bitmap, size_t
mi_assert_internal(idx + 2 <= mi_bitmap_max_bits(bitmap));
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
return mi_bitmap_chunk_is_xset2(set, &bitmap->chunks[chunk_idx], cidx);
}
@ -1091,13 +1091,13 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
/* --------------------------------------------------------------------------------
bitmap try_find_and_clear
-------------------------------------------------------------------------------- */
/*
typedef bool (mi_bitmap_find_fun_t)(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx);
static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx, mi_bitmap_find_fun_t* find_fun)
{
if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;
// start chunk index -- todo: can depend on the tseq to decrease contention between threads
MI_UNUSED(tseq);
const size_t chunk_start = 0;
@ -1105,7 +1105,7 @@ static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq
const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS;
// for each chunkmap entry `i`
for( size_t _i = 0; _i < bitmap->chunk_map_count; _i++)
{
size_t i = (_i + chunk_map_start);
if (i > bitmap->chunk_map_count) i -= bitmap->chunk_map_count; // adjust for the start position
@ -1122,50 +1122,106 @@ static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq
if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; }
// set the chunk idx
const size_t chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift;
// try to find and clear N bits in that chunk
if (chunk_idx < mi_bitmap_chunk_count(bitmap)) { // we can have less chunks than in the chunkmap..
if ((*find_fun)(bitmap, n, chunk_idx, epoch, pidx)) {
return true;
}
}
// skip to the next bit
cmap_idx_shift += cmap_idx+1;
cmap >>= cmap_idx; // skip scanned bits (and avoid UB for `cmap_idx+1`)
cmap >>= 1;
}
}
return false;
}
*/
static bool mi_bitmap_try_find_and_clearN_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
return true;
}
else {
// we may find that all are cleared only on a second iteration but that is ok as
// the chunkmap is a conservative approximation.
if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
}
return false;
}
}
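// Visit all chunks that may contain set bits (according to the chunkmap). The macro pair
// brackets a user block, e.g. `mi_bitmap_forall_chunks(bm, tseq, epoch, chunk_idx) { ... } mi_bitmap_forall_chunks_end();`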
#define mi_bitmap_forall_chunks(bitmap, tseq, name_epoch, name_chunk_idx) \
{ \
/* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
MI_UNUSED(tseq); \
const size_t chunk_start = 0; \
const size_t chunk_map_start = chunk_start / MI_CHUNKMAP_BITS; \
const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS; \
/* for each chunkmap entry `i` */ \
for (size_t _i = 0; _i < bitmap->chunk_map_count; _i++) { \
size_t i = (_i + chunk_map_start); \
if (i > bitmap->chunk_map_count) i -= bitmap->chunk_map_count; /* adjust for the start position */ \
\
const size_t chunk_idx0 = i*MI_CHUNKMAP_BITS; \
mi_epoch_t name_epoch; \
mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, chunk_idx0, &name_epoch); \
if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); } /* rotate right for the start position (on the first iteration) */ \
\
uint32_t cmap_idx; /* one bit set of each chunk that may have bits set */ \
size_t cmap_idx_shift = 0; /* shift through the cmap */ \
while (mi_bsf32(cmap, &cmap_idx)) { /* find least bit that is set */ \
/* adjust for the start position again */ \
if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; } \
/* set the chunk idx */ \
const size_t name_chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift; \
/* try to find and clear N bits in that chunk */ \
if (name_chunk_idx < mi_bitmap_chunk_count(bitmap)) { /* we can have fewer chunks than in the chunkmap.. */
#define mi_bitmap_forall_chunks_end() \
} \
/* skip to the next bit */ \
cmap_idx_shift += cmap_idx+1; \
cmap >>= cmap_idx; /* skip scanned bits (and avoid UB for `cmap_idx+1`) */ \
cmap >>= 1; \
} \
}}
//static bool mi_bitmap_try_find_and_clearN_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
// size_t cidx;
// if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
// *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
// mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
// return true;
// }
// else {
// // we may find that all are cleared only on a second iteration but that is ok as
// // the chunkmap is a conservative approximation.
// if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
// mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
// }
// return false;
// }
//}
// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
{
return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_clearN_at);
// return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_clearN_at);
mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
return true;
}
else {
// we may find that all are cleared only on a second iteration but that is ok as
// the chunkmap is a conservative approximation.
if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
}
// continue
}
}
mi_bitmap_forall_chunks_end();
return false;
}
/* --------------------------------------------------------------------------------
pairmap
-------------------------------------------------------------------------------- */
void mi_pairmap_init(mi_pairmap_t* pairmap, mi_bitmap_t* bm1, mi_bitmap_t* bm2) {
@ -1215,10 +1271,10 @@ bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
pairmap clear while not busy
-------------------------------------------------------------------------------- */
static inline bool mi_bfield_atomic_clear2_while_not_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
static inline bool mi_bfield_atomic_clear2_once_not_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 10 (busy), and 11 (set).
mi_assert_internal(idx < MI_BFIELD_BITS-1);
const mi_bfield_t mask = ((mi_bfield_t)0x03 << idx);
const mi_bfield_t mask = ((mi_bfield_t)MI_PAIR_SET << idx);
const mi_bfield_t mask_busy = ((mi_bfield_t)MI_PAIR_BUSY << idx);
mi_bfield_t bnew;
mi_bfield_t old = mi_atomic_load_relaxed(b);
@ -1238,32 +1294,32 @@ static inline bool mi_bfield_atomic_clear2_while_not_busy(_Atomic(mi_bfield_t)*b
return ((old&mask) == mask);
}
static inline bool mi_bitmap_chunk_clear2_while_not_busy(mi_bitmap_chunk_t* chunk, size_t cidx) {
static inline bool mi_bitmap_chunk_clear2_once_not_busy(mi_bitmap_chunk_t* chunk, size_t cidx) {
mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_clear2_while_not_busy(&chunk->bfields[i], idx);
return mi_bfield_atomic_clear2_once_not_busy(&chunk->bfields[i], idx);
}
static bool mi_bitmap_clear2_while_not_busy(mi_bitmap_t* bitmap, size_t idx) {
static bool mi_bitmap_clear2_once_not_busy(mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal((idx%2)==0);
mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap, chunk_idx);
bool cleared = mi_bitmap_chunk_clear2_while_not_busy(&bitmap->chunks[chunk_idx], cidx);
bool cleared = mi_bitmap_chunk_clear2_once_not_busy(&bitmap->chunks[chunk_idx], cidx);
if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
}
return cleared;
}
void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
void mi_pairmap_clear_once_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
mi_bitmap_t* bitmap;
size_t idx;
mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
mi_bitmap_clear2_while_not_busy(bitmap, idx);
mi_bitmap_clear2_once_not_busy(bitmap, idx);
}
@ -1274,9 +1330,9 @@ void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
// Atomically go from set to busy, or return false otherwise and leave the bit field as-is.
static inline bool mi_bfield_atomic_try_set_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 10 (busy), and 11 (set).
mi_assert_internal(idx < MI_BFIELD_BITS-1);
const mi_bfield_t mask = ((mi_bfield_t)0x03 << idx);
const mi_bfield_t mask = ((mi_bfield_t)MI_PAIR_SET << idx);
const mi_bfield_t mask_busy = ((mi_bfield_t)MI_PAIR_BUSY << idx);
mi_bfield_t old;
mi_bfield_t bnew;
@ -1290,49 +1346,57 @@ static inline bool mi_bfield_atomic_try_set_busy(_Atomic(mi_bfield_t)*b, size_t
static inline bool mi_bitmap_chunk_try_find_and_set_busy(mi_bitmap_chunk_t* chunk, size_t* pidx) {
for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i], &idx)) { // find least 1-bit, it may be set or busy
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
if mi_likely(mi_bfield_atomic_try_set_busy(&chunk->bfields[i], idx)) {
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS-1);
return true;
while (true) {
const mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]) & MI_BFIELD_LO_BIT2; // only keep MI_PAIR_SET bits
size_t idx;
if (!mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
break; // not found: continue with the next field
}
else {
mi_assert_internal((idx%2)==0);
if mi_likely(mi_bfield_atomic_try_set_busy(&chunk->bfields[i], idx)) {
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS-1);
return true;
}
// else: try this word once again
}
}
}
return false;
}
static bool mi_bitmap_try_find_and_set_busy_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
MI_UNUSED(epoch); MI_UNUSED(n);
mi_assert_internal(n==2);
size_t cidx;
if mi_likely(mi_bitmap_chunk_try_find_and_set_busy(&bitmap->chunks[chunk_idx], &cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
return true;
}
else {
return false;
}
}
static bool mi_bitmap_try_find_and_set_busy(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) {
return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_set_busy_at);
static bool mi_bitmap_try_find_and_set_busy(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t idx_offset, size_t* ppair_idx,
mi_bitmap_claim_while_busy_fun_t* claim, void* arg1, void* arg2)
{
mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
{
MI_UNUSED(epoch); MI_UNUSED(n);
mi_assert_internal(n==2);
size_t cidx;
if mi_likely(mi_bitmap_chunk_try_find_and_set_busy(&bitmap->chunks[chunk_idx], &cidx)) {
const size_t idx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal((idx%2)==0);
const size_t pair_idx = (idx + idx_offset)/2;
if (claim(pair_idx, arg1, arg2)) { // while busy, the claim function can read from the page
mi_bitmap_xset_pair(MI_BIT_CLEAR, bitmap, idx); // claimed, clear the entry
*ppair_idx = pair_idx;
return true;
}
else {
mi_bitmap_xset_pair(MI_BIT_SET, bitmap, idx); // not claimed, reset the entry
// and continue
}
}
}
mi_bitmap_forall_chunks_end();
return false;
}
// Used to find an abandoned page, and transition from set to busy.
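// The `claim` callback is called while the pair is still busy; if it returns true the
// pair is cleared, and otherwise the pair is restored to set and the search continues.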
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx) {
size_t idx = 0;
if (!mi_bitmap_try_find_and_set_busy(pairmap->bitmap1, 2, tseq, &idx)) {
if (!mi_bitmap_try_find_and_set_busy(pairmap->bitmap2, 2, tseq, &idx)) {
return false;
}
else {
idx += mi_bitmap_max_bits(pairmap->bitmap1);
}
}
mi_assert_internal((idx%2)==0);
*pidx = idx/2;
return true;
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pair_idx,
mi_bitmap_claim_while_busy_fun_t* claim, void* arg1, void* arg2 ) {
if (mi_bitmap_try_find_and_set_busy(pairmap->bitmap1, 2, tseq, 0, pair_idx, claim, arg1, arg2)) return true;
return mi_bitmap_try_find_and_set_busy(pairmap->bitmap2, 2, tseq, mi_bitmap_max_bits(pairmap->bitmap1), pair_idx, claim, arg1, arg2);
}

View file

@ -13,9 +13,47 @@ Concurrent bitmap that can set/reset sequences of bits atomically
#define MI_BITMAP_H
/* --------------------------------------------------------------------------------
Definitions
-------------------------------------------------------------------------------- */
Atomic bitmaps:
`mi_bfield_t`: a single machine word that can be bit-counted efficiently (usually `size_t`);
each bit usually represents a single slice of MI_ARENA_SLICE_SIZE (64 KiB) in an arena.
We need 16K bits to represent a 1 GiB arena.
`mi_bitmap_chunk_t`: a chunk of bfields with a total of MI_BITMAP_CHUNK_BITS (= 512) bits.
Allocations never span across chunks -- so MI_ARENA_MAX_OBJ_SIZE is the number
of bits in a chunk times the MI_ARENA_SLICE_SIZE (512 * 64KiB = 32 MiB).
These chunks are cache-aligned and we can use AVX2/AVX512/SVE/SVE2/etc. instructions
to scan for bits (perhaps) more efficiently.
`mi_chunkmap_t`: for each chunk we track if it has (potentially) any bit set.
The chunkmap has 1 bit per chunk that is set if the chunk potentially has a bit set.
This is used to avoid scanning every chunk (and is thus strictly an optimization).
It is conservative: it is fine to set a bit in the chunkmap even if the chunk turns out
to have no bits set.
When we (potentially) set a bit in a chunk, we first update the chunkmap.
However, when we clear a bit in a chunk, and the chunk is indeed all clear, we
cannot safely clear the bit corresponding to the chunk in the chunkmap since it
may race with another thread setting a bit in the same chunk (and we may clear the
bit even though a bit is set in the chunk which is not allowed).
To fix this, each chunkmap entry contains 32 bits for chunks, and a 32-bit "epoch"
counter that is increased every time a bit is set. We only clear a bit if the epoch
stayed the same over our clear operation (so we know that no other thread set a bit
in any of the chunks covered by this chunkmap entry in the meantime).
Since increasing the epoch and setting a bit must happen together atomically, both are
limited to half a word (32 bits) each so they fit in a single 64-bit atomic
(128-bit atomics could double this if needed, since modern hardware supports them).
`mi_bitmap_t`: a bitmap with N chunks. A bitmap always has MI_BITMAP_MAX_CHUNK_FIELDS (= 16)
chunkmap fields and can support arenas from a few chunks up to 16 chunkmaps = 16 * 32 chunks = 16 GiB.
The `chunk_count` can be anything from 1 to the maximum supported by the chunkmaps, but
each chunk is always complete (512 bits, so 512 * 64 KiB = 32 MiB memory areas).
For now, the implementation assumes MI_HAS_FAST_BITSCAN and uses trailing-zero-count
and pop-count (but we think it can be adapted to work reasonably well on older hardware too).
--------------------------------------------------------------------------------------------- */
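// Worked example of the sizes above: a 1 GiB arena has 1 GiB / 64 KiB = 16384 slices,
// i.e. 16384 bits = 32 chunks of 512 bits = exactly one 32-bit chunkmap entry; the
// 16 GiB maximum corresponds to 512 chunks tracked by 16 chunkmap entries.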
// A word-size bit field.
typedef size_t mi_bfield_t;
#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
@ -29,16 +67,18 @@ typedef size_t mi_bfield_t;
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)
// 512 bits on 64_bit
// A bitmap chunk contains 512 bits of bfields on 64-bit (256 on 32-bit)
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_chunk_s {
_Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;
// for now 32-bit epoch + 32-bit bit-set (note: with ABA instructions we can double this)
typedef uint64_t mi_chunkmap_t;
typedef uint32_t mi_epoch_t;
typedef uint32_t mi_cmap_t;
#define MI_CHUNKMAP_BITS (32) // 1 chunkmap tracks 32 chunks
#define MI_BITMAP_MAX_CHUNKMAPS (16)
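// Illustrative sketch (not part of this change; the helper names below are hypothetical):
// a chunkmap entry packs the 32-bit chunk bit-set together with the 32-bit epoch in one
// 64-bit word, e.g. with the bit-set in the low half and the epoch in the high half.
static inline mi_chunkmap_t mi_chunkmap_pack(mi_cmap_t cmap, mi_epoch_t epoch) {
  return ((mi_chunkmap_t)epoch << MI_CHUNKMAP_BITS) | cmap;  // epoch in the high 32 bits
}
static inline mi_cmap_t mi_chunkmap_unpack(mi_chunkmap_t cm, mi_epoch_t* epoch) {
  *epoch = (mi_epoch_t)(cm >> MI_CHUNKMAP_BITS);             // recover the epoch
  return (mi_cmap_t)cm;                                      // low 32 bits are the chunk bits
}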
@ -48,15 +88,18 @@ typedef uint32_t mi_cmap_t;
#define MI_BITMAP_MAX_BIT_COUNT (MI_BITMAP_MAX_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS) // 16 GiB arena
#define MI_BITMAP_MIN_BIT_COUNT (MI_BITMAP_MIN_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS) // 1 GiB arena
// An atomic bitmap
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_s {
_Atomic(size_t) chunk_map_count;
_Atomic(size_t) chunk_count;
_Atomic(size_t) chunk_map_count; // valid chunk_map's
_Atomic(size_t) chunk_count; // total count of chunks
size_t padding[MI_BITMAP_CHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc
_Atomic(mi_chunkmap_t) chunk_maps[MI_BITMAP_MAX_CHUNKMAPS];
mi_bitmap_chunk_t chunks[MI_BITMAP_MIN_BIT_COUNT]; // or more, up to MI_BITMAP_MAX_CHUNK_COUNT
} mi_bitmap_t;
static inline size_t mi_bitmap_chunk_map_count(const mi_bitmap_t* bitmap) {
return mi_atomic_load_relaxed(&bitmap->chunk_map_count);
}
@ -72,17 +115,19 @@ static inline size_t mi_bitmap_max_bits(const mi_bitmap_t* bitmap) {
/* --------------------------------------------------------------------------------
Atomic bitmap
Atomic bitmap operations
-------------------------------------------------------------------------------- */
// Many operations are generic over setting or clearing the bit sequence: we use `mi_xset_t` for this (true if setting, false if clearing)
typedef bool mi_xset_t;
#define MI_BIT_SET (true)
#define MI_BIT_CLEAR (false)
// Required size of a bitmap to represent `bit_count` bits.
size_t mi_bitmap_size(size_t bit_count, size_t* chunk_count);
// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
// Initialize a bitmap to all clear; avoid a mem_zero if `already_zero` is true
// returns the size of the bitmap.
size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero);
@ -134,56 +179,46 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t
// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
// and false otherwise leaving the bitmask as is.
//mi_decl_nodiscard bool mi_bitmap_try_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx);
//
//static inline bool mi_bitmap_try_set(mi_bitmap_t* bitmap, size_t idx) {
// return mi_bitmap_try_xset(MI_BIT_SET, bitmap, idx);
//}
//
//static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) {
// return mi_bitmap_try_xset(MI_BIT_CLEAR, bitmap, idx);
//}
// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
//mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx);
//
//static inline bool mi_bitmap_try_set8(mi_bitmap_t* bitmap, size_t idx) {
// return mi_bitmap_try_xset8(MI_BIT_SET, bitmap, idx);
//}
//
//static inline bool mi_bitmap_try_clear8(mi_bitmap_t* bitmap, size_t idx) {
// return mi_bitmap_try_xset8(MI_BIT_CLEAR, bitmap, idx);
//}
/* --------------------------------------------------------------------------------
Atomic bitmap for a pair of bits
Atomic bitmap for a pair of bits.
The valid pairs are CLEAR (0), SET (3), or BUSY (2).
These bit pairs are used in the abandoned-pages maps: when SET, the entry has
an available page. When we scan for an available abandoned page and find a SET entry,
we first set it to BUSY and try to claim the page atomically (since this can race
with a concurrent `mi_free` which also tries to claim the page). Unlike `mi_free`, however,
we cannot be sure that a concurrent `mi_free` did not also free (and decommit) the page
just as we read the entry. Therefore, a page can only be freed after `_mi_arena_page_unabandon`,
which (busy) waits until the BUSY flag is cleared to ensure all readers are done
(and pair-bit operations must therefore be release/acquire).
-------------------------------------------------------------------------------- */
#define MI_PAIR_CLEAR (0)
#define MI_PAIR_BUSY (1)
#define MI_PAIR_UNUSED (2) // should never occur
#define MI_PAIR_UNUSED (1) // should never occur
#define MI_PAIR_BUSY (2)
#define MI_PAIR_SET (3)
// 0b....0101010101010101
#define MI_BFIELD_LO_BIT2 ((MI_BFIELD_LO_BIT8 << 6)|(MI_BFIELD_LO_BIT8 << 4)|(MI_BFIELD_LO_BIT8 << 2)|MI_BFIELD_LO_BIT8)
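// Illustrative sketch (not part of this change; `mi_pair_state` is a hypothetical helper):
// decode the state of the pair that starts at an even bit-index `idx` in a bfield.
static inline size_t mi_pair_state(mi_bfield_t b, size_t idx) {
  return (b >> idx) & 0x03;   // MI_PAIR_CLEAR (0), MI_PAIR_BUSY (2), or MI_PAIR_SET (3)
}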
// A pairmap manipulates pairs of bits (and consists of 2 bitmaps)
typedef struct mi_pairmap_s {
mi_bitmap_t* bitmap1;
mi_bitmap_t* bitmap2;
} mi_pairmap_t;
// initialize a pairmap to all unset; avoid a mem_zero if `already_zero` is true
// initialize a pairmap to all clear; avoid a mem_zero if `already_zero` is true
void mi_pairmap_init(mi_pairmap_t* pairmap, mi_bitmap_t* bm1, mi_bitmap_t* bm2);
bool mi_pairmap_set(mi_pairmap_t* pairmap, size_t pair_idx);
bool mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx);
bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx);
void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx);
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx);
void mi_pairmap_clear_once_not_busy(mi_pairmap_t* pairmap, size_t pair_idx);
typedef bool (mi_bitmap_claim_while_busy_fun_t)(size_t pair_index, void* arg1, void* arg2);
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx,
mi_bitmap_claim_while_busy_fun_t* claim, void* arg1, void* arg2
);
#endif // MI_XBITMAP_H
#endif // MI_BITMAP_H

View file

@ -148,15 +148,44 @@ void mi_free(void* p) mi_attr_noexcept
}
// ------------------------------------------------------
// Multi-threaded Free (`_mt`)
// ------------------------------------------------------
static void mi_decl_noinline mi_free_try_reclaim_mt(mi_page_t* page);
// Push a block that is owned by another thread (or abandoned) on its page-local thread free list.
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page, block));
// _mi_padding_shrink(page, block, sizeof(mi_block_t));
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
size_t dbgsize = mi_usable_size(block);
if (dbgsize > MI_MiB) { dbgsize = MI_MiB; }
_mi_memset_aligned(block, MI_DEBUG_FREED, dbgsize);
#endif
// push atomically on the page thread free list
mi_thread_free_t tf_new;
mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
do {
mi_block_set_next(page, block, mi_tf_block(tf_old));
tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));
// and atomically reclaim the page if it was abandoned
bool reclaimed = !mi_tf_is_owned(tf_old);
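// (tf_new is always created as owned, so if the old value was not owned this CAS
//  is the transition that claimed the abandoned page for this thread)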
if (reclaimed) {
mi_free_try_reclaim_mt(page);
}
}
static void mi_decl_noinline mi_free_try_reclaim_mt(mi_page_t* page) {
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
#if 1
// we own the page now..
// safe to collect the thread atomic free list
_mi_page_free_collect(page, false); // update `used` count
@ -209,237 +238,8 @@ static void mi_decl_noinline mi_free_try_reclaim_mt(mi_page_t* page) {
// not reclaimed or free'd, unown again
_mi_page_unown(page);
#else
if (!mi_page_is_abandoned_mapped(page)) {
// singleton or OS allocated
if (mi_page_is_singleton(page)) {
// free singleton pages
#if MI_DEBUG>1
_mi_page_free_collect(page, false); // update `used` count
mi_assert_internal(mi_page_all_free(page));
#endif
// we can free the page directly
_mi_arena_page_free(page);
return;
}
else {
const bool was_full = mi_page_is_full(page);
_mi_page_free_collect(page,false); // update used
if (mi_page_all_free(page)) {
// no need to unabandon as it is unmapped
_mi_arena_page_free(page);
return;
}
else if (was_full && _mi_arena_page_reabandon_full(page)) {
return;
}
else if (!mi_page_is_mostly_used(page) && _mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0) {
// the page has still some blocks in use (but not too many)
// reclaim in our heap if compatible, or otherwise abandon again
// todo: optimize this check further?
// note: don't use `mi_heap_get_default()` as we may just have terminated this thread and we should
// not reinitialize the heap for this thread. (can happen due to thread-local destructors for example -- issue #944)
mi_heap_t* const heap = mi_prim_get_default_heap();
if (heap != (mi_heap_t*)&_mi_heap_empty) { // we did not already terminate our thread (can this happen?
mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag);
if ((tagheap != NULL) && // don't reclaim across heap object types
(page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
(_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
)
{
_mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_free, 1);
// make it part of our heap (no need to unabandon as is unmapped)
_mi_heap_page_reclaim(tagheap, page);
return;
}
}
}
}
}
else {
// don't reclaim pages that can be found for fresh page allocations
}
// not reclaimed or free'd, unown again
_mi_page_unown(page);
#endif
}
/*
// we own the page now..
// safe to collect the thread atomic free list
_mi_page_free_collect(page, false); // update `used` count
if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_all_free(page)); }
if (mi_page_all_free(page)) {
// first remove it from the abandoned pages in the arena -- this waits for any readers to finish
_mi_arena_page_unabandon(page); // this must be before free'ing
// we can free the page directly
_mi_arena_page_free(page);
return;
}
else if (!mi_page_is_mostly_used(page)) {
// the page has still some blocks in use (but not too many)
// reclaim in our heap if compatible, or otherwise abandon again
// todo: optimize this check further?
// note: don't use `mi_heap_get_default()` as we may just have terminated this thread and we should
// not reinitialize the heap for this thread. (can happen due to thread-local destructors for example -- issue #944)
mi_heap_t* const heap = mi_prim_get_default_heap();
if ((_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0) && // only if reclaim on free is allowed
(heap != (mi_heap_t*)&_mi_heap_empty)) // we did not already terminate our thread (can this happen?
{
mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag);
if ((tagheap != NULL) && // don't reclaim across heap object types
(page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
(_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
)
{
// first remove it from the abandoned pages in the arena -- this waits for any readers to finish
_mi_arena_page_unabandon(page);
_mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_free, 1);
// make it part of our heap
_mi_heap_page_reclaim(tagheap, page);
return;
}
}
}
// we cannot reclaim this page.. leave it abandoned
// todo: should re-abandon or otherwise a partly used page could never be re-used if the
// objects in it are not freed explicitly.
_mi_page_unown(page);
*/
// Push a block that is owned by another thread (or abandoned) on its page-local thread free list.
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page, block));
// _mi_padding_shrink(page, block, sizeof(mi_block_t));
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
size_t dbgsize = mi_usable_size(block);
if (dbgsize > MI_MiB) { dbgsize = MI_MiB; }
_mi_memset_aligned(block, MI_DEBUG_FREED, dbgsize);
#endif
// push atomically on the page thread free list
mi_thread_free_t tf_new;
mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
do {
mi_block_set_next(page, block, mi_tf_block(tf_old));
tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));
// and atomically reclaim the page if it was abandoned
bool reclaimed = !mi_tf_is_owned(tf_old);
if (reclaimed) {
mi_free_try_reclaim_mt(page);
}
}
/*
// Try to put the block on either the page-local thread free list,
// or the heap delayed free list (if this is the first non-local free in that page)
mi_thread_free_t tfreex;
bool use_delayed;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
if mi_unlikely(use_delayed) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
}
else {
// usual: directly add to page thread_free list
mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block);
}
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
// If this was the first non-local free, we need to push it on the heap delayed free list instead
if mi_unlikely(use_delayed) {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap,block,dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}
// and reset the MI_DELAYED_FREEING flag
tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
tfreex = tfree;
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
}
}
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// first see if the page was abandoned and if we can reclaim it into our thread
if (mi_page_is_abandoned(page)) {
if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 ||
mi_page_is_singleton(page)) { // only one block, and we are free-ing it
if (mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initalized (issue #944))
{
// the page is abandoned, try to reclaim it into our heap
if (_mi_arena_try_reclaim(mi_heap_get_default(), page)) { // TODO: avoid putting it in the full free queue
mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
// mi_assert_internal(mi_heap_get_default()->tld->subproc == page->subproc);
mi_free(block); // recursively free as now it will be a local free in our heap
return;
}
else {
if (mi_page_is_abandoned(page)) {
// mi_assert(false);
}
// mi_assert_internal(!mi_page_is_singleton(page)); // we should have succeeded on singleton pages
}
}
}
}
// The padding check may access the non-thread-owned page for the key values.
// that is safe as these are constant and the page won't be freed (as the block is not freed yet).
mi_check_padding(page, block);
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page,block));
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));
if (mi_page_is_huge(page)) {
mi_assert_internal(mi_page_is_singleton(page));
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
_mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL); // resets conservatively
}
else {
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
memset(block, MI_DEBUG_FREED, mi_usable_size(block));
#endif
}
// and finally free the actual block by pushing it on the owning heap
// thread_delayed free list (or heap delayed free list)
mi_free_block_delayed_mt(page,block);
}
*/
// ------------------------------------------------------
// Usable size

View file

@ -44,7 +44,7 @@ static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
mi_assert_internal(_mi_ptr_page(page) == page);
size_t count = 0;
while (head != NULL) {
mi_assert_internal((uint8_t*)head - (uint8_t*)page > MI_LARGE_PAGE_SIZE || page == _mi_ptr_page(head));
mi_assert_internal((uint8_t*)head - (uint8_t*)page > (ptrdiff_t)MI_LARGE_PAGE_SIZE || page == _mi_ptr_page(head));
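// note: the (ptrdiff_t) cast keeps the comparison signed, avoiding an implicit
// signed/unsigned conversion of the pointer difference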
count++;
head = mi_block_next(page, head);
}