Mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-12 02:09:32 +03:00)
more documentation; better pairmap find_and_set_to_busy, busy flag is now 0x10
This commit is contained in:
parent 45f7fb559a
commit afe9089152

5 changed files with 296 additions and 393 deletions
src/arena.c (88 changes)
@@ -199,7 +199,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
void* p = mi_arena_slice_start(arena, slice_index);
*memid = mi_memid_create_arena(arena->id, arena->exclusive, slice_index, slice_count);
memid->is_pinned = arena->memid.is_pinned;

// set the dirty bits
if (arena->memid.initially_zero) {
// size_t dirty_count = 0;

@@ -239,7 +239,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
memid->initially_zero = false;
}
}
#endif
#endif
size_t already_committed_count = 0;
mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count);
if (already_committed_count < slice_count) {

@@ -247,7 +247,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
mi_stat_decrease(_mi_stats_main.committed, mi_size_of_slices(already_committed_count));
}
}
}
}
}
else {
// no need to commit, but check if already fully committed

@@ -282,8 +282,8 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_SLICE_SIZE);

if (arena_count >= 1 && arena_count <= 128) {
// scale up the arena sizes exponentially every 8 entries
const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16);
// scale up the arena sizes exponentially every 8 entries
const size_t multiplier = (size_t)1 << _mi_clamp(arena_count/8, 0, 16);
size_t reserve = 0;
if (!mi_mul_overflow(multiplier, arena_reserve, &reserve)) {
arena_reserve = reserve;

@@ -399,7 +399,7 @@ again:
if (mi_lock_try_acquire(&mi_arena_reserve_lock)) {
mi_arena_id_t arena_id = 0;
bool ok = mi_arena_reserve(mi_size_of_slices(slice_count), allow_large, req_arena_id, &arena_id);
mi_lock_release(&mi_arena_reserve_lock);
mi_lock_release(&mi_arena_reserve_lock);
if (ok) {
// and try allocate in there
mi_assert_internal(req_arena_id == _mi_arena_id_none());

@@ -476,6 +476,19 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t
Arena page allocation
----------------------------------------------------------- */

static bool mi_arena_claim_abandoned(size_t slice_index, void* arg1, void* arg2) {
mi_arena_t* arena = (mi_arena_t*)arg1;
mi_subproc_t* subproc = (mi_subproc_t*)arg2;

// found an abandoned page of the right size
// it is set busy for now so we can read safely even with concurrent mi_free reclaiming
// try to claim ownership atomically
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
if (subproc != page->subproc) return false;
if (!mi_page_try_claim_ownership(page)) return false;
return true;
}

static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t block_size, mi_arena_id_t req_arena_id, mi_tld_t* tld)
{
MI_UNUSED(slice_count);

@@ -493,38 +506,29 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl
{
size_t slice_index;
mi_pairmap_t* const pairmap = &arena->pages_abandoned[bin];
while (mi_pairmap_try_find_and_set_busy(pairmap, tseq, &slice_index)) { // todo: don't restart from scratch if we fail for some entry?
// found an abandoned page of the right size
// it is set busy for now so we can read safely even with concurrent mi_free reclaiming
// try to claim ownership atomically
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
if (!mi_page_try_claim_ownership(page)) {
// a concurrent free already grabbed the page.
// Restore the abandoned_map to make it available again (unblocking busy waiters)
mi_pairmap_set(pairmap, slice_index);
}
else {
// we got ownership, clear the abandoned entry (unblocking busy waiters)
mi_pairmap_clear(pairmap, slice_index);
mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
_mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
_mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1);

_mi_page_free_collect(page, false); // update `used` count
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page);
mi_assert_internal(mi_page_block_size(page) == block_size);
mi_assert_internal(mi_page_is_abandoned(page));
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(!mi_page_is_full(page));
return page;
}
}
if (mi_pairmap_try_find_and_set_busy(pairmap, tseq, &slice_index, &mi_arena_claim_abandoned, arena, subproc)) {
// found an abandoned page of the right size
// and claimed ownership.
mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
_mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
_mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1);

_mi_page_free_collect(page, false); // update `used` count
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count));
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page);
mi_assert_internal(mi_page_block_size(page) == block_size);
mi_assert_internal(!mi_page_is_full(page));
return page;
}
}
mi_forall_arenas_end();
return NULL;

@@ -565,8 +569,8 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment));

// claimed free slices: initialize the page partly
if (!memid.initially_zero) {
_mi_memzero_aligned(page, sizeof(*page));
if (!memid.initially_zero) {
_mi_memzero_aligned(page, sizeof(*page));
}
#if MI_DEBUG > 1
else {

@@ -779,7 +783,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) {
mi_assert_internal(mi_bitmap_is_clearN(arena->slices_purge, slice_index, slice_count));

// this busy waits until a concurrent reader (from alloc_abandoned) is done
mi_pairmap_clear_while_not_busy(&arena->pages_abandoned[bin], slice_index);
mi_pairmap_clear_once_not_busy(&arena->pages_abandoned[bin], slice_index);
mi_page_clear_abandoned_mapped(page);
mi_atomic_decrement_relaxed(&page->subproc->abandoned_count[bin]);
}

@@ -999,7 +1003,7 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t*
mi_atomic_decrement_acq_rel(&mi_arena_count);
return false;
}

_mi_stat_counter_increase(&stats->arena_count,1);
arena->id = mi_arena_id_create(i);
mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);

@@ -1049,7 +1053,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
// todo: allow larger areas (either by splitting it up in arena's or having larger arena's)
_mi_warning_message("cannot use OS memory since it is too large (size %zu MiB, maximum is %zu MiB)", size/MI_MiB, mi_size_of_slices(MI_BITMAP_MAX_BIT_COUNT)/MI_MiB);
return false;
}
}
size_t bitmap_base;
const size_t info_slices = mi_arena_info_slices_needed(slice_count, &bitmap_base);
if (slice_count < info_slices+1) {
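The hunks above replace the scan-then-claim loop with a claim callback (`mi_arena_claim_abandoned`) that runs while the pairmap entry is marked busy. Below is a minimal, self-contained sketch of that protocol, not part of this commit, using made-up names and C11 atomics instead of mimalloc's internals: each 2-bit entry is 00 (clear), 10 (busy) or 11 (set); a scanner CASes an entry from set to busy, calls the claim function while it is busy, and then either clears the entry (claimed) or restores it to set.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAIR_CLEAR 0u   /* 00 */
    #define PAIR_BUSY  2u   /* 10 -- the new encoding in this commit; it used to be 01 */
    #define PAIR_SET   3u   /* 11 */

    typedef bool (claim_fun_t)(size_t pair_idx, void* arg);

    /* try to transition pair `pair_idx` from SET to BUSY; fails if it is not SET */
    static bool pair_try_set_busy(_Atomic(uint64_t)* field, size_t pair_idx) {
      const unsigned shift = (unsigned)(2*pair_idx);
      const uint64_t mask  = (uint64_t)3 << shift;
      uint64_t old = atomic_load_explicit(field, memory_order_relaxed);
      do {
        if (((old & mask) >> shift) != PAIR_SET) return false;
      } while (!atomic_compare_exchange_weak_explicit(field, &old,
                 (old & ~mask) | ((uint64_t)PAIR_BUSY << shift),
                 memory_order_acq_rel, memory_order_relaxed));
      return true;
    }

    /* unconditionally store a pair value (clear on success, or restore SET) */
    static void pair_store(_Atomic(uint64_t)* field, size_t pair_idx, uint64_t state) {
      const unsigned shift = (unsigned)(2*pair_idx);
      const uint64_t mask  = (uint64_t)3 << shift;
      uint64_t old = atomic_load_explicit(field, memory_order_relaxed);
      while (!atomic_compare_exchange_weak_explicit(field, &old,
               (old & ~mask) | (state << shift),
               memory_order_release, memory_order_relaxed)) { /* retry */ }
    }

    /* scan for a SET pair, mark it BUSY, run the claim callback, then finalize */
    static bool pair_find_and_claim(_Atomic(uint64_t)* field, size_t pair_count,
                                    claim_fun_t* claim, void* arg, size_t* out_idx) {
      for (size_t i = 0; i < pair_count; i++) {
        if (pair_try_set_busy(field, i)) {
          if (claim(i, arg)) { pair_store(field, i, PAIR_CLEAR); *out_idx = i; return true; }
          pair_store(field, i, PAIR_SET);   /* claim failed: make the entry available again */
        }
      }
      return false;
    }

    static bool demo_claim(size_t pair_idx, void* arg) {
      (void)arg;
      printf("claiming pair %zu while it is BUSY\n", pair_idx);
      return true;                          /* pretend the claim succeeded */
    }

    int main(void) {
      _Atomic(uint64_t) field = (uint64_t)PAIR_SET << 4;   /* pair 2 is SET */
      size_t idx;
      if (pair_find_and_claim(&field, 32, demo_claim, NULL, &idx)) {
        printf("claimed pair %zu; field is now %llu\n", idx,
               (unsigned long long)atomic_load(&field));
      }
      return 0;
    }

Keeping the entry busy while the claim callback runs is what lets a concurrent `mi_free` read the page safely: the busy state blocks the final unabandon/free until the reader is done.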
src/bitmap.c (212 changes)
@@ -995,13 +995,13 @@ mi_decl_nodiscard bool mi_bitmap_try_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, s

// Set/clear a sequence of 2 bits that were on an even `idx` in the bitmap; returns `true` if atomically transitioned from 0's to 1's (or 1's to 0's).
// `n` cannot cross chunk boundaries (and `n <= MI_BITMAP_CHUNK_BITS`)!
static bool mi_bitmap_xset_pair(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
static bool mi_bitmap_xset_pair(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal((idx%2)==0);
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));

if (set) {
// first set the chunkmap since it is a conservative approximation (increases epoch)
mi_bitmap_chunkmap_set(bitmap, chunk_idx);

@@ -1066,7 +1066,7 @@ static inline bool mi_bitmap_is_xset2(mi_xset_t set, mi_bitmap_t* bitmap, size_t
mi_assert_internal(idx + 2 <= mi_bitmap_max_bits(bitmap));
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(cidx + 2 <= MI_BITMAP_CHUNK_BITS);
mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
return mi_bitmap_chunk_is_xset2(set, &bitmap->chunks[chunk_idx], cidx);
}

@@ -1091,13 +1091,13 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
/* --------------------------------------------------------------------------------
bitmap try_find_and_clear
-------------------------------------------------------------------------------- */

/*
typedef bool (mi_bitmap_find_fun_t)(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx);

static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx, mi_bitmap_find_fun_t* find_fun)
{
if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;

// start chunk index -- todo: can depend on the tseq to decrease contention between threads
MI_UNUSED(tseq);
const size_t chunk_start = 0;

@@ -1105,7 +1105,7 @@ static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq
const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS;

// for each chunkmap entry `i`
for( size_t _i = 0; _i < bitmap->chunk_map_count; _i++)
for( size_t _i = 0; _i < bitmap->chunk_map_count; _i++)
{
size_t i = (_i + chunk_map_start);
if (i > bitmap->chunk_map_count) i -= bitmap->chunk_map_count; // adjust for the start position

@@ -1122,50 +1122,106 @@ static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq
if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; }
// set the chunk idx
const size_t chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift;

// try to find and clear N bits in that chunk
if (chunk_idx < mi_bitmap_chunk_count(bitmap)) { // we can have less chunks than in the chunkmap..
if ((*find_fun)(bitmap, n, chunk_idx, epoch, pidx)) {
return true;
}
}

// skip to the next bit
cmap_idx_shift += cmap_idx+1;
cmap >>= cmap_idx; // skip scanned bits (and avoid UB for `cmap_idx+1`)
cmap >>= 1;
}
}

return false;
}
*/

static bool mi_bitmap_try_find_and_clearN_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
return true;
}
else {
// we may find that all are cleared only on a second iteration but that is ok as
// the chunkmap is a conservative approximation.
if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
}
return false;
}
}
#define mi_bitmap_forall_chunks(bitmap, tseq, name_epoch, name_chunk_idx) \
{ \
/* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
MI_UNUSED(tseq); \
const size_t chunk_start = 0; \
const size_t chunk_map_start = chunk_start / MI_CHUNKMAP_BITS; \
const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS; \
/* for each chunkmap entry `i` */ \
for (size_t _i = 0; _i < bitmap->chunk_map_count; _i++) { \
size_t i = (_i + chunk_map_start); \
if (i > bitmap->chunk_map_count) i -= bitmap->chunk_map_count; /* adjust for the start position */ \
\
const size_t chunk_idx0 = i*MI_CHUNKMAP_BITS; \
mi_epoch_t name_epoch; \
mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, chunk_idx0, &name_epoch); \
if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); } /* rotate right for the start position (on the first iteration) */ \
\
uint32_t cmap_idx; /* one bit set of each chunk that may have bits set */ \
size_t cmap_idx_shift = 0; /* shift through the cmap */ \
while (mi_bsf32(cmap, &cmap_idx)) { /* find least bit that is set */ \
/* adjust for the start position again */ \
if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; } \
/* set the chunk idx */ \
const size_t name_chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift; \
/* try to find and clear N bits in that chunk */ \
if (name_chunk_idx < mi_bitmap_chunk_count(bitmap)) { /* we can have less chunks than in the chunkmap.. */

#define mi_bitmap_forall_chunks_end() \
} \
/* skip to the next bit */ \
cmap_idx_shift += cmap_idx+1; \
cmap >>= cmap_idx; /* skip scanned bits (and avoid UB for `cmap_idx+1`) */ \
cmap >>= 1; \
} \
}}

//static bool mi_bitmap_try_find_and_clearN_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
//  size_t cidx;
//  if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
//    *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
//    mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
//    return true;
//  }
//  else {
//    // we may find that all are cleared only on a second iteration but that is ok as
//    // the chunkmap is a conservative approximation.
//    if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
//      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
//    }
//    return false;
//  }
//}

// Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
{
return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_clearN_at);
// return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_clearN_at);
mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
{
size_t cidx;
if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
return true;
}
else {
// we may find that all are cleared only on a second iteration but that is ok as
// the chunkmap is a conservative approximation.
if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
}
// continue
}
}
mi_bitmap_forall_chunks_end();
return false;
}

/* --------------------------------------------------------------------------------
pairmap
pairmap
-------------------------------------------------------------------------------- */

void mi_pairmap_init(mi_pairmap_t* pairmap, mi_bitmap_t* bm1, mi_bitmap_t* bm2) {

@@ -1215,10 +1271,10 @@ bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx) {
pairmap clear while not busy
-------------------------------------------------------------------------------- */

static inline bool mi_bfield_atomic_clear2_while_not_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
static inline bool mi_bfield_atomic_clear2_once_not_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 10 (busy), and 11 (set).
mi_assert_internal(idx < MI_BFIELD_BITS-1);
const mi_bfield_t mask = ((mi_bfield_t)0x03 << idx);
const mi_bfield_t mask = ((mi_bfield_t)MI_PAIR_SET << idx);
const mi_bfield_t mask_busy = ((mi_bfield_t)MI_PAIR_BUSY << idx);
mi_bfield_t bnew;
mi_bfield_t old = mi_atomic_load_relaxed(b);

@@ -1238,32 +1294,32 @@ static inline bool mi_bfield_atomic_clear2_while_not_busy(_Atomic(mi_bfield_t)*b
return ((old&mask) == mask);
}

static inline bool mi_bitmap_chunk_clear2_while_not_busy(mi_bitmap_chunk_t* chunk, size_t cidx) {
static inline bool mi_bitmap_chunk_clear2_once_not_busy(mi_bitmap_chunk_t* chunk, size_t cidx) {
mi_assert_internal(cidx < MI_BITMAP_CHUNK_BITS);
const size_t i = cidx / MI_BFIELD_BITS;
const size_t idx = cidx % MI_BFIELD_BITS;
return mi_bfield_atomic_clear2_while_not_busy(&chunk->bfields[i], idx);
return mi_bfield_atomic_clear2_once_not_busy(&chunk->bfields[i], idx);
}

static bool mi_bitmap_clear2_while_not_busy(mi_bitmap_t* bitmap, size_t idx) {
static bool mi_bitmap_clear2_once_not_busy(mi_bitmap_t* bitmap, size_t idx) {
mi_assert_internal((idx%2)==0);
mi_assert_internal(idx < mi_bitmap_max_bits(bitmap));
const size_t chunk_idx = idx / MI_BITMAP_CHUNK_BITS;
const size_t cidx = idx % MI_BITMAP_CHUNK_BITS;
mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
const mi_epoch_t epoch = mi_bitmap_chunkmap_epoch(bitmap, chunk_idx);
bool cleared = mi_bitmap_chunk_clear2_while_not_busy(&bitmap->chunks[chunk_idx], cidx);
bool cleared = mi_bitmap_chunk_clear2_once_not_busy(&bitmap->chunks[chunk_idx], cidx);
if (cleared && epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
}
}
return cleared;
}

void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
void mi_pairmap_clear_once_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {
mi_bitmap_t* bitmap;
size_t idx;
mi_pairmap_from_pair_idx(pairmap, pair_idx, &bitmap, &idx);
mi_bitmap_clear2_while_not_busy(bitmap, idx);
mi_bitmap_clear2_once_not_busy(bitmap, idx);
}

@@ -1274,9 +1330,9 @@ void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx) {

// Atomically go from set to busy, or return false otherwise and leave the bit field as-is.
static inline bool mi_bfield_atomic_try_set_busy(_Atomic(mi_bfield_t)*b, size_t idx) {
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 10 (busy), and 11 (set).
mi_assert_internal(idx < MI_BFIELD_BITS-1);
const mi_bfield_t mask = ((mi_bfield_t)0x03 << idx);
const mi_bfield_t mask = ((mi_bfield_t)MI_PAIR_SET << idx);
const mi_bfield_t mask_busy = ((mi_bfield_t)MI_PAIR_BUSY << idx);
mi_bfield_t old;
mi_bfield_t bnew;

@@ -1290,49 +1346,57 @@ static inline bool mi_bfield_atomic_try_set_busy(_Atomic(mi_bfield_t)*b, size_t

static inline bool mi_bitmap_chunk_try_find_and_set_busy(mi_bitmap_chunk_t* chunk, size_t* pidx) {
for (int i = 0; i < MI_BITMAP_CHUNK_FIELDS; i++) {
size_t idx;
if mi_unlikely(mi_bfield_find_least_bit(chunk->bfields[i], &idx)) { // find least 1-bit, it may be set or busy
mi_assert_internal((idx%2)==0); // bit patterns are 00 (clear), 01 (busy), and 11 (set).
if mi_likely(mi_bfield_atomic_try_set_busy(&chunk->bfields[i], idx)) {
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS-1);
return true;
while (true) {
const mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[i]) & MI_BFIELD_LO_BIT2; // only keep MI_PAIR_SET bits
size_t idx;
if (!mi_bfield_find_least_bit(b, &idx)) { // find least 1-bit
break; // not found: continue with the next field
}
else {
mi_assert_internal((idx%2)==0);
if mi_likely(mi_bfield_atomic_try_set_busy(&chunk->bfields[i], idx)) {
*pidx = (i*MI_BFIELD_BITS) + idx;
mi_assert_internal(*pidx < MI_BITMAP_CHUNK_BITS-1);
return true;
}
// else: try this word once again
}
}
}
return false;
}

static bool mi_bitmap_try_find_and_set_busy_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
MI_UNUSED(epoch); MI_UNUSED(n);
mi_assert_internal(n==2);
size_t cidx;
if mi_likely(mi_bitmap_chunk_try_find_and_set_busy(&bitmap->chunks[chunk_idx], &cidx)) {
*pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
return true;
}
else {
return false;
}
}

static bool mi_bitmap_try_find_and_set_busy(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx) {
return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_set_busy_at);
static bool mi_bitmap_try_find_and_set_busy(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t idx_offset, size_t* ppair_idx,
mi_bitmap_claim_while_busy_fun_t* claim, void* arg1, void* arg2)
{
mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
{
MI_UNUSED(epoch); MI_UNUSED(n);
mi_assert_internal(n==2);
size_t cidx;
if mi_likely(mi_bitmap_chunk_try_find_and_set_busy(&bitmap->chunks[chunk_idx], &cidx)) {
const size_t idx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
mi_assert_internal((idx%2)==0);
const size_t pair_idx = (idx + idx_offset)/2;
if (claim(pair_idx, arg1, arg2)) { // while busy, the claim function can read from the page
mi_bitmap_xset_pair(MI_BIT_CLEAR, bitmap, idx); // claimed, clear the entry
*ppair_idx = pair_idx;
return true;
}
else {
mi_bitmap_xset_pair(MI_BIT_SET, bitmap, idx); // not claimed, reset the entry
// and continue
}
}
}
mi_bitmap_forall_chunks_end();
return false;
}

// Used to find an abandoned page, and transition from set to busy.
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx) {
size_t idx = 0;
if (!mi_bitmap_try_find_and_set_busy(pairmap->bitmap1, 2, tseq, &idx)) {
if (!mi_bitmap_try_find_and_set_busy(pairmap->bitmap2, 2, tseq, &idx)) {
return false;
}
else {
idx += mi_bitmap_max_bits(pairmap->bitmap1);
}
}
mi_assert_internal((idx%2)==0);
*pidx = idx/2;
return true;
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pair_idx,
mi_bitmap_claim_while_busy_fun_t* claim, void* arg1, void* arg2 ) {
if (mi_bitmap_try_find_and_set_busy(pairmap->bitmap1, 2, tseq, 0, pair_idx, claim, arg1, arg2)) return true;
return mi_bitmap_try_find_and_set_busy(pairmap->bitmap2, 2, tseq, mi_bitmap_max_bits(pairmap->bitmap1), pair_idx, claim, arg1, arg2);
}
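The new `mi_bitmap_forall_chunks` macro factors the chunkmap scan out of `mi_bitmap_try_find_and_clearN` so the same iteration can also drive `mi_bitmap_try_find_and_set_busy`. A standalone sketch of that scan pattern follows (hypothetical helper names; the GCC/Clang builtin `__builtin_ctz` is assumed): find the least set bit of the 32-bit chunkmap word, visit the corresponding chunk, then shift past the scanned bits in two steps so the shift amount never reaches the word width.

    #include <stdint.h>
    #include <stdio.h>

    /* find-least-set-bit helper (a bsf wrapper; returns 0 when the word is zero) */
    static int bsf32(uint32_t x, uint32_t* idx) {
      if (x == 0) return 0;
      *idx = (uint32_t)__builtin_ctz(x);
      return 1;
    }

    /* Walk all set bits of one chunkmap word, visiting each candidate chunk index,
       shifting past scanned bits just like the macro does (two shifts avoid the
       undefined 32-bit shift when the found bit is bit 31). */
    static void visit_chunks(uint32_t cmap, uint32_t chunk_idx0) {
      uint32_t cmap_idx;
      uint32_t cmap_idx_shift = 0;
      while (bsf32(cmap, &cmap_idx)) {
        const uint32_t chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift;
        printf("candidate chunk %u\n", chunk_idx);
        cmap_idx_shift += cmap_idx + 1;
        cmap >>= cmap_idx;   /* skip the scanned bits ... */
        cmap >>= 1;          /* ... and the bit we just visited */
      }
    }

    int main(void) {
      visit_chunks(0x80000012u, 0);   /* bits 1, 4 and 31 set -> chunks 1, 4, 31 */
      return 0;
    }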
src/bitmap.h (125 changes)
@@ -13,9 +13,47 @@ Concurrent bitmap that can set/reset sequences of bits atomically
#define MI_BITMAP_H

/* --------------------------------------------------------------------------------
Definitions
-------------------------------------------------------------------------------- */
Atomic bitmaps:

`mi_bfield_t`: is a single machine word that can efficiently be bit counted (usually `size_t`)
each bit usually represents a single MI_ARENA_SLICE_SIZE in an arena (64 KiB).
We need 16K bits to represent a 1GiB arena.

`mi_bitmap_chunk_t`: a chunk of bfield's of a total of MI_BITMAP_CHUNK_BITS (= 512)
allocations never span across chunks -- so MI_ARENA_MAX_OBJ_SIZE is the number
of bits in a chunk times the MI_ARENA_SLICE_SIZE (512 * 64KiB = 32 MiB).
These chunks are cache-aligned and we can use AVX2/AVX512/SVE/SVE2/etc. instructions
to scan for bits (perhaps) more efficiently.

`mi_chunkmap_t`: for each chunk we track if it has (potentially) any bit set.
The chunkmap has 1 bit per chunk that is set if the chunk potentially has a bit set.
This is used to avoid scanning every chunk. (and thus strictly an optimization)
It is conservative: it is fine to a bit in the chunk map even if the chunk turns out
to have no bits set.

When we (potentially) set a bit in a chunk, we first update the chunkmap.
However, when we clear a bit in a chunk, and the chunk is indeed all clear, we
cannot safely clear the bit corresponding to the chunk in the chunkmap since it
may race with another thread setting a bit in the same chunk (and we may clear the
bit even though a bit is set in the chunk which is not allowed).

To fix this, the chunkmap contains 32-bits of bits for chunks, and a 32-bit "epoch"
counter that is increased everytime a bit is set. We only clear a bit if the epoch
stayed the same over our clear operation (so we know no other thread in the mean
time set a bit in any of the chunks corresponding to the chunkmap).
Since increasing the epoch and setting a bit must be atomic, we use only half-word
bits (32) (we could use 128-bit atomics if needed since modern hardware supports this)

`mi_bitmap_t`: a bitmap with N chunks. A bitmap always has MI_BITMAP_MAX_CHUNK_FIELDS (=16)
and can support arena's from few chunks up to 16 chunkmap's = 16 * 32 chunks = 16 GiB
The `chunk_count` can be anything from 1 to the max supported by the chunkmap's but
each chunk is always complete (512 bits, so 512 * 64KiB = 32MiB memory area's).

For now, the implementation assumes MI_HAS_FAST_BITSCAN and uses trailing-zero-count
and pop-count (but we think it can be adapted work reasonably well on older hardware too)
--------------------------------------------------------------------------------------------- */

// A word-size bit field.
typedef size_t mi_bfield_t;

#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)

@@ -29,16 +67,18 @@ typedef size_t mi_bfield_t;
#define MI_BITMAP_CHUNK_FIELDS (MI_BITMAP_CHUNK_BITS / MI_BFIELD_BITS)
#define MI_BITMAP_CHUNK_BITS_MOD_MASK (MI_BITMAP_CHUNK_BITS - 1)

// 512 bits on 64_bit
// A bitmap chunk contains 512 bits of bfields on 64_bit (256 on 32-bit)
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_chunk_s {
_Atomic(mi_bfield_t) bfields[MI_BITMAP_CHUNK_FIELDS];
} mi_bitmap_chunk_t;

// for now 32-bit epoch + 32-bit bit-set (note: with ABA instructions we can double this)
typedef uint64_t mi_chunkmap_t;
typedef uint32_t mi_epoch_t;
typedef uint32_t mi_cmap_t;

#define MI_CHUNKMAP_BITS (32) // 1 chunkmap tracks 32 chunks

#define MI_BITMAP_MAX_CHUNKMAPS (16)

@@ -48,15 +88,18 @@ typedef uint32_t mi_cmap_t;
#define MI_BITMAP_MAX_BIT_COUNT (MI_BITMAP_MAX_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS) // 16 GiB arena
#define MI_BITMAP_MIN_BIT_COUNT (MI_BITMAP_MIN_CHUNK_COUNT * MI_BITMAP_CHUNK_BITS) // 1 GiB arena

// An atomic bitmap
typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_s {
_Atomic(size_t) chunk_map_count;
_Atomic(size_t) chunk_count;
_Atomic(size_t) chunk_map_count; // valid chunk_map's
_Atomic(size_t) chunk_count; // total count of chunks
size_t padding[MI_BITMAP_CHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc
_Atomic(mi_chunkmap_t) chunk_maps[MI_BITMAP_MAX_CHUNKMAPS];

mi_bitmap_chunk_t chunks[MI_BITMAP_MIN_BIT_COUNT]; // or more, up to MI_BITMAP_MAX_CHUNK_COUNT
} mi_bitmap_t;

static inline size_t mi_bitmap_chunk_map_count(const mi_bitmap_t* bitmap) {
return mi_atomic_load_relaxed(&bitmap->chunk_map_count);
}

@@ -72,17 +115,19 @@ static inline size_t mi_bitmap_max_bits(const mi_bitmap_t* bitmap) {

/* --------------------------------------------------------------------------------
Atomic bitmap
Atomic bitmap operations
-------------------------------------------------------------------------------- */

// Many operations are generic over setting or clearing the bit sequence: we use `mi_xset_t` for this (true if setting, false if clearing)
typedef bool mi_xset_t;
#define MI_BIT_SET (true)
#define MI_BIT_CLEAR (false)

// Required size of a bitmap to represent `bit_count` bits.
size_t mi_bitmap_size(size_t bit_count, size_t* chunk_count);

// initialize a bitmap to all unset; avoid a mem_zero if `already_zero` is true
// Initialize a bitmap to all clear; avoid a mem_zero if `already_zero` is true
// returns the size of the bitmap.
size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero);

@@ -134,56 +179,46 @@ mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t

// Try to set/clear a bit in the bitmap; returns `true` if atomically transitioned from 0 to 1 (or 1 to 0)
// and false otherwise leaving the bitmask as is.
//mi_decl_nodiscard bool mi_bitmap_try_xset(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx);
//
//static inline bool mi_bitmap_try_set(mi_bitmap_t* bitmap, size_t idx) {
//  return mi_bitmap_try_xset(MI_BIT_SET, bitmap, idx);
//}
//
//static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) {
//  return mi_bitmap_try_xset(MI_BIT_CLEAR, bitmap, idx);
//}

// Try to set/clear a byte in the bitmap; returns `true` if atomically transitioned from 0 to 0xFF (or 0xFF to 0)
// and false otherwise leaving the bitmask as is.
//mi_decl_nodiscard bool mi_bitmap_try_xset8(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx);
//
//static inline bool mi_bitmap_try_set8(mi_bitmap_t* bitmap, size_t idx) {
//  return mi_bitmap_try_xset8(MI_BIT_SET, bitmap, idx);
//}
//
//static inline bool mi_bitmap_try_clear8(mi_bitmap_t* bitmap, size_t idx) {
//  return mi_bitmap_try_xset8(MI_BIT_CLEAR, bitmap, idx);
//}

/* --------------------------------------------------------------------------------
Atomic bitmap for a pair of bits
Atomic bitmap for a pair of bits.

The valid pairs are CLEAR (0), SET (3), or BUSY (2).

These bit pairs are used in the abandoned pages maps: when set, the entry has
an available page. When we scan for an available abandoned page and find an entry SET,
we first set it to BUSY, and try to claim the page atomically (since it can race
with a concurrent `mi_free` which also tries to claim the page). However, unlike `mi_free`,
we cannot be sure that a concurrent `mi_free` also didn't free (and decommit) the page
just when we got the entry. Therefore, a page can only be freed after `mi_arena_unabandon`
which (busy) waits until the BUSY flag is cleared to ensure all readers are done.
(and pair-bit operations must therefore be release_acquire).
-------------------------------------------------------------------------------- */

#define MI_PAIR_CLEAR (0)
#define MI_PAIR_BUSY (1)
#define MI_PAIR_UNUSED (2) // should never occur
#define MI_PAIR_UNUSED (1) // should never occur
#define MI_PAIR_BUSY (2)
#define MI_PAIR_SET (3)

// 0b....0101010101010101
#define MI_BFIELD_LO_BIT2 ((MI_BFIELD_LO_BIT8 << 6)|(MI_BFIELD_LO_BIT8 << 4)|(MI_BFIELD_LO_BIT8 << 2)|MI_BFIELD_LO_BIT8)

// A pairmap manipulates pairs of bits (and consists of 2 bitmaps)
typedef struct mi_pairmap_s {
mi_bitmap_t* bitmap1;
mi_bitmap_t* bitmap2;
mi_bitmap_t* bitmap2;
} mi_pairmap_t;

// initialize a pairmap to all unset; avoid a mem_zero if `already_zero` is true
// initialize a pairmap to all clear; avoid a mem_zero if `already_zero` is true
void mi_pairmap_init(mi_pairmap_t* pairmap, mi_bitmap_t* bm1, mi_bitmap_t* bm2);
bool mi_pairmap_set(mi_pairmap_t* pairmap, size_t pair_idx);
bool mi_pairmap_clear(mi_pairmap_t* pairmap, size_t pair_idx);
bool mi_pairmap_is_clear(mi_pairmap_t* pairmap, size_t pair_idx);
void mi_pairmap_clear_while_not_busy(mi_pairmap_t* pairmap, size_t pair_idx);
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx);
void mi_pairmap_clear_once_not_busy(mi_pairmap_t* pairmap, size_t pair_idx);

typedef bool (mi_bitmap_claim_while_busy_fun_t)(size_t pair_index, void* arg1, void* arg2);
mi_decl_nodiscard bool mi_pairmap_try_find_and_set_busy(mi_pairmap_t* pairmap, size_t tseq, size_t* pidx,
mi_bitmap_claim_while_busy_fun_t* claim, void* arg1 ,void* arg2
);

#endif // MI_XBITMAP_H
#endif // MI_BITMAP_H
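The new header comment above describes the chunkmap as a 64-bit word holding 32 chunk bits plus a 32-bit epoch that is bumped on every set, so that a clear can be abandoned if a set raced with it. A small sketch of that idea, with made-up names and not the mimalloc implementation, is below: setting a chunk bit increments the epoch in the same atomic exchange, and a clear only goes through if the epoch is still the one observed when the chunk was found empty.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* one chunkmap word: low 32 bits = "chunk may have a set bit", high 32 bits = epoch */
    typedef _Atomic(uint64_t) chunkmap_t;

    static uint32_t cmap_bits (uint64_t cm) { return (uint32_t)cm; }
    static uint32_t cmap_epoch(uint64_t cm) { return (uint32_t)(cm >> 32); }

    /* setting a chunk bit also increments the epoch in one atomic RMW */
    static void cmap_set(chunkmap_t* cm, uint32_t chunk) {
      uint64_t old = atomic_load_explicit(cm, memory_order_relaxed);
      uint64_t desired;
      do {
        desired = ((uint64_t)(cmap_epoch(old) + 1) << 32)
                | (cmap_bits(old) | (UINT64_C(1) << chunk));
      } while (!atomic_compare_exchange_weak_explicit(cm, &old, desired,
                 memory_order_acq_rel, memory_order_relaxed));
    }

    /* the clear only succeeds if the epoch is unchanged since we decided the chunk
       was empty; otherwise a concurrent set may have raced and we conservatively
       leave the bit set (which is always allowed) */
    static bool cmap_try_clear(chunkmap_t* cm, uint32_t chunk, uint32_t expected_epoch) {
      uint64_t old = atomic_load_explicit(cm, memory_order_relaxed);
      do {
        if (cmap_epoch(old) != expected_epoch) return false;
      } while (!atomic_compare_exchange_weak_explicit(cm, &old,
                 ((uint64_t)cmap_epoch(old) << 32)
                   | (cmap_bits(old) & ~(UINT64_C(1) << chunk)),
                 memory_order_acq_rel, memory_order_relaxed));
      return true;
    }

    int main(void) {
      chunkmap_t cm = 0;
      cmap_set(&cm, 3);
      uint64_t snapshot = atomic_load(&cm);
      /* ... scan chunk 3 and find it all clear ... */
      bool cleared = cmap_try_clear(&cm, 3, cmap_epoch(snapshot));
      printf("cleared: %d, bits now: 0x%x\n", (int)cleared, cmap_bits(atomic_load(&cm)));
      return 0;
    }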
src/free.c (262 changes)
@@ -148,15 +148,44 @@ void mi_free(void* p) mi_attr_noexcept
}

// ------------------------------------------------------
// Multi-threaded Free (`_mt`)
// ------------------------------------------------------

static void mi_decl_noinline mi_free_try_reclaim_mt(mi_page_t* page);

// Push a block that is owned by another thread (or abandoned) on its page-local thread free list.
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page, block));

// _mi_padding_shrink(page, block, sizeof(mi_block_t));
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
size_t dbgsize = mi_usable_size(block);
if (dbgsize > MI_MiB) { dbgsize = MI_MiB; }
_mi_memset_aligned(block, MI_DEBUG_FREED, dbgsize);
#endif

// push atomically on the page thread free list
mi_thread_free_t tf_new;
mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
do {
mi_block_set_next(page, block, mi_tf_block(tf_old));
tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));

// and atomically reclaim the page if it was abandoned
bool reclaimed = !mi_tf_is_owned(tf_old);
if (reclaimed) {
mi_free_try_reclaim_mt(page);
}
}

static void mi_decl_noinline mi_free_try_reclaim_mt(mi_page_t* page) {
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
#if 1
// we own the page now..
// safe to collect the thread atomic free list
_mi_page_free_collect(page, false); // update `used` count

@@ -209,237 +238,8 @@ static void mi_decl_noinline mi_free_try_reclaim_mt(mi_page_t* page) {

// not reclaimed or free'd, unown again
_mi_page_unown(page);

#else
if (!mi_page_is_abandoned_mapped(page)) {
// singleton or OS allocated
if (mi_page_is_singleton(page)) {
// free singleton pages
#if MI_DEBUG>1
_mi_page_free_collect(page, false); // update `used` count
mi_assert_internal(mi_page_all_free(page));
#endif
// we can free the page directly
_mi_arena_page_free(page);
return;
}
else {
const bool was_full = mi_page_is_full(page);
_mi_page_free_collect(page,false); // update used
if (mi_page_all_free(page)) {
// no need to unabandon as it is unmapped
_mi_arena_page_free(page);
return;
}
else if (was_full && _mi_arena_page_reabandon_full(page)) {
return;
}
else if (!mi_page_is_mostly_used(page) && _mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0) {
// the page has still some blocks in use (but not too many)
// reclaim in our heap if compatible, or otherwise abandon again
// todo: optimize this check further?
// note: don't use `mi_heap_get_default()` as we may just have terminated this thread and we should
// not reinitialize the heap for this thread. (can happen due to thread-local destructors for example -- issue #944)
mi_heap_t* const heap = mi_prim_get_default_heap();
if (heap != (mi_heap_t*)&_mi_heap_empty) { // we did not already terminate our thread (can this happen?
mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag);
if ((tagheap != NULL) && // don't reclaim across heap object types
(page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
(_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
)
{
_mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_free, 1);
// make it part of our heap (no need to unabandon as is unmapped)
_mi_heap_page_reclaim(tagheap, page);
return;
}
}
}
}
}
else {
// don't reclaim pages that can be found for fresh page allocations
}

// not reclaimed or free'd, unown again
_mi_page_unown(page);
#endif
}

/*
// we own the page now..
// safe to collect the thread atomic free list
_mi_page_free_collect(page, false); // update `used` count
if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_all_free(page)); }

if (mi_page_all_free(page)) {
// first remove it from the abandoned pages in the arena -- this waits for any readers to finish
_mi_arena_page_unabandon(page); // this must be before free'ing
// we can free the page directly
_mi_arena_page_free(page);
return;
}
else if (!mi_page_is_mostly_used(page)) {
// the page has still some blocks in use (but not too many)
// reclaim in our heap if compatible, or otherwise abandon again
// todo: optimize this check further?
// note: don't use `mi_heap_get_default()` as we may just have terminated this thread and we should
// not reinitialize the heap for this thread. (can happen due to thread-local destructors for example -- issue #944)
mi_heap_t* const heap = mi_prim_get_default_heap();

if ((_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0) && // only if reclaim on free is allowed
(heap != (mi_heap_t*)&_mi_heap_empty)) // we did not already terminate our thread (can this happen?
{
mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag);
if ((tagheap != NULL) && // don't reclaim across heap object types
(page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
(_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
)
{
// first remove it from the abandoned pages in the arena -- this waits for any readers to finish
_mi_arena_page_unabandon(page);
_mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_free, 1);
// make it part of our heap
_mi_heap_page_reclaim(tagheap, page);
return;
}
}
}

// we cannot reclaim this page.. leave it abandoned
// todo: should re-abandon or otherwise a partly used page could never be re-used if the
// objects in it are not freed explicitly.
_mi_page_unown(page);
*/

// Push a block that is owned by another thread (or abandoned) on its page-local thread free list.
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page, block));

// _mi_padding_shrink(page, block, sizeof(mi_block_t));
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
size_t dbgsize = mi_usable_size(block);
if (dbgsize > MI_MiB) { dbgsize = MI_MiB; }
_mi_memset_aligned(block, MI_DEBUG_FREED, dbgsize);
#endif

// push atomically on the page thread free list
mi_thread_free_t tf_new;
mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
do {
mi_block_set_next(page, block, mi_tf_block(tf_old));
tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new));

// and atomically reclaim the page if it was abandoned
bool reclaimed = !mi_tf_is_owned(tf_old);
if (reclaimed) {
mi_free_try_reclaim_mt(page);
}
}

/*
// Try to put the block on either the page-local thread free list,
// or the heap delayed free list (if this is the first non-local free in that page)
mi_thread_free_t tfreex;
bool use_delayed;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
if mi_unlikely(use_delayed) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
}
else {
// usual: directly add to page thread_free list
mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block);
}
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));

// If this was the first non-local free, we need to push it on the heap delayed free list instead
if mi_unlikely(use_delayed) {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap,block,dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}

// and reset the MI_DELAYED_FREEING flag
tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
tfreex = tfree;
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
}
}

// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// first see if the page was abandoned and if we can reclaim it into our thread
if (mi_page_is_abandoned(page)) {
if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 ||
mi_page_is_singleton(page)) { // only one block, and we are free-ing it
if (mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initalized (issue #944))
{
// the page is abandoned, try to reclaim it into our heap
if (_mi_arena_try_reclaim(mi_heap_get_default(), page)) { // TODO: avoid putting it in the full free queue
mi_assert_internal(_mi_thread_id() == mi_page_thread_id(page));
// mi_assert_internal(mi_heap_get_default()->tld->subproc == page->subproc);
mi_free(block); // recursively free as now it will be a local free in our heap
return;
}
else {
if (mi_page_is_abandoned(page)) {
// mi_assert(false);
}
// mi_assert_internal(!mi_page_is_singleton(page)); // we should have succeeded on singleton pages
}
}
}
}

// The padding check may access the non-thread-owned page for the key values.
// that is safe as these are constant and the page won't be freed (as the block is not freed yet).
mi_check_padding(page, block);

// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page,block));

// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));

if (mi_page_is_huge(page)) {
mi_assert_internal(mi_page_is_singleton(page));
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
_mi_os_reset(mi_page_start(page), mi_page_block_size(page), NULL); // resets conservatively
}
else {
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
memset(block, MI_DEBUG_FREED, mi_usable_size(block));
#endif
}

// and finally free the actual block by pushing it on the owning heap
// thread_delayed free list (or heap delayed free list)
mi_free_block_delayed_mt(page,block);
}
*/

// ------------------------------------------------------
// Usable size
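The reorganized `mi_free_block_mt` above pushes the freed block on `page->xthread_free` with the owned flag always set, and reclaims the page when the previous value shows it was abandoned. A self-contained sketch of that push-and-claim pattern, using a tagged pointer and C11 atomics with invented names rather than mimalloc's `mi_tf_*` helpers:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* a node of the per-page thread-free list */
    typedef struct block_s { struct block_s* next; } block_t;

    /* the list head packs a block pointer and an "owned" bit in bit 0
       (blocks are assumed to be at least 2-byte aligned) */
    typedef _Atomic(uintptr_t) thread_free_t;

    static block_t*  tf_block(uintptr_t tf)            { return (block_t*)(tf & ~(uintptr_t)1); }
    static bool      tf_is_owned(uintptr_t tf)         { return (tf & 1) != 0; }
    static uintptr_t tf_create(block_t* b, bool owned) { return (uintptr_t)b | (owned ? 1 : 0); }

    /* push `block` and claim ownership in the same CAS; returns true if the page
       was abandoned before, i.e. this freeing thread just became the owner */
    static bool push_and_try_claim(thread_free_t* tf, block_t* block) {
      uintptr_t tf_old = atomic_load_explicit(tf, memory_order_relaxed);
      uintptr_t tf_new;
      do {
        block->next = tf_block(tf_old);               /* link to the current list */
        tf_new = tf_create(block, true /* always owned after the push */);
      } while (!atomic_compare_exchange_weak_explicit(tf, &tf_old, tf_new,
                 memory_order_acq_rel, memory_order_relaxed));
      return !tf_is_owned(tf_old);                    /* it was abandoned: we reclaimed it */
    }

    int main(void) {
      thread_free_t tf = tf_create(NULL, false);      /* abandoned page, empty list */
      block_t b1, b2;
      printf("first push reclaims: %d\n", (int)push_and_try_claim(&tf, &b1));   /* 1 */
      printf("second push reclaims: %d\n", (int)push_and_try_claim(&tf, &b2));  /* 0 */
      return 0;
    }

Folding the ownership claim into the same CAS as the list push is what lets a cross-thread free reclaim an abandoned page without any extra synchronization on the fast path.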
@@ -44,7 +44,7 @@ static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
mi_assert_internal(_mi_ptr_page(page) == page);
size_t count = 0;
while (head != NULL) {
mi_assert_internal((uint8_t*)head - (uint8_t*)page > MI_LARGE_PAGE_SIZE || page == _mi_ptr_page(head));
mi_assert_internal((uint8_t*)head - (uint8_t*)page > (ptrdiff_t)MI_LARGE_PAGE_SIZE || page == _mi_ptr_page(head));
count++;
head = mi_block_next(page, head);
}
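The single change in this hunk casts `MI_LARGE_PAGE_SIZE` to `ptrdiff_t` so the pointer difference is compared as a signed value. Assuming the macro expands to an unsigned (`size_t`) constant, the original mixed comparison converts a negative difference to a huge unsigned number and the check silently holds; a tiny demonstration of the effect, with an illustrative constant in place of the real macro:

    #include <stddef.h>
    #include <stdio.h>

    #define LARGE_PAGE_SIZE ((size_t)(2*1024*1024))   /* an unsigned constant, like MI_LARGE_PAGE_SIZE */

    int main(void) {
      char buf[16];
      char* page = &buf[8];
      char* head = &buf[0];                           /* head sits before page: difference is -8 */
      ptrdiff_t diff = head - page;
      /* mixed signed/unsigned comparison: diff is converted to size_t, -8 becomes huge,
         and the check is (wrongly) true */
      printf("unsigned compare: %d\n", (int)(diff > LARGE_PAGE_SIZE));
      /* casting the constant keeps the comparison signed, as in the patched assertion */
      printf("signed compare:   %d\n", (int)(diff > (ptrdiff_t)LARGE_PAGE_SIZE));
      return 0;
    }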