small adjustments

daanx 2024-12-04 21:40:57 -08:00
parent afe9089152
commit bc67be4d79
6 changed files with 43 additions and 134 deletions

View file

@@ -314,6 +314,19 @@ static inline bool mi_bsr(size_t x, size_t* idx) {
   #endif
 }
 
+// Bit scan reverse: find the most significant bit that is set;
+// returns false if `x==0` (with `*idx` undefined) and true otherwise,
+// with `*idx` set to the bit index (`0 <= *idx < 32`).
+static inline bool mi_bsr32(uint32_t x, uint32_t* idx) {
+  #if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  unsigned long i;
+  return (_BitScanReverse(&i, x) ? (*idx = i, true) : false);
+  #else
+  const size_t r = mi_clz((size_t)x);
+  *idx = (uint32_t)(MI_SIZE_BITS - 1 - r);  // bsr counts from the LSB, so the zero-extension of `x` does not change the index
+  return (x!=0);
+  #endif
+}
+
 /* --------------------------------------------------------------------------------
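
For reference, a minimal standalone sketch of the bit-scan-reverse semantics the new `mi_bsr32` implements (the loop-based `bsr32_ref` below is an illustrative reference implementation, not part of the commit): the result is the LSB-based index of the highest set bit, which is also why the fallback can scan the zero-extended value with `mi_clz` and get the same index.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    // Illustrative reference for 32-bit bit-scan-reverse: the LSB-based
    // index of the highest set bit (undefined for x == 0, as in mi_bsr32).
    static int bsr32_ref(uint32_t x) {
      int idx = -1;
      while (x != 0) { x >>= 1; idx++; }
      return idx;
    }

    int main(void) {
      assert(bsr32_ref(1) == 0);             // lowest bit -> index 0
      assert(bsr32_ref(0x80000000u) == 31);  // highest bit -> index 31
      assert(bsr32_ref(0x00F00000u) == 23);  // top set bit of 0xF00000
      // Zero-extending to 64 bits does not change the result, which is
      // why scanning `(size_t)x` with a 64-bit clz works in the fallback.
      printf("ok\n");
      return 0;
    }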

View file

@@ -335,7 +335,7 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are
   size_t _start; \
   if (req_arena_id == _mi_arena_id_none()) { \
     _max_arena = mi_atomic_load_relaxed(&mi_arena_count); \
-    _start = (_max_arena <= 1 ? 0 : (tseq / MI_THREADS_PER_ARENA) % _max_arena); \
+    _start = (_max_arena <= 2 ? 0 : (tseq % (_max_arena-1))); \
   } \
   else { \
     _max_arena = 1; \
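
The new start formula spreads consecutive threads over arenas directly by `tseq` instead of grouping `MI_THREADS_PER_ARENA` threads onto the same arena; note it also never starts a scan at the last arena, since the modulus is `_max_arena-1`. A small standalone comparison of the two schemes (`MI_THREADS_PER_ARENA` is assumed to be 4 here purely for illustration):

    #include <stddef.h>
    #include <stdio.h>

    #define MI_THREADS_PER_ARENA 4  // illustrative value

    // Old scheme: blocks of MI_THREADS_PER_ARENA threads share a start arena.
    static size_t start_old(size_t tseq, size_t max_arena) {
      return (max_arena <= 1 ? 0 : (tseq / MI_THREADS_PER_ARENA) % max_arena);
    }
    // New scheme: consecutive threads start on different arenas right away.
    static size_t start_new(size_t tseq, size_t max_arena) {
      return (max_arena <= 2 ? 0 : (tseq % (max_arena-1)));
    }

    int main(void) {
      const size_t max_arena = 4;
      for (size_t tseq = 0; tseq < 8; tseq++) {
        printf("tseq %zu: old %zu, new %zu\n",
               tseq, start_old(tseq, max_arena), start_new(tseq, max_arena));
      }
      return 0;
    }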
@@ -795,62 +795,6 @@ void _mi_arena_page_unabandon(mi_page_t* page) {
   _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1);
 }
 
-/*
-bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page) {
-  if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_is_abandoned(page)); }
-  mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
-  mi_assert_internal(_mi_ptr_page(page)==page);
-  // if (!mi_page_is_abandoned(page)) return false; // it is not abandoned (anymore)
-
-  // note: we can access the page even it is in the meantime reclaimed by another thread since
-  // we only call this when on free (and thus there is still an object alive in the page)
-  mi_memid_t memid = page->memid;
-  if (!_mi_arena_memid_is_suitable(memid, heap->arena_id)) return false; // don't reclaim between exclusive and non-exclusive arena's
-  if (mi_atomic_load_acquire(&page->xheap) != (uintptr_t)heap->tld->subproc) return false;
-
-  if mi_likely(memid.memkind == MI_MEM_ARENA) {
-    size_t slice_index;
-    mi_arena_t* arena = mi_page_arena(page, &slice_index, NULL);
-    //if (arena->subproc != heap->tld->subproc) return false; // only reclaim within the same subprocess
-
-    // don't reclaim more from a `free` call than half the current segments
-    // this is to prevent a pure free-ing thread to start owning too many segments
-    // (but not for out-of-arena segments as that is the main way to be reclaimed for those)
-    // if (segment->memid.memkind == MI_MEM_ARENA && heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) {
-    //   return false;
-    // }
-    const size_t bin = _mi_bin(page->block_size);
-    if (mi_bitmap_try_clear(&arena->slices_abandoned[bin], slice_index)) {
-      // we got it atomically
-      _mi_page_reclaim(heap, page);
-      mi_assert_internal(!mi_page_is_abandoned(page));
-      return true;
-    }
-    else {
-      if (mi_page_is_abandoned(page)) {
-        // mi_assert(false);
-      }
-    }
-  }
-  else {
-    // A page in OS or external memory
-    if (mi_atomic_load_acquire(&page->xheap) != (uintptr_t)heap->tld->subproc) return false;
-
-    // we use the thread_id to atomically grab ownership
-    mi_threadid_t abandoned_thread_id = 0;
-    if (mi_atomic_cas_strong_acq_rel(&page->xthread_id, &abandoned_thread_id, heap->thread_id)) {
-      // we got it atomically
-      _mi_page_reclaim(heap, page);
-      mi_assert_internal(!mi_page_is_abandoned(page));
-      return true;
-    }
-  }
-
-  return false;
-}
-*/
 
 void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) {
   MI_UNUSED(heap);
   // TODO: implement this
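
The deleted (commented-out) reclaim path claimed an abandoned page by compare-and-swapping its thread id from 0 to the reclaiming thread, so at most one thread could win. A minimal C11 sketch of that claim pattern, with illustrative names rather than mimalloc's own types:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef uintptr_t thread_id_t;

    typedef struct page_s {
      _Atomic(thread_id_t) xthread_id;  // 0 means "abandoned, unowned"
    } page_t;

    // Try to claim an abandoned page for `self`; at most one thread can
    // succeed because the compare-and-swap transitions 0 -> self atomically.
    static bool try_claim(page_t* page, thread_id_t self) {
      thread_id_t expected = 0;
      return atomic_compare_exchange_strong_explicit(
        &page->xthread_id, &expected, self,
        memory_order_acq_rel, memory_order_acquire);
    }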

View file

@@ -768,7 +768,7 @@ static inline bool mi_bitmap_chunk_all_are_clear(mi_bitmap_chunk_t* chunk) {
 
 static void mi_chunkmap_split(mi_chunkmap_t es, mi_cmap_t* cmap, mi_epoch_t* epoch) {
   *cmap = (mi_cmap_t)es;
-  *epoch = (mi_epoch_t)(es >> 32);
+  if (epoch!=NULL) { *epoch = (mi_epoch_t)(es >> 32); }
 }
 
 static mi_chunkmap_t mi_chunkmap_join(mi_cmap_t cmap, mi_epoch_t epoch) {
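
A chunkmap entry packs the 32-bit `cmap` in the low half of one 64-bit word and the epoch in the high half, which is why `mi_chunkmap_split` can now simply skip the epoch when the caller passes NULL. A sketch of the presumed packing in `mi_chunkmap_join` (the commit shows only its signature; the types here are illustrative):

    #include <stdint.h>

    typedef uint64_t mi_chunkmap_t;  // illustrative: one atomic 64-bit entry
    typedef uint32_t mi_cmap_t;      // low half: one bit per chunk
    typedef uint32_t mi_epoch_t;     // high half: illustrative epoch type

    // Presumed inverse of mi_chunkmap_split: epoch in the high 32 bits,
    // cmap in the low 32 bits of a single 64-bit word.
    static mi_chunkmap_t chunkmap_join(mi_cmap_t cmap, mi_epoch_t epoch) {
      return ((mi_chunkmap_t)epoch << 32) | cmap;
    }

    int main(void) {
      mi_chunkmap_t es = chunkmap_join(0x0000FFFFu, 7);
      return (es == 0x70000FFFFull ? 0 : 1);  // split would recover (0x0000FFFF, 7)
    }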
@@ -1091,80 +1091,50 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
 /* --------------------------------------------------------------------------------
   bitmap try_find_and_clear
 -------------------------------------------------------------------------------- */
-/*
-typedef bool (mi_bitmap_find_fun_t)(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx);
-
-static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx, mi_bitmap_find_fun_t* find_fun)
-{
-  if (n == 0 || n > MI_BITMAP_CHUNK_BITS) return false;
-
-  // start chunk index -- todo: can depend on the tseq to decrease contention between threads
-  MI_UNUSED(tseq);
-  const size_t chunk_start = 0;
-  const size_t chunk_map_start = chunk_start / MI_CHUNKMAP_BITS;
-  const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS;
-
-  // for each chunkmap entry `i`
-  for( size_t _i = 0; _i < bitmap->chunk_map_count; _i++)
-  {
-    size_t i = (_i + chunk_map_start);
-    if (i > bitmap->chunk_map_count) i -= bitmap->chunk_map_count;  // adjust for the start position
-
-    const size_t chunk_idx0 = i*MI_CHUNKMAP_BITS;
-    mi_epoch_t epoch;
-    mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, chunk_idx0, &epoch);
-    if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); }  // rotate right for the start position (on the first iteration)
-
-    uint32_t cmap_idx;          // one bit set of each chunk that may have bits set
-    size_t cmap_idx_shift = 0;  // shift through the cmap
-    while (mi_bsf32(cmap, &cmap_idx)) {  // find least bit that is set
-      // adjust for the start position
-      if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; }
-      // set the chunk idx
-      const size_t chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift;
-      // try to find and clear N bits in that chunk
-      if (chunk_idx < mi_bitmap_chunk_count(bitmap)) {  // we can have less chunks than in the chunkmap..
-        if ((*find_fun)(bitmap, n, chunk_idx, epoch, pidx)) {
-          return true;
-        }
-      }
-      // skip to the next bit
-      cmap_idx_shift += cmap_idx+1;
-      cmap >>= cmap_idx;  // skip scanned bits (and avoid UB for `cmap_idx+1`)
-      cmap >>= 1;
-    }
-  }
-
-  return false;
-}
-*/
+static inline size_t mi_bitmap_find_hi_chunk(mi_bitmap_t* bitmap) {
+  size_t hi_chunk_map_idx = 0;
+  mi_cmap_t hi_cmap = 0;
+  for (size_t i = 1; i < mi_bitmap_chunk_map_count(bitmap); i++) {
+    mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, i, NULL);
+    if (cmap != 0) {
+      hi_chunk_map_idx = i;
+      hi_cmap = cmap;
+    }
+  }
+  uint32_t cmap_idx;
+  if (mi_bsr32(hi_cmap, &cmap_idx)) {
+    const size_t hi = (hi_chunk_map_idx * MI_CHUNKMAP_BITS) + cmap_idx;
+    mi_assert_internal(hi < mi_bitmap_chunk_count(bitmap));
+    return hi;
+  }
+  else {
+    return 0;
+  }
+}
 
 #define mi_bitmap_forall_chunks(bitmap, tseq, name_epoch, name_chunk_idx) \
   { \
   /* start chunk index -- todo: can depend on the tseq to decrease contention between threads */ \
   MI_UNUSED(tseq); \
-  const size_t chunk_start = 0; \
+  const size_t chunk_start = 0; /* tseq % (1 + mi_bitmap_find_hi_chunk(bitmap)); */ \
   const size_t chunk_map_start = chunk_start / MI_CHUNKMAP_BITS; \
-  const size_t chunk_map_start_idx = chunk_start % MI_CHUNKMAP_BITS; \
+  const uint32_t chunk_map_start_idx = (uint32_t)(chunk_start % MI_CHUNKMAP_BITS); \
   /* for each chunkmap entry `i` */ \
   for (size_t _i = 0; _i < bitmap->chunk_map_count; _i++) { \
     size_t i = (_i + chunk_map_start); \
-    if (i > bitmap->chunk_map_count) i -= bitmap->chunk_map_count; /* adjust for the start position */ \
+    if (i >= bitmap->chunk_map_count) { i -= bitmap->chunk_map_count; } /* adjust for the start position */ \
     \
     const size_t chunk_idx0 = i*MI_CHUNKMAP_BITS; \
     mi_epoch_t name_epoch; \
     mi_cmap_t cmap = mi_bitmap_chunkmap(bitmap, chunk_idx0, &name_epoch); \
-    if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); } /* rotate right for the start position (on the first iteration) */ \
+    uint32_t cmap_idx_shift = 0; /* shift through the cmap */ \
+    if (_i == 0) { cmap = mi_rotr32(cmap, chunk_map_start_idx); cmap_idx_shift = chunk_map_start_idx; } /* rotate right for the start position (on the first iteration) */ \
     \
     uint32_t cmap_idx; /* one bit set of each chunk that may have bits set */ \
-    size_t cmap_idx_shift = 0; /* shift through the cmap */ \
     while (mi_bsf32(cmap, &cmap_idx)) { /* find least bit that is set */ \
-      /* adjust for the start position again */ \
-      if (_i == 0) { cmap_idx = (cmap_idx + chunk_map_start_idx) % MI_CHUNKMAP_BITS; } \
       /* set the chunk idx */ \
-      const size_t name_chunk_idx = chunk_idx0 + cmap_idx + cmap_idx_shift; \
+      size_t name_chunk_idx = chunk_idx0 + ((cmap_idx + cmap_idx_shift) % MI_CHUNKMAP_BITS); \
+      if (name_chunk_idx >= mi_bitmap_chunk_count(bitmap)) { name_chunk_idx -= mi_bitmap_chunk_count(bitmap); } \
      /* try to find and clear N bits in that chunk */ \
      if (name_chunk_idx < mi_bitmap_chunk_count(bitmap)) { /* we can have less chunks than in the chunkmap.. */ \

@@ -1177,28 +1147,10 @@ static inline bool mi_bitmap_try_find(mi_bitmap_t* bitmap, size_t n, size_t tseq
    } \
  }}
-
-//static bool mi_bitmap_try_find_and_clearN_at(mi_bitmap_t* bitmap, size_t n, size_t chunk_idx, mi_epoch_t epoch, size_t* pidx) {
-//  size_t cidx;
-//  if mi_likely(mi_bitmap_chunk_find_and_try_clearN(&bitmap->chunks[chunk_idx], n, &cidx)) {
-//    *pidx = (chunk_idx * MI_BITMAP_CHUNK_BITS) + cidx;
-//    mi_assert_internal(*pidx <= mi_bitmap_max_bits(bitmap) - n);
-//    return true;
-//  }
-//  else {
-//    // we may find that all are cleared only on a second iteration but that is ok as
-//    // the chunkmap is a conservative approximation.
-//    if (epoch == mi_bitmap_chunkmap_epoch(bitmap, chunk_idx) && mi_bitmap_chunk_all_are_clear(&bitmap->chunks[chunk_idx])) {
-//      mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx, epoch);
-//    }
-//    return false;
-//  }
-//}
 
 // Find a sequence of `n` bits in the bitmap with all bits set, and atomically unset all.
 // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
 mi_decl_nodiscard bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* bitmap, size_t n, size_t tseq, size_t* pidx)
 {
-  // return mi_bitmap_try_find(bitmap, n, tseq, pidx, &mi_bitmap_try_find_and_clearN_at);
   mi_bitmap_forall_chunks(bitmap, tseq, epoch, chunk_idx)
   {
     size_t cidx;
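
The reworked macro folds the start offset into `cmap_idx_shift` once, instead of re-adjusting `cmap_idx` on every loop iteration as the old code did. The underlying trick is unchanged: rotate the 32-bit cmap right by the start position, scan for the least set bit, and map the hit back to an absolute position with a modular add. A standalone demo of that index arithmetic (`rotr32`/`bsf32` are illustrative stand-ins for `mi_rotr32`/`mi_bsf32`):

    #include <stdint.h>
    #include <stdio.h>

    // Portable stand-ins, just for this demo.
    static uint32_t rotr32(uint32_t x, uint32_t r) {
      r &= 31;
      return (r == 0 ? x : ((x >> r) | (x << (32 - r))));
    }
    static int bsf32(uint32_t x) {  // index of least set bit, -1 if none
      if (x == 0) return -1;
      int i = 0;
      while ((x & 1) == 0) { x >>= 1; i++; }
      return i;
    }

    int main(void) {
      // A cmap with bits 3 and 20 set; start scanning at position 5.
      const uint32_t cmap  = (1u << 3) | (1u << 20);
      const uint32_t start = 5;
      const uint32_t rot = rotr32(cmap, start);
      const int idx = bsf32(rot);
      // Mapping back with (idx + start) % 32 recovers the absolute bit:
      // the scan visits bit 20 first (the first set bit at or after 5),
      // then wraps around to bit 3.
      printf("first hit: %u\n", ((uint32_t)idx + start) % 32);  // prints 20
      return 0;
    }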

View file

@@ -91,7 +91,7 @@ typedef uint32_t mi_cmap_t;
 
 // An atomic bitmap
 typedef mi_decl_align(MI_BITMAP_CHUNK_SIZE) struct mi_bitmap_s {
-  _Atomic(size_t) chunk_map_count;  // valid chunk_map's
+  _Atomic(size_t) chunk_map_count;  // valid chunk_maps entries
   _Atomic(size_t) chunk_count;      // total count of chunks
   size_t padding[MI_BITMAP_CHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc
   _Atomic(mi_chunkmap_t) chunk_maps[MI_BITMAP_MAX_CHUNKMAPS];
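
The `chunk_map_count` field counts valid `chunk_maps` entries; since one chunkmap bit tracks one chunk, it is presumably the chunk count rounded up to whole 32-bit cmap words (the "we can have less chunks than in the chunkmap" comment in the diff above suggests exactly this rounding). A hedged sketch of that relation, with an illustrative constant and helper:

    #include <stddef.h>
    #include <stdio.h>

    #define MI_CHUNKMAP_BITS 32  // bits per chunkmap entry (one 32-bit mi_cmap_t)

    // Presumed relation between the struct's two counters: the number of
    // valid chunk_maps entries is the chunk count rounded up to whole
    // 32-bit cmap words.
    static size_t chunk_map_count_for(size_t chunk_count) {
      return (chunk_count + MI_CHUNKMAP_BITS - 1) / MI_CHUNKMAP_BITS;
    }

    int main(void) {
      printf("%zu %zu %zu\n",
        chunk_map_count_for(1),    // 1
        chunk_map_count_for(32),   // 1
        chunk_map_count_for(33));  // 2
      return 0;
    }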

View file

@@ -400,7 +400,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   tld->heap_backing = bheap;
   tld->heaps = NULL;
   tld->subproc = &mi_subproc_default;
-  tld->tseq = 0; // mi_atomic_add_acq_rel(&mi_tcount, 1);
+  tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1);
   tld->os.stats = &tld->stats;
 }
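
Re-enabling the atomic counter gives every thread a distinct, dense `tseq`, which the arena and bitmap scans above can use as a starting offset to reduce contention. A minimal C11 sketch of the pattern (names are illustrative):

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    static _Atomic(size_t) tcount = 0;

    // Each initializing thread receives a unique sequence number
    // (0, 1, 2, ...); the atomic fetch-and-add makes this race-free
    // even when many threads start at once.
    static size_t next_tseq(void) {
      return atomic_fetch_add_explicit(&tcount, 1, memory_order_acq_rel);
    }

    int main(void) {
      // Single-threaded demo: prints 0, 1, 2 in some argument order.
      printf("%zu %zu %zu\n", next_tseq(), next_tseq(), next_tseq());
      return 0;
    }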

View file

@@ -343,9 +343,9 @@ int main(int argc, char** argv) {
 #ifndef USE_STD_MALLOC
 #ifndef NDEBUG
-  // mi_debug_show_arenas(true, true, false);
+  mi_debug_show_arenas(true, true, false);
   mi_collect(true);
-  mi_debug_show_arenas(true,true,false);
+  // mi_debug_show_arenas(true,true,false);
 #endif
   // mi_stats_print(NULL);
 #else