Merge branch 'dev3' into dev3-bin

daanx 2025-02-04 19:49:46 -08:00
commit 54c90fcac4
9 changed files with 122 additions and 64 deletions

View file

@@ -63,7 +63,7 @@ need a specific redirection DLL:
mode on Windows arm64. Unfortunately we cannot run x64 code emulated on Windows arm64 with
the x64 mimalloc override directly (since the C runtime always uses `arm64ec`). Instead:
1. Build the program as normal for x64 and link as normal with the x64
`mimalloc.lib` export library.
`mimalloc.dll.lib` export library.
2. Now separately build `mimalloc.dll` in `arm64ec` mode and _overwrite_ your
previous (x64) `mimalloc.dll` -- the loader can handle the mix of arm64ec
and x64 code. Now use `mimalloc-redirect-arm64ec.dll` to match your new

View file

@@ -116,6 +116,7 @@ mi_subproc_t* _mi_subproc_main(void);
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id);
mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
size_t _mi_thread_seq_id(void) mi_attr_noexcept;
mi_tld_t* _mi_thread_tld(void) mi_attr_noexcept;
void _mi_heap_guarded_init(mi_heap_t* heap);
// os.c
@@ -171,7 +172,7 @@ void _mi_arenas_unsafe_destroy_all(mi_tld_t* tld);
mi_page_t* _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment);
void _mi_arenas_page_free(mi_page_t* page);
void _mi_arenas_page_abandon(mi_page_t* page);
void _mi_arenas_page_abandon(mi_page_t* page, mi_tld_t* tld);
void _mi_arenas_page_unabandon(mi_page_t* page);
bool _mi_arenas_page_try_reabandon_to_mapped(mi_page_t* page);
@@ -199,7 +200,8 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
void _mi_deferred_free(mi_heap_t* heap, bool force);
void _mi_page_free_collect(mi_page_t* page,bool force);
void _mi_page_free_collect(mi_page_t* page, bool force);
void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head);
void _mi_page_init(mi_heap_t* heap, mi_page_t* page);
size_t _mi_bin_size(uint8_t bin); // for stats
@@ -433,7 +435,7 @@ static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
return (heap->tld->heap_backing == heap);
}
static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
static inline bool mi_heap_is_initialized(const mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
return (heap != NULL && heap != &_mi_heap_empty);
}

View file

@@ -544,13 +544,20 @@ void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount);
#define mi_subproc_stat_adjust_increase(subproc,stat,amnt,b) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amnt, b)
#define mi_subproc_stat_adjust_decrease(subproc,stat,amnt,b) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amnt, b)
#define mi_tld_stat_counter_increase(tld,stat,amount) __mi_stat_counter_increase( &(tld)->stats.stat, amount)
#define mi_tld_stat_increase(tld,stat,amount) __mi_stat_increase( &(tld)->stats.stat, amount)
#define mi_tld_stat_decrease(tld,stat,amount) __mi_stat_decrease( &(tld)->stats.stat, amount)
#define mi_tld_stat_adjust_increase(tld,stat,amnt,b) __mi_stat_adjust_increase( &(tld)->stats.stat, amnt, b)
#define mi_tld_stat_adjust_decrease(tld,stat,amnt,b) __mi_stat_adjust_decrease( &(tld)->stats.stat, amnt, b)
#define mi_os_stat_counter_increase(stat,amount) mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount)
#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount)
#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount)
#define mi_heap_stat_counter_increase(heap,stat,amount) __mi_stat_counter_increase( &(heap)->tld->stats.stat, amount)
#define mi_heap_stat_increase(heap,stat,amount) __mi_stat_increase( &(heap)->tld->stats.stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) __mi_stat_decrease( &(heap)->tld->stats.stat, amount)
#define mi_heap_stat_counter_increase(heap,stat,amount) mi_tld_stat_counter_increase(heap->tld, stat, amount)
#define mi_heap_stat_increase(heap,stat,amount) mi_tld_stat_increase( heap->tld, stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) mi_tld_stat_decrease( heap->tld, stat, amount)
#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_stat_counter_increase( (heap)->tld->stats.stat, amount)
#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_stat_increase( (heap)->tld->stats.stat, amount)
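
Note: the heap-level stat macros now forward to the new `mi_tld_stat_*` family instead of expanding `&(heap)->tld->stats.stat` themselves, so heap-based and tld-based call sites share a single definition. A minimal standalone sketch of that layering, using simplified hypothetical names rather than the actual mimalloc types:

#include <stdio.h>
#include <stddef.h>

// Simplified stand-ins for the mimalloc statistics types (illustration only).
typedef struct stat_s  { long current; } stat_t;
typedef struct stats_s { stat_t pages_abandoned; } stats_t;
typedef struct tld_s   { stats_t stats; } tld_t;
typedef struct heap_s  { tld_t* tld; } heap_t;

static void stat_increase(stat_t* s, size_t amount) { s->current += (long)amount; }

// Layered macros: the heap variant forwards to the tld variant so both
// end up in the same underlying update function.
#define tld_stat_increase(tld,stat,amount)    stat_increase(&(tld)->stats.stat, amount)
#define heap_stat_increase(heap,stat,amount)  tld_stat_increase((heap)->tld, stat, amount)

int main(void) {
  tld_t  tld  = { { { 0 } } };
  heap_t heap = { &tld };
  heap_stat_increase(&heap, pages_abandoned, 1);  // expands to tld_stat_increase(heap.tld, ...)
  tld_stat_increase(&tld, pages_abandoned, 1);    // same counter, updated through the tld directly
  printf("pages_abandoned: %ld\n", tld.stats.pages_abandoned.current);  // prints 2
  return 0;
}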

View file

@@ -563,8 +563,9 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_
mi_assert_internal(mi_page_is_abandoned(page));
mi_assert_internal(mi_arena_has_page(arena,page));
mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]);
mi_subproc_stat_decrease( arena->subproc, pages_abandoned, 1);
mi_subproc_stat_counter_increase(arena->subproc, pages_reclaim_on_alloc, 1);
mi_tld_t* tld = _mi_thread_tld();
mi_tld_stat_decrease( tld, pages_abandoned, 1);
mi_tld_stat_counter_increase( tld, pages_reclaim_on_alloc, 1);
_mi_page_free_collect(page, false); // update `used` count
mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count));
@@ -855,7 +856,7 @@ void _mi_arenas_page_free(mi_page_t* page) {
Arena abandon
----------------------------------------------------------- */
void _mi_arenas_page_abandon(mi_page_t* page) {
void _mi_arenas_page_abandon(mi_page_t* page, mi_tld_t* tld) {
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(mi_page_is_owned(page));
@@ -878,7 +879,7 @@ void _mi_arenas_page_abandon(mi_page_t* page) {
const bool wasclear = mi_bitmap_set(arena->pages_abandoned[bin], slice_index);
MI_UNUSED(wasclear); mi_assert_internal(wasclear);
mi_atomic_increment_relaxed(&arena->subproc->abandoned_count[bin]);
mi_subproc_stat_increase(arena->subproc, pages_abandoned, 1);
mi_tld_stat_increase(tld, pages_abandoned, 1);
}
else {
// page is full (or a singleton), or the page is OS/externally allocated
@@ -894,7 +895,7 @@ void _mi_arenas_page_abandon(mi_page_t* page) {
subproc->os_abandoned_pages = page;
}
}
mi_subproc_stat_increase(_mi_subproc(), pages_abandoned, 1);
mi_tld_stat_increase(tld, pages_abandoned, 1);
}
_mi_page_unown(page);
}
@@ -912,10 +913,10 @@ bool _mi_arenas_page_try_reabandon_to_mapped(mi_page_t* page) {
return false;
}
else {
mi_subproc_t* subproc = _mi_subproc();
mi_subproc_stat_counter_increase( subproc, pages_reabandon_full, 1);
mi_subproc_stat_adjust_decrease( subproc, pages_abandoned, 1, true /* on alloc */); // adjust as we are not abandoning fresh
_mi_arenas_page_abandon(page);
mi_tld_t* tld = _mi_thread_tld();
mi_tld_stat_counter_increase( tld, pages_reabandon_full, 1);
mi_tld_stat_adjust_decrease( tld, pages_abandoned, 1, true /* on alloc */); // adjust as we are not abandoning fresh
_mi_arenas_page_abandon(page,tld);
return true;
}
}
@@ -942,14 +943,14 @@ void _mi_arenas_page_unabandon(mi_page_t* page) {
mi_bitmap_clear_once_set(arena->pages_abandoned[bin], slice_index);
mi_page_clear_abandoned_mapped(page);
mi_atomic_decrement_relaxed(&arena->subproc->abandoned_count[bin]);
mi_subproc_stat_decrease(arena->subproc, pages_abandoned, 1);
mi_tld_stat_decrease(_mi_thread_tld(), pages_abandoned, 1);
}
else {
// page is full (or a singleton), page is OS allocated
mi_subproc_t* subproc = _mi_subproc();
mi_subproc_stat_decrease(_mi_subproc(), pages_abandoned, 1);
mi_tld_stat_decrease(_mi_thread_tld(), pages_abandoned, 1);
// if not an arena page, remove from the subproc os pages list
if (page->memid.memkind != MI_MEM_ARENA && mi_option_is_enabled(mi_option_visit_abandoned)) {
mi_subproc_t* subproc = _mi_subproc();
mi_lock(&subproc->os_abandoned_pages_lock) {
if (page->prev != NULL) { page->prev->next = page->next; }
if (page->next != NULL) { page->next->prev = page->prev; }
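
Note: the abandon/unabandon paths above now account `pages_abandoned` (and the reclaim/reabandon counters) in the thread-local `tld->stats`, obtained via `_mi_thread_tld()` or the new `tld` parameter, instead of doing contended updates on the shared subproc stats; the new counters are added to the stats merge in the `mi_stats_add` hunk at the end of this commit. A rough standalone sketch of that accumulate-per-thread, merge-later pattern, with simplified hypothetical names:

#include <stdatomic.h>
#include <stdio.h>

// Hypothetical, simplified illustration: hot paths bump a plain thread-local
// counter; the totals are merged into the shared statistics only occasionally
// (for example when a thread terminates).
typedef struct thread_stats_s { long pages_abandoned; } thread_stats_t;

static _Thread_local thread_stats_t thread_stats;    // per-thread, no atomics needed
static _Atomic long subproc_pages_abandoned;          // shared aggregate

static void page_abandon_stat(void) {
  thread_stats.pages_abandoned++;                     // cheap, uncontended update
}

static void thread_stats_merge(void) {                // e.g. at thread exit
  atomic_fetch_add_explicit(&subproc_pages_abandoned,
                            thread_stats.pages_abandoned, memory_order_relaxed);
  thread_stats.pages_abandoned = 0;
}

int main(void) {
  page_abandon_stat();
  page_abandon_stat();
  thread_stats_merge();
  printf("total abandoned: %ld\n", atomic_load(&subproc_pages_abandoned));  // prints 2
  return 0;
}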

View file

@@ -48,7 +48,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
}
// Forward declaration for multi-threaded collect
static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) mi_attr_noexcept;
static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t* mt_free) mi_attr_noexcept;
// Free a block multi-threaded
static inline void mi_free_block_mt(mi_page_t* page, mi_block_t* block) mi_attr_noexcept
@@ -69,14 +69,14 @@ static inline void mi_free_block_mt(mi_page_t* page, mi_block_t* block) mi_attr_
mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
do {
mi_block_set_next(page, block, mi_tf_block(tf_old));
tf_new = mi_tf_create(block, true /* always owned: try to claim it if abandoned */);
tf_new = mi_tf_create(block, true /* always use owned: try to claim it if the page is abandoned */);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new)); // todo: release is enough?
// and atomically try to collect the page if it was abandoned
const bool is_owned_now = !mi_tf_is_owned(tf_old);
if (is_owned_now) {
mi_assert_internal(mi_page_is_abandoned(page));
mi_free_try_collect_mt(page);
mi_free_try_collect_mt(page,block);
}
}
@@ -185,7 +185,7 @@ void mi_free(void* p) mi_attr_noexcept
else {
// page is full or contains (inner) aligned blocks; use generic multi-thread path
mi_free_generic_mt(page, p);
}
}
}
@@ -194,18 +194,20 @@ void mi_free(void* p) mi_attr_noexcept
// ------------------------------------------------------
static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) mi_attr_noexcept {
static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t* mt_free) mi_attr_noexcept {
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
// we own the page now..
// safe to collect the thread atomic free list
_mi_page_free_collect(page, false); // update `used` count
// use the `_partly` version to avoid atomic operations since we already have the `mt_free` pointing into the thread free list
_mi_page_free_collect_partly(page, mt_free);
#if MI_DEBUG > 1
if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_all_free(page)); }
#endif
// 1. free if the page is free now
// 1. free if the page is free now (this is updated by `_mi_page_free_collect_partly`)
if (mi_page_all_free(page))
{
// first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish)
@@ -215,11 +217,13 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) mi_attr_noe
return;
}
const bool too_full = mi_page_is_used_at_frac(page, 8); // more than 7/8th of the page is in use?
// 2. if the page is not too full, we can try to reclaim it for ourselves
// note: this seems a bad idea but it speeds up some benchmarks (like `larson`) quite a bit.
if (_mi_option_get_fast(mi_option_page_reclaim_on_free) != 0 &&
!mi_page_is_used_at_frac(page,8)
// && !mi_page_is_abandoned_mapped(page)
if (!too_full &&
_mi_option_get_fast(mi_option_page_reclaim_on_free) != 0 &&
page->block_size <= MI_SMALL_MAX_OBJ_SIZE // only for small sized blocks
)
{
// the page has still some blocks in use (but not too many)
@@ -228,11 +232,11 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) mi_attr_noe
// note: don't use `mi_heap_get_default()` as we may just have terminated this thread and we should
// not reinitialize the heap for this thread. (can happen due to thread-local destructors for example -- issue #944)
mi_heap_t* const heap = mi_prim_get_default_heap();
if (heap != (mi_heap_t*)&_mi_heap_empty) // we did not already terminate our thread (can this happen?
if (mi_heap_is_initialized(heap)) // we did not already terminate our thread
{
mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag);
if ((tagheap != NULL) && // don't reclaim across heap object types
(tagheap->allow_page_reclaim) && // we are allowed to reclaim abandoned pages
(tagheap->allow_page_reclaim) && // and we are allowed to reclaim abandoned pages
// (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
(_mi_arena_memid_is_suitable(page->memid, tagheap->exclusive_arena)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
)
@@ -249,7 +253,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) mi_attr_noe
}
// 3. if the page is unmapped, try to reabandon so it can possibly be mapped and found for allocations
if (!mi_page_is_used_at_frac(page,8) && // only reabandon if a full page starts to have enough blocks available to prevent immediate re-abandon of a full page
if (!too_full && // only reabandon if a full page starts to have enough blocks available to prevent immediate re-abandon of a full page
!mi_page_is_abandoned_mapped(page) && page->memid.memkind == MI_MEM_ARENA &&
_mi_arenas_page_try_reabandon_to_mapped(page))
{
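
Note: `mi_free_try_collect_mt` now receives the block that was just pushed (`mt_free`) so it can collect with `_mi_page_free_collect_partly` and avoid another atomic exchange of the thread-free list, and the 7/8-full check is computed once into `too_full`. The handshake it relies on is the push above: every multi-threaded free pushes with the owned flag set, and the pusher becomes the collector exactly when the flag was clear before (the page was abandoned). A rough standalone sketch of such a tagged-pointer push, assuming the flag lives in the low bit of the word (simplified hypothetical names, not the mimalloc encoding):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct block_s { struct block_s* next; } block_t;
typedef _Atomic uintptr_t thread_free_t;   // low bit = "owned", rest = list head

static inline block_t*  tf_block(uintptr_t tf) { return (block_t*)(tf & ~(uintptr_t)1); }
static inline int       tf_owned(uintptr_t tf) { return (int)(tf & 1); }
static inline uintptr_t tf_make(block_t* b, int owned) { return (uintptr_t)b | (owned ? 1u : 0u); }

// Push a freed block; returns 1 if the caller just became the owner
// (i.e. the page was abandoned before this push).
static int push_and_maybe_claim(thread_free_t* xthread_free, block_t* block) {
  uintptr_t old = atomic_load_explicit(xthread_free, memory_order_relaxed);
  uintptr_t desired;
  do {
    block->next = tf_block(old);       // link in front of the captured list
    desired = tf_make(block, 1);       // always push as "owned"
  } while (!atomic_compare_exchange_weak_explicit(xthread_free, &old, desired,
             memory_order_acq_rel, memory_order_relaxed));
  return !tf_owned(old);               // we claimed it if it was not owned before
}

int main(void) {
  thread_free_t xthread_free = 0;      // empty and unowned (abandoned page)
  block_t b1, b2;
  printf("claimed: %d\n", push_and_maybe_claim(&xthread_free, &b1));  // 1: we own it now
  printf("claimed: %d\n", push_and_maybe_claim(&xthread_free, &b2));  // 0: already owned
  return 0;
}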

View file

@@ -115,14 +115,14 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
// collect retired pages
_mi_heap_collect_retired(heap, force);
// if (_mi_is_main_thread()) { mi_debug_show_arenas(true, false, false); }
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
// collect arenas (this is program wide so don't force purges on abandonment of threads)
//mi_atomic_storei64_release(&heap->tld->subproc->purge_expire, 1);
// collect arenas (this is program wide so don't force purges on abandonment of threads)
//mi_atomic_storei64_release(&heap->tld->subproc->purge_expire, 1);
_mi_arenas_collect(collect == MI_FORCE /* force purge? */, collect >= MI_FORCE /* visit all? */, heap->tld);
}

View file

@@ -357,6 +357,18 @@ mi_subproc_t* _mi_subproc(void) {
}
mi_tld_t* _mi_thread_tld(void) mi_attr_noexcept {
// should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()`)
mi_heap_t* heap = mi_prim_get_default_heap();
if (heap == NULL) {
return &tld_empty;
}
else {
return heap->tld;
}
}
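
Note: `_mi_thread_tld` deliberately avoids triggering lazy initialization; when the default heap pointer is not available it returns the `tld_empty` sentinel rather than NULL, so callers such as the `mi_tld_stat_*` macros can dereference the result unconditionally. A tiny sketch of that null-object fallback, with simplified hypothetical names:

#include <stddef.h>

typedef struct tld_s { long stat; } tld_t;         // hypothetical, simplified

static tld_t tld_empty;                            // sentinel: early/late calls land here
static _Thread_local tld_t* thread_tld = NULL;     // set once the thread's heap exists

static tld_t* get_thread_tld(void) {
  return (thread_tld != NULL ? thread_tld : &tld_empty);  // never returns NULL
}

int main(void) {
  get_thread_tld()->stat++;       // safe even before any per-thread initialization
  tld_t tld = { 0 };
  thread_tld = &tld;
  get_thread_tld()->stat++;       // now updates the thread's own tld
  return 0;
}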
/* -----------------------------------------------------------
Sub process
----------------------------------------------------------- */

View file

@@ -137,9 +137,39 @@ bool _mi_page_is_valid(mi_page_t* page) {
Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */
// Collect the local `thread_free` list using an atomic exchange.
static void _mi_page_thread_free_collect(mi_page_t* page)
static void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head)
{
if (head == NULL) return;
// find the last block in the list -- also to get a proper use count (without data races)
size_t max_count = page->capacity; // cannot collect more than capacity
size_t count = 1;
mi_block_t* last = head;
mi_block_t* next;
while ((next = mi_block_next(page, last)) != NULL && count <= max_count) {
count++;
last = next;
}
// if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free)
if (count > max_count) {
_mi_error_message(EFAULT, "corrupted thread-free list\n");
return; // the thread-free items cannot be freed
}
// and append the current local free list
mi_block_set_next(page, last, page->local_free);
page->local_free = head;
// update counts now
mi_assert_internal(count <= UINT16_MAX);
page->used = page->used - (uint16_t)count;
}
// Collect the local `thread_free` list using an atomic exchange.
static void mi_page_thread_free_collect(mi_page_t* page)
{
// atomically capture the thread free list
mi_block_t* head;
mi_thread_free_t tfreex;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
@@ -150,35 +180,15 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); // release is enough?
mi_assert_internal(head != NULL);
// find the tail -- also to get a proper count (without data races)
size_t max_count = page->capacity; // cannot collect more than capacity
size_t count = 1;
mi_block_t* tail = head;
mi_block_t* next;
while( (next = mi_block_next(page,tail)) != NULL && count <= max_count) {
count++;
tail = next;
}
// if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free)
if (count > max_count) {
_mi_error_message(EFAULT, "corrupted thread-free list\n");
return; // the thread-free items cannot be freed
}
// and append the current local free list
mi_block_set_next(page,tail, page->local_free);
page->local_free = head;
// update counts now
page->used -= (uint16_t)count;
// and move it to the local list
mi_page_thread_collect_to_local(page, head);
}
void _mi_page_free_collect(mi_page_t* page, bool force) {
mi_assert_internal(page!=NULL);
// collect the thread free list
_mi_page_thread_free_collect(page);
mi_page_thread_free_collect(page);
// and the local free list
if (page->local_free != NULL) {
@@ -205,6 +215,23 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
mi_assert_internal(!force || page->local_free == NULL);
}
// collect elements in the thread-free list starting at `head`.
void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head) {
if (head == NULL) return;
mi_block_t* next = mi_block_next(page,head); // we cannot collect the head element itself as `page->thread_free` may point at it (and we want to avoid atomic ops)
if (next != NULL) {
mi_page_thread_collect_to_local(page, next);
if (page->local_free != NULL && page->free == NULL) {
page->free = page->local_free;
page->local_free = NULL;
page->free_is_zero = false;
}
}
if (page->used == 1) {
// all elements are free'd since we skipped the `head` element itself
_mi_page_free_collect(page, false); // collect the final element
}
}
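
Note: the refactor above splits the old collect into an atomic capture (`mi_page_thread_free_collect`) and a non-atomic splice (`mi_page_thread_collect_to_local`), which lets `_mi_page_free_collect_partly` splice only `head->next` onward and leave `head` alone, since the thread-free list may still point at it. A rough standalone sketch of the bounded splice, with simplified hypothetical names:

#include <stdio.h>
#include <stddef.h>

typedef struct block_s { struct block_s* next; } block_t;

// Walk at most `capacity` links to find the tail (counting blocks as we go),
// bail out on an over-long (likely corrupted or cyclic) list, then prepend the
// captured chain to the local free list in O(1) and adjust the used count.
static int collect_to_local(block_t** local_free, block_t* head,
                            size_t capacity, size_t* used) {
  if (head == NULL) return 0;
  size_t count = 1;
  block_t* last = head;
  while (last->next != NULL && count <= capacity) { count++; last = last->next; }
  if (count > capacity) return -1;     // corrupted thread-free list
  last->next = *local_free;            // append the current local free list
  *local_free = head;                  // the captured chain becomes the new head
  *used -= count;
  return 0;
}

int main(void) {
  block_t b[3] = { { &b[1] }, { &b[2] }, { NULL } };
  block_t* local_free = NULL;
  size_t used = 3;
  if (collect_to_local(&local_free, &b[0], 3, &used) == 0) {
    printf("collected; used is now %zu\n", used);   // prints 0
  }
  return 0;
}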
/* -----------------------------------------------------------
@@ -253,7 +280,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_page_queue_remove(pq, page);
mi_tld_t* tld = page->heap->tld;
mi_page_set_heap(page, NULL);
_mi_arenas_page_abandon(page);
_mi_arenas_page_abandon(page,tld);
_mi_arenas_collect(false, false, tld); // allow purging
}
}
@@ -333,9 +360,8 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
// abandon full pages
_mi_page_abandon(page, pq);
}
else {
else if (!mi_page_is_in_full(page)) {
// put full pages in a heap local queue
if (mi_page_is_in_full(page)) return;
mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
_mi_page_free_collect(page, false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
}

View file

@@ -152,6 +152,12 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1);
mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1);
mi_stat_counter_add(&stats->guarded_alloc_count, &src->guarded_alloc_count, 1);
mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1);
mi_stat_counter_add(&stats->pages_reclaim_on_alloc, &src->pages_reclaim_on_alloc, 1);
mi_stat_counter_add(&stats->pages_reclaim_on_free, &src->pages_reclaim_on_free, 1);
mi_stat_counter_add(&stats->pages_reabandon_full, &src->pages_reabandon_full, 1);
mi_stat_counter_add(&stats->pages_unabandon_busy_wait, &src->pages_unabandon_busy_wait, 1);
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) {