From 878627072ba0fc4d089a019f63c3144231e149c0 Mon Sep 17 00:00:00 2001
From: Daan
Date: Tue, 17 Jun 2025 06:03:05 -0700
Subject: [PATCH 1/7] add test for issue #1024

---
 test/main-override.cpp | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/test/main-override.cpp b/test/main-override.cpp
index 408b5ee8..f3cb999f 100644
--- a/test/main-override.cpp
+++ b/test/main-override.cpp
@@ -33,6 +33,7 @@ static void strdup_test();       // issue #445
 static void heap_thread_free_huge();
 static void test_std_string();   // issue #697
 static void test_thread_local(); // issue #944
+static void test_thread_leak();  // issue #1104
 // static void test_mixed0();    // issue #942
 static void test_mixed1();       // issue #942
 static void test_stl_allocators();
@@ -45,11 +46,12 @@ static void test_dep() { };
 #endif

 int main() {
-  mi_stats_reset();  // ignore earlier allocations
+  //mi_stats_reset();  // ignore earlier allocations
   //various_tests();
   //test_mixed1();
-  test_dep();
+  // test_dep();
+  test_thread_leak();

   //test_std_string();
   //test_thread_local();
@@ -68,7 +70,7 @@ int main() {
     test_mt_shutdown();
   */
   //fail_aslr();
-  mi_stats_print(NULL);
+  //mi_stats_print(NULL);
   return 0;
 }

@@ -378,6 +380,32 @@ static void heap_thread_free_huge() {
   }
 }

+static std::atomic<long> gsum;
+
+static void local_alloc() {
+  long sum = 0;
+  for(int i = 0; i < 1000000; i++) {
+    const int n = 1 + std::rand() % 1000;
+    uint8_t* p = (uint8_t*)calloc(n, 1);
+    p[0] = 1;
+    sum += p[std::rand() % n];
+    if ((std::rand() % 100) > 24) {
+      free(p);
+    }
+  }
+  gsum += sum;
+}
+
+static void test_thread_leak() {
+  std::vector<std::thread> threads;
+  for (int i=1; i<=100; ++i) {
+    threads.emplace_back(std::thread(&local_alloc));
+  }
+  for (auto& th : threads) {
+    th.join();
+  }
+}
+
 static void test_mt_shutdown()
 {
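Note: each of the 100 threads above frees a block only when `(std::rand() % 100) > 24`, i.e. about 75% of the time, so roughly 25% of its one million allocations are leaked deliberately; at an average size of ~500 bytes that is on the order of 125 MB of leaked blocks per thread, which stresses how pages owned by exited threads are reclaimed. One way to observe what the allocator retains afterwards (a sketch, not part of the patch) is to re-enable the statistics print around the new test in main():

    int main() {
      test_thread_leak();      // the stress test added above
      mi_stats_print(NULL);    // inspect retained/abandoned memory afterwards
      return 0;
    }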
From d8321f6d662325dfedb147260cd5ce4515f7ec7b Mon Sep 17 00:00:00 2001
From: Daan
Date: Tue, 17 Jun 2025 18:05:12 -0700
Subject: [PATCH 2/7] check commit success for pagemap extension so NULL can be returned instead of faulting (issue #1098)

---
 include/mimalloc/internal.h |   7 ++-
 src/arena.c                 |  16 +++--
 src/init.c                  |   2 +-
 src/page-map.c              | 119 ++++++++++++++++++++++++------------
 4 files changed, 97 insertions(+), 47 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 62387dc3..ce666162 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -238,7 +238,7 @@ bool        _mi_meta_is_meta_page(void* p);

 // "page-map.c"
 bool        _mi_page_map_init(void);
-void        _mi_page_map_register(mi_page_t* page);
+mi_decl_nodiscard bool _mi_page_map_register(mi_page_t* page);
 void        _mi_page_map_unregister(mi_page_t* page);
 void        _mi_page_map_unregister_range(void* start, size_t size);
 mi_page_t*  _mi_safe_ptr_page(const void* p);
@@ -604,7 +604,8 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
 #define MI_PAGE_MAP_SHIFT   (MI_MAX_VABITS - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT)
 #define MI_PAGE_MAP_COUNT   (MI_ZU(1) << MI_PAGE_MAP_SHIFT)

-extern mi_decl_hidden _Atomic(mi_page_t**)* _mi_page_map;
+typedef mi_page_t** mi_submap_t;
+extern mi_decl_hidden _Atomic(mi_submap_t)* _mi_page_map;

 static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
   const size_t u = (size_t)((uintptr_t)p / MI_ARENA_SLICE_SIZE);
@@ -625,7 +626,7 @@ static inline mi_page_t* _mi_unchecked_ptr_page(const void* p) {
 static inline mi_page_t* _mi_checked_ptr_page(const void* p) {
   size_t sub_idx;
   const size_t idx = _mi_page_map_index(p, &sub_idx);
-  mi_page_t** const sub = _mi_page_map_at(idx);
+  mi_submap_t const sub = _mi_page_map_at(idx);
   if mi_unlikely(sub == NULL) return NULL;
   return sub[sub_idx];
 }

diff --git a/src/arena.c b/src/arena.c
index b26f4442..c7869797 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -684,7 +684,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
     commit_size = _mi_align_up(block_start + block_size, MI_PAGE_MIN_COMMIT_SIZE);
     if (commit_size > page_noguard_size) { commit_size = page_noguard_size; }
     bool is_zero;
-    if (!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) {
+    if mi_unlikely(!mi_arena_commit( mi_memid_arena(memid), page, commit_size, &is_zero, 0)) {
       _mi_arenas_free(page, alloc_size, memid);
       return NULL;
     }
@@ -710,7 +710,10 @@ static mi_page_t* mi_arenas_page_alloc_fresh(size_t slice_count, size_t block_si
   mi_page_try_claim_ownership(page);

   // register in the page map
-  _mi_page_map_register(page);
+  if mi_unlikely(!_mi_page_map_register(page)) {
+    _mi_arenas_free( page, alloc_size, memid );
+    return NULL;
+  }

   // stats
   mi_tld_stat_increase(tld, pages, 1);
@@ -1894,12 +1897,12 @@ static bool mi_arena_page_register(size_t slice_index, size_t slice_count, mi_ar
   mi_assert_internal(slice_count == 1);
   mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
   mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
-  _mi_page_map_register(page);
+  if (!_mi_page_map_register(page)) return false;  // break
   mi_assert_internal(_mi_ptr_page(page)==page);
   return true;
 }

-static bool mi_arena_pages_reregister(mi_arena_t* arena) {
+static mi_decl_nodiscard bool mi_arena_pages_reregister(mi_arena_t* arena) {
   return _mi_bitmap_forall_set(arena->pages, &mi_arena_page_register, arena, NULL);
 }

@@ -1979,7 +1982,10 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_commit_fun_t* c
   if (!mi_arenas_add(arena->subproc, arena, arena_id)) {
     return false;
   }
-  mi_arena_pages_reregister(arena);
+  if (!mi_arena_pages_reregister(arena)) {
+    // todo: clear arena entry in the subproc?
+    return false;
+  }

   // adjust abandoned page count
   for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) {

diff --git a/src/init.c b/src/init.c
index 73847a4b..52a8f11c 100644
--- a/src/init.c
+++ b/src/init.c
@@ -711,7 +711,7 @@ void mi_process_init(void) mi_attr_noexcept {
   mi_detect_cpu_features();
   _mi_stats_init();
   _mi_os_init();
-  _mi_page_map_init();
+  _mi_page_map_init();   // this could fail.. should we abort in that case?
   mi_heap_main_init();
   mi_tld_main_init();
   // the following two can potentially allocate (on freeBSD for locks and thread keys)
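Note: both arena.c call sites above follow the same allocate/register/roll-back shape: if the fresh page cannot be entered into the page map (typically because the OS refused to commit memory for the map itself), the page is released again and NULL propagates to the caller instead of a later fault. A minimal sketch of the pattern, with hypothetical stand-in names (not mimalloc APIs):

    #include <stdbool.h>
    #include <stddef.h>

    void* resource_alloc(void);
    bool  registry_add(void* r);   // can fail, e.g. on a commit failure
    void  resource_free(void* r);

    void* acquire(void) {
      void* r = resource_alloc();
      if (r == NULL) return NULL;
      if (!registry_add(r)) {      // registration failed:
        resource_free(r);          // roll back the allocation...
        return NULL;               // ...so the caller sees NULL instead of faulting later
      }
      return r;
    }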
diff --git a/src/page-map.c b/src/page-map.c
index ce70495b..18a3a035 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "bitmap.h"

 static void mi_page_map_cannot_commit(void) {
-  _mi_error_message(EFAULT,"unable to commit memory for the page address map\n");
+  _mi_warning_message("unable to commit the allocation page-map on-demand\n" );
 }

 #if MI_PAGE_MAP_FLAT
@@ -32,7 +32,7 @@ static mi_memid_t mi_page_map_memid;
 #define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT  MI_ARENA_SLICE_SIZE
 static mi_bitmap_t* mi_page_map_commit;  // one bit per committed 64 KiB entries

-static void mi_page_map_ensure_committed(size_t idx, size_t slice_count);
+static mi_decl_nodiscard bool mi_page_map_ensure_committed(size_t idx, size_t slice_count);

 bool _mi_page_map_init(void) {
   size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
@@ -71,7 +71,10 @@ bool _mi_page_map_init(void) {

   // commit the first part so NULL pointers get resolved without an access violation
   if (!commit) {
-    mi_page_map_ensure_committed(0, 1);
+    if (!mi_page_map_ensure_committed(0, 1)) {
+      mi_page_map_cannot_commit();
+      return false;
+    }
   }
   _mi_page_map[0] = 1;  // so _mi_ptr_page(NULL) == NULL
   mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@@ -90,7 +93,7 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
 }


-static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
+static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
   // is the page map area that contains the page address committed?
   // we always set the commit bits so we can track what ranges are in-use.
   // we only actually commit if the map wasn't committed fully already.
@@ -103,8 +106,11 @@ static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
       bool is_zero;
       uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
       const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
-      if (!_mi_os_commit(start, size, &is_zero)) return;
-      if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
+      if (!_mi_os_commit(start, size, &is_zero)) {
+        mi_page_map_cannot_commit();
+        return false;
+      }
+      if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
       mi_bitmap_set(mi_page_map_commit, i);
     }
   }
@@ -113,6 +119,7 @@ static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
   _mi_page_map[idx] = 0;
   _mi_page_map[idx+slice_count-1] = 0;
   #endif
+  return true;
 }


@@ -124,25 +131,28 @@ static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t*
   return _mi_page_map_index(page);
 }

-void _mi_page_map_register(mi_page_t* page) {
+bool _mi_page_map_register(mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
   mi_assert_internal(_mi_page_map != NULL);  // should be initialized before multi-thread access!
   if mi_unlikely(_mi_page_map == NULL) {
-    if (!_mi_page_map_init()) return;
+    if (!_mi_page_map_init()) return false;
   }
   mi_assert(_mi_page_map!=NULL);
   uint8_t* page_start;
   size_t   slice_count;
   const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);

-  mi_page_map_ensure_committed(idx, slice_count);
+  if (!mi_page_map_ensure_committed(idx, slice_count)) {
+    return false;
+  }

   // set the offsets
   for (size_t i = 0; i < slice_count; i++) {
     mi_assert_internal(i < 128);
     _mi_page_map[idx + i] = (uint8_t)(i+1);
   }
+  return true;
 }

 void _mi_page_map_unregister(mi_page_t* page) {
@@ -158,7 +168,10 @@ void _mi_page_map_unregister(mi_page_t* page) {
 void _mi_page_map_unregister_range(void* start, size_t size) {
   const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
   const uintptr_t index = _mi_page_map_index(start);
-  mi_page_map_ensure_committed(index, slice_count);  // we commit the range in total; todo: scan the commit bits and clear only those ranges?
+  // todo: scan the commit bits and clear only those ranges?
+  if (!mi_page_map_ensure_committed(index, slice_count)) {  // we commit the range in total;
+    return;
+  }
   _mi_memzero(&_mi_page_map[index], slice_count);
 }
@@ -179,9 +192,10 @@ mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_att
 #else

 // A 2-level page map
-#define MI_PAGE_MAP_SUB_SIZE          (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
+#define MI_PAGE_MAP_SUB_SIZE          (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
+#define MI_PAGE_MAP_ENTRIES_PER_CBIT  (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)

-mi_decl_cache_align _Atomic(mi_page_t**)* _mi_page_map;
+mi_decl_cache_align _Atomic(mi_submap_t)* _mi_page_map;
 static size_t mi_page_map_count;
 static void* mi_page_map_max_address;
 static mi_memid_t mi_page_map_memid;
@@ -189,26 +203,30 @@ static mi_memid_t mi_page_map_memid;
 // divide the main map in 64 (`MI_BFIELD_BITS`) parts commit those parts on demand
 static _Atomic(mi_bfield_t) mi_page_map_commit;

-#define MI_PAGE_MAP_ENTRIES_PER_CBIT  (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
+static mi_decl_nodiscard bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap);
+static mi_decl_nodiscard bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap);
+static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);

 static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
   mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit);
-  const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT;  mi_assert_internal(bit_idx < MI_BFIELD_BITS);
+  const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT;   mi_assert_internal(bit_idx < MI_BFIELD_BITS);
   if (pbit_idx != NULL) { *pbit_idx = bit_idx; }
   return ((commit & (MI_ZU(1) << bit_idx)) != 0);
 }

-static mi_page_t** mi_page_map_ensure_committed(size_t idx) {
+static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap) {
+  mi_assert_internal(submap!=NULL && *submap==NULL);
   size_t bit_idx;
   if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) {
     uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT];
-    if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL)) {
-      return NULL;
+    if (!_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_submap_t), NULL)) {
+      mi_page_map_cannot_commit();
+      return false;
     }
-    mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
   }
-  return mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]);  // _mi_page_map_at(idx);
+  *submap = _mi_page_map[idx];
+  return true;
 }

 // initialize the page map
@@ -258,7 +276,11 @@ bool _mi_page_map_init(void) {
   if (!mi_page_map_memid.initially_zero) {  // initialize low addresses with NULL
     _mi_memzero_aligned(sub0, submap_size);
   }
-  mi_page_map_ensure_committed(0);
+  mi_submap_t nullsub = NULL;
+  if (!mi_page_map_ensure_committed(0,&nullsub)) {
+    mi_page_map_cannot_commit();
+    return false;
+  }
   mi_atomic_store_ptr_release(mi_page_t*, &_mi_page_map[0], sub0);

   mi_assert_internal(_mi_ptr_page(NULL)==NULL);
@@ -290,41 +312,62 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
 }


-static mi_page_t** mi_page_map_ensure_submap_at(size_t idx) {
-  mi_page_t** sub = mi_page_map_ensure_committed(idx);
+static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap) {
+  mi_assert_internal(submap!=NULL && *submap==NULL);
+  mi_submap_t sub = NULL;
+  if (!mi_page_map_ensure_committed(idx, &sub)) {
+    return false;
+  }
   if mi_unlikely(sub == NULL) {
     // sub map not yet allocated, alloc now
     mi_memid_t memid;
     mi_page_t** expect = sub;
     const size_t submap_size = MI_PAGE_MAP_SUB_SIZE;
-    sub = (mi_page_t**)_mi_os_zalloc(submap_size, &memid);
-    if (sub == NULL) {
-      _mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
-      return NULL;
+    sub = (mi_submap_t)_mi_os_zalloc(submap_size, &memid);
+    if (sub==NULL) {
+      _mi_warning_message("internal error: unable to extend the page map\n");
+      return false;
     }
     if (!mi_atomic_cas_ptr_strong_acq_rel(mi_page_t*, &_mi_page_map[idx], &expect, sub)) {
       // another thread already allocated it.. free and continue
       _mi_os_free(sub, submap_size, memid);
-      sub = expect;
-      mi_assert_internal(sub!=NULL);
+      sub = expect;
     }
-  }
-  return sub;
+  }
+  mi_assert_internal(sub!=NULL);
+  *submap = sub;
+  return true;
 }


-static void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
+static bool mi_page_map_set_range_prim(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
   // is the page map area that contains the page address committed?
   while (slice_count > 0) {
-    mi_page_t** sub = mi_page_map_ensure_submap_at(idx);
+    mi_submap_t sub = NULL;
+    if (!mi_page_map_ensure_submap_at(idx, &sub)) {
+      return false;
+    };
+    mi_assert_internal(sub!=NULL);
     // set the offsets for the page
-    while (sub_idx < MI_PAGE_MAP_SUB_COUNT) {
+    while (slice_count > 0 && sub_idx < MI_PAGE_MAP_SUB_COUNT) {
       sub[sub_idx] = page;
-      slice_count--; if (slice_count == 0) return;
-      sub_idx++;
+      slice_count--;
+      sub_idx++;
     }
     idx++;   // potentially wrap around to the next idx
     sub_idx = 0;
   }
+  return true;
+}
+
+static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
+  if mi_unlikely(!mi_page_map_set_range_prim(page,idx,sub_idx,slice_count)) {
+    // failed to commit, call again to reset the page pointer if needed
+    if (page!=NULL) {
+      mi_page_map_set_range_prim(NULL,idx,sub_idx,slice_count);
+    }
+    return false;
+  }
+  return true;
 }

 static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) {
@@ -335,18 +378,18 @@ static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slic
   return _mi_page_map_index(page, sub_idx);
 }

-void _mi_page_map_register(mi_page_t* page) {
+bool _mi_page_map_register(mi_page_t* page) {
   mi_assert_internal(page != NULL);
   mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
   mi_assert_internal(_mi_page_map != NULL);  // should be initialized before multi-thread access!
   if mi_unlikely(_mi_page_map == NULL) {
-    if (!_mi_page_map_init()) return;
+    if (!_mi_page_map_init()) return false;
   }
   mi_assert(_mi_page_map!=NULL);
   size_t slice_count;
   size_t sub_idx;
   const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
-  mi_page_map_set_range(page, idx, sub_idx, slice_count);
+  return mi_page_map_set_range(page, idx, sub_idx, slice_count);
 }

 void _mi_page_map_unregister(mi_page_t* page) {
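Note: with the 2-level page map, `_mi_checked_ptr_page` (internal.h, above) indexes a top-level array with the high address bits and a sub-map with the low bits; after this patch a NULL sub-map simply makes the lookup yield NULL. A simplified standalone sketch of that lookup — the constants and names here are illustrative, not mimalloc's actual values:

    #include <stddef.h>
    #include <stdint.h>

    #define SLICE_SHIFT 16                      // e.g. 64 KiB slices
    #define SUB_COUNT   ((size_t)1 << 12)       // entries per sub-map

    typedef struct page_s page_t;
    static page_t** page_map[1 << 20];          // top level, committed on demand

    static page_t* ptr_lookup(const void* p) {  // assumes p is below the mapped address range
      const size_t u = (size_t)((uintptr_t)p >> SLICE_SHIFT);
      page_t** sub = page_map[u / SUB_COUNT];   // first level
      if (sub == NULL) return NULL;             // never registered: no fault
      return sub[u % SUB_COUNT];                // second level
    }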
From 2ce6568af68f62d1488ab643859de55d714523d9 Mon Sep 17 00:00:00 2001
From: Daan
Date: Tue, 17 Jun 2025 18:44:17 -0700
Subject: [PATCH 3/7] fix compile error for mi_decl_nodiscard

---
 src/arena.c    | 2 +-
 src/page-map.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index c7869797..b5fc095c 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1902,7 +1902,7 @@ static bool mi_arena_page_register(size_t slice_index, size_t slice_count, mi_ar
   return true;
 }

-static mi_decl_nodiscard bool mi_arena_pages_reregister(mi_arena_t* arena) {
+mi_decl_nodiscard static bool mi_arena_pages_reregister(mi_arena_t* arena) {
   return _mi_bitmap_forall_set(arena->pages, &mi_arena_page_register, arena, NULL);
 }

diff --git a/src/page-map.c b/src/page-map.c
index 18a3a035..d7b99a75 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -32,7 +32,7 @@ static mi_memid_t mi_page_map_memid;
 #define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT  MI_ARENA_SLICE_SIZE
 static mi_bitmap_t* mi_page_map_commit;  // one bit per committed 64 KiB entries

-static mi_decl_nodiscard bool mi_page_map_ensure_committed(size_t idx, size_t slice_count);
+mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, size_t slice_count);

 bool _mi_page_map_init(void) {
   size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
@@ -203,8 +203,8 @@ static mi_memid_t mi_page_map_memid;
 // divide the main map in 64 (`MI_BFIELD_BITS`) parts commit those parts on demand
 static _Atomic(mi_bfield_t) mi_page_map_commit;

-static mi_decl_nodiscard bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap);
-static mi_decl_nodiscard bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap);
+mi_decl_nodiscard static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap);
+mi_decl_nodiscard static bool mi_page_map_ensure_submap_at(size_t idx, mi_submap_t* submap);
 static bool mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);

 static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {

From 99976d6c2be110ec3d9a2ce905e8545d6c249af9 Mon Sep 17 00:00:00 2001
From: Daan
Date: Tue, 17 Jun 2025 19:34:32 -0700
Subject: [PATCH 4/7] fix merge error where commit bits in the pagemap were not set

---
 include/mimalloc/internal.h | 2 +-
 src/page-map.c              | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index ce666162..fd6c8d5d 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -613,7 +613,7 @@ static inline size_t _mi_page_map_index(const void* p, size_t* sub_idx) {
   return (u / MI_PAGE_MAP_SUB_COUNT);
 }

-static inline mi_page_t** _mi_page_map_at(size_t idx) {
+static inline mi_submap_t _mi_page_map_at(size_t idx) {
   return mi_atomic_load_ptr_relaxed(mi_page_t*, &_mi_page_map[idx]);
 }

diff --git a/src/page-map.c b/src/page-map.c
index d7b99a75..08ded252 100644
--- a/src/page-map.c
+++ b/src/page-map.c
@@ -224,8 +224,9 @@ static bool mi_page_map_ensure_committed(size_t idx, mi_submap_t* submap) {
       mi_page_map_cannot_commit();
       return false;
     }
+    mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
   }
-  *submap = _mi_page_map[idx];
+  *submap = mi_atomic_load_ptr_acquire(mi_page_t*, &_mi_page_map[idx]);  // acquire _mi_page_map_at(idx);
   return true;
 }

@@ -295,7 +296,7 @@ void _mi_page_map_unsafe_destroy(mi_subproc_t* subproc) {
   for (size_t idx = 1; idx < mi_page_map_count; idx++) {  // skip entry 0 (as we allocate that submap at the end of the page_map)
     // free all sub-maps
     if (mi_page_map_is_committed(idx, NULL)) {
-      mi_page_t** sub = _mi_page_map_at(idx);
+      mi_submap_t sub = _mi_page_map_at(idx);
      if (sub != NULL) {
         mi_memid_t memid = _mi_memid_create_os(sub, MI_PAGE_MAP_SUB_SIZE, true, false, false);
         _mi_os_free_ex(memid.mem.os.base, memid.mem.os.size, true, memid, subproc);
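Note: the restored `mi_atomic_or_acq_rel` is what publishes the commit bit; without it every caller of `mi_page_map_ensure_committed` re-commits its range on each call, and the acquire load of the sub-map pointer pairs with it so a thread that observes the bit also observes the committed memory. The same publish/observe pairing expressed in plain C11 atomics (a sketch; mimalloc uses its own `mi_atomic_*` wrappers):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    static _Atomic(size_t) commit_bits;

    static bool ensure_committed(size_t bit_idx) {
      size_t bits = atomic_load_explicit(&commit_bits, memory_order_relaxed);
      if ((bits & ((size_t)1 << bit_idx)) == 0) {
        // ... commit (and zero) the backing memory here; return false on failure ...
        atomic_fetch_or_explicit(&commit_bits, (size_t)1 << bit_idx,
                                 memory_order_acq_rel);  // publish: pairs with acquire readers
      }
      return true;
    }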
From aaf8da9abaf3bc30233c3cade9a1ed7702bb5e74 Mon Sep 17 00:00:00 2001
From: Daan
Date: Tue, 17 Jun 2025 19:49:09 -0700
Subject: [PATCH 5/7] potential fix for use-after-free of the tld on unsafe arenas destroy

---
 include/mimalloc/internal.h |  2 +-
 src/arena.c                 | 15 +++++++--------
 src/init.c                  |  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index fd6c8d5d..46274567 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -223,7 +223,7 @@ void*       _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_
 void        _mi_arenas_free(void* p, size_t size, mi_memid_t memid);
 bool        _mi_arenas_contain(const void* p);
 void        _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld);
-void        _mi_arenas_unsafe_destroy_all(mi_tld_t* tld);
+void        _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc);

 mi_page_t*  _mi_arenas_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment);
 void        _mi_arenas_page_free(mi_page_t* page, mi_tld_t* tld);

diff --git a/src/arena.c b/src/arena.c
index b5fc095c..3d78fd91 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -993,7 +993,7 @@ void _mi_arenas_page_unabandon(mi_page_t* page) {
   Arena free
----------------------------------------------------------- */
 static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices);
-static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld);
+static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq);

 void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) {
   if (p==NULL) return;
@@ -1054,7 +1054,7 @@ void _mi_arenas_free(void* p, size_t size, mi_memid_t memid) {

 // Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired
 void _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld) {
-  mi_arenas_try_purge(force_purge, visit_all, tld);
+  mi_arenas_try_purge(force_purge, visit_all, tld->subproc, tld->thread_seq);
 }

@@ -1109,9 +1109,9 @@ static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {

 // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
 // for dynamic libraries that are unloaded and need to release all their allocated memory.
-void _mi_arenas_unsafe_destroy_all(mi_tld_t* tld) {
-  mi_arenas_unsafe_destroy(tld->subproc);
-  _mi_arenas_collect(true /* force purge */, true /* visit all*/, tld);  // purge non-owned arenas
+void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc) {
+  mi_arenas_unsafe_destroy(subproc);
+  mi_arenas_try_purge(true /* force purge */, true /* visit all*/, subproc, 0 /* thread seq */);  // purge non-owned arenas
 }

@@ -1775,14 +1775,13 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
 }


-static void mi_arenas_try_purge(bool force, bool visit_all, mi_tld_t* tld)
+static void mi_arenas_try_purge(bool force, bool visit_all, mi_subproc_t* subproc, size_t tseq)
 {
   // try purge can be called often so try to only run when needed
   const long delay = mi_arena_purge_delay();
   if (_mi_preloading() || delay <= 0) return;  // nothing will be scheduled

   // check if any arena needs purging?
-  mi_subproc_t* subproc = tld->subproc;
   const mi_msecs_t now = _mi_clock_now();
   const mi_msecs_t arenas_expire = mi_atomic_loadi64_acquire(&subproc->purge_expire);
   if (!visit_all && !force && (arenas_expire == 0 || arenas_expire > now)) return;
@@ -1796,7 +1795,7 @@
 {
   // increase global expire: at most one purge per delay cycle
   if (arenas_expire > now) { mi_atomic_storei64_release(&subproc->purge_expire, now + (delay/10)); }
-  const size_t arena_start = tld->thread_seq % max_arena;
+  const size_t arena_start = tseq % max_arena;
   size_t max_purge_count = (visit_all ? max_arena : (max_arena/4)+1);
   bool all_visited = true;
   bool any_purged = false;

diff --git a/src/init.c b/src/init.c
index 52a8f11c..eb508bc9 100644
--- a/src/init.c
+++ b/src/init.c
@@ -778,7 +778,7 @@ void mi_cdecl mi_process_done(void) mi_attr_noexcept {
   if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
     mi_heap_collect(heap, true /* force */);
     _mi_heap_unsafe_destroy_all(heap);     // forcefully release all memory held by all heaps (of this thread only!)
-    _mi_arenas_unsafe_destroy_all(heap->tld);
+    _mi_arenas_unsafe_destroy_all(_mi_subproc_main());
     _mi_page_map_unsafe_destroy(_mi_subproc_main());
   }
   //_mi_page_map_unsafe_destroy(_mi_subproc_main());
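Note: the use-after-free fixed here is subtle: the thread-local descriptor (`tld`) can itself live in memory owned by the arenas being destroyed, so it must not be dereferenced after `mi_arenas_unsafe_destroy` runs; the subprocess (and a thread sequence of 0) is passed instead. A sketch of the hazard with simplified, hypothetical types:

    typedef struct subproc_s subproc_t;
    typedef struct tld_s  { subproc_t* subproc; } tld_t;
    typedef struct heap_s { tld_t* tld; } heap_t;

    void arenas_unsafe_destroy(subproc_t* subproc);

    void process_done(heap_t* heap) {
      subproc_t* subproc = heap->tld->subproc;  // read what is needed up front
      arenas_unsafe_destroy(subproc);           // may free the memory backing heap->tld
      // from here on heap->tld is potentially dangling: pass `subproc` directly
    }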
From 1b5ee4e2d0f0bc1b4f79750b7d5d5975db5ac204 Mon Sep 17 00:00:00 2001
From: Daan
Date: Tue, 17 Jun 2025 19:59:12 -0700
Subject: [PATCH 6/7] skip purge after arenas destroy

---
 src/arena.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index 3d78fd91..a95248a7 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1088,9 +1088,8 @@ bool _mi_arenas_contain(const void* p) {
 // for dynamic libraries that are unloaded and need to release all their allocated memory.
 static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
   mi_assert_internal(subproc != NULL);
-  const size_t max_arena = mi_arenas_get_count(subproc);
-  size_t new_max_arena = 0;
-  for (size_t i = 0; i < max_arena; i++) {
+  const size_t arena_count = mi_arenas_get_count(subproc);
+  for (size_t i = 0; i < arena_count; i++) {
     mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]);
     if (arena != NULL) {
       // mi_lock_done(&arena->abandoned_visit_lock);
@@ -1100,10 +1099,9 @@ static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
       }
     }
   }
-  // try to lower the max arena.
-  size_t expected = max_arena;
-  mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, new_max_arena);
+  size_t expected = arena_count;
+  mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, 0);
 }

@@ -1111,7 +1109,7 @@ static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) {
 // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit`
 // for dynamic libraries that are unloaded and need to release all their allocated memory.
 void _mi_arenas_unsafe_destroy_all(mi_subproc_t* subproc) {
   mi_arenas_unsafe_destroy(subproc);
-  mi_arenas_try_purge(true /* force purge */, true /* visit all*/, subproc, 0 /* thread seq */);  // purge non-owned arenas
+  // mi_arenas_try_purge(true /* force purge */, true /* visit all*/, subproc, 0 /* thread seq */);  // purge non-owned arenas
 }
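Note: the arena count is reset with a compare-and-swap rather than a plain store so that the reset silently fails if another thread registered a new arena after the count was read. In plain C11 atomics the idiom looks like this (a sketch):

    #include <stdatomic.h>
    #include <stddef.h>

    static _Atomic(size_t) arena_count;

    static void try_reset_count(size_t observed) {
      size_t expected = observed;
      // only resets to 0 if the count is still `observed`; otherwise leaves it alone
      atomic_compare_exchange_strong(&arena_count, &expected, 0);
    }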
From c1f17cd2538417620f60bff70bffe7e68d332aec Mon Sep 17 00:00:00 2001
From: Daan
Date: Tue, 17 Jun 2025 20:10:32 -0700
Subject: [PATCH 7/7] include std headers in bits for IDE

---
 include/mimalloc/bits.h  | 105 +++++++++++++--------------------------
 include/mimalloc/types.h |   1 +
 2 files changed, 36 insertions(+), 70 deletions(-)

diff --git a/include/mimalloc/bits.h b/include/mimalloc/bits.h
index 1d4063bb..c40b32f6 100644
--- a/include/mimalloc/bits.h
+++ b/include/mimalloc/bits.h
@@ -13,6 +13,9 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MI_BITS_H
 #define MI_BITS_H

+#include <stddef.h>    // size_t
+#include <stdint.h>    // int64_t etc
+#include <stdbool.h>   // bool

 // ------------------------------------------------------
 // Size of a pointer.
@@ -90,7 +93,7 @@ typedef int32_t  mi_ssize_t;
 #endif
 #endif

-#if (MI_ARCH_X86 || MI_ARCH_X64)
+#if MI_ARCH_X64 && defined(__AVX2__)
 #include <immintrin.h>
 #elif MI_ARCH_ARM64 && MI_OPT_SIMD
 #include <arm_neon.h>
 #endif
@@ -120,37 +123,20 @@ typedef int32_t  mi_ssize_t;
 #define MI_MAX_VABITS     (32)
 #endif

-
 // use a flat page-map (or a 2-level one)
 #ifndef MI_PAGE_MAP_FLAT
-#if MI_MAX_VABITS <= 40 && !MI_SECURE && !defined(__APPLE__)
+#if MI_MAX_VABITS <= 40 && !defined(__APPLE__)
 #define MI_PAGE_MAP_FLAT  1
 #else
 #define MI_PAGE_MAP_FLAT  0
 #endif
 #endif

-#if MI_PAGE_MAP_FLAT && MI_SECURE
-#error should not use MI_PAGE_MAP_FLAT with a secure build
-#endif
-

 /* --------------------------------------------------------------------------------
   Builtin's
-------------------------------------------------------------------------------- */

-#if defined(__GNUC__) || defined(__clang__)
-#define mi_unlikely(x)     (__builtin_expect(!!(x),false))
-#define mi_likely(x)       (__builtin_expect(!!(x),true))
-#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
-#define mi_unlikely(x)     (x) [[unlikely]]
-#define mi_likely(x)       (x) [[likely]]
-#else
-#define mi_unlikely(x)     (x)
-#define mi_likely(x)       (x)
-#endif
-
-
 #ifndef __has_builtin
 #define __has_builtin(x)  0
 #endif
@@ -188,28 +174,17 @@
 -------------------------------------------------------------------------------- */

 size_t _mi_popcount_generic(size_t x);
-extern bool _mi_cpu_has_popcnt;

 static inline size_t mi_popcount(size_t x) {
-  #if defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
-  #if !defined(__BMI1__)
-  if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
-  #endif
-  size_t r;
-  __asm ("popcnt\t%1,%0" : "=r"(r) : "r"(x) : "cc");
-  return r;
-  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86)
-  #if !defined(__BMI1__)
-  if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
-  #endif
-  return (size_t)mi_msc_builtinz(__popcnt)(x);
-  #elif defined(_MSC_VER) && MI_ARCH_ARM64
-  return (size_t)mi_msc_builtinz(__popcnt)(x);
-  #elif mi_has_builtinz(popcount)
+  #if mi_has_builtinz(popcount)
   return mi_builtinz(popcount)(x);
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  return mi_msc_builtinz(__popcnt)(x);
+  #elif MI_ARCH_X64 && defined(__BMI1__)
+  return (size_t)_mm_popcnt_u64(x);
   #else
   #define MI_HAS_FAST_POPCOUNT  0
-  return _mi_popcount_generic(x);
+  return (x<=1 ? x : _mi_popcount_generic(x));
   #endif
 }
@@ -223,29 +198,19 @@ size_t _mi_clz_generic(size_t x);
 size_t _mi_ctz_generic(size_t x);

 static inline size_t mi_ctz(size_t x) {
-  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
+  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)   // on x64 tzcnt is defined for 0
  size_t r;
   __asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
   return r;
-  #elif defined(__GNUC__) && MI_ARCH_X64
-  // tzcnt is interpreted as bsf if BMI1 is not supported (pre-haswell)
-  // if the argument is zero:
-  // - tzcnt: sets carry-flag, and returns MI_SIZE_BITS
-  // - bsf  : sets zero-flag, and leaves the destination _unmodified_ (on both AMD and Intel now, see )
-  // so we always initialize r to MI_SIZE_BITS to work correctly on all cpu's without branching
-  size_t r = MI_SIZE_BITS;
-  __asm ("tzcnt\t%1, %0" : "+r"(r) : "r"(x) : "cc");  // use '+r' to keep the assignment to r in case this becomes bsf on older cpu's
-  return r;
+  #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
+  return _tzcnt_u64(x);
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  unsigned long idx;
+  return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
   #elif mi_has_builtinz(ctz)
   return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS);
-  #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
-  return (x!=0 ? _tzcnt_u64(x) : MI_SIZE_BITS);  // ensure it still works on non-BMI1 cpu's as well
-  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  if (x==0) return MI_SIZE_BITS;  // test explicitly for `x==0` to avoid codegen bug (issue #1071)
-  unsigned long idx; mi_msc_builtinz(_BitScanForward)(&idx, x);
-  return (size_t)idx;
-  #elif defined(__GNUC__) && MI_ARCH_X86
-  size_t r = MI_SIZE_BITS;
+  #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
+  size_t r = MI_SIZE_BITS;  // bsf leaves destination unmodified if the argument is 0 (see )
   __asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc");
   return r;
   #elif MI_HAS_FAST_POPCOUNT
@@ -257,18 +222,17 @@ static inline size_t mi_ctz(size_t x) {
 }

 static inline size_t mi_clz(size_t x) {
-  // we don't optimize anymore to lzcnt as there are still non BMI1 cpu's around (like Intel Celeron, see issue #1016)
-  // on pre-haswell cpu's lzcnt gets executed as bsr which is not equivalent (at it returns the bit position)
   #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)   // on x64 lzcnt is defined for 0
   size_t r;
   __asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
   return r;
+  #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
+  return _lzcnt_u64(x);
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  unsigned long idx;
+  return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
   #elif mi_has_builtinz(clz)
   return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS);
-  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  if (x==0) return MI_SIZE_BITS;  // test explicitly for `x==0` to avoid codegen bug (issue #1071)
-  unsigned long idx; mi_msc_builtinz(_BitScanReverse)(&idx, x);
-  return (MI_SIZE_BITS - 1 - (size_t)idx);
   #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
   if (x==0) return MI_SIZE_BITS;
   size_t r;
@@ -297,11 +261,9 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
   bool is_zero;
   __asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
   return !is_zero;
-  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  if (x==0) return false;  // test explicitly for `x==0` to avoid codegen bug (issue #1071)
-  unsigned long i; mi_msc_builtinz(_BitScanForward)(&i, x);
-  *idx = (size_t)i;
-  return true;
+  #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  unsigned long i;
+  return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
   #else
   return (x!=0 ? (*idx = mi_ctz(x), true) : false);
   #endif
 }
@@ -311,11 +273,14 @@ static inline bool mi_bsf(size_t x, size_t* idx) {
 // return false if `x==0` (with `*idx` undefined) and true otherwise,
 // with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
 static inline bool mi_bsr(size_t x, size_t* idx) {
-  #if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-  if (x==0) return false;  // test explicitly for `x==0` to avoid codegen bug (issue #1071)
-  unsigned long i; mi_msc_builtinz(_BitScanReverse)(&i, x);
-  *idx = (size_t)i;
-  return true;
+  #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
+  // on x64 the carry flag is set on zero which gives better codegen
+  bool is_zero;
+  __asm ("lzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc");
+  return !is_zero;
+  #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
+  unsigned long i;
+  return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
   #else
   return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false);
   #endif
 }

diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 71b2c93b..b4e0cae6 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -22,6 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <mimalloc-stats.h>
 #include <stddef.h>   // ptrdiff_t
 #include <stdint.h>   // uintptr_t, uint16_t, etc
+#include <stdbool.h>  // bool
 #include <limits.h>   // SIZE_MAX etc.
 #include <errno.h>    // error codes
 #include "bits.h"     // size defines (MI_INTPTR_SIZE etc), bit operations
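Note: the bit-scan helpers above share one contract: `mi_ctz`/`mi_clz` return `MI_SIZE_BITS` for a zero argument, while `mi_bsf`/`mi_bsr` return `false` and leave `*idx` undefined. A quick standalone sanity check of that contract (a sketch using the functions from bits.h):

    #include <assert.h>

    static void check_bit_scan(void) {
      size_t idx;
      assert( mi_bsf(0x14, &idx) && idx == 2 );  // lowest set bit of 0b10100
      assert( mi_bsr(0x14, &idx) && idx == 4 );  // highest set bit of 0b10100
      assert( !mi_bsf(0, &idx) );                // zero: false, *idx undefined
      assert( !mi_bsr(0, &idx) );
      assert( mi_ctz(0) == MI_SIZE_BITS && mi_clz(0) == MI_SIZE_BITS );
    }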