From ce74c905f877c2a8e5ccb8d1fc767e467df47175 Mon Sep 17 00:00:00 2001
From: daanx
Date: Mon, 3 Mar 2025 20:18:40 -0800
Subject: [PATCH 1/5] improve generic_find_free

---
 src/bitmap.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/bitmap.c b/src/bitmap.c
index 3907e91d..2d0bb8f3 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -1560,11 +1560,18 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
       const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]);
       const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
       size_t eidx = 0;
-      mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y)  // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
+      mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y)
       {
         mi_assert_internal(eidx <= MI_BFIELD_BITS);
+
+        // don't search into non-accessed memory until we tried other size bins as well
+        if (bin < bbin && eidx >= cmap_entry_cycle) break;
+
+        // get the chunk idx
         const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
         mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
+        mi_assert_internal(bin >= bbin || chunk_idx <= chunk_acc);
+
         // only in the current size class!
         const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
         if ((mi_bbin_t)bin == chunk_bin || (bin == bbin && chunk_bin == MI_BBIN_NONE)) // only allow NONE at the final run

From c18a5537dc94d6103f0f4bb97a7175a2a57b0abc Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Mon, 3 Mar 2025 20:50:21 -0800
Subject: [PATCH 2/5] reduce medium page block size to 64k to reduce mem usage

---
 include/mimalloc/types.h | 4 ++--
 src/bitmap.c             | 2 +-
 src/options.c            | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index daf44a22..2f76cfe6 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -343,10 +343,10 @@ typedef struct mi_page_s {
 // The max object size are checked to not waste more than 12.5% internally over the page sizes.
 #define MI_SMALL_MAX_OBJ_SIZE   ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8)   // < ~8 KiB
 #if MI_ENABLE_LARGE_PAGES
-#define MI_MEDIUM_MAX_OBJ_SIZE  ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8)  // < 64 KiB
+#define MI_MEDIUM_MAX_OBJ_SIZE  ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8)  // < ~64 KiB
 #define MI_LARGE_MAX_OBJ_SIZE   (MI_LARGE_PAGE_SIZE/8)                       // <= 512KiB  // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
 #else
-#define MI_MEDIUM_MAX_OBJ_SIZE  (MI_MEDIUM_PAGE_SIZE/4)                      // <= 128 KiB
+#define MI_MEDIUM_MAX_OBJ_SIZE  (MI_MEDIUM_PAGE_SIZE/8)                      // <= 64 KiB
 #define MI_LARGE_MAX_OBJ_SIZE   MI_MEDIUM_MAX_OBJ_SIZE                       // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
 #endif
 #define MI_LARGE_MAX_OBJ_WSIZE  (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)

diff --git a/src/bitmap.c b/src/bitmap.c
index 2d0bb8f3..908562c0 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -1560,7 +1560,7 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
       const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]);
       const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
       size_t eidx = 0;
-      mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y)
+      mi_bfield_cycle_iterate(cmap_entry, tseq, cmap_entry_cycle, eidx, Y)
       {
         mi_assert_internal(eidx <= MI_BFIELD_BITS);

diff --git a/src/options.c b/src/options.c
index bf6cf437..94cb8b67 100644
--- a/src/options.c
+++ b/src/options.c
@@ -175,7 +175,7 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0,    UNINIT, MI_OPTION(max_vabits) },             // max virtual address space bits
   { MI_DEFAULT_PAGEMAP_COMMIT, UNINIT, MI_OPTION(pagemap_commit) },  // commit the full pagemap upfront?
-  { 0,    UNINIT, MI_OPTION(page_commit_on_demand) },  // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
+  { 1,    UNINIT, MI_OPTION(page_commit_on_demand) },  // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
   { 16,   UNINIT, MI_OPTION(page_reclaim_max) },       // don't reclaim pages if we already own N pages (in that size class)
 };

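[Editor's note: the two bitmap changes above are easier to follow with a
small model. The sketch below is illustrative only -- `cycle_iterate` and
`visit` are hypothetical stand-ins for mimalloc's mi_bfield_cycle_iterate,
which is a macro whose body can also exit early:

  #include <stdio.h>
  #include <stdint.h>

  #define NBITS 64

  // Visit the set bits of `field`, starting at bit (start % NBITS) and
  // wrapping around -- different `start` values spread concurrent
  // searchers over different parts of the bitmap.
  static void cycle_iterate(uint64_t field, size_t start,
                            void (*visit)(size_t idx)) {
    const size_t s = start % NBITS;
    for (size_t i = 0; i < NBITS; i++) {
      const size_t idx = (s + i) % NBITS;
      if ((field >> idx) & 1) { visit(idx); }
    }
  }

  static void print_idx(size_t idx) { printf("%zu ", idx); }

  int main(void) {
    const uint64_t field = 0x8100000000000081ULL;  // bits 0, 7, 56, 63 set
    cycle_iterate(field, 3,  print_idx); printf("\n");  // 7 56 63 0
    cycle_iterate(field, 60, print_idx); printf("\n");  // 63 0 7 56
    return 0;
  }

Patch 1 additionally breaks out of the iteration once `eidx` reaches
`cmap_entry_cycle` on the non-final bin passes, so the search does not
wander into not-yet-accessed chunks until the other size bins have been
tried; patch 2 then passes the full `tseq` instead of `tseq%8` as the
start position.]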
From 45f0b0a8a6d66ee3bc5eed73b622bb3abfaae68a Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 4 Mar 2025 07:46:10 -0800
Subject: [PATCH 3/5] remove extra verbose messages

---
 src/init.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/init.c b/src/init.c
index 0ed0198a..0e867f8b 100644
--- a/src/init.c
+++ b/src/init.c
@@ -157,7 +157,7 @@ static mi_decl_cache_align mi_tld_t tld_main =
 mi_decl_cache_align mi_heap_t heap_main = {
   &tld_main,              // thread local data
   NULL,                   // exclusive arena
-  0,                      // preferred numa node 
+  0,                      // preferred numa node
   0,                      // initial cookie
   //{ 0, 0 },             // the key of the main heap can be fixed (unlike page keys that need to be secure!)
   { {0x846ca68b}, {0}, 0, true },  // random
@@ -690,15 +690,6 @@ void mi_process_init(void) mi_attr_noexcept {
   // the following two can potentially allocate (on freeBSD for locks and thread keys)
   mi_subproc_main_init();
   mi_process_setup_auto_thread_done();
-
-  #if MI_DEBUG
-  _mi_verbose_message("debug level : %d\n", MI_DEBUG);
-  #endif
-  _mi_verbose_message("secure level: %d\n", MI_SECURE);
-  _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL);
-  #if MI_TSAN
-  _mi_verbose_message("thread santizer enabled\n");
-  #endif
   mi_thread_init();

   #if defined(_WIN32) && defined(MI_WIN_USE_FLS)

From 1c6b40d8bd7a3fd77f4590f50e88bf4d040a5375 Mon Sep 17 00:00:00 2001
From: daanx
Date: Tue, 4 Mar 2025 09:22:41 -0800
Subject: [PATCH 4/5] fix verbose option printing

---
 src/options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/options.c b/src/options.c
index 94cb8b67..e8eb85ad 100644
--- a/src/options.c
+++ b/src/options.c
@@ -202,7 +202,7 @@ void _mi_options_init(void) {
     }
   }
   #endif
-  if (!mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); }
+  if (mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); }
 }

 #define mi_stringifyx(str)  #str  // and stringify

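[Editor's note: patches 3 and 4 belong together -- the ad-hoc verbose lines
removed from mi_process_init are presumably superseded by the option dump in
_mi_options_init, but that dump was behind a negated test and so fired
exactly when verbose was *off*. A minimal reproduction of the bug class,
with hypothetical names in place of the real mi_option_* functions:

  #include <stdbool.h>
  #include <stdio.h>

  static void options_print(void) { printf("option dump ...\n"); }

  static void options_init(bool verbose) {
    // pre-patch bug: the `!` inverted the guard, dumping the options
    // precisely when the user had NOT asked for verbose output:
    //   if (!verbose) { options_print(); }
    if (verbose) { options_print(); }
  }

  int main(void) {
    options_init(false);  // silent, as expected
    options_init(true);   // prints the dump
    return 0;
  }
]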
From 119f2eff6c61eb4f0c315773944b914af47d48ef Mon Sep 17 00:00:00 2001
From: daanx
Date: Wed, 5 Mar 2025 09:51:40 -0800
Subject: [PATCH 5/5] use int for numa node count

---
 include/mimalloc/internal.h |  2 +-
 src/arena.c                 | 10 +++++-----
 src/os.c                    | 20 +++++++++++---------
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index b4515831..e8b1c919 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -160,7 +160,7 @@ bool _mi_os_secure_guard_page_reset_at(void* addr);
 bool   _mi_os_secure_guard_page_reset_before(void* addr);

 int    _mi_os_numa_node(void);
-size_t _mi_os_numa_node_count(void);
+int    _mi_os_numa_node_count(void);

 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid);
 void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid);

diff --git a/src/arena.c b/src/arena.c
index 70e1802b..2aa1f8fe 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1524,17 +1524,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
   if (pages == 0) return 0;

   // pages per numa node
-  size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
-  if (numa_count <= 0) numa_count = 1;
+  int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
+  if (numa_count <= 0) { numa_count = 1; }
   const size_t pages_per = pages / numa_count;
   const size_t pages_mod = pages % numa_count;
   const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);

   // reserve evenly among numa nodes
-  for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
+  for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
     size_t node_pages = pages_per;  // can be 0
-    if (numa_node < pages_mod) node_pages++;
-    int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
+    if ((size_t)numa_node < pages_mod) { node_pages++; }
+    int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
     if (err) return err;
     if (pages < node_pages) {
       pages = 0;

diff --git a/src/os.c b/src/os.c
index 69decb71..2ee4c897 100644
--- a/src/os.c
+++ b/src/os.c
@@ -694,18 +694,19 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
   Support NUMA aware allocation
 -----------------------------------------------------------------------------*/

-static _Atomic(size_t) _mi_numa_node_count; // = 0   // cache the node count
+static _Atomic(int) _mi_numa_node_count; // = 0   // cache the node count

-size_t _mi_os_numa_node_count(void) {
+int _mi_os_numa_node_count(void) {
   size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
   if mi_unlikely(count <= 0) {
     long ncount = mi_option_get(mi_option_use_numa_nodes);  // given explicitly?
-    if (ncount > 0) {
-      count = (size_t)ncount;
+    if (ncount > 0 && ncount < INT_MAX) {
+      count = (int)ncount;
     }
     else {
-      count = _mi_prim_numa_node_count();  // or detect dynamically
-      if (count == 0) { count = 1; }
+      const size_t n = _mi_prim_numa_node_count();  // or detect dynamically
+      if (n == 0 || n > INT_MAX) { count = 1; }
+      else { count = (int)n; }
     }
     mi_atomic_store_release(&_mi_numa_node_count, count);  // save it
     _mi_verbose_message("using %zd numa regions\n", count);
@@ -715,12 +716,13 @@
 }

 static int mi_os_numa_node_get(void) {
-  size_t numa_count = _mi_os_numa_node_count();
+  int numa_count = _mi_os_numa_node_count();
   if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
   // never more than the node count and >= 0
-  size_t numa_node = _mi_prim_numa_node();
+  const size_t n = _mi_prim_numa_node();
+  int numa_node = (n < INT_MAX ? (int)n : 0);
   if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
-  return (int)numa_node;
+  return numa_node;
 }

 int _mi_os_numa_node(void) {
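[Editor's note: a self-contained sketch of the narrowing logic in patch 5.
Illustrative only -- `detect_node_count` is a hypothetical stand-in for the
_mi_prim_numa_node_count primitive, and the option/atomic plumbing is
omitted:

  #include <limits.h>
  #include <stdio.h>

  static size_t detect_node_count(void) { return 4; }  // pretend the OS reports 4 nodes

  // Narrow a possibly-huge size_t/long node count down to a sane int,
  // mirroring the INT_MAX clamps the patch adds.
  static int numa_node_count(long explicit_count) {
    if (explicit_count > 0 && explicit_count < INT_MAX) {
      return (int)explicit_count;             // given explicitly (an option)
    }
    const size_t n = detect_node_count();     // or detect dynamically
    if (n == 0 || n > INT_MAX) { return 1; }  // clamp bogus values to 1
    return (int)n;
  }

  // Map a raw OS node id onto 0..count-1, as mi_os_numa_node_get now does.
  static int numa_node_get(size_t raw_node) {
    const int count = numa_node_count(0);
    if (count <= 1) { return 0; }             // single-node fast path
    int node = (raw_node < INT_MAX ? (int)raw_node : 0);
    if (node >= count) { node = node % count; }
    return node;
  }

  int main(void) {
    printf("count=%d, node(6)->%d\n", numa_node_count(0), numa_node_get(6));  // count=4, node 2
    return 0;
  }

The size_t -> int change keeps the node count in the same domain as the
node ids that mi_reserve_huge_os_pages_at and _mi_os_numa_node already use,
removing the (int) casts at each call site.]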