From a78374d816a84b5c72db3842eaada88785cc027e Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 14:22:02 -0700 Subject: [PATCH 01/68] bump version to 1.9.4 for further development --- cmake/mimalloc-config-version.cmake | 2 +- include/mimalloc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index aeea621f..0446485b 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 9) -set(mi_version_patch 3) +set(mi_version_patch 4) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/include/mimalloc.h b/include/mimalloc.h index 8ccfcec3..97cf7856 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 193 // major + 2 digits minor +#define MI_MALLOC_VERSION 194 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes From f2539bbe343c7f5c2c862d2d0cd9f1094ef4289e Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 28 Mar 2025 15:37:11 -0700 Subject: [PATCH 02/68] update readme --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 87cee98d..5a495275 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3-beta`: Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. 
From cbab63f6c9935080052dd2648d4480013d6ec2c7 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 30 Mar 2025 16:15:27 -0700 Subject: [PATCH 03/68] fix release build warning (unused mi_stat_total_print) --- src/stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/stats.c b/src/stats.c index 07ce7d16..92bc049c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -218,12 +218,14 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int _mi_fprintf(out, arg, "\n"); } +#if MI_STAT>1 static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { _mi_fprintf(out, arg, "%10s:", msg); _mi_fprintf(out, arg, "%12s", " "); // no peak mi_print_amount(stat->total, unit, out, arg); _mi_fprintf(out, arg, "\n"); } +#endif static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); From 07743454e5a04356144918e32b1e2ce8e80c1726 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 10:57:16 -0700 Subject: [PATCH 04/68] fix dynamic override test on non-windows platforms --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cc7ec5c..879aa668 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -729,8 +729,8 @@ if (MI_BUILD_TESTS) endif() target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) - target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version if(WIN32) + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 $) else() if(APPLE) From a9e94674299479588b742cefa3ebe36bb72cc83b Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 11:00:05 -0700 Subject: [PATCH 05/68] 
make dynamic override test verbose --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 879aa668..66b24d89 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -731,14 +731,14 @@ if (MI_BUILD_TESTS) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) if(WIN32) target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version - add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 $) + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $) else() if(APPLE) set(LD_PRELOAD "DYLD_INSERT_LIBRARIES") else() set(LD_PRELOAD "LD_PRELOAD") endif() - add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 ${LD_PRELOAD}=$ $) + add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 ${LD_PRELOAD}=$ $) endif() endif() endif() From e1110cdb9f64ec319f91fb5b5607bffd3ed76559 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 11:02:10 -0700 Subject: [PATCH 06/68] nicer cmake logic for windows override test --- CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 66b24d89..591ba130 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -724,13 +724,11 @@ if (MI_BUILD_TESTS) if(MI_BUILD_SHARED AND NOT (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) add_executable(mimalloc-test-stress-dynamic test/test-stress.c) target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE ${mi_defines} "USE_STD_MALLOC=1") - if(WIN32) - target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") - endif() target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-stress-dynamic PRIVATE include) if(WIN32) - target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # 
mi_version + target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") # link mi_version + target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # link mi_version add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $) else() if(APPLE) From 77b622511ad86ff7ba01154dfca86dafe9032b86 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 31 Mar 2025 14:44:46 -0700 Subject: [PATCH 07/68] fix alpine compilation with prctl.h (issue #1059) --- src/prim/unix/prim.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 32004fe4..09aa91b5 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -31,10 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__linux__) #include - #include // PR_SET_VMA - //#if defined(MI_NO_THP) - #include // THP disable - //#endif + #include // THP disable, PR_SET_VMA #if defined(__GLIBC__) #include // linux mmap flags #else @@ -208,7 +205,7 @@ static int unix_madvise(void* addr, size_t size, int advice) { static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) { void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */); - #if (defined(__linux__) && defined(PR_SET_VMA)) + #if defined(__linux__) && defined(PR_SET_VMA) if (p!=MAP_FAILED && p!=NULL) { prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc"); } From fe47ec625db55a51eb5cc66655a8dfb06e03b6a6 Mon Sep 17 00:00:00 2001 From: Eduard Voronkin Date: Mon, 31 Mar 2025 15:51:26 -0700 Subject: [PATCH 08/68] fix recursion in TLS init on Android --- src/options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 772dfe66..610fe5ba 100644 --- a/src/options.c +++ b/src/options.c @@ -425,14 +425,14 @@ static mi_decl_noinline void mi_recurse_exit_prim(void) { } static bool mi_recurse_enter(void) { - 
#if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return false; #endif return mi_recurse_enter_prim(); } static void mi_recurse_exit(void) { - #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(__ANDROID__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return; #endif mi_recurse_exit_prim(); From 235a0390eec64c54f97e6bcbfc0e24307031812a Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:34:00 -0700 Subject: [PATCH 09/68] refactor numa_node_count --- include/mimalloc/internal.h | 22 +++------------------ src/os.c | 39 ++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5b3e7e23..51fad09c 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -140,9 +140,11 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); - void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); +int _mi_os_numa_node_count(void); +int _mi_os_numa_node(void); + // arena.c mi_arena_id_t _mi_arena_id_none(void); void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid); @@ -813,24 +815,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) { return x; } -// ------------------------------------------------------------------- -// Optimize numa node access for the common case (= one node) -// ------------------------------------------------------------------- - -int _mi_os_numa_node_get(void); -size_t _mi_os_numa_node_count_get(void); - -extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count; 
-static inline int _mi_os_numa_node(void) { - if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } - else return _mi_os_numa_node_get(); -} -static inline size_t _mi_os_numa_node_count(void) { - const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); - if mi_likely(count > 0) { return count; } - else return _mi_os_numa_node_count_get(); -} - // ----------------------------------------------------------------------- diff --git a/src/os.c b/src/os.c index 12cc5da3..894e3a45 100644 --- a/src/os.c +++ b/src/os.c @@ -696,34 +696,47 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) { } } + /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count +static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count -size_t _mi_os_numa_node_count_get(void) { - size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); - if (count <= 0) { +int _mi_os_numa_node_count(void) { + size_t count = mi_atomic_load_acquire(&mi_numa_node_count); + if mi_unlikely(count == 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? 
- if (ncount > 0) { + if (ncount > 0 && ncount < INT_MAX) { count = (size_t)ncount; } else { - count = _mi_prim_numa_node_count(); // or detect dynamically - if (count == 0) count = 1; + const size_t n = _mi_prim_numa_node_count(); // or detect dynamically + if (n == 0 || n > INT_MAX) { count = 1; } + else { count = n; } } - mi_atomic_store_release(&_mi_numa_node_count, count); // save it + mi_atomic_store_release(&mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } - return count; + mi_assert_internal(count > 0 && count <= INT_MAX); + return (int)count; } -int _mi_os_numa_node_get(void) { - size_t numa_count = _mi_os_numa_node_count(); +static int mi_os_numa_node_get(void) { + int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = _mi_prim_numa_node(); + const size_t n = _mi_prim_numa_node(); + int numa_node = (n < INT_MAX ? 
(int)n : 0); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - return (int)numa_node; + return numa_node; +} + +int _mi_os_numa_node(void) { + if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { + return 0; + } + else { + return mi_os_numa_node_get(); + } } From 3c3600f85fe4a6d7f01a9e69db3cada9e63627cb Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:36:01 -0700 Subject: [PATCH 10/68] add atomic_cas_ptr_strong_acq_rel --- include/mimalloc/atomic.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 39ff5c90..e8bac316 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -111,6 +111,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) @@ -120,6 +121,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x) #define 
mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) @@ -303,6 +305,7 @@ static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) From 5a58df6534cbc8673a655e5772461ef7fd4bcbcb Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:40:30 -0700 Subject: [PATCH 11/68] fix signed compare warning --- src/arena.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index a7c20764..bdae8da1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1007,17 +1007,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (pages == 0) return 0; // pages per numa node - size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); - if (numa_count <= 0) numa_count = 1; + int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? 
(int)numa_nodes : _mi_os_numa_node_count()); + if (numa_count == 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes - for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); + if ((size_t)numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; From d767dbfbb45e2e38502b03dbb57698845899d34f Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:50:36 -0700 Subject: [PATCH 12/68] use C++ compilation with clang-cl (as well as msvc) on Windows --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 591ba130..a3acf83e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,7 +173,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall) endif() -if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") +# force C++ compilation with msvc or clang-cl to use modern C++ atomics +if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL) set(MI_USE_CXX "ON") endif() From 8c99ac1bbd9c692239bbf70c40f9be578d54d394 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 11:16:33 -0700 Subject: [PATCH 13/68] fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 5a495275..70a25fc1 100644 --- a/readme.md +++ b/readme.md @@ -177,7 +177,7 @@ mimalloc is used in various large scale low-latency services and programs, for e Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. 
The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the -`mimalloc-override-dll` project builds DLL for overriding malloc +`mimalloc-override-dll` project builds a DLL for overriding malloc in the entire program. ## Linux, macOS, BSD, etc. From bc8eca8bf2641f12ecc23d7527aecdb62d6d2939 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Apr 2025 12:09:09 -0700 Subject: [PATCH 14/68] typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 70a25fc1..cee78898 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. From 951538d469f72532e43a2437d556a73518f5dde4 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 8 Apr 2025 13:56:31 -0700 Subject: [PATCH 15/68] fix prctl.h includes for alpine linux/musl (hopefully fixes #1065, #1066, #1067) --- src/prim/unix/prim.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 09aa91b5..e3888e73 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -32,6 +32,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #if defined(__linux__) #include #include // THP disable, PR_SET_VMA + #if !defined(PR_SET_VMA) + #include + #endif #if defined(__GLIBC__) #include // linux mmap flags #else From 7543e8989abe41e87f260424b3711931d680da77 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 13 Apr 2025 19:49:47 -0700 Subject: [PATCH 16/68] validate pointer before assertion in mi_free_size (issue #754) --- src/free.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/free.c b/src/free.c index 7e529530..22284135 100644 --- a/src/free.c +++ b/src/free.c @@ -340,7 +340,11 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); + #if MI_DEBUG + mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free_size"); + if mi_unlikely(segment==NULL) return; mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + #endif mi_free(p); } From fae61ed946ceaf88f8e902aa596bb46305b531d6 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 13 Apr 2025 19:56:49 -0700 Subject: [PATCH 17/68] fix assertion in mi_free_size (issue #754) --- src/free.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/free.c b/src/free.c index 22284135..d0940a16 100644 --- a/src/free.c +++ b/src/free.c @@ -341,9 +341,8 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); #if MI_DEBUG - mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free_size"); - if mi_unlikely(segment==NULL) return; - mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + const size_t available = _mi_usable_size(p,"mi_free_size"); + mi_assert(p == NULL || size <= available || available == 0 /* invalid pointer */ ); #endif mi_free(p); } From aad0bc2ae3bc498b8e405d3f89be90c22abe76d8 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 2 May 2025 
08:09:40 -0700 Subject: [PATCH 18/68] fix cast on msvc --- include/mimalloc/internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 51fad09c..6283f1d1 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -855,7 +855,7 @@ static inline size_t mi_clz(size_t x) { #else _BitScanReverse64(&idx, x); #endif - return ((MI_SIZE_BITS - 1) - idx); + return ((MI_SIZE_BITS - 1) - (size_t)idx); } static inline size_t mi_ctz(size_t x) { if (x==0) return MI_SIZE_BITS; @@ -865,7 +865,7 @@ static inline size_t mi_ctz(size_t x) { #else _BitScanForward64(&idx, x); #endif - return idx; + return (size_t)idx; } #else From 6bfb1c656c6da4eaa8dab4d8fc0197f1ed946483 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 08:40:21 -0700 Subject: [PATCH 19/68] allow size==0 for mi_prim_free (issue #1041) --- src/os.c | 14 +++++++------- src/prim/emscripten/prim.c | 4 ++-- src/prim/unix/prim.c | 5 +++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/os.c b/src/os.c index 894e3a45..be7e532c 100644 --- a/src/os.c +++ b/src/os.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
@@ -167,8 +167,8 @@ static void mi_os_free_huge_os_pages(void* p, size_t size); static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { mi_assert_internal((size % _mi_os_page_size()) == 0); - if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) - int err = _mi_prim_free(addr, size); + if (addr == NULL) return; // || _mi_os_is_huge_reserved(addr) + int err = _mi_prim_free(addr, size); // allow size==0 (issue #1041) if (err != 0) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } @@ -186,10 +186,10 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me void* base = addr; // different base? (due to alignment) if (memid.mem.os.base != base) { - mi_assert(memid.mem.os.base <= addr); + mi_assert(memid.mem.os.base <= addr); base = memid.mem.os.base; const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base; - if (memid.mem.os.size==0) { + if (memid.mem.os.size==0) { csize += diff; } if (still_committed) { @@ -733,8 +733,8 @@ static int mi_os_numa_node_get(void) { } int _mi_os_numa_node(void) { - if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { - return 0; + if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { + return 0; } else { return mi_os_numa_node_get(); diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index 82147de7..a8677cbc 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen, Alon Zakai +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen, Alon Zakai This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
@@ -58,7 +58,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config) { extern void emmalloc_free(void*); int _mi_prim_free(void* addr, size_t size) { - MI_UNUSED(size); + if (size==0) return 0; emmalloc_free(addr); return 0; } diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index e3888e73..f93e458a 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +Copyright (c) 2018-2025, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -70,7 +70,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MADV_FREE POSIX_MADV_FREE #endif - + //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. // and do allocation themselves; using syscalls prevents recursion when mimalloc is @@ -186,6 +186,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) //--------------------------------------------- int _mi_prim_free(void* addr, size_t size ) { + if (size==0) return 0; bool err = (munmap(addr, size) == -1); return (err ? 
errno : 0); } From 2c34250f43c0c06cdc3c405781eed90daf008361 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 08:55:16 -0700 Subject: [PATCH 20/68] extend override test on windows --- test/main-override-dep.cpp | 38 +++++++++++++++++++++++++++++++++++++- test/main-override.cpp | 5 +++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/test/main-override-dep.cpp b/test/main-override-dep.cpp index e92f6fc4..d89e3fca 100644 --- a/test/main-override-dep.cpp +++ b/test/main-override-dep.cpp @@ -1,6 +1,7 @@ // Issue #981: test overriding allocation in a DLL that is compiled independent of mimalloc. // This is imported by the `mimalloc-test-override` project. #include +#include #include "main-override-dep.h" std::string TestAllocInDll::GetString() @@ -10,6 +11,41 @@ std::string TestAllocInDll::GetString() const char* t = "test"; memcpy(test, t, 4); std::string r = test; + std::cout << "override-dep: GetString: " << r << "\n"; delete[] test; return r; -} \ No newline at end of file +} + + +class Static { +private: + void* p; +public: + Static() { + printf("override-dep: static constructor\n"); + p = malloc(64); + return; + } + ~Static() { + free(p); + printf("override-dep: static destructor\n"); + return; + } +}; + +static Static s = Static(); + + +#include + +BOOL WINAPI DllMain(HINSTANCE module, DWORD reason, LPVOID reserved) { + (void)(reserved); + (void)(module); + if (reason==DLL_PROCESS_ATTACH) { + printf("override-dep: dll attach\n"); + } + else if (reason==DLL_PROCESS_DETACH) { + printf("override-dep: dll detach\n"); + } + return TRUE; +} diff --git a/test/main-override.cpp b/test/main-override.cpp index db594acc..576f47bc 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -37,7 +37,7 @@ static void test_thread_local(); // issue #944 static void test_mixed1(); // issue #942 static void test_stl_allocators(); -#if x_WIN32 +#if _WIN32 #include "main-override-dep.h" static void test_dep(); // issue #981: test overriding in 
another DLL #else @@ -145,11 +145,12 @@ static bool test_stl_allocator1() { struct some_struct { int i; int j; double z; }; -#if x_WIN32 +#if _WIN32 static void test_dep() { TestAllocInDll t; std::string s = t.GetString(); + std::cout << "test_dep GetString: " << s << "\n"; } #endif From f3e92b1edd851a4d1e2e2c4bbada87f2855dc834 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 16:12:20 -0700 Subject: [PATCH 21/68] Use second user TLS slot to avoid using reserved fields in the TEB (issue #1078) --- include/mimalloc/prim.h | 15 ++++++++------- src/prim/windows/prim.c | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index bddd66e9..b0ddc2d0 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,19 +208,20 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. This uses a fixed location -// in the TCB though (last user-reserved slot by default) which may clash with other applications. - +// thread-local initialization checks in the fast path. +// We always use the second user TLS slot (the first one is always allocated already), +// and at initialization (`windows/prim.c`) we call TlsAlloc and verify +// we indeed get the second slot (and fail otherwise). +// Todo: we could make the Tls slot completely dynamic but that would require +// an extra read of the static Tls slot instead of using a constant offset. 
#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) #if MI_WIN_USE_FIXED_TLS > 1 #define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) #elif MI_SIZE_SIZE == 4 -#define MI_TLS_SLOT (0x710) // Last user-reserved slot -// #define MI_TLS_SLOT (0xF0C) // Last TlsSlot (might clash with other app reserved slot) +#define MI_TLS_SLOT (0x0E18) // Second User TLS slot #else -#define MI_TLS_SLOT (0x888) // Last user-reserved slot -// #define MI_TLS_SLOT (0x1678) // Last TlsSlot (might clash with other app reserved slot) +#define MI_TLS_SLOT (0x1488) // Second User TLS slot #endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index a080f4bc..7daa09ef 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -631,8 +631,20 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); #if MI_TLS_SLOT >= 2 - if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + if (reason==DLL_PROCESS_ATTACH) { + const DWORD tls_slot = TlsAlloc(); + if (tls_slot != 1) { + _mi_error_message(EFAULT, "unable to allocate the second TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + } + } + if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { + if (mi_prim_get_default_heap() == NULL) { + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + } + #if MI_DEBUG + void* const p = TlsGetValue(1); + mi_assert_internal(p == (void*)&_mi_heap_empty); + #endif } #endif if (reason==DLL_PROCESS_ATTACH) { From 417e8176bddce86c2d53656c5552f5bb96304c46 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 2 May 2025 16:27:00 -0700 Subject: [PATCH 22/68] add unix large page size constant and adjust aligment to the large page size for large allocations --- src/prim/unix/prim.c | 8 ++++++-- 1 file changed, 6 
insertions(+), 2 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index f93e458a..f3ccb013 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -70,6 +70,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MADV_FREE POSIX_MADV_FREE #endif +#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this? //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. @@ -156,7 +157,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) } #endif } - config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? + config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE; config->has_overcommit = unix_detect_overcommit(); config->has_partial_free = true; // mmap can free in parts config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE) @@ -386,6 +387,9 @@ int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool comm mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); + if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) { + try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations + } *is_zero = true; int protect_flags = (commit ? 
(PROT_WRITE | PROT_READ) : PROT_NONE); @@ -433,7 +437,7 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = unix_madvise(start, size, MADV_DONTNEED); - #if !MI_DEBUG && !MI_SECURE + #if !MI_DEBUG && MI_SECURE<=2 *needs_recommit = false; #else *needs_recommit = true; From e4c5d09d65ff7743fe5e5dfadd6f082e839ff791 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:04:57 -0700 Subject: [PATCH 23/68] improve TLS access on Windows with msvc (by Frank Richter, issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index b0ddc2d0..a722d721 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. +// thread-local initialization checks in the fast path. // We always use the second user TLS slot (the first one is always allocated already), // and at initialization (`windows/prim.c`) we call TlsAlloc and verify // we indeed get the second slot (and fail otherwise). 
@@ -270,6 +270,9 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly +#ifdef _MSC_VER +__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) +#endif extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern bool _mi_process_is_initialized; // has mi_process_init been called? From f989a1cbb9b63043f1e56d248efe1ede9a6651d7 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:10:38 -0700 Subject: [PATCH 24/68] add more decl_hidden specifiers on extern variables to improve access on arm64 --- include/mimalloc/internal.h | 6 +++--- include/mimalloc/prim.h | 6 +++--- src/page.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 51fad09c..c9362aa0 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -96,7 +96,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; +extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); @@ -958,8 +958,8 @@ static inline size_t mi_popcount(size_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -extern bool _mi_cpu_has_fsrm; -extern bool _mi_cpu_has_erms; +extern mi_decl_hidden bool _mi_cpu_has_fsrm; +extern mi_decl_hidden bool _mi_cpu_has_erms; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index a722d721..527bb97a 100644 --- 
a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -273,8 +273,8 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #ifdef _MSC_VER __declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) #endif -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; @@ -402,7 +402,7 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) { #elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; +extern mi_decl_hidden pthread_key_t _mi_heap_default_key; static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); diff --git a/src/page.c b/src/page.c index 6a693e89..55150f33 100644 --- a/src/page.c +++ b/src/page.c @@ -114,7 +114,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? 
bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); @@ -979,9 +979,9 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // free delayed frees from other threads (but skip contended ones) _mi_heap_delayed_free_partial(heap); - + // collect every once in a while (10000 by default) - const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); + const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); if (heap->generic_collect_count >= generic_collect) { heap->generic_collect_count = 0; mi_heap_collect(heap, false /* force? */); From 9194362e4858bdd2eaf1b1cb9075abaa6ace2460 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:04:57 -0700 Subject: [PATCH 25/68] improve TLS access on Windows with msvc (by Frank Richter, issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index b0ddc2d0..a722d721 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #elif _WIN32 && MI_WIN_USE_FIXED_TLS && !defined(MI_WIN_USE_FLS) // On windows we can store the thread-local heap at a fixed TLS slot to avoid -// thread-local initialization checks in the fast path. +// thread-local initialization checks in the fast path. 
// We always use the second user TLS slot (the first one is always allocated already), // and at initialization (`windows/prim.c`) we call TlsAlloc and verify // we indeed get the second slot (and fail otherwise). @@ -270,6 +270,9 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly +#ifdef _MSC_VER +__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) +#endif extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern bool _mi_process_is_initialized; // has mi_process_init been called? From 9c24c428cb06c735ccc3dcca996c2d09bb139d08 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 09:10:38 -0700 Subject: [PATCH 26/68] add more decl_hidden specifiers on extern variables to improve access on arm64 --- include/mimalloc/internal.h | 6 +++--- include/mimalloc/prim.h | 6 +++--- src/page.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 6283f1d1..b11bd357 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -96,7 +96,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; +extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); @@ -958,8 +958,8 @@ static inline size_t mi_popcount(size_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include -extern bool _mi_cpu_has_fsrm; -extern bool _mi_cpu_has_erms; +extern mi_decl_hidden bool _mi_cpu_has_fsrm; +extern mi_decl_hidden bool _mi_cpu_has_erms; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if ((_mi_cpu_has_fsrm 
&& n <= 128) || (_mi_cpu_has_erms && n > 128)) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index a722d721..527bb97a 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -273,8 +273,8 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce #ifdef _MSC_VER __declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) #endif -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; @@ -402,7 +402,7 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) { #elif defined(MI_TLS_PTHREAD) -extern pthread_key_t _mi_heap_default_key; +extern mi_decl_hidden pthread_key_t _mi_heap_default_key; static inline mi_heap_t* mi_prim_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); diff --git a/src/page.c b/src/page.c index 6a693e89..55150f33 100644 --- a/src/page.c +++ b/src/page.c @@ -114,7 +114,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } -extern bool _mi_process_is_initialized; // has mi_process_init been called? +extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? 
bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); @@ -979,9 +979,9 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // free delayed frees from other threads (but skip contended ones) _mi_heap_delayed_free_partial(heap); - + // collect every once in a while (10000 by default) - const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); + const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L); if (heap->generic_collect_count >= generic_collect) { heap->generic_collect_count = 0; mi_heap_collect(heap, false /* force? */); From 63b0989df57a9dd2b867920307b0d038df695a54 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 4 May 2025 21:41:26 -0700 Subject: [PATCH 27/68] revert use of selectany for msvc (issue #1078) --- include/mimalloc/prim.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 527bb97a..2d508148 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -270,9 +270,6 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // defined in `init.c`; do not use these directly -#ifdef _MSC_VER -__declspec(selectany) // make it part of the comdat section to have faster TLS access (issue #1078) -#endif extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called? 
From 52b75693c48308e8b19b94ffa7fbc0580021ba87 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 22:03:10 -0700 Subject: [PATCH 28/68] use TlsAlloc with a dynamic offset for MI_WIN_USE_FIXED_TLS by default (issue #1078) --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 ++-- include/mimalloc/prim.h | 13 ++++++------- src/prim/windows/prim.c | 15 ++++++++++----- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index 128a4ff6..d6af71ce 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 2d508148..60af4d59 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -209,19 +209,18 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // On windows we can store the thread-local heap at a fixed TLS slot to avoid // thread-local initialization checks in the fast path. -// We always use the second user TLS slot (the first one is always allocated already), -// and at initialization (`windows/prim.c`) we call TlsAlloc and verify -// we indeed get the second slot (and fail otherwise). -// Todo: we could make the Tls slot completely dynamic but that would require -// an extra read of the static Tls slot instead of using a constant offset. +// We allocate a user TLS slot at process initialization (see `windows/prim.c`) +// and store the offset `_mi_win_tls_offset`. 
#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) +extern mi_decl_hidden size_t _mi_win_tls_offset; + #if MI_WIN_USE_FIXED_TLS > 1 #define MI_TLS_SLOT (MI_WIN_USE_FIXED_TLS) #elif MI_SIZE_SIZE == 4 -#define MI_TLS_SLOT (0x0E18) // Second User TLS slot +#define MI_TLS_SLOT (0x0E10 + _mi_win_tls_offset) // User TLS slots #else -#define MI_TLS_SLOT (0x1488) // Second User TLS slot +#define MI_TLS_SLOT (0x1480 + _mi_win_tls_offset) // User TLS slots #endif static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 7daa09ef..c91102a2 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -627,22 +627,27 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Process & Thread Init/Done //---------------------------------------------------------------- +#if MI_HAS_TLS_SLOT +mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default +#endif + static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); - #if MI_TLS_SLOT >= 2 + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if (reason==DLL_PROCESS_ATTACH) { const DWORD tls_slot = TlsAlloc(); - if (tls_slot != 1) { - _mi_error_message(EFAULT, "unable to allocate the second TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); } + _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } #if MI_DEBUG - void* const p = TlsGetValue(1); + void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == 
(void*)&_mi_heap_empty); #endif } @@ -808,7 +813,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain - #if MI_TLS_SLOT >= 2 + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } From 303b196d403876f324e7456854a148e85682c2d9 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 4 May 2025 22:09:56 -0700 Subject: [PATCH 29/68] fix MI_WIN_USE_FIXED_TLS conditions --- src/prim/windows/prim.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index c91102a2..d0fee4c2 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -627,7 +627,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { // Process & Thread Init/Done //---------------------------------------------------------------- -#if MI_HAS_TLS_SLOT +#if MI_WIN_USE_FIXED_TLS==1 mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default #endif @@ -635,6 +635,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(reserved); MI_UNUSED(module); #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation + #if MI_WIN_USE_FIXED_TLS==1 if (reason==DLL_PROCESS_ATTACH) { const DWORD tls_slot = TlsAlloc(); if (tls_slot == TLS_OUT_OF_INDEXES) { @@ -642,11 +643,12 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { } _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } + #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); } - #if MI_DEBUG + #if MI_DEBUG && 
MI_WIN_USE_FIXED_TLS==1 void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); #endif From e2d7c24c7362a19429f7338f0e5ed493f7c1d7b0 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 4 May 2025 22:17:59 -0700 Subject: [PATCH 30/68] add fixed TLS slot test to pipeline on Windows --- azure-pipelines.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 25d4a6e0..b7fc59d4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -42,6 +42,14 @@ jobs: BuildType: release cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -A Win32 MSBuildConfiguration: Release + Debug Fixed TLS: + BuildType: debug + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Debug + Release Fixed TLS: + BuildType: release + cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_WIN_USE_FIXED_TLS=ON + MSBuildConfiguration: Release steps: - task: CMake@1 inputs: From c84d996e884412b1fa58fa48ee6fc6e2fa841446 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 5 May 2025 10:23:52 -0700 Subject: [PATCH 31/68] fix TLS initialization for MI_WIN_USE_FIXED_TLS with redirection --- include/mimalloc/prim.h | 2 +- src/prim/windows/prim.c | 28 +++++++++++++++------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 60af4d59..d3157949 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -119,7 +119,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); - +#define MI_WIN_USE_FIXED_TLS 1 //------------------------------------------------------------------- // Access to TLS (thread local storage) slots. 
diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index d0fee4c2..535d34a6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -628,18 +628,16 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { //---------------------------------------------------------------- #if MI_WIN_USE_FIXED_TLS==1 -mi_decl_cache_align size_t _mi_win_tls_offset = sizeof(void*); // use 2nd slot by default +mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif -static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { - MI_UNUSED(reserved); - MI_UNUSED(module); +static void mi_win_tls_init(DWORD reason) { #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation #if MI_WIN_USE_FIXED_TLS==1 - if (reason==DLL_PROCESS_ATTACH) { - const DWORD tls_slot = TlsAlloc(); - if (tls_slot == TLS_OUT_OF_INDEXES) { - _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + if (reason==DLL_PROCESS_ATTACH && _mi_win_tls_offset == 0) { + const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); } _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); } @@ -653,7 +651,15 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } + #else + MI_UNUSED(reason); #endif +} + +static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { + MI_UNUSED(reserved); + MI_UNUSED(module); + mi_win_tls_init(reason); if (reason==DLL_PROCESS_ATTACH) { _mi_process_load(); } @@ -815,11 +821,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #endif mi_decl_export void _mi_redirect_entry(DWORD reason) { // called on redirection; careful as this may be called before DllMain - #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS 
slot before any allocation - if ((reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) && mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); - } - #endif + mi_win_tls_init(reason); if (reason == DLL_PROCESS_ATTACH) { mi_redirected = true; } From 0184a86eaf4cf0018d544e5992b86f5ede688601 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:32:11 -0700 Subject: [PATCH 32/68] add alpine x86 docker file --- contrib/docker/alpine-arm32v7/Dockerfile | 2 +- contrib/docker/alpine-x86/Dockerfile | 28 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 contrib/docker/alpine-x86/Dockerfile diff --git a/contrib/docker/alpine-arm32v7/Dockerfile b/contrib/docker/alpine-arm32v7/Dockerfile index f74934fb..daa60f50 100644 --- a/contrib/docker/alpine-arm32v7/Dockerfile +++ b/contrib/docker/alpine-arm32v7/Dockerfile @@ -1,6 +1,6 @@ # install from an image # download first an appropriate tar.gz image into the current directory -# from: +# from FROM scratch # Substitute the image name that was downloaded diff --git a/contrib/docker/alpine-x86/Dockerfile b/contrib/docker/alpine-x86/Dockerfile new file mode 100644 index 00000000..a0f76c17 --- /dev/null +++ b/contrib/docker/alpine-x86/Dockerfile @@ -0,0 +1,28 @@ +# install from an image +# download first an appropriate tar.gz image into the current directory +# from +FROM scratch + +# Substitute the image name that was downloaded +ADD alpine-minirootfs-20250108-x86.tar.gz / + +# Install tools +RUN apk add build-base make cmake +RUN apk add git +RUN apk add vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev2 +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. 
-DMI_DEBUG_FULL=ON +# RUN make -j +# RUN make test + +CMD ["/bin/sh"] From 341149391fee496790a7fa916b1fd3fdd0cce1a1 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:33:29 -0700 Subject: [PATCH 33/68] fix include of prctl.h on alpine linux x86 --- src/prim/unix/prim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index f3ccb013..a90fa659 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -32,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__linux__) #include #include // THP disable, PR_SET_VMA - #if !defined(PR_SET_VMA) + #if defined(__GLIBC__) && !defined(PR_SET_VMA) #include #endif #if defined(__GLIBC__) From a6ecb5c299e65eb7dd6602b97235126acc01a868 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:35:29 -0700 Subject: [PATCH 34/68] fix format specifier (for alpine linux x86, issue #1086) --- src/arena.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/arena.c b/src/arena.c index bdae8da1..aa01ffcb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -44,7 +44,7 @@ typedef struct mi_arena_s { mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited _Atomic(size_t) search_idx; // optimization to start the search for free blocks _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be purged from `blocks_purge`. - + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. 
(can be NULL for memory that cannot be (reset) decommitted) @@ -365,7 +365,7 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t *arena_id) { if (_mi_preloading()) return false; // use OS only while pre loading - + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); if (arena_count > (MI_MAX_ARENAS - 4)) return false; @@ -407,7 +407,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (!mi_option_is_enabled(mi_option_disallow_arena_alloc)) { // is arena allocation allowed? - if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid); if (p != NULL) return p; @@ -487,7 +487,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks) // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory). mi_assert_internal(already_committed < blocks); mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed)); } // clear the purged blocks @@ -556,7 +556,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) { // check pre-conditions if (arena->memid.is_pinned) return false; - + // expired yet? 
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); if (!force && (expire == 0 || expire > now)) return false; @@ -611,7 +611,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) return any_purged; } -static void mi_arenas_try_purge( bool force, bool visit_all ) +static void mi_arenas_try_purge( bool force, bool visit_all ) { if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled @@ -628,7 +628,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all ) mi_atomic_guard(&purge_guard) { // increase global expire: at most one purge per delay cycle - mi_atomic_storei64_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay()); + mi_atomic_storei64_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay()); size_t max_purge_count = (visit_all ? max_arena : 2); bool all_visited = true; for (size_t i = 0; i < max_arena; i++) { @@ -947,7 +947,7 @@ void mi_debug_show_arenas(void) mi_attr_noexcept { for (size_t i = 0; i < max_arenas; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); if (arena == NULL) break; - _mi_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, MI_ARENA_BLOCK_SIZE / MI_MiB, arena->field_count, (arena->memid.is_pinned ? ", pinned" : "")); + _mi_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, (size_t)(MI_ARENA_BLOCK_SIZE / MI_MiB), arena->field_count, (arena->memid.is_pinned ? 
", pinned" : "")); if (show_inuse) { inuse_total += mi_debug_show_bitmap(" ", "inuse blocks", arena->block_count, arena->blocks_inuse, arena->field_count); } From 72f05e2f076b3e1b160b8aaca7bc220a2532ced0 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 15:58:45 -0700 Subject: [PATCH 35/68] fix guarded sample rate of 1 (issue #1085) --- include/mimalloc/types.h | 1 - src/init.c | 17 ++++++++--------- test/main-override-static.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ab697f23..e2b5d318 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -512,7 +512,6 @@ struct mi_heap_s { size_t guarded_size_min; // minimal size for guarded objects size_t guarded_size_max; // maximal size for guarded objects size_t guarded_sample_rate; // sample rate (set to 0 to disable guarded pages) - size_t guarded_sample_seed; // starting sample count size_t guarded_sample_count; // current sample count (counting down to 0) #endif mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. 
diff --git a/src/init.c b/src/init.c index 8a48ae5e..fe0acd8a 100644 --- a/src/init.c +++ b/src/init.c @@ -110,7 +110,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { false, // can reclaim 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) + 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY @@ -153,7 +153,7 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = { false, // can reclaim 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 0, + 0, 0, 0, 0, #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY @@ -165,15 +165,14 @@ mi_stats_t _mi_stats_main = { MI_STAT_VERSION, MI_STATS_NULL }; #if MI_GUARDED mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) { - heap->guarded_sample_seed = seed; - if (heap->guarded_sample_seed == 0) { - heap->guarded_sample_seed = _mi_heap_random_next(heap); - } heap->guarded_sample_rate = sample_rate; - if (heap->guarded_sample_rate >= 1) { - heap->guarded_sample_seed = heap->guarded_sample_seed % heap->guarded_sample_rate; + heap->guarded_sample_count = sample_rate; // count down samples + if (heap->guarded_sample_rate > 1) { + if (seed == 0) { + seed = _mi_heap_random_next(heap); + } + heap->guarded_sample_count = (seed % heap->guarded_sample_rate) + 1; // start at random count between 1 and `sample_rate` } - heap->guarded_sample_count = heap->guarded_sample_seed; // count down samples } mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) { diff --git a/test/main-override-static.c b/test/main-override-static.c index 06d7baa5..c94b98f4 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -43,7 +43,7 @@ int main() { // corrupt_free(); // block_overflow1(); // block_overflow2(); - // test_canary_leak(); + test_canary_leak(); // test_aslr(); // invalid_free(); // 
test_reserved(); From 0ae310327f83abd3b354bf03b819f3595be0daf2 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 16:22:08 -0700 Subject: [PATCH 36/68] fix debug assertion for windows TLS --- src/prim/windows/prim.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 535d34a6..6ab715e6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -644,12 +644,12 @@ static void mi_win_tls_init(DWORD reason) { #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { if (mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 + void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); + mi_assert_internal(p == (void*)&_mi_heap_empty); + #endif } - #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 - void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); - mi_assert_internal(p == (void*)&_mi_heap_empty); - #endif } #else MI_UNUSED(reason); From df3e1916209b3783bb3d001013ce8fbba4815da6 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 16:38:53 -0700 Subject: [PATCH 37/68] make windows fixed TLS opt-in --- include/mimalloc/prim.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index d3157949..fbf0cc74 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -118,9 +118,6 @@ void _mi_prim_thread_done_auto_done(void); void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); - -#define MI_WIN_USE_FIXED_TLS 1 - //------------------------------------------------------------------- // Access to TLS (thread local storage) slots. 
// We need fast access to both a unique thread id (in `free.c:mi_free`) and From 41cc1bfe5199fbfc4dc5e7c7ecb1453ad4e8ad7b Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 13 May 2025 17:38:16 -0700 Subject: [PATCH 38/68] add guarded TLS test for Windows fixed TLS --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 +-- include/mimalloc/prim.h | 2 +- src/init.c | 1 - src/prim/windows/prim.c | 33 ++++++++++++++----------- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index d6af71ce..128a4ff6 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -282,8 +282,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index fbf0cc74..3d8f1806 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -208,7 +208,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // thread-local initialization checks in the fast path. // We allocate a user TLS slot at process initialization (see `windows/prim.c`) // and store the offset `_mi_win_tls_offset`. 
-#define MI_HAS_TLS_SLOT 2 // 2 = we can reliably initialize the slot (saving a test on each malloc) +#define MI_HAS_TLS_SLOT 1 // 2 = we can reliably initialize the slot (saving a test on each malloc) extern mi_decl_hidden size_t _mi_win_tls_offset; diff --git a/src/init.c b/src/init.c index fe0acd8a..ff6c5d29 100644 --- a/src/init.c +++ b/src/init.c @@ -225,7 +225,6 @@ mi_heap_t* _mi_heap_main_get(void) { return &_mi_heap_main; } - /* ----------------------------------------------------------- Sub process ----------------------------------------------------------- */ diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 6ab715e6..9ffacaa3 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -631,18 +631,23 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif +//static void mi_debug_out(const char* s) { +// HANDLE h = GetStdHandle(STD_ERROR_HANDLE); +// WriteConsole(h, s, (DWORD)_mi_strlen(s), NULL, NULL); +//} + static void mi_win_tls_init(DWORD reason) { - #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation - #if MI_WIN_USE_FIXED_TLS==1 - if (reason==DLL_PROCESS_ATTACH && _mi_win_tls_offset == 0) { - const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 - if (tls_slot == TLS_OUT_OF_INDEXES) { - _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); - } - _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); - } - #endif if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { + #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically + if (_mi_win_tls_offset == 0 && reason=DLL_PROCESS_ATTACH) { + const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 + if (tls_slot == TLS_OUT_OF_INDEXES) { + _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); + } + _mi_win_tls_offset = (size_t)tls_slot * sizeof(void*); + 
} + #endif + #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if (mi_prim_get_default_heap() == NULL) { _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 @@ -650,10 +655,8 @@ static void mi_win_tls_init(DWORD reason) { mi_assert_internal(p == (void*)&_mi_heap_empty); #endif } - } - #else - MI_UNUSED(reason); - #endif + #endif + } } static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { @@ -676,7 +679,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #define MI_PRIM_HAS_PROCESS_ATTACH 1 // Windows DLL: easy to hook into process_init and thread_done - BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { mi_win_main((PVOID)inst,reason,reserved); return TRUE; } From 15c917ef1522957a260686cf027a9f294ba1c5cd Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 13 May 2025 17:45:10 -0700 Subject: [PATCH 39/68] fix syntax error --- src/prim/windows/prim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 9ffacaa3..b82918c1 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -639,7 +639,7 @@ mi_decl_cache_align size_t _mi_win_tls_offset = 0; static void mi_win_tls_init(DWORD reason) { if (reason==DLL_PROCESS_ATTACH || reason==DLL_THREAD_ATTACH) { #if MI_WIN_USE_FIXED_TLS==1 // we must allocate a TLS slot dynamically - if (_mi_win_tls_offset == 0 && reason=DLL_PROCESS_ATTACH) { + if (_mi_win_tls_offset == 0 && reason == DLL_PROCESS_ATTACH) { const DWORD tls_slot = TlsAlloc(); // usually returns slot 1 if (tls_slot == TLS_OUT_OF_INDEXES) { _mi_error_message(EFAULT, "unable to allocate the a TLS slot (rebuild without MI_WIN_USE_FIXED_TLS?)\n"); From 08c33768a5344e43a1ba95b88c3adcbb6a5c3498 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 11:09:34 -0700 Subject: 
[PATCH 40/68] fix stats for pages and page_bins --- include/mimalloc/internal.h | 2 ++ src/heap.c | 4 +--- src/page-queue.c | 4 ++-- src/page.c | 5 ++--- src/segment.c | 1 + src/stats.c | 6 +++++- test/test-stress.c | 1 + 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b11bd357..e277f0ff 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -217,6 +217,7 @@ void _mi_deferred_free(mi_heap_t* heap, bool force); void _mi_page_free_collect(mi_page_t* page,bool force); void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments +size_t _mi_page_bin(const mi_page_t* page); // for stats size_t _mi_bin_size(size_t bin); // for stats size_t _mi_bin(size_t size); // for stats @@ -233,6 +234,7 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* pa // "stats.c" void _mi_stats_done(mi_stats_t* stats); +void _mi_stats_merge_thread(mi_tld_t* tld); mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/src/heap.c b/src/heap.c index 0ea9a2ff..118438b9 100644 --- a/src/heap.c +++ b/src/heap.c @@ -169,9 +169,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_arenas_collect(collect == MI_FORCE /* force purge? */); // merge statistics - if (collect <= MI_FORCE) { - mi_stats_merge(); - } + if (collect <= MI_FORCE) { _mi_stats_merge_thread(heap->tld); } } void _mi_heap_collect_abandon(mi_heap_t* heap) { diff --git a/src/page-queue.c b/src/page-queue.c index 3507505d..38b9aff4 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -136,7 +136,7 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* } #endif -static size_t mi_page_bin(const mi_page_t* page) { +size_t _mi_page_bin(const mi_page_t* page) { const size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? 
MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); mi_assert_internal(bin <= MI_BIN_FULL); return bin; @@ -144,7 +144,7 @@ static size_t mi_page_bin(const mi_page_t* page) { static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { mi_assert_internal(heap!=NULL); - const size_t bin = mi_page_bin(page); + const size_t bin = _mi_page_bin(page); mi_page_queue_t* pq = &heap->pages[bin]; mi_assert_internal((mi_page_block_size(page) == pq->block_size) || (mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) || diff --git a/src/page.c b/src/page.c index 55150f33..89acb409 100644 --- a/src/page.c +++ b/src/page.c @@ -290,7 +290,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_assert_internal(full_block_size >= block_size); mi_page_init(heap, page, full_block_size, heap->tld); mi_heap_stat_increase(heap, pages, 1); - mi_heap_stat_increase(heap, page_bins[mi_page_bin(page)], 1); + mi_heap_stat_increase(heap, page_bins[_mi_page_bin(page)], 1); if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); return page; @@ -443,8 +443,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_segments_tld_t* segments_tld = &heap->tld->segments; mi_page_queue_remove(pq, page); - // and free it - mi_heap_stat_decrease(heap, page_bins[mi_page_bin(page)], 1); + // and free it mi_page_set_heap(page,NULL); _mi_segment_page_free(page, force, segments_tld); } diff --git a/src/segment.c b/src/segment.c index 75f8dacb..708ddd00 100644 --- a/src/segment.c +++ b/src/segment.c @@ -718,6 +718,7 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_seg size_t inuse = page->capacity * mi_page_block_size(page); _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); + _mi_stat_decrease(&tld->stats->page_bins[_mi_page_bin(page)], 1); page->is_zero_init = false; page->segment_in_use = false; diff --git 
a/src/stats.c b/src/stats.c index 92bc049c..ec8b65a3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -395,6 +395,10 @@ void mi_stats_merge(void) mi_attr_noexcept { mi_stats_merge_from( mi_stats_get_default() ); } +void _mi_stats_merge_thread(mi_tld_t* tld) { + mi_stats_merge_from( &tld->stats ); +} + void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` mi_stats_merge_from(stats); } @@ -498,7 +502,7 @@ static bool mi_heap_buf_expand(mi_heap_buf_t* hbuf) { hbuf->buf[hbuf->size-1] = 0; } if (hbuf->size > SIZE_MAX/2 || !hbuf->can_realloc) return false; - const size_t newsize = (hbuf->size == 0 ? 2*MI_KiB : 2*hbuf->size); + const size_t newsize = (hbuf->size == 0 ? mi_good_size(12*MI_KiB) : 2*hbuf->size); char* const newbuf = (char*)mi_rezalloc(hbuf->buf, newsize); if (newbuf == NULL) return false; hbuf->buf = newbuf; diff --git a/test/test-stress.c b/test/test-stress.c index 9e041064..1abe56d2 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -337,6 +337,7 @@ int main(int argc, char** argv) { mi_free(json); } #endif + mi_collect(true); mi_stats_print(NULL); #endif //bench_end_program(); From 6cb4861f3eb4757ad4d1f1b0ef6aca793244381e Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 17:36:31 -0700 Subject: [PATCH 41/68] fix format specifier for numa nodes --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index ec8b65a3..89d0e103 100644 --- a/src/stats.c +++ b/src/stats.c @@ -348,7 +348,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->malloc_guarded_count, "guarded", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->page_searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); + _mi_fprintf(out, arg, "%10s: %5i\n", "numa nodes", _mi_os_numa_node_count()); size_t elapsed; size_t user_time; From 
44e370bdaaaf2ba9d062113ba3902624a29eaa25 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 21 May 2025 19:20:31 -0700 Subject: [PATCH 42/68] fix format specifier in stat output --- src/stats.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index 89d0e103..03eafb66 100644 --- a/src/stats.c +++ b/src/stats.c @@ -359,9 +359,9 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) size_t peak_commit; size_t page_faults; mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); - _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", - user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); + _mi_fprintf(out, arg, "%10s: %5zu.%03zu s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, arg, "%10s: user: %zu.%03zu s, system: %zu.%03zu s, faults: %zu, rss: ", "process", + user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); if (peak_commit > 0) { _mi_fprintf(out, arg, ", commit: "); From 3e32b4c38563b6d037d8289cc0a73ea694f88ff1 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 28 May 2025 08:37:34 -0700 Subject: [PATCH 43/68] fix OS allocation size tracking in the memid --- include/mimalloc/internal.h | 4 +++- src/os.c | 11 +++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e277f0ff..2e770943 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -784,8 +784,10 @@ static inline mi_memid_t _mi_memid_none(void) { return _mi_memid_create(MI_MEM_NONE); } -static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { +static inline mi_memid_t
_mi_memid_create_os(void* base, size_t size, bool committed, bool is_zero, bool is_large) { mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.mem.os.base = base; + memid.mem.os.size = size; memid.initially_committed = committed; memid.initially_zero = is_zero; memid.is_pinned = is_large; diff --git a/src/os.c b/src/os.c index be7e532c..d134feba 100644 --- a/src/os.c +++ b/src/os.c @@ -339,7 +339,7 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) { bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); if (p != NULL) { - *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); + *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); } return p; } @@ -357,10 +357,9 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base ); if (p != NULL) { - *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); memid->mem.os.base = os_base; - // memid->mem.os.alignment = alignment; - memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? 
} return p; } @@ -618,7 +617,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; - uint8_t* start = mi_os_claim_huge_pages(pages, &size); + uint8_t* const start = mi_os_claim_huge_pages(pages, &size); if (start == NULL) return NULL; // or 32-bit systems // Allocate one page at the time but try to place them contiguously @@ -674,7 +673,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } if (page != 0) { mi_assert(start != NULL); - *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); + *memid = _mi_memid_create_os(start, size, true /* is committed */, all_zero, true /* is_large */); memid->memkind = MI_MEM_OS_HUGE; mi_assert(memid->is_pinned); #ifdef MI_TRACK_ASAN From 715acc03294d36c08c8f9f0dc0cbb2a87c320f2b Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 30 May 2025 09:29:35 -0700 Subject: [PATCH 44/68] mark assert_fail as cold and noreturn; move assert to internal.h (see issue #1091, and python/cpython#134586) --- include/mimalloc/internal.h | 94 +++++++++++++++++++++++++++---------- include/mimalloc/types.h | 21 --------- src/options.c | 2 +- 3 files changed, 69 insertions(+), 48 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2e770943..ddd0ba4f 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -8,7 +8,6 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H - // -------------------------------------------------------------------------- // This file contains the internal API's of mimalloc and various utility // functions and macros. @@ -17,6 +16,11 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "types.h" #include "track.h" + +// -------------------------------------------------------------------------- +// Compiler defines +// -------------------------------------------------------------------------- + #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) #else @@ -30,37 +34,69 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#define mi_decl_noreturn __declspec(noreturn) #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_noreturn __attribute__((noreturn)) #define mi_decl_weak __attribute__((weak)) #define mi_decl_hidden __attribute__((visibility("hidden"))) +#if (__GNUC__ >= 4) || defined(__clang__) +#define mi_decl_cold __attribute__((cold)) +#else +#define mi_decl_cold +#endif #elif __cplusplus >= 201103L // c++11 #define mi_decl_noinline #define mi_decl_thread thread_local #define mi_decl_cache_align alignas(MI_CACHE_LINE) +#define mi_decl_noreturn [[noreturn]] #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align +#define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden +#define mi_decl_cold +#endif + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define mi_unlikely(x) (x) 
[[unlikely]] +#define mi_likely(x) (x) [[likely]] +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc #endif #if defined(__EMSCRIPTEN__) && !defined(__wasi__) #define __wasi__ #endif -#if defined(__cplusplus) -#define mi_decl_externc extern "C" -#else -#define mi_decl_externc -#endif + +// -------------------------------------------------------------------------- +// Internal functions +// -------------------------------------------------------------------------- // "libc.c" #include @@ -256,26 +292,6 @@ bool _mi_page_is_valid(mi_page_t* page); #endif -// ------------------------------------------------------ -// Branches -// ------------------------------------------------------ - -#if defined(__GNUC__) || defined(__clang__) -#define mi_unlikely(x) (__builtin_expect(!!(x),false)) -#define mi_likely(x) (__builtin_expect(!!(x),true)) -#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) -#define mi_unlikely(x) (x) [[unlikely]] -#define mi_likely(x) (x) [[likely]] -#else -#define mi_unlikely(x) (x) -#define mi_likely(x) (x) -#endif - -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif - - /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. @@ -300,6 +316,32 @@ bool _mi_page_is_valid(mi_page_t* page); #endif +// ------------------------------------------------------ +// Assertions +// ------------------------------------------------------ + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func) mi_attr_noexcept; +#define mi_assert(expr) ((expr) ? 
(void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + + + /* ----------------------------------------------------------- Inlined definitions ----------------------------------------------------------- */ diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index e2b5d318..855374e5 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -574,7 +574,6 @@ struct mi_tld_s { }; - // ------------------------------------------------------ // Debug // ------------------------------------------------------ @@ -589,26 +588,6 @@ struct mi_tld_s { #define MI_DEBUG_PADDING (0xDE) #endif -#if (MI_DEBUG) -// use our own assertion to print without memory allocation -void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); -#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) -#else -#define mi_assert(x) -#endif - -#if (MI_DEBUG>1) -#define mi_assert_internal mi_assert -#else -#define mi_assert_internal(x) -#endif - -#if (MI_DEBUG>2) -#define mi_assert_expensive mi_assert -#else -#define mi_assert_expensive(x) -#endif - // ------------------------------------------------------ // Statistics diff --git a/src/options.c b/src/options.c index 772dfe66..9bb5d1b3 100644 --- a/src/options.c +++ b/src/options.c @@ -525,7 +525,7 @@ void _mi_warning_message(const char* fmt, ...) 
{ #if MI_DEBUG -void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { +mi_decl_noreturn mi_decl_cold void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) mi_attr_noexcept { _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); abort(); } From e19c0222380a8d53b7d408657dfba0c03d99133c Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 30 May 2025 09:36:38 -0700 Subject: [PATCH 45/68] define mi_decl_align separate from mi_decl_cache_align --- include/mimalloc/internal.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index ddd0ba4f..e951b576 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -27,13 +27,14 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_trace_message(...) #endif -#define MI_CACHE_LINE 64 +#define mi_decl_cache_align mi_decl_align(64) + #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) -#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#define mi_decl_align(a) __declspec(align(a)) #define mi_decl_noreturn __declspec(noreturn) #define mi_decl_weak #define mi_decl_hidden @@ -41,7 +42,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread -#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_align(a) __attribute__((aligned(a))) #define mi_decl_noreturn __attribute__((noreturn)) #define mi_decl_weak __attribute__((weak)) #define mi_decl_hidden __attribute__((visibility("hidden"))) @@ -53,7 +54,7 @@ terms of the MIT license. A copy of the license can be found in the file #elif __cplusplus >= 201103L // c++11 #define mi_decl_noinline #define mi_decl_thread thread_local -#define mi_decl_cache_align alignas(MI_CACHE_LINE) +#define mi_decl_align(a) alignas(a) #define mi_decl_noreturn [[noreturn]] #define mi_decl_weak #define mi_decl_hidden @@ -61,7 +62,7 @@ terms of the MIT license. A copy of the license can be found in the file #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) -#define mi_decl_cache_align +#define mi_decl_align(a) #define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden From 1c514847996fba21af276b96d0d748b1ce8f3772 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 11:36:07 -0700 Subject: [PATCH 46/68] fix missing csize assignment in _mi_os_free_ex --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d134feba..dee263bc 100644 --- a/src/os.c +++ b/src/os.c @@ -181,7 +181,7 @@ static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; - if (csize==0) { _mi_os_good_alloc_size(size); } + if (csize==0) { csize = _mi_os_good_alloc_size(size); } size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? 
(due to alignment) From 60f7e6a6a846dcf9ebe2b29530f0b309d4fd1dd2 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 12:20:02 -0700 Subject: [PATCH 47/68] check all os_commit calls and return NULL on failure --- include/mimalloc/internal.h | 6 +++--- src/os.c | 5 ++++- src/page.c | 26 +++++++++++++++++--------- src/segment.c | 4 +++- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 2e770943..e1052787 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -126,13 +126,13 @@ bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero); -bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); -bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); +mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); +mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/src/os.c b/src/os.c index dee263bc..c720c8ad 100644 --- a/src/os.c +++ b/src/os.c @@ -300,7 +300,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { - _mi_os_commit(p, size, NULL); + if (!_mi_os_commit(p, size, NULL)) { + mi_os_prim_free(p, over_size, 0); + return NULL; + } } } else { // mmap can 
free inside an allocation diff --git a/src/page.c b/src/page.c index 89acb409..b34ee414 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -630,14 +630,14 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - if (page->free != NULL) return; + if (page->free != NULL) return true; #endif - if (page->capacity >= page->reserved) return; + if (page->capacity >= page->reserved) return true; size_t page_size; //uint8_t* page_start = @@ -673,6 +673,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); + return true; } // Initialize a fresh page @@ -724,8 +725,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,tld); - mi_assert(mi_page_immediate_available(page)); + if (mi_page_extend_free(heap,page,tld)) { + 
mi_assert(mi_page_immediate_available(page)); + } + return; } @@ -817,9 +820,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page_candidate != NULL) { page = page_candidate; } - if (page != NULL && !mi_page_immediate_available(page)) { - mi_assert_internal(mi_page_is_expandable(page)); - mi_page_extend_free(heap, page, heap->tld); + if (page != NULL) { + if (!mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + if (!mi_page_extend_free(heap, page, heap->tld)) { + page = NULL; // failed to extend + } + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); } if (page == NULL) { diff --git a/src/segment.c b/src/segment.c index 708ddd00..1813a1fc 100644 --- a/src/segment.c +++ b/src/segment.c @@ -182,7 +182,9 @@ static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) static void mi_segment_protect_range(void* p, size_t size, bool protect) { if (protect) { - _mi_os_protect(p, size); + if (!_mi_os_protect(p, size)) { + _mi_error_message(EFAULT,"unable to protect segment memory at %p\n", p); + } } else { _mi_os_unprotect(p, size); From a0072ba7c335e38bcd20f854692930fcb1d00d5a Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 15:07:15 -0700 Subject: [PATCH 48/68] fix base address if commit fails on aligned overallocation --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index c720c8ad..580b8af0 100644 --- a/src/os.c +++ b/src/os.c @@ -301,7 +301,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { if (!_mi_os_commit(p, size, NULL)) { - mi_os_prim_free(p, over_size, 0); + mi_os_prim_free(*base, over_size, 0); return NULL; } } From 30a17bf1b773e57fa79c1c96667bf5163a024c02 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 11:36:07 -0700 Subject: [PATCH 49/68] fix missing csize assignment in 
_mi_os_free_ex --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index d134feba..dee263bc 100644 --- a/src/os.c +++ b/src/os.c @@ -181,7 +181,7 @@ static void mi_os_prim_free(void* addr, size_t size, size_t commit_size) { void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; - if (csize==0) { _mi_os_good_alloc_size(size); } + if (csize==0) { csize = _mi_os_good_alloc_size(size); } size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? (due to alignment) From 21425bc334ff67d0daafbc1d98056a45f9fab594 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 12:20:02 -0700 Subject: [PATCH 50/68] check all os_commit calls and return NULL on failure --- include/mimalloc/internal.h | 6 +++--- src/os.c | 5 ++++- src/page.c | 26 +++++++++++++++++--------- src/segment.c | 4 +++- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e951b576..5ee59252 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -163,13 +163,13 @@ bool _mi_os_has_overcommit(void); bool _mi_os_has_virtual_reserve(void); bool _mi_os_reset(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero); -bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); bool _mi_os_decommit(void* addr, size_t size); -bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); +mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); +mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); void* 
_mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/src/os.c b/src/os.c index dee263bc..c720c8ad 100644 --- a/src/os.c +++ b/src/os.c @@ -300,7 +300,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { - _mi_os_commit(p, size, NULL); + if (!_mi_os_commit(p, size, NULL)) { + mi_os_prim_free(p, over_size, 0); + return NULL; + } } } else { // mmap can free inside an allocation diff --git a/src/page.c b/src/page.c index 89acb409..b34ee414 100644 --- a/src/page.c +++ b/src/page.c @@ -37,7 +37,7 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta } static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld); -static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld); #if (MI_DEBUG>=3) static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { @@ -630,14 +630,14 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co // Note: we also experimented with "bump" allocation on the first // allocations but this did not speed up any benchmark (due to an // extra test in malloc? or cache effects?) 
-static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { +static bool mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) { mi_assert_expensive(mi_page_is_valid_init(page)); #if (MI_SECURE<=2) mi_assert(page->free == NULL); mi_assert(page->local_free == NULL); - if (page->free != NULL) return; + if (page->free != NULL) return true; #endif - if (page->capacity >= page->reserved) return; + if (page->capacity >= page->reserved) return true; size_t page_size; //uint8_t* page_start = @@ -673,6 +673,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); + return true; } // Initialize a fresh page @@ -724,8 +725,10 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list - mi_page_extend_free(heap,page,tld); - mi_assert(mi_page_immediate_available(page)); + if (mi_page_extend_free(heap,page,tld)) { + mi_assert(mi_page_immediate_available(page)); + } + return; } @@ -817,9 +820,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p if (page_candidate != NULL) { page = page_candidate; } - if (page != NULL && !mi_page_immediate_available(page)) { - mi_assert_internal(mi_page_is_expandable(page)); - mi_page_extend_free(heap, page, heap->tld); + if (page != NULL) { + if (!mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + if (!mi_page_extend_free(heap, page, heap->tld)) { + page = NULL; // failed to extend + } + } + mi_assert_internal(page == NULL || mi_page_immediate_available(page)); } if (page == NULL) { diff --git a/src/segment.c b/src/segment.c index 708ddd00..1813a1fc 100644 --- a/src/segment.c +++ b/src/segment.c @@ -182,7 +182,9 @@ static bool mi_page_not_in_queue(const 
mi_page_t* page, mi_segments_tld_t* tld) static void mi_segment_protect_range(void* p, size_t size, bool protect) { if (protect) { - _mi_os_protect(p, size); + if (!_mi_os_protect(p, size)) { + _mi_error_message(EFAULT,"unable to protect segment memory at %p\n", p); + } } else { _mi_os_unprotect(p, size); From 6c3d75a355c14bd3e67c67aed76f0297ab24ed6f Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 3 Jun 2025 15:07:15 -0700 Subject: [PATCH 51/68] fix base address if commit fails on aligned overallocation --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index c720c8ad..580b8af0 100644 --- a/src/os.c +++ b/src/os.c @@ -301,7 +301,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { if (!_mi_os_commit(p, size, NULL)) { - mi_os_prim_free(p, over_size, 0); + mi_os_prim_free(*base, over_size, 0); return NULL; } } From 2d34956bedded440b4aff9d8a53570f5c6a8e2be Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 15:53:28 -0700 Subject: [PATCH 52/68] use main instead of master in readme --- azure-pipelines.yml | 35 ++--------------------------------- readme.md | 5 ++--- 2 files changed, 4 insertions(+), 36 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b7fc59d4..ea915815 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -6,10 +6,8 @@ trigger: branches: include: - - master - - dev3 - - dev2 - - dev + - main + - dev* tags: include: - v* @@ -184,35 +182,6 @@ jobs: # Other OS versions (just debug mode) # ---------------------------------------------------------- -- job: - displayName: Windows 2019 - pool: - vmImage: - windows-2019 - strategy: - matrix: - Debug: - BuildType: debug - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON - MSBuildConfiguration: Debug - Release: - BuildType: release - cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release - MSBuildConfiguration: Release - steps: - - task: 
CMake@1 - inputs: - workingDirectory: $(BuildType) - cmakeArgs: .. $(cmakeExtraArgs) - - task: MSBuild@1 - inputs: - solution: $(BuildType)/libmimalloc.sln - configuration: '$(MSBuildConfiguration)' - msbuildArguments: -m - - script: ctest --verbose --timeout 240 -C $(MSBuildConfiguration) - workingDirectory: $(BuildType) - displayName: CTest - - job: displayName: Ubuntu 24.04 pool: diff --git a/readme.md b/readme.md index cee78898..601a7e24 100644 --- a/readme.md +++ b/readme.md @@ -72,15 +72,14 @@ Enjoy! ### Branches -* `master`: latest stable release (still based on `dev2`). +* `main`: latest stable release (still based on `dev2`). * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. * `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use less memory - with less fragmentation. + This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. 
### Releases From d389819cc9243c2647684544c2942ef6cc893a2a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 17:59:46 -0700 Subject: [PATCH 53/68] add initial support for _mi_prim_reuse and MADV_FREE_REUSABLE on macOS (issue #1097) --- include/mimalloc/internal.h | 1 + include/mimalloc/prim.h | 5 +++++ src/arena.c | 6 +++++- src/os.c | 11 +++++++++++ src/prim/emscripten/prim.c | 5 +++++ src/prim/unix/prim.c | 33 +++++++++++++++++++++++++++------ src/prim/wasi/prim.c | 5 +++++ src/prim/windows/prim.c | 5 +++++ 8 files changed, 64 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5ee59252..3e57e252 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -167,6 +167,7 @@ bool _mi_os_decommit(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); bool _mi_os_purge(void* p, size_t size); bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size); +void _mi_os_reuse(void* p, size_t size); mi_decl_nodiscard bool _mi_os_commit(void* p, size_t size, bool* is_zero); mi_decl_nodiscard bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size); mi_decl_nodiscard bool _mi_os_protect(void* addr, size_t size); diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 3d8f1806..c71678cc 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -63,6 +63,11 @@ int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); // Returns error code or 0 on success. int _mi_prim_reset(void* addr, size_t size); +// Reuse memory. This is called for memory that is already committed but +// may have been reset (`_mi_prim_reset`) or decommitted (`_mi_prim_decommit`) where `needs_recommit` was false. +// Returns error code or 0 on success. On most platforms this is a no-op. +int _mi_prim_reuse(void* addr, size_t size); + // Protect memory. Returns error code or 0 on success. 
int _mi_prim_protect(void* addr, size_t size, bool protect); diff --git a/src/arena.c b/src/arena.c index aa01ffcb..25cef886 100644 --- a/src/arena.c +++ b/src/arena.c @@ -266,12 +266,12 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar else if (commit) { // commit requested, but the range may not be committed as a whole: ensure it is committed now memid->initially_committed = true; + const size_t commit_size = mi_arena_block_size(needed_bcount); bool any_uncommitted; size_t already_committed = 0; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed); if (any_uncommitted) { mi_assert_internal(already_committed < needed_bcount); - const size_t commit_size = mi_arena_block_size(needed_bcount); const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed); bool commit_zero = false; if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) { @@ -281,6 +281,10 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar if (commit_zero) { memid->initially_zero = true; } } } + else { + // all are already committed: signal that we are reusing memory in case it was purged before + _mi_os_reuse( p, commit_size ); + } } else { // no need to commit, but check if already fully committed diff --git a/src/os.c b/src/os.c index 580b8af0..4c99d625 100644 --- a/src/os.c +++ b/src/os.c @@ -512,6 +512,17 @@ bool _mi_os_reset(void* addr, size_t size) { } +void _mi_os_reuse( void* addr, size_t size ) { + // page align conservatively within the range + size_t csize = 0; + void* const start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return; + const int err = _mi_prim_reuse(start, csize); + if (err != 0) { + _mi_warning_message("cannot reuse OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } +} + // either resets or decommits memory, returns 
true if the memory needs // to be recommitted if it is to be re-used later on. bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size) diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index a8677cbc..c4cfc35d 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -114,6 +114,11 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index a90fa659..9ac855a5 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -433,13 +433,27 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { return err; } +int _mi_prim_reuse(void* start, size_t size) { + #if defined(__APPLE__) && defined(MADV_FREE_REUSE) + return unix_madvise(start, size, MADV_FREE_REUSE); + #endif + return 0; +} + int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - err = unix_madvise(start, size, MADV_DONTNEED); #if !MI_DEBUG && MI_SECURE<=2 *needs_recommit = false; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + #else + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #endif #else + // note: don't use MADV_FREE_REUSABLE as the range may contain protected areas + err = unix_madvise(start, size, MADV_DONTNEED); *needs_recommit = true; mprotect(start, size, PROT_NONE); #endif @@ -454,14 +468,21 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { } int 
_mi_prim_reset(void* start, size_t size) { - // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + int err = 0; + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + // on macOS we try to use MADV_FREE_REUSABLE as it seems the fastest + err = unix_madvise(start, size, MADV_FREE_REUSABLE); + if (err == 0) return 0; + // fall through + #endif + + #if defined(MADV_FREE) + // Otherwise, we try to use `MADV_FREE` as that is the fastest. A drawback though is that it // will not reduce the `rss` stats in tools like `top` even though the memory is available // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by // default `MADV_DONTNEED` is used though. - #if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on @@ -469,7 +490,7 @@ int _mi_prim_reset(void* start, size_t size) { err = unix_madvise(start, size, MADV_DONTNEED); } #else - int err = unix_madvise(start, size, MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); #endif return err; } diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index e1e7de5e..745a41fd 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -149,6 +149,11 @@ int _mi_prim_reset(void* addr, size_t size) { return 0; } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); return 0; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index b82918c1..df941af9 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -352,6 +352,11 @@ int _mi_prim_reset(void* addr, 
size_t size) { return (p != NULL ? 0 : (int)GetLastError()); } +int _mi_prim_reuse(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + int _mi_prim_protect(void* addr, size_t size, bool protect) { DWORD oldprotect = 0; BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); From 2696627aafef1afc52ead72fe27c3a2a1347b27a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:07:37 -0700 Subject: [PATCH 54/68] add MI_UNUSED for unix _mi_prim_reuse --- src/prim/unix/prim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 9ac855a5..8452b8c2 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -434,6 +434,7 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { } int _mi_prim_reuse(void* start, size_t size) { + MI_UNUSED(start); MI_UNUSED(size); #if defined(__APPLE__) && defined(MADV_FREE_REUSE) return unix_madvise(start, size, MADV_FREE_REUSE); #endif From d7431402c5ef192a5d9c277abdc2fb4640abc4c1 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:15:16 -0700 Subject: [PATCH 55/68] fall back to MADV_DONTNEED if MADV_FREE_REUSABLE fails on macOS; disable use of MADV_FREE_REUSE on a reset (issue #1097) --- src/prim/unix/prim.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 8452b8c2..780d254f 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -443,18 +443,17 @@ int _mi_prim_reuse(void* start, size_t size) { int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int err = 0; - #if !MI_DEBUG && MI_SECURE<=2 - *needs_recommit = false; - #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) + #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) // decommit on macOS: use MADV_FREE_REUSABLE as it does immediate rss accounting (issue #1097) err = unix_madvise(start, size, MADV_FREE_REUSABLE); - #else + if 
(err) { err = unix_madvise(start, size, MADV_DONTNEED); } + #else // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) err = unix_madvise(start, size, MADV_DONTNEED); - #endif + #endif + #if !MI_DEBUG && MI_SECURE<=2 + *needs_recommit = false; #else - // note: don't use MADV_FREE_REUSABLE as the range may contain protected areas - err = unix_madvise(start, size, MADV_DONTNEED); *needs_recommit = true; mprotect(start, size, PROT_NONE); #endif @@ -470,10 +469,11 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { int _mi_prim_reset(void* start, size_t size) { int err = 0; - #if defined(__APPLE__) && defined(MADV_FREE_REUSABLE) - // on macOS we try to use MADV_FREE_REUSABLE as it seems the fastest + + // on macOS can use MADV_FREE_REUSABLE (but we disable this for now as it seems slower) + #if 0 && defined(__APPLE__) && defined(MADV_FREE_REUSABLE) err = unix_madvise(start, size, MADV_FREE_REUSABLE); - if (err == 0) return 0; + if (err==0) return 0; // fall through #endif From 2f0540c4f9e57c55cca4e0d621dd8b3c74843ef0 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 20:50:50 -0700 Subject: [PATCH 56/68] add _mi_os_zalloc --- include/mimalloc/internal.h | 1 + include/mimalloc/prim.h | 2 +- src/arena.c | 7 +----- src/init.c | 28 +++++++++-------------- src/os.c | 45 ++++++++++++++++++++++++++++++------- src/segment-map.c | 2 +- 6 files changed, 51 insertions(+), 34 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 3e57e252..7250d31a 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -154,6 +154,7 @@ void _mi_heap_guarded_init(mi_heap_t* heap); // os.c void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_memid_t* memid); +void* _mi_os_zalloc(size_t size, mi_memid_t* memid); void _mi_os_free(void* p, size_t size, mi_memid_t memid); void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid); 
diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index c71678cc..1087d9b8 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -59,7 +59,7 @@ int _mi_prim_commit(void* addr, size_t size, bool* is_zero); // pre: needs_recommit != NULL int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); -// Reset memory. The range keeps being accessible but the content might be reset. +// Reset memory. The range keeps being accessible but the content might be reset to zero at any moment. // Returns error code or 0 on success. int _mi_prim_reset(void* addr, size_t size); diff --git a/src/arena.c b/src/arena.c index 25cef886..ba36c415 100644 --- a/src/arena.c +++ b/src/arena.c @@ -188,14 +188,9 @@ void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid) { if (p != NULL) return p; // or fall back to the OS - p = _mi_os_alloc(size, memid); + p = _mi_os_zalloc(size, memid); if (p == NULL) return NULL; - // zero the OS memory if needed - if (!memid->initially_zero) { - _mi_memzero_aligned(p, size); - memid->initially_zero = true; - } return p; } diff --git a/src/init.c b/src/init.c index ff6c5d29..05ff688e 100644 --- a/src/init.c +++ b/src/init.c @@ -298,7 +298,6 @@ static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; static mi_thread_data_t* mi_thread_data_zalloc(void) { // try to find thread metadata in the cache - bool is_zero = false; mi_thread_data_t* td = NULL; for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); @@ -306,32 +305,25 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { // found cached allocation, try use it td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - break; + _mi_memzero(td, offsetof(mi_thread_data_t,memid)); + return td; } } } // if that fails, allocate as meta data + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid); if (td == NULL) { - mi_memid_t memid; - td = 
(mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid); + // if this fails, try once more. (issue #257) + td = (mi_thread_data_t*)_mi_os_zalloc(sizeof(mi_thread_data_t), &memid); if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid); - if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); - } - } - if (td != NULL) { - td->memid = memid; - is_zero = memid.initially_zero; + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + return NULL; } } - - if (td != NULL && !is_zero) { - _mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid)); - } + td->memid = memid; return td; } diff --git a/src/os.c b/src/os.c index 4c99d625..3c25ff59 100644 --- a/src/os.c +++ b/src/os.c @@ -182,6 +182,7 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; if (csize==0) { csize = _mi_os_good_alloc_size(size); } + mi_assert_internal(csize >= size); size_t commit_size = (still_committed ? csize : 0); void* base = addr; // different base? 
(due to alignment) @@ -341,9 +342,11 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) { bool os_is_large = false; bool os_is_zero = false; void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); - if (p != NULL) { - *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); - } + if (p == NULL) return NULL; + + *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); + mi_assert_internal(memid->mem.os.size >= size); + mi_assert_internal(memid->initially_committed); return p; } @@ -359,14 +362,40 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo bool os_is_zero = false; void* os_base = NULL; void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base ); - if (p != NULL) { - *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); - memid->mem.os.base = os_base; - memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? - } + if (p == NULL) return NULL; + + *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned? 
+ + mi_assert_internal(memid->mem.os.size >= size); + mi_assert_internal(_mi_is_aligned(p,alignment)); + mi_assert_internal(!commit || memid->initially_committed); + mi_assert_internal(!memid->initially_zero || memid->initially_committed); return p; } + +mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) { + if (p==NULL || size==0 || memid->initially_zero) return p; + if (!memid->initially_committed) { + bool is_zero = false; + if (!_mi_os_commit(p, size, &is_zero)) { + _mi_os_free(p, size, *memid); + return NULL; + } + memid->initially_committed = true; + } + _mi_memzero_aligned(p,size); + memid->initially_zero = true; + return p; +} + +void* _mi_os_zalloc(size_t size, mi_memid_t* memid) { + void* p = _mi_os_alloc(size,memid); + return mi_os_ensure_zero(p, size, memid); +} + /* ----------------------------------------------------------- OS aligned allocation with an offset. This is used for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc diff --git a/src/segment-map.c b/src/segment-map.c index 2f68f8c4..bbcea28a 100644 --- a/src/segment-map.c +++ b/src/segment-map.c @@ -61,7 +61,7 @@ static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bo if mi_unlikely(part == NULL) { if (!create_on_demand) return NULL; mi_memid_t memid; - part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid); + part = (mi_segmap_part_t*)_mi_os_zalloc(sizeof(mi_segmap_part_t), &memid); if (part == NULL) return NULL; part->memid = memid; mi_segmap_part_t* expected = NULL; From 57830a4b254673de60900ab83031b5b8454d947a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 21:09:32 -0700 Subject: [PATCH 57/68] fix assertion in mi_os_ensure_zero --- src/os.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index 3c25ff59..028a5ee5 100644 --- a/src/os.c +++ b/src/os.c @@ -370,14 +370,15 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool 
commit, bool allo mi_assert_internal(memid->mem.os.size >= size); mi_assert_internal(_mi_is_aligned(p,alignment)); - mi_assert_internal(!commit || memid->initially_committed); - mi_assert_internal(!memid->initially_zero || memid->initially_committed); + if (commit) { mi_assert_internal(memid->initially_committed); } + if (memid->initially_zero) { mi_assert_internal(memid->initially_committed); } return p; } mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_t* memid) { - if (p==NULL || size==0 || memid->initially_zero) return p; + if (p==NULL || size==0) return p; + // ensure committed if (!memid->initially_committed) { bool is_zero = false; if (!_mi_os_commit(p, size, &is_zero)) { @@ -386,6 +387,8 @@ mi_decl_nodiscard static void* mi_os_ensure_zero(void* p, size_t size, mi_memid_ } memid->initially_committed = true; } + // ensure zero'd + if (memid->initially_zero) return p; _mi_memzero_aligned(p,size); memid->initially_zero = true; return p; From d7d6c3b5c3ffe106077625bc21741b522c603f03 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 6 Jun 2025 21:22:44 -0700 Subject: [PATCH 58/68] fix assertion --- src/os.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index 028a5ee5..279b60fb 100644 --- a/src/os.c +++ b/src/os.c @@ -370,8 +370,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo mi_assert_internal(memid->mem.os.size >= size); mi_assert_internal(_mi_is_aligned(p,alignment)); - if (commit) { mi_assert_internal(memid->initially_committed); } - if (memid->initially_zero) { mi_assert_internal(memid->initially_committed); } + if (commit) { mi_assert_internal(memid->initially_committed); } return p; } From e7cbbbfb1417c8c4cc0855f081d1c29a64023ab5 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 7 Jun 2025 09:51:35 -0700 Subject: [PATCH 59/68] add mi_process_done to the api --- include/mimalloc.h | 14 +++++++++----- src/init.c | 6 +++++- 2 files changed, 14 
insertions(+), 6 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 97cf7856..ce814d18 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -153,17 +153,21 @@ mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; -mi_decl_export void mi_options_print(void) mi_attr_noexcept; - -mi_decl_export void mi_process_init(void) mi_attr_noexcept; -mi_decl_export void mi_thread_init(void) mi_attr_noexcept; -mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; +mi_decl_export void mi_options_print(void) mi_attr_noexcept; mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; + +// Generally do not use the following as these are usually called automatically +mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_cdecl mi_process_done(void) mi_attr_noexcept; +mi_decl_export void mi_thread_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_done(void) mi_attr_noexcept; + + // ------------------------------------------------------------------------------------- // Aligned allocation // Note that `alignment` always follows `size` for consistency with unaligned diff --git a/src/init.c b/src/init.c index 05ff688e..eb87ab3c 100644 --- a/src/init.c +++ b/src/init.c @@ -642,7 +642,11 @@ void mi_process_init(void) mi_attr_noexcept { } } -// Called when the process is done (through `at_exit`) +void mi_cdecl mi_process_done(void) mi_attr_noexcept { + _mi_process_done(); +} + +// Called 
when the process is done (cdecl as it is used with `at_exit` on some platforms) void mi_cdecl _mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; From c1249a4b1502a35d68be51aceddd466a301f5a25 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 7 Jun 2025 10:12:53 -0700 Subject: [PATCH 60/68] do not automatically call mi_process_done if mi_option_destroy_on_exit > 1 --- include/mimalloc/internal.h | 6 +++--- src/init.c | 14 +++++++------- src/prim/prim.c | 12 ++++++------ src/prim/windows/prim.c | 18 +++++++++--------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 7250d31a..a29a419a 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -63,7 +63,7 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_align(a) -#define mi_decl_noreturn +#define mi_decl_noreturn #define mi_decl_weak #define mi_decl_hidden #define mi_decl_cold @@ -135,8 +135,8 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c extern mi_decl_hidden mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_hidden mi_decl_cache_align const mi_page_t _mi_page_empty; -void _mi_process_load(void); -void mi_cdecl _mi_process_done(void); +void _mi_auto_process_init(void); +void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept; bool _mi_is_redirected(void); bool _mi_allocator_init(const char** message); void _mi_allocator_done(void); diff --git a/src/init.c b/src/init.c index eb87ab3c..0a71ce05 100644 --- a/src/init.c +++ b/src/init.c @@ -323,7 +323,7 @@ static mi_thread_data_t* mi_thread_data_zalloc(void) { return NULL; } } - td->memid = memid; + td->memid = memid; return td; } @@ -555,7 +555,7 @@ mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { } // Called once by the process loader from 
`src/prim/prim.c` -void _mi_process_load(void) { +void _mi_auto_process_init(void) { mi_heap_main_init(); #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; @@ -642,12 +642,8 @@ void mi_process_init(void) mi_attr_noexcept { } } -void mi_cdecl mi_process_done(void) mi_attr_noexcept { - _mi_process_done(); -} - // Called when the process is done (cdecl as it is used with `at_exit` on some platforms) -void mi_cdecl _mi_process_done(void) { +void mi_cdecl mi_process_done(void) mi_attr_noexcept { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once @@ -690,3 +686,7 @@ void mi_cdecl _mi_process_done(void) { os_preloading = true; // don't call the C runtime anymore } +void mi_cdecl _mi_auto_process_done(void) mi_attr_noexcept { + if (_mi_option_get_fast(mi_option_destroy_on_exit)>1) return; + mi_process_done(); +} diff --git a/src/prim/prim.c b/src/prim/prim.c index 2002853f..5147bae8 100644 --- a/src/prim/prim.c +++ b/src/prim/prim.c @@ -39,29 +39,29 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_destructor __attribute__((destructor)) #endif static void mi_attr_constructor mi_process_attach(void) { - _mi_process_load(); + _mi_auto_process_init(); } static void mi_attr_destructor mi_process_detach(void) { - _mi_process_done(); + _mi_auto_process_done(); } #elif defined(__cplusplus) // C++: use static initialization to detect process start/end // This is not guaranteed to be first/last but the best we can generally do? 
struct mi_init_done_t { mi_init_done_t() { - _mi_process_load(); + _mi_auto_process_init(); } ~mi_init_done_t() { - _mi_process_done(); + _mi_auto_process_done(); } }; static mi_init_done_t mi_init_done; #else - #pragma message("define a way to call _mi_process_load/done on your platform") + #pragma message("define a way to call _mi_auto_process_init/done on your platform") #endif #endif -// Generic allocator init/done callback +// Generic allocator init/done callback #ifndef MI_PRIM_HAS_ALLOCATOR_INIT bool _mi_is_redirected(void) { return false; diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index df941af9..6752569c 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -633,7 +633,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { //---------------------------------------------------------------- #if MI_WIN_USE_FIXED_TLS==1 -mi_decl_cache_align size_t _mi_win_tls_offset = 0; +mi_decl_cache_align size_t _mi_win_tls_offset = 0; #endif //static void mi_debug_out(const char* s) { @@ -654,14 +654,14 @@ static void mi_win_tls_init(DWORD reason) { #endif #if MI_HAS_TLS_SLOT >= 2 // we must initialize the TLS slot before any allocation if (mi_prim_get_default_heap() == NULL) { - _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct((mi_heap_t*)&_mi_heap_empty); #if MI_DEBUG && MI_WIN_USE_FIXED_TLS==1 void* const p = TlsGetValue((DWORD)(_mi_win_tls_offset / sizeof(void*))); mi_assert_internal(p == (void*)&_mi_heap_empty); - #endif + #endif } - #endif - } + #endif + } } static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { @@ -669,10 +669,10 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { MI_UNUSED(module); mi_win_tls_init(reason); if (reason==DLL_PROCESS_ATTACH) { - _mi_process_load(); + _mi_auto_process_init(); } else if (reason==DLL_PROCESS_DETACH) { - _mi_process_done(); + _mi_auto_process_done(); } else if (reason==DLL_THREAD_DETACH && 
!_mi_is_redirected()) { _mi_thread_done(NULL); @@ -684,7 +684,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { #define MI_PRIM_HAS_PROCESS_ATTACH 1 // Windows DLL: easy to hook into process_init and thread_done - BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { + BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { mi_win_main((PVOID)inst,reason,reserved); return TRUE; } @@ -762,7 +762,7 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { static int mi_process_attach(void) { mi_win_main(NULL,DLL_PROCESS_ATTACH,NULL); - atexit(&_mi_process_done); + atexit(&_mi_auto_process_done); return 0; } typedef int(*mi_crt_callback_t)(void); From 82cd90083494284691326e13362db61d8d21672f Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 15:54:01 -0700 Subject: [PATCH 61/68] make macOS interposes compile for older macOS versions (by @noxybot, PR #1028) --- src/alloc-override.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/alloc-override.c b/src/alloc-override.c index b5109ded..52ab69c5 100644 --- a/src/alloc-override.c +++ b/src/alloc-override.c @@ -71,24 +71,20 @@ typedef void* mi_nothrow_t; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) - __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = + #define MI_INTERPOSE_DECLS(name) __attribute__((used)) static struct mi_interpose_s name[] __attribute__((section("__DATA, __interpose"))) + + MI_INTERPOSE_DECLS(_mi_interposes) = { MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), MI_INTERPOSE_MI(strdup), - #if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7 - MI_INTERPOSE_MI(strndup), - #endif MI_INTERPOSE_MI(realpath), MI_INTERPOSE_MI(posix_memalign), 
MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_MI(malloc_good_size), - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - MI_INTERPOSE_MI(aligned_alloc), - #endif #ifdef MI_OSX_ZONE // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely MI_INTERPOSE_MI(free), @@ -99,6 +95,12 @@ typedef void* mi_nothrow_t; MI_INTERPOSE_FUN(vfree,mi_cfree), #endif }; + MI_INTERPOSE_DECLS(_mi_interposes_10_7) __OSX_AVAILABLE(10.7) = { + MI_INTERPOSE_MI(strndup), + }; + MI_INTERPOSE_DECLS(_mi_interposes_10_15) __OSX_AVAILABLE(10.15) = { + MI_INTERPOSE_MI(aligned_alloc), + }; #ifdef __cplusplus extern "C" { From a981d40787251ae52c720df4b602df687da93fd2 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 16:02:44 -0700 Subject: [PATCH 62/68] fix link error without static library build (by @fd00, PR #1082) --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3acf83e..353127d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,10 +713,10 @@ if (MI_BUILD_TESTS) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) - if(MI_BUILD_SHARED AND (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN)) - target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) - else() + if(MI_BUILD_STATIC) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) + else() + target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) endif() add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) endforeach() From 99ed3ea754c31e383fe88da467eb861aff9f7146 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 8 Jun 2025 16:41:04 -0700 
Subject: [PATCH 63/68] enable building for xbox, based on pr #1084 by @maxbachmann --- src/prim/windows/prim.c | 48 ++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 6752569c..eebdc4a6 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -12,6 +12,10 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc/prim.h" #include // fputs, stderr +// xbox has no console IO +#if !defined(WINAPI_FAMILY_PARTITION) || WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP | WINAPI_PARTITION_SYSTEM) +#define MI_HAS_CONSOLE_IO +#endif //--------------------------------------------- // Dynamically bind Windows API points for portability @@ -45,22 +49,30 @@ typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { #define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 #include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef LONG (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); // avoid NTSTATUS as it is not defined on xbox (pr #1084) static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; -// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 +// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7 (and GetNumaNodeProcessorMask is not supported on xbox) typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); typedef BOOL 
(__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMask)(UCHAR Node, PULONGLONG ProcessorMask); +typedef BOOL (__stdcall* PGetNumaHighestNodeNumber)(PULONG Node); static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; +static PGetNumaNodeProcessorMask pGetNumaNodeProcessorMask = NULL; +static PGetNumaHighestNodeNumber pGetNumaHighestNodeNumber = NULL; + +// Not available on xbox +typedef SIZE_T(__stdcall* PGetLargePageMinimum)(VOID); +static PGetLargePageMinimum pGetLargePageMinimum = NULL; // Available after Windows XP typedef BOOL (__stdcall *PGetPhysicallyInstalledSystemMemory)( PULONGLONG TotalMemoryInKilobytes ); @@ -74,6 +86,7 @@ static bool win_enable_large_os_pages(size_t* large_page_size) static bool large_initialized = false; if (large_initialized) return (_mi_os_large_page_size() > 0); large_initialized = true; + if (pGetLargePageMinimum==NULL) return false; // no large page support (xbox etc.) 
// Try to see if large OS pages are supported // To use large pages on Windows, we first need access permission @@ -92,8 +105,8 @@ static bool win_enable_large_os_pages(size_t* large_page_size) if (ok) { err = GetLastError(); ok = (err == ERROR_SUCCESS); - if (ok && large_page_size != NULL) { - *large_page_size = GetLargePageMinimum(); + if (ok && large_page_size != NULL && pGetLargePageMinimum != NULL) { + *large_page_size = (*pGetLargePageMinimum)(); } } } @@ -149,6 +162,9 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); + pGetNumaNodeProcessorMask = (PGetNumaNodeProcessorMask)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMask"); + pGetNumaHighestNodeNumber = (PGetNumaHighestNodeNumber)(void (*)(void))GetProcAddress(hDll, "GetNumaHighestNodeNumber"); + pGetLargePageMinimum = (PGetLargePageMinimum)(void (*)(void))GetProcAddress(hDll, "GetLargePageMinimum"); // Get physical memory (not available on XP, so check dynamically) PGetPhysicallyInstalledSystemMemory pGetPhysicallyInstalledSystemMemory = (PGetPhysicallyInstalledSystemMemory)(void (*)(void))GetProcAddress(hDll,"GetPhysicallyInstalledSystemMemory"); if (pGetPhysicallyInstalledSystemMemory != NULL) { @@ -388,7 +404,7 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num } SIZE_T psize = size; void* base = hint_addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + LONG err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); if (err == 0 && base != NULL) { return base; } @@ 
-442,9 +458,11 @@ size_t _mi_prim_numa_node(void) { size_t _mi_prim_numa_node_count(void) { ULONG numa_max = 0; - GetNumaHighestNodeNumber(&numa_max); + if (pGetNumaHighestNodeNumber!=NULL) { + (*pGetNumaHighestNodeNumber)(&numa_max); + } // find the highest node number that has actual processors assigned to it. Issue #282 - while(numa_max > 0) { + while (numa_max > 0) { if (pGetNumaNodeProcessorMaskEx != NULL) { // Extended API is supported GROUP_AFFINITY affinity; @@ -455,8 +473,10 @@ size_t _mi_prim_numa_node_count(void) { else { // Vista or earlier, use older API that is limited to 64 processors. ULONGLONG mask; - if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node + if (pGetNumaNodeProcessorMask != NULL) { + if ((*pGetNumaNodeProcessorMask)((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + } }; } // max node was invalid or had no processor assigned, try again @@ -546,17 +566,21 @@ void _mi_prim_out_stderr( const char* msg ) if (!_mi_preloading()) { // _cputs(msg); // _cputs cannot be used as it aborts when failing to lock the console static HANDLE hcon = INVALID_HANDLE_VALUE; - static bool hconIsConsole; + static bool hconIsConsole = false; if (hcon == INVALID_HANDLE_VALUE) { - CONSOLE_SCREEN_BUFFER_INFO sbi; hcon = GetStdHandle(STD_ERROR_HANDLE); + #ifdef MI_HAS_CONSOLE_IO + CONSOLE_SCREEN_BUFFER_INFO sbi; hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); + #endif } const size_t len = _mi_strlen(msg); if (len > 0 && len < UINT32_MAX) { DWORD written = 0; if (hconIsConsole) { + #ifdef MI_HAS_CONSOLE_IO WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + #endif } else if (hcon != INVALID_HANDLE_VALUE) { // use direct write if stderr was redirected From 3b2daccf9d1d50ba3a58375f6e9ff0733d1a0c9a Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 8 Jun 2025 16:50:29 -0700 Subject: [PATCH 64/68] fix build for TSAN tests --- 
CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 353127d5..c58e64f8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -713,10 +713,12 @@ if (MI_BUILD_TESTS) target_compile_definitions(mimalloc-test-${TEST_NAME} PRIVATE ${mi_defines}) target_compile_options(mimalloc-test-${TEST_NAME} PRIVATE ${mi_cflags}) target_include_directories(mimalloc-test-${TEST_NAME} PRIVATE include) - if(MI_BUILD_STATIC) + if(MI_BUILD_STATIC AND NOT MI_DEBUG_TSAN) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc-static ${mi_libraries}) - else() + elseif(MI_BUILD_SHARED) target_link_libraries(mimalloc-test-${TEST_NAME} PRIVATE mimalloc ${mi_libraries}) + else() + message(STATUS "cannot build TSAN tests without MI_BUILD_SHARED being enabled") endif() add_test(NAME test-${TEST_NAME} COMMAND mimalloc-test-${TEST_NAME}) endforeach() From 316a434d8e282656bcad243b6d794fc7feb07038 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:05:30 -0700 Subject: [PATCH 65/68] fix armv7 detection --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c58e64f8..5ce084f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,7 +126,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENE set(MI_ARCH "x64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) set(MI_ARCH "arm64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567].?|ARM)$") set(MI_ARCH "arm32") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") if(CMAKE_SIZEOF_VOID_P==4) From 82b67862c8763040ee90a724e78bf4738e50eb34 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:18:51 -0700 Subject: [PATCH 66/68] update vcpkg config to 1.9.4 --- 
contrib/vcpkg/portfile.cmake | 5 +++-- contrib/vcpkg/vcpkg.json | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index 69661526..b59c3675 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -4,8 +4,8 @@ vcpkg_from_github( HEAD_REF master # The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1). - # REF "v${VERSION}" - REF 866ce5b89db1dbc3e66bbf89041291fd16329518 + REF "v${VERSION}" + # REF 866ce5b89db1dbc3e66bbf89041291fd16329518 # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) @@ -19,6 +19,7 @@ vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS secure MI_SECURE override MI_OVERRIDE optarch MI_OPT_ARCH + nooptarch MI_NO_OPT_ARCH optsimd MI_OPT_SIMD xmalloc MI_XMALLOC asm MI_SEE_ASM diff --git a/contrib/vcpkg/vcpkg.json b/contrib/vcpkg/vcpkg.json index 45f8097b..b38555a1 100644 --- a/contrib/vcpkg/vcpkg.json +++ b/contrib/vcpkg/vcpkg.json @@ -1,6 +1,6 @@ { "name": "mimalloc", - "version": "1.9.2", + "version": "1.9.4", "port-version": 2, "description": "Compact general purpose allocator with excellent performance", "homepage": "https://github.com/microsoft/mimalloc", @@ -35,6 +35,9 @@ "optarch": { "description": "Use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" }, + "nooptarch": { + "description": "Do _not_ use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')" + }, "optsimd": { "description": "Allow use of SIMD instructions (avx2 or neon) (requires 'optarch' to be enabled)" }, From 6d3c8607699da8375ec9985aaacdb1fa8ea6ea4d Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 19:19:18 -0700 Subject: [PATCH 67/68] update readme for upcoming release --- readme.md | 16 ++++++++++------ 1 file changed, 10 
insertions(+), 6 deletions(-) diff --git a/readme.md b/readme.md index 601a7e24..71aaf7a2 100644 --- a/readme.md +++ b/readme.md @@ -12,9 +12,9 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release : `v3.0.3` (beta) (2025-03-28). -Latest v2 release: `v2.2.3` (2025-03-28). -Latest v1 release: `v1.9.3` (2024-03-28). +Latest release : `v3.1.4` (beta) (2025-06-09). +Latest v2 release: `v2.2.4` (2025-06-09). +Latest v1 release: `v1.9.4` (2025-06-09). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -77,12 +77,16 @@ Enjoy! * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage mimalloc pages that can reduce fragmentation. -* `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version - simplifies the lock-free ownership of previous versions, has no thread-local segments any more. - This improves sharing of memory between threads, and on certain large workloads may use (much) less memory. +* `dev3`: development branch for mimalloc v3 beta. This branch is downstream of `dev`. This version + simplifies the lock-free ownership of previous versions, and improves sharing of memory between + threads. On certain large workloads this version may use (much) less memory. ### Releases +* 2025-06-09, `v1.9.4`, `v2.2.4`, `v3.1.4` (beta) : Some important bug fixes, including a case where OS memory + was not always fully released.
Improved v3 performance, build on XBox, fix build on Android, support interpose + for older macOS versions, use MADV_FREE_REUSABLE on macOS, always check commit success, better support for Windows + fixed TLS offset, etc. * 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. From cfff6bfd47bd491145364b210d6552ea2c42444d Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 9 Jun 2025 20:01:28 -0700 Subject: [PATCH 68/68] bump version to v1.9.5 for further development --- cmake/mimalloc-config-version.cmake | 2 +- contrib/vcpkg/portfile.cmake | 2 +- include/mimalloc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/mimalloc-config-version.cmake b/cmake/mimalloc-config-version.cmake index 0446485b..1057b5c0 100644 --- a/cmake/mimalloc-config-version.cmake +++ b/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 1) set(mi_version_minor 9) -set(mi_version_patch 4) +set(mi_version_patch 5) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/contrib/vcpkg/portfile.cmake b/contrib/vcpkg/portfile.cmake index b59c3675..a13b57c3 100644 --- a/contrib/vcpkg/portfile.cmake +++ b/contrib/vcpkg/portfile.cmake @@ -9,7 +9,7 @@ vcpkg_from_github( # The sha512 is the hash of the tar.gz bundle. # (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=` and copy the sha from the error message.) 
- SHA512 0b0e5ff823c49b9534b8c32800679806c5d7c29020af058da043c3e6e36ae3c32a1cdd5a21ece97dd60bc7dd4703967f683beac435dbb8514638a6cc55e5dea8 + SHA512 fb5aa8c2e6c15e5d22746ee40ed196f2fb7eafec9abfcbf94e7e70854734e99dd09886e1d68374fb995fe597e158100aa89260579e34cf5d9fb75d501b186d6a ) vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS diff --git a/include/mimalloc.h b/include/mimalloc.h index ce814d18..7bde743f 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 194 // major + 2 digits minor +#define MI_MALLOC_VERSION 195 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes