From 235a0390eec64c54f97e6bcbfc0e24307031812a Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:34:00 -0700 Subject: [PATCH 1/7] refactor numa_node_count --- include/mimalloc/internal.h | 22 +++------------------ src/os.c | 39 ++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 5b3e7e23..51fad09c 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -140,9 +140,11 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); - void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); +int _mi_os_numa_node_count(void); +int _mi_os_numa_node(void); + // arena.c mi_arena_id_t _mi_arena_id_none(void); void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid); @@ -813,24 +815,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) { return x; } -// ------------------------------------------------------------------- -// Optimize numa node access for the common case (= one node) -// ------------------------------------------------------------------- - -int _mi_os_numa_node_get(void); -size_t _mi_os_numa_node_count_get(void); - -extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count; -static inline int _mi_os_numa_node(void) { - if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } - else return _mi_os_numa_node_get(); -} -static inline size_t _mi_os_numa_node_count(void) { - const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); - if mi_likely(count > 0) { return count; } - else return _mi_os_numa_node_count_get(); -} - // ----------------------------------------------------------------------- diff --git a/src/os.c b/src/os.c index 12cc5da3..894e3a45 100644 --- a/src/os.c +++ b/src/os.c @@ -696,34 +696,47 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) { } } + /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count +static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count -size_t _mi_os_numa_node_count_get(void) { - size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); - if (count <= 0) { +int _mi_os_numa_node_count(void) { + size_t count = mi_atomic_load_acquire(&mi_numa_node_count); + if mi_unlikely(count == 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? - if (ncount > 0) { + if (ncount > 0 && ncount < INT_MAX) { count = (size_t)ncount; } else { - count = _mi_prim_numa_node_count(); // or detect dynamically - if (count == 0) count = 1; + const size_t n = _mi_prim_numa_node_count(); // or detect dynamically + if (n == 0 || n > INT_MAX) { count = 1; } + else { count = n; } } - mi_atomic_store_release(&_mi_numa_node_count, count); // save it + mi_atomic_store_release(&mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } - return count; + mi_assert_internal(count > 0 && count <= INT_MAX); + return (int)count; } -int _mi_os_numa_node_get(void) { - size_t numa_count = _mi_os_numa_node_count(); +static int mi_os_numa_node_get(void) { + int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = _mi_prim_numa_node(); + const size_t n = _mi_prim_numa_node(); + int numa_node = (n < INT_MAX ? (int)n : 0); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - return (int)numa_node; + return numa_node; +} + +int _mi_os_numa_node(void) { + if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) { + return 0; + } + else { + return mi_os_numa_node_get(); + } } From 3c3600f85fe4a6d7f01a9e69db3cada9e63627cb Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:36:01 -0700 Subject: [PATCH 2/7] add atomic_cas_ptr_strong_acq_rel --- include/mimalloc/atomic.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 39ff5c90..e8bac316 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -111,6 +111,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) @@ -120,6 +121,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub); #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) @@ -303,6 +305,7 @@ static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) From 5a58df6534cbc8673a655e5772461ef7fd4bcbcb Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:40:30 -0700 Subject: [PATCH 3/7] fix signed compare warning --- src/arena.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index a7c20764..bdae8da1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1007,17 +1007,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (pages == 0) return 0; // pages per numa node - size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); - if (numa_count <= 0) numa_count = 1; + int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count()); + if (numa_count == 0) numa_count = 1; const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes - for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); + if ((size_t)numa_node < pages_mod) node_pages++; + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; From d767dbfbb45e2e38502b03dbb57698845899d34f Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 10:50:36 -0700 Subject: [PATCH 4/7] use C++ compilation with clang-cl (as well as msvc) on Windows --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 591ba130..a3acf83e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,7 +173,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall) endif() -if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") +# force C++ compilation with msvc or clang-cl to use modern C++ atomics +if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL) set(MI_USE_CXX "ON") endif() From 8c99ac1bbd9c692239bbf70c40f9be578d54d394 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 2 Apr 2025 11:16:33 -0700 Subject: [PATCH 5/7] fix typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 5a495275..70a25fc1 100644 --- a/readme.md +++ b/readme.md @@ -177,7 +177,7 @@ mimalloc is used in various large scale low-latency services and programs, for e Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the -`mimalloc-override-dll` project builds DLL for overriding malloc +`mimalloc-override-dll` project builds a DLL for overriding malloc in the entire program. ## Linux, macOS, BSD, etc. From bc8eca8bf2641f12ecc23d7527aecdb62d6d2939 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 2 Apr 2025 12:09:09 -0700 Subject: [PATCH 6/7] typo --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 70a25fc1..cee78898 100644 --- a/readme.md +++ b/readme.md @@ -84,7 +84,7 @@ Enjoy! ### Releases -* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: +* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including: fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix execution on non BMI1 x64 systems. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. From 7543e8989abe41e87f260424b3711931d680da77 Mon Sep 17 00:00:00 2001 From: Daan Date: Sun, 13 Apr 2025 19:49:47 -0700 Subject: [PATCH 7/7] validate pointer before assertion in mi_free_size (issue #754) --- src/free.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/free.c b/src/free.c index 7e529530..22284135 100644 --- a/src/free.c +++ b/src/free.c @@ -340,7 +340,11 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { void mi_free_size(void* p, size_t size) mi_attr_noexcept { MI_UNUSED_RELEASE(size); + #if MI_DEBUG + mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free_size"); + if mi_unlikely(segment==NULL) return; mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + #endif mi_free(p); }