Merge branch 'dev' into dev2

This commit is contained in:
Daan 2025-04-08 13:56:59 -07:00
commit 9b9c1e0512
7 changed files with 44 additions and 40 deletions

View file

@ -173,7 +173,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
list(APPEND mi_cflags -Wall) list(APPEND mi_cflags -Wall)
endif() endif()
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") # force C++ compilation with msvc or clang-cl to use modern C++ atomics
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL)
set(MI_USE_CXX "ON") set(MI_USE_CXX "ON")
endif() endif()

View file

@ -111,6 +111,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x)
@ -120,6 +121,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
@ -303,6 +305,7 @@ static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p,
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)

View file

@ -140,9 +140,11 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
bool _mi_os_use_large_page(size_t size, size_t alignment); bool _mi_os_use_large_page(size_t size, size_t alignment);
size_t _mi_os_large_page_size(void); size_t _mi_os_large_page_size(void);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid);
int _mi_os_numa_node_count(void);
int _mi_os_numa_node(void);
// arena.c // arena.c
mi_arena_id_t _mi_arena_id_none(void); mi_arena_id_t _mi_arena_id_none(void);
void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid); void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid);
@ -896,24 +898,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
return x; return x;
} }
// -------------------------------------------------------------------
// Optimize numa node access for the common case (= one node)
// -------------------------------------------------------------------
int _mi_os_numa_node_get(void);
size_t _mi_os_numa_node_count_get(void);
extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count;
static inline int _mi_os_numa_node(void) {
if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; }
else return _mi_os_numa_node_get();
}
static inline size_t _mi_os_numa_node_count(void) {
const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
if mi_likely(count > 0) { return count; }
else return _mi_os_numa_node_count_get();
}
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------

View file

@ -84,7 +84,7 @@ Enjoy!
### Releases ### Releases
* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: * 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including:
fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts,
fix execution on non BMI1 x64 systems. fix execution on non BMI1 x64 systems.
* 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes.
@ -177,7 +177,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build.
The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override-dll` project builds DLL for overriding malloc `mimalloc-override-dll` project builds a DLL for overriding malloc
in the entire program. in the entire program.
## Linux, macOS, BSD, etc. ## Linux, macOS, BSD, etc.

View file

@ -1011,17 +1011,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
if (pages == 0) return 0; if (pages == 0) return 0;
// pages per numa node // pages per numa node
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) numa_count = 1; if (numa_count == 0) numa_count = 1;
const size_t pages_per = pages / numa_count; const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count; const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
// reserve evenly among numa nodes // reserve evenly among numa nodes
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0 size_t node_pages = pages_per; // can be 0
if (numa_node < pages_mod) node_pages++; if ((size_t)numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (err) return err; if (err) return err;
if (pages < node_pages) { if (pages < node_pages) {
pages = 0; pages = 0;

View file

@ -680,34 +680,47 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
} }
} }
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
Support NUMA aware allocation Support NUMA aware allocation
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count
size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_count(void) {
size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); size_t count = mi_atomic_load_acquire(&mi_numa_node_count);
if (count <= 0) { if mi_unlikely(count == 0) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount > 0) { if (ncount > 0 && ncount < INT_MAX) {
count = (size_t)ncount; count = (size_t)ncount;
} }
else { else {
count = _mi_prim_numa_node_count(); // or detect dynamically const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
if (count == 0) count = 1; if (n == 0 || n > INT_MAX) { count = 1; }
else { count = n; }
} }
mi_atomic_store_release(&_mi_numa_node_count, count); // save it mi_atomic_store_release(&mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count); _mi_verbose_message("using %zd numa regions\n", count);
} }
return count; mi_assert_internal(count > 0 && count <= INT_MAX);
return (int)count;
} }
int _mi_os_numa_node_get(void) { static int mi_os_numa_node_get(void) {
size_t numa_count = _mi_os_numa_node_count(); int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0 // never more than the node count and >= 0
size_t numa_node = _mi_prim_numa_node(); const size_t n = _mi_prim_numa_node();
int numa_node = (n < INT_MAX ? (int)n : 0);
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return (int)numa_node; return numa_node;
}
int _mi_os_numa_node(void) {
if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) {
return 0;
}
else {
return mi_os_numa_node_get();
}
} }

View file

@ -32,6 +32,9 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(__linux__) #if defined(__linux__)
#include <features.h> #include <features.h>
#include <sys/prctl.h> // THP disable, PR_SET_VMA #include <sys/prctl.h> // THP disable, PR_SET_VMA
#if !defined(PR_SET_VMA)
#include <linux/prctl.h>
#endif
#if defined(__GLIBC__) #if defined(__GLIBC__)
#include <linux/mman.h> // linux mmap flags #include <linux/mman.h> // linux mmap flags
#else #else