Compare commits

...

8 commits

Author SHA1 Message Date
Eduard Voronkin
383b219ac8
Merge 7023c205aa into bc8eca8bf2 2025-04-07 17:04:38 +02:00
Daan
bc8eca8bf2 typo 2025-04-02 12:09:09 -07:00
daanx
8c99ac1bbd fix typo 2025-04-02 11:16:33 -07:00
daanx
d767dbfbb4 use C++ compilation with clang-cl (as well as msvc) on Windows 2025-04-02 10:50:36 -07:00
daanx
5a58df6534 fix signed compare warning 2025-04-02 10:40:30 -07:00
daanx
3c3600f85f add atomic_cas_ptr_strong_acq_rel 2025-04-02 10:36:01 -07:00
daanx
235a0390ee refactor numa_node_count 2025-04-02 10:34:00 -07:00
Eduard Voronkin
7023c205aa improve MacOS interposes to make mimalloc compile for older MacOS
I've got a failure while trying to compile with "-mmacosx-version-min=10.7" since MAC_OS_X_VERSION_MAX_ALLOWED is greater than MAC_OS_X_VERSION_10_15 but still we can't use `aligned_alloc` since of the lower bound (10.7).
I'm not sure about internal details of how `__OSX_AVAILABLE` works, but my hope is the it would only make this interpose available starting from the OS version specified.
Also, it helps me to compile mimalloc with "-mmacosx-version-min=10.7".
2025-03-06 10:26:22 -08:00
7 changed files with 50 additions and 46 deletions

View file

@ -173,7 +173,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
list(APPEND mi_cflags -Wall) list(APPEND mi_cflags -Wall)
endif() endif()
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") # force C++ compilation with msvc or clang-cl to use modern C++ atomics
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL)
set(MI_USE_CXX "ON") set(MI_USE_CXX "ON")
endif() endif()

View file

@ -111,6 +111,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)x)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x)
@ -120,6 +121,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x) #define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
@ -303,6 +305,7 @@ static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p,
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) #define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_relaxed(tp,p,x) (tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) #define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)

View file

@ -140,9 +140,11 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
bool _mi_os_use_large_page(size_t size, size_t alignment); bool _mi_os_use_large_page(size_t size, size_t alignment);
size_t _mi_os_large_page_size(void); size_t _mi_os_large_page_size(void);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid);
int _mi_os_numa_node_count(void);
int _mi_os_numa_node(void);
// arena.c // arena.c
mi_arena_id_t _mi_arena_id_none(void); mi_arena_id_t _mi_arena_id_none(void);
void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid); void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid);
@ -813,24 +815,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
return x; return x;
} }
// -------------------------------------------------------------------
// Optimize numa node access for the common case (= one node)
// -------------------------------------------------------------------
int _mi_os_numa_node_get(void);
size_t _mi_os_numa_node_count_get(void);
extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count;
static inline int _mi_os_numa_node(void) {
if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; }
else return _mi_os_numa_node_get();
}
static inline size_t _mi_os_numa_node_count(void) {
const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
if mi_likely(count > 0) { return count; }
else return _mi_os_numa_node_count_get();
}
// ----------------------------------------------------------------------- // -----------------------------------------------------------------------

View file

@ -84,7 +84,7 @@ Enjoy!
### Releases ### Releases
* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including: * 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including:
fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts, fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts,
fix execution on non BMI1 x64 systems. fix execution on non BMI1 x64 systems.
* 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes. * 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes.
@ -177,7 +177,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build.
The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override-dll` project builds DLL for overriding malloc `mimalloc-override-dll` project builds a DLL for overriding malloc
in the entire program. in the entire program.
## Linux, macOS, BSD, etc. ## Linux, macOS, BSD, etc.

View file

@ -77,18 +77,12 @@ typedef void* mi_nothrow_t;
MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(calloc),
MI_INTERPOSE_MI(realloc), MI_INTERPOSE_MI(realloc),
MI_INTERPOSE_MI(strdup), MI_INTERPOSE_MI(strdup),
#if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7
MI_INTERPOSE_MI(strndup),
#endif
MI_INTERPOSE_MI(realpath), MI_INTERPOSE_MI(realpath),
MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(posix_memalign),
MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(reallocf),
MI_INTERPOSE_MI(valloc), MI_INTERPOSE_MI(valloc),
MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked),
MI_INTERPOSE_MI(malloc_good_size), MI_INTERPOSE_MI(malloc_good_size),
#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
MI_INTERPOSE_MI(aligned_alloc),
#endif
#ifdef MI_OSX_ZONE #ifdef MI_OSX_ZONE
// we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely
MI_INTERPOSE_MI(free), MI_INTERPOSE_MI(free),
@ -99,6 +93,15 @@ typedef void* mi_nothrow_t;
MI_INTERPOSE_FUN(vfree,mi_cfree), MI_INTERPOSE_FUN(vfree,mi_cfree),
#endif #endif
}; };
__attribute__((used)) static struct mi_interpose_s _mi_interposes_10_7[]
__attribute__((section("__DATA, __interpose"))) __OSX_AVAILABLE(10.7) = {
MI_INTERPOSE_MI(strndup),
};
__attribute__((used)) static struct mi_interpose_s _mi_interposes_10_15[]
__attribute__((section("__DATA, __interpose"))) __OSX_AVAILABLE(10.15) = {
MI_INTERPOSE_MI(aligned_alloc),
};
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {

View file

@ -1007,17 +1007,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
if (pages == 0) return 0; if (pages == 0) return 0;
// pages per numa node // pages per numa node
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) numa_count = 1; if (numa_count == 0) numa_count = 1;
const size_t pages_per = pages / numa_count; const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count; const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
// reserve evenly among numa nodes // reserve evenly among numa nodes
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0 size_t node_pages = pages_per; // can be 0
if (numa_node < pages_mod) node_pages++; if ((size_t)numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (err) return err; if (err) return err;
if (pages < node_pages) { if (pages < node_pages) {
pages = 0; pages = 0;

View file

@ -696,34 +696,47 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
} }
} }
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
Support NUMA aware allocation Support NUMA aware allocation
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count
size_t _mi_os_numa_node_count_get(void) { int _mi_os_numa_node_count(void) {
size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); size_t count = mi_atomic_load_acquire(&mi_numa_node_count);
if (count <= 0) { if mi_unlikely(count == 0) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount > 0) { if (ncount > 0 && ncount < INT_MAX) {
count = (size_t)ncount; count = (size_t)ncount;
} }
else { else {
count = _mi_prim_numa_node_count(); // or detect dynamically const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
if (count == 0) count = 1; if (n == 0 || n > INT_MAX) { count = 1; }
else { count = n; }
} }
mi_atomic_store_release(&_mi_numa_node_count, count); // save it mi_atomic_store_release(&mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count); _mi_verbose_message("using %zd numa regions\n", count);
} }
return count; mi_assert_internal(count > 0 && count <= INT_MAX);
return (int)count;
} }
int _mi_os_numa_node_get(void) { static int mi_os_numa_node_get(void) {
size_t numa_count = _mi_os_numa_node_count(); int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0 // never more than the node count and >= 0
size_t numa_node = _mi_prim_numa_node(); const size_t n = _mi_prim_numa_node();
int numa_node = (n < INT_MAX ? (int)n : 0);
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return (int)numa_node; return numa_node;
}
int _mi_os_numa_node(void) {
if mi_likely(mi_atomic_load_relaxed(&mi_numa_node_count) == 1) {
return 0;
}
else {
return mi_os_numa_node_get();
}
} }