Compare commits

...

13 commits

Author SHA1 Message Date
Eduard Voronkin
f5a99f6e9c
Merge fd5ab9f1f3 into bc8eca8bf2 2025-04-07 17:01:54 +02:00
Daan
bc8eca8bf2 typo 2025-04-02 12:09:09 -07:00
daanx
8c99ac1bbd fix typo 2025-04-02 11:16:33 -07:00
daanx
d767dbfbb4 use C++ compilation with clang-cl (as well as msvc) on Windows 2025-04-02 10:50:36 -07:00
daanx
5a58df6534 fix signed compare warning 2025-04-02 10:40:30 -07:00
daanx
3c3600f85f add atomic_cas_ptr_strong_acq_rel 2025-04-02 10:36:01 -07:00
daanx
235a0390ee refactor numa_node_count 2025-04-02 10:34:00 -07:00
Daan
77b622511a fix alpine compilation with prctl.h (issue #1059) 2025-03-31 14:44:46 -07:00
Daan
e1110cdb9f nicer cmake logic for windows override test 2025-03-31 11:02:10 -07:00
Daan
a9e9467429 make dynamic override test verbose 2025-03-31 11:00:05 -07:00
Daan
07743454e5 fix dynamic override test on non-windows platforms 2025-03-31 10:57:16 -07:00
Daan
cbab63f6c9 fix release build warning (unused mi_stat_total_print) 2025-03-30 16:15:27 -07:00
Eduard Voronkin
fd5ab9f1f3 fix new/delete overrides
Apparently, even if MI_USE_CXX is OFF, mimalloc still provides mangled definitions of the new/delete operators. This makes it impossible to statically link mimalloc in our scenario, since we want to provide our own overrides of the new/delete functions.
2024-11-06 12:02:22 -08:00
9 changed files with 51 additions and 53 deletions

View file

@ -173,7 +173,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
list(APPEND mi_cflags -Wall)
endif()
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel")
# force C++ compilation with msvc or clang-cl to use modern C++ atomics
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel" OR MI_CLANG_CL)
set(MI_USE_CXX "ON")
endif()
@ -724,21 +725,19 @@ if (MI_BUILD_TESTS)
if(MI_BUILD_SHARED AND NOT (MI_TRACK_ASAN OR MI_DEBUG_TSAN OR MI_DEBUG_UBSAN))
add_executable(mimalloc-test-stress-dynamic test/test-stress.c)
target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE ${mi_defines} "USE_STD_MALLOC=1")
if(WIN32)
target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1")
endif()
target_compile_options(mimalloc-test-stress-dynamic PRIVATE ${mi_cflags})
target_include_directories(mimalloc-test-stress-dynamic PRIVATE include)
target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # mi_version
if(WIN32)
add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 $<TARGET_FILE:mimalloc-test-stress-dynamic>)
target_compile_definitions(mimalloc-test-stress-dynamic PRIVATE "MI_LINK_VERSION=1") # link mi_version
target_link_libraries(mimalloc-test-stress-dynamic PRIVATE mimalloc ${mi_libraries}) # link mi_version
add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 $<TARGET_FILE:mimalloc-test-stress-dynamic>)
else()
if(APPLE)
set(LD_PRELOAD "DYLD_INSERT_LIBRARIES")
else()
set(LD_PRELOAD "LD_PRELOAD")
endif()
add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_SHOW_STATS=1 ${LD_PRELOAD}=$<TARGET_FILE:mimalloc> $<TARGET_FILE:mimalloc-test-stress-dynamic>)
add_test(NAME test-stress-dynamic COMMAND ${CMAKE_COMMAND} -E env MIMALLOC_VERBOSE=1 ${LD_PRELOAD}=$<TARGET_FILE:mimalloc> $<TARGET_FILE:mimalloc-test-stress-dynamic>)
endif()
endif()
endif()

View file

@ -111,6 +111,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
// Typed pointer variants of the generic atomic operations: `tp` is the pointee
// type and the desired/new value is cast to `tp*` before being forwarded.
// The cast operand is parenthesized so that an expression argument
// (e.g. `base + 1` or `c ? a : b`) is cast as a whole instead of only its
// first operand.
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)(des))
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)(des))
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)(des))
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,(tp*)(des))
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,(tp*)(x))
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)(x))
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)(x))
@ -120,6 +121,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
// Pointer variants that ignore the `tp` parameter: `p`, `exp`, `des` and `x`
// are forwarded unchanged to the generic atomic operation of the same
// memory-order suffix (no cast to `tp*` is applied here).
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel(p,exp,des)
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) mi_atomic_exchange_relaxed(p,x)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
@ -303,6 +305,7 @@ static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p,
// Pointer variants implemented on top of the `uintptr_t` atomics: the target
// is reinterpreted as `_Atomic(uintptr_t)*`, the expected value as
// `uintptr_t*`, and the desired/new value is converted to `uintptr_t`.
// The exchange variants cast the returned integer back to `tp*`.
// Every macro argument that is the operand of a cast is parenthesized so an
// expression argument (e.g. `base + 1`) is converted as a whole, and the
// exchange expansions are wrapped so they compose safely in larger expressions.
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)(exp),(uintptr_t)(des))
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)(exp),(uintptr_t)(des))
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)(exp),(uintptr_t)(des))
#define mi_atomic_cas_ptr_strong_acq_rel(tp,p,exp,des) mi_atomic_cas_strong_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)(exp),(uintptr_t)(des))
#define mi_atomic_exchange_ptr_relaxed(tp,p,x) ((tp*)mi_atomic_exchange_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)))
#define mi_atomic_exchange_ptr_release(tp,p,x) ((tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)))
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) ((tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)))

View file

@ -140,9 +140,11 @@ void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
bool _mi_os_use_large_page(size_t size, size_t alignment);
size_t _mi_os_large_page_size(void);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid);
int _mi_os_numa_node_count(void);
int _mi_os_numa_node(void);
// arena.c
mi_arena_id_t _mi_arena_id_none(void);
void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid);
@ -813,24 +815,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
return x;
}
// -------------------------------------------------------------------
// Optimize numa node access for the common case (= one node)
// -------------------------------------------------------------------
int _mi_os_numa_node_get(void);
size_t _mi_os_numa_node_count_get(void);
extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count;
// Return a NUMA node index. When the cached node count equals 1 the answer
// is always 0 and no call is made; any other cached value defers to
// `_mi_os_numa_node_get`.
static inline int _mi_os_numa_node(void) {
  if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) {
    return 0;
  }
  return _mi_os_numa_node_get();
}
// Return the NUMA node count. A non-zero cached value is returned directly;
// a cached value of 0 defers to `_mi_os_numa_node_count_get`.
static inline size_t _mi_os_numa_node_count(void) {
  const size_t cached = mi_atomic_load_relaxed(&_mi_numa_node_count);
  if mi_likely(cached != 0) {
    return cached;
  }
  return _mi_os_numa_node_count_get();
}
// -----------------------------------------------------------------------

View file

@ -84,7 +84,7 @@ Enjoy!
### Releases
* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta): Various small bug and build fixes, including:
* 2025-03-28, `v1.9.3`, `v2.2.3`, `v3.0.3` (beta) : Various small bug and build fixes, including:
fix arm32 pre v7 builds, fix mingw build, get runtime statistics, improve statistic commit counts,
fix execution on non BMI1 x64 systems.
* 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes.
@ -177,7 +177,7 @@ mimalloc is used in various large scale low-latency services and programs, for e
Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build.
The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the
`mimalloc-override-dll` project builds DLL for overriding malloc
`mimalloc-override-dll` project builds a DLL for overriding malloc
in the entire program.
## Linux, macOS, BSD, etc.

View file

@ -189,9 +189,8 @@ typedef void* mi_nothrow_t;
// Aligned `operator new` overloads: each converts the `std::align_val_t`
// alignment to `size_t` and forwards size and alignment to `mi_new_aligned`
// (declared `noexcept(false)`) or, for the `std::nothrow_t` forms, to
// `mi_new_aligned_nothrow` (declared `noexcept`).
void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
#endif
#elif (defined(__GNUC__) || defined(__clang__))
#elif (defined(__GNUC__) || defined(__clang__))
// ------------------------------------------------------
// Override by defining the mangled C++ names of the operators (as
// used by GCC and Clang).
@ -234,6 +233,7 @@ typedef void* mi_nothrow_t;
#else
#error "define overloads for new/delete for this platform (just for performance, can be skipped)"
#endif
#endif
#endif // __cplusplus
// ------------------------------------------------------

View file

@ -1007,17 +1007,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
if (pages == 0) return 0;
// pages per numa node
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) numa_count = 1;
int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
if (numa_count == 0) numa_count = 1;
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
// reserve evenly among numa nodes
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
if (numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
if ((size_t)numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
if (err) return err;
if (pages < node_pages) {
pages = 0;

View file

@ -696,34 +696,47 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
}
}
/* ----------------------------------------------------------------------------
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
_Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count
static _Atomic(size_t) mi_numa_node_count; // = 0 // cache the node count
size_t _mi_os_numa_node_count_get(void) {
size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
if (count <= 0) {
int _mi_os_numa_node_count(void) {
size_t count = mi_atomic_load_acquire(&mi_numa_node_count);
if mi_unlikely(count == 0) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount > 0) {
if (ncount > 0 && ncount < INT_MAX) {
count = (size_t)ncount;
}
else {
count = _mi_prim_numa_node_count(); // or detect dynamically
if (count == 0) count = 1;
const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
if (n == 0 || n > INT_MAX) { count = 1; }
else { count = n; }
}
mi_atomic_store_release(&_mi_numa_node_count, count); // save it
mi_atomic_store_release(&mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count);
}
return count;
mi_assert_internal(count > 0 && count <= INT_MAX);
return (int)count;
}
int _mi_os_numa_node_get(void) {
size_t numa_count = _mi_os_numa_node_count();
static int mi_os_numa_node_get(void) {
int numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
size_t numa_node = _mi_prim_numa_node();
const size_t n = _mi_prim_numa_node();
int numa_node = (n < INT_MAX ? (int)n : 0);
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return (int)numa_node;
return numa_node;
}
// Return a NUMA node index. If the cached node count is exactly 1 the result
// is always 0; any other cached value (including 0) defers to
// `mi_os_numa_node_get`.
int _mi_os_numa_node(void) {
  const size_t cached_count = mi_atomic_load_relaxed(&mi_numa_node_count);
  if mi_likely(cached_count == 1) { return 0; }
  return mi_os_numa_node_get();
}

View file

@ -31,10 +31,7 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(__linux__)
#include <features.h>
#include <linux/prctl.h> // PR_SET_VMA
//#if defined(MI_NO_THP)
#include <sys/prctl.h> // THP disable
//#endif
#include <sys/prctl.h> // THP disable, PR_SET_VMA
#if defined(__GLIBC__)
#include <linux/mman.h> // linux mmap flags
#else
@ -208,7 +205,7 @@ static int unix_madvise(void* addr, size_t size, int advice) {
static void* unix_mmap_prim(void* addr, size_t size, int protect_flags, int flags, int fd) {
void* p = mmap(addr, size, protect_flags, flags, fd, 0 /* offset */);
#if (defined(__linux__) && defined(PR_SET_VMA))
#if defined(__linux__) && defined(PR_SET_VMA)
if (p!=MAP_FAILED && p!=NULL) {
prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, p, size, "mimalloc");
}

View file

@ -218,12 +218,14 @@ static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int
_mi_fprintf(out, arg, "\n");
}
#if MI_STAT>1
// Print one statistics line that shows only the `total` field of `stat`.
// The line consists of the label `msg` (formatted with "%10s:"), a blank
// 12-wide column in place of a peak value, `stat->total` printed through
// `mi_print_amount` with the given `unit`, and a terminating newline.
// All output is sent through `out` with its argument `arg`.
static void mi_stat_total_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) {
_mi_fprintf(out, arg, "%10s:", msg);
_mi_fprintf(out, arg, "%12s", " "); // no peak
mi_print_amount(stat->total, unit, out, arg);
_mi_fprintf(out, arg, "\n");
}
#endif
static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) {
_mi_fprintf(out, arg, "%10s:", msg);