From 1c6b40d8bd7a3fd77f4590f50e88bf4d040a5375 Mon Sep 17 00:00:00 2001 From: daanx Date: Tue, 4 Mar 2025 09:22:41 -0800 Subject: [PATCH 1/6] fix verbose option printing --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 94cb8b67..e8eb85ad 100644 --- a/src/options.c +++ b/src/options.c @@ -202,7 +202,7 @@ void _mi_options_init(void) { } } #endif - if (!mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); } + if (mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); } } #define mi_stringifyx(str) #str // and stringify From 119f2eff6c61eb4f0c315773944b914af47d48ef Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Mar 2025 09:51:40 -0800 Subject: [PATCH 2/6] use int for numa node count --- include/mimalloc/internal.h | 2 +- src/arena.c | 10 +++++----- src/os.c | 20 +++++++++++--------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b4515831..e8b1c919 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -160,7 +160,7 @@ bool _mi_os_secure_guard_page_reset_at(void* addr); bool _mi_os_secure_guard_page_reset_before(void* addr); int _mi_os_numa_node(void); -size_t _mi_os_numa_node_count(void); +int _mi_os_numa_node_count(void); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/src/arena.c b/src/arena.c index 70e1802b..2aa1f8fe 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1524,17 +1524,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (pages == 0) return 0; // pages per numa node - size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); - if (numa_count <= 0) numa_count = 1; + int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count()); + if (numa_count <= 0) { numa_count = 1; } const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes - for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); + if ((size_t)numa_node < pages_mod) { node_pages++; } + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; diff --git a/src/os.c b/src/os.c index 69decb71..2ee4c897 100644 --- a/src/os.c +++ b/src/os.c @@ -694,18 +694,19 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) { Support NUMA aware allocation -----------------------------------------------------------------------------*/ -static _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count +static _Atomic(int) _mi_numa_node_count; // = 0 // cache the node count -size_t _mi_os_numa_node_count(void) { +int _mi_os_numa_node_count(void) { size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); if mi_unlikely(count <= 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? - if (ncount > 0) { - count = (size_t)ncount; + if (ncount > 0 && ncount < INT_MAX) { + count = (int)ncount; } else { - count = _mi_prim_numa_node_count(); // or detect dynamically - if (count == 0) { count = 1; } + const size_t n = _mi_prim_numa_node_count(); // or detect dynamically + if (n == 0 || n > INT_MAX) { count = 1; } + else { count = (int)n; } } mi_atomic_store_release(&_mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); @@ -715,12 +716,13 @@ size_t _mi_os_numa_node_count(void) { static int mi_os_numa_node_get(void) { - size_t numa_count = _mi_os_numa_node_count(); + int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = _mi_prim_numa_node(); + const size_t n = _mi_prim_numa_node(); + int numa_node = (n < INT_MAX ? (int)n : 0); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - return (int)numa_node; + return numa_node; } int _mi_os_numa_node(void) { From 438903421a9772fb1870325eb003f0b75f9c6fb6 Mon Sep 17 00:00:00 2001 From: Daan Date: Wed, 5 Mar 2025 09:57:51 -0800 Subject: [PATCH 3/6] fix verbosity condition for printing options --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index d4c72a99..772dfe66 100644 --- a/src/options.c +++ b/src/options.c @@ -188,7 +188,7 @@ void _mi_options_init(void) { } } #endif - if (!mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); } + if (mi_option_is_enabled(mi_option_verbose)) { mi_options_print(); } } #define mi_stringifyx(str) #str // and stringify From 5c6ab532d98d2c617a7c21849d60c8e06a618a76 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 5 Mar 2025 15:37:37 -0800 Subject: [PATCH 4/6] make MI_OPT_ARCH by default OFF except for arm64 where we assume v8.1-a for fast atomics --- CMakeLists.txt | 77 +++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 68153468..17e8d3f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhea option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) -option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for arm64: '-march=armv8.1-a' (2016))" ON) +option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for arm64: '-march=armv8.1-a' (2016))" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) @@ -116,9 +116,44 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") set(MI_SECURE "ON") endif() + +# Determine architecture +set(MI_OPT_ARCH_FLAGS "") +set(MI_ARCH "unknown") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$") + set(MI_ARCH "x86") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64 + set(MI_ARCH "x64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) + set(MI_ARCH "arm64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") + set(MI_ARCH "arm32") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") + if(CMAKE_SIZEOF_VOID_P==4) + set(MI_ARCH "riscv32") + else() + set(MI_ARCH "riscv64") + endif() +else() + set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR}) +endif() +message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})") + +# negative overrides (mainly to support vcpkg features) +if(MI_NO_USE_CXX) + set(MI_USE_CXX "OFF") +endif() +if(MI_NO_OPT_ARCH) + set(MI_OPT_ARCH "OFF") +elseif(MI_ARCH STREQUAL "arm64") + set(MI_OPT_ARCH "ON") # enable armv8.1-a by default on arm64 unless MI_NO_OPT_ARCH is set +endif() + + # ----------------------------------------------------------------------------- # Process options # ----------------------------------------------------------------------------- + if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") set(MI_CLANG_CL "ON") endif() @@ -138,27 +173,10 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall) endif() -# negative overrides (mainly to support vcpkg features) -if(MI_NO_USE_CXX) - set(MI_USE_CXX "OFF") -endif() -if(MI_NO_OPT_ARCH) - set(MI_OPT_ARCH "OFF") -endif() - - if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") set(MI_USE_CXX "ON") endif() -if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo") - if (NOT MI_OPT_ARCH) - message(STATUS "Architecture specific optimizations are disabled (MI_OPT_ARCH=OFF)") - endif() -else() - set(MI_OPT_ARCH OFF) -endif() - if(MI_OVERRIDE) message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") if(APPLE) @@ -365,28 +383,6 @@ if(MI_WIN_USE_FIXED_TLS) list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1) endif() -# Determine architecture -set(MI_OPT_ARCH_FLAGS "") -set(MI_ARCH "unknown") -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$") - set(MI_ARCH "x86") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64 - set(MI_ARCH "x64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) - set(MI_ARCH "arm64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") - set(MI_ARCH "arm32") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") - if(CMAKE_SIZEOF_VOID_P==4) - set(MI_ARCH "riscv32") - else() - set(MI_ARCH "riscv64") - endif() -else() - set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR}) -endif() -message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})") - # Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits. # (this will skip the aligned hinting in that case. Issue #939, #949) if (EXISTS /proc/cpuinfo) @@ -439,7 +435,6 @@ endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") if(MI_OPT_ARCH) if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) - set(MI_OPT_ARCH_FLAGS "") if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a") endif() From 8f40bed0864364adcf34f6bbf13bd8b8962a15a4 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Mar 2025 15:48:57 -0800 Subject: [PATCH 5/6] fix erms detection --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index 4ddc5bd1..215eed20 100644 --- a/src/init.c +++ b/src/init.c @@ -600,7 +600,7 @@ static void mi_detect_cpu_features(void) { int32_t cpu_info[4]; __cpuid(cpu_info, 7); _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see - _mi_cpu_has_erms = ((cpu_info[2] & (1 << 9)) != 0); // bit 9 of ECX : see + _mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see } #else static void mi_detect_cpu_features(void) { From ad52fc1b7e1ef0f33696c5d89e5dbc7dcef772d8 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Mar 2025 16:09:22 -0800 Subject: [PATCH 6/6] fix type --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 2ee4c897..01ec2c46 100644 --- a/src/os.c +++ b/src/os.c @@ -697,7 +697,7 @@ Support NUMA aware allocation static _Atomic(int) _mi_numa_node_count; // = 0 // cache the node count int _mi_os_numa_node_count(void) { - size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); + int count = mi_atomic_load_acquire(&_mi_numa_node_count); if mi_unlikely(count <= 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? if (ncount > 0 && ncount < INT_MAX) {