From 5c6ab532d98d2c617a7c21849d60c8e06a618a76 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 5 Mar 2025 15:37:37 -0800 Subject: [PATCH 1/3] make MI_OPT_ARCH by default OFF except for arm64 where we assume v8.1-a for fast atomics --- CMakeLists.txt | 77 +++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 68153468..17e8d3f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhea option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF) option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) -option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for arm64: '-march=armv8.1-a' (2016))" ON) +option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for arm64: '-march=armv8.1-a' (2016))" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON) @@ -116,9 +116,44 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") set(MI_SECURE "ON") endif() + +# Determine architecture +set(MI_OPT_ARCH_FLAGS "") +set(MI_ARCH "unknown") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$") + set(MI_ARCH "x86") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64 + set(MI_ARCH "x64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) + set(MI_ARCH "arm64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") + set(MI_ARCH "arm32") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") + if(CMAKE_SIZEOF_VOID_P==4) + set(MI_ARCH "riscv32") + else() + set(MI_ARCH "riscv64") + endif() +else() + set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR}) +endif() +message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})") + +# negative overrides (mainly to support vcpkg features) +if(MI_NO_USE_CXX) + set(MI_USE_CXX "OFF") +endif() +if(MI_NO_OPT_ARCH) + set(MI_OPT_ARCH "OFF") +elseif(MI_ARCH STREQUAL "arm64") + set(MI_OPT_ARCH "ON") # enable armv8.1-a by default on arm64 unless MI_NO_OPT_ARCH is set +endif() + + # ----------------------------------------------------------------------------- # Process options # ----------------------------------------------------------------------------- + if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") set(MI_CLANG_CL "ON") endif() @@ -138,27 +173,10 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall) endif() -# negative overrides (mainly to support vcpkg features) -if(MI_NO_USE_CXX) - set(MI_USE_CXX "OFF") -endif() -if(MI_NO_OPT_ARCH) - set(MI_OPT_ARCH "OFF") -endif() - - if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") set(MI_USE_CXX "ON") endif() -if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo") - if (NOT MI_OPT_ARCH) - message(STATUS "Architecture specific optimizations are disabled (MI_OPT_ARCH=OFF)") - endif() -else() - set(MI_OPT_ARCH OFF) -endif() - if(MI_OVERRIDE) message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") if(APPLE) @@ -365,28 +383,6 @@ if(MI_WIN_USE_FIXED_TLS) list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1) endif() -# Determine architecture -set(MI_OPT_ARCH_FLAGS "") -set(MI_ARCH "unknown") -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$") - set(MI_ARCH "x86") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64 - set(MI_ARCH "x64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) - set(MI_ARCH "arm64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") - set(MI_ARCH "arm32") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") - if(CMAKE_SIZEOF_VOID_P==4) - set(MI_ARCH "riscv32") - else() - set(MI_ARCH "riscv64") - endif() -else() - set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR}) -endif() -message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})") - # Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits. # (this will skip the aligned hinting in that case. Issue #939, #949) if (EXISTS /proc/cpuinfo) @@ -439,7 +435,6 @@ endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") if(MI_OPT_ARCH) if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) - set(MI_OPT_ARCH_FLAGS "") if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a") endif() From 8f40bed0864364adcf34f6bbf13bd8b8962a15a4 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Mar 2025 15:48:57 -0800 Subject: [PATCH 2/3] fix erms detection --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index 4ddc5bd1..215eed20 100644 --- a/src/init.c +++ b/src/init.c @@ -600,7 +600,7 @@ static void mi_detect_cpu_features(void) { int32_t cpu_info[4]; __cpuid(cpu_info, 7); _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see - _mi_cpu_has_erms = ((cpu_info[2] & (1 << 9)) != 0); // bit 9 of ECX : see + _mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see } #else static void mi_detect_cpu_features(void) { From ad52fc1b7e1ef0f33696c5d89e5dbc7dcef772d8 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 5 Mar 2025 16:09:22 -0800 Subject: [PATCH 3/3] fix type --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 2ee4c897..01ec2c46 100644 --- a/src/os.c +++ b/src/os.c @@ -697,7 +697,7 @@ Support NUMA aware allocation static _Atomic(int) _mi_numa_node_count; // = 0 // cache the node count int _mi_os_numa_node_count(void) { - size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); + int count = mi_atomic_load_acquire(&_mi_numa_node_count); if mi_unlikely(count <= 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? if (ncount > 0 && ncount < INT_MAX) {