diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c47671d..1f48cb1c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,7 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF) option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF) option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF) - -option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" ON) +option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" OFF) option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF) option(MI_SEE_ASM "Generate assembly files" OFF) option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON) @@ -121,9 +120,44 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") set(MI_SECURE "ON") endif() + +# Determine architecture +set(MI_OPT_ARCH_FLAGS "") +set(MI_ARCH "unknown") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$") + set(MI_ARCH "x86") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64 + set(MI_ARCH "x64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) + set(MI_ARCH "arm64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") + set(MI_ARCH "arm32") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") + if(CMAKE_SIZEOF_VOID_P==4) + set(MI_ARCH "riscv32") + else() + set(MI_ARCH "riscv64") + endif() +else() + set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR}) +endif() +message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})") + +# negative overrides (mainly to support vcpkg features) +if(MI_NO_USE_CXX) + set(MI_USE_CXX "OFF") +endif() +if(MI_NO_OPT_ARCH) + set(MI_OPT_ARCH "OFF") +elseif(MI_ARCH STREQUAL "arm64") + set(MI_OPT_ARCH "ON") # enable armv8.1-a by default on arm64 unless MI_NO_OPT_ARCH is set +endif() + + # ----------------------------------------------------------------------------- # Process options # ----------------------------------------------------------------------------- + if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC") set(MI_CLANG_CL "ON") endif() @@ -143,27 +177,10 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel") list(APPEND mi_cflags -Wall) endif() -# negative overrides (mainly to support vcpkg features) -if(MI_NO_USE_CXX) - set(MI_USE_CXX "OFF") -endif() -if(MI_NO_OPT_ARCH) - set(MI_OPT_ARCH "OFF") -endif() - - if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel") set(MI_USE_CXX "ON") endif() -if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo") - if (NOT MI_OPT_ARCH) - message(STATUS "Architecture specific optimizations are disabled (MI_OPT_ARCH=OFF)") - endif() -#else() -# set(MI_OPT_ARCH OFF) -endif() - if(MI_OVERRIDE) message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") if(APPLE) @@ -370,28 +387,6 @@ if(MI_WIN_USE_FIXED_TLS) list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1) endif() -# Determine architecture -set(MI_OPT_ARCH_FLAGS "") -set(MI_ARCH "unknown") -if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$") - set(MI_ARCH "x86") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64 - set(MI_ARCH "x64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) - set(MI_ARCH "arm64") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$") - set(MI_ARCH "arm32") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$") - if(CMAKE_SIZEOF_VOID_P==4) - set(MI_ARCH "riscv32") - else() - set(MI_ARCH "riscv64") - endif() -else() - set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR}) -endif() -message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})") - # Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits. # (this will skip the aligned hinting in that case. Issue #939, #949) if (EXISTS /proc/cpuinfo) @@ -448,7 +443,6 @@ endif() if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku") if(MI_OPT_ARCH) if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999) - set(MI_OPT_ARCH_FLAGS "") if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES) list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a") endif() diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index b4515831..e8b1c919 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -160,7 +160,7 @@ bool _mi_os_secure_guard_page_reset_at(void* addr); bool _mi_os_secure_guard_page_reset_before(void* addr); int _mi_os_numa_node(void); -size_t _mi_os_numa_node_count(void); +int _mi_os_numa_node_count(void); void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); diff --git a/src/arena.c b/src/arena.c index 62811720..daf36411 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1525,17 +1525,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t if (pages == 0) return 0; // pages per numa node - size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count()); - if (numa_count <= 0) numa_count = 1; + int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count()); + if (numa_count <= 0) { numa_count = 1; } const size_t pages_per = pages / numa_count; const size_t pages_mod = pages % numa_count; const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50); // reserve evenly among numa nodes - for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { + for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) { size_t node_pages = pages_per; // can be 0 - if (numa_node < pages_mod) node_pages++; - int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per); + if ((size_t)numa_node < pages_mod) { node_pages++; } + int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per); if (err) return err; if (pages < node_pages) { pages = 0; diff --git a/src/init.c b/src/init.c index 0e867f8b..f9678cc5 100644 --- a/src/init.c +++ b/src/init.c @@ -663,7 +663,7 @@ static void mi_detect_cpu_features(void) { int32_t cpu_info[4]; __cpuid(cpu_info, 7); _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see - _mi_cpu_has_erms = ((cpu_info[2] & (1 << 9)) != 0); // bit 9 of ECX : see + _mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see } #else static void mi_detect_cpu_features(void) { diff --git a/src/os.c b/src/os.c index 69decb71..01ec2c46 100644 --- a/src/os.c +++ b/src/os.c @@ -694,18 +694,19 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) { Support NUMA aware allocation -----------------------------------------------------------------------------*/ -static _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count +static _Atomic(int) _mi_numa_node_count; // = 0 // cache the node count -size_t _mi_os_numa_node_count(void) { - size_t count = mi_atomic_load_acquire(&_mi_numa_node_count); +int _mi_os_numa_node_count(void) { + int count = mi_atomic_load_acquire(&_mi_numa_node_count); if mi_unlikely(count <= 0) { long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly? - if (ncount > 0) { - count = (size_t)ncount; + if (ncount > 0 && ncount < INT_MAX) { + count = (int)ncount; } else { - count = _mi_prim_numa_node_count(); // or detect dynamically - if (count == 0) { count = 1; } + const size_t n = _mi_prim_numa_node_count(); // or detect dynamically + if (n == 0 || n > INT_MAX) { count = 1; } + else { count = (int)n; } } mi_atomic_store_release(&_mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); @@ -715,12 +716,13 @@ size_t _mi_os_numa_node_count(void) { static int mi_os_numa_node_get(void) { - size_t numa_count = _mi_os_numa_node_count(); + int numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = _mi_prim_numa_node(); + const size_t n = _mi_prim_numa_node(); + int numa_node = (n < INT_MAX ? (int)n : 0); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } - return (int)numa_node; + return numa_node; } int _mi_os_numa_node(void) {