mirror of
https://github.com/microsoft/mimalloc.git
synced 2025-05-06 23:39:31 +03:00
Merge branch 'dev3' into dev3-binx
This commit is contained in:
commit
dd3a74d89d
5 changed files with 55 additions and 59 deletions
|
@ -12,8 +12,7 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by
|
||||||
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
|
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
|
||||||
option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
|
option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
|
||||||
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
|
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
|
||||||
|
option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" OFF)
|
||||||
option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" ON)
|
|
||||||
option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF)
|
option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF)
|
||||||
option(MI_SEE_ASM "Generate assembly files" OFF)
|
option(MI_SEE_ASM "Generate assembly files" OFF)
|
||||||
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
|
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
|
||||||
|
@ -121,9 +120,44 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$")
|
||||||
set(MI_SECURE "ON")
|
set(MI_SECURE "ON")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
# Determine architecture
#
# Classifies the target CPU into a short family name stored in MI_ARCH:
#   "x86", "x64", "arm64", "arm32", "riscv32", "riscv64",
#   otherwise the raw CMAKE_SYSTEM_PROCESSOR value, or "unknown" when that
#   value is empty.
# Inputs read: CMAKE_SYSTEM_PROCESSOR, CMAKE_GENERATOR_PLATFORM,
# CMAKE_OSX_ARCHITECTURES and (for RISC-V only) CMAKE_SIZEOF_VOID_P.
# Also resets MI_OPT_ARCH_FLAGS to the empty string.
set(MI_OPT_ARCH_FLAGS "")
set(MI_ARCH "unknown")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$")
  set(MI_ARCH "x86")
# The x64 test must come before the arm64 test: when CMAKE_OSX_ARCHITECTURES
# lists both "x86_64" and "arm64", the first matching branch (x64) wins.
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES)
  set(MI_ARCH "x64")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
  set(MI_ARCH "arm64")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$")
  set(MI_ARCH "arm32")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$")
  # if() has no `==` operator. Written as `CMAKE_SIZEOF_VOID_P==4` the whole
  # token is looked up as a (non-existent) variable and is always false, which
  # made "riscv32" unreachable. EQUAL performs the intended numeric comparison.
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    set(MI_ARCH "riscv32")
  else()
    set(MI_ARCH "riscv64")
  endif()
elseif(NOT "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "")
  # Unrecognized processor: pass its name through unchanged.
  # Quoted and guarded so that an empty processor name keeps the "unknown"
  # default instead of unsetting MI_ARCH (which an unquoted empty set() does).
  set(MI_ARCH "${CMAKE_SYSTEM_PROCESSOR}")
endif()
message(STATUS "Architecture: ${MI_ARCH}")
|
||||||
|
|
||||||
|
# Negative overrides (mainly to support vcpkg features).
#
# MI_NO_<X> switches win over the corresponding positive MI_<X> option.
# MI_OPT_ARCH is resolved first, then MI_USE_CXX; the two are independent.
if(NOT MI_NO_OPT_ARCH)
  # No explicit opt-out: on arm64 turn the architecture-specific
  # optimizations (armv8.1-a) on by default.
  if(MI_ARCH STREQUAL "arm64")
    set(MI_OPT_ARCH ON)
  endif()
else()
  # Explicit opt-out always disables architecture-specific optimizations.
  set(MI_OPT_ARCH OFF)
endif()

if(MI_NO_USE_CXX)
  # Explicit opt-out of compiling the library with the C++ compiler.
  set(MI_USE_CXX OFF)
endif()
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# Process options
|
# Process options
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC")
|
if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC")
|
||||||
set(MI_CLANG_CL "ON")
|
set(MI_CLANG_CL "ON")
|
||||||
endif()
|
endif()
|
||||||
|
@ -143,27 +177,10 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
|
||||||
list(APPEND mi_cflags -Wall)
|
list(APPEND mi_cflags -Wall)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# negative overrides (mainly to support vcpkg features)
|
|
||||||
if(MI_NO_USE_CXX)
|
|
||||||
set(MI_USE_CXX "OFF")
|
|
||||||
endif()
|
|
||||||
if(MI_NO_OPT_ARCH)
|
|
||||||
set(MI_OPT_ARCH "OFF")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
|
|
||||||
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel")
|
if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel")
|
||||||
set(MI_USE_CXX "ON")
|
set(MI_USE_CXX "ON")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo")
|
|
||||||
if (NOT MI_OPT_ARCH)
|
|
||||||
message(STATUS "Architecture specific optimizations are disabled (MI_OPT_ARCH=OFF)")
|
|
||||||
endif()
|
|
||||||
#else()
|
|
||||||
# set(MI_OPT_ARCH OFF)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(MI_OVERRIDE)
|
if(MI_OVERRIDE)
|
||||||
message(STATUS "Override standard malloc (MI_OVERRIDE=ON)")
|
message(STATUS "Override standard malloc (MI_OVERRIDE=ON)")
|
||||||
if(APPLE)
|
if(APPLE)
|
||||||
|
@ -370,28 +387,6 @@ if(MI_WIN_USE_FIXED_TLS)
|
||||||
list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1)
|
list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Determine architecture
|
|
||||||
set(MI_OPT_ARCH_FLAGS "")
|
|
||||||
set(MI_ARCH "unknown")
|
|
||||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$")
|
|
||||||
set(MI_ARCH "x86")
|
|
||||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64
|
|
||||||
set(MI_ARCH "x64")
|
|
||||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
|
|
||||||
set(MI_ARCH "arm64")
|
|
||||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$")
|
|
||||||
set(MI_ARCH "arm32")
|
|
||||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$")
|
|
||||||
if(CMAKE_SIZEOF_VOID_P==4)
|
|
||||||
set(MI_ARCH "riscv32")
|
|
||||||
else()
|
|
||||||
set(MI_ARCH "riscv64")
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR})
|
|
||||||
endif()
|
|
||||||
message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})")
|
|
||||||
|
|
||||||
# Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits.
|
# Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits.
|
||||||
# (this will skip the aligned hinting in that case. Issue #939, #949)
|
# (this will skip the aligned hinting in that case. Issue #939, #949)
|
||||||
if (EXISTS /proc/cpuinfo)
|
if (EXISTS /proc/cpuinfo)
|
||||||
|
@ -448,7 +443,6 @@ endif()
|
||||||
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")
|
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")
|
||||||
if(MI_OPT_ARCH)
|
if(MI_OPT_ARCH)
|
||||||
if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999)
|
if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999)
|
||||||
set(MI_OPT_ARCH_FLAGS "")
|
|
||||||
if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
|
if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
|
||||||
list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a")
|
list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a")
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -160,7 +160,7 @@ bool _mi_os_secure_guard_page_reset_at(void* addr);
|
||||||
bool _mi_os_secure_guard_page_reset_before(void* addr);
|
bool _mi_os_secure_guard_page_reset_before(void* addr);
|
||||||
|
|
||||||
int _mi_os_numa_node(void);
|
int _mi_os_numa_node(void);
|
||||||
size_t _mi_os_numa_node_count(void);
|
int _mi_os_numa_node_count(void);
|
||||||
|
|
||||||
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid);
|
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid);
|
||||||
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid);
|
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid);
|
||||||
|
|
10
src/arena.c
10
src/arena.c
|
@ -1525,17 +1525,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
|
||||||
if (pages == 0) return 0;
|
if (pages == 0) return 0;
|
||||||
|
|
||||||
// pages per numa node
|
// pages per numa node
|
||||||
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
|
int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
|
||||||
if (numa_count <= 0) numa_count = 1;
|
if (numa_count <= 0) { numa_count = 1; }
|
||||||
const size_t pages_per = pages / numa_count;
|
const size_t pages_per = pages / numa_count;
|
||||||
const size_t pages_mod = pages % numa_count;
|
const size_t pages_mod = pages % numa_count;
|
||||||
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
|
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
|
||||||
|
|
||||||
// reserve evenly among numa nodes
|
// reserve evenly among numa nodes
|
||||||
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
|
for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
|
||||||
size_t node_pages = pages_per; // can be 0
|
size_t node_pages = pages_per; // can be 0
|
||||||
if (numa_node < pages_mod) node_pages++;
|
if ((size_t)numa_node < pages_mod) { node_pages++; }
|
||||||
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
|
int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
|
||||||
if (err) return err;
|
if (err) return err;
|
||||||
if (pages < node_pages) {
|
if (pages < node_pages) {
|
||||||
pages = 0;
|
pages = 0;
|
||||||
|
|
|
@ -663,7 +663,7 @@ static void mi_detect_cpu_features(void) {
|
||||||
int32_t cpu_info[4];
|
int32_t cpu_info[4];
|
||||||
__cpuid(cpu_info, 7);
|
__cpuid(cpu_info, 7);
|
||||||
_mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
|
_mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
|
||||||
_mi_cpu_has_erms = ((cpu_info[2] & (1 << 9)) != 0); // bit 9 of ECX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
|
_mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static void mi_detect_cpu_features(void) {
|
static void mi_detect_cpu_features(void) {
|
||||||
|
|
22
src/os.c
22
src/os.c
|
@ -694,18 +694,19 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
|
||||||
Support NUMA aware allocation
|
Support NUMA aware allocation
|
||||||
-----------------------------------------------------------------------------*/
|
-----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
static _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count
|
static _Atomic(int) _mi_numa_node_count; // = 0 // cache the node count
|
||||||
|
|
||||||
size_t _mi_os_numa_node_count(void) {
|
int _mi_os_numa_node_count(void) {
|
||||||
size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
|
int count = mi_atomic_load_acquire(&_mi_numa_node_count);
|
||||||
if mi_unlikely(count <= 0) {
|
if mi_unlikely(count <= 0) {
|
||||||
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
|
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
|
||||||
if (ncount > 0) {
|
if (ncount > 0 && ncount < INT_MAX) {
|
||||||
count = (size_t)ncount;
|
count = (int)ncount;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
count = _mi_prim_numa_node_count(); // or detect dynamically
|
const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
|
||||||
if (count == 0) { count = 1; }
|
if (n == 0 || n > INT_MAX) { count = 1; }
|
||||||
|
else { count = (int)n; }
|
||||||
}
|
}
|
||||||
mi_atomic_store_release(&_mi_numa_node_count, count); // save it
|
mi_atomic_store_release(&_mi_numa_node_count, count); // save it
|
||||||
_mi_verbose_message("using %zd numa regions\n", count);
|
_mi_verbose_message("using %zd numa regions\n", count);
|
||||||
|
@ -715,12 +716,13 @@ size_t _mi_os_numa_node_count(void) {
|
||||||
|
|
||||||
|
|
||||||
static int mi_os_numa_node_get(void) {
|
static int mi_os_numa_node_get(void) {
|
||||||
size_t numa_count = _mi_os_numa_node_count();
|
int numa_count = _mi_os_numa_node_count();
|
||||||
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
|
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
|
||||||
// never more than the node count and >= 0
|
// never more than the node count and >= 0
|
||||||
size_t numa_node = _mi_prim_numa_node();
|
const size_t n = _mi_prim_numa_node();
|
||||||
|
int numa_node = (n < INT_MAX ? (int)n : 0);
|
||||||
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
|
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
|
||||||
return (int)numa_node;
|
return numa_node;
|
||||||
}
|
}
|
||||||
|
|
||||||
int _mi_os_numa_node(void) {
|
int _mi_os_numa_node(void) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue