Mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-08 00:09:31 +03:00)

commit 8edce30c17: merge from dev3
23 changed files with 357 additions and 262 deletions
@@ -14,8 +14,7 @@ option(MI_XMALLOC "Enable abort() call on memory allocation failure by
 option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
 option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
 option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
-option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" ON)
+option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" OFF)
 option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF)
 option(MI_SEE_ASM "Generate assembly files" OFF)
 option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)

@@ -125,9 +124,44 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$")
   set(MI_SECURE "ON")
 endif()

+# Determine architecture
+set(MI_OPT_ARCH_FLAGS "")
+set(MI_ARCH "unknown")
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$")
+  set(MI_ARCH "x86")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64
+  set(MI_ARCH "x64")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
+  set(MI_ARCH "arm64")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$")
+  set(MI_ARCH "arm32")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$")
+  if(CMAKE_SIZEOF_VOID_P==4)
+    set(MI_ARCH "riscv32")
+  else()
+    set(MI_ARCH "riscv64")
+  endif()
+else()
+  set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR})
+endif()
+message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})")
+
+# negative overrides (mainly to support vcpkg features)
+if(MI_NO_USE_CXX)
+  set(MI_USE_CXX "OFF")
+endif()
+if(MI_NO_OPT_ARCH)
+  set(MI_OPT_ARCH "OFF")
+elseif(MI_ARCH STREQUAL "arm64")
+  set(MI_OPT_ARCH "ON") # enable armv8.1-a by default on arm64 unless MI_NO_OPT_ARCH is set
+endif()
+
 # -----------------------------------------------------------------------------
 # Process options
 # -----------------------------------------------------------------------------

 if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC")
   set(MI_CLANG_CL "ON")
 endif()
@@ -147,32 +181,16 @@ if(CMAKE_C_COMPILER_ID MATCHES "Intel")
   list(APPEND mi_cflags -Wall)
 endif()

-# negative overrides (mainly to support vcpkg features)
-if(MI_NO_USE_CXX)
-  set(MI_USE_CXX "OFF")
-endif()
-if(MI_NO_OPT_ARCH)
-  set(MI_OPT_ARCH "OFF")
-endif()
-
 if(MSVC)
   add_compile_options(/std:c++20)
 elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU")
   add_compile_options(-std=c++20)
 endif()

 if(CMAKE_C_COMPILER_ID MATCHES "MSVC|Intel")
   set(MI_USE_CXX "ON")
 endif()

-if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo")
-  if (NOT MI_OPT_ARCH)
-    message(STATUS "Architecture specific optimizations are disabled (MI_OPT_ARCH=OFF)")
-  endif()
-#else()
-#  set(MI_OPT_ARCH OFF)
-endif()
-
 if(MI_OVERRIDE)
   message(STATUS "Override standard malloc (MI_OVERRIDE=ON)")
   if(APPLE)
@@ -397,28 +415,6 @@ if(MI_WIN_USE_FIXED_TLS)
   list(APPEND mi_defines MI_WIN_USE_FIXED_TLS=1)
 endif()

-# Determine architecture
-set(MI_OPT_ARCH_FLAGS "")
-set(MI_ARCH "unknown")
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86|i[3456]86)$" OR CMAKE_GENERATOR_PLATFORM MATCHES "^(x86|Win32)$")
-  set(MI_ARCH "x86")
-elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|x64|amd64|AMD64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "x64" OR "x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES) # must be before arm64
-  set(MI_ARCH "x64")
-elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv[89].?|ARM64)$" OR CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64" OR "arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
-  set(MI_ARCH "arm64")
-elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|armv[34567]|ARM)$")
-  set(MI_ARCH "arm32")
-elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv|riscv32|riscv64)$")
-  if(CMAKE_SIZEOF_VOID_P==4)
-    set(MI_ARCH "riscv32")
-  else()
-    set(MI_ARCH "riscv64")
-  endif()
-else()
-  set(MI_ARCH ${CMAKE_SYSTEM_PROCESSOR})
-endif()
-message(STATUS "Architecture: ${MI_ARCH}") # (${CMAKE_SYSTEM_PROCESSOR}, ${CMAKE_GENERATOR_PLATFORM}, ${CMAKE_GENERATOR})")
-
 # Check /proc/cpuinfo for an SV39 MMU and limit the virtual address bits.
 # (this will skip the aligned hinting in that case. Issue #939, #949)
 if (EXISTS /proc/cpuinfo)
@@ -475,7 +471,6 @@ endif()
 if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")
   if(MI_OPT_ARCH)
     if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999)
-      set(MI_OPT_ARCH_FLAGS "")
       if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
         list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a")
       endif()
@@ -502,7 +497,7 @@ if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) # vs2017+
 endif()

 if(MINGW)
-  add_definitions(-D_WIN32_WINNT=0x601)  # issue #976
+  add_definitions(-D_WIN32_WINNT=0x600)  # issue #976
 endif()

 if(MI_OPT_ARCH_FLAGS)
@@ -32,7 +32,7 @@ jobs:
       MSBuildConfiguration: Release
     Release SIMD:
       BuildType: release-simd
-      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_SIMD=ON -DMI_WIN_USE_FIXED_TLS=ON
+      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_ARCH=ON -DMI_OPT_SIMD=ON -DMI_WIN_USE_FIXED_TLS=ON
       MSBuildConfiguration: Release
     Secure:
       BuildType: secure

@@ -97,7 +97,7 @@ jobs:
       CC: clang
       CXX: clang++
       BuildType: release-simd-clang
-      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_SIMD=ON
+      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_ARCH=ON -DMI_OPT_SIMD=ON
     Secure Clang:
       CC: clang
       CXX: clang++

@@ -159,7 +159,7 @@ jobs:
       cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
     Release SIMD:
       BuildType: release-simd
-      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_SIMD=ON
+      cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_ARCH=ON -DMI_OPT_SIMD=ON
     Secure:
       BuildType: secure
       cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
@@ -1,6 +1,6 @@
 set(mi_version_major 3)
 set(mi_version_minor 0)
-set(mi_version_patch 2)
+set(mi_version_patch 3)
 set(mi_version ${mi_version_major}.${mi_version_minor})

 set(PACKAGE_VERSION ${mi_version})
@@ -18,6 +18,8 @@ vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
     guarded   MI_GUARDED
     secure    MI_SECURE
     override  MI_OVERRIDE
+    optarch   MI_OPT_ARCH
+    optsimd   MI_OPT_SIMD
     xmalloc   MI_XMALLOC
     asm       MI_SEE_ASM
 )

@@ -26,16 +28,14 @@ string(COMPARE EQUAL "${VCPKG_LIBRARY_LINKAGE}" "dynamic" MI_BUILD_SHARED)

 vcpkg_cmake_configure(
     SOURCE_PATH "${SOURCE_PATH}"
-    OPTIONS_RELEASE
-        -DMI_OPT_ARCH=ON
     OPTIONS
         -DMI_USE_CXX=ON
         -DMI_BUILD_TESTS=OFF
         -DMI_BUILD_OBJECT=ON
-        ${FEATURE_OPTIONS}
         -DMI_BUILD_STATIC=${MI_BUILD_STATIC}
         -DMI_BUILD_SHARED=${MI_BUILD_SHARED}
         -DMI_INSTALL_TOPLEVEL=ON
+        ${FEATURE_OPTIONS}
 )

 vcpkg_cmake_install()
@@ -26,9 +26,18 @@
     "secure": {
       "description": "Use full security mitigations (like guard pages and randomization)"
     },
+    "guarded": {
+      "description": "Use build that support guard pages after objects controlled with MIMALLOC_GUARDED_SAMPLE_RATE"
+    },
     "xmalloc": {
       "description": "If out-of-memory, call abort() instead of returning NULL"
     },
+    "optarch": {
+      "description": "Use architecture specific optimizations (on x64: '-march=haswell;-mavx2', on arm64: '-march=armv8.1-a')"
+    },
+    "optsimd": {
+      "description": "Allow use of SIMD instructions (avx2 or neon) (requires 'optarch' to be enabled)"
+    },
     "asm": {
       "description": "Generate assembly files"
     }
@@ -315,7 +315,7 @@
       <CompileAs>CompileAsCpp</CompileAs>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <LanguageStandard>stdcpp20</LanguageStandard>
-      <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
+      <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
       <AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
     </ClCompile>
     <Link>
@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MIMALLOC_H
 #define MIMALLOC_H

-#define MI_MALLOC_VERSION 302   // major + 2 digits minor
+#define MI_MALLOC_VERSION 303   // major + 2 digits minor

 // ------------------------------------------------------
 // Compiler specific attributes

@@ -266,7 +266,7 @@ typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_
 mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);

-// Experimental
+// Advanced
 mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
 mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept;

@@ -279,7 +279,7 @@ mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_commi
 mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
 mi_decl_export void mi_arenas_print(void) mi_attr_noexcept;

-// Experimental: heaps associated with specific memory arena's
+// Advanced: heaps associated with specific memory arena's
 typedef void* mi_arena_id_t;
 mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
 mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;

@@ -292,7 +292,7 @@ mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t a
 #endif

-// Experimental: allow sub-processes whose memory areas stay separated (and no reclamation between them)
+// Advanced: allow sub-processes whose memory areas stay separated (and no reclamation between them)
 // Used for example for separate interpreters in one process.
 typedef void* mi_subproc_id_t;
 mi_decl_export mi_subproc_id_t mi_subproc_main(void);

@@ -300,10 +300,15 @@ mi_decl_export mi_subproc_id_t mi_subproc_new(void);
 mi_decl_export void mi_subproc_delete(mi_subproc_id_t subproc);
 mi_decl_export void mi_subproc_add_current_thread(mi_subproc_id_t subproc); // this should be called right after a thread is created (and no allocation has taken place yet)

-// Experimental: visit abandoned heap areas (that are not owned by a specific heap)
+// Advanced: visit abandoned heap areas (that are not owned by a specific heap)
 mi_decl_export bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);

+// Experimental: set numa-affinity of a heap
+mi_decl_export void mi_heap_set_numa_affinity(mi_heap_t* heap, int numa_node);
+
 // Experimental: objects followed by a guard page.
+// Setting the sample rate on a specific heap can be used to test parts of the program more
+// specifically (in combination with `mi_heap_set_default`).
 // A sample rate of 0 disables guarded objects, while 1 uses a guard page for every object.
 // A seed of 0 uses a random start point. Only objects within the size bound are eligable for guard pages.
 mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed);
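The two declarations added above are easiest to see in context. A minimal usage sketch (not part of the diff, and assuming the existing heap API `mi_heap_new`, `mi_heap_set_default`, and `mi_heap_destroy`) could look like this; note that the guarded-sampling call only has an effect in a build configured with `MI_GUARDED`:

```c
#include <mimalloc.h>

// Sketch: give a worker thread its own heap, prefer a NUMA node, and sample
// guarded objects. Function and variable names here are illustrative only.
void worker_on_node(int numa_node) {
  mi_heap_t* heap = mi_heap_new();                 // fresh heap for this thread
  mi_heap_set_numa_affinity(heap, numa_node);      // prefer arenas on this node (v3 only)
  mi_heap_guarded_set_sample_rate(heap, 1000, 0);  // ~1 in 1000 objects gets a guard page (MI_GUARDED builds)
  mi_heap_set_default(heap);                       // route subsequent allocations to this heap

  void* p = mi_malloc(64);
  mi_free(p);

  mi_heap_destroy(heap);                           // release the heap and any remaining objects
}
```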
@@ -324,13 +329,6 @@ mi_decl_export void mi_collect_reduce(size_t target_thread_owned) mi_attr_noexce

 // experimental
-//mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size);
-//mi_decl_export void* mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, void** base, size_t* full_size);
-//mi_decl_export void* mi_os_alloc_aligned_allow_large(size_t size, size_t alignment, bool commit, bool* is_committed, bool* is_pinned, void** base, size_t* full_size);
-//mi_decl_export void mi_os_free(void* p, size_t size);
-//mi_decl_export void mi_os_commit(void* p, size_t size);
-//mi_decl_export void mi_os_decommit(void* p, size_t size);
-
 mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size);
 mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id);
 mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena);
@@ -134,6 +134,12 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
 static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
   return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
 }
+static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) {
+  const int64_t add = mi_atomic_load_relaxed((_Atomic(int64_t)*)padd);
+  if (add != 0) {
+    mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
+  }
+}
 static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
   int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
   while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ };
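For readers who do not want to decode the `mi_atomic(...)` macro layer, a rough standalone C11 equivalent of the helper added above is sketched below (plain `stdatomic.h`, no MSVC compatibility shims); the early-out on a zero addend mirrors the diff and avoids a needless read-modify-write:

```c
#include <stdatomic.h>
#include <stdint.h>

// Sketch only: add *padd into *p with relaxed ordering, skipping the atomic
// read-modify-write entirely when the addend happens to be zero.
static inline void void_addi64_relaxed(_Atomic int64_t* p, _Atomic int64_t* padd) {
  const int64_t add = atomic_load_explicit(padd, memory_order_relaxed);
  if (add != 0) {
    atomic_fetch_add_explicit(p, add, memory_order_relaxed);
  }
}
```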
@@ -90,7 +90,7 @@ typedef int32_t mi_ssize_t;
 #endif
 #endif

-#if MI_ARCH_X64 && defined(__AVX2__)
+#if (MI_ARCH_X86 || MI_ARCH_X64)
 #include <immintrin.h>
 #elif MI_ARCH_ARM64 && MI_OPT_SIMD
 #include <arm_neon.h>

@@ -134,6 +134,18 @@ typedef int32_t mi_ssize_t;
   Builtin's
-------------------------------------------------------------------------------- */

+#if defined(__GNUC__) || defined(__clang__)
+#define mi_unlikely(x)     (__builtin_expect(!!(x),false))
+#define mi_likely(x)       (__builtin_expect(!!(x),true))
+#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
+#define mi_unlikely(x)     (x) [[unlikely]]
+#define mi_likely(x)       (x) [[likely]]
+#else
+#define mi_unlikely(x)     (x)
+#define mi_likely(x)       (x)
+#endif
+
 #ifndef __has_builtin
 #define __has_builtin(x)  0
 #endif
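As a quick illustration (not from the diff) of how these hint macros are written at call sites: they are placed directly between `if` and the braces, without extra parentheses around the whole condition, which works because each expansion either supplies its own parentheses or appends the C++20 attribute:

```c
#include <stddef.h>

// Call-site sketch for mi_likely/mi_unlikely; my_alloc, fast_pop and
// slow_path_alloc are hypothetical names used only for illustration.
extern void* fast_pop(size_t size);
extern void* slow_path_alloc(size_t size);

void* my_alloc(size_t size) {
  void* p = fast_pop(size);
  if mi_unlikely(p == NULL) {   // hint: the refill path is rarely taken
    p = slow_path_alloc(size);
  }
  return p;
}
```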
@@ -171,14 +183,25 @@ typedef int32_t mi_ssize_t;
-------------------------------------------------------------------------------- */

 size_t _mi_popcount_generic(size_t x);
+extern bool _mi_cpu_has_popcnt;

 static inline size_t mi_popcount(size_t x) {
-  #if mi_has_builtinz(popcount)
+  #if defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
+    #if !defined(__BMI1__)
+    if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
+    #endif
+    size_t r;
+    __asm ("popcnt\t%1,%0" : "=r"(r) : "r"(x) : "cc");
+    return r;
+  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86)
+    #if !defined(__BMI1__)
+    if mi_unlikely(!_mi_cpu_has_popcnt) { return _mi_popcount_generic(x); }
+    #endif
+    return (size_t)mi_msc_builtinz(__popcnt)(x);
+  #elif defined(_MSC_VER) && MI_ARCH_ARM64
+    return (size_t)mi_msc_builtinz(__popcnt)(x);
+  #elif mi_has_builtinz(popcount)
    return mi_builtinz(popcount)(x);
-  #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
-   return mi_msc_builtinz(__popcnt)(x);
-  #elif MI_ARCH_X64 && defined(__BMI1__)
-   return (size_t)_mm_popcnt_u64(x);
   #else
    #define MI_HAS_FAST_POPCOUNT  0
    return (x<=1 ? x : _mi_popcount_generic(x));
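For background on the `_mi_popcount_generic` fallback referenced above (its body is not part of this hunk), a classic portable bit-count for a 64-bit word is sketched below; this is the standard SWAR approach and only illustrates what the slow path has to compute when the `popcnt` instruction is unavailable:

```c
#include <stddef.h>
#include <stdint.h>

// Portable population count for a 64-bit word (SWAR technique).
// Illustration only; mimalloc's own _mi_popcount_generic may be written differently.
static size_t generic_popcount64(uint64_t x) {
  x = x - ((x >> 1) & 0x5555555555555555ULL);                             // pairs of bits
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);  // nibbles
  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                            // bytes
  return (size_t)((x * 0x0101010101010101ULL) >> 56);                    // sum of byte counts
}
```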
@@ -159,6 +159,8 @@ bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned);
 bool _mi_os_secure_guard_page_reset_at(void* addr);
 bool _mi_os_secure_guard_page_reset_before(void* addr);

+int  _mi_os_numa_node(void);
+int  _mi_os_numa_node_count(void);

 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid);
 void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid);

@@ -174,8 +176,8 @@ mi_arena_id_t _mi_arena_id_none(void);
 mi_arena_t* _mi_arena_from_id(mi_arena_id_t id);
 bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena);

-void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid);
-void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid);
+void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid);
+void* _mi_arenas_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_pinned, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid);
 void  _mi_arenas_free(void* p, size_t size, mi_memid_t memid);
 bool  _mi_arenas_contain(const void* p);
 void  _mi_arenas_collect(bool force_purge, bool visit_all, mi_tld_t* tld);

@@ -254,25 +256,6 @@ bool _mi_page_is_valid(mi_page_t* page);
 #endif

-// ------------------------------------------------------
-// Branches
-// ------------------------------------------------------
-
-#if defined(__GNUC__) || defined(__clang__)
-#define mi_unlikely(x)     (__builtin_expect(!!(x),false))
-#define mi_likely(x)       (__builtin_expect(!!(x),true))
-#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
-#define mi_unlikely(x)     (x) [[unlikely]]
-#define mi_likely(x)       (x) [[likely]]
-#else
-#define mi_unlikely(x)     (x)
-#define mi_likely(x)       (x)
-#endif
-
-#ifndef __has_builtin
-#define __has_builtin(x)  0
-#endif
-
 /* -----------------------------------------------------------
   Assertions

@@ -1026,24 +1009,6 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
   return x;
 }

-// -------------------------------------------------------------------
-// Optimize numa node access for the common case (= one node)
-// -------------------------------------------------------------------
-
-int    _mi_os_numa_node_get(void);
-size_t _mi_os_numa_node_count_get(void);
-
-extern mi_decl_hidden _Atomic(size_t) _mi_numa_node_count;
-static inline int _mi_os_numa_node(void) {
-  if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; }
-  else return _mi_os_numa_node_get();
-}
-static inline size_t _mi_os_numa_node_count(void) {
-  const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
-  if mi_likely(count > 0) { return count; }
-  else return _mi_os_numa_node_count_get();
-}
-
 // ---------------------------------------------------------------------------------
 // Provide our own `_mi_memcpy` for potential performance optimizations.

@@ -1053,10 +1018,10 @@ static inline size_t _mi_os_numa_node_count(void) {
 // (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
 // ---------------------------------------------------------------------------------

-#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
-#include <intrin.h>
+#if !MI_TRACK_ENABLED && defined(_WIN32) && (MI_ARCH_X64 || MI_ARCH_X86)
 extern bool _mi_cpu_has_fsrm;
 extern bool _mi_cpu_has_erms;

 static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
   if ((_mi_cpu_has_fsrm && n <= 128) || (_mi_cpu_has_erms && n > 128)) {
     __movsb((unsigned char*)dst, (const unsigned char*)src, n);
@@ -343,10 +343,10 @@ typedef struct mi_page_s {
 // The max object size are checked to not waste more than 12.5% internally over the page sizes.
 #define MI_SMALL_MAX_OBJ_SIZE    ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8)   // < ~8 KiB
 #if MI_ENABLE_LARGE_PAGES
-#define MI_MEDIUM_MAX_OBJ_SIZE   ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8)  // < 64 KiB
+#define MI_MEDIUM_MAX_OBJ_SIZE   ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8)  // < ~64 KiB
 #define MI_LARGE_MAX_OBJ_SIZE    (MI_LARGE_PAGE_SIZE/8)   // <= 512KiB  // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
 #else
-#define MI_MEDIUM_MAX_OBJ_SIZE   (MI_MEDIUM_PAGE_SIZE/4)  // <= 128 KiB
+#define MI_MEDIUM_MAX_OBJ_SIZE   (MI_MEDIUM_PAGE_SIZE/8)  // <= 64 KiB
 #define MI_LARGE_MAX_OBJ_SIZE    MI_MEDIUM_MAX_OBJ_SIZE   // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
 #endif
 #define MI_LARGE_MAX_OBJ_WSIZE   (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)

@@ -424,6 +424,7 @@ typedef struct mi_padding_s {
 struct mi_heap_s {
   mi_tld_t*        tld;              // thread-local data
   mi_arena_t*      exclusive_arena;  // if the heap should only allocate from a specific arena (or NULL)
+  int              numa_node;        // preferred numa node (or -1 for no preference)
   uintptr_t        cookie;           // random cookie to verify pointers (see `_mi_ptr_cookie`)
   mi_random_ctx_t  random;           // random number context used for secure allocation
   size_t           page_count;       // total number of pages in the `pages` queues.

@@ -485,6 +486,7 @@ typedef int64_t mi_msecs_t;
 struct mi_tld_s {
   mi_threadid_t    thread_id;        // thread id of this thread
   size_t           thread_seq;       // thread sequence id (linear count of created threads)
+  int              numa_node;        // thread preferred numa node
   mi_subproc_t*    subproc;          // sub-process this thread belongs to.
   mi_heap_t*       heap_backing;     // backing heap of this thread (cannot be deleted)
   mi_heap_t*       heaps;            // list of heaps in this thread (so we can abandon all when the thread terminates)
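A quick arithmetic check of the bounds above (not taken from the diff, and assuming the customary mimalloc page sizes of 64 KiB small, 512 KiB medium, and 4 MiB large pages, which are defined elsewhere): dividing by 8 reproduces the limits stated in the comments, and the `MI_PAGE_INFO_SIZE` subtraction is what turns the exact values into the "~" approximations.

```c
#include <assert.h>

// Illustrative only: the page sizes are assumptions, not taken from this hunk,
// and MI_PAGE_INFO_SIZE is ignored here (hence "~" in the comments above).
static_assert((64 * 1024) / 8 == 8 * 1024, "small objects stay below ~8 KiB");
static_assert((512 * 1024) / 8 == 64 * 1024, "medium objects stay below ~64 KiB");
static_assert((4 * 1024 * 1024) / 8 == 512 * 1024, "large objects stay at or below 512 KiB");
```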
readme.md (35 changed lines)

@@ -12,8 +12,9 @@ is a general purpose allocator with excellent [performance](#performance) charac
 Initially developed by Daan Leijen for the runtime systems of the
 [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.

-Latest release tag: `v2.1.9` (2025-01-03).
-Latest v1 tag: `v1.8.9` (2024-01-03).
+Latest release : `v3.0.2` (beta) (2025-03-06).
+Latest v2 release: `v2.2.2` (2025-03-06).
+Latest v1 release: `v1.9.2` (2024-03-06).

 mimalloc is a drop-in replacement for `malloc` and can be used in other programs
 without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@@ -71,22 +72,28 @@ Enjoy!

 ### Branches

-* `master`: latest stable release (based on `dev2`).
+* `master`: latest stable release (still based on `dev2`).
 * `dev`: development branch for mimalloc v1. Use this branch for submitting PR's.
 * `dev2`: development branch for mimalloc v2. This branch is downstream of `dev`
   (and is essentially equal to `dev` except for `src/segment.c`). Uses larger sliced segments to manage
-  mimalloc pages what can reduce fragmentation.
-* `dev3`: development branch for mimalloc v3-alpha. This branch is downstream of `dev`. This is still experimental,
-  but simplifies previous versions by having no segments any more. This improves sharing of memory
-  between threads, and on certain large workloads uses less memory with less fragmentation.
+  mimalloc pages that can reduce fragmentation.
+* `dev3`: development branch for mimalloc v3-beta. This branch is downstream of `dev`. This version
+  simplifies the lock-free ownership of previous versions, has no thread-local segments any more.
+  This improves sharing of memory between threads, and on certain large workloads may use less memory
+  with less fragmentation.

 ### Releases

+* 2025-03-06, `v1.9.2`, `v2.2.2`, `v3.0.2-beta`: Various small bug and build fixes.
+  Add `mi_options_print`, `mi_arenas_print`, and the experimental `mi_stat_get` and `mi_stat_get_json`.
+  Add `mi_thread_set_in_threadpool` and `mi_heap_set_numa_affinity` (v3 only). Add vcpkg portfile.
+  Upgrade mimalloc-redirect to v1.3.2. `MI_OPT_ARCH` is off by default now but still assumes armv8.1-a on arm64
+  for fast atomic operations. Add QNX support.
 * 2025-01-03, `v1.8.9`, `v2.1.9`, `v3.0.1-alpha`: Interim release. Support Windows arm64. New [guarded](#guarded) build that can place OS
   guard pages behind objects to catch buffer overflows as they occur.
   Many small fixes: build on Windows arm64, cygwin, riscV, and dragonfly; fix Windows static library initialization to account for
   thread local destructors (in Rust/C++); macOS tag change; macOS TLS slot fix; improve stats;
-  consistent mimalloc.dll on Windows (instead of mimalloc-override.dll); fix mimalloc-redirect on Win11 H2;
+  consistent `mimalloc.dll` on Windows (instead of `mimalloc-override.dll`); fix mimalloc-redirect on Win11 H2;
   add 0-byte to canary; upstream CPython fixes; reduce .bss size; allow fixed TLS slot on Windows for improved performance.
 * 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches
   from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches.
@@ -167,7 +174,7 @@ mimalloc is used in various large scale low-latency services and programs, for e

 Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build.
 The `mimalloc-lib` project builds a static library (in `out/msvc-x64`), while the
-`mimalloc-override-dll` project builds a DLL for overriding malloc
+`mimalloc-override-dll` project builds DLL for overriding malloc
 in the entire program.

 ## Linux, macOS, BSD, etc.

@@ -240,13 +247,13 @@ on Windows to build with the `clang-cl` compiler directly:
 ```

-## Single source
+## Single Source

 You can also directly build the single `src/static.c` file as part of your project without
 needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path.

-# Using the library
+# Using the Library

 The preferred usage is including `<mimalloc.h>`, linking with
 the shared- or static library, and using the `mi_malloc` API exclusively for allocation. For example,
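The readme's own code sample is not included in this diff; as a labeled stand-in, a minimal program against the `mi_` prefixed API (using only long-standing calls: `mi_malloc`, `mi_realloc`, `mi_free`, `mi_version`) might look like:

```c
#include <mimalloc.h>
#include <stdio.h>

// Stand-in example, not the readme's original listing.
int main(void) {
  char* buf = (char*)mi_malloc(64);      // allocate 64 bytes from mimalloc
  if (buf == NULL) return 1;
  buf = (char*)mi_realloc(buf, 256);     // grow (or move) the buffer
  if (buf == NULL) return 1;
  snprintf(buf, 256, "mimalloc version: %d", mi_version());
  puts(buf);
  mi_free(buf);                          // pair mi_malloc/mi_realloc with mi_free
  return 0;
}
```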
@@ -474,7 +481,7 @@ Note that certain security restrictions may apply when doing this from
 the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).

-# Windows Override
+### Dynamic Override on Windows

 <span id="override_on_windows">We use a separate redirection DLL to override mimalloc on Windows</span>
 such that we redirect all malloc/free calls that go through the (dynamic) C runtime allocator,
@@ -64,11 +64,11 @@ static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) {
 // allocate a fresh meta page and add it to the global list.
 static mi_meta_page_t* mi_meta_page_zalloc(void) {
   // allocate a fresh arena slice
-  // note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again..
+  // note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again.. (same with _mi_os_numa_node()...)
   mi_memid_t memid;
   uint8_t* base = (uint8_t*)_mi_arenas_alloc_aligned(_mi_subproc(), MI_META_PAGE_SIZE, MI_META_PAGE_ALIGN, 0,
                                                      true /* commit*/, (MI_SECURE==0) /* allow large? */,
-                                                     NULL /* req arena */, 0 /* thread_seq */, &memid);
+                                                     NULL /* req arena */, 0 /* thread_seq */, -1 /* numa node */, &memid);
   if (base == NULL) return NULL;
   mi_assert_internal(_mi_is_aligned(base,MI_META_PAGE_ALIGN));
   if (!memid.initially_zero) {
src/arena.c (72 changed lines)

@@ -335,12 +335,13 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_
   Arena iteration
----------------------------------------------------------- */

-static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_pinned) {
+static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, bool match_numa, int numa_node, bool allow_pinned) {
   if (!allow_pinned && arena->memid.is_pinned) return false;
   if (!mi_arena_id_is_suitable(arena, req_arena)) return false;
   if (req_arena == NULL) { // if not specific, check numa affinity
     const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
-    if (!numa_suitable) return false;
+    if (match_numa) { if (!numa_suitable) return false; }
+    else { if (numa_suitable) return false; }
   }
   return true;
 }
@@ -375,9 +376,9 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena
   } \
   }

-#define mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, name_arena) \
+#define mi_forall_suitable_arenas(subproc, req_arena, tseq, match_numa, numa_node, allow_large, name_arena) \
   mi_forall_arenas(subproc, req_arena,tseq,name_arena) { \
-    if (mi_arena_is_suitable(name_arena, req_arena, -1 /* todo: numa node */, allow_large)) { \
+    if (mi_arena_is_suitable(name_arena, req_arena, match_numa, numa_node, allow_large)) { \

 #define mi_forall_suitable_arenas_end() \
   }} \
@@ -390,19 +391,28 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena
 // allocate slices from the arenas
 static mi_decl_noinline void* mi_arenas_try_find_free(
   mi_subproc_t* subproc, size_t slice_count, size_t alignment,
-  bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid)
+  bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid)
 {
   mi_assert_internal(slice_count <= mi_slice_count_of_size(MI_ARENA_MAX_OBJ_SIZE));
   mi_assert(alignment <= MI_ARENA_SLICE_ALIGN);
   if (alignment > MI_ARENA_SLICE_ALIGN) return NULL;

   // search arena's
-  mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena)
+  mi_forall_suitable_arenas(subproc, req_arena, tseq, true /* only numa matching */, numa_node, allow_large, arena)
   {
     void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid);
     if (p != NULL) return p;
   }
   mi_forall_suitable_arenas_end();
+  if (numa_node < 0) return NULL;
+
+  // search again but now regardless of preferred numa affinity
+  mi_forall_suitable_arenas(subproc, req_arena, tseq, false /* numa non-matching now */, numa_node, allow_large, arena)
+  {
+    void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid);
+    if (p != NULL) return p;
+  }
+  mi_forall_suitable_arenas_end();
   return NULL;
 }
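Stripped of mimalloc's macro machinery, the two-pass search added above reduces to the following self-contained sketch (types, fields, and helper names here are hypothetical, not mimalloc's): scan the NUMA-matching arenas first, and only if a preferred node was given and came up empty, scan exactly the arenas the first pass skipped.

```c
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>

// Hypothetical model of the NUMA-aware two-pass arena search; illustration only.
typedef struct { int numa_node; int free_blocks; } arena_t;

static bool numa_ok(const arena_t* a, bool match_numa, int preferred) {
  const bool suitable = (preferred < 0 || a->numa_node < 0 || a->numa_node == preferred);
  return (match_numa ? suitable : !suitable);
}

static void* try_alloc_in(arena_t* a, size_t size) {
  if (a->free_blocks == 0) return NULL;
  a->free_blocks--;
  return malloc(size);                    // stand-in for carving a slice from the arena
}

static void* alloc_with_numa_preference(arena_t* arenas, size_t n, size_t size, int preferred) {
  for (size_t i = 0; i < n; i++) {        // pass 1: preferred node (or arenas with no node)
    if (numa_ok(&arenas[i], true, preferred)) {
      void* p = try_alloc_in(&arenas[i], size);
      if (p != NULL) return p;
    }
  }
  if (preferred < 0) return NULL;         // no preference: pass 1 already tried every arena
  for (size_t i = 0; i < n; i++) {        // pass 2: only the arenas that pass 1 skipped
    if (numa_ok(&arenas[i], false, preferred)) {
      void* p = try_alloc_in(&arenas[i], size);
      if (p != NULL) return p;
    }
  }
  return NULL;
}
```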
@@ -411,14 +421,14 @@ static mi_decl_noinline void* mi_arenas_try_alloc(
   mi_subproc_t* subproc,
   size_t slice_count, size_t alignment,
   bool commit, bool allow_large,
-  mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid)
+  mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid)
 {
   mi_assert(slice_count <= MI_ARENA_MAX_OBJ_SLICES);
   mi_assert(alignment <= MI_ARENA_SLICE_ALIGN);
   void* p;

   // try to find free slices in the arena's
-  p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid);
+  p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid);
   if (p != NULL) return p;

   // did we need a specific arena?

@@ -441,7 +451,7 @@ static mi_decl_noinline void* mi_arenas_try_alloc(
   }
   // try once more to allocate in the new arena
   mi_assert_internal(req_arena == NULL);
-  p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid);
+  p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid);
   if (p != NULL) return p;

   return NULL;
@@ -472,21 +482,18 @@ static void* mi_arena_os_alloc_aligned(
 void* _mi_arenas_alloc_aligned( mi_subproc_t* subproc,
   size_t size, size_t alignment, size_t align_offset,
   bool commit, bool allow_large,
-  mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid)
+  mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid)
 {
   mi_assert_internal(memid != NULL);
   mi_assert_internal(size > 0);

-  // *memid = _mi_memid_none();
-  // const int numa_node = _mi_os_numa_node(&tld->os); // current numa node
-
   // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
   if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) &&              // is arena allocation allowed?
       size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE &&     // and not too small/large
       alignment <= MI_ARENA_SLICE_ALIGN && align_offset == 0)               // and good alignment
   {
     const size_t slice_count = mi_slice_count_of_size(size);
-    void* p = mi_arenas_try_alloc(subproc,slice_count, alignment, commit, allow_large, req_arena, tseq, memid);
+    void* p = mi_arenas_try_alloc(subproc,slice_count, alignment, commit, allow_large, req_arena, tseq, numa_node, memid);
     if (p != NULL) return p;
   }
@@ -495,9 +502,9 @@ void* _mi_arenas_alloc_aligned( mi_subproc_t* subproc,
   return p;
 }

-void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid)
+void* _mi_arenas_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, int numa_node, mi_memid_t* memid)
 {
-  return _mi_arenas_alloc_aligned(subproc, size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena, tseq, memid);
+  return _mi_arenas_alloc_aligned(subproc, size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena, tseq, numa_node, memid);
 }
@@ -547,7 +554,9 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_

   // search arena's
   const bool allow_large = true;
-  mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena)
+  const int any_numa = -1;
+  const bool match_numa = true;
+  mi_forall_suitable_arenas(subproc, req_arena, tseq, match_numa, any_numa, allow_large, arena)
   {
     size_t slice_index;
     mi_bitmap_t* const bitmap = arena->pages_abandoned[bin];
@@ -582,7 +591,7 @@ static mi_page_t* mi_arenas_page_try_find_abandoned(mi_subproc_t* subproc, size_

 // Allocate a fresh page
 static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_count, size_t block_size, size_t block_alignment,
-                                             mi_arena_t* req_arena, size_t tseq, bool commit)
+                                             mi_arena_t* req_arena, size_t tseq, int numa_node, bool commit)
 {
   const bool allow_large = (MI_SECURE < 2); // 2 = guard page at end of each arena page
   const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN);
@@ -596,7 +605,7 @@ static mi_page_t* mi_arenas_page_alloc_fresh(mi_subproc_t* subproc, size_t slice
       !os_align &&                             // not large alignment
       slice_count <= MI_ARENA_MAX_OBJ_SLICES)  // and not too large
   {
-    page = (mi_page_t*)mi_arenas_try_alloc(subproc, slice_count, page_alignment, commit, allow_large, req_arena, tseq, &memid);
+    page = (mi_page_t*)mi_arenas_try_alloc(subproc, slice_count, page_alignment, commit, allow_large, req_arena, tseq, numa_node, &memid);
     if (page != NULL) {
       mi_assert_internal(mi_bitmap_is_clearN(memid.mem.arena.arena->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count));
       mi_bitmap_set(memid.mem.arena.arena->pages, memid.mem.arena.slice_index);
@@ -727,7 +736,7 @@ static mi_page_t* mi_arenas_page_regular_alloc(mi_heap_t* heap, size_t slice_cou
   const long commit_on_demand = mi_option_get(mi_option_page_commit_on_demand);
   const bool commit = (slice_count <= mi_slice_count_of_size(MI_PAGE_MIN_COMMIT_SIZE) ||  // always commit small pages
                        (commit_on_demand == 2 && _mi_os_has_overcommit()) || (commit_on_demand == 0));
-  page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq, commit);
+  page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq, heap->numa_node, commit);
   if (page != NULL) {
     mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count);
     _mi_page_init(heap, page);
@ -749,7 +758,7 @@ static mi_page_t* mi_arenas_page_singleton_alloc(mi_heap_t* heap, size_t block_s
|
||||||
const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, _mi_os_secure_guard_page_size()) + _mi_os_secure_guard_page_size());
|
const size_t slice_count = mi_slice_count_of_size(_mi_align_up(info_size + block_size, _mi_os_secure_guard_page_size()) + _mi_os_secure_guard_page_size());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
mi_page_t* page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq, true /* commit singletons always */);
|
mi_page_t* page = mi_arenas_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq, heap->numa_node, true /* commit singletons always */);
|
||||||
if (page == NULL) return NULL;
|
if (page == NULL) return NULL;
|
||||||
|
|
||||||
mi_assert(page->reserved == 1);
|
mi_assert(page->reserved == 1);
|
||||||
|
@ -1375,7 +1384,7 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, size_t* k,
|
||||||
return bit_set_count;
|
return bit_set_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t mi_debug_show_chunks(const char* header1, const char* header2, const char* header3, size_t slice_count, size_t chunk_count, mi_bchunk_t* chunks, _Atomic(uint8_t)* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) {
|
static size_t mi_debug_show_chunks(const char* header1, const char* header2, const char* header3, size_t slice_count, size_t chunk_count, mi_bchunk_t* chunks, mi_bchunkmap_t* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) {
|
||||||
_mi_raw_message("\x1B[37m%s%s%s (use/commit: \x1B[31m0 - 25%%\x1B[33m - 50%%\x1B[36m - 75%%\x1B[32m - 100%%\x1B[0m)\n", header1, header2, header3);
|
_mi_raw_message("\x1B[37m%s%s%s (use/commit: \x1B[31m0 - 25%%\x1B[33m - 50%%\x1B[36m - 75%%\x1B[32m - 100%%\x1B[0m)\n", header1, header2, header3);
|
||||||
const size_t fields_per_line = (narrow ? 2 : 4);
|
const size_t fields_per_line = (narrow ? 2 : 4);
|
||||||
size_t bit_count = 0;
|
size_t bit_count = 0;
|
||||||
|
@ -1391,11 +1400,12 @@ static size_t mi_debug_show_chunks(const char* header1, const char* header2, con
|
||||||
|
|
||||||
char chunk_kind = ' ';
|
char chunk_kind = ' ';
|
||||||
if (chunk_bins != NULL) {
|
if (chunk_bins != NULL) {
|
||||||
switch (mi_atomic_load_relaxed(&chunk_bins[i])) {
|
switch (mi_bbitmap_debug_get_bin(chunk_bins,i)) {
|
||||||
case MI_BBIN_SMALL: chunk_kind = 'S'; break;
|
case MI_BBIN_SMALL: chunk_kind = 'S'; break;
|
||||||
case MI_BBIN_MEDIUM: chunk_kind = 'M'; break;
|
case MI_BBIN_MEDIUM: chunk_kind = 'M'; break;
|
||||||
case MI_BBIN_LARGE: chunk_kind = 'L'; break;
|
case MI_BBIN_LARGE: chunk_kind = 'L'; break;
|
||||||
case MI_BBIN_OTHER: chunk_kind = 'X'; break;
|
case MI_BBIN_OTHER: chunk_kind = 'X'; break;
|
||||||
|
default: chunk_kind = ' '; break; // suppress warning
|
||||||
// case MI_BBIN_NONE: chunk_kind = 'N'; break;
|
// case MI_BBIN_NONE: chunk_kind = 'N'; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1432,7 +1442,7 @@ static size_t mi_debug_show_chunks(const char* header1, const char* header2, con
|
||||||
return bit_set_count;
|
return bit_set_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t mi_debug_show_bitmap_binned(const char* header1, const char* header2, const char* header3, size_t slice_count, mi_bitmap_t* bitmap, _Atomic(uint8_t)* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) {
|
static size_t mi_debug_show_bitmap_binned(const char* header1, const char* header2, const char* header3, size_t slice_count, mi_bitmap_t* bitmap, mi_bchunkmap_t* chunk_bins, bool invert, mi_arena_t* arena, bool narrow) {
|
||||||
return mi_debug_show_chunks(header1, header2, header3, slice_count, mi_bitmap_chunk_count(bitmap), &bitmap->chunks[0], chunk_bins, invert, arena, narrow);
|
return mi_debug_show_chunks(header1, header2, header3, slice_count, mi_bitmap_chunk_count(bitmap), &bitmap->chunks[0], chunk_bins, invert, arena, narrow);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1463,7 +1473,7 @@ static void mi_debug_show_arenas_ex(bool show_pages, bool narrow) mi_attr_noexce
|
||||||
const char* header1 = "pages (p:page, f:full, s:singleton, P,F,S:not abandoned, i:arena-info, m:meta-data, ~:free-purgable, _:free-committed, .:free-reserved)";
|
const char* header1 = "pages (p:page, f:full, s:singleton, P,F,S:not abandoned, i:arena-info, m:meta-data, ~:free-purgable, _:free-committed, .:free-reserved)";
|
||||||
const char* header2 = (narrow ? "\n " : " ");
|
const char* header2 = (narrow ? "\n " : " ");
|
||||||
const char* header3 = "(chunk bin: S:small, M : medium, L : large, X : other)";
|
const char* header3 = "(chunk bin: S:small, M : medium, L : large, X : other)";
|
||||||
page_total += mi_debug_show_bitmap_binned(header1, header2, header3, arena->slice_count, arena->pages, arena->slices_free->chunk_bins, false, arena, narrow);
|
page_total += mi_debug_show_bitmap_binned(header1, header2, header3, arena->slice_count, arena->pages, arena->slices_free->chunkmap_bins, false, arena, narrow);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if (show_inuse) _mi_raw_message("total inuse slices : %zu\n", slice_total - free_total);
|
// if (show_inuse) _mi_raw_message("total inuse slices : %zu\n", slice_total - free_total);
|
||||||
|
@@ -1515,17 +1525,17 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t
  if (pages == 0) return 0;

  // pages per numa node
- size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
- if (numa_count <= 0) numa_count = 1;
+ int numa_count = (numa_nodes > 0 && numa_nodes <= INT_MAX ? (int)numa_nodes : _mi_os_numa_node_count());
+ if (numa_count <= 0) { numa_count = 1; }
  const size_t pages_per = pages / numa_count;
  const size_t pages_mod = pages % numa_count;
  const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);

  // reserve evenly among numa nodes
- for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
+ for (int numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
    size_t node_pages = pages_per;  // can be 0
-   if (numa_node < pages_mod) node_pages++;
-   int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
+   if ((size_t)numa_node < pages_mod) { node_pages++; }
+   int err = mi_reserve_huge_os_pages_at(node_pages, numa_node, timeout_per);
    if (err) return err;
    if (pages < node_pages) {
      pages = 0;
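For reference, a minimal standalone sketch (hypothetical helper, not mimalloc code) of the even split that mi_reserve_huge_os_pages_interleave performs above: every node gets pages/numa_count pages and the first pages%numa_count nodes get one extra.

  #include <stddef.h>
  #include <stdio.h>

  // Sketch only: print how `pages` huge OS pages would be spread over `numa_count`
  // nodes, mirroring the pages_per/pages_mod split in the hunk above.
  static void show_interleave(size_t pages, int numa_count) {
    if (numa_count <= 0) { numa_count = 1; }
    const size_t pages_per = pages / (size_t)numa_count;
    const size_t pages_mod = pages % (size_t)numa_count;
    for (int node = 0; node < numa_count && pages > 0; node++) {
      size_t node_pages = pages_per;
      if ((size_t)node < pages_mod) { node_pages++; }
      printf("node %d: reserve %zu huge pages\n", node, node_pages);
      pages = (pages < node_pages ? 0 : pages - node_pages);
    }
  }

  int main(void) {
    show_interleave(10, 4);  // prints 3, 3, 2, 2
    return 0;
  }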
src/bitmap.c (121 changed lines)
@@ -218,39 +218,39 @@ static inline bool mi_bfield_atomic_try_clearX(_Atomic(mi_bfield_t)*b, bool* all
 // ------- mi_bfield_atomic_is_set ---------------------------------------

 // Check if a bit is set
-static inline bool mi_bfield_atomic_is_set(_Atomic(mi_bfield_t)*b, const size_t idx) {
+static inline bool mi_bfield_atomic_is_set(const _Atomic(mi_bfield_t)*b, const size_t idx) {
  const mi_bfield_t x = mi_atomic_load_relaxed(b);
  return ((x & mi_bfield_mask(1,idx)) != 0);
 }

 // Check if a bit is clear
-static inline bool mi_bfield_atomic_is_clear(_Atomic(mi_bfield_t)*b, const size_t idx) {
+static inline bool mi_bfield_atomic_is_clear(const _Atomic(mi_bfield_t)*b, const size_t idx) {
  const mi_bfield_t x = mi_atomic_load_relaxed(b);
  return ((x & mi_bfield_mask(1, idx)) == 0);
 }

 // Check if a bit is xset
-static inline bool mi_bfield_atomic_is_xset(mi_xset_t set, _Atomic(mi_bfield_t)*b, const size_t idx) {
+static inline bool mi_bfield_atomic_is_xset(mi_xset_t set, const _Atomic(mi_bfield_t)*b, const size_t idx) {
  if (set) return mi_bfield_atomic_is_set(b, idx);
      else return mi_bfield_atomic_is_clear(b, idx);
 }

 // Check if all bits corresponding to a mask are set.
-static inline bool mi_bfield_atomic_is_set_mask(_Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
+static inline bool mi_bfield_atomic_is_set_mask(const _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
  mi_assert_internal(mask != 0);
  const mi_bfield_t x = mi_atomic_load_relaxed(b);
  return ((x & mask) == mask);
 }

 // Check if all bits corresponding to a mask are clear.
-static inline bool mi_bfield_atomic_is_clear_mask(_Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
+static inline bool mi_bfield_atomic_is_clear_mask(const _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
  mi_assert_internal(mask != 0);
  const mi_bfield_t x = mi_atomic_load_relaxed(b);
  return ((x & mask) == 0);
 }

 // Check if all bits corresponding to a mask are set/cleared.
-static inline bool mi_bfield_atomic_is_xset_mask(mi_xset_t set, _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
+static inline bool mi_bfield_atomic_is_xset_mask(mi_xset_t set, const _Atomic(mi_bfield_t)* b, mi_bfield_t mask) {
  mi_assert_internal(mask != 0);
  if (set) return mi_bfield_atomic_is_set_mask(b, mask);
      else return mi_bfield_atomic_is_clear_mask(b, mask);
@@ -371,7 +371,7 @@ static inline bool mi_bchunk_clearN(mi_bchunk_t* chunk, size_t cidx, size_t n, b

 // Check if a sequence of `n` bits within a chunk are all set/cleared.
 // This can cross bfield's
-mi_decl_noinline static bool mi_bchunk_is_xsetN_(mi_xset_t set, mi_bchunk_t* chunk, size_t field_idx, size_t idx, size_t n) {
+mi_decl_noinline static bool mi_bchunk_is_xsetN_(mi_xset_t set, const mi_bchunk_t* chunk, size_t field_idx, size_t idx, size_t n) {
  mi_assert_internal((field_idx*MI_BFIELD_BITS) + idx + n <= MI_BCHUNK_BITS);
  while (n > 0) {
    size_t m = MI_BFIELD_BITS - idx;  // m is the bits to xset in this field
@@ -391,7 +391,7 @@ mi_decl_noinline static bool mi_bchunk_is_xsetN_(mi_xset_t set, mi_bchunk_t* chu
 }

 // Check if a sequence of `n` bits within a chunk are all set/cleared.
-static inline bool mi_bchunk_is_xsetN(mi_xset_t set, mi_bchunk_t* chunk, size_t cidx, size_t n) {
+static inline bool mi_bchunk_is_xsetN(mi_xset_t set, const mi_bchunk_t* chunk, size_t cidx, size_t n) {
  mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);
  mi_assert_internal(n>0);
  if (n==0) return true;
@@ -1413,7 +1413,23 @@ void mi_bbitmap_unsafe_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) {
 // Assign a specific size bin to a chunk
 static void mi_bbitmap_set_chunk_bin(mi_bbitmap_t* bbitmap, size_t chunk_idx, mi_bbin_t bin) {
  mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
- mi_atomic_store_release(&bbitmap->chunk_bins[chunk_idx], (uint8_t)bin);
+ for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) {
+   if (ibin == bin) {
+     mi_bchunk_set(& bbitmap->chunkmap_bins[ibin], chunk_idx, NULL);
+   }
+   else {
+     mi_bchunk_clear(&bbitmap->chunkmap_bins[ibin], chunk_idx, NULL);
+   }
+ }
+}
+
+mi_bbin_t mi_bbitmap_debug_get_bin(const mi_bchunkmap_t* chunkmap_bins, size_t chunk_idx) {
+ for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) {
+   if (mi_bchunk_is_xsetN(MI_BIT_SET, &chunkmap_bins[ibin], chunk_idx, 1)) {
+     return ibin;
+   }
+ }
+ return MI_BBIN_NONE;
 }

 // Track the index of the highest chunk that is accessed.
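A small self-contained illustration of the new representation (plain uint64_t words instead of mimalloc's atomic mi_bchunk_t maps): each size bin owns a bitmap over chunks, a chunk's bin is the bin whose bit is set, and a chunk with no bit set in any bin is unassigned (NONE), which is what mi_bbitmap_debug_get_bin reconstructs.

  #include <stdint.h>
  #include <stdio.h>

  // Illustration only: one bit per chunk per size bin, 64 chunks per word.
  enum { BIN_SMALL, BIN_OTHER, BIN_MEDIUM, BIN_LARGE, BIN_NONE };

  typedef struct { uint64_t bins[BIN_NONE]; } chunk_bins_t;  // one word per assigned bin

  static void set_chunk_bin(chunk_bins_t* cb, int chunk_idx, int bin) {
    for (int b = BIN_SMALL; b < BIN_NONE; b++) {   // set in the chosen bin, clear in all others
      if (b == bin) cb->bins[b] |=  (UINT64_C(1) << chunk_idx);
      else          cb->bins[b] &= ~(UINT64_C(1) << chunk_idx);
    }
  }

  static int get_chunk_bin(const chunk_bins_t* cb, int chunk_idx) {
    for (int b = BIN_SMALL; b < BIN_NONE; b++) {
      if (cb->bins[b] & (UINT64_C(1) << chunk_idx)) return b;
    }
    return BIN_NONE;                               // no bin assigned: chunk is completely free
  }

  int main(void) {
    chunk_bins_t cb = { {0} };
    set_chunk_bin(&cb, 5, BIN_MEDIUM);
    printf("chunk 5 bin = %d (MEDIUM = %d)\n", get_chunk_bin(&cb, 5), BIN_MEDIUM);
    printf("chunk 6 bin = %d (NONE   = %d)\n", get_chunk_bin(&cb, 6), BIN_NONE);
    return 0;
  }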
@@ -1541,56 +1557,65 @@ static inline bool mi_bbitmap_try_find_and_clear_generic(mi_bbitmap_t* bbitmap,
  mi_assert_internal(MI_BFIELD_BITS >= MI_BCHUNK_FIELDS);
  const mi_bfield_t cmap_mask  = mi_bfield_mask(cmap_max_count,0);
  const size_t      cmap_cycle = cmap_acc+1;
  const mi_bbin_t   bbin = mi_bbin_of(n);
- // visit bins from smallest to largest (to reduce fragmentation on the larger blocks)
- for(mi_bbin_t bin = MI_BBIN_SMALL; bin <= bbin; bin = mi_bbin_inc(bin))  // no need to traverse for MI_BBIN_NONE as anyone can allocate in MI_BBIN_SMALL
- // (int bin = bbin; bin >= MI_BBIN_SMALL; bin--)  // visit bins from largest size bin up to the NONE bin
+ // visit each cmap entry
+ size_t cmap_idx = 0;
+ mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
  {
-   size_t cmap_idx = 0;
-   mi_bfield_cycle_iterate(cmap_mask, tseq, cmap_cycle, cmap_idx, X)
-   {
-     // don't search into non-accessed memory until we tried other size bins as well
-     if (bin < bbin && cmap_idx > cmap_acc)
-     // (bin > MI_BBIN_SMALL && cmap_idx > cmap_acc)  // large to small
-     {
-       break;
-     }
+   // and for each chunkmap entry we iterate over its bits to find the chunks
+   const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]);
+   const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
+   if (cmap_entry == 0) continue;

-     // and for each chunkmap entry we iterate over its bits to find the chunks
-     const mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bbitmap->chunkmap.bfields[cmap_idx]);
-     const size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
+   // get size bin masks
+   mi_bfield_t cmap_bins[MI_BBIN_COUNT] = { 0 };
+   cmap_bins[MI_BBIN_NONE] = cmap_entry;
+   for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin < MI_BBIN_NONE; ibin = mi_bbin_inc(ibin)) {
+     const mi_bfield_t cmap_bin = mi_atomic_load_relaxed(&bbitmap->chunkmap_bins[ibin].bfields[cmap_idx]);
+     cmap_bins[ibin] = cmap_bin & cmap_entry;
+     cmap_bins[MI_BBIN_NONE] &= ~cmap_bin;  // clear bits that are in an assigned size bin
+   }
+
+   // consider only chunks for a particular size bin at a time
+   // this picks the best bin only within a cmap entry (~ 1GiB address space), but avoids multiple
+   // iterations through all entries.
+   mi_assert_internal(bbin < MI_BBIN_NONE);
+   for (mi_bbin_t ibin = MI_BBIN_SMALL; ibin <= MI_BBIN_NONE;
+        // skip from bbin to NONE (so, say, a SMALL will never be placed in a OTHER, MEDIUM, or LARGE chunk to reduce fragmentation)
+        ibin = (ibin == bbin ? MI_BBIN_NONE : mi_bbin_inc(ibin)))
+   {
+     mi_assert_internal(ibin < MI_BBIN_COUNT);
+     const mi_bfield_t cmap_bin = cmap_bins[ibin];
      size_t eidx = 0;
-     mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y)  // reduce the tseq to 8 bins to reduce using extra memory (see `mstress`)
+     mi_bfield_cycle_iterate(cmap_bin, tseq, cmap_entry_cycle, eidx, Y)
      {
-       mi_assert_internal(eidx <= MI_BFIELD_BITS);
+       // assertion doesn't quite hold as the max_accessed may be out-of-date
+       // mi_assert_internal(cmap_entry_cycle > eidx || ibin == MI_BBIN_NONE);

+       // get the chunk
        const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
-       mi_assert_internal(chunk_idx < mi_bbitmap_chunk_count(bbitmap));
-       // only in the current size class!
-       const mi_bbin_t chunk_bin = (mi_bbin_t)mi_atomic_load_relaxed(&bbitmap->chunk_bins[chunk_idx]);
-       if ((mi_bbin_t)bin == chunk_bin || (bin == bbin && chunk_bin == MI_BBIN_NONE))  // only allow NONE at the final run
-       // ((mi_bbin_t)bin == chunk_bin || (bin <= MI_BBIN_SMALL && chunk_bin <= MI_BBIN_SMALL)) {  largest to smallest
-       {
-         mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
-         size_t cidx;
-         if ((*on_find)(chunk, n, &cidx)) {
-           if (cidx==0 && chunk_bin == MI_BBIN_NONE) {  // only the first determines the size bin
-             // this chunk is now reserved for the `bbin` size class
-             mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, bbin);
-           }
-           *pidx = (chunk_idx * MI_BCHUNK_BITS) + cidx;
-           mi_assert_internal(*pidx + n <= mi_bbitmap_max_bits(bbitmap));
-           return true;
-         }
-         else {
-           /* we may find that all are cleared only on a second iteration but that is ok as the chunkmap is a conservative approximation. */
-           mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx);
-         }
+       mi_bchunk_t* chunk = &bbitmap->chunks[chunk_idx];
+       size_t cidx;
+       if ((*on_find)(chunk, n, &cidx)) {
+         if (cidx==0 && ibin == MI_BBIN_NONE) {  // only the first block determines the size bin
+           // this chunk is now reserved for the `bbin` size class
+           mi_bbitmap_set_chunk_bin(bbitmap, chunk_idx, bbin);
         }
+         *pidx = (chunk_idx * MI_BCHUNK_BITS) + cidx;
+         mi_assert_internal(*pidx + n <= mi_bbitmap_max_bits(bbitmap));
+         return true;
+       }
+       else {
+         // todo: should _on_find_ return a boolen if there is a chance all are clear to avoid calling `try_clear?`
+         // we may find that all are cleared only on a second iteration but that is ok as the chunkmap is a conservative approximation.
+         mi_bbitmap_chunkmap_try_clear(bbitmap, chunk_idx);
       }
     }
     mi_bfield_cycle_iterate_end(Y);
   }
-   mi_bfield_cycle_iterate_end(X);
 }
+ mi_bfield_cycle_iterate_end(X);
 return false;
}
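The bin visiting order of the rewritten loop can be seen in isolation in this stand-alone sketch (hypothetical helper): a request whose own bin is bbin considers chunks assigned to its bin or to any smaller bin first, then falls back to completely free (NONE) chunks, skipping the bins in between.

  #include <stdio.h>

  enum { BIN_SMALL, BIN_OTHER, BIN_MEDIUM, BIN_LARGE, BIN_NONE };

  // Print the bins visited for a request of bin `bbin`, mirroring
  // `ibin = (ibin == bbin ? MI_BBIN_NONE : mi_bbin_inc(ibin))` above.
  static void show_visit_order(int bbin) {
    for (int ibin = BIN_SMALL; ibin <= BIN_NONE;
         ibin = (ibin == bbin ? BIN_NONE : ibin + 1)) {
      printf("%d ", ibin);
    }
    printf("\n");
  }

  int main(void) {
    show_visit_order(BIN_SMALL);   // 0 4       : SMALL, then NONE
    show_visit_order(BIN_MEDIUM);  // 0 1 2 4   : SMALL, OTHER, MEDIUM, then NONE
    return 0;
  }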
src/bitmap.h (14 changed lines)
@@ -215,18 +215,24 @@ bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* vis
 // Size bins; larger bins are allowed to go into smaller bins.
 // SMALL can only be in small (and NONE), so they cannot fragment the larger bins.
 typedef enum mi_bbin_e {
-  MI_BBIN_NONE,    // no bin assigned yet (the chunk is completely free)
   MI_BBIN_SMALL,   // slice_count == 1
   MI_BBIN_OTHER,   // slice_count: any other from the other bins, and 1 <= slice_count <= MI_BCHUNK_BITS
   MI_BBIN_MEDIUM,  // slice_count == 8
   MI_BBIN_LARGE,   // slice_count == MI_BFIELD_BITS -- only used if MI_ENABLE_LARGE_PAGES is 1
+  MI_BBIN_NONE,    // no bin assigned yet (the chunk is completely free)
   MI_BBIN_COUNT
 } mi_bbin_t;

 static inline mi_bbin_t mi_bbin_inc(mi_bbin_t bbin) {
+  mi_assert_internal(bbin < MI_BBIN_COUNT);
   return (mi_bbin_t)((int)bbin + 1);
 }

+static inline mi_bbin_t mi_bbin_dec(mi_bbin_t bbin) {
+  mi_assert_internal(bbin > MI_BBIN_NONE);
+  return (mi_bbin_t)((int)bbin - 1);
+}
+
 static inline mi_bbin_t mi_bbin_of(size_t slice_count) {
   if (slice_count==1) return MI_BBIN_SMALL;
   if (slice_count==8) return MI_BBIN_MEDIUM;
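Based on the enum comments above, the size-bin classification behaves roughly as in the following sketch; the real mi_bbin_of is truncated in this hunk and also depends on MI_BFIELD_BITS and MI_ENABLE_LARGE_PAGES, so treat this helper as an assumption rather than the exact implementation.

  #include <stddef.h>
  #include <stdio.h>

  enum { BIN_SMALL, BIN_OTHER, BIN_MEDIUM, BIN_LARGE, BIN_NONE };

  // Hypothetical classification derived from the bin comments above.
  static int bbin_of(size_t slice_count, size_t bfield_bits, int enable_large_pages) {
    if (slice_count == 1) return BIN_SMALL;                              // slice_count == 1
    if (slice_count == 8) return BIN_MEDIUM;                             // slice_count == 8
    if (enable_large_pages && slice_count == bfield_bits) return BIN_LARGE;
    return BIN_OTHER;                                                    // everything else
  }

  int main(void) {
    printf("%d %d %d %d\n",
           bbin_of(1, 64, 1), bbin_of(8, 64, 1), bbin_of(64, 64, 1), bbin_of(3, 64, 1));
    return 0;
  }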
@@ -241,8 +247,8 @@ typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bbitmap_s {
  _Atomic(size_t)  chunk_count;         // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
  _Atomic(size_t)  chunk_max_accessed;  // max chunk index that was once cleared or set
  size_t           _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2];  // suppress warning on msvc
  mi_bchunkmap_t   chunkmap;
- _Atomic(uint8_t) chunk_bins[MI_BITMAP_MAX_CHUNK_COUNT];  // 512b
+ mi_bchunkmap_t   chunkmap_bins[MI_BBIN_COUNT - 1];       // chunkmaps with bit set if the chunk is in that size class (excluding MI_BBIN_NONE)
  mi_bchunk_t      chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT];  // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
 } mi_bbitmap_t;

@@ -255,6 +261,8 @@ static inline size_t mi_bbitmap_max_bits(const mi_bbitmap_t* bbitmap) {
  return (mi_bbitmap_chunk_count(bbitmap) * MI_BCHUNK_BITS);
 }

+mi_bbin_t mi_bbitmap_debug_get_bin(const mi_bchunk_t* chunkmap_bins, size_t chunk_idx);
+
 size_t mi_bbitmap_size(size_t bit_count, size_t* chunk_count);
src/heap.c (12 changed lines)
@@ -182,12 +182,13 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool allow_destroy,
  mi_memid_t memid = heap->memid;
  _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
  heap->memid = memid;
  heap->tld = tld;  // avoid reading the thread-local tld during initialization
+ heap->tag = heap_tag;
+ heap->numa_node = tld->numa_node;
  heap->exclusive_arena = _mi_arena_from_id(arena_id);
  heap->allow_page_reclaim = (!allow_destroy && mi_option_get(mi_option_page_reclaim_on_free) >= 0);
  heap->allow_page_abandon = (!allow_destroy && mi_option_get(mi_option_page_full_retain) >= 0);
  heap->page_full_retain = mi_option_get_clamp(mi_option_page_full_retain, -1, 32);
- heap->tag = heap_tag;
  if (heap->tld->is_in_threadpool) {
    // if we run as part of a thread pool it is better to not arbitrarily reclaim abandoned pages into our heap.
    // this is checked in `free.c:mi_free_try_collect_mt`
@@ -227,7 +228,7 @@ mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena
  else {
    // heaps associated wita a specific arena are allocated in that arena
    // note: takes up at least one slice which is quite wasteful...
-   heap = (mi_heap_t*)_mi_arenas_alloc(_mi_subproc(), _mi_align_up(sizeof(mi_heap_t),MI_ARENA_MIN_OBJ_SIZE), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, &memid);
+   heap = (mi_heap_t*)_mi_arenas_alloc(_mi_subproc(), _mi_align_up(sizeof(mi_heap_t),MI_ARENA_MIN_OBJ_SIZE), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, tld->numa_node, &memid);
  }
  if (heap==NULL) {
    _mi_error_message(ENOMEM, "unable to allocate heap meta-data\n");
@@ -261,6 +262,11 @@ uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
  return _mi_random_next(&heap->random);
 }

+void mi_heap_set_numa_affinity(mi_heap_t* heap, int numa_node) {
+  if (heap == NULL) return;
+  heap->numa_node = (numa_node < 0 ? -1 : numa_node % _mi_os_numa_node_count());
+}
+
 // zero out the page queues
 static void mi_heap_reset_pages(mi_heap_t* heap) {
  mi_assert_internal(heap != NULL);
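Assuming the new mi_heap_set_numa_affinity is exposed in mimalloc.h (its non-underscore name suggests a public API), a heap can be given a preferred NUMA node roughly like this; a negative value would restore the default of the current thread's node.

  #include <mimalloc.h>
  #include <stdio.h>

  int main(void) {
    mi_heap_t* heap = mi_heap_new();
    mi_heap_set_numa_affinity(heap, 1);      // prefer node 1 for pages of this heap (assumed public API)
    void* p = mi_heap_malloc(heap, 1024);    // backing page allocation prefers the chosen node
    printf("allocated %p\n", p);
    mi_free(p);
    mi_heap_delete(heap);
    return 0;
  }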
src/init.c (55 changed lines)
@@ -104,6 +104,7 @@ static mi_decl_cache_align mi_subproc_t subproc_main
 static mi_decl_cache_align mi_tld_t tld_empty = {
  0,              // thread_id
  0,              // thread_seq
+ 0,              // default numa node
  &subproc_main,  // subproc
  NULL,           // heap_backing
  NULL,           // heaps list
@@ -117,6 +118,7 @@ static mi_decl_cache_align mi_tld_t tld_empty = {
 mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
  &tld_empty,     // tld
  NULL,           // exclusive_arena
+ 0,              // preferred numa node
  0,              // cookie
  //{ 0, 0 },     // keys
  { {0}, {0}, 0, true },  // random
@@ -141,6 +143,7 @@ extern mi_decl_hidden mi_decl_cache_align mi_heap_t heap_main;
 static mi_decl_cache_align mi_tld_t tld_main = {
  0,              // thread_id
  0,              // thread_seq
+ 0,              // numa node
  &subproc_main,  // subproc
  &heap_main,     // heap_backing
  &heap_main,     // heaps list
@@ -154,6 +157,7 @@ static mi_decl_cache_align mi_tld_t tld_main = {
 mi_decl_cache_align mi_heap_t heap_main = {
  &tld_main,      // thread local data
  NULL,           // exclusive arena
+ 0,              // preferred numa node
  0,              // initial cookie
  //{ 0, 0 },     // the key of the main heap can be fixed (unlike page keys that need to be secure!)
  { {0x846ca68b}, {0}, 0, true },  // random
@@ -306,6 +310,7 @@ static mi_tld_t* mi_tld_alloc(void) {
  tld->heap_backing = NULL;
  tld->heaps = NULL;
  tld->subproc = &subproc_main;
+ tld->numa_node = _mi_os_numa_node();
  tld->thread_id = _mi_prim_thread_id();
  tld->thread_seq = mi_atomic_add_acq_rel(&thread_total_count, 1);
  tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool();
@@ -647,25 +652,52 @@ void _mi_process_load(void) {
  _mi_random_reinit_if_weak(&heap_main.random);
 }

-#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
-#include <intrin.h>
+// CPU features
 mi_decl_cache_align bool _mi_cpu_has_fsrm = false;
 mi_decl_cache_align bool _mi_cpu_has_erms = false;
+mi_decl_cache_align bool _mi_cpu_has_popcnt = false;
+
+#if (MI_ARCH_X64 || MI_ARCH_X86)
+#if defined(__GNUC__)
+#include <cpuid.h>
+static bool mi_cpuid(uint32_t* regs4, uint32_t level) {
+  return (__get_cpuid(level, &regs4[0], &regs4[1], &regs4[2], &regs4[3]) == 1);
+}
+#elif defined(_MSC_VER)
+static bool mi_cpuid(uint32_t* regs4, uint32_t level) {
+  __cpuid((int32_t*)regs4, (int32_t)level);
+  return true;
+}
+#else
+static bool mi_cpuid(uint32_t* regs4, uint32_t level) {
+  MI_UNUSED(regs4); MI_UNUSED(level);
+  return false;
+}
+#endif

 static void mi_detect_cpu_features(void) {
  // FSRM for fast short rep movsb/stosb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017))
  // EMRS for fast enhanced rep movsb/stosb support
- int32_t cpu_info[4];
- __cpuid(cpu_info, 7);
+ uint32_t cpu_info[4];
+ if (mi_cpuid(cpu_info, 7)) {
   _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
-  _mi_cpu_has_erms = ((cpu_info[2] & (1 << 9)) != 0); // bit 9 of ECX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
+  _mi_cpu_has_erms = ((cpu_info[1] & (1 << 9)) != 0); // bit 9 of EBX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features>
+ }
+ if (mi_cpuid(cpu_info, 1)) {
+  _mi_cpu_has_popcnt = ((cpu_info[2] & (1 << 23)) != 0); // bit 23 of ECX : see <https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits>
+ }
 }

 #else
 static void mi_detect_cpu_features(void) {
- // nothing
+ #if MI_ARCH_ARM64
+ _mi_cpu_has_popcnt = true;
+ #endif
 }
 #endif


 // Initialize the process; called by thread_init or the process loader
 void mi_process_init(void) mi_attr_noexcept {
  // ensure we are called once
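For reference, the same feature bits can be probed standalone with GCC/Clang's cpuid.h (leaf 7 subleaf 0: FSRM is EDX bit 4 and ERMS is EBX bit 9; leaf 1: POPCNT is ECX bit 23); the merged code wraps this behind mi_cpuid so that MSVC and other compilers are covered too.

  #include <cpuid.h>
  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    uint32_t eax, ebx, ecx, edx;
    if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {  // leaf 7, subleaf 0
      printf("fsrm  : %u\n", (edx >> 4) & 1);               // fast short rep movsb/stosb
      printf("erms  : %u\n", (ebx >> 9) & 1);               // enhanced rep movsb/stosb
    }
    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {           // leaf 1
      printf("popcnt: %u\n", (ecx >> 23) & 1);
    }
    return 0;
  }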
@@ -685,15 +717,6 @@ void mi_process_init(void) mi_attr_noexcept {
  // the following two can potentially allocate (on freeBSD for locks and thread keys)
  mi_subproc_main_init();
  mi_process_setup_auto_thread_done();

- #if MI_DEBUG
- _mi_verbose_message("debug level : %d\n", MI_DEBUG);
- #endif
- _mi_verbose_message("secure level: %d\n", MI_SECURE);
- _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL);
- #if MI_TSAN
- _mi_verbose_message("thread santizer enabled\n");
- #endif
  mi_thread_init();

  #if defined(_WIN32) && defined(MI_WIN_USE_FLS)
src/libc.c

@@ -355,7 +355,6 @@ size_t _mi_clz_generic(size_t x) {

 #endif // bit scan

-#if !MI_HAS_FAST_POPCOUNT

 #if MI_SIZE_SIZE == 4
 #define mi_mask_even_bits32 (0x55555555)
@@ -383,7 +382,7 @@ static size_t mi_popcount_generic32(uint32_t x) {
  return mi_byte_sum32(x);
 }

-size_t _mi_popcount_generic(size_t x) {
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
  return mi_popcount_generic32(x);
 }
@@ -407,9 +406,8 @@ static size_t mi_popcount_generic64(uint64_t x) {
  return mi_byte_sum64(x);
 }

-size_t _mi_popcount_generic(size_t x) {
+mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
  return mi_popcount_generic64(x);
 }
 #endif

-#endif // popcount
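The generic fallback above builds on the classic SWAR bit-count; a standalone 32-bit version of that technique (an illustration, not mimalloc's exact mi_byte_sum32 formulation) looks like this:

  #include <stdint.h>
  #include <stdio.h>

  static uint32_t popcount32(uint32_t x) {
    x = x - ((x >> 1) & 0x55555555u);                  // pairwise 2-bit sums
    x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit sums
    x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit (byte) sums
    return (x * 0x01010101u) >> 24;                    // add the four byte sums
  }

  int main(void) {
    printf("%u %u %u\n", popcount32(0), popcount32(0xFFu), popcount32(0xF0F0F0F0u));  // 0 8 16
    return 0;
  }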
src/options.c

@@ -175,7 +175,7 @@ static mi_option_desc_t options[_mi_option_last] =
  { 0, UNINIT, MI_OPTION(max_vabits) },             // max virtual address space bits
  { MI_DEFAULT_PAGEMAP_COMMIT,
       UNINIT, MI_OPTION(pagemap_commit) },         // commit the full pagemap upfront?
- { 0, UNINIT, MI_OPTION(page_commit_on_demand) },  // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
+ { 1, UNINIT, MI_OPTION(page_commit_on_demand) },  // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
  { 16, UNINIT, MI_OPTION(page_reclaim_max) },      // don't reclaim pages if we already own N pages (in that size class)
 };
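With the default for page_commit_on_demand now 1, a program that prefers the previous eager-commit behaviour can override the option before its first allocation (or set the matching MIMALLOC_PAGE_COMMIT_ON_DEMAND environment variable); a minimal sketch:

  #include <mimalloc.h>
  #include <stdio.h>

  int main(void) {
    mi_option_set(mi_option_page_commit_on_demand, 0);  // 0 = commit pages up front again
    void* p = mi_malloc(64 * 1024);
    printf("page_commit_on_demand = %ld, p = %p\n",
           mi_option_get(mi_option_page_commit_on_demand), p);
    mi_free(p);
    return 0;
  }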
src/os.c (34 changed lines)
@@ -694,18 +694,19 @@ static void mi_os_free_huge_os_pages(void* p, size_t size) {
  Support NUMA aware allocation
-----------------------------------------------------------------------------*/

-_Atomic(size_t) _mi_numa_node_count; // = 0   // cache the node count
+static _Atomic(int) _mi_numa_node_count; // = 0   // cache the node count

-size_t _mi_os_numa_node_count_get(void) {
- size_t count = mi_atomic_load_acquire(&_mi_numa_node_count);
- if (count <= 0) {
+int _mi_os_numa_node_count(void) {
+ int count = mi_atomic_load_acquire(&_mi_numa_node_count);
+ if mi_unlikely(count <= 0) {
   long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
-  if (ncount > 0) {
-    count = (size_t)ncount;
+  if (ncount > 0 && ncount < INT_MAX) {
+    count = (int)ncount;
   }
   else {
-    count = _mi_prim_numa_node_count(); // or detect dynamically
-    if (count == 0) count = 1;
+    const size_t n = _mi_prim_numa_node_count(); // or detect dynamically
+    if (n == 0 || n > INT_MAX) { count = 1; }
+    else { count = (int)n; }
   }
   mi_atomic_store_release(&_mi_numa_node_count, count); // save it
   _mi_verbose_message("using %zd numa regions\n", count);
@@ -713,15 +714,24 @@ size_t _mi_os_numa_node_count_get(void) {
  return count;
 }

-int _mi_os_numa_node_get(void) {
- size_t numa_count = _mi_os_numa_node_count();
+static int mi_os_numa_node_get(void) {
+ int numa_count = _mi_os_numa_node_count();
  if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
  // never more than the node count and >= 0
- size_t numa_node = _mi_prim_numa_node();
+ const size_t n = _mi_prim_numa_node();
+ int numa_node = (n < INT_MAX ? (int)n : 0);
  if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
- return (int)numa_node;
+ return numa_node;
 }

+int _mi_os_numa_node(void) {
+ if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; }
+ else return mi_os_numa_node_get();
+}
+

 /* ----------------------------------------------------------------------------
  Public API
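The clamping logic above, shown on its own (hypothetical helper): whatever node the OS primitive reports is folded back into the range [0, numa_count) so per-node data structures can be indexed safely, with a fast path when only one node exists.

  #include <limits.h>
  #include <stddef.h>
  #include <stdio.h>

  static int clamp_numa_node(size_t os_node, int numa_count) {
    if (numa_count <= 1) return 0;                      // single-node fast path
    int node = (os_node < INT_MAX ? (int)os_node : 0);  // guard against bogus values
    if (node >= numa_count) { node = node % numa_count; }
    return node;
  }

  int main(void) {
    printf("%d %d %d\n",
           clamp_numa_node(0, 1), clamp_numa_node(5, 4), clamp_numa_node(3, 4));  // 0 1 3
    return 0;
  }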
src/prim/unix/prim.c

@@ -62,8 +62,16 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <sys/syscall.h>
 #endif

+#if !defined(MADV_DONTNEED) && defined(POSIX_MADV_DONTNEED)  // QNX
+#define MADV_DONTNEED POSIX_MADV_DONTNEED
+#endif
+#if !defined(MADV_FREE) && defined(POSIX_MADV_FREE)          // QNX
+#define MADV_FREE POSIX_MADV_FREE
+#endif
+
 #define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this?


 //------------------------------------------------------------------------------------
 // Use syscalls for some primitives to allow for libraries that override open/read/close etc.
 // and do allocation themselves; using syscalls prevents recursion when mimalloc is
@@ -191,6 +199,8 @@ int _mi_prim_free(void* addr, size_t size ) {
 static int unix_madvise(void* addr, size_t size, int advice) {
  #if defined(__sun)
  int res = madvise((caddr_t)addr, size, advice);  // Solaris needs cast (issue #520)
+ #elif defined(__QNX__)
+ int res = posix_madvise(addr, size, advice);
  #else
  int res = madvise(addr, size, advice);
  #endif
src/stats.c (30 changed lines)
@@ -92,23 +92,23 @@ void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) {


 // must be thread safe as it is called from stats_merge
-static void mi_stat_count_add(mi_stat_count_t* stat, const mi_stat_count_t* src) {
+static void mi_stat_count_add_mt(mi_stat_count_t* stat, const mi_stat_count_t* src) {
  if (stat==src) return;
- if (src->total!=0)   { mi_atomic_addi64_relaxed(&stat->total, src->total); }
- if (src->current!=0) { mi_atomic_addi64_relaxed(&stat->current, src->current); }
- // peak scores do really not work across threads ... we use conservative max
- if (src->peak > stat->peak) {
-   mi_atomic_maxi64_relaxed(&stat->peak, src->peak);  // or: mi_atomic_addi64_relaxed( &stat->peak, src->peak);
- }
+ mi_atomic_void_addi64_relaxed(&stat->total, &src->total);
+ mi_atomic_void_addi64_relaxed(&stat->current, &src->current);
+ // peak scores do really not work across threads .. we just add them
+ mi_atomic_void_addi64_relaxed( &stat->peak, &src->peak);
+ // or, take the max?
+ // mi_atomic_maxi64_relaxed(&stat->peak, src->peak);
 }

-static void mi_stat_counter_add(mi_stat_counter_t* stat, const mi_stat_counter_t* src) {
+static void mi_stat_counter_add_mt(mi_stat_counter_t* stat, const mi_stat_counter_t* src) {
  if (stat==src) return;
- if (src->total!=0) { mi_atomic_addi64_relaxed(&stat->total, src->total); }
+ mi_atomic_void_addi64_relaxed(&stat->total, &src->total);
 }

-#define MI_STAT_COUNT(stat)    mi_stat_count_add(&stats->stat, &src->stat);
-#define MI_STAT_COUNTER(stat)  mi_stat_counter_add(&stats->stat, &src->stat);
+#define MI_STAT_COUNT(stat)    mi_stat_count_add_mt(&stats->stat, &src->stat);
+#define MI_STAT_COUNTER(stat)  mi_stat_counter_add_mt(&stats->stat, &src->stat);

 // must be thread safe as it is called from stats_merge
 static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
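A self-contained sketch of the merge strategy above, using C11 atomics in place of mimalloc's mi_atomic wrappers: every field, including peak, is folded in with a relaxed atomic add, since per-thread peaks are not comparable across threads and their sum gives a conservative process-wide peak.

  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  typedef struct { _Atomic(int64_t) total; _Atomic(int64_t) current; _Atomic(int64_t) peak; } stat_count_t;

  static void stat_count_add_mt(stat_count_t* stat, const stat_count_t* src) {
    if (stat == src) return;
    atomic_fetch_add_explicit(&stat->total,   atomic_load_explicit(&src->total,   memory_order_relaxed), memory_order_relaxed);
    atomic_fetch_add_explicit(&stat->current, atomic_load_explicit(&src->current, memory_order_relaxed), memory_order_relaxed);
    atomic_fetch_add_explicit(&stat->peak,    atomic_load_explicit(&src->peak,    memory_order_relaxed), memory_order_relaxed);
  }

  int main(void) {
    stat_count_t main_stat, thread_stat;
    atomic_init(&main_stat.total, 10);  atomic_init(&main_stat.current, 2);  atomic_init(&main_stat.peak, 6);
    atomic_init(&thread_stat.total, 5); atomic_init(&thread_stat.current, 1); atomic_init(&thread_stat.peak, 4);
    stat_count_add_mt(&main_stat, &thread_stat);
    printf("total=%lld current=%lld peak=%lld\n",
           (long long)atomic_load(&main_stat.total),
           (long long)atomic_load(&main_stat.current),
           (long long)atomic_load(&main_stat.peak));   // total=15 current=3 peak=10
    return 0;
  }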
@@ -119,11 +119,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {

  #if MI_STAT>1
  for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
-   mi_stat_count_add(&stats->malloc_bins[i], &src->malloc_bins[i]);
+   mi_stat_count_add_mt(&stats->malloc_bins[i], &src->malloc_bins[i]);
  }
  #endif
  for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
-   mi_stat_count_add(&stats->page_bins[i], &src->page_bins[i]);
+   mi_stat_count_add_mt(&stats->page_bins[i], &src->page_bins[i]);
  }
 }

@@ -318,8 +318,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
  mi_stat_print(&stats->malloc_normal, "normal", (stats->malloc_normal_count.total == 0 ? 1 : -1), out, arg);
  mi_stat_print(&stats->malloc_huge, "huge", (stats->malloc_huge_count.total == 0 ? 1 : -1), out, arg);
  mi_stat_count_t total = { 0,0,0 };
- mi_stat_count_add(&total, &stats->malloc_normal);
- mi_stat_count_add(&total, &stats->malloc_huge);
+ mi_stat_count_add_mt(&total, &stats->malloc_normal);
+ mi_stat_count_add_mt(&total, &stats->malloc_huge);
  mi_stat_print_ex(&total, "total", 1, out, arg, "");
  #endif
  #if MI_STAT>1