Merge branch 'dev3-bin-dbg' of https://github.com/microsoft/mimalloc into dev3-bin-dbg

Gustavo Varo 2025-03-05 16:27:42 -05:00
commit 71549dae90
51 changed files with 7111 additions and 4983 deletions


@ -10,31 +10,38 @@ option(MI_PADDING "Enable padding to detect heap block overflow (alway
option(MI_OVERRIDE "Override the standard malloc interface (i.e. define entry points for 'malloc', 'free', etc)" ON)
option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF)
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF)
option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for arm64: '-march=armv8.1-a' (2016))" ON)
option(MI_OPT_ARCH "Only for optimized builds: turn on architecture specific optimizations (for x64: '-march=haswell;-mavx2' (2013), for arm64: '-march=armv8.1-a' (2016))" ON)
option(MI_OPT_SIMD "Use SIMD instructions (requires MI_OPT_ARCH to be enabled)" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
option(MI_WIN_USE_FIXED_TLS "Use a fixed TLS slot on Windows to avoid extra tests in the malloc fast path" OFF)
option(MI_LOCAL_DYNAMIC_TLS "Use local-dynamic-tls, a slightly slower but dlopen-compatible thread local storage mechanism (Unix)" OFF)
option(MI_LIBC_MUSL "Set this when linking with musl libc" OFF)
option(MI_LIBC_MUSL "Enable this when linking with musl libc" OFF)
option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
option(MI_TRACK_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
option(MI_TRACK_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
option(MI_TRACK_ETW "Compile with Windows event tracing (ETW) support (adds a small overhead)" OFF)
option(MI_BUILD_SHARED "Build shared library" ON)
option(MI_BUILD_STATIC "Build static library" ON)
option(MI_BUILD_OBJECT "Build object library" ON)
option(MI_BUILD_TESTS "Build test executables" ON)
option(MI_DEBUG_TSAN "Build with thread sanitizer (needs clang)" OFF)
option(MI_DEBUG_UBSAN "Build with undefined-behavior sanitizer (needs clang++)" OFF)
option(MI_GUARDED "Build with guard pages behind certain object allocations (implies MI_NO_PADDING=ON)" OFF)
option(MI_SKIP_COLLECT_ON_EXIT "Skip collecting memory on program exit" OFF)
option(MI_NO_PADDING "Force no use of padding even in DEBUG mode etc." OFF)
option(MI_INSTALL_TOPLEVEL "Install directly into $CMAKE_INSTALL_PREFIX instead of PREFIX/lib/mimalloc-version" OFF)
option(MI_NO_THP "Disable transparent huge pages support on Linux/Android for the mimalloc process only" OFF)
option(MI_EXTRA_CPPDEFS "Extra pre-processor definitions (use as `-DMI_EXTRA_CPPDEFS=\"opt1=val1;opt2=val2\"`)" "")
option(MI_WIN_DBG_EXTS "Build with Windows debugger extension points" OFF)
# negated options for vcpkg features
option(MI_NO_USE_CXX "Use plain C compilation (has priority over MI_USE_CXX)" OFF)
option(MI_NO_OPT_ARCH "Do not use architecture specific optimizations (like '-march=armv8.1-a' for example) (has priority over MI_OPT_ARCH)" OFF)
@ -54,6 +61,7 @@ set(mi_sources
src/alloc-aligned.c
src/alloc-posix.c
src/arena.c
src/arena-meta.c
src/bitmap.c
src/heap.c
src/init.c
@ -61,12 +69,15 @@ set(mi_sources
src/options.c
src/os.c
src/page.c
src/page-map.c
src/random.c
src/segment.c
src/segment-map.c
src/stats.c
src/prim/prim.c)
if(WIN32 AND MI_WIN_DBG_EXTS)
list(APPEND mi_sources src/prim/windows/windbg/mimalloc_dbg.cpp)
endif()
set(mi_cflags "")
set(mi_cflags_static "") # extra flags for a static library build
set(mi_cflags_dynamic "") # extra flags for a shared-object library build
@ -155,8 +166,8 @@ if(CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo")
if (NOT MI_OPT_ARCH)
message(STATUS "Architecture specific optimizations are disabled (MI_OPT_ARCH=OFF)")
endif()
else()
set(MI_OPT_ARCH OFF)
#else()
# set(MI_OPT_ARCH OFF)
endif()
if(MI_OVERRIDE)
@ -248,6 +259,17 @@ if(MI_TRACK_ETW)
endif()
endif()
if(MI_WIN_DBG_EXTS)
if(NOT WIN32)
set(MI_WIN_DBG_EXTS OFF)
message(WARNING "Can only enable Windows debugger extension support on Windows (MI_WIN_DBG_EXTS=OFF)")
endif()
if(MI_WIN_DBG_EXTS)
message(STATUS "Compile with Windows debugger extension support (MI_WIN_DBG_EXTS=ON)")
list(APPEND mi_defines MI_WIN_DBG_EXTS=1)
endif()
endif()
if(MI_GUARDED)
message(STATUS "Compile guard pages behind certain object allocations (MI_GUARDED=ON)")
list(APPEND mi_defines MI_GUARDED=1)
@ -260,7 +282,7 @@ endif()
if(MI_SEE_ASM)
message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)")
list(APPEND mi_cflags -save-temps)
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 14)
message(STATUS "No GNU Line marker")
list(APPEND mi_cflags -Wno-gnu-line-marker)
endif()
@ -431,11 +453,15 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
list(APPEND mi_cflags -ftls-model=initial-exec)
endif()
endif()
endif()
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel")
if(MI_OVERRIDE)
list(APPEND mi_cflags -fno-builtin-malloc)
endif()
endif()
# Compiler and architecture specific flags
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM_NAME MATCHES "Haiku")
if(MI_OPT_ARCH)
if(APPLE AND CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_OSX_ARCHITECTURES) # to support multi-arch binaries (#999)
@ -443,17 +469,24 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU|Intel" AND NOT CMAKE_SYSTEM
if("arm64" IN_LIST CMAKE_OSX_ARCHITECTURES)
list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_arm64;-march=armv8.1-a")
endif()
if("x86_64" IN_LIST CMAKE_OSX_ARCHITECTURES)
list(APPEND MI_OPT_ARCH_FLAGS "-Xarch_x86_64;-march=haswell;-Xarch_x86_64;-mavx2")
endif()
elseif(MI_ARCH STREQUAL "x64")
set(MI_OPT_ARCH_FLAGS "-march=haswell;-mavx2") # fast bit scan (since 2013)
elseif(MI_ARCH STREQUAL "arm64")
set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a") # fast atomics
set(MI_OPT_ARCH_FLAGS "-march=armv8.1-a") # fast atomics (since 2016)
endif()
endif()
endif()
if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914)
if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) # vs2017+
list(APPEND mi_cflags /Zc:__cplusplus)
if(MI_OPT_ARCH AND NOT MI_CLANG_CL)
if(MI_ARCH STREQUAL "arm64")
set(MI_OPT_ARCH_FLAGS "/arch:armv8.1") # fast atomics
if(MI_ARCH STREQUAL "x64")
set(MI_OPT_ARCH_FLAGS "/arch:AVX2")
elseif(MI_ARCH STREQUAL "arm64")
set(MI_OPT_ARCH_FLAGS "/arch:armv8.1")
endif()
endif()
endif()
@ -465,6 +498,12 @@ endif()
if(MI_OPT_ARCH_FLAGS)
list(APPEND mi_cflags ${MI_OPT_ARCH_FLAGS})
message(STATUS "Architecture specific optimization is enabled (with ${MI_OPT_ARCH_FLAGS}) (MI_OPT_ARCH=ON)")
if (MI_OPT_SIMD)
list(APPEND mi_defines "MI_OPT_SIMD=1")
message(STATUS "SIMD instructions are enabled (MI_OPT_SIMD=ON)")
endif()
elseif(MI_OPT_SIMD)
message(STATUS "SIMD instructions are not enabled (either MI_OPT_ARCH=OFF or this architecture has no SIMD support)")
endif()
# extra needed libraries
@ -490,6 +529,9 @@ endfunction()
if(WIN32)
list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt)
if(MI_WIN_DBG_EXTS)
list(APPEND mi_libraries dbgeng) # todo: only for the dll?
endif()
else()
find_link_library("pthread" MI_LIB_PTHREAD)
if(MI_LIB_PTHREAD)


@ -30,6 +30,10 @@ jobs:
BuildType: release
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
MSBuildConfiguration: Release
Release SIMD:
BuildType: release-simd
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_SIMD=ON -DMI_WIN_USE_FIXED_TLS=ON
MSBuildConfiguration: Release
Secure:
BuildType: secure
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
@ -89,6 +93,11 @@ jobs:
CXX: clang++
BuildType: release-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
Release SIMD Clang:
CC: clang
CXX: clang++
BuildType: release-simd-clang
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_SIMD=ON
Secure Clang:
CC: clang
CXX: clang++
@ -148,6 +157,9 @@ jobs:
Release:
BuildType: release
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
Release SIMD:
BuildType: release-simd
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_OPT_SIMD=ON
Secure:
BuildType: secure
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
@ -263,3 +275,28 @@ jobs:
- script: ctest --verbose --timeout 240
workingDirectory: $(BuildType)
displayName: CTest
- job:
displayName: macOS 13 (Ventura)
pool:
vmImage:
macOS-13
strategy:
matrix:
Debug:
BuildType: debug
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
Release:
BuildType: release
cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
steps:
- task: CMake@1
inputs:
workingDirectory: $(BuildType)
cmakeArgs: .. $(cmakeExtraArgs)
- script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
displayName: Make
- script: ctest --verbose --timeout 180
workingDirectory: $(BuildType)
displayName: CTest


@ -63,7 +63,7 @@ need a specific redirection DLL:
mode on Windows arm64. Unfortunately we cannot run x64 code emulated on Windows arm64 with
the x64 mimalloc override directly (since the C runtime always uses `arm64ec`). Instead:
1. Build the program as normal for x64 and link as normal with the x64
`mimalloc.lib` export library.
`mimalloc.dll.lib` export library.
2. Now separately build `mimalloc.dll` in `arm64ec` mode and _overwrite_ your
previous (x64) `mimalloc.dll` -- the loader can handle the mix of arm64ec
and x64 code. Now use `mimalloc-redirect-arm64ec.dll` to match your new


@ -1,5 +1,5 @@
set(mi_version_major 1)
set(mi_version_minor 9)
set(mi_version_major 3)
set(mi_version_minor 0)
set(mi_version_patch 2)
set(mi_version ${mi_version_major}.${mi_version_minor})


@ -5,11 +5,11 @@ vcpkg_from_github(
# The "REF" can be a commit hash, branch name (dev2), or a version (v2.2.1).
# REF "v${VERSION}"
REF 866ce5b89db1dbc3e66bbf89041291fd16329518
REF 6a89f8554eaab8d8d00e17b5b09f79e1d8dbf61b
# The sha512 is the hash of the tar.gz bundle.
# (To get the sha512, run `vcpkg install mimalloc[override] --overlay-ports=<dir of this file>` and copy the sha from the error message.)
SHA512 0b0e5ff823c49b9534b8c32800679806c5d7c29020af058da043c3e6e36ae3c32a1cdd5a21ece97dd60bc7dd4703967f683beac435dbb8514638a6cc55e5dea8
SHA512 32b87a3195efcc558b83a546348a8fb544fed335cdd6c9f8e7e9d0e8e64540fdcf1f4aa57fd0e783b78731518f4810292b832227d7e7665bf8426f1e6ce96f9d
)
vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS


@ -1,6 +1,6 @@
{
"name": "mimalloc",
"version": "1.9.2",
"version": "3.0.2",
"port-version": 2,
"description": "Compact general purpose allocator with excellent performance",
"homepage": "https://github.com/microsoft/mimalloc",


@ -431,12 +431,11 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large);
/// @param start Start of the memory area
/// @param size The size of the memory area.
/// @param is_committed Is the area already committed?
/// @param is_large Does it consist of large OS pages? Set this to \a true as well for memory
/// that should not be decommitted or protected (like rdma etc.)
/// @param is_pinned Can the memory not be decommitted or reset? (usually the case for large OS pages)
/// @param is_zero Does the area consist of zeros?
/// @param numa_node Possible associated numa node or `-1`.
/// @return \a true if successful, and \a false on error.
bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node);
bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node);
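/// \b Example (illustrative only, not part of the documented API above): hand a
/// pre-reserved, committed region to mimalloc. Here `region` and `region_size`
/// are hypothetical inputs obtained elsewhere (e.g. from `mmap` or `VirtualAlloc`).
/// \code
/// bool ok = mi_manage_os_memory(region, region_size,
///             /*is_committed*/ true, /*is_pinned*/ false,
///             /*is_zero*/ false, /*numa_node*/ -1);
/// // on success mimalloc may now allocate from [region, region+region_size)
/// \endcode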
/// Reserve \a pages of huge OS pages (1GiB) evenly divided over \a numa_nodes nodes,
but stops after at most `timeout_msecs` milliseconds.


@ -178,6 +178,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Lib>
<AdditionalLibraryDirectories>
@ -197,6 +198,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<PostBuildEvent>
<Command>
@ -209,8 +211,7 @@
<Lib>
<AdditionalLibraryDirectories>
</AdditionalLibraryDirectories>
<AdditionalDependencies>
</AdditionalDependencies>
<AdditionalDependencies>dbgeng.lib</AdditionalDependencies>
</Lib>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
@ -224,6 +225,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<PostBuildEvent>
<Command>
@ -251,6 +253,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<PostBuildEvent>
<Command>
@ -283,6 +286,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<IntrinsicFunctions>true</IntrinsicFunctions>
<LanguageStandard>stdcpp20</LanguageStandard>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -311,6 +315,8 @@
<CompileAs>CompileAsCpp</CompileAs>
<IntrinsicFunctions>true</IntrinsicFunctions>
<LanguageStandard>stdcpp20</LanguageStandard>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -347,6 +353,7 @@
<LanguageStandard>stdcpp20</LanguageStandard>
<EnableEnhancedInstructionSet>CPUExtensionRequirementsARMv81</EnableEnhancedInstructionSet>
<ExceptionHandling>Sync</ExceptionHandling>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -383,6 +390,7 @@
<LanguageStandard>stdcpp20</LanguageStandard>
<EnableEnhancedInstructionSet>CPUExtensionRequirementsARMv81</EnableEnhancedInstructionSet>
<ExceptionHandling>Sync</ExceptionHandling>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -424,16 +432,7 @@
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena-abandon.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\arena-meta.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
@ -453,6 +452,7 @@
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\page-map.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\prim\windows\prim.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -476,9 +476,8 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\prim\windows\windbg\mimalloc_dbg.cpp" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\os.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>
@ -488,11 +487,13 @@
<ClInclude Include="..\..\include\mimalloc-new-delete.h" />
<ClInclude Include="..\..\include\mimalloc-stats.h" />
<ClInclude Include="..\..\include\mimalloc\atomic.h" />
<ClInclude Include="..\..\include\mimalloc\bits.h" />
<ClInclude Include="..\..\include\mimalloc\internal.h" />
<ClInclude Include="..\..\include\mimalloc\prim.h" />
<ClInclude Include="..\..\include\mimalloc\track.h" />
<ClInclude Include="..\..\include\mimalloc\types.h" />
<ClInclude Include="..\..\src\bitmap.h" />
<ClInclude Include="..\..\src\prim\windows\windbg\mimalloc_dbg.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">


@ -52,16 +52,16 @@
<ClCompile Include="..\..\src\random.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena-abandon.c">
<ClCompile Include="..\..\src\page-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena-meta.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\prim\windows\windbg\mimalloc_dbg.cpp">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
@ -93,6 +93,9 @@
<ClInclude Include="..\..\include\mimalloc\prim.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\bits.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-stats.h">
<Filter>Headers</Filter>
</ClInclude>


@ -174,6 +174,7 @@
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<SupportJustMyCode>false</SupportJustMyCode>
<CompileAs>CompileAsCpp</CompileAs>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect32.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -204,9 +205,10 @@
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<SupportJustMyCode>false</SupportJustMyCode>
<CompileAs>CompileAsCpp</CompileAs>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;dbgeng.lib;%(AdditionalDependencies)</AdditionalDependencies>
<IgnoreSpecificDefaultLibraries>
</IgnoreSpecificDefaultLibraries>
<ModuleDefinitionFile>
@ -234,6 +236,7 @@
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<SupportJustMyCode>false</SupportJustMyCode>
<CompileAs>CompileAsCpp</CompileAs>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect-arm64.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -264,6 +267,7 @@
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<SupportJustMyCode>false</SupportJustMyCode>
<CompileAs>CompileAsCpp</CompileAs>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect-arm64ec.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -298,6 +302,7 @@
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<CompileAs>CompileAsCpp</CompileAs>
<BufferSecurityCheck>false</BufferSecurityCheck>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -332,6 +337,7 @@
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<CompileAs>CompileAsCpp</CompileAs>
<BufferSecurityCheck>false</BufferSecurityCheck>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -367,6 +373,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<BufferSecurityCheck>false</BufferSecurityCheck>
<EnableEnhancedInstructionSet>CPUExtensionRequirementsARMv81</EnableEnhancedInstructionSet>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -402,6 +409,7 @@
<CompileAs>CompileAsCpp</CompileAs>
<BufferSecurityCheck>false</BufferSecurityCheck>
<EnableEnhancedInstructionSet>CPUExtensionRequirementsARMv81</EnableEnhancedInstructionSet>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -423,17 +431,17 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h" />
<ClInclude Include="..\..\include\mimalloc-etw-gen.h" />
<ClInclude Include="..\..\include\mimalloc-etw.h" />
<ClInclude Include="..\..\include\mimalloc-new-delete.h" />
<ClInclude Include="..\..\include\mimalloc-override.h" />
<ClInclude Include="..\..\include\mimalloc-stats.h" />
<ClInclude Include="..\..\include\mimalloc\atomic.h" />
<ClInclude Include="..\..\include\mimalloc\bits.h" />
<ClInclude Include="..\..\include\mimalloc\internal.h" />
<ClInclude Include="..\..\include\mimalloc\prim.h" />
<ClInclude Include="..\..\include\mimalloc\track.h" />
<ClInclude Include="..\..\include\mimalloc\types.h" />
<ClInclude Include="..\..\src\bitmap.h" />
<ClInclude Include="..\..\src\prim\windows\windbg\mimalloc_dbg.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\src\alloc-aligned.c">
@ -458,7 +466,10 @@
</ClCompile>
<ClCompile Include="..\..\src\alloc-posix.c" />
<ClCompile Include="..\..\src\alloc.c" />
<ClCompile Include="..\..\src\arena-abandon.c">
<ClCompile Include="..\..\src\arena-meta.c" />
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\free.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64EC'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -468,11 +479,10 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\arena.c" />
<ClCompile Include="..\..\src\bitmap.c" />
<ClCompile Include="..\..\src\heap.c" />
<ClCompile Include="..\..\src\init.c" />
<ClCompile Include="..\..\src\libc.c" />
<ClCompile Include="..\..\src\page-map.c" />
<ClCompile Include="..\..\src\prim\prim.c" />
<ClCompile Include="..\..\src\prim\windows\prim.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -497,9 +507,8 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|ARM64EC'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\page.c" />
<ClCompile Include="..\..\src\prim\windows\windbg\mimalloc_dbg.cpp" />
<ClCompile Include="..\..\src\random.c" />
<ClCompile Include="..\..\src\segment-map.c" />
<ClCompile Include="..\..\src\segment.c" />
<ClCompile Include="..\..\src\stats.c" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />


@ -49,16 +49,19 @@
<ClCompile Include="..\..\src\random.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\segment-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\stats.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena-abandon.c">
<ClCompile Include="..\..\src\page-map.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\free.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\arena-meta.c">
<Filter>Sources</Filter>
</ClCompile>
<ClCompile Include="..\..\src\prim\windows\windbg\mimalloc_dbg.cpp">
<Filter>Sources</Filter>
</ClCompile>
</ItemGroup>
@ -75,12 +78,6 @@
<ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-etw.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-etw-gen.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-new-delete.h">
<Filter>Headers</Filter>
</ClInclude>
@ -96,6 +93,9 @@
<ClInclude Include="..\..\include\mimalloc\prim.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc\bits.h">
<Filter>Headers</Filter>
</ClInclude>
<ClInclude Include="..\..\include\mimalloc-stats.h">
<Filter>Headers</Filter>
</ClInclude>


@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H
#define MI_MALLOC_VERSION 192 // major + 2 digits minor
#define MI_MALLOC_VERSION 302 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
@ -274,17 +274,17 @@ mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t nu
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_pinned /* cannot decommit/reset? */, bool is_zero, int numa_node) mi_attr_noexcept;
mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
mi_decl_export void mi_arenas_print(void) mi_attr_noexcept;
// Experimental: heaps associated with specific memory arena's
typedef int mi_arena_id_t;
typedef void* mi_arena_id_t;
mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_pinned, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
#if MI_MALLOC_VERSION >= 182
// Create a heap that only allocates in the specified arena
@ -323,6 +323,23 @@ mi_decl_export void mi_collect_reduce(size_t target_thread_owned) mi_attr_noexce
// experimental
//mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size);
//mi_decl_export void* mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, void** base, size_t* full_size);
//mi_decl_export void* mi_os_alloc_aligned_allow_large(size_t size, size_t alignment, bool commit, bool* is_committed, bool* is_pinned, void** base, size_t* full_size);
//mi_decl_export void mi_os_free(void* p, size_t size);
//mi_decl_export void mi_os_commit(void* p, size_t size);
//mi_decl_export void mi_os_decommit(void* p, size_t size);
mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size);
mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id);
mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena);
mi_decl_export void mi_heap_unload(mi_heap_t* heap);
// Is a pointer contained in the given arena area?
mi_decl_export bool mi_arena_contains(mi_arena_id_t arena_id, const void* p);
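// A hedged sketch of how the experimental unload/reload pair above could be used
// (assumptions: an exclusive arena id obtained from e.g. mi_reserve_os_memory_ex,
// and a heap bound to that arena; exact semantics may differ from this illustration):
//
// typedef struct { void* base; size_t accessed; size_t size; } saved_arena_t;
//
// static bool arena_suspend(mi_heap_t* heap, mi_arena_id_t arena, saved_arena_t* out) {
//   mi_heap_unload(heap);  // detach the heap state first
//   return mi_arena_unload(arena, &out->base, &out->accessed, &out->size);
// }
//
// static bool arena_resume(mi_heap_t* heap, const saved_arena_t* s) {
//   mi_arena_id_t arena;
//   if (!mi_arena_reload(s->base, s->size, &arena)) return false;
//   return mi_heap_reload(heap, arena);  // re-attach the heap to the reloaded arena
// }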
// ------------------------------------------------------
// Convenience
// ------------------------------------------------------
@ -370,12 +387,11 @@ typedef enum mi_option_e {
mi_option_os_tag, // tag used for OS logging (macOS only for now) (=100)
mi_option_max_errors, // issue at most N error messages
mi_option_max_warnings, // issue at most N warning messages
mi_option_max_segment_reclaim, // max. percentage of the abandoned segments can be reclaimed per try (=10%)
mi_option_deprecated_max_segment_reclaim, // max. percentage of the abandoned segments can be reclaimed per try (=10%)
mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe
mi_option_arena_reserve, // initial memory size for arena reservation (= 1 GiB on 64-bit) (internally, this value is in KiB; use `mi_option_get_size`)
mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10)
mi_option_purge_extend_delay,
mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1)
mi_option_deprecated_purge_extend_delay,
mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's)
mi_option_retry_on_oom, // retry on out-of-memory for N milliseconds (=400), set to 0 to disable retries. (only on Windows)
mi_option_visit_abandoned, // allow visiting heap blocks from abandoned threads (=0)
@ -384,8 +400,14 @@ typedef enum mi_option_e {
mi_option_guarded_precise, // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0)
mi_option_guarded_sample_rate, // 1 out of N allocations in the min/max range will be guarded (=1000)
mi_option_guarded_sample_seed, // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0)
mi_option_target_segments_per_thread, // experimental (=0)
mi_option_generic_collect, // collect heaps every N (=10000) generic allocation calls
mi_option_page_reclaim_on_free, // reclaim abandoned pages on a free (=0). -1 disallows always, 0 allows if the page originated from the current heap, 1 allows always
mi_option_page_full_retain, // retain N full (small) pages per size class (=2)
mi_option_page_max_candidates, // max candidate pages to consider for allocation (=4)
mi_option_max_vabits, // max user space virtual address bits to consider (=48)
mi_option_pagemap_commit, // commit the full pagemap (to always catch invalid pointer uses) (=0)
mi_option_page_commit_on_demand, // commit page memory on-demand
mi_option_page_reclaim_max, // don't reclaim pages if we already own N pages (in that size class) (=16)
_mi_option_last,
// legacy option names
mi_option_large_os_pages = mi_option_allow_large_os_pages,
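// Illustrative only: the new options above can be tuned programmatically with the
// existing mi_option_set API (or via their MIMALLOC_... environment variables),
// for example:
//   mi_option_set(mi_option_page_full_retain, 4);      // retain 4 full small pages per size class
//   mi_option_set(mi_option_page_reclaim_on_free, 1);  // always allow reclaiming abandoned pages
//   mi_option_set(mi_option_generic_collect, 20000);   // collect every 20000 generic allocation calls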


@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_ATOMIC_H
#define MIMALLOC_ATOMIC_H
#ifndef MI_ATOMIC_H
#define MI_ATOMIC_H
// include windows.h or pthreads.h
#if defined(_WIN32)
@ -75,16 +75,21 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_atomic_exchange_relaxed(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_cas_weak_relaxed(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(relaxed),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_cas_strong_relaxed(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(relaxed),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_and_relaxed(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_or_relaxed(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1)
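// Illustrative use of the CAS macros above (an example, not part of the original
// header): set a bit with a weak-CAS retry loop. `mi_atomic_load_relaxed` is
// defined earlier in this file; mi_atomic_or_acq_rel would do this in one step,
// the loop just demonstrates the expected/desired CAS pattern.
static inline void mi_example_atomic_set_bit(_Atomic(size_t)* field, size_t bit) {
  size_t expect = mi_atomic_load_relaxed(field);
  while (!mi_atomic_cas_weak_acq_rel(field, &expect, expect | bit)) {
    // on failure `expect` is refreshed with the observed value; retry
  }
}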
@ -406,10 +411,9 @@ static inline void mi_atomic_yield(void) {
// ----------------------------------------------------------------------
// Locks
// These do not have to be recursive and should be light-weight
// in-process only locks. Only used for reserving arena's and to
// maintain the abandoned list.
// Locks
// These should be light-weight in-process only locks.
// Only used for reserving arena's and to maintain the abandoned list.
// ----------------------------------------------------------------------
#if _MSC_VER
#pragma warning(disable:26110) // unlock with holding lock
@ -535,4 +539,4 @@ static inline void mi_lock_done(mi_lock_t* lock) {
#endif
#endif // __MIMALLOC_ATOMIC_H
#endif // MI_ATOMIC_H

include/mimalloc/bits.h (new file, 344 lines)

@ -0,0 +1,344 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Bit operations, and platform dependent definitions (MI_INTPTR_SIZE etc.)
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITS_H
#define MI_BITS_H
// ------------------------------------------------------
// Size of a pointer.
// We assume that `sizeof(void*)==sizeof(intptr_t)`
// and it holds for all platforms we know of.
//
// However, the C standard only requires that:
// p == (void*)((intptr_t)p)
// but we also need:
// i == (intptr_t)((void*)i)
// or otherwise one might define an intptr_t type that is larger than a pointer...
// ------------------------------------------------------
#if INTPTR_MAX > INT64_MAX
# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
#elif INTPTR_MAX == INT64_MAX
# define MI_INTPTR_SHIFT (3)
#elif INTPTR_MAX == INT32_MAX
# define MI_INTPTR_SHIFT (2)
#else
#error platform pointers must be 32, 64, or 128 bits
#endif
#if (INTPTR_MAX) > LONG_MAX
# define MI_PU(x) x##ULL
#else
# define MI_PU(x) x##UL
#endif
#if SIZE_MAX == UINT64_MAX
# define MI_SIZE_SHIFT (3)
typedef int64_t mi_ssize_t;
#elif SIZE_MAX == UINT32_MAX
# define MI_SIZE_SHIFT (2)
typedef int32_t mi_ssize_t;
#else
#error platform objects must be 32 or 64 bits in size
#endif
#if (SIZE_MAX/2) > LONG_MAX
# define MI_ZU(x) x##ULL
#else
# define MI_ZU(x) x##UL
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
#define MI_KiB (MI_ZU(1024))
#define MI_MiB (MI_KiB*MI_KiB)
#define MI_GiB (MI_MiB*MI_KiB)
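// The pointer-size assumption above can be checked at compile time; a minimal
// illustrative check (C11):
_Static_assert(sizeof(void*) == sizeof(intptr_t), "void* and intptr_t must have the same size");
_Static_assert(MI_INTPTR_SIZE == sizeof(void*), "MI_INTPTR_SHIFT is mis-configured");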
/* --------------------------------------------------------------------------------
Architecture
-------------------------------------------------------------------------------- */
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) // consider arm64ec as arm64
#define MI_ARCH_ARM64 1
#elif defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#define MI_ARCH_X64 1
#elif defined(__i386__) || defined(__i386) || defined(_M_IX86) || defined(_X86_) || defined(__X86__)
#define MI_ARCH_X86 1
#elif defined(__arm__) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARMT) || defined(__arm)
#define MI_ARCH_ARM32 1
#elif defined(__riscv) || defined(_M_RISCV)
#define MI_ARCH_RISCV 1
#if (LONG_MAX == INT32_MAX)
#define MI_ARCH_RISCV32 1
#else
#define MI_ARCH_RISCV64 1
#endif
#endif
#if MI_ARCH_X64 && defined(__AVX2__)
#include <immintrin.h>
#elif MI_ARCH_ARM64 && MI_OPT_SIMD
#include <arm_neon.h>
#endif
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
#include <intrin.h>
#endif
#if MI_ARCH_X64 && defined(__AVX2__) && !defined(__BMI2__) // msvc
#define __BMI2__ 1
#endif
#if MI_ARCH_X64 && (defined(__AVX2__) || defined(__BMI2__)) && !defined(__BMI1__) // msvc
#define __BMI1__ 1
#endif
// Define big endian if needed
// #define MI_BIG_ENDIAN 1
// maximum virtual address bits in a user-space pointer
#if MI_DEFAULT_VIRTUAL_ADDRESS_BITS > 0
#define MI_MAX_VABITS MI_DEFAULT_VIRTUAL_ADDRESS_BITS
#elif MI_ARCH_X64
#define MI_MAX_VABITS (47)
#elif MI_INTPTR_SIZE > 4
#define MI_MAX_VABITS (48)
#else
#define MI_MAX_VABITS (32)
#endif
// use a flat page-map (or a 2-level one)
#ifndef MI_PAGE_MAP_FLAT
#if MI_MAX_VABITS <= 40 && !defined(__APPLE__)
#define MI_PAGE_MAP_FLAT 1
#else
#define MI_PAGE_MAP_FLAT 0
#endif
#endif
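// A hedged sketch of the flat page-map idea (illustration only; the actual data
// structure lives in src/page-map.c and may differ): one entry per 64 KiB arena
// slice across the whole MI_MAX_VABITS address space, indexed directly by the
// pointer value. All `mi_example_` names below are hypothetical.
#define MI_EXAMPLE_SLICE_SHIFT  (16)  // 64 KiB slices
#define MI_EXAMPLE_MAP_ENTRIES  (MI_ZU(1) << (MI_MAX_VABITS - MI_EXAMPLE_SLICE_SHIFT))

static inline size_t mi_example_page_map_index(const void* p) {
  return ((uintptr_t)p >> MI_EXAMPLE_SLICE_SHIFT);  // index into MI_EXAMPLE_MAP_ENTRIES entries
}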
/* --------------------------------------------------------------------------------
Builtin's
-------------------------------------------------------------------------------- */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#define mi_builtin(name) __builtin_##name
#define mi_has_builtin(name) __has_builtin(__builtin_##name)
#if (LONG_MAX == INT32_MAX)
#define mi_builtin32(name) mi_builtin(name##l)
#define mi_has_builtin32(name) mi_has_builtin(name##l)
#else
#define mi_builtin32(name) mi_builtin(name)
#define mi_has_builtin32(name) mi_has_builtin(name)
#endif
#if (LONG_MAX == INT64_MAX)
#define mi_builtin64(name) mi_builtin(name##l)
#define mi_has_builtin64(name) mi_has_builtin(name##l)
#else
#define mi_builtin64(name) mi_builtin(name##ll)
#define mi_has_builtin64(name) mi_has_builtin(name##ll)
#endif
#if (MI_SIZE_BITS == 32)
#define mi_builtinz(name) mi_builtin32(name)
#define mi_has_builtinz(name) mi_has_builtin32(name)
#define mi_msc_builtinz(name) name
#elif (MI_SIZE_BITS == 64)
#define mi_builtinz(name) mi_builtin64(name)
#define mi_has_builtinz(name) mi_has_builtin64(name)
#define mi_msc_builtinz(name) name##64
#endif
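// For example, on an LP64 platform (MI_SIZE_BITS==64, LONG_MAX==INT64_MAX)
// these dispatch macros expand as:
//   mi_builtinz(ctz)(x)               -> __builtin_ctzl(x)
//   mi_has_builtinz(ctz)              -> __has_builtin(__builtin_ctzl)
//   mi_msc_builtinz(_BitScanForward)  -> _BitScanForward64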
/* --------------------------------------------------------------------------------
Popcount and count trailing/leading zero's
-------------------------------------------------------------------------------- */
size_t _mi_popcount_generic(size_t x);
static inline size_t mi_popcount(size_t x) {
#if mi_has_builtinz(popcount)
return mi_builtinz(popcount)(x);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return mi_msc_builtinz(__popcnt)(x);
#elif MI_ARCH_X64 && defined(__BMI1__)
return (size_t)_mm_popcnt_u64(x);
#else
#define MI_HAS_FAST_POPCOUNT 0
return (x<=1 ? x : _mi_popcount_generic(x));
#endif
}
#ifndef MI_HAS_FAST_POPCOUNT
#define MI_HAS_FAST_POPCOUNT 1
#endif
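// For reference, a sketch of what a portable fallback can look like (the real
// _mi_popcount_generic lives elsewhere in the sources and may differ):
static inline size_t mi_example_popcount_generic(size_t x) {
  size_t n = 0;
  while (x != 0) { x &= (x - 1); n++; }  // Kernighan: clears the lowest set bit each iteration
  return n;
}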
size_t _mi_clz_generic(size_t x);
size_t _mi_ctz_generic(size_t x);
static inline size_t mi_ctz(size_t x) {
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__)
size_t r;
__asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif defined(__GNUC__) && MI_ARCH_X64
// tzcnt is interpreted as bsf if BMI1 is not supported (pre-haswell)
// if the argument is zero:
// - tzcnt: sets carry-flag, and returns MI_SIZE_BITS
// - bsf : sets zero-flag, and leaves the destination _unmodified_ (on both AMD and Intel now, see <https://github.com/llvm/llvm-project/pull/102885>)
// so we always initialize r to MI_SIZE_BITS to work correctly on all cpu's without branching
size_t r = MI_SIZE_BITS;
__asm ("tzcnt\t%1, %0" : "+r"(r) : "r"(x) : "cc"); // use '+r' to keep the assignment to r in case this becomes bsf on older cpu's
return r;
#elif mi_has_builtinz(ctz)
return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS);
#elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__)
return (x!=0 ? _tzcnt_u64(x) : MI_SIZE_BITS); // ensure it still works on non-BMI1 cpu's as well
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS);
#elif defined(__GNUC__) && MI_ARCH_X86
size_t r = MI_SIZE_BITS;
__asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc");
return r;
#elif MI_HAS_FAST_POPCOUNT
return (x!=0 ? (mi_popcount(x^(x-1))-1) : MI_SIZE_BITS);
#else
#define MI_HAS_FAST_BITSCAN 0
return (x!=0 ? _mi_ctz_generic(x) : MI_SIZE_BITS);
#endif
}
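// Similarly, a portable trailing-zero count as a reference sketch (the real
// _mi_ctz_generic may be implemented differently):
static inline size_t mi_example_ctz_generic(size_t x) {
  if (x == 0) return MI_SIZE_BITS;
  size_t n = 0;
  while ((x & 1) == 0) { x >>= 1; n++; }
  return n;
}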
static inline size_t mi_clz(size_t x) {
// we don't optimize anymore to lzcnt as there are still non BMI1 cpu's around (like Intel Celeron, see issue #1016)
// on pre-haswell cpu's lzcnt gets executed as bsr which is not equivalent (as it returns the bit position)
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 lzcnt is defined for 0
size_t r;
__asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return r;
#elif mi_has_builtinz(clz)
return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long idx;
return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS);
#elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86)
if (x==0) return MI_SIZE_BITS;
size_t r;
__asm ("bsr\t%1, %0" : "=r"(r) : "r"(x) : "cc");
return (MI_SIZE_BITS - 1 - r);
#else
#define MI_HAS_FAST_BITSCAN 0
return (x!=0 ? _mi_clz_generic(x) : MI_SIZE_BITS);
#endif
}
#ifndef MI_HAS_FAST_BITSCAN
#define MI_HAS_FAST_BITSCAN 1
#endif
/* --------------------------------------------------------------------------------
find trailing/leading zero (bit scan forward/reverse)
-------------------------------------------------------------------------------- */
// Bit scan forward: find the least significant bit that is set (i.e. count trailing zero's)
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsf(size_t x, size_t* idx) {
// we don't optimize anymore to lzcnt so we run correctly on older cpu's as well
#if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9)
// on x64 the carry flag is set on zero which gives better codegen
bool is_zero;
__asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" );
return !is_zero;
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false);
#else
return (x!=0 ? (*idx = mi_ctz(x), true) : false);
#endif
}
// Bit scan reverse: find the most significant bit that is set
// return false if `x==0` (with `*idx` undefined) and true otherwise,
// with `*idx` set to the bit index (`0 <= *idx < MI_BFIELD_BITS`).
static inline bool mi_bsr(size_t x, size_t* idx) {
#if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
unsigned long i;
return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false);
#else
return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false);
#endif
}
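// Illustrative use of mi_bsf (an example, not from the original header): visit
// every set bit in a word from least to most significant -- a common pattern
// when scanning allocation bitmaps.
static inline void mi_example_visit_bits(size_t mask, void (*visit)(size_t bit_idx)) {
  size_t idx;
  while (mi_bsf(mask, &idx)) {
    visit(idx);
    mask &= (mask - 1);  // clear the lowest set bit and continue
  }
}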
/* --------------------------------------------------------------------------------
rotate
-------------------------------------------------------------------------------- */
static inline size_t mi_rotr(size_t x, size_t r) {
#if (mi_has_builtin(rotateright64) && MI_SIZE_BITS==64)
return mi_builtin(rotateright64)(x,r);
#elif (mi_has_builtin(rotateright32) && MI_SIZE_BITS==32)
return mi_builtin(rotateright32)(x,r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64)
return _rotr64(x, (int)r);
#elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32)
return _lrotr(x,(int)r);
#else
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
return ((x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1))));
#endif
}
static inline size_t mi_rotl(size_t x, size_t r) {
#if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64)
return mi_builtin(rotateleft64)(x,r);
#elif (mi_has_builtin(rotateleft32) && MI_SIZE_BITS==32)
return mi_builtin(rotateleft32)(x,r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64)
return _rotl64(x, (int)r);
#elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32)
return _lrotl(x, (int)r);
#else
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1);
return ((x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1))));
#endif
}
static inline uint32_t mi_rotl32(uint32_t x, uint32_t r) {
#if mi_has_builtin(rotateleft32)
return mi_builtin(rotateleft32)(x,r);
#elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32)
return _lrotl(x, (int)r);
#else
// The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to
// avoid UB when `rshift==0`. See <https://blog.regehr.org/archives/1063>
const unsigned int rshift = (unsigned int)(r) & 31;
return ((x << rshift) | (x >> ((-rshift) & 31)));
#endif
}
#endif // MI_BITS_H

File diff suppressed because it is too large.


@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_PRIM_H
#define MIMALLOC_PRIM_H
#ifndef MI_PRIM_H
#define MI_PRIM_H
// --------------------------------------------------------------------------
@ -117,7 +117,8 @@ void _mi_prim_thread_done_auto_done(void);
// Called when the default heap for a thread changes
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
// Is this thread part of a thread pool?
bool _mi_prim_thread_is_in_threadpool(void);
@ -269,35 +270,42 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce
// defined in `init.c`; do not use these directly
extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern bool _mi_process_is_initialized; // has mi_process_init been called?
extern mi_decl_hidden mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
extern mi_decl_hidden bool _mi_process_is_initialized; // has mi_process_init been called?
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept;
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
const mi_threadid_t tid = __mi_prim_thread_id();
mi_assert_internal(tid > 1);
mi_assert_internal((tid & MI_PAGE_FLAG_MASK) == 0); // bottom 2 bits are clear?
return tid;
}
// Get a unique id for the current thread.
#if defined(MI_PRIM_THREAD_ID)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
return MI_PRIM_THREAD_ID(); // used for example by CPython for a free threaded build (see python/cpython#115488)
}
#elif defined(_WIN32)
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
// Windows: works on Intel and ARM in both 32- and 64-bit
return (uintptr_t)NtCurrentTeb();
}
#elif MI_USE_BUILTIN_THREAD_POINTER
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
// Works on most Unix based platforms with recent compilers
return (uintptr_t)__builtin_thread_pointer();
}
#elif MI_HAS_TLS_SLOT
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__)
// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
// see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
@ -313,7 +321,7 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
#else
// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
static inline mi_threadid_t __mi_prim_thread_id(void) mi_attr_noexcept {
return (uintptr_t)&_mi_heap_default;
}
@ -416,4 +424,4 @@ static inline mi_heap_t* mi_prim_get_default_heap(void) {
#endif // mi_prim_get_default_heap()
#endif // MIMALLOC_PRIM_H
#endif // MI_PRIM_H


@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_TRACK_H
#define MIMALLOC_TRACK_H
#ifndef MI_TRACK_H
#define MI_TRACK_H
/* ------------------------------------------------------------------------------------------------------
Track memory ranges with macros for tools like Valgrind, address sanitizer, or other memory checkers.
@ -142,4 +142,4 @@ defined, undefined, or not accessible at all:
}
#endif
#endif
#endif // MI_TRACK_H


@ -5,17 +5,15 @@ terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_TYPES_H
#define MIMALLOC_TYPES_H
#ifndef MI_TYPES_H
#define MI_TYPES_H
// --------------------------------------------------------------------------
// This file contains the main type definitions for mimalloc:
// mi_heap_t : all data for a thread-local heap, contains
// lists of all managed heap pages.
// mi_segment_t : a larger chunk of memory (32GiB) from where pages
// are allocated.
// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from
// where objects are allocated.
// where objects of a single size are allocated.
// Note: we write "OS page" for OS memory pages while
// using plain "page" for mimalloc pages (`mi_page_t`).
// --------------------------------------------------------------------------
@ -24,11 +22,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include <mimalloc-stats.h>
#include <stddef.h> // ptrdiff_t
#include <stdint.h> // uintptr_t, uint16_t, etc
#include "atomic.h" // _Atomic
#ifdef _MSC_VER
#pragma warning(disable:4214) // bitfield is not int
#endif
#include <errno.h> // error codes
#include "bits.h" // size defines (MI_INTPTR_SIZE etc), bit operations
#include "atomic.h" // _Atomic primitives
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `sizeof(void*)`
@ -51,11 +47,17 @@ terms of the MIT license. A copy of the license can be found in the file
// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance).
// #define MI_STAT 1
// Define MI_SECURE to enable security mitigations
// #define MI_SECURE 1 // guard page around metadata
// #define MI_SECURE 2 // guard page around each mimalloc page
// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // checks for double free. (may be more expensive)
// Define MI_SECURE to enable security mitigations. Level 1 has minimal performance impact,
// but protects most metadata with guard pages:
// #define MI_SECURE 1 // guard page around metadata
//
// Level 2 has more performance impact but protects well against various buffer overflows
// by surrounding all mimalloc pages with guard pages:
// #define MI_SECURE 2 // guard page around each mimalloc page (can fragment VMA's with large heaps..)
//
// The next two levels can have more performance cost:
// #define MI_SECURE 3 // randomize allocations, encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
// #define MI_SECURE 4 // checks for double free. (may be more expensive)
#if !defined(MI_SECURE)
#define MI_SECURE 0
@ -98,126 +100,132 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_ENCODE_FREELIST 1
#endif
// Enable large pages for objects between 64 KiB and 512 KiB.
// Disabled by default as for many workloads the block sizes above 64 KiB are quite random, which can lead to too many partially used large pages.
#ifndef MI_ENABLE_LARGE_PAGES
#define MI_ENABLE_LARGE_PAGES 0
#endif
// We used to abandon huge pages in order to eagerly deallocate them if freed from another thread.
// Unfortunately, that makes it impossible to visit them during a heap walk or include them in a
// `mi_heap_destroy`. We therefore nowadays reset/decommit huge blocks if freed from
// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
// the owning thread).
// #define MI_HUGE_PAGE_ABANDON 1
// --------------------------------------------------------------
// Sizes of internal data-structures
// (comments specify sizes on 64-bit, usually 32-bit is halved)
// --------------------------------------------------------------
// ------------------------------------------------------
// Platform specific values
// ------------------------------------------------------
// ------------------------------------------------------
// Size of a pointer.
// We assume that `sizeof(void*)==sizeof(intptr_t)`
// and it holds for all platforms we know of.
//
// However, the C standard only requires that:
// p == (void*)((intptr_t)p))
// but we also need:
// i == (intptr_t)((void*)i)
// or otherwise one might define an intptr_t type that is larger than a pointer...
// ------------------------------------------------------
#if INTPTR_MAX > INT64_MAX
# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
#elif INTPTR_MAX == INT64_MAX
# define MI_INTPTR_SHIFT (3)
#elif INTPTR_MAX == INT32_MAX
# define MI_INTPTR_SHIFT (2)
#else
#error platform pointers must be 32, 64, or 128 bits
#endif
// Sizes are for 64-bit
#ifndef MI_ARENA_SLICE_SHIFT
#ifdef MI_SMALL_PAGE_SHIFT // backward compatibility
#define MI_ARENA_SLICE_SHIFT MI_SMALL_PAGE_SHIFT
#else
#define MI_ARENA_SLICE_SHIFT (13 + MI_SIZE_SHIFT) // 64 KiB (32 KiB on 32-bit)
#endif
#endif
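// A quick self-contained check of the round-trip assumptions above (a sketch for
// illustration; mimalloc relies on these implicitly):
//
//   #include <stdint.h>
//   #include <assert.h>
//   int main(void) {
//     void* p = &p;
//     assert(p == (void*)((intptr_t)p));  // guaranteed by the C standard
//     intptr_t i = (intptr_t)p;
//     assert(i == (intptr_t)((void*)i));  // the extra assumption needed here
//     _Static_assert(sizeof(void*) == sizeof(intptr_t), "assumed by mimalloc");
//     return 0;
//   }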
#ifndef MI_BCHUNK_BITS_SHIFT
#define MI_BCHUNK_BITS_SHIFT (6 + MI_SIZE_SHIFT) // optimized for 512 bits per chunk (avx512)
#endif
#if SIZE_MAX == UINT64_MAX
# define MI_SIZE_SHIFT (3)
typedef int64_t mi_ssize_t;
#elif SIZE_MAX == UINT32_MAX
# define MI_SIZE_SHIFT (2)
typedef int32_t mi_ssize_t;
#else
#error platform objects must be 32 or 64 bits
#endif
#define MI_BCHUNK_BITS (1 << MI_BCHUNK_BITS_SHIFT) // sub-bitmaps are "bchunks" of 512 bits
#define MI_ARENA_SLICE_SIZE (MI_ZU(1) << MI_ARENA_SLICE_SHIFT) // arena's allocate in slices of 64 KiB
#define MI_ARENA_SLICE_ALIGN (MI_ARENA_SLICE_SIZE)
#if (SIZE_MAX/2) > LONG_MAX
# define MI_ZU(x) x##ULL
# define MI_ZI(x) x##LL
#else
# define MI_ZU(x) x##UL
# define MI_ZI(x) x##L
#endif
#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
#define MI_ARENA_MIN_OBJ_SLICES (1)
#define MI_ARENA_MAX_OBJ_SLICES (MI_BCHUNK_BITS) // 32 MiB (for now, cannot cross chunk boundaries)
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_MIN_OBJ_SLICES * MI_ARENA_SLICE_SIZE)
#define MI_ARENA_MAX_OBJ_SIZE (MI_ARENA_MAX_OBJ_SLICES * MI_ARENA_SLICE_SIZE)
#define MI_SMALL_PAGE_SIZE MI_ARENA_MIN_OBJ_SIZE // 64 KiB
#define MI_MEDIUM_PAGE_SIZE (8*MI_SMALL_PAGE_SIZE) // 512 KiB (=byte in the bchunk bitmap)
#define MI_LARGE_PAGE_SIZE (MI_SIZE_SIZE*MI_MEDIUM_PAGE_SIZE) // 4 MiB (=word in the bchunk bitmap)
#define MI_KiB (MI_ZU(1024))
#define MI_MiB (MI_KiB*MI_KiB)
#define MI_GiB (MI_MiB*MI_KiB)
// ------------------------------------------------------
// Main internal data-structures
// ------------------------------------------------------
// Main tuning parameters for segment and page sizes
// Sizes for 64-bit, divide by two for 32-bit
#ifndef MI_SMALL_PAGE_SHIFT
#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB
#endif
#ifndef MI_MEDIUM_PAGE_SHIFT
#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
#endif
#ifndef MI_LARGE_PAGE_SHIFT
#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4MiB
#endif
#ifndef MI_SEGMENT_SHIFT
#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4MiB -- must be equal to `MI_LARGE_PAGE_SHIFT`
#endif
// Derived constants
#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_ALIGN (MI_SEGMENT_SIZE)
#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1))
#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
#define MI_LARGE_PAGE_SIZE (MI_ZU(1)<<MI_LARGE_PAGE_SHIFT)
#define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
#define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
// The max object sizes are checked so that we don't waste more than 12.5% internally over the page sizes.
// (Except for large pages since huge objects are allocated in 4MiB chunks)
#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/8) // 8 KiB
#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64 KiB
#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1 MiB
#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
#if MI_BIN_HUGE != 73U
#error "mimalloc internal: expecting 73 bins"
#endif
#if (MI_LARGE_OBJ_WSIZE_MAX >= 655360)
#error "mimalloc internal: define more bins"
#endif
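// To make the "12.5% increments" concrete, a simplified sketch of mapping a size (in
// machine words) to a bin -- not the exact `_mi_bin`, which handles the smallest sizes
// differently -- using the GCC/Clang `__builtin_clzll` builtin: each power-of-two range
// is split into 4 linearly spaced sub-bins, bounding worst-case internal waste to ~12.5%.
static inline size_t sketch_bin_of(size_t wsize) {
  if (wsize <= 8) return wsize;  // the first few sizes each get an exact bin
  wsize--;
  const size_t b = (size_t)(63 - __builtin_clzll((unsigned long long)wsize)); // highest set bit
  return ((b << 2) + ((wsize >> (b - 2)) & 0x03)) - 3;  // 4 sub-bins per power of two
}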
// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`)
#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX)
// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
#define MI_BIN_FULL (MI_BIN_HUGE+1)
#define MI_BIN_COUNT (MI_BIN_FULL+1)
// We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
// Minimal commit for a page on-demand commit (should be >= OS page size)
#define MI_PAGE_MIN_COMMIT_SIZE MI_ARENA_SLICE_SIZE // (4*MI_KiB)
// ------------------------------------------------------
// Arena's are large reserved areas of memory allocated from
// the OS that are managed by mimalloc to efficiently
// allocate MI_ARENA_SLICE_SIZE slices of memory for the
// mimalloc pages.
// ------------------------------------------------------
// A large memory arena where pages are allocated in.
typedef struct mi_arena_s mi_arena_t; // defined in `arena.c`
// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in an arena, be allocated directly from the OS, live in a meta-data page, or be statically allocated.
// The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (the initial main heap data for example (`init.c`))
MI_MEM_META, // allocated with the meta data allocator (`arena-meta.c`)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
MI_MEM_OS_REMAP, // allocated in a remappable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case) (`arena.c`)
} mi_memkind_t;
static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}
static inline bool mi_memkind_needs_no_free(mi_memkind_t memkind) {
return (memkind <= MI_MEM_STATIC);
}
typedef struct mi_memid_os_info {
void* base; // actual base address of the block (used for offset aligned allocations)
size_t size; // allocated full size
// size_t alignment; // alignment at allocation
} mi_memid_os_info_t;
typedef struct mi_memid_arena_info {
mi_arena_t* arena; // arena that contains this memory
uint32_t slice_index; // slice index in the arena
uint32_t slice_count; // allocated slices
} mi_memid_arena_info_t;
typedef struct mi_memid_meta_info {
void* meta_page; // meta-page that contains the block
uint32_t block_index; // block index in the meta-data page
uint32_t block_count; // allocated blocks
} mi_memid_meta_info_t;
typedef struct mi_memid_s {
union {
mi_memid_os_info_t os; // only used for MI_MEM_OS
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
mi_memid_meta_info_t meta; // only used for MI_MEM_META
} mem;
mi_memkind_t memkind;
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2MiB) or huge (1GiB) OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
} mi_memid_t;
static inline bool mi_memid_is_os(mi_memid_t memid) {
return mi_memkind_is_os(memid.memkind);
}
static inline bool mi_memid_needs_no_free(mi_memid_t memid) {
return mi_memkind_needs_no_free(memid.memkind);
}
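// As an illustration of how provenance travels with a block, a hedged sketch of a
// free path dispatching on the memid (hypothetical helper, not the actual code):
static inline void sketch_memid_free(mi_memid_t memid) {
  if (mi_memid_needs_no_free(memid)) {
    // MI_MEM_NONE/MI_MEM_EXTERNAL/MI_MEM_STATIC: mimalloc must not free this memory
  }
  else if (mi_memid_is_os(memid)) {
    // OS memory: free the original base and full size (memid.mem.os.base/.size),
    // not just the aligned sub-range that was handed out
  }
  else if (memid.memkind == MI_MEM_ARENA) {
    // arena memory: release slices [slice_index, slice_index + slice_count)
    // back to the bitmaps of memid.mem.arena.arena
  }
  else {
    // MI_MEM_META: return blocks [block_index, block_index + block_count)
    // to the meta-page in memid.mem.meta.meta_page
  }
}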
// ------------------------------------------------------
// Mimalloc pages contain allocated blocks
@ -235,48 +243,30 @@ typedef struct mi_block_s {
mi_encoded_t next;
} mi_block_t;
#if MI_GUARDED
// we always align guarded pointers in a block at an offset;
// the block `next` field is then used as a tag to distinguish regular offset-aligned blocks from guarded ones
#define MI_BLOCK_TAG_ALIGNED ((mi_encoded_t)(0))
#define MI_BLOCK_TAG_GUARDED (~MI_BLOCK_TAG_ALIGNED)
#endif
// The page flags are put in the bottom 2 bits of the thread_id (for a fast test in `mi_free`)
// `has_aligned` is true if the page has pointers at an offset in a block (so we unalign before free-ing)
// `in_full_queue` is true if the page is full and resides in the full queue (so we move it to a regular queue on free-ing)
#define MI_PAGE_IN_FULL_QUEUE MI_ZU(0x01)
#define MI_PAGE_HAS_ALIGNED MI_ZU(0x02)
#define MI_PAGE_FLAG_MASK MI_ZU(0x03)
typedef size_t mi_page_flags_t;
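// For illustration: with the flags in the bottom bits of `xthread_id`, the free fast
// path can test "owned by this thread and no flags set" with a single comparison
// (a sketch of the idea used in `free.c:mi_free`; thread ids have their low bits
// clear so the flag bits never collide):
static inline bool sketch_is_fast_local_free(size_t xthread_id, size_t my_thread_id) {
  return (xthread_id == my_thread_id);  // false if abandoned, in the full queue, or has aligned blocks
}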
// The delayed flags are used for efficient multi-threaded free-ing
typedef enum mi_delayed_e {
MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
MI_NEVER_DELAYED_FREE = 3 // sticky: used for abandoned pages without an owning heap; this only resets on page reclaim
} mi_delayed_t;
// The `in_full` and `has_aligned` page flags are put in a union to efficiently
// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
#if !MI_TSAN
typedef union mi_page_flags_s {
uint8_t full_aligned;
struct {
uint8_t in_full : 1;
uint8_t has_aligned : 1;
} x;
} mi_page_flags_t;
#else
// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
typedef union mi_page_flags_s {
uint32_t full_aligned;
struct {
uint8_t in_full;
uint8_t has_aligned;
} x;
} mi_page_flags_t;
#endif
// There are two special threadid's: 0 for abandoned threads, and 4 for abandoned & mapped threads --
// abandoned-mapped pages are abandoned but also mapped in an arena so can be quickly found for reuse.
#define MI_THREADID_ABANDONED MI_ZU(0)
#define MI_THREADID_ABANDONED_MAPPED (MI_PAGE_FLAG_MASK + 1)
// Thread free list.
// Points to a list of blocks that are freed by other threads.
// The least significant bit is set if the page is owned by the current thread (`mi_page_is_owned`).
// Ownership is required before we can read any non-atomic fields in the page.
// This way we can push a block on the thread free list and try to claim ownership atomically in `free.c:mi_free_block_mt`.
typedef uintptr_t mi_thread_free_t;
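// A sketch of the encoding described above: the least significant bit is the ownership
// bit and the remaining bits hold the list head (the real helpers follow these lines):
static inline mi_block_t* sketch_tf_block(mi_thread_free_t tf) {
  return (mi_block_t*)(tf & ~((uintptr_t)1));       // strip the ownership bit
}
static inline bool sketch_tf_is_owned(mi_thread_free_t tf) {
  return ((tf & 1) != 0);
}
static inline mi_thread_free_t sketch_tf_create(mi_block_t* block, bool owned) {
  return ((mi_thread_free_t)block | (owned ? 1 : 0));
}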
// A heap can serve only specific objects signified by its heap tag (e.g. various object types in CPython)
typedef uint8_t mi_heaptag_t;
// A page contains blocks of one specific size (`block_size`).
// Each page has three lists of free blocks:
// `free` for blocks that can be allocated,
@ -285,169 +275,109 @@ typedef uintptr_t mi_thread_free_t;
// The `local_free` and `thread_free` lists are migrated to the `free` list
// when it is exhausted. The separate `local_free` list is necessary to
// implement a monotonic heartbeat. The `thread_free` list is needed for
// avoiding atomic operations in the common case.
// avoiding atomic operations when allocating from the owning thread.
//
// `used - |thread_free|` == actual blocks that are in use (alive)
// `used - |thread_free| + |free| + |local_free| == capacity`
//
// We don't count `freed` (as |free|) but use `used` to reduce
// We don't count "freed" (as |free|) but use only the `used` field to reduce
// the number of memory accesses in the `mi_page_all_free` function(s).
// Use `_mi_page_free_collect` to collect the thread_free list and update the `used` count.
//
// Notes:
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Non-atomic fields can only be accessed if having _ownership_ (low bit of `xthread_free` is 1).
// Combining the `thread_free` list with an ownership bit allows a concurrent `free` to atomically
// free an object and (re)claim ownership if the page was abandoned.
// - If a page is not part of a heap it is called "abandoned" (`heap==NULL`) -- in
// that case the `xthread_id` is 0 or 4 (4 is for abandoned pages that
// are in the abandoned page lists of an arena, these are called "mapped" abandoned pages).
// - page flags are in the bottom 3 bits of `xthread_id` for the fast path in `mi_free`.
// - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Using `uint16_t` does not seem to slow things down
// - The size is 10 words on 64-bit which helps the page index calculations
// (and 12 words on 32-bit, and encoded free lists add 2 words)
// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
// The invariant is that no-delayed-free is only set if there is
// at least one block that will be added, or has already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
_Atomic(mi_threadid_t) xthread_id; // thread this page belongs to. (= `heap->thread_id (or 0 or 4 if abandoned) | page_flags`)
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
uint16_t capacity; // number of blocks committed
uint16_t reserved; // number of blocks reserved in memory
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
uint8_t retire_expire; // expiration count for retired blocks
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads (= `mi_block_t* | (1 if owned)`)
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the blocks
mi_heaptag_t heap_tag; // tag of the owning heap, used to separate heaps by object type
bool free_is_zero; // `true` if the blocks in the free list are zero initialized
// padding
#if (MI_ENCODE_FREELIST || MI_PADDING)
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
#endif
mi_heap_t* heap; // the heap owning this page (or NULL for abandoned pages)
struct mi_page_s* next; // next page owned by the heap with the same `block_size`
struct mi_page_s* prev; // previous page owned by the heap with the same `block_size`
size_t slice_committed; // committed size relative to the first arena slice of the page data (or 0 if the page is fully committed already)
mi_memid_t memid; // provenance of the page memory
} mi_page_t;
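// To make the `used`/`thread_free` accounting concrete, a simplified model of what
// `_mi_page_free_collect` does, over plain lists (the real code atomically swaps out
// the encoded `xthread_free` list while keeping the ownership bit set):
//
//   while (thread_free != NULL) {             // blocks freed by other threads
//     mi_block_t* b = thread_free;
//     thread_free = next_of(b);               // really read via `_mi_block_next`
//     push_on(&page->free, b);                // make the block allocatable again
//     page->used--;                           // `used` still counted these blocks
//   }
//
// Afterwards `used` again equals the number of blocks that are actually alive.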
// ------------------------------------------------------
// Object sizes
// ------------------------------------------------------
#define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map.
#define MI_PAGE_MIN_START_BLOCK_ALIGN MI_MAX_ALIGN_SIZE // minimal block alignment for the first block in a page (16b)
#define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks (such that we guarantee natural alignment)
#define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation
#if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8
#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+2)*32) // 160 >= sizeof(mi_page_t)
#else
#define MI_PAGE_INFO_SIZE ((MI_INTPTR_SHIFT+1)*32) // 128/96 >= sizeof(mi_page_t)
#endif
// The max object sizes are checked so that we don't waste more than 12.5% internally over the page sizes.
#define MI_SMALL_MAX_OBJ_SIZE ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < ~8 KiB
#if MI_ENABLE_LARGE_PAGES
#define MI_MEDIUM_MAX_OBJ_SIZE ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8) // < 64 KiB
#define MI_LARGE_MAX_OBJ_SIZE (MI_LARGE_PAGE_SIZE/8) // <= 512KiB // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
#else
#define MI_MEDIUM_MAX_OBJ_SIZE (MI_MEDIUM_PAGE_SIZE/4) // <= 128 KiB
#define MI_LARGE_MAX_OBJ_SIZE MI_MEDIUM_MAX_OBJ_SIZE // note: this must be a nice power of 2 or we get rounding issues with `_mi_bin`
#endif
#define MI_LARGE_MAX_OBJ_WSIZE (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)
#if (MI_LARGE_MAX_OBJ_WSIZE >= 655360)
#error "mimalloc internal: define more bins"
#endif
// ------------------------------------------------------
// Page kinds
// ------------------------------------------------------
typedef enum mi_page_kind_e {
MI_PAGE_SMALL, // small blocks go into 64KiB pages
MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages
MI_PAGE_LARGE, // larger blocks go into 4MiB pages (if `MI_ENABLE_LARGE_PAGES==1`)
MI_PAGE_SINGLETON // page containing a single block.
// used for blocks `> MI_LARGE_MAX_OBJ_SIZE` or an alignment `> MI_PAGE_MAX_OVERALLOC_ALIGN`.
} mi_page_kind_t;
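// A hedged sketch (hypothetical helper) of choosing a page kind from a block size,
// following the limits defined above:
static inline mi_page_kind_t sketch_page_kind_of(size_t block_size) {
  if (block_size <= MI_SMALL_MAX_OBJ_SIZE)  return MI_PAGE_SMALL;
  if (block_size <= MI_MEDIUM_MAX_OBJ_SIZE) return MI_PAGE_MEDIUM;
  if (block_size <= MI_LARGE_MAX_OBJ_SIZE)  return MI_PAGE_LARGE;
  return MI_PAGE_SINGLETON;  // huge blocks (or very large alignments) get a page of their own
}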
// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in an arena, be allocated directly from the OS, or be statically allocated. The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
MI_MEM_OS_REMAP, // allocated in a remappable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case)
} mi_memkind_t;
static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP);
}
typedef struct mi_memid_os_info {
void* base; // actual base address of the block (used for offset aligned allocations)
size_t size; // full allocation size
} mi_memid_os_info_t;
typedef struct mi_memid_arena_info {
size_t block_index; // index in the arena
mi_arena_id_t id; // arena id (>= 1)
bool is_exclusive; // this arena can only be used for specific arena allocations
} mi_memid_arena_info_t;
typedef struct mi_memid_s {
union {
mi_memid_os_info_t os; // only used for MI_MEM_OS
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
} mem;
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2MiB) or huge (1GiB) OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
mi_memkind_t memkind;
} mi_memid_t;
// ---------------------------------------------------------------
// Segments contain mimalloc pages
// ---------------------------------------------------------------
typedef struct mi_subproc_s mi_subproc_t;
// Segments are large allocated memory blocks (2MiB on 64 bit) from the OS.
// Inside segments we allocate fixed size _pages_ that contain blocks.
typedef struct mi_segment_s {
// constant fields
mi_memid_t memid; // memory id to track provenance
bool allow_decommit;
bool allow_purge;
size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE`
mi_subproc_t* subproc; // segment belongs to sub process
// segment fields
struct mi_segment_s* next; // must be the first (non-constant) segment field -- see `segment.c:segment_init`
struct mi_segment_s* prev;
bool was_reclaimed; // true if it was reclaimed (used to limit reclaim-on-free reclamation)
bool dont_free; // can be temporarily true to ensure the segment is not freed
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t abandoned_visits; // count how often this segment is visited for reclaiming (to force reclaim if it takes too long)
size_t used; // count of pages in use (`used <= capacity`)
size_t capacity; // count of available pages (`#free + used`)
size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages.
uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
struct mi_segment_s* abandoned_os_next; // only used for abandoned segments outside arena's, and only if `mi_option_visit_abandoned` is enabled
struct mi_segment_s* abandoned_os_prev;
// layout like this to optimize access in `mi_free`
_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
mi_page_kind_t page_kind; // kind of pages: small, medium, large, or huge
mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
} mi_segment_t;
// ------------------------------------------------------
// Heaps
//
// Provide first-class heaps to allocate from.
// A heap just owns a set of pages for allocation and
// can only allocate/reallocate from the thread that created it.
// Freeing blocks can be done from any thread though.
// Per thread, the segments are shared among its heaps.
//
// Per thread, there is always a default heap that is
// used for allocation; it is initialized to statically
// point to an empty heap to avoid initialization checks
@ -461,11 +391,10 @@ typedef struct mi_tld_s mi_tld_t;
typedef struct mi_page_queue_s {
mi_page_t* first;
mi_page_t* last;
size_t count;
size_t block_size;
} mi_page_queue_t;
#define MI_BIN_FULL (MI_BIN_HUGE+1)
// Random context
typedef struct mi_random_cxt_s {
uint32_t input[16];
@ -476,7 +405,7 @@ typedef struct mi_random_cxt_s {
// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows
#if MI_PADDING
typedef struct mi_padding_s {
uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes)
@ -493,12 +422,9 @@ typedef struct mi_padding_s {
// A heap owns a set of pages.
struct mi_heap_s {
mi_tld_t* tld; // thread-local data
mi_arena_t* exclusive_arena; // if the heap should only allocate from a specific arena (or NULL)
uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
mi_random_ctx_t random; // random number context used for secure allocation
size_t page_count; // total number of pages in the `pages` queues.
size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues)
@ -506,7 +432,9 @@ struct mi_heap_s {
long generic_count; // how often is `_mi_malloc_generic` called?
long generic_collect_count; // how often is `_mi_malloc_generic` called without collecting?
mi_heap_t* next; // list of heaps per thread
long page_full_retain; // how many full pages can be retained per queue (before abandoning them)
bool allow_page_reclaim; // `true` if this heap can reclaim abandoned pages
bool allow_page_abandon; // `true` if this heap can abandon pages to reduce memory footprint
uint8_t tag; // custom tag, can be used for separating heaps based on the object types
#if MI_GUARDED
size_t guarded_size_min; // minimal size for guarded objects
@ -516,7 +444,8 @@ struct mi_heap_s {
size_t guarded_sample_count; // current sample count (counting down to 0)
#endif
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_COUNT]; // queue of pages for each size class (or "bin")
mi_memid_t memid; // provenance of the heap struct itself (meta or os)
};
@ -526,15 +455,23 @@ struct mi_heap_s {
// static variables of a process.
// ------------------------------------------------------
struct mi_subproc_s {
_Atomic(size_t) abandoned_count; // count of abandoned segments for this sub-process
_Atomic(size_t) abandoned_os_list_count; // count of abandoned segments in the os-list
mi_lock_t abandoned_os_lock; // lock for the abandoned os segment list (outside of arena's) (this lock protect list operations)
mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list
mi_segment_t* abandoned_os_list; // doubly-linked list of abandoned segments outside of arena's (in OS allocated memory)
mi_segment_t* abandoned_os_list_tail; // the tail-end of the list
mi_memid_t memid; // provenance of this memory block
};
#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`)
// 160 arenas is enough for ~2 TiB memory
typedef struct mi_subproc_s {
_Atomic(size_t) arena_count; // current count of arena's
_Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process
mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time
_Atomic(int64_t) purge_expire; // expiration is set if any arenas can be purged
_Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process
mi_page_t* os_abandoned_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on)
mi_lock_t os_abandoned_pages_lock; // lock for the os abandoned pages list (this lock protects list operations)
mi_memid_t memid; // provenance of this memory block (meta or OS)
mi_stats_t stats; // sub-process statistics (tld stats are merged in on thread termination)
} mi_subproc_t;
// ------------------------------------------------------
@ -544,37 +481,44 @@ struct mi_subproc_s {
// Milliseconds as in `int64_t` to avoid overflows
typedef int64_t mi_msecs_t;
// Queue of segments
typedef struct mi_segment_queue_s {
mi_segment_t* first;
mi_segment_t* last;
} mi_segment_queue_t;
// Segments thread local data
typedef struct mi_segments_tld_s {
mi_segment_queue_t small_free; // queue of segments with free small pages
mi_segment_queue_t medium_free; // queue of segments with free medium pages
mi_page_queue_t pages_purge; // queue of freed pages that are delay purged
size_t count; // current number of segments;
size_t peak_count; // peak number of segments
size_t current_size; // current size of all segments
size_t peak_size; // peak size of all segments
size_t reclaim_count;// number of reclaimed (abandoned) segments
mi_subproc_t* subproc; // sub-process this thread belongs to.
mi_stats_t* stats; // points to tld stats
} mi_segments_tld_t;
// Thread local data
struct mi_tld_s {
mi_threadid_t thread_id; // thread id of this thread
size_t thread_seq; // thread sequence id (linear count of created threads)
mi_subproc_t* subproc; // sub-process this thread belongs to.
mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
unsigned long long heartbeat; // monotonic heartbeat count
bool recurse; // true if deferred was called; used to prevent infinite recursion.
bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks)
mi_stats_t stats; // statistics
mi_memid_t memid; // provenance of the tld memory itself (meta or OS)
};
/* -----------------------------------------------------------
Error codes passed to `_mi_fatal_error`
All are recoverable but EFAULT is a serious error and aborts by default in secure mode.
For portability define undefined error codes using common Unix codes:
<https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html>
----------------------------------------------------------- */
#ifndef EAGAIN // double free
#define EAGAIN (11)
#endif
#ifndef ENOMEM // out of memory
#define ENOMEM (12)
#endif
#ifndef EFAULT // corrupted free-list or meta-data
#define EFAULT (14)
#endif
#ifndef EINVAL // trying to free an invalid pointer
#define EINVAL (22)
#endif
#ifndef EOVERFLOW // count*size overflow
#define EOVERFLOW (75)
#endif
// ------------------------------------------------------
// Debug
@ -622,25 +566,61 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line
#endif
#endif
// add to stat keeping track of the peak
void __mi_stat_increase(mi_stat_count_t* stat, size_t amount);
void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount);
void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount);
// adjust stat in special cases to compensate for double counting (and does not adjust peak values and can decrease the total)
void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount);
void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount);
void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount);
void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount);
// counters can just be increased
void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount);
#if (MI_STAT)
#define mi_debug_stat_increase(stat,amount) __mi_stat_increase( &(stat), amount)
#define mi_debug_stat_decrease(stat,amount) __mi_stat_decrease( &(stat), amount)
#define mi_debug_stat_counter_increase(stat,amount) __mi_stat_counter_increase( &(stat), amount)
#define mi_debug_stat_increase_mt(stat,amount) __mi_stat_increase_mt( &(stat), amount)
#define mi_debug_stat_decrease_mt(stat,amount) __mi_stat_decrease_mt( &(stat), amount)
#define mi_debug_stat_counter_increase_mt(stat,amount) __mi_stat_counter_increase_mt( &(stat), amount)
#else
#define mi_debug_stat_increase(stat,amount) ((void)0)
#define mi_debug_stat_decrease(stat,amount) ((void)0)
#define mi_debug_stat_counter_increase(stat,amount) ((void)0)
#define mi_debug_stat_increase_mt(stat,amount) ((void)0)
#define mi_debug_stat_decrease_mt(stat,amount) ((void)0)
#define mi_debug_stat_counter_increase_mt(stat,amount) ((void)0)
#endif
#define mi_subproc_stat_counter_increase(subproc,stat,amount) __mi_stat_counter_increase_mt( &(subproc)->stats.stat, amount)
#define mi_subproc_stat_increase(subproc,stat,amount) __mi_stat_increase_mt( &(subproc)->stats.stat, amount)
#define mi_subproc_stat_decrease(subproc,stat,amount) __mi_stat_decrease_mt( &(subproc)->stats.stat, amount)
#define mi_subproc_stat_adjust_increase(subproc,stat,amnt) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amnt)
#define mi_subproc_stat_adjust_decrease(subproc,stat,amnt) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amnt)
#define mi_tld_stat_counter_increase(tld,stat,amount) __mi_stat_counter_increase( &(tld)->stats.stat, amount)
#define mi_tld_stat_increase(tld,stat,amount) __mi_stat_increase( &(tld)->stats.stat, amount)
#define mi_tld_stat_decrease(tld,stat,amount) __mi_stat_decrease( &(tld)->stats.stat, amount)
#define mi_tld_stat_adjust_increase(tld,stat,amnt) __mi_stat_adjust_increase( &(tld)->stats.stat, amnt)
#define mi_tld_stat_adjust_decrease(tld,stat,amnt) __mi_stat_adjust_decrease( &(tld)->stats.stat, amnt)
#define mi_os_stat_counter_increase(stat,amount) mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount)
#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount)
#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount)
#define mi_heap_stat_counter_increase(heap,stat,amount) mi_tld_stat_counter_increase(heap->tld, stat, amount)
#define mi_heap_stat_increase(heap,stat,amount) mi_tld_stat_increase( heap->tld, stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) mi_tld_stat_decrease( heap->tld, stat, amount)
#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_stat_counter_increase( (heap)->tld->stats.stat, amount)
#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_stat_increase( (heap)->tld->stats.stat, amount)
#define mi_debug_heap_stat_decrease(heap,stat,amount) mi_debug_stat_decrease( (heap)->tld->stats.stat, amount)
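// For example, a call site in `alloc.c` below uses
//   mi_heap_stat_increase(heap, malloc_normal, bsize);
// which expands (via `mi_tld_stat_increase`) to
//   __mi_stat_increase(&heap->tld->stats.malloc_normal, bsize);
// while the `mi_debug_stat_*` variants compile away to `((void)0)` unless MI_STAT is enabled.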
#endif // MI_TYPES_H

View file

@ -16,21 +16,22 @@ terms of the MIT license. A copy of the license can be found in the file
// ------------------------------------------------------
static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) {
// objects up to `MI_PAGE_MIN_BLOCK_ALIGN` are always allocated aligned to their size
mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0));
if (alignment > size) return false;
if (alignment <= MI_MAX_ALIGN_SIZE) return true;
const size_t bsize = mi_good_size(size);
const bool ok = (bsize <= MI_PAGE_MAX_START_BLOCK_ALIGN2 && _mi_is_power_of_two(bsize));
if (ok) { mi_assert_internal((bsize & (alignment-1)) == 0); } // since both power of 2 and alignment <= size
return ok;
}
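// Usage sketch (hypothetical driver using the public API): when the natural alignment
// already suffices, a plain allocation can be returned without over-allocation or
// pointer adjustment; otherwise fall back to the general aligned path:
//
//   void* sketch_malloc_aligned(size_t size, size_t alignment) {
//     if (mi_malloc_is_naturally_aligned(size, alignment)) {
//       return mi_malloc(size);                   // the size class guarantees the alignment
//     }
//     return mi_malloc_aligned(size, alignment);  // general (possibly over-allocating) path
//   }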
#if MI_GUARDED
static mi_decl_restrict void* mi_heap_malloc_guarded_aligned(mi_heap_t* heap, size_t size, size_t alignment, bool zero) mi_attr_noexcept {
// use over-allocation for guarded blocks
mi_assert_internal(alignment > 0 && alignment < MI_PAGE_MAX_OVERALLOC_ALIGN);
const size_t oversize = size + alignment - 1;
void* base = _mi_heap_malloc_guarded(heap, oversize, zero);
void* p = _mi_align_up_ptr(base, alignment);
mi_track_align(base, p, (uint8_t*)p - (uint8_t*)base, size);
mi_assert_internal(mi_usable_size(p) >= size);
mi_assert_internal(_mi_is_aligned(p, alignment));
@ -59,21 +60,20 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
void* p;
size_t oversize;
if mi_unlikely(alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) {
// use OS allocation for large alignments and allocate inside a singleton page (not in an arena)
// This can support alignments >= MI_PAGE_ALIGN by ensuring the object can be aligned
// in the first (and single) page such that the page info is `MI_PAGE_ALIGN` bytes before it (and can be found in the _mi_page_map).
if mi_unlikely(offset != 0) {
// todo: cannot support offset alignment for very large alignments yet
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation with a large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
#endif
return NULL;
}
oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
// note: no guarded as alignment > 0
// zero afterwards as only the area from the aligned_p may be committed!
p = _mi_heap_malloc_zero_ex(heap, oversize, zero, alignment); // the page block size should be large enough to align in the single huge page block
if (p == NULL) return NULL;
}
else {
@ -114,13 +114,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t
#endif
// now zero the block if needed
//if (alignment > MI_PAGE_MAX_OVERALLOC_ALIGN) {
// // for the tracker, on huge aligned allocations only from the start of the large block is defined
// mi_track_mem_undefined(aligned_p, size);
// if (zero) {
// _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p));
// }
//}
if (p != aligned_p) {
mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
@ -177,12 +177,14 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
}
#if MI_GUARDED
if (offset==0 && alignment < MI_PAGE_MAX_OVERALLOC_ALIGN && mi_heap_malloc_use_guarded(heap,size)) {
return mi_heap_malloc_guarded_aligned(heap, size, alignment, zero);
}
#endif
// try first if there happens to be a small block available with just the right alignment
// since most small power-of-2 blocks (under MI_PAGE_MAX_BLOCK_START_ALIGN2) are already
// naturally aligned this can be often the case.
if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) {
const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE;

View file

@ -30,7 +30,11 @@ terms of the MIT license. A copy of the license can be found in the file
// Note: in release mode the (inlined) routine is about 7 instructions with a single test.
extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept
{
if (page->block_size != 0) { // not the empty heap
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
}
// check the free list
mi_block_t* const block = page->free;
@ -82,7 +86,7 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
#if (MI_STAT>0)
const size_t bsize = mi_page_usable_block_size(page);
if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_increase(heap, malloc_normal, bsize);
mi_heap_stat_counter_increase(heap, malloc_normal_count, 1);
#if (MI_STAT>1)
@ -130,7 +134,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap,
mi_assert(size <= MI_SMALL_SIZE_MAX);
#if MI_DEBUG
const uintptr_t tid = _mi_thread_id();
mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == tid); // heaps are thread local
#endif
#if (MI_PADDING || MI_GUARDED)
if (size == 0) { size = sizeof(void*); }
@ -184,7 +188,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z
else {
// regular allocation
mi_assert(heap!=NULL);
mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == _mi_thread_id()); // heaps are thread local
void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic
mi_track_malloc(p,size,zero);
@ -268,7 +272,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
// if p == NULL then behave as malloc.
// else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)).
// (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.)
const size_t size = (p==NULL ? 0 : _mi_usable_size(p,"mi_realloc"));
if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0)
mi_assert_internal(p!=NULL);
// todo: do not track as the usable size is still the same in the free; adjust potential padding?
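// For example, under the rule above (a sketch; exact reuse also depends on the
// usable size of the block's size class):
//   void* p = mi_malloc(1000);
//   p = mi_realloc(p, 600);  // 600 >= usable/2: the block is reused in place
//   p = mi_realloc(p, 100);  // shrunk below half: may move to a smaller block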
@ -615,7 +619,6 @@ static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
block->next = MI_BLOCK_TAG_GUARDED;
// set guard page at the end of the block
const size_t block_size = mi_page_block_size(page); // must use `block_size` to match `mi_free_local`
const size_t os_page_size = _mi_os_page_size();
mi_assert_internal(block_size >= obj_size + os_page_size + sizeof(mi_block_t));
@ -625,8 +628,11 @@ static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
return NULL;
}
uint8_t* guard_page = (uint8_t*)block + block_size - os_page_size;
// note: the alignment of the guard page relies on blocks being os_page_size aligned which
// is ensured in `mi_arena_page_alloc_fresh`.
mi_assert_internal(_mi_is_aligned(block, os_page_size));
mi_assert_internal(_mi_is_aligned(guard_page, os_page_size));
if (!page->memid.is_pinned && _mi_is_aligned(guard_page, os_page_size)) {
_mi_os_protect(guard_page, os_page_size);
}
else {
@ -636,11 +642,11 @@ static void* mi_block_ptr_set_guarded(mi_block_t* block, size_t obj_size) {
// align pointer just in front of the guard page
size_t offset = block_size - os_page_size - obj_size;
mi_assert_internal(offset > sizeof(mi_block_t));
if (offset > MI_PAGE_MAX_OVERALLOC_ALIGN) {
// give up to place it right in front of the guard page if the offset is too large for unalignment
offset = MI_PAGE_MAX_OVERALLOC_ALIGN;
}
void* p = (uint8_t*)block + offset;
mi_track_align(block, p, offset, obj_size);
mi_track_mem_defined(block, sizeof(mi_block_t));
return p;
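// Usage sketch for guarded builds (assuming a build with MI_GUARDED=1 and the guarded
// options, e.g. set through the environment):
//
//   MIMALLOC_GUARDED_SAMPLE_RATE=1000 MIMALLOC_GUARDED_MIN=16 ./myprogram
//
// samples roughly 1 in 1000 qualifying allocations and places a protected guard page
// right behind each sampled object, so an out-of-bounds write faults immediately
// instead of silently corrupting the heap.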
@ -659,16 +665,16 @@ mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, boo
const size_t req_size = _mi_align_up(bsize + os_page_size, os_page_size);
mi_block_t* const block = (mi_block_t*)_mi_malloc_generic(heap, req_size, zero, 0 /* huge_alignment */);
if (block==NULL) return NULL;
void* const p = mi_block_ptr_set_guarded(block, obj_size);
// stats
mi_track_malloc(p, size, zero);
if (p != NULL) {
if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); }
#if MI_STAT>1
mi_heap_stat_increase(heap, malloc_requested, mi_usable_size(p));
#endif
mi_heap_stat_counter_increase(heap, malloc_guarded_count, 1);
}
#if MI_DEBUG>3
if (p != NULL && zero) {

View file

@ -1,346 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#if !defined(MI_IN_ARENA_C)
#error "this file should be included from 'arena.c' (so mi_arena_t is visible)"
// add includes to help an IDE
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
#endif
// Minimal exports for arena-abandoned.
size_t mi_arena_id_index(mi_arena_id_t id);
mi_arena_t* mi_arena_from_index(size_t idx);
size_t mi_arena_get_count(void);
void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex);
bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index);
/* -----------------------------------------------------------
Abandoned blocks/segments:
_mi_arena_segment_clear_abandoned
_mi_arena_segment_mark_abandoned
This is used to atomically abandon/reclaim segments
(and crosses the arena API but it is convenient to have here).
Abandoned segments still have live blocks; they get reclaimed
when a thread frees a block in it, or when a thread needs a fresh
segment.
Abandoned segments are atomically marked in the `blocks_abandoned`
bitmap of arenas. Any segments allocated outside arenas are put
in the sub-process `abandoned_os_list`. This list is accessed
using locks but this should be uncommon and generally uncontended.
Reclaiming and visiting either scan through the `blocks_abandoned`
bitmaps of the arena's, or visit the `abandoned_os_list`.
A potentially nicer design is to use arena's for everything,
and perhaps have virtual arena's to map OS allocated memory,
but this would lack the "density" of our current arena's. TBC.
----------------------------------------------------------- */
// reclaim a specific OS abandoned segment; `true` on success.
// sets the thread_id.
static bool mi_arena_segment_os_clear_abandoned(mi_segment_t* segment, bool take_lock) {
mi_assert(segment->memid.memkind != MI_MEM_ARENA);
// not in an arena, remove from list of abandoned os segments
mi_subproc_t* const subproc = segment->subproc;
if (take_lock && !mi_lock_try_acquire(&subproc->abandoned_os_lock)) {
return false; // failed to acquire the lock, we just give up
}
// remove atomically from the abandoned os list (if possible!)
bool reclaimed = false;
mi_segment_t* const next = segment->abandoned_os_next;
mi_segment_t* const prev = segment->abandoned_os_prev;
if (next != NULL || prev != NULL || subproc->abandoned_os_list == segment) {
#if MI_DEBUG>3
// find ourselves in the abandoned list (and check the count)
bool found = false;
size_t count = 0;
for (mi_segment_t* current = subproc->abandoned_os_list; current != NULL; current = current->abandoned_os_next) {
if (current == segment) { found = true; }
count++;
}
mi_assert_internal(found);
mi_assert_internal(count == mi_atomic_load_relaxed(&subproc->abandoned_os_list_count));
#endif
// remove (atomically) from the list and reclaim
if (prev != NULL) { prev->abandoned_os_next = next; }
else { subproc->abandoned_os_list = next; }
if (next != NULL) { next->abandoned_os_prev = prev; }
else { subproc->abandoned_os_list_tail = prev; }
segment->abandoned_os_next = NULL;
segment->abandoned_os_prev = NULL;
mi_atomic_decrement_relaxed(&subproc->abandoned_count);
mi_atomic_decrement_relaxed(&subproc->abandoned_os_list_count);
if (take_lock) { // don't reset the thread_id when iterating
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
}
reclaimed = true;
}
if (take_lock) { mi_lock_release(&segment->subproc->abandoned_os_lock); }
return reclaimed;
}
// reclaim a specific abandoned segment; `true` on success.
// sets the thread_id.
bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment) {
if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) {
return mi_arena_segment_os_clear_abandoned(segment, true /* take lock */);
}
// arena segment: use the blocks_abandoned bitmap.
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
mi_assert_internal(arena != NULL);
// reclaim atomically
bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx);
if (was_marked) {
mi_assert_internal(mi_atomic_load_acquire(&segment->thread_id) == 0);
mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count);
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
}
// mi_assert_internal(was_marked);
mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return was_marked;
}
// mark a specific OS segment as abandoned
static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) {
mi_assert(segment->memid.memkind != MI_MEM_ARENA);
// not in an arena; we use a list of abandoned segments
mi_subproc_t* const subproc = segment->subproc;
mi_lock(&subproc->abandoned_os_lock) {
// push on the tail of the list (important for the visitor)
mi_segment_t* prev = subproc->abandoned_os_list_tail;
mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL);
mi_assert_internal(segment->abandoned_os_prev == NULL);
mi_assert_internal(segment->abandoned_os_next == NULL);
if (prev != NULL) { prev->abandoned_os_next = segment; }
else { subproc->abandoned_os_list = segment; }
subproc->abandoned_os_list_tail = segment;
segment->abandoned_os_prev = prev;
segment->abandoned_os_next = NULL;
mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count);
mi_atomic_increment_relaxed(&subproc->abandoned_count);
// and release the lock
}
return;
}
// mark a specific segment as abandoned
// clears the thread_id.
void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)
{
mi_assert_internal(segment->used == segment->abandoned);
mi_atomic_store_release(&segment->thread_id, (uintptr_t)0); // mark as abandoned for multi-thread free's
if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) {
mi_arena_segment_os_mark_abandoned(segment);
return;
}
// segment is in an arena, mark it in the arena `blocks_abandoned` bitmap
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
mi_assert_internal(arena != NULL);
// set abandonment atomically
mi_subproc_t* const subproc = segment->subproc; // don't access the segment after setting it abandoned
const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
if (was_unmarked) { mi_atomic_increment_relaxed(&subproc->abandoned_count); }
mi_assert_internal(was_unmarked);
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
}
/* -----------------------------------------------------------
Iterate through the abandoned blocks/segments using a cursor.
This is used for reclaiming and abandoned block visiting.
----------------------------------------------------------- */
// start a cursor at a randomized arena
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current) {
mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc);
current->bitmap_idx = 0;
current->subproc = subproc;
current->visit_all = visit_all;
current->hold_visit_lock = false;
const size_t abandoned_count = mi_atomic_load_relaxed(&subproc->abandoned_count);
const size_t abandoned_list_count = mi_atomic_load_relaxed(&subproc->abandoned_os_list_count);
const size_t max_arena = mi_arena_get_count();
if (heap != NULL && heap->arena_id != _mi_arena_id_none()) {
// for a heap that is bound to one arena, only visit that arena
current->start = mi_arena_id_index(heap->arena_id);
current->end = current->start + 1;
current->os_list_count = 0;
}
else {
// otherwise visit all starting at a random location
if (abandoned_count > abandoned_list_count && max_arena > 0) {
current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena));
current->end = current->start + max_arena;
}
else {
current->start = 0;
current->end = 0;
}
current->os_list_count = abandoned_list_count; // max entries to visit in the os abandoned list
}
mi_assert_internal(current->start <= max_arena);
}
void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current) {
if (current->hold_visit_lock) {
mi_lock_release(&current->subproc->abandoned_os_visit_lock);
current->hold_visit_lock = false;
}
}
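// Usage sketch of the cursor API above (hedged: assumes the combined
// `_mi_arena_segment_clear_abandoned_next` entry point that builds on the
// field/list helpers below):
//
//   mi_arena_field_cursor_t cursor;
//   _mi_arena_field_cursor_init(heap, subproc, false /* visit_all */, &cursor);
//   mi_segment_t* segment;
//   while ((segment = _mi_arena_segment_clear_abandoned_next(&cursor)) != NULL) {
//     // try to reclaim `segment` into this heap, or re-abandon it
//   }
//   _mi_arena_field_cursor_done(&cursor);  // releases the visit lock if held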
static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_subproc_t* subproc, mi_bitmap_index_t bitmap_idx) {
// try to reclaim an abandoned segment in the arena atomically
if (!_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) return NULL;
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
// check that the segment belongs to our sub-process
// note: this is the reason we need the `abandoned_visit` lock in the case abandoned visiting is enabled.
// without the lock an abandoned visit may otherwise fail to visit all abandoned segments in the sub-process.
// for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the `abandoned_visit` lock.
if (segment->subproc != subproc) {
// it is from another sub-process, re-mark it and continue searching
const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
mi_assert_internal(was_zero); MI_UNUSED(was_zero);
return NULL;
}
else {
// success, we unabandoned a segment in our sub-process
mi_atomic_decrement_relaxed(&subproc->abandoned_count);
return segment;
}
}
static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_cursor_t* previous) {
const size_t max_arena = mi_arena_get_count();
size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx);
  // visit arenas (from the previous cursor)
for (; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) {
// index wraps around
size_t arena_idx = (previous->start >= max_arena ? previous->start % max_arena : previous->start);
mi_arena_t* arena = mi_arena_from_index(arena_idx);
if (arena != NULL) {
bool has_lock = false;
// visit the abandoned fields (starting at previous_idx)
for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) {
size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]);
if mi_unlikely(field != 0) { // skip zero fields quickly
// we only take the arena lock if there are actually abandoned segments present
if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) {
has_lock = (previous->visit_all ? (mi_lock_acquire(&arena->abandoned_visit_lock),true) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
if (!has_lock) {
if (previous->visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock");
}
// skip to next arena
break;
}
}
mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned));
// visit each set bit in the field (todo: maybe use `ctz` here?)
for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) {
// pre-check if the bit is set
size_t mask = ((size_t)1 << bit_idx);
if mi_unlikely((field & mask) == mask) {
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, bitmap_idx);
if (segment != NULL) {
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
previous->bitmap_idx = mi_bitmap_index_create_ex(field_idx, bit_idx + 1); // start at next one for the next iteration
return segment;
}
}
}
}
}
if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); }
}
}
return NULL;
}
static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_cursor_t* previous) {
// go through the abandoned_os_list
  // we only allow one thread per sub-process to visit it, guarded by the `abandoned_os_visit_lock`.
// The lock is released when the cursor is released.
if (!previous->hold_visit_lock) {
previous->hold_visit_lock = (previous->visit_all ? (mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock),true)
: mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock));
if (!previous->hold_visit_lock) {
if (previous->visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock");
}
return NULL; // we cannot get the lock, give up
}
}
// One list entry at a time
while (previous->os_list_count > 0) {
previous->os_list_count--;
mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free`
mi_segment_t* segment = previous->subproc->abandoned_os_list;
// pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries)
if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) {
mi_lock_release(&previous->subproc->abandoned_os_lock);
return segment;
}
// already abandoned, try again
mi_lock_release(&previous->subproc->abandoned_os_lock);
}
// done
mi_assert_internal(previous->os_list_count == 0);
return NULL;
}
// reclaim abandoned segments
// this does not set the thread id (so it appears as still abandoned)
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous) {
if (previous->start < previous->end) {
// walk the arena
mi_segment_t* segment = mi_arena_segment_clear_abandoned_next_field(previous);
if (segment != NULL) { return segment; }
}
  // no entries in the arenas anymore, walk the abandoned OS list
mi_assert_internal(previous->start == previous->end);
return mi_arena_segment_clear_abandoned_next_list(previous);
}
bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
// (unfortunately) the visit_abandoned option must be enabled from the start.
// This is to avoid taking locks if abandoned list visiting is not required (as for most programs)
if (!mi_option_is_enabled(mi_option_visit_abandoned)) {
_mi_error_message(EFAULT, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON");
return false;
}
mi_arena_field_cursor_t current;
_mi_arena_field_cursor_init(NULL, _mi_subproc_from_id(subproc_id), true /* visit all (blocking) */, &current);
mi_segment_t* segment;
bool ok = true;
while (ok && (segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL) {
ok = _mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg);
_mi_arena_segment_mark_abandoned(segment);
}
_mi_arena_field_cursor_done(&current);
return ok;
}
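// A minimal illustrative sketch (not part of this commit) of the public API above,
// assuming the program runs with MIMALLOC_VISIT_ABANDONED=ON; `count_blocks` is a
// hypothetical visitor (`mi_subproc_main` and `mi_block_visit_fun` are part of the
// public mimalloc API):
//
//   static bool count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
//                            void* block, size_t block_size, void* arg) {
//     (void)heap; (void)area; (void)block_size;
//     if (block != NULL) { (*(size_t*)arg)++; }  // `block` is NULL for the per-area visit
//     return true;  // keep visiting
//   }
//
//   size_t count = 0;
//   mi_abandoned_visit_blocks(mi_subproc_main(), -1 /* any heap tag */,
//                             true /* visit individual blocks */, &count_blocks, &count);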

src/arena-meta.c Normal file (+174 lines)

View file

@@ -0,0 +1,174 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
We have a special "mini" allocator just for allocation of meta-data like
the heap (`mi_heap_t`) or thread-local data (`mi_tld_t`).
We reuse the arena bitmap code for the allocation of 128-byte blocks inside
an arena slice (64 KiB).
We always ensure that meta-data is zeroed (we zero on `free`).
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
/* -----------------------------------------------------------
Meta data allocation
----------------------------------------------------------- */
#define MI_META_PAGE_SIZE MI_ARENA_SLICE_SIZE
#define MI_META_PAGE_ALIGN MI_ARENA_SLICE_ALIGN
#define MI_META_BLOCK_SIZE (128) // large enough such that META_MAX_SIZE >= 4k (even on 32-bit)
#define MI_META_BLOCK_ALIGN MI_META_BLOCK_SIZE
#define MI_META_BLOCKS_PER_PAGE (MI_META_PAGE_SIZE / MI_META_BLOCK_SIZE) // 512
#define MI_META_MAX_SIZE (MI_BCHUNK_SIZE * MI_META_BLOCK_SIZE)
typedef struct mi_meta_page_s {
_Atomic(struct mi_meta_page_s*) next; // a linked list of meta-data pages (never released)
mi_memid_t memid; // provenance of the meta-page memory itself
mi_bbitmap_t blocks_free; // a small bitmap with 1 bit per block.
} mi_meta_page_t;
static mi_decl_cache_align _Atomic(mi_meta_page_t*) mi_meta_pages = MI_ATOMIC_VAR_INIT(NULL);
#if MI_DEBUG > 1
static mi_meta_page_t* mi_meta_page_of_ptr(void* p, size_t* block_idx) {
mi_meta_page_t* mpage = (mi_meta_page_t*)((uint8_t*)mi_align_down_ptr(p,MI_META_PAGE_ALIGN) + _mi_os_secure_guard_page_size());
if (block_idx != NULL) {
*block_idx = ((uint8_t*)p - (uint8_t*)mpage) / MI_META_BLOCK_SIZE;
}
return mpage;
}
#endif
static mi_meta_page_t* mi_meta_page_next( mi_meta_page_t* mpage ) {
return mi_atomic_load_ptr_acquire(mi_meta_page_t, &mpage->next);
}
static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) {
mi_assert_internal(_mi_is_aligned((uint8_t*)mpage - _mi_os_secure_guard_page_size(), MI_META_PAGE_ALIGN));
mi_assert_internal(block_idx < MI_META_BLOCKS_PER_PAGE);
void* p = ((uint8_t*)mpage - _mi_os_secure_guard_page_size() + (block_idx * MI_META_BLOCK_SIZE));
mi_assert_internal(mpage == mi_meta_page_of_ptr(p,NULL));
return p;
}
// allocate a fresh meta page and add it to the global list.
static mi_meta_page_t* mi_meta_page_zalloc(void) {
// allocate a fresh arena slice
// note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again..
mi_memid_t memid;
uint8_t* base = (uint8_t*)_mi_arenas_alloc_aligned(_mi_subproc(), MI_META_PAGE_SIZE, MI_META_PAGE_ALIGN, 0,
true /* commit*/, (MI_SECURE==0) /* allow large? */,
NULL /* req arena */, 0 /* thread_seq */, &memid);
if (base == NULL) return NULL;
mi_assert_internal(_mi_is_aligned(base,MI_META_PAGE_ALIGN));
if (!memid.initially_zero) {
_mi_memzero_aligned(base, MI_ARENA_SLICE_SIZE);
}
// guard pages
#if MI_SECURE >= 1
_mi_os_secure_guard_page_set_at(base, memid.is_pinned);
_mi_os_secure_guard_page_set_before(base + MI_META_PAGE_SIZE, memid.is_pinned);
#endif
// initialize the page and free block bitmap
mi_meta_page_t* mpage = (mi_meta_page_t*)(base + _mi_os_secure_guard_page_size());
mpage->memid = memid;
mi_bbitmap_init(&mpage->blocks_free, MI_META_BLOCKS_PER_PAGE, true /* already_zero */);
const size_t mpage_size = offsetof(mi_meta_page_t,blocks_free) + mi_bbitmap_size(MI_META_BLOCKS_PER_PAGE, NULL);
const size_t info_blocks = _mi_divide_up(mpage_size,MI_META_BLOCK_SIZE);
const size_t guard_blocks = _mi_divide_up(_mi_os_secure_guard_page_size(), MI_META_BLOCK_SIZE);
mi_assert_internal(info_blocks + 2*guard_blocks < MI_META_BLOCKS_PER_PAGE);
mi_bbitmap_unsafe_setN(&mpage->blocks_free, info_blocks + guard_blocks, MI_META_BLOCKS_PER_PAGE - info_blocks - 2*guard_blocks);
// push atomically in front of the meta page list
// (note: there is no ABA issue since we never free meta-pages)
mi_meta_page_t* old = mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages);
do {
mi_atomic_store_ptr_release(mi_meta_page_t, &mpage->next, old);
} while(!mi_atomic_cas_ptr_weak_acq_rel(mi_meta_page_t,&mi_meta_pages,&old,mpage));
return mpage;
}
// allocate meta-data
mi_decl_noinline void* _mi_meta_zalloc( size_t size, mi_memid_t* pmemid )
{
mi_assert_internal(pmemid != NULL);
size = _mi_align_up(size,MI_META_BLOCK_SIZE);
if (size == 0 || size > MI_META_MAX_SIZE) return NULL;
const size_t block_count = _mi_divide_up(size,MI_META_BLOCK_SIZE);
mi_assert_internal(block_count > 0 && block_count < MI_BCHUNK_BITS);
mi_meta_page_t* mpage0 = mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages);
mi_meta_page_t* mpage = mpage0;
while (mpage != NULL) {
size_t block_idx;
if (mi_bbitmap_try_find_and_clearN(&mpage->blocks_free, block_count, 0, &block_idx)) {
// found and claimed `block_count` blocks
*pmemid = _mi_memid_create_meta(mpage, block_idx, block_count);
return mi_meta_block_start(mpage,block_idx);
}
else {
mpage = mi_meta_page_next(mpage);
}
}
// failed to find space in existing pages
if (mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages) != mpage0) {
// the page list was updated by another thread in the meantime, retry
return _mi_meta_zalloc(size,pmemid);
}
// otherwise, allocate a fresh metapage and try once more
mpage = mi_meta_page_zalloc();
if (mpage != NULL) {
size_t block_idx;
if (mi_bbitmap_try_find_and_clearN(&mpage->blocks_free, block_count, 0, &block_idx)) {
// found and claimed `block_count` blocks
*pmemid = _mi_memid_create_meta(mpage, block_idx, block_count);
return mi_meta_block_start(mpage,block_idx);
}
}
// if all this failed, allocate from the OS
return _mi_os_alloc(size, pmemid);
}
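// A minimal illustrative sketch (not part of this commit) of internal usage:
// meta-data comes back zero'd, rounded up to MI_META_BLOCK_SIZE (128b) blocks,
// and must be freed with the same size and the returned `memid`.
//
//   mi_memid_t memid;
//   void* md = _mi_meta_zalloc(200, &memid);   // claims 2 blocks (256 bytes)
//   if (md != NULL) {
//     // ... use the zero-initialized meta-data ...
//     _mi_meta_free(md, 200, memid);           // re-zeroes and marks the blocks free
//   }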
// free meta-data
mi_decl_noinline void _mi_meta_free(void* p, size_t size, mi_memid_t memid) {
if (p==NULL) return;
if (memid.memkind == MI_MEM_META) {
mi_assert_internal(_mi_divide_up(size, MI_META_BLOCK_SIZE) == memid.mem.meta.block_count);
const size_t block_count = memid.mem.meta.block_count;
const size_t block_idx = memid.mem.meta.block_index;
mi_meta_page_t* mpage = (mi_meta_page_t*)memid.mem.meta.meta_page;
mi_assert_internal(mi_meta_page_of_ptr(p,NULL) == mpage);
mi_assert_internal(block_idx + block_count <= MI_META_BLOCKS_PER_PAGE);
mi_assert_internal(mi_bbitmap_is_clearN(&mpage->blocks_free, block_idx, block_count));
// we zero on free (and on the initial page allocation) so we don't need a "dirty" map
_mi_memzero_aligned(mi_meta_block_start(mpage, block_idx), block_count*MI_META_BLOCK_SIZE);
mi_bbitmap_setN(&mpage->blocks_free, block_idx, block_count);
}
else {
_mi_arenas_free(p,size,memid);
}
}
// used for debug output
bool _mi_meta_is_meta_page(void* p)
{
mi_meta_page_t* mpage0 = mi_atomic_load_ptr_acquire(mi_meta_page_t, &mi_meta_pages);
mi_meta_page_t* mpage = mpage0;
while (mpage != NULL) {
if ((void*)mpage == p) return true;
mpage = mi_meta_page_next(mpage);
}
return false;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

src/bitmap.h

View file

@@ -1,110 +1,313 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
Concurrent bitmap that can set/reset sequences of bits atomically,
represented as an array of fields where each field is a machine word (`size_t`)
There are two APIs; the standard one cannot have sequences that cross
between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS).
(this is used in region allocation)
The `_across` postfixed functions do allow sequences that can cross over
between the fields. (This is used in arena allocation)
Concurrent bitmap that can set/reset sequences of bits atomically
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_H
#define MI_BITMAP_H
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
/* --------------------------------------------------------------------------------
Atomic bitmaps with release/acquire guarantees:
#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE)
#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set
`mi_bfield_t`: is a single machine word that can efficiently be bit counted (usually `size_t`)
each bit usually represents a single MI_ARENA_SLICE_SIZE in an arena (64 KiB).
We need 16K bits to represent a 1GiB arena.
// An atomic bitmap of `size_t` fields
typedef _Atomic(size_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;
`mi_bchunk_t`: a chunk of bfield's of a total of MI_BCHUNK_BITS (= 512 on 64-bit, 256 on 32-bit)
allocations never span across chunks -- so MI_ARENA_MAX_OBJ_SIZE is the number
of bits in a chunk times the MI_ARENA_SLICE_SIZE (512 * 64KiB = 32 MiB).
These chunks are cache-aligned and we can use AVX2/AVX512/NEON/SVE/SVE2/etc. instructions
to scan for bits (perhaps) more efficiently.
// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;
We allocate byte-sized ranges aligned to bytes in the bfield, and bfield-sized
ranges aligned to a bfield.
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create_ex(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx <= MI_BITMAP_FIELD_BITS);
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
return mi_bitmap_index_create_ex(idx,bitidx);
}
Searching linearly through the chunks would be too slow (16K bits per GiB).
Instead we add a "chunkmap" to do a two-level search (more or less a btree of depth 2).
// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}
`mi_bchunkmap_t` (== `mi_bchunk_t`): for each chunk we track if it has (potentially) any bit set.
The chunkmap has 1 bit per chunk that is set if the chunk potentially has a bit set.
This is used to avoid scanning every chunk. (and thus strictly an optimization)
It is conservative: it is fine to set a bit in the chunk map even if the chunk turns out
to have no bits set. It is also allowed to briefly have a clear bit even if the
chunk has bits set -- as long as we guarantee that the bit will be set later on;
(this allows us to set the chunkmap bit right after we set a bit in the corresponding chunk).
// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}
However, when we clear a bit in a chunk, and the chunk is indeed all clear, we
cannot safely clear the bit corresponding to the chunk in the chunkmap since it
may race with another thread setting a bit in the same chunk. Therefore, when
clearing, we first test if a chunk is clear, then clear the chunkmap bit, and
then test again to catch any set bits that we may have missed.
// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
return bitmap_idx;
}
Since the chunkmap may thus be briefly out-of-sync, this means that we may sometimes
not find a free page even though it's there (but we accept this as we avoid taking
full locks). (Another way to do this is to use an epoch but we like to avoid that complexity
for now).
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
`mi_bitmap_t`: a bitmap with N chunks. A bitmap has a chunkmap of MI_BCHUNK_BITS (512)
and thus has at most 512 chunks (=2^18 bits x 64 KiB slices = 16 GiB max arena size).
The minimum is 1 chunk which is a 32 MiB arena.
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
For now, the implementation assumes MI_HAS_FAST_BITSCAN and uses trailing-zero-count
and pop-count (but we think it can be adapted to work reasonably well on older hardware too)
--------------------------------------------------------------------------------------------- */
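// A standalone illustrative sketch (not part of this commit) of the chunkmap
// clear protocol described above, written with plain C11 atomics instead of
// mimalloc's primitives: clear the chunkmap bit only when the chunk looks
// all-clear, then re-check and conservatively re-set it to catch a racing set.
//
//   #include <stdatomic.h>
//   #include <stdbool.h>
//   #include <stddef.h>
//
//   #define FIELD_BITS    (8*sizeof(size_t))
//   #define CHUNK_FIELDS  8
//   typedef struct { _Atomic(size_t) bfields[CHUNK_FIELDS]; } chunk_t;
//
//   static bool chunk_all_clear(chunk_t* c) {
//     for (size_t i = 0; i < CHUNK_FIELDS; i++) {
//       if (atomic_load(&c->bfields[i]) != 0) return false;
//     }
//     return true;
//   }
//
//   static void chunk_clear_bit(chunk_t* chunk, size_t bit,
//                               _Atomic(size_t)* chunkmap_field, size_t chunk_bit) {
//     atomic_fetch_and(&chunk->bfields[bit/FIELD_BITS], ~((size_t)1 << (bit%FIELD_BITS)));
//     if (chunk_all_clear(chunk)) {
//       atomic_fetch_and(chunkmap_field, ~((size_t)1 << chunk_bit));  // may race with a set..
//       if (!chunk_all_clear(chunk)) {                                // ..so test again
//         atomic_fetch_or(chunkmap_field, ((size_t)1 << chunk_bit));  // and re-set conservatively
//       }
//     }
//   }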
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields.
bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// A word-size bit field.
typedef size_t mi_bfield_t;
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
#define MI_BFIELD_BITS_SHIFT (MI_SIZE_SHIFT+3)
#define MI_BFIELD_BITS (1 << MI_BFIELD_BITS_SHIFT)
#define MI_BFIELD_SIZE (MI_BFIELD_BITS/8)
#define MI_BFIELD_LO_BIT8 (((~(mi_bfield_t)0))/0xFF) // 0x01010101 ..
#define MI_BFIELD_HI_BIT8 (MI_BFIELD_LO_BIT8 << 7) // 0x80808080 ..
// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically.
// Returns `true` if successful when all previous `count` bits were 0.
bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero);
bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
#define MI_BCHUNK_SIZE (MI_BCHUNK_BITS / 8)
#define MI_BCHUNK_FIELDS (MI_BCHUNK_BITS / MI_BFIELD_BITS) // 8 on both 64- and 32-bit
//--------------------------------------------------------------------------
// the `_across` functions work on bitmaps where sequences can cross over
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
// A bitmap chunk contains 512 bits on 64-bit (256 on 32-bit)
typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bchunk_s {
_Atomic(mi_bfield_t) bfields[MI_BCHUNK_FIELDS];
} mi_bchunk_t;
// Find `count` bits of zeros and set them to 1 atomically; returns `true` on success.
// Starts at idx, and wraps around to search in all `bitmap_fields` fields.
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx);
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
// The chunkmap has one bit per corresponding chunk that is set if the chunk potentially has bits set.
// The chunkmap is itself a chunk.
typedef mi_bchunk_t mi_bchunkmap_t;
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
#define MI_BCHUNKMAP_BITS MI_BCHUNK_BITS
#define MI_BITMAP_MAX_CHUNK_COUNT (MI_BCHUNKMAP_BITS)
#define MI_BITMAP_MIN_CHUNK_COUNT (1)
#if MI_SIZE_BITS > 32
#define MI_BITMAP_DEFAULT_CHUNK_COUNT (64) // 2 GiB on 64-bit -- this is for the page map
#else
#define MI_BITMAP_DEFAULT_CHUNK_COUNT (1)
#endif
#define MI_BITMAP_MAX_BIT_COUNT (MI_BITMAP_MAX_CHUNK_COUNT * MI_BCHUNK_BITS) // 16 GiB arena
#define MI_BITMAP_MIN_BIT_COUNT (MI_BITMAP_MIN_CHUNK_COUNT * MI_BCHUNK_BITS) // 32 MiB arena
#define MI_BITMAP_DEFAULT_BIT_COUNT (MI_BITMAP_DEFAULT_CHUNK_COUNT * MI_BCHUNK_BITS) // 2 GiB arena
// An atomic bitmap
typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bitmap_s {
_Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 1]; // suppress warning on msvc
mi_bchunkmap_t chunkmap;
mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
} mi_bitmap_t;
static inline size_t mi_bitmap_chunk_count(const mi_bitmap_t* bitmap) {
return mi_atomic_load_relaxed(&((mi_bitmap_t*)bitmap)->chunk_count);
}
static inline size_t mi_bitmap_max_bits(const mi_bitmap_t* bitmap) {
return (mi_bitmap_chunk_count(bitmap) * MI_BCHUNK_BITS);
}
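// For example, on 64-bit (MI_BCHUNK_BITS == 512): a 1 GiB arena has 16384 slices
// of 64 KiB and thus needs 16384 bits == 32 chunks; the default 64 chunks cover a
// 2 GiB arena, and the maximum of 512 chunks covers 512*512*64KiB == 16 GiB.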
/* --------------------------------------------------------------------------------
Atomic bitmap operations
-------------------------------------------------------------------------------- */
// Many operations are generic over setting or clearing the bit sequence: we use `mi_xset_t` for this (true if setting, false if clearing)
typedef bool mi_xset_t;
#define MI_BIT_SET (true)
#define MI_BIT_CLEAR (false)
// Required size of a bitmap to represent `bit_count` bits.
size_t mi_bitmap_size(size_t bit_count, size_t* chunk_count);
// Initialize a bitmap to all clear; avoid a mem_zero if `already_zero` is true
// returns the size of the bitmap.
size_t mi_bitmap_init(mi_bitmap_t* bitmap, size_t bit_count, bool already_zero);
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks).
// Not atomic so only use if still local to a thread.
void mi_bitmap_unsafe_setN(mi_bitmap_t* bitmap, size_t idx, size_t n);
// Set a bit in the bitmap; returns `true` if it atomically transitioned from 0 to 1
bool mi_bitmap_set(mi_bitmap_t* bitmap, size_t idx);
// Clear a bit in the bitmap; returns `true` if it atomically transitioned from 1 to 0
bool mi_bitmap_clear(mi_bitmap_t* bitmap, size_t idx);
// Set a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 0's to 1's
// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
// If `already_set` is not NULL, it is set to the count of bits that were already set.
// (this is used for correct statistics when committing over a partially committed area)
bool mi_bitmap_setN(mi_bitmap_t* bitmap, size_t idx, size_t n, size_t* already_set);
// Clear a sequence of `n` bits in the bitmap; returns `true` if atomically transitioned from all 1's to 0's
// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
bool mi_bitmap_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n);
// Is a sequence of n bits already all set/cleared?
bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n);
// Is a sequence of n bits already set?
// (Used to check if a memory range is already committed)
static inline bool mi_bitmap_is_setN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_is_xsetN(MI_BIT_SET, bitmap, idx, n);
}
// Is a sequence of n bits already clear?
static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
}
static inline bool mi_bitmap_is_set(mi_bitmap_t* bitmap, size_t idx) {
return mi_bitmap_is_setN(bitmap, idx, 1);
}
static inline bool mi_bitmap_is_clear(mi_bitmap_t* bitmap, size_t idx) {
return mi_bitmap_is_clearN(bitmap, idx, 1);
}
// Called once a bit is cleared to see if the memory slice can be claimed.
typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, mi_heaptag_t heap_tag, bool* keep_set);
// Find a set bit in the bitmap, atomically clear it, and check if `claim` returns true.
// If not claimed, continue on (potentially setting the bit again depending on `keep_set`).
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx,
mi_claim_fun_t* claim, mi_arena_t* arena, mi_heaptag_t heap_tag );
// Atomically clear a bit but only if it is set. Will block otherwise until the bit is set.
// This is used to delay freeing a page that is at the same time being considered to be
// allocated from `mi_arena_try_abandoned` (and is in the `claim` function of `mi_bitmap_try_find_and_claim`).
void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx);
// If a bit is set in the bitmap, return `true` and set `idx` to the index of the highest bit.
// Otherwise return `false` (and `*idx` is undefined).
// Used for unloading arenas
bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx);
typedef bool (mi_forall_set_fun_t)(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg2);
// Visit all set bits in a bitmap (`slice_count == 1`)
bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
// Visit all set bits in a bitmap with larger ranges if possible (`slice_count >= 1`)
bool _mi_bitmap_forall_setc_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
/* ----------------------------------------------------------------------------
Binned concurrent bitmap
Assigns a size class to each chunk such that small blocks don't cause too
much fragmentation since we keep chunks for larger blocks separate.
---------------------------------------------------------------------------- */
// Size bins; larger bins are allowed to go into smaller bins.
// SMALL can only go into SMALL (or NONE) chunks, so small blocks cannot fragment the larger bins.
typedef enum mi_bbin_e {
MI_BBIN_NONE, // no bin assigned yet (the chunk is completely free)
MI_BBIN_SMALL, // slice_count == 1
MI_BBIN_OTHER, // slice_count: any size not covered by the other bins, with 1 <= slice_count <= MI_BCHUNK_BITS
MI_BBIN_MEDIUM, // slice_count == 8
MI_BBIN_LARGE, // slice_count == MI_BFIELD_BITS -- only used if MI_ENABLE_LARGE_PAGES is 1
MI_BBIN_COUNT
} mi_bbin_t;
static inline mi_bbin_t mi_bbin_inc(mi_bbin_t bbin) {
return (mi_bbin_t)((int)bbin + 1);
}
static inline mi_bbin_t mi_bbin_of(size_t slice_count) {
if (slice_count==1) return MI_BBIN_SMALL;
if (slice_count==8) return MI_BBIN_MEDIUM;
#if MI_ENABLE_LARGE_PAGES
if (slice_count==MI_BFIELD_BITS) return MI_BBIN_LARGE;
#endif
return MI_BBIN_OTHER;
}
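// For example: mi_bbin_of(1) == MI_BBIN_SMALL (a single 64 KiB slice),
// mi_bbin_of(8) == MI_BBIN_MEDIUM (8 slices == 512 KiB), and an in-between
// size like mi_bbin_of(3) falls into MI_BBIN_OTHER.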
// An atomic "binned" bitmap for the free slices where we keep chunks reserved for particular size classes
typedef mi_decl_align(MI_BCHUNK_SIZE) struct mi_bbitmap_s {
_Atomic(size_t) chunk_count; // total count of chunks (0 < N <= MI_BCHUNKMAP_BITS)
_Atomic(size_t) chunk_max_accessed; // max chunk index that was once cleared or set
size_t _padding[MI_BCHUNK_SIZE/MI_SIZE_SIZE - 2]; // suppress warning on msvc
mi_bchunkmap_t chunkmap;
_Atomic(uint8_t) chunk_bins[MI_BITMAP_MAX_CHUNK_COUNT]; // 512b
mi_bchunk_t chunks[MI_BITMAP_DEFAULT_CHUNK_COUNT]; // usually dynamic MI_BITMAP_MAX_CHUNK_COUNT
} mi_bbitmap_t;
static inline size_t mi_bbitmap_chunk_count(const mi_bbitmap_t* bbitmap) {
return mi_atomic_load_relaxed(&((mi_bbitmap_t*)bbitmap)->chunk_count);
}
static inline size_t mi_bbitmap_max_bits(const mi_bbitmap_t* bbitmap) {
return (mi_bbitmap_chunk_count(bbitmap) * MI_BCHUNK_BITS);
}
size_t mi_bbitmap_size(size_t bit_count, size_t* chunk_count);
// Initialize a bitmap to all clear; avoid a mem_zero if `already_zero` is true
// returns the size of the bitmap.
size_t mi_bbitmap_init(mi_bbitmap_t* bbitmap, size_t bit_count, bool already_zero);
// Set/clear a sequence of `n` bits in the bitmap (and can cross chunks).
// Not atomic so only use if still local to a thread.
void mi_bbitmap_unsafe_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Set a sequence of `n` bits in the bbitmap; returns `true` if atomically transitioned from all 0's to 1's
// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
bool mi_bbitmap_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Is a sequence of n bits already all set/cleared?
bool mi_bbitmap_is_xsetN(mi_xset_t set, mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Is a sequence of n bits already set?
// (Used to check if a memory range is already committed)
static inline bool mi_bbitmap_is_setN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) {
return mi_bbitmap_is_xsetN(MI_BIT_SET, bbitmap, idx, n);
}
// Is a sequence of n bits already clear?
static inline bool mi_bbitmap_is_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n) {
return mi_bbitmap_is_xsetN(MI_BIT_CLEAR, bbitmap, idx, n);
}
// Try to atomically transition `n` bits from all set to all clear. Returns `true` on success.
// `n` cannot cross chunk boundaries (and `n <= MI_BCHUNK_BITS`)!
bool mi_bbitmap_try_clearN(mi_bbitmap_t* bbitmap, size_t idx, size_t n);
// Specialized versions for common bit sequence sizes
bool mi_bbitmap_try_find_and_clear(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 1-bit
bool mi_bbitmap_try_find_and_clear8(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // 8-bits
// bool mi_bbitmap_try_find_and_clearX(mi_bbitmap_t* bbitmap, size_t tseq, size_t* pidx); // MI_BFIELD_BITS
bool mi_bbitmap_try_find_and_clearNX(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx); // n <= MI_BFIELD_BITS
bool mi_bbitmap_try_find_and_clearN_(mi_bbitmap_t* bbitmap, size_t tseq, size_t n, size_t* pidx); // MI_BFIELD_BITS < n <= MI_BCHUNK_BITS
// Find a sequence of `n` bits in the bbitmap with all bits set, and try to atomically clear all.
// Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`.
mi_decl_nodiscard static inline bool mi_bbitmap_try_find_and_clearN(mi_bbitmap_t* bbitmap, size_t n, size_t tseq, size_t* pidx) {
if (n==1) return mi_bbitmap_try_find_and_clear(bbitmap, tseq, pidx); // small pages
if (n==8) return mi_bbitmap_try_find_and_clear8(bbitmap, tseq, pidx); // medium pages
// if (n==MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearX(bbitmap, tseq, pidx); // large pages
if (n==0 || n>MI_BCHUNK_BITS) return false; // cannot be more than a chunk
if (n<=MI_BFIELD_BITS) return mi_bbitmap_try_find_and_clearNX(bbitmap, tseq, n, pidx);
return mi_bbitmap_try_find_and_clearN_(bbitmap, tseq, n, pidx);
}
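// For example: `mi_bbitmap_try_find_and_clearN(bb, 8, tseq, &idx)` uses the
// specialized byte-aligned search; `n == 40` (within one bfield on 64-bit) uses
// the `NX` variant; and `n == 100` falls through to the general cross-field
// `N_` search, still within a single chunk.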
#endif // MI_BITMAP_H

src/free.c

View file

@@ -23,9 +23,6 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block);
// Free
// ------------------------------------------------------
// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block);
// regular free of a (thread local) block pointer
// fast path written carefully to prevent spilling on the stack
static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool track_stats, bool check_full)
@@ -50,6 +47,40 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
}
}
// Forward declaration for multi-threaded collect
static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t* mt_free) mi_attr_noexcept;
// Free a block multi-threaded
static inline void mi_free_block_mt(mi_page_t* page, mi_block_t* block) mi_attr_noexcept
{
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page, block));
// _mi_padding_shrink(page, block, sizeof(mi_block_t));
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
size_t dbgsize = mi_usable_size(block);
if (dbgsize > MI_MiB) { dbgsize = MI_MiB; }
_mi_memset_aligned(block, MI_DEBUG_FREED, dbgsize);
#endif
// push atomically on the page thread free list
mi_thread_free_t tf_new;
mi_thread_free_t tf_old = mi_atomic_load_relaxed(&page->xthread_free);
do {
mi_block_set_next(page, block, mi_tf_block(tf_old));
tf_new = mi_tf_create(block, true /* always use owned: try to claim it if the page is abandoned */);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_old, tf_new)); // todo: release is enough?
// and atomically try to collect the page if it was abandoned
const bool is_owned_now = !mi_tf_is_owned(tf_old);
if (is_owned_now) {
mi_assert_internal(mi_page_is_abandoned(page));
mi_free_try_collect_mt(page,block);
}
}
// Adjust a block that was allocated aligned, to the actual start of the block in the page.
// note: this can be called from `mi_free_generic_mt` where a non-owning thread accesses the
// `page_start` and `block_size` fields; however these are constant and the page won't be
@@ -57,7 +88,7 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) {
mi_assert_internal(page!=NULL && p!=NULL);
size_t diff = (uint8_t*)p - page->page_start;
size_t diff = (uint8_t*)p - mi_page_start(page);
size_t adjust;
if mi_likely(page->block_size_shift != 0) {
adjust = diff & (((size_t)1 << page->block_size_shift) - 1);
@@ -81,218 +112,207 @@ static inline void mi_block_check_unguard(mi_page_t* page, mi_block_t* block, vo
}
#endif
// free a local pointer (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
MI_UNUSED(segment);
static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, void* p) mi_attr_noexcept {
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p);
mi_block_check_unguard(page, block, p);
mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */);
}
// free a pointer owned by another thread (page parameter comes first for better codegen)
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept {
static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, void* p) mi_attr_noexcept {
if (p==NULL) return; // a NULL pointer is seen as abandoned (tid==0) with a full flag set
#if !MI_PAGE_MAP_FLAT
if (page==&_mi_page_empty) return; // an invalid pointer may lead to using the empty page
#endif
mi_assert_internal(p!=NULL && page != NULL && page != &_mi_page_empty);
mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
mi_block_check_unguard(page, block, p);
mi_free_block_mt(page, segment, block);
mi_free_block_mt(page, block);
}
// generic free (for runtime integration)
void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
if (is_local) mi_free_generic_local(page,segment,p);
else mi_free_generic_mt(page,segment,p);
void mi_decl_noinline _mi_free_generic(mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
if (is_local) mi_free_generic_local(page,p);
else mi_free_generic_mt(page,p);
}
// Get the segment data belonging to a pointer
// This is just a single `and` in release mode but does further checks in debug mode
// (and secure mode) to see if this was a valid pointer.
static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
{
MI_UNUSED(msg);
#if (MI_DEBUG>0)
// Get the page belonging to a pointer
// Does further checks in debug mode to see if this was a valid pointer.
static inline mi_page_t* mi_validate_ptr_page(const void* p, const char* msg)
{
MI_UNUSED_RELEASE(msg);
#if MI_DEBUG
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0 && !mi_option_is_enabled(mi_option_guarded_precise)) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
return NULL;
}
#endif
mi_segment_t* const segment = _mi_ptr_segment(p);
if mi_unlikely(segment==NULL) return segment;
#if (MI_DEBUG>0)
if mi_unlikely(!mi_is_in_heap_region(p)) {
_mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
"(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
_mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
mi_page_t* page = _mi_safe_ptr_page(p);
if (page == NULL) {
if (p != NULL) {
_mi_error_message(EINVAL, "%s: invalid pointer: %p\n", msg, p);
}
#if !MI_PAGE_MAP_FLAT
page = (mi_page_t*)&_mi_page_empty;
#endif
}
return page;
#else
return _mi_ptr_page(p);
#endif
#if (MI_DEBUG>0 || MI_SECURE>=4)
if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) {
_mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p);
return NULL;
}
#endif
return segment;
}
// Free a block
// Fast path written carefully to prevent register spilling on the stack
void mi_free(void* p) mi_attr_noexcept
{
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
if mi_unlikely(segment==NULL) return;
mi_page_t* const page = mi_validate_ptr_page(p,"mi_free");
const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_page_t* const page = _mi_segment_page_of(segment, p);
#if MI_PAGE_MAP_FLAT // if not flat, p==NULL leads to `_mi_page_empty` which leads to `mi_free_generic_mt`
if mi_unlikely(page==NULL) return;
#endif
mi_assert_internal(page!=NULL);
if mi_likely(is_local) { // thread-local free?
if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
// thread-local, aligned, and not a full page
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
}
else {
// page is full or contains (inner) aligned blocks; use generic path
mi_free_generic_local(page, segment, p);
}
const mi_threadid_t xtid = (_mi_prim_thread_id() ^ mi_page_xthread_id(page));
if mi_likely(xtid == 0) { // `tid == mi_page_thread_id(page) && mi_page_flags(page) == 0`
// thread-local, aligned, and not a full page
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */);
}
else if (xtid <= MI_PAGE_FLAG_MASK) { // `tid == mi_page_thread_id(page) && mi_page_flags(page) != 0`
// page is local, but is full or contains (inner) aligned blocks; use generic path
mi_free_generic_local(page, p);
}
// free-ing in a page owned by a heap in another thread, or an abandoned page (not belonging to a heap)
else if ((xtid & MI_PAGE_FLAG_MASK) == 0) { // `tid != mi_page_thread_id(page) && mi_page_flags(page) == 0`
// the page is not full and has no (inner) aligned blocks, so `p` is the block start; push it on the thread_free list
mi_block_t* const block = (mi_block_t*)p;
mi_free_block_mt(page,block);
}
else {
// not thread-local; use generic path
mi_free_generic_mt(page, segment, p);
// page is full or contains (inner) aligned blocks; use generic multi-thread path
mi_free_generic_mt(page, p);
}
}
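// Illustrative summary (not part of this commit): the `xtid` dispatch above
// assumes the page's `xthread_id` stores the owning thread id with its low bits
// reserved for the page flags (full / has-aligned), so one XOR classifies the free:
//
//   xtid == 0                        local free, no flags   -> fast local path
//   0 < xtid <= MI_PAGE_FLAG_MASK    local free, flags set  -> mi_free_generic_local
//   (xtid & MI_PAGE_FLAG_MASK) == 0  non-local, no flags    -> mi_free_block_mt
//   otherwise                        non-local, flags set   -> mi_free_generic_mt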
// return true if successful
bool _mi_free_delayed_block(mi_block_t* block) {
// get segment and page
mi_assert_internal(block!=NULL);
const mi_segment_t* const segment = _mi_ptr_segment(block);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(_mi_thread_id() == segment->thread_id);
mi_page_t* const page = _mi_segment_page_of(segment, block);
// Clear the no-delayed flag so delayed freeing is used again for this page.
// This must be done before collecting the free lists on this page -- otherwise
// some blocks may end up in the page `thread_free` list with no blocks in the
// heap `thread_delayed_free` list which may cause the page to be never freed!
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) {
return false;
}
// collect all other non-local frees (move from `thread_free` to `free`) to ensure up-to-date `used` count
_mi_page_free_collect(page, false);
// and free the block (possibly freeing the page as well since `used` is updated)
mi_free_block_local(page, block, false /* stats have already been adjusted */, true /* check for a full page */);
return true;
}
// ------------------------------------------------------
// Multi-threaded Free (`_mt`)
// ------------------------------------------------------
// Push a block that is owned by another thread on its page-local thread free
// list or it's heap delayed free list. Such blocks are later collected by
// the owning thread in `_mi_free_delayed_block`.
static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block )
{
// Try to put the block on either the page-local thread free list,
// or the heap delayed free list (if this is the first non-local free in that page)
mi_thread_free_t tfreex;
bool use_delayed;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
if mi_unlikely(use_delayed) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
}
else {
// usual: directly add to page thread_free list
mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block);
}
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
// If this was the first non-local free, we need to push it on the heap delayed free list instead
if mi_unlikely(use_delayed) {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page);
mi_assert_internal(heap != NULL);
if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap,block,dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}
// and reset the MI_DELAYED_FREEING flag
tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
tfreex = tfree;
mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
static bool mi_page_unown_from_free(mi_page_t* page, mi_block_t* mt_free);
static inline bool mi_page_queue_len_is_atmost( mi_heap_t* heap, size_t block_size, size_t atmost) {
mi_page_queue_t* const pq = mi_page_queue(heap,block_size);
mi_assert_internal(pq!=NULL);
return (pq->count <= atmost);
/*
for(mi_page_t* p = pq->first; p!=NULL; p = p->next, atmost--) {
if (atmost == 0) { return false; }
}
return true;
*/
}
// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block)
{
// first see if the segment was abandoned and if we can reclaim it into our thread
if (_mi_option_get_fast(mi_option_abandoned_reclaim_on_free) != 0 &&
#if MI_HUGE_PAGE_ABANDON
segment->page_kind != MI_PAGE_HUGE &&
#endif
mi_atomic_load_relaxed(&segment->thread_id) == 0 && // segment is abandoned?
mi_prim_get_default_heap() != (mi_heap_t*)&_mi_heap_empty) // and we did not already exit this thread (without this check, a fresh heap will be initialized (issue #944))
static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page, mi_block_t* mt_free) mi_attr_noexcept {
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
// we own the page now..
// safe to collect the thread atomic free list
// use the `_partly` version to avoid atomic operations since we already have the `mt_free` pointing into the thread free list
_mi_page_free_collect_partly(page, mt_free);
#if MI_DEBUG > 1
if (mi_page_is_singleton(page)) { mi_assert_internal(mi_page_all_free(page)); }
#endif
// 1. free if the page is free now (this is updated by `_mi_page_free_collect_partly`)
if (mi_page_all_free(page))
{
// the segment is abandoned, try to reclaim it into our heap
if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) {
mi_assert_internal(_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
mi_assert_internal(mi_heap_get_default()->tld->segments.subproc == segment->subproc);
mi_free(block); // recursively free as now it will be a local free in our heap
return;
// first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish)
_mi_arenas_page_unabandon(page);
// we can free the page directly
_mi_arenas_page_free(page);
return;
}
// 2. we can try to reclaim the page for ourselves
// note: we only reclaim if the page originated from our heap (the heap field is preserved on abandonment)
// to avoid claiming arbitrary object sizes and limit indefinite expansion. This helps benchmarks like `larson`
if (page->block_size <= MI_SMALL_MAX_OBJ_SIZE) // only for small sized blocks
{
const long reclaim_on_free = _mi_option_get_fast(mi_option_page_reclaim_on_free);
if (reclaim_on_free >= 0) { // and reclaiming is allowed
// get our heap (with the right tag)
// note: don't use `mi_heap_get_default()` as we may just have terminated this thread and we should
// not reinitialize the heap for this thread. (can happen due to thread-local destructors for example -- issue #944)
mi_heap_t* heap = mi_prim_get_default_heap();
if (heap != page->heap) {
if (mi_heap_is_initialized(heap)) {
heap = _mi_heap_by_tag(heap, page->heap_tag);
}
}
// can we reclaim into this heap?
if (heap != NULL && heap->allow_page_reclaim) {
const long reclaim_max = _mi_option_get_fast(mi_option_page_reclaim_max);
if ((heap == page->heap && mi_page_queue_len_is_atmost(heap, page->block_size, reclaim_max)) || // only reclaim if we were the originating heap, and we have at most N pages already
(reclaim_on_free == 1 && // OR if the reclaim across heaps is allowed
!mi_page_is_used_at_frac(page, 8) && // and the page is not too full
!heap->tld->is_in_threadpool && // and not part of a threadpool
_mi_arena_memid_is_suitable(page->memid, heap->exclusive_arena)) // and the memory is suitable
)
{
// first remove it from the abandoned pages in the arena -- this waits for any readers to finish
_mi_arenas_page_unabandon(page);
_mi_heap_page_reclaim(heap, page);
mi_heap_stat_counter_increase(heap, pages_reclaim_on_free, 1);
return;
}
}
}
}
// The padding check may access the non-thread-owned page for the key values.
// that is safe as these are constant and the page won't be freed (as the block is not freed yet).
mi_check_padding(page, block);
// adjust stats (after padding check and potentially recursive `mi_free` above)
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free_size(block, mi_page_usable_size_of(page,block));
// for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
_mi_padding_shrink(page, block, sizeof(mi_block_t));
if (segment->page_kind == MI_PAGE_HUGE) {
#if MI_HUGE_PAGE_ABANDON
// huge page segments are always abandoned and can be freed immediately
_mi_segment_huge_page_free(segment, page, block);
// 3. if the page is unmapped, try to reabandon so it can possibly be mapped and found for allocations
if (!mi_page_is_used_at_frac(page, 8) && // only reabandon once a full page has enough free blocks again, to prevent it from being immediately re-abandoned
!mi_page_is_abandoned_mapped(page) && page->memid.memkind == MI_MEM_ARENA &&
_mi_arenas_page_try_reabandon_to_mapped(page))
{
return;
#else
// huge pages are special as they occupy the entire segment
// as these are large we reset the memory occupied by the page so it is available to other threads
// (as the owning thread needs to actually free the memory later).
_mi_segment_huge_page_reset(segment, page, block);
#endif
}
else {
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading
memset(block, MI_DEBUG_FREED, mi_usable_size(block));
#endif
}
// and finally free the actual block by pushing it on the owning heap
// thread_delayed free list (or heap delayed free list)
mi_free_block_delayed_mt(page,block);
// not reclaimed or free'd, unown again
// _mi_page_unown(page);
mi_page_unown_from_free(page, mt_free);
}
// release ownership of a page. This may free the page if all (other) blocks were concurrently
// freed in the meantime. Returns true if the page was freed.
// This is a specialized version of `mi_page_unown` to (try to) avoid calling `mi_page_free_collect` again.
static bool mi_page_unown_from_free(mi_page_t* page, mi_block_t* mt_free) {
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
mi_assert_internal(mt_free != NULL);
mi_assert_internal(page->used > 1);
mi_thread_free_t tf_expect = mi_tf_create(mt_free, true);
mi_thread_free_t tf_new = mi_tf_create(mt_free, false);
while mi_unlikely(!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tf_expect, tf_new)) {
mi_assert_internal(mi_tf_is_owned(tf_expect));
while (mi_tf_block(tf_expect) != NULL) {
_mi_page_free_collect(page,false); // update used
if (mi_page_all_free(page)) { // it may become free just before unowning it
_mi_arenas_page_unabandon(page);
_mi_arenas_page_free(page);
return true;
}
tf_expect = mi_atomic_load_relaxed(&page->xthread_free);
}
mi_assert_internal(mi_tf_block(tf_expect)==NULL);
tf_new = mi_tf_create(NULL, false);
}
return false;
}
@@ -316,9 +336,8 @@ static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* p
}
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
if mi_unlikely(segment==NULL) return 0;
const mi_page_t* const page = _mi_segment_page_of(segment, p);
const mi_page_t* const page = mi_validate_ptr_page(p,msg);
if mi_unlikely(page==NULL) return 0;
if mi_likely(!mi_page_has_aligned(page)) {
const mi_block_t* block = (const mi_block_t*)p;
return mi_page_usable_size_of(page, block);
@@ -513,21 +532,21 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) {
// only maintain stats for smaller objects if requested
#if (MI_STAT>0)
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
#if (MI_STAT < 2)
MI_UNUSED(block);
#endif
#endif
mi_heap_t* const heap = mi_heap_get_default();
const size_t bsize = mi_page_usable_block_size(page);
#if (MI_STAT>1)
#if (MI_STAT>1)
const size_t usize = mi_page_usable_size_of(page, block);
mi_heap_stat_decrease(heap, malloc_requested, usize);
#endif
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
#endif
if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_decrease(heap, malloc_normal, bsize);
#if (MI_STAT > 1)
#if (MI_STAT > 1)
mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], 1);
#endif
#endif
}
else {
const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc
@@ -535,7 +554,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
}
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
MI_UNUSED(page); MI_UNUSED(block);
}
#endif
@@ -553,7 +572,7 @@ static void mi_block_unguard(mi_page_t* page, mi_block_t* block, void* p) {
const size_t bsize = mi_page_block_size(page);
const size_t psize = _mi_os_page_size();
mi_assert_internal(bsize > psize);
mi_assert_internal(_mi_page_segment(page)->allow_decommit);
mi_assert_internal(!page->memid.is_pinned);
void* gpage = (uint8_t*)block + bsize - psize;
mi_assert_internal(_mi_is_aligned(gpage, psize));
_mi_os_unprotect(gpage, psize);

src/heap.c

View file

@@ -7,11 +7,8 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h" // mi_prim_get_default_heap
#include <string.h> // memset, memcpy
#if defined(_MSC_VER) && (_MSC_VER < 1920)
#pragma warning(disable:4204) // non-constant aggregate initializer
#endif
@@ -58,8 +55,6 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
MI_UNUSED(arg2);
MI_UNUSED(pq);
mi_assert_internal(mi_page_heap(page) == heap);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == heap->thread_id);
mi_assert_expensive(_mi_page_is_valid(page));
return true;
}
@@ -68,6 +63,9 @@ static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
static bool mi_heap_is_valid(mi_heap_t* heap) {
mi_assert_internal(heap!=NULL);
mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL);
for (size_t bin = 0; bin < MI_BIN_COUNT; bin++) {
mi_assert_internal(_mi_page_queue_is_valid(heap, &heap->pages[bin]));
}
return true;
}
#endif
@@ -98,7 +96,7 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
if (mi_page_all_free(page)) {
// no more used blocks, free the page.
// note: this will free retired pages as well.
_mi_page_free(page, pq, collect >= MI_FORCE);
_mi_page_free(page, pq);
}
else if (collect == MI_ABANDON) {
// still used blocks but the thread is done; abandon the page
@@ -107,66 +105,29 @@
return true; // don't break
}
static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
MI_UNUSED(arg1);
MI_UNUSED(arg2);
MI_UNUSED(heap);
MI_UNUSED(pq);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
return true; // don't break
}
static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
{
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
mi_assert_expensive(mi_heap_is_valid(heap));
const bool force = (collect >= MI_FORCE);
_mi_deferred_free(heap, force);
// python/cpython#112532: we may be called from a thread that is not the owner of the heap
const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id());
// note: never reclaim on collect but leave it to threads that need storage to reclaim
if (
#ifdef NDEBUG
collect == MI_FORCE
#else
collect >= MI_FORCE
#endif
&& is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim)
{
// the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
// if all memory is freed by now, all segments should be freed.
// note: this only collects in the current subprocess
_mi_abandoned_reclaim_all(heap, &heap->tld->segments);
}
// if abandoning, mark all pages to no longer add to delayed_free
if (collect == MI_ABANDON) {
mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
}
// free all current thread delayed blocks.
// (if abandoning, after this there are no more thread-delayed references into the pages.)
_mi_heap_delayed_free_all(heap);
// const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id());
// collect retired pages
_mi_heap_collect_retired(heap, force);
// if (_mi_is_main_thread()) { mi_debug_show_arenas(true, false, false); }
// collect all pages owned by this thread
mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL);
mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL );
// collect segments (purge pages, this can be expensive so don't force on abandonment)
_mi_segments_collect(collect == MI_FORCE, &heap->tld->segments);
// if forced, collect thread data cache on program-exit (or shared library unload)
if (force && is_main_thread && mi_heap_is_backing(heap)) {
_mi_thread_data_collect(); // collect thread data cache
}
// collect arenas (this is program wide so don't force purges on abandonment of threads)
_mi_arenas_collect(collect == MI_FORCE /* force purge? */);
//mi_atomic_storei64_release(&heap->tld->subproc->purge_expire, 1);
_mi_arenas_collect(collect == MI_FORCE /* force purge? */, collect >= MI_FORCE /* visit all? */, heap->tld);
// merge statistics
if (collect <= MI_FORCE) {
@ -192,8 +153,12 @@ void mi_collect(bool force) mi_attr_noexcept {
----------------------------------------------------------- */
mi_heap_t* mi_heap_get_default(void) {
mi_thread_init();
return mi_prim_get_default_heap();
mi_heap_t* heap = mi_prim_get_default_heap();
if mi_unlikely(!mi_heap_is_initialized(heap)) {
mi_thread_init();
heap = mi_prim_get_default_heap();
}
return heap;
}
static bool mi_heap_is_default(const mi_heap_t* heap) {
@ -206,52 +171,90 @@ mi_heap_t* mi_heap_get_backing(void) {
mi_assert_internal(heap!=NULL);
mi_heap_t* bheap = heap->tld->heap_backing;
mi_assert_internal(bheap!=NULL);
mi_assert_internal(bheap->thread_id == _mi_thread_id());
mi_assert_internal(bheap->tld->thread_id == _mi_thread_id());
return bheap;
}
void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag) {
// todo: make order of parameters consistent (but would that break compat with CPython?)
void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool allow_destroy, uint8_t heap_tag, mi_tld_t* tld)
{
mi_assert_internal(heap!=NULL);
mi_memid_t memid = heap->memid;
_mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = tld;
heap->thread_id = _mi_thread_id();
heap->arena_id = arena_id;
heap->no_reclaim = noreclaim;
heap->tag = tag;
if (heap == tld->heap_backing) {
heap->memid = memid;
heap->tld = tld; // avoid reading the thread-local tld during initialization
heap->exclusive_arena = _mi_arena_from_id(arena_id);
heap->allow_page_reclaim = (!allow_destroy && mi_option_get(mi_option_page_reclaim_on_free) >= 0);
heap->allow_page_abandon = (!allow_destroy && mi_option_get(mi_option_page_full_retain) >= 0);
heap->page_full_retain = mi_option_get_clamp(mi_option_page_full_retain, -1, 32);
heap->tag = heap_tag;
if (heap->tld->is_in_threadpool) {
// if we run as part of a thread pool it is better not to arbitrarily reclaim abandoned pages into our heap
// (this is checked in `free.c:mi_free_try_collect_mt`)
// .. but abandoning is good in this case: reduce the full page retain to a quarter (possibly to 0)
// (so blocked threads do not hold on to too much memory)
if (heap->page_full_retain > 0) {
heap->page_full_retain = heap->page_full_retain / 4;
}
}
if (heap->tld->heap_backing == NULL) {
heap->tld->heap_backing = heap; // first heap becomes the backing heap
_mi_random_init(&heap->random);
}
else {
_mi_random_split(&tld->heap_backing->random, &heap->random);
_mi_random_split(&heap->tld->heap_backing->random, &heap->random);
}
heap->cookie = _mi_heap_random_next(heap) | 1;
heap->keys[0] = _mi_heap_random_next(heap);
heap->keys[1] = _mi_heap_random_next(heap);
//heap->keys[0] = _mi_heap_random_next(heap);
//heap->keys[1] = _mi_heap_random_next(heap);
_mi_heap_guarded_init(heap);
// push on the thread local heaps list
heap->next = heap->tld->heaps;
heap->tld->heaps = heap;
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode?
if (heap == NULL) return NULL;
mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id, mi_tld_t* tld) {
mi_assert_internal(tld!=NULL);
mi_assert(heap_tag >= 0 && heap_tag < 256);
_mi_heap_init(heap, bheap->tld, arena_id, allow_destroy /* no reclaim? */, (uint8_t)heap_tag /* heap tag */);
// allocate and initialize a heap
mi_memid_t memid;
mi_heap_t* heap;
if (arena_id == _mi_arena_id_none()) {
heap = (mi_heap_t*)_mi_meta_zalloc(sizeof(mi_heap_t), &memid);
}
else {
// heaps associated with a specific arena are allocated in that arena
// note: takes up at least one slice which is quite wasteful...
heap = (mi_heap_t*)_mi_arenas_alloc(_mi_subproc(), _mi_align_up(sizeof(mi_heap_t),MI_ARENA_MIN_OBJ_SIZE), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, &memid);
}
if (heap==NULL) {
_mi_error_message(ENOMEM, "unable to allocate heap meta-data\n");
return NULL;
}
heap->memid = memid;
_mi_heap_init(heap, arena_id, allow_destroy, (uint8_t)heap_tag, tld);
return heap;
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id) {
mi_heap_t* bheap = mi_heap_get_backing();
mi_assert_internal(bheap != NULL);
return _mi_heap_create(heap_tag, allow_destroy, arena_id, bheap->tld);
}
mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) {
return mi_heap_new_ex(0 /* default heap tag */, false /* don't allow `mi_heap_destroy` */, arena_id);
return mi_heap_new_ex(0 /* default heap tag */, false /* allow destroy? */, arena_id);
}
mi_decl_nodiscard mi_heap_t* mi_heap_new(void) {
// don't reclaim abandoned memory, as otherwise destroy is unsafe
return mi_heap_new_ex(0 /* default heap tag */, true /* no reclaim */, _mi_arena_id_none());
return mi_heap_new_ex(0 /* default heap tag */, true /* allow destroy? */, _mi_arena_id_none());
}
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) {
return _mi_arena_memid_is_suitable(memid, heap->arena_id);
return _mi_arena_memid_is_suitable(memid, heap->exclusive_arena);
}
uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
@ -263,14 +266,14 @@ static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(heap != NULL);
mi_assert_internal(mi_heap_is_initialized(heap));
// TODO: copy full empty heap instead?
memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct));
_mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages));
heap->thread_delayed_free = NULL;
// heap->thread_delayed_free = NULL;
heap->page_count = 0;
}
// called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources.
static void mi_heap_free(mi_heap_t* heap) {
static void mi_heap_free(mi_heap_t* heap, bool do_free_mem) {
mi_assert(heap != NULL);
mi_assert_internal(mi_heap_is_initialized(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
@ -297,7 +300,9 @@ static void mi_heap_free(mi_heap_t* heap) {
mi_assert_internal(heap->tld->heaps != NULL);
// and free the used memory
mi_free(heap);
if (do_free_mem) {
_mi_meta_free(heap, sizeof(*heap), heap->memid);
}
}
// return a heap on the same thread as `heap` specialized for the specified tag (if it exists)
@ -324,24 +329,24 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
MI_UNUSED(pq);
// ensure no more thread_delayed_free will be added
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
//_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// stats
const size_t bsize = mi_page_block_size(page);
if (bsize > MI_LARGE_OBJ_SIZE_MAX) {
if (bsize > MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_decrease(heap, malloc_huge, bsize);
}
#if (MI_STAT)
#if (MI_STAT)
_mi_page_free_collect(page, false); // update used count
const size_t inuse = page->used;
if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
if (bsize <= MI_LARGE_MAX_OBJ_SIZE) {
mi_heap_stat_decrease(heap, malloc_normal, bsize * inuse);
#if (MI_STAT>1)
#if (MI_STAT>1)
mi_heap_stat_decrease(heap, malloc_bins[_mi_bin(bsize)], inuse);
#endif
#endif
}
mi_heap_stat_decrease(heap, malloc_requested, bsize * inuse); // todo: off for aligned blocks...
#endif
#endif
// pretend it is all free now
mi_assert_internal(mi_page_thread_free(page) == NULL);
@ -351,7 +356,8 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
// mi_page_free(page,false);
page->next = NULL;
page->prev = NULL;
_mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);
mi_page_set_heap(page, NULL);
_mi_arenas_page_free(page);
return true; // keep going
}
@ -372,7 +378,8 @@ static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_he
void mi_heap_destroy(mi_heap_t* heap) {
mi_assert(heap != NULL);
mi_assert(mi_heap_is_initialized(heap));
mi_assert(heap->no_reclaim);
mi_assert(!heap->allow_page_reclaim);
mi_assert(!heap->allow_page_abandon);
mi_assert_expensive(mi_heap_is_valid(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
#if MI_GUARDED
@ -380,9 +387,9 @@ void mi_heap_destroy(mi_heap_t* heap) {
mi_heap_delete(heap);
return;
#else
if (!heap->no_reclaim) {
if (heap->allow_page_reclaim) {
_mi_warning_message("'mi_heap_destroy' called but ignored as the heap was not created with 'allow_destroy' (heap at %p)\n", heap);
// don't free in case it may contain reclaimed pages
// don't free it in this case as it may still contain reclaimed pages
mi_heap_delete(heap);
}
else {
@ -392,7 +399,7 @@ void mi_heap_destroy(mi_heap_t* heap) {
#endif
// free all pages
_mi_heap_destroy_pages(heap);
mi_heap_free(heap);
mi_heap_free(heap,true);
}
#endif
}
@ -404,7 +411,7 @@ void _mi_heap_unsafe_destroy_all(mi_heap_t* heap) {
mi_heap_t* curr = heap->tld->heaps;
while (curr != NULL) {
mi_heap_t* next = curr->next;
if (curr->no_reclaim) {
if (!curr->allow_page_reclaim) {
mi_heap_destroy(curr);
}
else {
@ -419,44 +426,30 @@ void _mi_heap_unsafe_destroy_all(mi_heap_t* heap) {
----------------------------------------------------------- */
// Transfer the pages from one heap to the other
static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
mi_assert_internal(heap!=NULL);
if (from==NULL || from->page_count == 0) return;
//static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
// mi_assert_internal(heap!=NULL);
// if (from==NULL || from->page_count == 0) return;
//
// // transfer all pages by appending the queues; this will set a new heap field
// for (size_t i = 0; i <= MI_BIN_FULL; i++) {
// mi_page_queue_t* pq = &heap->pages[i];
// mi_page_queue_t* append = &from->pages[i];
// size_t pcount = _mi_page_queue_append(heap, pq, append);
// heap->page_count += pcount;
// from->page_count -= pcount;
// }
// mi_assert_internal(from->page_count == 0);
//
// // and reset the `from` heap
// mi_heap_reset_pages(from);
//}
// reduce the size of the delayed frees
_mi_heap_delayed_free_partial(from);
// transfer all pages by appending the queues; this will set a new heap field
// so threads may do delayed frees in either heap for a while.
// note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state
// so after this only the new heap will get delayed frees
for (size_t i = 0; i <= MI_BIN_FULL; i++) {
mi_page_queue_t* pq = &heap->pages[i];
mi_page_queue_t* append = &from->pages[i];
size_t pcount = _mi_page_queue_append(heap, pq, append);
heap->page_count += pcount;
from->page_count -= pcount;
}
mi_assert_internal(from->page_count == 0);
// and do outstanding delayed frees in the `from` heap
// note: be careful here as the `heap` field in all those pages no longer points to `from`;
// this turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls
// the regular `_mi_free_delayed_block` which is safe.
_mi_heap_delayed_free_all(from);
#if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL);
#endif
// and reset the `from` heap
mi_heap_reset_pages(from);
}
// are two heaps compatible with respect to heap-tag, exclusive arena etc.
static bool mi_heaps_are_compatible(mi_heap_t* heap1, mi_heap_t* heap2) {
return (heap1->tag == heap2->tag && // store same kind of objects
heap1->arena_id == heap2->arena_id); // same arena preference
}
//// are two heaps compatible with respect to heap-tag, exclusive arena etc.
//static bool mi_heaps_are_compatible(mi_heap_t* heap1, mi_heap_t* heap2) {
// return (heap1->tag == heap2->tag && // store same kind of objects
// heap1->tld->subproc == heap2->tld->subproc && // same sub-process
// heap1->arena_id == heap2->arena_id); // same arena preference
//}
// Safe delete a heap without freeing any still allocated blocks in that heap.
void mi_heap_delete(mi_heap_t* heap)
@ -466,17 +459,11 @@ void mi_heap_delete(mi_heap_t* heap)
mi_assert_expensive(mi_heap_is_valid(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
mi_heap_t* bheap = heap->tld->heap_backing;
if (bheap != heap && mi_heaps_are_compatible(bheap,heap)) {
// transfer still used pages to the backing heap
mi_heap_absorb(bheap, heap);
}
else {
// the backing heap abandons its pages
_mi_heap_collect_abandon(heap);
}
// abandon all pages
_mi_heap_collect_abandon(heap);
mi_assert_internal(heap->page_count==0);
mi_heap_free(heap);
mi_heap_free(heap,true);
}
mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
@ -490,7 +477,63 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
}
/* -----------------------------------------------------------
Load/unload heaps
----------------------------------------------------------- */
void mi_heap_unload(mi_heap_t* heap) {
mi_assert(mi_heap_is_initialized(heap));
mi_assert_expensive(mi_heap_is_valid(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return;
if (heap->exclusive_arena == NULL) {
_mi_warning_message("cannot unload heaps that are not associated with an exclusive arena\n");
return;
}
// abandon all pages so the thread id in each page is cleared
_mi_heap_collect_abandon(heap);
mi_assert_internal(heap->page_count==0);
// remove from heap list
mi_heap_free(heap, false /* but don't actually free the memory */);
// disassociate from the current thread-local and static state
heap->tld = NULL;
return;
}
bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena_id) {
mi_assert(mi_heap_is_initialized(heap));
if (heap==NULL || !mi_heap_is_initialized(heap)) return false;
if (heap->exclusive_arena == NULL) {
_mi_warning_message("cannot reload heaps that were not associated with an exclusive arena\n");
return false;
}
if (heap->tld != NULL) {
_mi_warning_message("cannot reload heaps that were not unloaded first\n");
return false;
}
mi_arena_t* arena = _mi_arena_from_id(arena_id);
if (heap->exclusive_arena != arena) {
_mi_warning_message("trying to reload a heap at a different arena address: %p vs %p\n", heap->exclusive_arena, arena);
return false;
}
mi_assert_internal(heap->page_count==0);
// re-associate with the current thread-local and static state
heap->tld = mi_heap_get_default()->tld;
// reinit direct pages (as we may be in a different process)
mi_assert_internal(heap->page_count == 0);
for (size_t i = 0; i < MI_PAGES_DIRECT; i++) {
heap->pages_free_direct[i] = (mi_page_t*)&_mi_page_empty;
}
// push on the thread local heaps list
heap->next = heap->tld->heaps;
heap->tld->heaps = heap;
return true;
}
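
// A usage sketch (not part of this diff) of the unload/reload protocol above:
// a heap created in an exclusive arena is unloaded in one thread and reloaded
// in another. It assumes the public `mi_reserve_os_memory_ex` and
// `mi_heap_new_in_arena` entry points; error handling is elided.
#if 0
static mi_arena_id_t arena_id;
static mi_heap_t* shared_heap;

static void thread_one(void) {
  // reserve a 64 MiB exclusive arena and create a heap inside it
  mi_reserve_os_memory_ex(64*1024*1024, true /* commit */, false /* allow large */, true /* exclusive */, &arena_id);
  shared_heap = mi_heap_new_in_arena(arena_id);
  void* p = mi_heap_malloc(shared_heap, 128); (void)p;
  mi_heap_unload(shared_heap);  // abandon the pages and detach from this thread
}

static void thread_two(void) {
  if (mi_heap_reload(shared_heap, arena_id)) {  // re-associate with this thread
    mi_heap_delete(shared_heap);
  }
}
#endif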
/* -----------------------------------------------------------
Analysis
@ -499,11 +542,8 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) {
// static since it is not thread safe to access heaps from other threads.
static mi_heap_t* mi_heap_of_block(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p);
bool valid = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(valid);
if mi_unlikely(!valid) return NULL;
return mi_page_heap(_mi_segment_page_of(segment,p));
mi_page_t* page = _mi_ptr_page(p); // TODO: check pointer validity?
return mi_page_heap(page);
}
bool mi_heap_contains_block(mi_heap_t* heap, const void* p) {
@ -578,7 +618,7 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_
if (page->used == 0) return true;
size_t psize;
uint8_t* const pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
uint8_t* const pstart = mi_page_area(page, &psize);
mi_heap_t* const heap = mi_page_heap(page);
const size_t bsize = mi_page_block_size(page);
const size_t ubsize = mi_page_usable_block_size(page); // without padding

View file

@ -11,32 +11,31 @@ terms of the MIT license. A copy of the license can be found in the file
#include <string.h> // memcpy, memset
#include <stdlib.h> // atexit
#define MI_MEMID_INIT(kind) {{{NULL,0}}, kind, true /* pinned */, true /* committed */, false /* zero */ }
#define MI_MEMID_STATIC MI_MEMID_INIT(MI_MEM_STATIC)
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0,
false, false, false, false,
0, // capacity
0, // reserved capacity
{ 0 }, // flags
false, // is_zero
0, // retire_expire
NULL, // free
NULL, // local_free
0, // used
0, // block size shift
0, // heap tag
0, // block_size
NULL, // page_start
MI_ATOMIC_VAR_INIT(MI_PAGE_IN_FULL_QUEUE), // xthread_id (must set flag to catch NULL on a free)
NULL, // free
0, // used
0, // capacity
0, // reserved capacity
0, // block size shift
0, // retire_expire
NULL, // local_free
MI_ATOMIC_VAR_INIT(0), // xthread_free
0, // block_size
NULL, // page_start
0, // heap tag
false, // is_zero
#if (MI_PADDING || MI_ENCODE_FREELIST)
{ 0, 0 },
#endif
MI_ATOMIC_VAR_INIT(0), // xthread_free
MI_ATOMIC_VAR_INIT(0), // xheap
NULL, NULL
#if MI_INTPTR_SIZE==4
, { NULL }
{ 0, 0 }, // keys
#endif
NULL, // xheap
NULL, NULL, // next, prev
MI_ARENA_SLICE_SIZE, // page_committed
MI_MEMID_STATIC // memid
};
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@ -51,7 +50,7 @@ const mi_page_t _mi_page_empty = {
// Empty page queues for every bin
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
#define QNULL(sz) { NULL, NULL, 0, (sz)*sizeof(uintptr_t) }
#define MI_PAGE_QUEUES_EMPTY \
{ QNULL(1), \
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
@ -63,8 +62,8 @@ const mi_page_t _mi_page_empty = {
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), QNULL(393216), QNULL(458752), QNULL(524288), /* 72 */ \
QNULL(MI_LARGE_OBJ_WSIZE_MAX + 1 /* 655360, Huge queue */), \
QNULL(MI_LARGE_OBJ_WSIZE_MAX + 2) /* Full queue */ }
QNULL(MI_LARGE_MAX_OBJ_WSIZE + 1 /* 655360, Huge queue */), \
QNULL(MI_LARGE_MAX_OBJ_WSIZE + 2) /* Full queue */ }
#define MI_STAT_COUNT_NULL() {0,0,0}
@ -95,25 +94,83 @@ const mi_page_t _mi_page_empty = {
// may lead to allocation itself on some platforms)
// --------------------------------------------------------
mi_decl_hidden mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
NULL,
MI_ATOMIC_VAR_INIT(NULL),
0, // tid
0, // cookie
0, // arena id
{ 0, 0 }, // keys
{ {0}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, 0, // generic count
NULL, // next
false, // can reclaim
0, // tag
static mi_decl_cache_align mi_subproc_t subproc_main
#if __cplusplus
= { }; // empty initializer to prevent running the constructor (with msvc)
#else
= { 0 }; // C zero initialize
#endif
static mi_decl_cache_align mi_tld_t tld_empty = {
0, // thread_id
0, // thread_seq
&subproc_main, // subproc
NULL, // heap_backing
NULL, // heaps list
0, // heartbeat
false, // recurse
false, // is_in_threadpool
{ MI_STAT_VERSION, MI_STATS_NULL }, // stats
MI_MEMID_STATIC // memid
};
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
&tld_empty, // tld
NULL, // exclusive_arena
0, // cookie
//{ 0, 0 }, // keys
{ {0}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, 0, // generic count
NULL, // next
0, // full page retain
false, // can reclaim
true, // can eager abandon
0, // tag
#if MI_GUARDED
0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`)
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY
MI_PAGE_QUEUES_EMPTY,
MI_MEMID_STATIC
};
extern mi_decl_hidden mi_decl_cache_align mi_heap_t heap_main;
static mi_decl_cache_align mi_tld_t tld_main = {
0, // thread_id
0, // thread_seq
&subproc_main, // subproc
&heap_main, // heap_backing
&heap_main, // heaps list
0, // heartbeat
false, // recurse
false, // is_in_threadpool
{ MI_STAT_VERSION, MI_STATS_NULL }, // stats
MI_MEMID_STATIC // memid
};
mi_decl_cache_align mi_heap_t heap_main = {
&tld_main, // thread local data
NULL, // exclusive arena
0, // initial cookie
//{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, 0, // generic count
NULL, // next heap
2, // full page retain
true, // allow page reclaim
true, // allow page abandon
0, // tag
#if MI_GUARDED
0, 0, 0, 0, 0,
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY,
MI_MEMID_STATIC
};
@ -124,40 +181,6 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
// the thread-local default heap for allocation
mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
extern mi_decl_hidden mi_heap_t _mi_heap_main;
static mi_decl_cache_align mi_subproc_t mi_subproc_default;
static mi_decl_cache_align mi_tld_t tld_main = {
0, false,
&_mi_heap_main, &_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
0, 0, 0, 0, 0, &mi_subproc_default,
&tld_main.stats
}, // segments
{ MI_STAT_VERSION, MI_STATS_NULL } // stats
};
mi_decl_cache_align mi_heap_t _mi_heap_main = {
&tld_main,
MI_ATOMIC_VAR_INIT(NULL),
0, // thread id
0, // initial cookie
0, // arena id
{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
{ {0x846ca68b}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
0, 0, // generic count
NULL, // next heap
false, // can reclaim
0, // tag
#if MI_GUARDED
0, 0, 0, 0, 0,
#endif
MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY
};
bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
@ -173,7 +196,7 @@ mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t samp
if (heap->guarded_sample_rate >= 1) {
heap->guarded_sample_seed = heap->guarded_sample_seed % heap->guarded_sample_rate;
}
heap->guarded_sample_count = heap->guarded_sample_seed; // count down samples
heap->guarded_sample_count = 1 + heap->guarded_sample_seed; // count down samples
}
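
// Sketch of the countdown sampling this enables (an assumption, not part of
// this diff): the fast path decrements `guarded_sample_count` on each
// allocation and only guards when it reaches zero, wrapping back to the rate;
// seeding the count with `1 + guarded_sample_seed` staggers the first sample:
//
//   if (heap->guarded_sample_rate == 0) return false;        // sampling disabled
//   if (--heap->guarded_sample_count > 0) return false;      // not yet
//   heap->guarded_sample_count = heap->guarded_sample_rate;  // reset countdown
//   return true;                                             // guard this allocation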
mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, size_t max) {
@ -202,28 +225,146 @@ void _mi_heap_guarded_init(mi_heap_t* heap) {
}
#endif
// Initialize main subproc
static void mi_subproc_main_init(void) {
if (subproc_main.memid.memkind != MI_MEM_STATIC) {
subproc_main.memid = _mi_memid_create(MI_MEM_STATIC);
mi_lock_init(&subproc_main.os_abandoned_pages_lock);
mi_lock_init(&subproc_main.arena_reserve_lock);
}
}
// Initialize main tld
static void mi_tld_main_init(void) {
if (tld_main.thread_id == 0) {
tld_main.thread_id = _mi_prim_thread_id();
}
}
// Initialization of the (statically allocated) main heap, and the main tld and subproc.
static void mi_heap_main_init(void) {
if (_mi_heap_main.cookie == 0) {
_mi_heap_main.thread_id = _mi_thread_id();
_mi_heap_main.cookie = 1;
#if defined(_WIN32) && !defined(MI_SHARED_LIB)
_mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking
if (heap_main.cookie == 0) {
// heap
heap_main.cookie = 1;
#if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB)
_mi_random_init_weak(&heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking
#else
_mi_random_init(&_mi_heap_main.random);
_mi_random_init(&heap_main.random);
#endif
_mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
mi_lock_init(&mi_subproc_default.abandoned_os_lock);
mi_lock_init(&mi_subproc_default.abandoned_os_visit_lock);
_mi_heap_guarded_init(&_mi_heap_main);
heap_main.cookie = _mi_heap_random_next(&heap_main);
//heap_main.keys[0] = _mi_heap_random_next(&heap_main);
//heap_main.keys[1] = _mi_heap_random_next(&heap_main);
_mi_heap_guarded_init(&heap_main);
heap_main.allow_page_reclaim = (mi_option_get(mi_option_page_reclaim_on_free) >= 0);
heap_main.allow_page_abandon = (mi_option_get(mi_option_page_full_retain) >= 0);
heap_main.page_full_retain = mi_option_get_clamp(mi_option_page_full_retain, -1, 32);
mi_subproc_main_init();
mi_tld_main_init();
}
}
mi_heap_t* _mi_heap_main_get(void) {
mi_heap_main_init();
return &_mi_heap_main;
return &heap_main;
}
/* -----------------------------------------------------------
Thread local data
----------------------------------------------------------- */
// Count current and total created threads
static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1);
static _Atomic(size_t) thread_total_count;
size_t _mi_current_thread_count(void) {
return mi_atomic_load_relaxed(&thread_count);
}
// The mimalloc thread local data
mi_decl_thread mi_tld_t* thread_tld = &tld_empty;
// Allocate fresh tld
static mi_tld_t* mi_tld_alloc(void) {
mi_atomic_increment_relaxed(&thread_count);
if (_mi_is_main_thread()) {
return &tld_main;
}
else {
// allocate tld meta-data
// note: we need to be careful to not access the tld from `_mi_meta_zalloc`
// (and in turn from `_mi_arena_alloc_aligned` and `_mi_os_alloc_aligned`).
mi_memid_t memid;
mi_tld_t* tld = (mi_tld_t*)_mi_meta_zalloc(sizeof(mi_tld_t), &memid);
if (tld==NULL) {
_mi_error_message(ENOMEM, "unable to allocate memory for thread local data\n");
return NULL;
}
tld->memid = memid;
tld->heap_backing = NULL;
tld->heaps = NULL;
tld->subproc = &subproc_main;
tld->thread_id = _mi_prim_thread_id();
tld->thread_seq = mi_atomic_add_acq_rel(&thread_total_count, 1);
tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool();
return tld;
}
}
#define MI_TLD_INVALID ((mi_tld_t*)1)
mi_decl_noinline static void mi_tld_free(mi_tld_t* tld) {
if (tld != NULL && tld != MI_TLD_INVALID) {
_mi_stats_done(&tld->stats);
_mi_meta_free(tld, sizeof(mi_tld_t), tld->memid);
}
#if 0
// do not read/write to `thread_tld` on older macOS <= 14 as that will re-initialize the thread local storage
// (since we are calling this during pthread shutdown)
// (and this could happen on other systems as well, so let's never do it)
thread_tld = MI_TLD_INVALID;
#endif
mi_atomic_decrement_relaxed(&thread_count);
}
static mi_tld_t* mi_tld(void) {
mi_tld_t* tld = thread_tld;
if (tld == MI_TLD_INVALID) {
_mi_error_message(EFAULT, "internal error: tld is accessed after the thread terminated\n");
thread_tld = &tld_empty;
}
if (tld==&tld_empty) {
thread_tld = tld = mi_tld_alloc();
}
return tld;
}
mi_subproc_t* _mi_subproc(void) {
// should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()`)
// todo: this will still fail on systems where the first access to a thread-local causes allocation.
// on such systems we can check for this with `mi_prim_get_default_heap` as those heaps are protected (by being
// stored in a TLS slot for example)
mi_heap_t* heap = mi_prim_get_default_heap();
if (heap == NULL) {
return _mi_subproc_main();
}
else {
return heap->tld->subproc; // avoid using thread local storage (`thread_tld`)
}
}
mi_tld_t* _mi_thread_tld(void) mi_attr_noexcept {
// should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()`)
mi_heap_t* heap = mi_prim_get_default_heap();
if (heap == NULL) {
return &tld_empty;
}
else {
return heap->tld;
}
}
@ -231,179 +372,99 @@ mi_heap_t* _mi_heap_main_get(void) {
Sub process
----------------------------------------------------------- */
mi_subproc_t* _mi_subproc_main(void) {
return &subproc_main;
}
mi_subproc_id_t mi_subproc_main(void) {
return NULL;
}
mi_subproc_id_t mi_subproc_new(void) {
mi_memid_t memid = _mi_memid_none();
mi_subproc_t* subproc = (mi_subproc_t*)_mi_arena_meta_zalloc(sizeof(mi_subproc_t), &memid);
mi_memid_t memid;
mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid);
if (subproc == NULL) return NULL;
subproc->memid = memid;
subproc->abandoned_os_list = NULL;
mi_lock_init(&subproc->abandoned_os_lock);
mi_lock_init(&subproc->abandoned_os_visit_lock);
mi_lock_init(&subproc->os_abandoned_pages_lock);
mi_lock_init(&subproc->arena_reserve_lock);
return subproc;
}
mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) {
return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id);
return (subproc_id == NULL ? &subproc_main : (mi_subproc_t*)subproc_id);
}
void mi_subproc_delete(mi_subproc_id_t subproc_id) {
if (subproc_id == NULL) return;
mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
// check if there are no abandoned segments still..
// check if there are still abandoned OS pages..
bool safe_to_delete = false;
mi_lock(&subproc->abandoned_os_lock) {
if (subproc->abandoned_os_list == NULL) {
mi_lock(&subproc->os_abandoned_pages_lock) {
if (subproc->os_abandoned_pages == NULL) {
safe_to_delete = true;
}
}
if (!safe_to_delete) return;
// merge stats back into the main subproc?
_mi_stats_merge_from(&_mi_subproc_main()->stats, &subproc->stats);
// safe to release
// todo: should we refcount subprocesses?
mi_lock_done(&subproc->abandoned_os_lock);
mi_lock_done(&subproc->abandoned_os_visit_lock);
_mi_arena_meta_free(subproc, subproc->memid, sizeof(mi_subproc_t));
mi_lock_done(&subproc->os_abandoned_pages_lock);
mi_lock_done(&subproc->arena_reserve_lock);
_mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid);
}
void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) {
mi_heap_t* heap = mi_heap_get_default();
if (heap == NULL) return;
mi_assert(heap->tld->segments.subproc == &mi_subproc_default);
if (heap->tld->segments.subproc != &mi_subproc_default) return;
heap->tld->segments.subproc = _mi_subproc_from_id(subproc_id);
mi_tld_t* tld = mi_tld();
if (tld == NULL) return;
mi_assert(tld->subproc == &subproc_main);
if (tld->subproc != &subproc_main) return;
tld->subproc = _mi_subproc_from_id(subproc_id);
}
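
// A usage sketch (not part of this diff): give a group of threads their own
// sub-process so their (abandoned) pages stay separate from the rest of the
// program. Only the public `mi_subproc_*` API above is assumed.
#if 0
static void component_thread(void* arg) {
  mi_subproc_add_current_thread((mi_subproc_id_t)arg); // call before allocating in this thread
  void* p = mi_malloc(100);
  mi_free(p);
}

static void run_component(void) {
  mi_subproc_id_t subproc = mi_subproc_new();
  // ... spawn component threads with `subproc` as argument, then join them ...
  mi_subproc_delete(subproc); // only releases once no abandoned OS pages remain
}
#endif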
/* -----------------------------------------------------------
Initialization and freeing of the thread local heaps
Allocate heap data
----------------------------------------------------------- */
// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size).
typedef struct mi_thread_data_s {
mi_heap_t heap; // must come first due to cast in `_mi_heap_done`
mi_tld_t tld;
mi_memid_t memid; // must come last due to zero'ing
} mi_thread_data_t;
// Thread meta-data is allocated directly from the OS. For
// some programs that do not use thread pools and allocate and
// destroy many OS threads, this may cause too much overhead
// per thread so we maintain a small cache of recently freed metadata.
#define TD_CACHE_SIZE (32)
static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];
static mi_thread_data_t* mi_thread_data_zalloc(void) {
// try to find thread metadata in the cache
bool is_zero = false;
mi_thread_data_t* td = NULL;
for (int i = 0; i < TD_CACHE_SIZE; i++) {
td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td != NULL) {
// found cached allocation, try use it
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) {
break;
}
}
}
// if that fails, allocate as meta data
if (td == NULL) {
mi_memid_t memid;
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid);
if (td == NULL) {
// if this fails, try once more. (issue #257)
td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid);
if (td == NULL) {
// really out of memory
_mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
}
}
if (td != NULL) {
td->memid = memid;
is_zero = memid.initially_zero;
}
}
if (td != NULL && !is_zero) {
_mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid));
}
return td;
}
static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
// try to add the thread metadata to the cache
for (int i = 0; i < TD_CACHE_SIZE; i++) {
mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td == NULL) {
mi_thread_data_t* expected = NULL;
if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) {
return;
}
}
}
// if that fails, just free it directly
_mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid);
}
void _mi_thread_data_collect(void) {
// free all thread metadata from the cache
for (int i = 0; i < TD_CACHE_SIZE; i++) {
mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td != NULL) {
td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) {
_mi_os_free(td, sizeof(mi_thread_data_t), td->memid);
}
}
}
}
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_thread_heap_init(void) {
if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true;
if (_mi_is_main_thread()) {
// mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization
// mi_assert_internal(heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization
// the main heap is statically allocated
mi_heap_main_init();
_mi_heap_set_default_direct(&_mi_heap_main);
_mi_heap_set_default_direct(&heap_main);
//mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap());
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
mi_thread_data_t* td = mi_thread_data_zalloc();
if (td == NULL) return false;
// allocates tld data
// note: we cannot access thread-locals yet as that can cause (recursive) allocation
// (on macOS <= 14 for example where the loader allocates thread-local data on demand).
mi_tld_t* tld = mi_tld_alloc();
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
_mi_tld_init(tld, heap); // must be before `_mi_heap_init`
_mi_heap_init(heap, tld, _mi_arena_id_none(), false /* can reclaim */, 0 /* default tag */);
// allocate and initialize the heap
mi_heap_t* heap = _mi_heap_create(0 /* default tag */, false /* allow destroy? */, _mi_arena_id_none(), tld);
// associate the heap with this thread
// (this is safe, on macOS for example, the heap is set in a dedicated TLS slot and thus does not cause recursive allocation)
_mi_heap_set_default_direct(heap);
// now that the heap is set for this thread, we can set the thread-local tld.
thread_tld = tld;
}
return false;
}
// initialize thread local data
void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
_mi_memzero_aligned(tld,sizeof(mi_tld_t));
tld->heap_backing = bheap;
tld->heaps = NULL;
tld->segments.subproc = &mi_subproc_default;
tld->segments.stats = &tld->stats;
}
// Free the thread local default heap (called from `mi_thread_done`)
static bool _mi_thread_heap_done(mi_heap_t* heap) {
if (!mi_heap_is_initialized(heap)) return true;
// reset default heap
_mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty);
_mi_heap_set_default_direct(_mi_is_main_thread() ? &heap_main : (mi_heap_t*)&_mi_heap_empty);
// switch to backing heap
heap = heap->tld->heap_backing;
@ -423,26 +484,22 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) {
mi_assert_internal(mi_heap_is_backing(heap));
// collect if not the main thread
if (heap != &_mi_heap_main) {
if (heap != &heap_main) {
_mi_heap_collect_abandon(heap);
}
// merge stats
_mi_stats_done(&heap->tld->stats);
// free heap meta data
_mi_meta_free(heap, sizeof(mi_heap_t), heap->memid);
// free if not the main thread
if (heap != &_mi_heap_main) {
mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
mi_thread_data_free((mi_thread_data_t*)heap);
}
else {
if (heap == &heap_main) {
#if 0
// never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
// there may still be delete/free calls after the mi_fls_done is called. Issue #207
_mi_heap_destroy_pages(heap);
mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main);
mi_assert_internal(heap->tld->heap_backing == &heap_main);
#endif
}
return false;
}
@ -456,7 +513,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) {
// 1. windows dynamic library:
// call from DllMain on DLL_THREAD_DETACH
// 2. windows static library:
// use `FlsAlloc` to call a destructor when the thread is done
// use special linker section to call a destructor when the thread is done
// 3. unix, pthreads:
// use a pthread key to call a destructor when a pthread is done
//
@ -470,19 +527,14 @@ static void mi_process_setup_auto_thread_done(void) {
if (tls_initialized) return;
tls_initialized = true;
_mi_prim_thread_init_auto_done();
_mi_heap_set_default_direct(&_mi_heap_main);
_mi_heap_set_default_direct(&heap_main);
}
bool _mi_is_main_thread(void) {
return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id());
return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id());
}
static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1);
size_t _mi_current_thread_count(void) {
return mi_atomic_load_relaxed(&thread_count);
}
// This is called from the `mi_malloc_generic`
void mi_thread_init(void) mi_attr_noexcept
@ -495,8 +547,7 @@ void mi_thread_init(void) mi_attr_noexcept
// fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called)
if (_mi_thread_heap_init()) return; // returns true if already initialized
_mi_stat_increase(&_mi_stats_main.threads, 1);
mi_atomic_increment_relaxed(&thread_count);
mi_subproc_stat_increase(_mi_subproc_main(), threads, 1);
//_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id());
}
@ -518,14 +569,18 @@ void _mi_thread_done(mi_heap_t* heap)
}
// adjust stats
mi_atomic_decrement_relaxed(&thread_count);
_mi_stat_decrease(&_mi_stats_main.threads, 1);
mi_subproc_stat_decrease(_mi_subproc_main(), threads, 1);
// check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
if (heap->thread_id != _mi_thread_id()) return;
if (heap->tld->thread_id != _mi_prim_thread_id()) return;
// abandon the thread local heap
if (_mi_thread_heap_done(heap)) return; // returns true if already ran
// note: we store the tld as we should avoid reading `thread_tld` at this point (to avoid reinitializing the thread local storage)
mi_tld_t* tld = heap->tld;
_mi_thread_heap_done(heap); // returns true if already ran
// free thread local data
mi_tld_free(tld);
}
void _mi_heap_set_default_direct(mi_heap_t* heap) {
@ -546,7 +601,10 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
}
void mi_thread_set_in_threadpool(void) mi_attr_noexcept {
// nothing
mi_tld_t* tld = mi_tld();
if (tld!=NULL) {
tld->is_in_threadpool = true;
}
}
// --------------------------------------------------------
@ -586,7 +644,7 @@ void _mi_process_load(void) {
}
// reseed random
_mi_random_reinit_if_weak(&_mi_heap_main.random);
_mi_random_reinit_if_weak(&heap_main.random);
}
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
@ -613,19 +671,32 @@ void mi_process_init(void) mi_attr_noexcept {
// ensure we are called once
static mi_atomic_once_t process_init;
#if _MSC_VER < 1920
mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main
mi_heap_main_init(); // vs2017 can dynamically re-initialize heap_main
#endif
if (!mi_atomic_once(&process_init)) return;
_mi_process_is_initialized = true;
_mi_verbose_message("process init: 0x%zx\n", _mi_thread_id());
mi_process_setup_auto_thread_done();
mi_detect_cpu_features();
_mi_os_init();
_mi_page_map_init();
mi_heap_main_init();
mi_tld_main_init();
// the following two can potentially allocate (on freeBSD for locks and thread keys)
mi_subproc_main_init();
mi_process_setup_auto_thread_done();
#if MI_DEBUG
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif
_mi_verbose_message("secure level: %d\n", MI_SECURE);
_mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL);
#if MI_TSAN
_mi_verbose_message("thread santizer enabled\n");
#endif
mi_thread_init();
#if defined(_WIN32)
#if defined(_WIN32) && defined(MI_WIN_USE_FLS)
// On Windows, when building as a static lib the FLS cleanup happens too early for the main thread.
// To avoid this, set the FLS value for the main thread to NULL so the fls cleanup
// will not call _mi_thread_done on the (still executing) main thread. See issue #508.
@ -684,15 +755,14 @@ void mi_cdecl _mi_process_done(void) {
if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
mi_heap_collect(heap, true /* force */);
_mi_heap_unsafe_destroy_all(heap); // forcefully release all memory held by all heaps (of this thread only!)
_mi_arena_unsafe_destroy_all();
_mi_segment_map_unsafe_destroy();
_mi_arenas_unsafe_destroy_all(heap->tld);
}
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
mi_stats_print(NULL);
}
_mi_allocator_done();
_mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id);
_mi_verbose_message("process done: 0x%zx\n", tld_main.thread_id);
os_preloading = true; // don't call the C runtime anymore
}

View file

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -171,7 +171,18 @@ int _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
char c;
MI_NEXTC();
if (c != '%') {
if ((c >= ' ' && c <= '~') || c=='\n' || c=='\r' || c=='\t') { // output visible ascii or standard control only
if (c == '\\') {
MI_NEXTC();
switch (c) {
case 'e': mi_outc('\x1B', &out, end); break;
case 't': mi_outc('\t', &out, end); break;
case 'n': mi_outc('\n', &out, end); break;
case 'r': mi_outc('\r', &out, end); break;
case '\\': mi_outc('\\', &out, end); break;
default: /* ignore */ break;
}
}
else if ((c >= ' ' && c <= '~') || c=='\n' || c=='\r' || c=='\t' || c=='\x1b') { // output visible ascii or standard control only
mi_outc(c, &out, end);
}
}
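// For example (illustrative, not part of this diff), a literal backslash
// escape in the format string is now translated to its control character so
// ANSI color codes can be embedded in messages:
//
//   _mi_snprintf(buf, sizeof(buf), "\\e[31mheap warning\\e[0m\\n");
//   // buf starts with ESC (0x1B): "\x1B[31mheap warning\x1B[0m\n"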
@ -199,7 +210,10 @@ int _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
}
char* start = out;
if (c == 's') {
if (c == '%') {
mi_outc('%', &out, end);
}
else if (c == 's') {
// string
const char* s = va_arg(args, const char*);
mi_outs(s, &out, end);
@ -275,3 +289,127 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
va_end(args);
return written;
}
// --------------------------------------------------------
// generic trailing and leading zero count, and popcount
// --------------------------------------------------------
#if !MI_HAS_FAST_BITSCAN
static size_t mi_ctz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://keithandkatie.com/keith/papers/debruijn.html>
static const uint8_t debruijn[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
if (x==0) return 32;
return debruijn[(uint32_t)((x & -(int32_t)x) * (uint32_t)(0x077CB531U)) >> 27];
}
static size_t mi_clz_generic32(uint32_t x) {
// de Bruijn multiplication, see <http://keithandkatie.com/keith/papers/debruijn.html>
static const uint8_t debruijn[32] = {
31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
};
if (x==0) return 32;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return debruijn[(uint32_t)(x * (uint32_t)(0x07C4ACDDU)) >> 27];
}
size_t _mi_ctz_generic(size_t x) {
if (x==0) return MI_SIZE_BITS;
#if (MI_SIZE_BITS <= 32)
return mi_ctz_generic32((uint32_t)x);
#else
const uint32_t lo = (uint32_t)x;
if (lo != 0) {
return mi_ctz_generic32(lo);
}
else {
return (32 + mi_ctz_generic32((uint32_t)(x>>32)));
}
#endif
}
size_t _mi_clz_generic(size_t x) {
if (x==0) return MI_SIZE_BITS;
#if (MI_SIZE_BITS <= 32)
return mi_clz_generic32((uint32_t)x);
#else
const uint32_t hi = (uint32_t)(x>>32);
if (hi != 0) {
return mi_clz_generic32(hi);
}
else {
return 32 + mi_clz_generic32((uint32_t)x);
}
#endif
}
#endif // bit scan
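
// A standalone sketch (not mimalloc code) checking the de Bruijn trick: for a
// single set bit `x = 1<<i`, `x & -x` keeps only that bit, the multiplication
// moves a unique 5-bit pattern into the top bits, and the table maps it back to `i`.
#if 0
#include <stdint.h>
#include <stdio.h>

static size_t naive_ctz32(uint32_t x) {
  size_t n = 0;
  if (x == 0) return 32;
  while ((x & 1) == 0) { x >>= 1; n++; }
  return n;
}

int main(void) {
  static const uint8_t debruijn[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  };
  for (uint32_t x = 1; x != 0; x <<= 1) {  // all 32 single-bit values
    size_t ctz = debruijn[(uint32_t)((x & (0u - x)) * 0x077CB531U) >> 27];
    if (ctz != naive_ctz32(x)) { printf("mismatch at %08x\n", x); return 1; }
  }
  printf("de Bruijn ctz agrees with the naive loop\n");
  return 0;
}
#endif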
#if !MI_HAS_FAST_POPCOUNT
#if MI_SIZE_SIZE == 4
#define mi_mask_even_bits32 (0x55555555)
#define mi_mask_even_pairs32 (0x33333333)
#define mi_mask_even_nibbles32 (0x0F0F0F0F)
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum32(uint32_t x) {
// perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
x += (x << 8);
x += (x << 16);
return (size_t)(x >> 24);
}
static size_t mi_popcount_generic32(uint32_t x) {
// first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
// in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
// into the lower bit-pair:
x = x - ((x >> 1) & mi_mask_even_bits32);
// add the 2-bit pair results
x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
// add the 4-bit nibble results
x = (x + (x >> 4)) & mi_mask_even_nibbles32;
// each byte now holds the count of its bits; sum the bytes:
return mi_byte_sum32(x);
}
size_t _mi_popcount_generic(size_t x) {
return mi_popcount_generic32(x);
}
#else
#define mi_mask_even_bits64 (0x5555555555555555)
#define mi_mask_even_pairs64 (0x3333333333333333)
#define mi_mask_even_nibbles64 (0x0F0F0F0F0F0F0F0F)
// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum64(uint64_t x) {
x += (x << 8);
x += (x << 16);
x += (x << 32);
return (size_t)(x >> 56);
}
static size_t mi_popcount_generic64(uint64_t x) {
x = x - ((x >> 1) & mi_mask_even_bits64);
x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
x = (x + (x >> 4)) & mi_mask_even_nibbles64;
return mi_byte_sum64(x);
}
size_t _mi_popcount_generic(size_t x) {
return mi_popcount_generic64(x);
}
#endif
#endif // popcount
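
// A worked example (not mimalloc code) tracing the SWAR steps above on one value:
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint64_t x = 0xF00F00F00F00F00FULL; // six 0xF nibbles -> 24 set bits
  x = x - ((x >> 1) & 0x5555555555555555ULL);                          // 2-bit counts
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); // 4-bit counts
  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                          // per-byte counts
  x += (x << 8); x += (x << 16); x += (x << 32);                       // byte sum in the top byte
  printf("popcount = %u\n", (unsigned)(x >> 56));                      // prints 24
  return 0;
}
#endif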

View file

@ -102,6 +102,14 @@ typedef struct mi_option_desc_s {
#endif
#endif
#ifndef MI_DEFAULT_PAGEMAP_COMMIT
#if defined(__APPLE__) // when overloading malloc, we still get mixed pointers sometimes on macOS; this avoids a bad access
#define MI_DEFAULT_PAGEMAP_COMMIT 1
#else
#define MI_DEFAULT_PAGEMAP_COMMIT 0
#endif
#endif
static mi_option_desc_t options[_mi_option_last] =
{
@ -136,18 +144,17 @@ static mi_option_desc_t options[_mi_option_last] =
#else
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
#endif
{ 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
{ 1000,UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only Apple specific for now but might serve a similar purpose on other platforms
{ 32, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
{ 32, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
{ 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments to be reclaimed per try.
{ 10, UNINIT, MI_OPTION(deprecated_max_segment_reclaim)}, // max. percentage of the abandoned segments to be reclaimed per try.
{ 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
{ MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (=1GiB) (use `option_get_size`)
{ 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
{ 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
{ 0, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
{ 1, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's
{ 1, UNINIT, MI_OPTION_LEGACY(deprecated_purge_extend_delay, decommit_extend_delay) },
{ MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's)
{ 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
#if defined(MI_VISIT_ABANDONED)
@ -161,8 +168,15 @@ static mi_option_desc_t options[_mi_option_last] =
{ MI_DEFAULT_GUARDED_SAMPLE_RATE,
UNINIT, MI_OPTION(guarded_sample_rate)}, // 1 out of N allocations in the min/max range will be guarded (=4000)
{ 0, UNINIT, MI_OPTION(guarded_sample_seed)},
{ 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable.
{ 10000, UNINIT, MI_OPTION(generic_collect) }, // collect heaps every N (=10000) generic allocation calls
{ 10000, UNINIT, MI_OPTION(generic_collect) }, // collect heaps every N (=10000) generic allocation calls
{ 0, UNINIT, MI_OPTION_LEGACY(page_reclaim_on_free, abandoned_reclaim_on_free) },// reclaim abandoned pages on a free: -1 = disable completely, 0 = only reclaim into the originating heap, 1 = reclaim on free across heaps
{ 2, UNINIT, MI_OPTION(page_full_retain) }, // number of (small) pages to retain in the free page queues
{ 4, UNINIT, MI_OPTION(page_max_candidates) }, // max search to find a best page candidate
{ 0, UNINIT, MI_OPTION(max_vabits) }, // max virtual address space bits
{ MI_DEFAULT_PAGEMAP_COMMIT,
UNINIT, MI_OPTION(pagemap_commit) }, // commit the full pagemap upfront?
{ 0, UNINIT, MI_OPTION(page_commit_on_demand) }, // commit pages on-demand (2 disables this only on overcommit systems (like Linux))
{ 16, UNINIT, MI_OPTION(page_reclaim_max) }, // don't reclaim pages if we already own N pages (in that size class)
};
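
// Usage note (an assumption, not part of this diff): each option can be set
// programmatically before first use, or via its upper-cased environment
// variable with a `MIMALLOC_` prefix, e.g.:
//
//   mi_option_set(mi_option_purge_delay, 1000);   // same as MIMALLOC_PURGE_DELAY=1000
//   mi_option_set(mi_option_page_full_retain, 4); // same as MIMALLOC_PAGE_FULL_RETAIN=4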
static void mi_option_init(mi_option_desc_t* desc);
@ -455,7 +469,7 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me
// Define our own limited `fprintf` that avoids memory allocation.
// We do this using `_mi_vsnprintf` with a limited buffer.
static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
char buf[512];
char buf[992];
if (fmt==NULL) return;
if (!mi_recurse_enter()) return;
_mi_vsnprintf(buf, sizeof(buf)-1, fmt, args);
@ -481,6 +495,13 @@ static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix
}
}
void _mi_raw_message(const char* fmt, ...) {
va_list args;
va_start(args, fmt);
mi_vfprintf(NULL, NULL, NULL, fmt, args);
va_end(args);
}
void _mi_message(const char* fmt, ...) {
va_list args;
va_start(args, fmt);

248
src/os.c
View file

@ -9,21 +9,9 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc/atomic.h"
#include "mimalloc/prim.h"
#define mi_os_stat_increase(stat,amount) _mi_stat_increase(&_mi_stats_main.stat, amount)
#define mi_os_stat_decrease(stat,amount) _mi_stat_decrease(&_mi_stats_main.stat, amount)
#define mi_os_stat_counter_increase(stat,inc) _mi_stat_counter_increase(&_mi_stats_main.stat, inc)
/* -----------------------------------------------------------
Initialization.
----------------------------------------------------------- */
#ifndef MI_DEFAULT_VIRTUAL_ADDRESS_BITS
#if MI_INTPTR_SIZE < 8
#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 32
#else
#define MI_DEFAULT_VIRTUAL_ADDRESS_BITS 48
#endif
#endif
#ifndef MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB
#if MI_INTPTR_SIZE < 8
#define MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB 4*MI_MiB // 4 GiB
@ -37,7 +25,7 @@ static mi_os_mem_config_t mi_os_mem_config = {
0, // large page size (usually 2MiB)
4096, // allocation granularity
MI_DEFAULT_PHYSICAL_MEMORY_IN_KIB,
MI_DEFAULT_VIRTUAL_ADDRESS_BITS,
MI_MAX_VABITS, // in `bits.h`
true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems)
false, // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span)
true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory)
@ -62,6 +50,18 @@ size_t _mi_os_large_page_size(void) {
return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size());
}
size_t _mi_os_guard_page_size(void) {
const size_t gsize = _mi_os_page_size();
mi_assert(gsize <= (MI_ARENA_SLICE_SIZE/8));
return gsize;
}
size_t _mi_os_virtual_address_bits(void) {
const size_t vbits = mi_os_mem_config.virtual_address_bits;
mi_assert(vbits <= MI_MAX_VABITS);
return vbits;
}
bool _mi_os_use_large_page(size_t size, size_t alignment) {
// if we have access, check the size and alignment requirements
if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false;
@ -91,73 +91,54 @@ void _mi_os_init(void) {
bool _mi_os_decommit(void* addr, size_t size);
bool _mi_os_commit(void* addr, size_t size, bool* is_zero);
static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
mi_assert_internal(alignment != 0);
uintptr_t mask = alignment - 1;
if ((alignment & mask) == 0) { // power of two?
return (sz & ~mask);
}
else {
return ((sz / alignment) * alignment);
}
}
static void* mi_align_down_ptr(void* p, size_t alignment) {
return (void*)_mi_align_down((uintptr_t)p, alignment);
}
/* -----------------------------------------------------------
aligned hinting
-------------------------------------------------------------- */
// On systems with enough virtual address bits, we can do efficient aligned allocation by using
// the 2TiB to 30TiB area to allocate those. If we have at least 46 bits of virtual address
// space (64TiB) we use this technique. (but see issue #939)
#if (MI_INTPTR_SIZE >= 8) && !defined(MI_NO_ALIGNED_HINT)
static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
// Return a MI_SEGMENT_SIZE aligned address that is probably available.
// If this returns NULL, the OS will determine the address, but on some OSes it may not be
// properly aligned, which can be more costly as it needs to be adjusted afterwards.
// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372))
#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
#define MI_HINT_AREA ((uintptr_t)4 << 40) // up to 6TiB (since before Win8 there is "only" 8TiB available to processes)
#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size)
{
if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
if (mi_os_mem_config.virtual_address_bits < 46) return NULL; // < 64TiB virtual address space
size = _mi_align_up(size, MI_SEGMENT_SIZE);
if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
#if (MI_SECURE>0)
size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
#endif
uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize
uintptr_t init = MI_HINT_BASE;
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif
uintptr_t expected = hint + size;
mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
}
if (hint%try_alignment != 0) return NULL;
return (void*)hint;
}
#else
void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
MI_UNUSED(try_alignment); MI_UNUSED(size);
return NULL;
}
#endif
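#if 0
// Illustrative sketch (not part of this diff): how a caller could consume the
// hint on an mmap-based system. The OS is free to ignore the hint address, in
// which case the allocation still succeeds elsewhere and any misalignment is
// fixed up by the over-allocation path in `mi_os_prim_alloc_aligned`.
#include <sys/mman.h>
static void* example_alloc_with_hint(size_t size) {
  void* hint = _mi_os_get_aligned_hint(MI_SEGMENT_SIZE, size);  // may be NULL
  void* p = mmap(hint, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);  // hint may be ignored
  return (p == MAP_FAILED ? NULL : p);
}
#endif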
// In secure mode, return the size of a guard page, otherwise 0
size_t _mi_os_secure_guard_page_size(void) {
#if MI_SECURE > 0
return _mi_os_guard_page_size();
#else
return 0;
#endif
}
// In secure mode, try to decommit an area and output a warning if this fails.
bool _mi_os_secure_guard_page_set_at(void* addr, bool is_pinned) {
if (addr == NULL) return true;
#if MI_SECURE > 0
const bool ok = (is_pinned ? false : _mi_os_decommit(addr, _mi_os_secure_guard_page_size()));
if (!ok) {
_mi_error_message(EINVAL, "secure level %d, but failed to commit guard page (at %p of size %zu)\n", MI_SECURE, addr, _mi_os_secure_guard_page_size());
}
return ok;
#else
MI_UNUSED(is_pinned);
return true;
#endif
}
// In secure mode, try to decommit an area and output a warning if this fails.
bool _mi_os_secure_guard_page_set_before(void* addr, bool is_pinned) {
return _mi_os_secure_guard_page_set_at((uint8_t*)addr - _mi_os_secure_guard_page_size(), is_pinned);
}
// In secure mode, try to recommit an area
bool _mi_os_secure_guard_page_reset_at(void* addr) {
if (addr == NULL) return true;
#if MI_SECURE > 0
return _mi_os_commit(addr, _mi_os_secure_guard_page_size(), NULL);
#else
return true;
#endif
}
// In secure mode, try to recommit an area
bool _mi_os_secure_guard_page_reset_before(void* addr) {
return _mi_os_secure_guard_page_reset_at((uint8_t*)addr - _mi_os_secure_guard_page_size());
}
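// Usage pattern (illustrative, not part of this diff): an arena can place a
// decommitted guard page directly before a block so that underflow writes
// fault, and recommit it when the block is released:
//   _mi_os_secure_guard_page_set_before(block, false /* is_pinned */);
//   ... use block ...
//   _mi_os_secure_guard_page_reset_before(block);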
/* -----------------------------------------------------------
Free memory
@ -186,10 +167,10 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me
void* base = addr;
// different base? (due to alignment)
if (memid.mem.os.base != base) {
mi_assert(memid.mem.os.base <= addr);
base = memid.mem.os.base;
const size_t diff = (uint8_t*)addr - (uint8_t*)memid.mem.os.base;
if (memid.mem.os.size==0) {
csize += diff;
}
if (still_committed) {
@ -236,8 +217,6 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm
_mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large);
}
mi_os_stat_counter_increase(mmap_calls, 1);
if (p != NULL) {
mi_os_stat_increase(reserved, size);
@ -270,18 +249,24 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
// try first with a requested alignment hint (this will usually be aligned directly on Win 10+ or BSD)
void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero);
if (p == NULL) return NULL;
// try a direct allocation if the alignment is below the default, or if larger than 1/8 fraction of the size.
const bool try_direct_alloc = (alignment <= mi_os_mem_config.alloc_granularity || alignment > size/8);
void* p = NULL;
if (try_direct_alloc) {
p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero);
}
// aligned already?
if (((uintptr_t)p % alignment) == 0) {
if (p != NULL && ((uintptr_t)p % alignment) == 0) {
*base = p;
}
else {
// if not aligned, free it, overallocate, and unmap around it
#if !MI_TRACK_ASAN
_mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
if (try_direct_alloc) {
_mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit);
}
#endif
if (p != NULL) { mi_os_prim_free(p, size, (commit ? size : 0)); }
if (size >= (SIZE_MAX - alignment)) return NULL; // overflow
@ -293,10 +278,10 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
if (p == NULL) return NULL;
// set p to the aligned part in the full region
// note: this is dangerous on Windows as VirtualFree needs the actual base pointer
// this is handled though by having the `base` field in the memid's
// note: on Windows VirtualFree needs the actual base pointer
// this is handled by having the `base` field in the memid.
*base = p; // remember the base
p = mi_align_up_ptr(p, alignment);
p = _mi_align_up_ptr(p, alignment);
// explicitly commit only the aligned part
if (commit) {
@ -309,7 +294,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit
if (p == NULL) return NULL;
// and selectively unmap parts around the over-allocated area.
void* aligned_p = mi_align_up_ptr(p, alignment);
void* aligned_p = _mi_align_up_ptr(p, alignment);
size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p;
size_t mid_size = _mi_align_up(size, _mi_os_page_size());
size_t post_size = over_size - pre_size - mid_size;
@ -339,7 +324,7 @@ void* _mi_os_alloc(size_t size, mi_memid_t* memid) {
bool os_is_zero = false;
void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero);
if (p != NULL) {
*memid = _mi_memid_create_os(true, os_is_zero, os_is_large);
*memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large);
}
return p;
}
@ -355,9 +340,9 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
bool os_is_large = false;
bool os_is_zero = false;
void* os_base = NULL;
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base );
void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base);
if (p != NULL) {
*memid = _mi_memid_create_os(commit, os_is_zero, os_is_large);
*memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large);
memid->mem.os.base = os_base;
// memid->mem.os.alignment = alignment;
memid->mem.os.size += ((uint8_t*)p - (uint8_t*)os_base); // todo: return from prim_alloc_aligned
@ -365,6 +350,18 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
return p;
}
void* _mi_os_zalloc(size_t size, mi_memid_t* memid) {
void* p = _mi_os_alloc(size, memid);
if (p == NULL) return NULL;
// zero the OS memory if needed
if (!memid->initially_zero) {
_mi_memzero_aligned(p, size);
memid->initially_zero = true;
}
return p;
}
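// Usage (illustrative): callers that need zero'd OS memory go through
// `_mi_os_zalloc` so pages the OS already returns zeroed are not cleared a
// second time:
//   mi_memid_t memid;
//   void* p = _mi_os_zalloc(n, &memid);  // p != NULL implies zeroed memory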
/* -----------------------------------------------------------
OS aligned allocation with an offset. This is used
for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc
@ -374,11 +371,9 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo
----------------------------------------------------------- */
void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid) {
mi_assert(offset <= MI_SEGMENT_SIZE);
mi_assert(offset <= size);
mi_assert((alignment % _mi_os_page_size()) == 0);
*memid = _mi_memid_none();
if (offset > MI_SEGMENT_SIZE) return NULL;
if (offset == 0) {
// regular aligned allocation
return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid);
@ -411,11 +406,11 @@ static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size,
if (newsize != NULL) *newsize = 0;
if (size == 0 || addr == NULL) return NULL;
// page align conservatively within the range
void* start = (conservative ? mi_align_up_ptr(addr, _mi_os_page_size())
// page align conservatively within the range, or liberally so the pages may extend outside the range
void* start = (conservative ? _mi_align_up_ptr(addr, _mi_os_page_size())
: mi_align_down_ptr(addr, _mi_os_page_size()));
void* end = (conservative ? mi_align_down_ptr((uint8_t*)addr + size, _mi_os_page_size())
: mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
: _mi_align_up_ptr((uint8_t*)addr + size, _mi_os_page_size()));
ptrdiff_t diff = (uint8_t*)end - (uint8_t*)start;
if (diff <= 0) return NULL;
@ -526,7 +521,7 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, size_t stat_size)
return needs_recommit;
}
else {
if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed
if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed (on Windows, we cannot reset uncommitted memory)
_mi_os_reset(p, size);
}
return false; // needs no recommit
@ -591,15 +586,14 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
start = huge_start;
if (start == 0) {
// Initialize the start address after the 8TiB area
start = ((uintptr_t)32 << 40); // 32TiB virtual start address
start = ((uintptr_t)8 << 40); // 8TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of huge pages unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap());
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB
#endif
}
end = start + size;
mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
} while (!mi_atomic_cas_strong_acq_rel(&mi_huge_start, &huge_start, end));
} while (!mi_atomic_cas_weak_acq_rel(&mi_huge_start, &huge_start, end));
if (total_size != NULL) *total_size = size;
return (uint8_t*)start;
@ -612,7 +606,7 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
}
#endif
// Allocate MI_SEGMENT_SIZE aligned huge pages
// Allocate MI_ARENA_SLICE_ALIGN aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) {
*memid = _mi_memid_none();
if (psize != NULL) *psize = 0;
@ -674,7 +668,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; }
if (page != 0) {
mi_assert(start != NULL);
*memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */);
*memid = _mi_memid_create_os(start, *psize, true /* is committed */, all_zero, true /* is_large */);
memid->memkind = MI_MEM_OS_HUGE;
mi_assert(memid->is_pinned);
#ifdef MI_TRACK_ASAN
@ -727,3 +721,49 @@ int _mi_os_numa_node_get(void) {
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return (int)numa_node;
}
/* ----------------------------------------------------------------------------
Public API
-----------------------------------------------------------------------------*/
#if 0
mi_decl_export void* mi_os_alloc(size_t size, bool commit, size_t* full_size) {
return mi_os_alloc_aligned(size, mi_os_mem_config.alloc_granularity, commit, NULL, full_size);
}
static void* mi_os_alloc_aligned_ex(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_committed, bool* is_pinned, void** base, size_t* full_size) {
mi_memid_t memid = _mi_memid_none();
void* p = _mi_os_alloc_aligned(size, alignment, commit, allow_large, &memid);
if (p == NULL) return p;
if (is_committed != NULL) { *is_committed = memid.initially_committed; }
if (is_pinned != NULL) { *is_pinned = memid.is_pinned; }
if (base != NULL) { *base = memid.mem.os.base; }
if (full_size != NULL) { *full_size = memid.mem.os.size; }
if (!memid.initially_zero && memid.initially_committed) {
_mi_memzero_aligned(memid.mem.os.base, memid.mem.os.size);
}
return p;
}
mi_decl_export void* mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, void** base, size_t* full_size) {
return mi_os_alloc_aligned_ex(size, alignment, commit, false, NULL, NULL, base, full_size);
}
mi_decl_export void* mi_os_alloc_aligned_allow_large(size_t size, size_t alignment, bool commit, bool* is_committed, bool* is_pinned, void** base, size_t* full_size) {
return mi_os_alloc_aligned_ex(size, alignment, commit, true, is_committed, is_pinned, base, full_size);
}
mi_decl_export void mi_os_free(void* p, size_t size) {
if (p==NULL || size == 0) return;
mi_memid_t memid = _mi_memid_create_os(p, size, true, false, false);
_mi_os_free(p, size, memid);
}
mi_decl_export void mi_os_commit(void* p, size_t size) {
_mi_os_commit(p, size, NULL);
}
mi_decl_export void mi_os_decommit(void* p, size_t size) {
_mi_os_decommit(p, size);
}
#endif

src/page-map.c (new file)
View file

@ -0,0 +1,334 @@
/*----------------------------------------------------------------------------
Copyright (c) 2023-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "bitmap.h"
#if MI_PAGE_MAP_FLAT
// The page-map contains a byte for each 64 KiB slice in the address space.
// For an address `a` where `ofs = _mi_page_map[a >> 16]`:
// 0 = unused
// 1 = the slice at `a & ~0xFFFF` is the start of a mimalloc page.
// 1 < ofs <= 127 = the slice is part of a page that starts at `(((a>>16) - (ofs - 1)) << 16)`.
//
// 1 byte per slice => a 1 TiB address space needs a 2^(40-16) = 16 MiB page map.
// A full 256 TiB address space (48 bit) needs a 4 GiB page map.
// A full 4 GiB address space (32 bit) needs only a 64 KiB page map.
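#if 0
// Worked example (illustrative, not part of this change), assuming the 64 KiB
// slices (shift 16) described above: resolve a pointer to its page start.
static mi_page_t* example_flat_lookup(const void* p) {
  const uintptr_t idx = (uintptr_t)p >> 16;   // which 64 KiB slice?
  const uint8_t   ofs = _mi_page_map[idx];    // 0 means: not a mimalloc page
  if (ofs == 0) return NULL;
  return (mi_page_t*)((idx - (ofs - 1)) << 16);  // back up to the first slice
}
#endif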
mi_decl_cache_align uint8_t* _mi_page_map = NULL;
static void* mi_page_map_max_address = NULL;
static mi_memid_t mi_page_map_memid;
#define MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT MI_ARENA_SLICE_SIZE
static mi_bitmap_t* mi_page_map_commit; // one bit per committed 64 KiB entries
static void mi_page_map_ensure_committed(size_t idx, size_t slice_count);
bool _mi_page_map_init(void) {
size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
if (vbits == 0) {
vbits = _mi_os_virtual_address_bits();
#if MI_ARCH_X64 // canonical address is limited to the first 128 TiB
if (vbits >= 48) { vbits = 47; }
#endif
}
// Allocate the page map and commit bits
mi_page_map_max_address = (void*)(vbits >= MI_SIZE_BITS ? (SIZE_MAX - MI_ARENA_SLICE_SIZE + 1) : (MI_PU(1) << vbits));
const size_t page_map_size = (MI_ZU(1) << (vbits - MI_ARENA_SLICE_SHIFT));
const bool commit = (page_map_size <= 1*MI_MiB || mi_option_is_enabled(mi_option_pagemap_commit)); // _mi_os_has_overcommit(); // commit on-access on Linux systems?
const size_t commit_bits = _mi_divide_up(page_map_size, MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
const size_t bitmap_size = (commit ? 0 : mi_bitmap_size(commit_bits, NULL));
const size_t reserve_size = bitmap_size + page_map_size;
uint8_t* const base = (uint8_t*)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid);
if (base==NULL) {
_mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
return false;
}
if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
_mi_warning_message("internal: the page map was committed but not zero initialized!\n");
_mi_memzero_aligned(base, reserve_size);
}
if (bitmap_size > 0) {
mi_page_map_commit = (mi_bitmap_t*)base;
_mi_os_commit(mi_page_map_commit, bitmap_size, NULL);
mi_bitmap_init(mi_page_map_commit, commit_bits, true);
}
_mi_page_map = base + bitmap_size;
// commit the first part so NULL pointers get resolved without an access violation
if (!commit) {
mi_page_map_ensure_committed(0, 1);
}
_mi_page_map[0] = 1; // so _mi_ptr_page(NULL) == NULL
mi_assert_internal(_mi_ptr_page(NULL)==NULL);
return true;
}
static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) {
// is the page map area that contains the page address committed?
// we always set the commit bits so we can track what ranges are in-use.
// we only actually commit if the map wasn't committed fully already.
if (mi_page_map_commit != NULL) {
const size_t commit_idx = idx / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
const size_t commit_idx_hi = (idx + slice_count - 1) / MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
for (size_t i = commit_idx; i <= commit_idx_hi; i++) { // per bit to avoid crossing over bitmap chunks
if (mi_bitmap_is_clear(mi_page_map_commit, i)) {
// this may race, in which case we do multiple commits (which is ok)
bool is_zero;
uint8_t* const start = _mi_page_map + (i * MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT);
const size_t size = MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT;
_mi_os_commit(start, size, &is_zero);
if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start, size); }
mi_bitmap_set(mi_page_map_commit, i);
}
}
}
#if MI_DEBUG > 0
_mi_page_map[idx] = 0;
_mi_page_map[idx+slice_count-1] = 0;
#endif
}
static size_t mi_page_map_get_idx(mi_page_t* page, uint8_t** page_start, size_t* slice_count) {
size_t page_size;
*page_start = mi_page_area(page, &page_size);
if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer
*slice_count = mi_slice_count_of_size(page_size) + (((uint8_t*)*page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks
return _mi_page_map_index(page);
}
void _mi_page_map_register(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access!
if mi_unlikely(_mi_page_map == NULL) {
if (!_mi_page_map_init()) return;
}
mi_assert(_mi_page_map!=NULL);
uint8_t* page_start;
size_t slice_count;
const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);
mi_page_map_ensure_committed(idx, slice_count);
// set the offsets
for (size_t i = 0; i < slice_count; i++) {
mi_assert_internal(i < 128);
_mi_page_map[idx + i] = (uint8_t)(i+1);
}
}
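// Example (illustrative): a page spanning 3 slices at index `idx` gets the
// entries [1,2,3], so a lookup can map any interior pointer back to the
// first slice via `ofs - 1`.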
void _mi_page_map_unregister(mi_page_t* page) {
mi_assert_internal(_mi_page_map != NULL);
// get index and count
uint8_t* page_start;
size_t slice_count;
const size_t idx = mi_page_map_get_idx(page, &page_start, &slice_count);
// unset the offsets
_mi_memzero(_mi_page_map + idx, slice_count);
}
void _mi_page_map_unregister_range(void* start, size_t size) {
const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
const uintptr_t index = _mi_page_map_index(start);
mi_page_map_ensure_committed(index, slice_count); // we commit the range in total; todo: scan the commit bits and clear only those ranges?
_mi_memzero(&_mi_page_map[index], slice_count);
}
mi_page_t* _mi_safe_ptr_page(const void* p) {
if mi_unlikely(p >= mi_page_map_max_address) return NULL;
const uintptr_t idx = _mi_page_map_index(p);
if mi_unlikely(mi_page_map_commit != NULL && !mi_bitmap_is_set(mi_page_map_commit, idx/MI_PAGE_MAP_ENTRIES_PER_COMMIT_BIT)) return NULL;
const uintptr_t ofs = _mi_page_map[idx];
if mi_unlikely(ofs == 0) return NULL;
return (mi_page_t*)((((uintptr_t)p >> MI_ARENA_SLICE_SHIFT) - ofs + 1) << MI_ARENA_SLICE_SHIFT);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return (_mi_safe_ptr_page(p) != NULL);
}
#else
// A 2-level page map
#define MI_PAGE_MAP_SUB_SIZE (MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*))
mi_decl_cache_align mi_page_t*** _mi_page_map;
static void* mi_page_map_max_address;
static mi_memid_t mi_page_map_memid;
static _Atomic(mi_bfield_t) mi_page_map_commit;
static mi_page_t** mi_page_map_ensure_committed(size_t idx);
static mi_page_t** mi_page_map_ensure_at(size_t idx);
static inline void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count);
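// Illustrative sketch (not part of this change): the two-level lookup splits
// the slice index of an address into a top-level index and a sub-map index.
// The exact split below is an assumption that mirrors how
// `_mi_page_map_index(p, &sub_idx)` is used in `_mi_safe_ptr_page`:
//   size_t slice   = (uintptr_t)p >> MI_ARENA_SLICE_SHIFT;
//   size_t sub_idx = slice & (MI_PAGE_MAP_SUB_COUNT - 1);
//   size_t idx     = slice >> MI_PAGE_MAP_SUB_SHIFT;
//   mi_page_t* page = _mi_page_map[idx][sub_idx];  // sub map may be NULL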
bool _mi_page_map_init(void) {
size_t vbits = (size_t)mi_option_get_clamp(mi_option_max_vabits, 0, MI_SIZE_BITS);
if (vbits == 0) {
vbits = _mi_os_virtual_address_bits();
#if MI_ARCH_X64 // canonical address is limited to the first 128 TiB
if (vbits >= 48) { vbits = 47; }
#endif
}
// Allocate the page map and commit bits
mi_assert(MI_MAX_VABITS >= vbits);
mi_page_map_max_address = (void*)(vbits >= MI_SIZE_BITS ? (SIZE_MAX - MI_ARENA_SLICE_SIZE + 1) : (MI_PU(1) << vbits));
const size_t page_map_count = (MI_ZU(1) << (vbits - MI_PAGE_MAP_SUB_SHIFT - MI_ARENA_SLICE_SHIFT));
mi_assert(page_map_count <= MI_PAGE_MAP_COUNT);
const size_t os_page_size = _mi_os_page_size();
const size_t page_map_size = _mi_align_up( page_map_count * sizeof(mi_page_t**), os_page_size);
const size_t reserve_size = page_map_size + os_page_size;
const bool commit = page_map_size <= 64*MI_KiB ||
mi_option_is_enabled(mi_option_pagemap_commit) || _mi_os_has_overcommit();
_mi_page_map = (mi_page_t***)_mi_os_alloc_aligned(reserve_size, 1, commit, true /* allow large */, &mi_page_map_memid);
if (_mi_page_map==NULL) {
_mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB);
return false;
}
if (mi_page_map_memid.initially_committed && !mi_page_map_memid.initially_zero) {
_mi_warning_message("internal: the page map was committed but not zero initialized!\n");
_mi_memzero_aligned(_mi_page_map, page_map_size);
}
mi_atomic_store_release(&mi_page_map_commit, (commit ? ~MI_ZU(0) : MI_ZU(0)));
// note: for the NULL range we only commit one OS page (in the map and sub)
if (!mi_page_map_memid.initially_committed) {
_mi_os_commit(&_mi_page_map[0], os_page_size, NULL); // commit first part of the map
}
_mi_page_map[0] = (mi_page_t**)((uint8_t*)_mi_page_map + page_map_size); // we reserved 2 sub maps at the end already
if (!mi_page_map_memid.initially_committed) {
_mi_os_commit(_mi_page_map[0], os_page_size, NULL); // only first OS page
}
_mi_page_map[0][0] = (mi_page_t*)&_mi_page_empty; // caught in `mi_free`
mi_assert_internal(_mi_ptr_page(NULL)==&_mi_page_empty);
return true;
}
#define MI_PAGE_MAP_ENTRIES_PER_CBIT (MI_PAGE_MAP_COUNT / MI_BFIELD_BITS)
static inline bool mi_page_map_is_committed(size_t idx, size_t* pbit_idx) {
mi_bfield_t commit = mi_atomic_load_relaxed(&mi_page_map_commit);
const size_t bit_idx = idx/MI_PAGE_MAP_ENTRIES_PER_CBIT;
mi_assert_internal(bit_idx < MI_BFIELD_BITS);
if (pbit_idx != NULL) { *pbit_idx = bit_idx; }
return ((commit & (MI_ZU(1) << bit_idx)) != 0);
}
static mi_page_t** mi_page_map_ensure_committed(size_t idx) {
size_t bit_idx;
if mi_unlikely(!mi_page_map_is_committed(idx, &bit_idx)) {
uint8_t* start = (uint8_t*)&_mi_page_map[bit_idx * MI_PAGE_MAP_ENTRIES_PER_CBIT];
_mi_os_commit(start, MI_PAGE_MAP_ENTRIES_PER_CBIT * sizeof(mi_page_t**), NULL);
mi_atomic_or_acq_rel(&mi_page_map_commit, MI_ZU(1) << bit_idx);
}
return _mi_page_map[idx];
}
static mi_page_t** mi_page_map_ensure_at(size_t idx) {
mi_page_t** sub = mi_page_map_ensure_committed(idx);
if mi_unlikely(sub == NULL) {
// sub map not yet allocated, alloc now
mi_memid_t memid;
sub = (mi_page_t**)_mi_os_alloc(MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), &memid);
mi_page_t** expect = NULL;
if (!mi_atomic_cas_strong_acq_rel(((_Atomic(mi_page_t**)*)&_mi_page_map[idx]), &expect, sub)) {
// another thread already allocated it.. free and continue
_mi_os_free(sub, MI_PAGE_MAP_SUB_COUNT * sizeof(mi_page_t*), memid);
sub = expect;
mi_assert_internal(sub!=NULL);
}
if (sub == NULL) {
_mi_error_message(EFAULT, "internal error: unable to extend the page map\n");
}
}
return sub;
}
static void mi_page_map_set_range(mi_page_t* page, size_t idx, size_t sub_idx, size_t slice_count) {
// is the page map area that contains the page address committed?
while (slice_count > 0) {
mi_page_t** sub = mi_page_map_ensure_at(idx);
// set the offsets for the page
while (sub_idx < MI_PAGE_MAP_SUB_COUNT) {
sub[sub_idx] = page;
slice_count--; if (slice_count == 0) return;
sub_idx++;
}
idx++; // move on to the next top-level entry
sub_idx = 0;
}
}
static size_t mi_page_map_get_idx(mi_page_t* page, size_t* sub_idx, size_t* slice_count) {
size_t page_size;
uint8_t* page_start = mi_page_area(page, &page_size);
if (page_size > MI_LARGE_PAGE_SIZE) { page_size = MI_LARGE_PAGE_SIZE - MI_ARENA_SLICE_SIZE; } // furthest interior pointer
*slice_count = mi_slice_count_of_size(page_size) + ((page_start - (uint8_t*)page)/MI_ARENA_SLICE_SIZE); // add for large aligned blocks
return _mi_page_map_index(page, sub_idx);
}
void _mi_page_map_register(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_page_map != NULL); // should be initialized before multi-thread access!
if mi_unlikely(_mi_page_map == NULL) {
if (!_mi_page_map_init()) return;
}
mi_assert(_mi_page_map!=NULL);
size_t slice_count;
size_t sub_idx;
const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
mi_page_map_set_range(page, idx, sub_idx, slice_count);
}
void _mi_page_map_unregister(mi_page_t* page) {
mi_assert_internal(_mi_page_map != NULL);
mi_assert_internal(page != NULL);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
if mi_unlikely(_mi_page_map == NULL) return;
// get index and count
size_t slice_count;
size_t sub_idx;
const size_t idx = mi_page_map_get_idx(page, &sub_idx, &slice_count);
// unset the offsets
mi_page_map_set_range(NULL, idx, sub_idx, slice_count);
}
void _mi_page_map_unregister_range(void* start, size_t size) {
if mi_unlikely(_mi_page_map == NULL) return;
const size_t slice_count = _mi_divide_up(size, MI_ARENA_SLICE_SIZE);
size_t sub_idx;
const uintptr_t idx = _mi_page_map_index(start, &sub_idx);
mi_page_map_set_range(NULL, idx, sub_idx, slice_count); // todo: avoid committing if not already committed?
}
// Return NULL for invalid pointers
mi_page_t* _mi_safe_ptr_page(const void* p) {
if (p==NULL) return NULL;
if mi_unlikely(p >= mi_page_map_max_address) return NULL;
size_t sub_idx;
const size_t idx = _mi_page_map_index(p,&sub_idx);
if mi_unlikely(!mi_page_map_is_committed(idx,NULL)) return NULL;
mi_page_t** const sub = _mi_page_map[idx];
if mi_unlikely(sub==NULL) return NULL;
return sub[sub_idx];
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return (_mi_safe_ptr_page(p) != NULL);
}
#endif

View file

@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MI_IN_PAGE_C
#error "this file should be included from 'page.c'"
// include to help an IDE
#include "mimalloc.h"
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
#endif
@ -38,15 +38,19 @@ terms of the MIT license. A copy of the license can be found in the file
static inline bool mi_page_queue_is_huge(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+sizeof(uintptr_t)));
return (pq->block_size == (MI_LARGE_MAX_OBJ_SIZE+sizeof(uintptr_t)));
}
static inline bool mi_page_queue_is_full(const mi_page_queue_t* pq) {
return (pq->block_size == (MI_LARGE_OBJ_SIZE_MAX+(2*sizeof(uintptr_t))));
return (pq->block_size == (MI_LARGE_MAX_OBJ_SIZE+(2*sizeof(uintptr_t))));
}
static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
return (pq->block_size > MI_LARGE_OBJ_SIZE_MAX);
return (pq->block_size > MI_LARGE_MAX_OBJ_SIZE);
}
static inline size_t mi_page_queue_count(const mi_page_queue_t* pq) {
return pq->count;
}
/* -----------------------------------------------------------
@ -57,8 +61,8 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) {
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
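// Example (illustrative): on a 64-bit system a request of 100 bytes gives
// wsize = 13 (13*8 = 104 >= 100), which falls in a small-object bin; any
// wsize above MI_LARGE_MAX_OBJ_WSIZE maps to MI_BIN_HUGE.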
static inline size_t mi_bin(size_t size) {
static mi_decl_noinline size_t mi_bin(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
#if defined(MI_ALIGN4W)
if mi_likely(wsize <= 4) {
return (wsize <= 1 ? 1 : (wsize+1)&~1); // round to double word sizes
@ -72,7 +76,7 @@ static inline size_t mi_bin(size_t size) {
return (wsize == 0 ? 1 : wsize);
}
#endif
else if mi_unlikely(wsize > MI_LARGE_OBJ_WSIZE_MAX) {
else if mi_unlikely(wsize > MI_LARGE_MAX_OBJ_WSIZE) {
return MI_BIN_HUGE;
}
else {
@ -106,8 +110,8 @@ size_t _mi_bin_size(size_t bin) {
}
// Good size for allocation
size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_LARGE_OBJ_SIZE_MAX) {
mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept {
if (size <= MI_LARGE_MAX_OBJ_SIZE) {
return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE));
}
else {
@ -136,6 +140,34 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
}
#endif
bool _mi_page_queue_is_valid(mi_heap_t* heap, const mi_page_queue_t* pq) {
MI_UNUSED_RELEASE(heap);
if (pq==NULL) return false;
size_t count = 0; MI_UNUSED_RELEASE(count);
mi_page_t* prev = NULL; MI_UNUSED_RELEASE(prev);
for (mi_page_t* page = pq->first; page != NULL; page = page->next) {
mi_assert_internal(page->prev == prev);
if (mi_page_is_in_full(page)) {
mi_assert_internal(_mi_wsize_from_size(pq->block_size) == MI_LARGE_MAX_OBJ_WSIZE + 2);
}
else if (mi_page_is_huge(page)) {
mi_assert_internal(_mi_wsize_from_size(pq->block_size) == MI_LARGE_MAX_OBJ_WSIZE + 1);
}
else {
mi_assert_internal(mi_page_block_size(page) == pq->block_size);
}
mi_assert_internal(page->heap == heap);
if (page->next == NULL) {
mi_assert_internal(pq->last == page);
}
count++;
prev = page;
}
mi_assert_internal(pq->count == count);
return true;
}
static size_t mi_page_bin(const mi_page_t* page) {
const size_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
mi_assert_internal(bin <= MI_BIN_FULL);
@ -210,8 +242,9 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(queue, page));
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
mi_assert_internal(queue->count >= 1);
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_heap_t* heap = mi_page_heap(page);
if (page->prev != NULL) page->prev->next = page->next;
@ -224,9 +257,9 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
mi_heap_queue_first_update(heap,queue);
}
heap->page_count--;
queue->count--;
page->next = NULL;
page->prev = NULL;
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), NULL);
mi_page_set_in_full(page,false);
}
@ -242,7 +275,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
// mi_atomic_store_ptr_release(mi_atomic_cast(void*, &page->heap), heap);
page->next = queue->first;
page->prev = NULL;
if (queue->first != NULL) {
@ -253,12 +286,42 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
else {
queue->first = queue->last = page;
}
queue->count++;
// update direct
mi_heap_queue_first_update(heap, queue);
heap->page_count++;
}
static void mi_page_queue_push_at_end(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(!mi_page_queue_contains(queue, page));
mi_assert_internal(mi_page_block_size(page) == queue->block_size ||
(mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue));
page->prev = queue->last;
page->next = NULL;
if (queue->last != NULL) {
mi_assert_internal(queue->last->next == NULL);
queue->last->next = page;
queue->last = page;
}
else {
queue->first = queue->last = page;
}
queue->count++;
// update direct
if (queue->first == page) {
mi_heap_queue_first_update(heap, queue);
}
heap->page_count++;
}
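// Design note (inferred, not stated in this change): reclaimed pages are
// pushed at the end of the queue (see `_mi_heap_page_reclaim`) so allocation
// keeps preferring pages that are already in use over freshly reclaimed ones.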
static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_queue_contains(queue, page));
@ -270,6 +333,7 @@ static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue,
static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* from, bool enqueue_at_end, mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(from->count >= 1);
mi_assert_expensive(mi_page_queue_contains(from, page));
mi_assert_expensive(!mi_page_queue_contains(to, page));
const size_t bsize = mi_page_block_size(page);
@ -292,8 +356,10 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t*
mi_assert_internal(mi_heap_contains_queue(heap, from));
mi_heap_queue_first_update(heap, from);
}
from->count--;
// insert into `to`
to->count++;
if (enqueue_at_end) {
// enqueue at the end
page->prev = to->last;
@ -317,8 +383,8 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t*
page->prev = to->first;
page->next = next;
to->first->next = page;
if (next != NULL) {
next->prev = page;
}
else {
to->last = page;
@ -356,15 +422,10 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
// set append pages to new heap and count
size_t count = 0;
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
// inline `mi_page_set_heap` to avoid wrong assertion during absorption;
// in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive.
mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
// set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a
// side effect that it spins until any DELAYED_FREEING is finished. This ensures
// that after appending only the new heap will be used for delayed free operations.
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false);
mi_page_set_heap(page, heap);
count++;
}
mi_assert_internal(count == append->count);
if (pq->last==NULL) {
// take over afresh
@ -381,5 +442,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
append->first->prev = pq->last;
pq->last = append->last;
}
pq->count += append->count;
return count;
}

View file

@ -36,14 +36,15 @@ static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_sta
return (mi_block_t*)((uint8_t*)page_start + (i * block_size));
}
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld);
//static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page);
#if (MI_DEBUG>=3)
static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
mi_assert_internal(_mi_ptr_page(page) == page);
size_t count = 0;
while (head != NULL) {
mi_assert_internal(page == _mi_ptr_page(head));
mi_assert_internal((uint8_t*)head - (uint8_t*)page > (ptrdiff_t)MI_LARGE_PAGE_SIZE || page == _mi_ptr_page(head));
count++;
head = mi_block_next(page, head);
}
@ -59,7 +60,7 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) {
static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
size_t psize;
uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
uint8_t* page_area = mi_page_area(page, &psize);
mi_block_t* start = (mi_block_t*)page_area;
mi_block_t* end = (mi_block_t*)(page_area + psize);
while(p != NULL) {
@ -83,10 +84,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->capacity <= page->reserved);
// const size_t bsize = mi_page_block_size(page);
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = mi_page_start(page);
mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL));
mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE));
// uint8_t* start = mi_page_start(page);
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
@ -121,86 +119,38 @@ bool _mi_page_is_valid(mi_page_t* page) {
#if MI_SECURE
mi_assert_internal(page->keys[0] != 0);
#endif
if (mi_page_heap(page)!=NULL) {
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0);
#if MI_HUGE_PAGE_ABANDON
if (segment->page_kind != MI_PAGE_HUGE)
#endif
if (!mi_page_is_abandoned(page)) {
//mi_assert_internal(!_mi_process_is_initialized);
{
mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_is_huge(page) || mi_page_is_in_full(page));
// mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
}
}
return true;
}
#endif
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
while (!_mi_page_try_use_delayed_free(page, delay, override_never)) {
mi_atomic_yield();
}
}
bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
mi_thread_free_t tfreex;
mi_delayed_t old_delay;
mi_thread_free_t tfree;
size_t yield_count = 0;
do {
tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
tfreex = mi_tf_set_delayed(tfree, delay);
old_delay = mi_tf_delayed(tfree);
if mi_unlikely(old_delay == MI_DELAYED_FREEING) {
if (yield_count >= 4) return false; // give up after 4 tries
yield_count++;
mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
// tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
}
else if (delay == old_delay) {
break; // avoid atomic operation if already equal
}
else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
break; // leave never-delayed flag set
}
} while ((old_delay == MI_DELAYED_FREEING) ||
!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex));
return true; // success
}
/* -----------------------------------------------------------
Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */
// Collect the local `thread_free` list using an atomic exchange.
// Note: The exchange must be done atomically as this is used right after
// moving to the full list in `mi_page_collect_ex` and we need to
// ensure that there was no race where the page became unfull just before the move.
static void _mi_page_thread_free_collect(mi_page_t* page)
static void mi_page_thread_collect_to_local(mi_page_t* page, mi_block_t* head)
{
mi_block_t* head;
mi_thread_free_t tfreex;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
head = mi_tf_block(tfree);
tfreex = mi_tf_set_block(tfree,NULL);
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex));
// return if the list is empty
if (head == NULL) return;
// find the tail -- also to get a proper count (without data races)
// find the last block in the list -- also to get a proper use count (without data races)
size_t max_count = page->capacity; // cannot collect more than capacity
size_t count = 1;
mi_block_t* tail = head;
mi_block_t* last = head;
mi_block_t* next;
while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) {
while ((next = mi_block_next(page, last)) != NULL && count <= max_count) {
count++;
tail = next;
last = next;
}
// if `count > max_count` there was a memory corruption (possibly infinite list due to double multi-threaded free)
if (count > max_count) {
_mi_error_message(EFAULT, "corrupted thread-free list\n");
@ -208,20 +158,37 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
}
// and append the current local free list
mi_block_set_next(page,tail, page->local_free);
mi_block_set_next(page, last, page->local_free);
page->local_free = head;
// update counts now
page->used -= (uint16_t)count;
mi_assert_internal(count <= UINT16_MAX);
page->used = page->used - (uint16_t)count;
}
// Collect the local `thread_free` list using an atomic exchange.
static void mi_page_thread_free_collect(mi_page_t* page)
{
// atomically capture the thread free list
mi_block_t* head;
mi_thread_free_t tfreex;
mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
do {
head = mi_tf_block(tfree);
if mi_likely(head == NULL) return; // return if the list is empty
tfreex = mi_tf_create(NULL,mi_tf_is_owned(tfree)); // set the thread free list to NULL
} while (!mi_atomic_cas_weak_acq_rel(&page->xthread_free, &tfree, tfreex)); // release is enough?
mi_assert_internal(head != NULL);
// and move it to the local list
mi_page_thread_collect_to_local(page, head);
}
void _mi_page_free_collect(mi_page_t* page, bool force) {
mi_assert_internal(page!=NULL);
// collect the thread free list
if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation
_mi_page_thread_free_collect(page);
}
mi_page_thread_free_collect(page);
// and the local free list
if (page->local_free != NULL) {
@ -248,50 +215,116 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
mi_assert_internal(!force || page->local_free == NULL);
}
// Collect elements in the thread-free list starting at `head`. This is an optimized
// version of `_mi_page_free_collect` to be used from `free.c:_mi_free_collect_mt` that avoids atomic access to `xthread_free`.
//
// `head` must be in the `xthread_free` list. It will not collect `head` itself
// so the `used` count is not fully updated in general. However, if the `head` is
// the last remaining element, it will be collected and the used count will become `0` (so `mi_page_all_free` becomes true).
void _mi_page_free_collect_partly(mi_page_t* page, mi_block_t* head) {
if (head == NULL) return;
mi_block_t* next = mi_block_next(page,head); // we cannot collect the head element itself as `page->thread_free` may point to it (and we want to avoid atomic ops)
if (next != NULL) {
mi_block_set_next(page, head, NULL);
mi_page_thread_collect_to_local(page, next);
if (page->local_free != NULL && page->free == NULL) {
page->free = page->local_free;
page->local_free = NULL;
page->free_is_zero = false;
}
}
if (page->used == 1) {
// all elements are free'd since we skipped the `head` element itself
mi_assert_internal(mi_tf_block(mi_atomic_load_relaxed(&page->xthread_free)) == head);
mi_assert_internal(mi_block_next(page,head) == NULL);
_mi_page_free_collect(page, false); // collect the final element
}
}
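// Illustrative sketch (hypothetical caller, not part of this diff): a
// cross-thread free path that already observed the thread-free head can
// collect the remaining blocks without another atomic exchange:
//   mi_block_t* head = mi_tf_block(mi_atomic_load_relaxed(&page->xthread_free));
//   _mi_page_free_collect_partly(page, head);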
/* -----------------------------------------------------------
Page fresh and retire
----------------------------------------------------------- */
/*
// called from segments when reclaiming abandoned pages
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
// mi_page_set_heap(page, heap);
// _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
mi_assert_expensive(mi_page_is_valid_init(page));
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
#endif
// mi_assert_internal(mi_page_heap(page) == heap);
// mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
// TODO: push on full queue immediately if it is full?
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
}
*/
// called from `mi_free` on a reclaim, and fresh_alloc if we get an abandoned page
void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page)
{
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(mi_page_is_abandoned(page));
mi_page_set_heap(page,heap);
_mi_page_free_collect(page, false); // ensure used count is up to date
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
mi_page_queue_push_at_end(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
}
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) {
_mi_page_free(page, pq);
}
else {
mi_page_queue_remove(pq, page);
mi_heap_t* heap = page->heap;
mi_page_set_heap(page, NULL);
page->heap = heap; // don't set heap to NULL so we can reclaim_on_free within the same heap
_mi_arenas_page_abandon(page, heap->tld);
_mi_arenas_collect(false, false, heap->tld); // allow purging
}
}
// allocate a fresh page from a segment
static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) {
#if !MI_HUGE_PAGE_ABANDON
mi_assert_internal(pq != NULL);
mi_assert_internal(mi_heap_contains_queue(heap, pq));
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size);
mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_MAX_OBJ_SIZE || block_size == pq->block_size);
#endif
mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments);
mi_page_t* page = _mi_arenas_page_alloc(heap, block_size, page_alignment);
if (page == NULL) {
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
// out-of-memory
return NULL;
}
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
#endif
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
// a fresh page was found, initialize it
const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
mi_assert_internal(full_block_size >= block_size);
mi_page_init(heap, page, full_block_size, heap->tld);
if (mi_page_is_abandoned(page)) {
_mi_heap_page_reclaim(heap, page);
if (!mi_page_immediate_available(page)) {
if (mi_page_is_expandable(page)) {
mi_page_extend_free(heap, page);
}
else {
mi_assert(false); // should not happen?
return NULL;
}
}
}
else if (pq != NULL) {
mi_page_queue_push(heap, pq, page);
}
mi_heap_stat_increase(heap, pages, 1);
mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
mi_heap_stat_increase(heap, page_bins[mi_page_bin(page)], 1);
if (pq != NULL) { mi_page_queue_push(heap, pq, page); }
mi_assert_expensive(_mi_page_is_valid(page));
return page;
}
@ -302,55 +335,21 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0);
if (page==NULL) return NULL;
mi_assert_internal(pq->block_size==mi_page_block_size(page));
mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
mi_assert_internal(pq==mi_heap_page_queue_of(heap, page));
return page;
}
/* -----------------------------------------------------------
Do any delayed frees
(put there by other threads if they deallocated in a full page)
----------------------------------------------------------- */
void _mi_heap_delayed_free_all(mi_heap_t* heap) {
while (!_mi_heap_delayed_free_partial(heap)) {
mi_atomic_yield();
}
}
// returns true if all delayed frees were processed
bool _mi_heap_delayed_free_partial(mi_heap_t* heap) {
// take over the list (note: no atomic exchange since it is often NULL)
mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ };
bool all_freed = true;
// and free them all
while(block != NULL) {
mi_block_t* next = mi_block_nextx(heap,block, heap->keys);
// use internal free instead of regular one to keep stats etc correct
if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet
// reset the delayed_freeing flag; in that case delay it further by reinserting the current block
// into the delayed free list
all_freed = false;
mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free);
do {
mi_block_set_nextx(heap, block, dfree, heap->keys);
} while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block));
}
block = next;
}
return all_freed;
}
/* -----------------------------------------------------------
Unfull, abandon, free and retire
----------------------------------------------------------- */
// Move a page from the full list back to a regular list
// Move a page from the full list back to a regular list (called from thread-local mi_free)
void _mi_page_unfull(mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(mi_page_is_in_full(page));
mi_assert_internal(!mi_page_heap(page)->allow_page_abandon);
if (!mi_page_is_in_full(page)) return;
mi_heap_t* heap = mi_page_heap(page);
@ -366,87 +365,40 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(!mi_page_immediate_available(page));
mi_assert_internal(!mi_page_is_in_full(page));
if (mi_page_is_in_full(page)) return;
mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
_mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
}
// Abandon a page with used blocks at the end of a thread.
// Note: only call if it is ensured that no references exist from
// the `page->heap->thread_delayed_free` into this page.
// Currently only called through `mi_heap_collect_ex` which ensures this.
void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(mi_page_heap(page) != NULL);
mi_heap_t* pheap = mi_page_heap(page);
// remove from our page list
mi_segments_tld_t* segments_tld = &pheap->tld->segments;
mi_page_queue_remove(pq, page);
// page is no longer associated with our heap
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
mi_page_set_heap(page, NULL);
#if (MI_DEBUG>1) && !MI_TRACK_ENABLED
// check there are no references left..
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) {
mi_assert_internal(_mi_ptr_page(block) != page);
}
#endif
// and abandon it
mi_assert_internal(mi_page_heap(page) == NULL);
_mi_segment_page_abandon(page,segments_tld);
}
// force abandon a page
void _mi_page_force_abandon(mi_page_t* page) {
mi_heap_t* heap = mi_page_heap(page);
// mark page as not using delayed free
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// ensure this page is no longer in the heap delayed free list
_mi_heap_delayed_free_all(heap);
// We can still access the page meta-info even if it is freed as we ensure
// in `mi_segment_force_abandon` that the segment is not freed (yet)
if (page->capacity == 0) return; // it may have been freed now
// and now unlink it from the page queue and abandon (or free)
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
if (mi_page_all_free(page)) {
_mi_page_free(page, pq, false);
}
else {
if (heap->allow_page_abandon) {
// abandon full pages (this is the usual case in order to allow for sharing of memory between heaps)
_mi_page_abandon(page, pq);
}
else if (!mi_page_is_in_full(page)) {
// put full pages in a heap local queue (this is for heaps that cannot abandon, for example, if the heap can be destroyed)
mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
_mi_page_free_collect(page, false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
}
}
// Free a page with no more free blocks
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
// mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
// no more aligned blocks in here
mi_page_set_has_aligned(page, false);
// remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_heap_t* heap = mi_page_heap(page);
mi_segments_tld_t* segments_tld = &heap->tld->segments;
mi_page_queue_remove(pq, page);
// and free it
mi_heap_t* heap = page->heap;
mi_heap_stat_decrease(heap, page_bins[mi_page_bin(page)], 1);
mi_page_set_heap(page,NULL);
_mi_segment_page_free(page, force, segments_tld);
_mi_arenas_page_free(page);
_mi_arenas_collect(false, false, heap->tld); // allow purging
}
#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE
@ -476,9 +428,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
const size_t bsize = mi_page_block_size(page);
if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue?
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.pages_retire,1);
page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_heap_t* heap = mi_page_heap(page);
mi_debug_heap_stat_counter_increase(heap, pages_retire, 1);
page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
mi_assert_internal(pq >= heap->pages);
const size_t index = pq - heap->pages;
mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE);
@ -489,7 +441,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
}
}
#endif
_mi_page_free(page, pq, false);
_mi_page_free(page, pq);
}
// free retired pages: we don't need to look at the entire queues
@ -504,7 +456,7 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
if (mi_page_all_free(page)) {
page->retire_expire--;
if (force || page->retire_expire == 0) {
_mi_page_free(pq->first, pq, force);
_mi_page_free(pq->first, pq);
}
else {
// keep retired, update min/max
@ -521,6 +473,29 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
heap->page_retired_max = max;
}
/*
static void mi_heap_collect_full_pages(mi_heap_t* heap) {
// note: normally full pages get immediately abandoned and the full queue is always empty
// this path is only used if abandoning is disabled due to a destroy-able heap or options
// set by the user.
mi_page_queue_t* pq = &heap->pages[MI_BIN_FULL];
for (mi_page_t* page = pq->first; page != NULL; ) {
mi_page_t* next = page->next; // get next in case we free the page
_mi_page_free_collect(page, false); // register concurrent free's
// no longer full?
if (!mi_page_is_full(page)) {
if (mi_page_all_free(page)) {
_mi_page_free(page, pq);
}
else {
_mi_page_unfull(page);
}
}
page = next;
}
}
*/
/* -----------------------------------------------------------
Initialize the initial free list in a page.
@ -534,7 +509,7 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) {
MI_UNUSED(stats);
#if (MI_SECURE<=2)
#if (MI_SECURE<3)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
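In secure mode the free list above is initialized in a randomized order so that allocation addresses are hard to predict. The real code interleaves a fixed number of slices; the following is a simpler Fisher-Yates-based sketch of the same idea (all names illustrative, not the actual algorithm):

#include <stddef.h>
#include <stdint.h>

static void free_list_link_shuffled(void** blocks, size_t n, uint64_t rnd) {
  if (n == 0) return;
  // shuffle the block order, then thread the list through the blocks
  for (size_t i = n - 1; i > 0; i--) {
    const size_t j = (size_t)(rnd % (i + 1));
    rnd = rnd*6364136223846793005ULL + 1442695040888963407ULL;  // LCG step
    void* tmp = blocks[i]; blocks[i] = blocks[j]; blocks[j] = tmp;
  }
  for (size_t i = 0; i + 1 < n; i++) { *(void**)blocks[i] = blocks[i + 1]; }
  *(void**)blocks[n - 1] = NULL;  // terminate the free list
}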
@ -592,7 +567,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats)
{
MI_UNUSED(stats);
#if (MI_SECURE <= 2)
#if (MI_SECURE<3)
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL);
#endif
@ -620,7 +595,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
----------------------------------------------------------- */
#define MI_MAX_EXTEND_SIZE (4*1024) // heuristic, one OS page seems to work well.
#if (MI_SECURE>0)
#if (MI_SECURE>=3)
#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many
#else
#define MI_MIN_EXTEND (1)
@ -631,9 +606,9 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
// Note: we also experimented with "bump" allocation on the first
// allocations but this did not speed up any benchmark (due to an
// extra test in malloc? or cache effects?)
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) {
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
mi_assert_expensive(mi_page_is_valid_init(page));
#if (MI_SECURE<=2)
#if (MI_SECURE<3)
mi_assert(page->free == NULL);
mi_assert(page->local_free == NULL);
if (page->free != NULL) return;
@ -642,12 +617,12 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
size_t page_size;
//uint8_t* page_start =
_mi_segment_page_start(_mi_page_segment(page), page, &page_size);
mi_stat_counter_increase(tld->stats.pages_extended, 1);
mi_page_area(page, &page_size);
mi_debug_heap_stat_counter_increase(heap, pages_extended, 1);
// calculate the extend count
const size_t bsize = mi_page_block_size(page);
size_t extend = page->reserved - page->capacity;
size_t extend = (size_t)page->reserved - page->capacity;
mi_assert_internal(extend > 0);
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/bsize);
@ -663,56 +638,56 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
mi_assert_internal(extend < (1UL<<16));
// commit on demand?
if (page->slice_committed > 0) {
const size_t needed_size = (page->capacity + extend)*bsize;
const size_t needed_commit = _mi_align_up( mi_page_slice_offset_of(page, needed_size), MI_PAGE_MIN_COMMIT_SIZE );
if (needed_commit > page->slice_committed) {
mi_assert_internal(((needed_commit - page->slice_committed) % _mi_os_page_size()) == 0);
_mi_os_commit(mi_page_slice_start(page) + page->slice_committed, needed_commit - page->slice_committed, NULL);
page->slice_committed = needed_commit;
}
}
// and append the extend to the free list
if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) {
mi_page_free_list_extend(page, bsize, extend, &tld->stats );
if (extend < MI_MIN_SLICES || MI_SECURE<3) { //!mi_option_is_enabled(mi_option_secure)) {
mi_page_free_list_extend(page, bsize, extend, &heap->tld->stats );
}
else {
mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats);
mi_page_free_list_extend_secure(heap, page, bsize, extend, &heap->tld->stats);
}
// enable the new free list
page->capacity += (uint16_t)extend;
mi_stat_increase(tld->stats.page_committed, extend * bsize);
mi_debug_heap_stat_increase(heap, page_committed, extend * bsize);
mi_assert_expensive(mi_page_is_valid_init(page));
}
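The extend computation above is the heart of lazy free-list initialization: a page never links all of its reserved blocks at once but grows the list in bounded increments, so programs that allocate little touch less memory. A sketch of just the sizing heuristic, with the constants assumed from the definitions above:

#include <stddef.h>

#define MAX_EXTEND_SIZE (4*1024)   // about one OS page worth of payload
#define MIN_EXTEND      (1)

static size_t extend_count(size_t bsize, size_t reserved, size_t capacity) {
  size_t extend = reserved - capacity;   // blocks not yet on any free list
  size_t max_extend = (bsize >= MAX_EXTEND_SIZE ? MIN_EXTEND : MAX_EXTEND_SIZE/bsize);
  if (max_extend < MIN_EXTEND) { max_extend = MIN_EXTEND; }
  if (extend > max_extend) { extend = max_extend; }  // bound the work per call
  return extend;
}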
// Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
// Initialize a fresh page (that is already partially initialized)
void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page);
mi_assert(segment != NULL);
mi_assert_internal(block_size > 0);
// set fields
mi_page_set_heap(page, heap);
page->block_size = block_size;
size_t page_size;
page->page_start = _mi_segment_page_start(segment, page, &page_size);
mi_track_mem_noaccess(page->page_start,page_size);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
uint8_t* page_start = mi_page_area(page, &page_size); MI_UNUSED(page_start);
mi_track_mem_noaccess(page_start,page_size);
mi_assert_internal(page_size / mi_page_block_size(page) < (1L<<16));
mi_assert_internal(page->reserved > 0);
#if (MI_PADDING || MI_ENCODE_FREELIST)
page->keys[0] = _mi_heap_random_next(heap);
page->keys[1] = _mi_heap_random_next(heap);
#endif
page->free_is_zero = page->is_zero_init;
#if MI_DEBUG>2
if (page->is_zero_init) {
mi_track_mem_defined(page->page_start, page_size);
mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size));
if (page->memid.initially_zero) {
mi_track_mem_defined(page->page_start, mi_page_committed(page));
mi_assert_expensive(mi_mem_is_zero(page_start, mi_page_committed(page)));
}
#endif
if (block_size > 0 && _mi_is_power_of_two(block_size)) {
page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size));
}
else {
page->block_size_shift = 0;
}
mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL);
mi_assert_internal(page->used == 0);
mi_assert_internal(page->xthread_free == 0);
mi_assert_internal(mi_page_is_owned(page));
mi_assert_internal(page->xthread_free == 1);
mi_assert_internal(page->next == NULL);
mi_assert_internal(page->prev == NULL);
mi_assert_internal(page->retire_expire == 0);
@ -721,11 +696,11 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page->keys[0] != 0);
mi_assert_internal(page->keys[1] != 0);
#endif
mi_assert_internal(page->block_size_shift == 0 || (block_size == ((size_t)1 << page->block_size_shift)));
mi_assert_internal(page->block_size_shift == 0 || (mi_page_block_size(page) == ((size_t)1 << page->block_size_shift)));
mi_assert_expensive(mi_page_is_valid_init(page));
// initialize an initial free list
mi_page_extend_free(heap,page,tld);
mi_page_extend_free(heap,page);
mi_assert(mi_page_immediate_available(page));
}
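One detail worth noting in the initialization above is block_size_shift: for power-of-two block sizes the page records log2 of the block size, so mapping a pointer to its block index needs only a shift instead of a division. A hedged sketch (illustrative signature, not the mimalloc one):

#include <stddef.h>
#include <stdint.h>

static inline size_t block_index(const uint8_t* page_start, const uint8_t* p,
                                 size_t block_size, uint8_t block_size_shift) {
  const size_t ofs = (size_t)(p - page_start);
  return (block_size_shift != 0 ? (ofs >> block_size_shift) : (ofs / block_size));
}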
@ -734,70 +709,65 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
Find pages with free blocks
-------------------------------------------------------------*/
// search for a best next page to use for at most N pages (often cut short if immediate blocks are available)
#define MI_MAX_CANDIDATE_SEARCH (4)
// is the page not yet used up to its reserved space?
static bool mi_page_is_expandable(const mi_page_t* page) {
mi_assert_internal(page != NULL);
mi_assert_internal(page->capacity <= page->reserved);
return (page->capacity < page->reserved);
}
// Find a page with free blocks of `page->block_size`.
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
{
// search through the pages in "next fit" order
#if MI_STAT
size_t count = 0;
#endif
size_t candidate_count = 0; // we reset this on the first candidate to limit the search
long candidate_limit = 0; // we reset this on the first candidate to limit the search
long page_full_retain = (pq->block_size > MI_SMALL_MAX_OBJ_SIZE ? 0 : heap->page_full_retain); // only retain small pages
mi_page_t* page_candidate = NULL; // a page with free space
mi_page_t* page = pq->first;
while (page != NULL)
{
mi_page_t* next = page->next; // remember next
#if MI_STAT
mi_page_t* next = page->next; // remember next (as this page can move to another queue)
count++;
#endif
candidate_count++;
candidate_limit--;
// collect freed blocks by us and other threads
_mi_page_free_collect(page, false);
#if MI_MAX_CANDIDATE_SEARCH > 1
// search up to N pages for a best candidate
// is the local free list non-empty?
const bool immediate_available = mi_page_immediate_available(page);
bool immediate_available = mi_page_immediate_available(page);
if (!immediate_available) {
// collect freed blocks by us and other threads so we get a proper use count
_mi_page_free_collect(page, false);
immediate_available = mi_page_immediate_available(page);
}
// if the page is completely full, move it to the `mi_pages_full`
// queue so we don't visit long-lived pages too often.
if (!immediate_available && !mi_page_is_expandable(page)) {
mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
mi_page_to_full(page, pq);
page_full_retain--;
if (page_full_retain < 0) {
mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
mi_page_to_full(page, pq);
}
}
else {
// the page has free space, make it a candidate
// we prefer non-expandable pages with high usage as candidates (to reduce commit, and increase the chances of freeing up pages)
if (page_candidate == NULL) {
page_candidate = page;
candidate_count = 0;
candidate_limit = _mi_option_get_fast(mi_option_page_max_candidates);
}
else if (mi_page_all_free(page_candidate)) {
_mi_page_free(page_candidate, pq);
page_candidate = page;
}
// prefer to reuse fuller pages (in the hope the less used page gets freed)
else if (page->used >= page_candidate->used && !mi_page_is_mostly_used(page) && !mi_page_is_expandable(page)) {
else if (page->used >= page_candidate->used && !mi_page_is_mostly_used(page)) { // && !mi_page_is_expandable(page)) {
page_candidate = page;
}
// if we find a non-expandable candidate, or have searched N pages, return with the best candidate
if (immediate_available || candidate_count > MI_MAX_CANDIDATE_SEARCH) {
if (immediate_available || candidate_limit <= 0) {
mi_assert_internal(page_candidate!=NULL);
break;
}
}
#else
// first-fit algorithm
#if 0
// first-fit algorithm without candidates
// If the page contains free blocks, we are done
if (mi_page_immediate_available(page) || mi_page_is_expandable(page)) {
break; // pick this one
@ -818,20 +788,26 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
if (page_candidate != NULL) {
page = page_candidate;
}
if (page != NULL && !mi_page_immediate_available(page)) {
mi_assert_internal(mi_page_is_expandable(page));
mi_page_extend_free(heap, page, heap->tld);
if (page != NULL) {
if (!mi_page_immediate_available(page)) {
mi_assert_internal(mi_page_is_expandable(page));
mi_page_extend_free(heap, page);
}
mi_assert_internal(mi_page_immediate_available(page));
}
if (page == NULL) {
_mi_heap_collect_retired(heap, false); // perhaps make a page available
page = mi_page_fresh(heap, pq);
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
if (page == NULL && first_try) {
// out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
page = mi_page_queue_find_free_ex(heap, pq, false);
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
}
}
else {
mi_assert_internal(page == NULL || mi_page_immediate_available(page));
// move the page to the front of the queue
mi_page_queue_move_to_front(heap, pq, page);
page->retire_expire = 0;
@ -846,30 +822,25 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
// Find a page with free blocks of `size`.
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
mi_page_queue_t* pq = mi_page_queue(heap, size);
static mi_page_t* mi_find_free_page(mi_heap_t* heap, mi_page_queue_t* pq) {
// mi_page_queue_t* pq = mi_page_queue(heap, size);
mi_assert_internal(!mi_page_queue_is_huge(pq));
// check the first page: we do this even with candidate search, as otherwise we would re-search every time
mi_page_t* page = pq->first;
if (page != NULL) {
#if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
if mi_likely(page != NULL && mi_page_immediate_available(page)) {
#if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
mi_page_extend_free(heap, page, heap->tld);
mi_page_extend_free(heap, page);
mi_assert_internal(mi_page_immediate_available(page));
}
else
#endif
{
_mi_page_free_collect(page,false);
}
if (mi_page_immediate_available(page)) {
page->retire_expire = 0;
return page; // fast path
}
#endif
page->retire_expire = 0;
return page; // fast path
}
else {
return mi_page_queue_find_free_ex(heap, pq, true);
}
return mi_page_queue_find_free_ex(heap, pq, true);
}
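The two functions above implement a bounded "next fit with candidates" search: walk the queue, prefer fuller pages as candidates (so emptier ones can drain and be freed), and stop as soon as a block is immediately available or the candidate budget runs out. A compact sketch of the loop's shape, with illustrative types:

#include <stdbool.h>
#include <stddef.h>

typedef struct page_s {
  struct page_s* next;
  size_t used;       // blocks in use
  bool   immediate;  // a block is ready on the free list right now
} page_t;

static page_t* find_candidate(page_t* first, long budget) {
  page_t* best = NULL;
  for (page_t* p = first; p != NULL; p = p->next) {
    if (p->immediate) return p;                     // done: block available now
    if (best == NULL) { best = p; }                 // first usable page
    else if (p->used >= best->used) { best = p; }   // prefer fuller pages
    if (--budget <= 0) break;                       // bound the search
  }
  return best;
}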
@ -905,13 +876,13 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
// Huge pages contain just one block, and the segment contains just that page.
// Huge pages are also used if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX)
// so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`.
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
size_t block_size = _mi_os_good_alloc_size(size);
mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment, mi_page_queue_t* pq) {
const size_t block_size = _mi_os_good_alloc_size(size);
// mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
#if MI_HUGE_PAGE_ABANDON
mi_page_queue_t* pq = NULL;
#error todo.
#else
mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size
// mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_MAX_OBJ_SIZE+1); // always in the huge queue regardless of the block size
mi_assert_internal(mi_page_queue_is_huge(pq));
#endif
mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
@ -919,10 +890,9 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_is_huge(page));
mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
mi_assert_internal(mi_page_is_singleton(page));
#if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_assert_internal(mi_page_is_abandoned(page));
mi_page_set_heap(page, NULL);
#endif
mi_heap_stat_increase(heap, malloc_huge, mi_page_block_size(page));
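As a usage illustration, both of the following requests would end up on this huge-page path, assuming typical thresholds (the exact limits depend on the configuration):

// hypothetical example calls, not from the test suite
void* big     = mi_malloc(64*1024*1024);               // size well above MI_LARGE_MAX_OBJ_SIZE
void* aligned = mi_malloc_aligned(1024, 4*1024*1024);  // very large alignment
mi_free(aligned);
mi_free(big);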
@ -935,30 +905,30 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_a
// Allocate a page
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
// huge allocation?
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;
}
else {
return mi_huge_page_alloc(heap,size,huge_alignment);
}
if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) {
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;
}
mi_page_queue_t* pq = mi_page_queue(heap, (huge_alignment > 0 ? MI_LARGE_MAX_OBJ_SIZE+1 : size));
// huge allocation?
if mi_unlikely(mi_page_queue_is_huge(pq) || req_size > MI_MAX_ALLOC_SIZE) {
return mi_huge_page_alloc(heap,size,huge_alignment,pq);
}
else {
// otherwise find a page with free blocks in our size segregated queues
#if MI_PADDING
mi_assert_internal(size >= MI_PADDING_SIZE);
#endif
return mi_find_free_page(heap, size);
return mi_find_free_page(heap, pq);
}
}
// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
// very large requested alignments in which case we use a huge segment.
// The `huge_alignment` is normally 0 but is set to a multiple of MI_SLICE_SIZE for
// very large requested alignments in which case we use a huge singleton page.
void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept
{
mi_assert_internal(heap != NULL);
@ -971,17 +941,14 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
mi_assert_internal(mi_heap_is_initialized(heap));
// do administrative tasks every N generic mallocs
if mi_unlikely(++heap->generic_count >= 100) {
if mi_unlikely(++heap->generic_count >= 1000) {
heap->generic_collect_count += heap->generic_count;
heap->generic_count = 0;
// call potential deferred free routines
_mi_deferred_free(heap, false);
// free delayed frees from other threads (but skip contended ones)
_mi_heap_delayed_free_partial(heap);
// collect every once in a while (10000 by default)
const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L);
const long generic_collect = mi_option_get_clamp(mi_option_generic_collect, 1, 1000000L);
if (heap->generic_collect_count >= generic_collect) {
heap->generic_collect_count = 0;
mi_heap_collect(heap, false /* force? */);
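This pattern amortizes maintenance: only every N-th call into the generic path pays for deferred frees and an occasional collect, keeping the common path branch-light. A small sketch of the counting scheme (fields and thresholds illustrative):

typedef struct heap_s {
  long generic_count;          // generic calls since the last admin pass
  long generic_collect_count;  // generic calls since the last collect
} heap_t;

static void generic_admin(heap_t* heap, long collect_every) {
  if (++heap->generic_count < 1000) return;  // fast exit on most calls
  heap->generic_collect_count += heap->generic_count;
  heap->generic_count = 0;
  // ... run deferred-free callbacks here ...
  if (heap->generic_collect_count >= collect_every) {
    heap->generic_collect_count = 0;
    // ... collect the heap (non-forced) ...
  }
}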
@ -991,7 +958,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
// find (or allocate) a page of the right size
mi_page_t* page = mi_find_page(heap, size, huge_alignment);
if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more
mi_heap_collect(heap, true /* force */);
mi_heap_collect(heap, true /* force? */);
page = mi_find_page(heap, size, huge_alignment);
}
@ -1003,6 +970,8 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_block_size(page) >= size);
mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
mi_assert_internal(_mi_ptr_page(page)==page);
// and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
void* p;

View file

@ -239,6 +239,9 @@ void _mi_prim_thread_done_auto_done(void) {
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
#endif
bool _mi_prim_thread_is_in_threadpool(void) {
return false;
}

View file

@ -64,7 +64,8 @@ static void* zone_valloc(malloc_zone_t* zone, size_t size) {
static void zone_free(malloc_zone_t* zone, void* p) {
MI_UNUSED(zone);
mi_cfree(p);
// mi_cfree(p); // checked free as `zone_free` may be called with invalid pointers
mi_free(p); // with the page_map and pagemap_commit=1 we can use the regular free
}
static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) {

View file

@ -62,6 +62,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <sys/syscall.h>
#endif
#define MI_UNIX_LARGE_PAGE_SIZE (2*MI_MiB) // TODO: can we query the OS for this?
//------------------------------------------------------------------------------------
// Use syscalls for some primitives to allow for libraries that override open/read/close etc.
@ -148,7 +149,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
}
#endif
}
config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE;
config->has_overcommit = unix_detect_overcommit();
config->has_partial_free = true; // mmap can free in parts
config->has_virtual_reserve = true; // todo: check if this is true for NetBSD? (for anonymous mmap with PROT_NONE)
@ -201,7 +202,8 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p
void* p = NULL;
#if defined(MAP_ALIGNED) // BSD
if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
size_t n = mi_bsr(try_alignment);
size_t idx = 0;
mi_bsr(try_alignment, &idx); // sets idx to the highest set bit (try_alignment > 1 here)
const size_t n = idx;
if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB
p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0);
if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) {
@ -365,6 +367,9 @@ int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool comm
mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
mi_assert_internal(commit || !allow_large);
mi_assert_internal(try_alignment > 0);
if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) {
try_alignment = MI_UNIX_LARGE_PAGE_SIZE; // try to align along large page size for larger allocations
}
*is_zero = true;
int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
@ -412,7 +417,7 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) {
int err = 0;
// decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
err = unix_madvise(start, size, MADV_DONTNEED);
#if !MI_DEBUG && !MI_SECURE
#if !MI_DEBUG && MI_SECURE<=2
*needs_recommit = false;
#else
*needs_recommit = true;
@ -482,7 +487,7 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
bool is_large = true;
*is_zero = true;
*addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
*addr = unix_mmap(hint_addr, size, MI_ARENA_SLICE_ALIGN, PROT_READ | PROT_WRITE, true, true, &is_large);
if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
unsigned long numa_mask = (1UL << numa_node);
// TODO: does `mbind` work correctly for huge OS pages? should we
@ -889,3 +894,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
}
#endif
bool _mi_prim_thread_is_in_threadpool(void) {
return false;
}

View file

@ -277,3 +277,7 @@ void _mi_prim_thread_done_auto_done(void) {
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
MI_UNUSED(heap);
}
bool _mi_prim_thread_is_in_threadpool(void) {
return false;
}

View file

@ -17,6 +17,14 @@ terms of the MIT license. A copy of the license can be found in the file
// Dynamically bind Windows API points for portability
//---------------------------------------------
#if defined(_MSC_VER)
#pragma warning(disable:28159) // don't use GetVersion
#pragma warning(disable:4996) // don't use GetVersion
#endif
static DWORD win_major_version = 6;
static DWORD win_minor_version = 0;
// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
// NtAllocateVirtualMemoryEx is used for huge OS page allocation (1GiB)
@ -111,16 +119,27 @@ static bool win_enable_large_os_pages(size_t* large_page_size)
// Initialize
//---------------------------------------------
static DWORD win_allocation_granularity = 64*MI_KiB;
void _mi_prim_mem_init( mi_os_mem_config_t* config )
{
config->has_overcommit = false;
config->has_partial_free = false;
config->has_virtual_reserve = true;
// windows version
OSVERSIONINFOW version; _mi_memzero_var(version);
version.dwOSVersionInfoSize = sizeof(version); // required, or GetVersionExW fails
if (GetVersionExW(&version)) {
win_major_version = version.dwMajorVersion;
win_minor_version = version.dwMinorVersion;
}
// get the page size
SYSTEM_INFO si;
GetSystemInfo(&si);
if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; }
if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; }
if (si.dwAllocationGranularity > 0) {
config->alloc_granularity = si.dwAllocationGranularity;
win_allocation_granularity = si.dwAllocationGranularity;
}
// get virtual address bits
if ((uintptr_t)si.lpMaximumApplicationAddress > 0) {
const size_t vbits = MI_SIZE_BITS - mi_clz((uintptr_t)si.lpMaximumApplicationAddress);
@ -183,7 +202,7 @@ int _mi_prim_free(void* addr, size_t size ) {
// the start of the region.
MEMORY_BASIC_INFORMATION info; _mi_memzero_var(info);
VirtualQuery(addr, &info, sizeof(info));
if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) {
if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)(4*MI_MiB)) {
errcode = 0;
err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0);
if (err) { errcode = GetLastError(); }
@ -211,7 +230,7 @@ static void* win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_ali
}
#endif
// on modern Windows try to use VirtualAlloc2 for aligned allocation
if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
if (addr == NULL && try_alignment > win_allocation_granularity && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
reqs.Alignment = try_alignment;
MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
@ -247,7 +266,7 @@ static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignmen
// success, return the address
return p;
}
else if (max_retry_msecs > 0 && (try_alignment <= 2*MI_SEGMENT_ALIGN) &&
else if (max_retry_msecs > 0 && (try_alignment <= 8*MI_MiB) &&
(flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 &&
win_is_out_of_memory_error(GetLastError())) {
// if committing regular memory and being out-of-memory,
@ -823,3 +842,16 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
mi_allocator_done();
}
#endif
bool _mi_prim_thread_is_in_threadpool(void) {
#if (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64)
if (win_major_version >= 6) {
// check if this thread belongs to a windows threadpool
// see: <https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/pebteb/teb/index.htm>
_TEB* const teb = NtCurrentTeb();
void* const pool_data = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778)));
return (pool_data != NULL);
}
#endif
return false;
}

View file

@ -0,0 +1,146 @@
/* ----------------------------------------------------------------------------
Copyright (c) Microsoft Research
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include <atomic>
#include <DbgEng.h>
#include <map>
#include <string>
#include <vector>
#include <Windows.h>
#include "mimalloc.h"
#include "mimalloc/internal.h"
ULONG64 g_MiMallocBase = 0;
IDebugClient* g_DebugClient = nullptr;
IDebugControl* g_DebugControl = nullptr;
IDebugSymbols3* g_DebugSymbols = nullptr;
IDebugDataSpaces* g_DataSpaces = nullptr;
// Function to find mimalloc.dll base address at startup
HRESULT FindMimallocBase()
{
if (g_DebugSymbols == nullptr)
{
return E_FAIL;
}
return g_DebugSymbols->GetModuleByModuleName("mimalloc", 0, NULL, &g_MiMallocBase);
}
// Entry point for the extension
extern "C" __declspec(dllexport) HRESULT CALLBACK DebugExtensionInitialize(PULONG version, PULONG flags)
{
UNREFERENCED_PARAMETER(flags);
// Ensure Version is valid
if (!version)
{
return E_INVALIDARG;
}
// Set the version
*version = DEBUG_EXTENSION_VERSION(1, 0);
HRESULT hr = DebugCreate(__uuidof(IDebugClient), (void**)&g_DebugClient);
if (FAILED(hr))
{
return hr;
}
// Query for the IDebugControl interface
hr = g_DebugClient->QueryInterface(__uuidof(IDebugControl), (void**)&g_DebugControl);
if (FAILED(hr))
{
g_DebugClient->Release();
return hr;
}
hr = g_DebugClient->QueryInterface(__uuidof(IDebugSymbols3), (void**)&g_DebugSymbols);
if (FAILED(hr))
{
g_DebugControl->Release();
g_DebugClient->Release();
return hr;
}
hr = g_DebugClient->QueryInterface(__uuidof(IDebugDataSpaces), (void**)&g_DataSpaces);
if (FAILED(hr))
{
g_DebugSymbols->Release();
g_DebugControl->Release();
g_DebugClient->Release();
return hr;
}
// Find mimalloc base address at startup
hr = FindMimallocBase();
if (FAILED(hr) || g_MiMallocBase == 0)
{
return E_FAIL; // Prevent extension from loading
}
mi_register_output(
[](const char* msg, void* arg) {
g_DebugControl->Output(DEBUG_OUTPUT_ERROR, msg);
g_DebugControl->Output(DEBUG_OUTPUT_ERROR, "\n");
},
nullptr);
g_DebugControl->Output(DEBUG_OUTPUT_NORMAL, "mimalloc.dll base address found: 0x%llx\n", g_MiMallocBase);
return S_OK;
}
// Notifies the extension that a debug event has occurred
extern "C" __declspec(dllexport) void CALLBACK DebugExtensionNotify(ULONG notify, ULONG64 argument)
{
UNREFERENCED_PARAMETER(notify);
UNREFERENCED_PARAMETER(argument);
}
// Uninitializes the extension
extern "C" __declspec(dllexport) void CALLBACK DebugExtensionUninitialize()
{
if (g_DebugSymbols)
{
g_DebugSymbols->Release();
g_DebugSymbols = nullptr;
}
if (g_DebugControl)
{
g_DebugControl->Release();
g_DebugControl = nullptr;
}
if (g_DebugClient)
{
g_DebugClient->Release();
g_DebugClient = nullptr;
}
}
// Sample command: !mi_help
extern "C" __declspec(dllexport) HRESULT CALLBACK mi_help(PDEBUG_CLIENT Client, PCSTR args)
{
UNREFERENCED_PARAMETER(args);
// Print Help
g_DebugControl->Output(DEBUG_OUTPUT_NORMAL, "Hello from MiMalloc WinDbg Extension!\n");
return S_OK;
}
extern "C" __declspec(dllexport) HRESULT CALLBACK mi_dump_arenas(PDEBUG_CLIENT client, PCSTR args)
{
mi_debug_show_arenas();
return S_OK;
}
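Once built, the extension loads like any other WinDbg extension. A typical session might look as follows; the DLL name and path are assumptions, only the exported commands come from the code above:

.load C:\path\to\mimalloc_windbg.dll
!mi_help
!mi_dump_arenas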

View file

@ -7,7 +7,6 @@ terms of the MIT license. A copy of the license can be found in the file
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/prim.h" // _mi_prim_random_buf
#include <string.h> // memset
/* ----------------------------------------------------------------------------
We use our own PRNG to keep predictable performance of random number generation
@ -33,15 +32,11 @@ The implementation uses regular C code which compiles very well on modern compil
(gcc x64 has no register spills, and clang 6+ uses SSE instructions)
-----------------------------------------------------------------------------*/
static inline uint32_t rotl(uint32_t x, uint32_t shift) {
return (x << shift) | (x >> (32 - shift));
}
static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12);
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
x[a] += x[b]; x[d] = mi_rotl32(x[d] ^ x[a], 16);
x[c] += x[d]; x[b] = mi_rotl32(x[b] ^ x[c], 12);
x[a] += x[b]; x[d] = mi_rotl32(x[d] ^ x[a], 8);
x[c] += x[d]; x[b] = mi_rotl32(x[b] ^ x[c], 7);
}
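The removed local rotl was only well-defined for shifts 1..31: a shift of 0 would evaluate x >> 32, which is undefined behavior in C. ChaCha only uses the constants 7, 8, 12, and 16, so this is about sharing one vetted helper rather than fixing a latent bug. A sketch of a rotate with the semantics mi_rotl32 is assumed to provide:

#include <stdint.h>

static inline uint32_t rotl32(uint32_t x, uint32_t shift) {
  const uint32_t s = shift & 31;                       // avoid shifting by 32
  return (s == 0 ? x : (x << s) | (x >> (32 - s)));    // well-defined for s == 0 too
}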
static void chacha_block(mi_random_ctx_t* ctx)
@ -99,7 +94,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no
// since we only use chacha for randomness (and not encryption) we
// do not _need_ to read 32-bit values as little endian but we do anyways
// just for being compatible :-)
memset(ctx, 0, sizeof(*ctx));
_mi_memzero(ctx, sizeof(*ctx));
for (size_t i = 0; i < 4; i++) {
const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
ctx->input[i] = read32(sigma,i);
@ -114,7 +109,7 @@ static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t no
}
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
memset(ctx_new, 0, sizeof(*ctx_new));
_mi_memzero(ctx_new, sizeof(*ctx_new));
_mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
ctx_new->input[12] = 0;
ctx_new->input[13] = 0;
@ -160,7 +155,7 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim
uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random
x ^= _mi_prim_clock_now();
x ^= _mi_prim_clock_now();
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {

View file

@ -1,142 +0,0 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019-2023, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* -----------------------------------------------------------
The following functions are to reliably find the segment or
block that encompasses any pointer p (or NULL if it is not
in any of our segments).
We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB)
set to 1 if it contains the segment meta data.
----------------------------------------------------------- */
#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"
// Reduce total address space to reduce .bss (due to the `mi_segment_map`)
#if (MI_INTPTR_SIZE > 4) && MI_TRACK_ASAN
#define MI_SEGMENT_MAP_MAX_ADDRESS (128*1024ULL*MI_GiB) // 128 TiB (see issue #881)
#elif (MI_INTPTR_SIZE > 4)
#define MI_SEGMENT_MAP_MAX_ADDRESS (48*1024ULL*MI_GiB) // 48 TiB
#else
#define MI_SEGMENT_MAP_MAX_ADDRESS (UINT32_MAX)
#endif
#define MI_SEGMENT_MAP_PART_SIZE (MI_INTPTR_SIZE*MI_KiB - 128) // 128 > sizeof(mi_memid_t) !
#define MI_SEGMENT_MAP_PART_BITS (8*MI_SEGMENT_MAP_PART_SIZE)
#define MI_SEGMENT_MAP_PART_ENTRIES (MI_SEGMENT_MAP_PART_SIZE / MI_INTPTR_SIZE)
#define MI_SEGMENT_MAP_PART_BIT_SPAN (MI_SEGMENT_ALIGN) // memory area covered by 1 bit
#if (MI_SEGMENT_MAP_PART_BITS < (MI_SEGMENT_MAP_MAX_ADDRESS / MI_SEGMENT_MAP_PART_BIT_SPAN)) // prevent overflow on 32-bit (issue #1017)
#define MI_SEGMENT_MAP_PART_SPAN (MI_SEGMENT_MAP_PART_BITS * MI_SEGMENT_MAP_PART_BIT_SPAN)
#else
#define MI_SEGMENT_MAP_PART_SPAN MI_SEGMENT_MAP_MAX_ADDRESS
#endif
#define MI_SEGMENT_MAP_MAX_PARTS ((MI_SEGMENT_MAP_MAX_ADDRESS / MI_SEGMENT_MAP_PART_SPAN) + 1)
// A part of the segment map.
typedef struct mi_segmap_part_s {
mi_memid_t memid;
_Atomic(uintptr_t) map[MI_SEGMENT_MAP_PART_ENTRIES];
} mi_segmap_part_t;
// Allocate parts on-demand to reduce .bss footprint
static _Atomic(mi_segmap_part_t*) mi_segment_map[MI_SEGMENT_MAP_MAX_PARTS]; // = { NULL, .. }
static mi_segmap_part_t* mi_segment_map_index_of(const mi_segment_t* segment, bool create_on_demand, size_t* idx, size_t* bitidx) {
// note: segment can be invalid or NULL.
mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
*idx = 0;
*bitidx = 0;
if ((uintptr_t)segment >= MI_SEGMENT_MAP_MAX_ADDRESS) return NULL;
const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_MAP_PART_SPAN;
if (segindex >= MI_SEGMENT_MAP_MAX_PARTS) return NULL;
mi_segmap_part_t* part = mi_atomic_load_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[segindex]);
// allocate on demand to reduce .bss footprint
if mi_unlikely(part == NULL) {
if (!create_on_demand) return NULL;
mi_memid_t memid;
part = (mi_segmap_part_t*)_mi_os_alloc(sizeof(mi_segmap_part_t), &memid);
if (part == NULL) return NULL;
part->memid = memid;
mi_segmap_part_t* expected = NULL;
if (!mi_atomic_cas_ptr_strong_release(mi_segmap_part_t, &mi_segment_map[segindex], &expected, part)) {
_mi_os_free(part, sizeof(mi_segmap_part_t), memid);
part = expected;
if (part == NULL) return NULL;
}
}
mi_assert(part != NULL);
const uintptr_t offset = ((uintptr_t)segment) % MI_SEGMENT_MAP_PART_SPAN;
const uintptr_t bitofs = offset / MI_SEGMENT_MAP_PART_BIT_SPAN;
*idx = bitofs / MI_INTPTR_BITS;
*bitidx = bitofs % MI_INTPTR_BITS;
return part;
}
void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
if (segment->memid.memkind == MI_MEM_ARENA) return; // we lookup segments first in the arena's and don't need the segment map
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, true /* alloc map if needed */, &index, &bitidx);
if (part == NULL) return; // outside our address range..
uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
uintptr_t newmask;
do {
newmask = (mask | ((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
}
void _mi_segment_map_freed_at(const mi_segment_t* segment) {
if (segment->memid.memkind == MI_MEM_ARENA) return;
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* don't alloc if not present */, &index, &bitidx);
if (part == NULL) return; // outside our address range..
uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
uintptr_t newmask;
do {
newmask = (mask & ~((uintptr_t)1 << bitidx));
} while (!mi_atomic_cas_weak_release(&part->map[index], &mask, newmask));
}
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p); // segment can be NULL
size_t index;
size_t bitidx;
mi_segmap_part_t* part = mi_segment_map_index_of(segment, false /* dont alloc if not present */, &index, &bitidx);
if (part == NULL) return NULL;
const uintptr_t mask = mi_atomic_load_relaxed(&part->map[index]);
if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) {
bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(cookie_ok); MI_UNUSED(cookie_ok);
return segment; // yes, allocated by us
}
return NULL;
}
// Is this a valid pointer in our heap?
static bool mi_is_valid_pointer(const void* p) {
// first check if it is in an arena, then check if it is OS allocated
return (_mi_arena_contains(p) || _mi_segment_of(p) != NULL);
}
mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
return mi_is_valid_pointer(p);
}
void _mi_segment_map_unsafe_destroy(void) {
for (size_t i = 0; i < MI_SEGMENT_MAP_MAX_PARTS; i++) {
mi_segmap_part_t* part = mi_atomic_exchange_ptr_relaxed(mi_segmap_part_t, &mi_segment_map[i], NULL);
if (part != NULL) {
_mi_os_free(part, sizeof(mi_segmap_part_t), part->memid);
}
}
}

File diff suppressed because it is too large

View file

@ -20,10 +20,11 @@ terms of the MIT license. A copy of the license can be found in the file
// containing the whole library. If it is linked first
// it will override all the standard library allocation
// functions (on Unix systems).
#include "alloc.c" // includes alloc-override.c
#include "alloc.c" // includes alloc-override.c and free.c
#include "alloc-aligned.c"
#include "alloc-posix.c"
#include "arena.c"
#include "arena-meta.c"
#include "bitmap.c"
#include "heap.c"
#include "init.c"
@ -31,9 +32,8 @@ terms of the MIT license. A copy of the license can be found in the file
#include "options.c"
#include "os.c"
#include "page.c" // includes page-queue.c
#include "page-map.c"
#include "random.c"
#include "segment.c"
#include "segment-map.c"
#include "stats.c"
#include "prim/prim.c"
#if MI_OSX_ZONE

View file

@ -19,48 +19,77 @@ terms of the MIT license. A copy of the license can be found in the file
Statistics operations
----------------------------------------------------------- */
static bool mi_is_in_main(void* stat) {
return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main
&& (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t)));
static void mi_stat_update_mt(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
// add atomically
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
if (amount > 0) {
mi_atomic_addi64_relaxed(&stat->total, amount);
}
}
static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
if mi_unlikely(mi_is_in_main(stat))
{
// add atomically (for abandoned pages)
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
if (amount > 0) {
mi_atomic_addi64_relaxed(&stat->total,amount);
}
}
else {
// add thread local
stat->current += amount;
if (stat->current > stat->peak) { stat->peak = stat->current; }
if (amount > 0) { stat->total += amount; }
}
// add thread local
stat->current += amount;
if (stat->current > stat->peak) { stat->peak = stat->current; }
if (amount > 0) { stat->total += amount; }
}
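The _mt variants above must maintain the peak atomically across threads; mimalloc does this with an atomic-max helper. A sketch of such a helper using C11 atomics (mimalloc wraps these in its own mi_atomic_* macros):

#include <stdatomic.h>
#include <stdint.h>

static void atomic_max_i64(_Atomic(int64_t)* peak, int64_t value) {
  int64_t cur = atomic_load_explicit(peak, memory_order_relaxed);
  while (cur < value &&
         !atomic_compare_exchange_weak_explicit(peak, &cur, value,
              memory_order_relaxed, memory_order_relaxed)) {
    // a failed CAS reloads `cur`; retry until peak >= value
  }
}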
void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
if (mi_is_in_main(stat)) {
mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount );
}
else {
stat->total += amount;
}
void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount) {
mi_atomic_addi64_relaxed(&stat->total, (int64_t)amount);
}
void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
stat->total += amount;
}
void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount) {
mi_stat_update_mt(stat, (int64_t)amount);
}
void __mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, (int64_t)amount);
}
void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount) {
mi_stat_update_mt(stat, -((int64_t)amount));
}
void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
mi_stat_update(stat, -((int64_t)amount));
}
// Adjust stats to compensate; for example before committing a range,
// first adjust downwards with parts that were already committed so
// we avoid double counting.
static void mi_stat_adjust_mt(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
// adjust atomically
mi_atomic_addi64_relaxed(&stat->current, amount);
mi_atomic_addi64_relaxed(&stat->total, amount);
}
static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount) {
if (amount == 0) return;
stat->current += amount;
stat->total += amount;
}
void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount) {
mi_stat_adjust_mt(stat, (int64_t)amount);
}
void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount) {
mi_stat_adjust(stat, (int64_t)amount);
}
void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount) {
mi_stat_adjust_mt(stat, -((int64_t)amount));
}
void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount) {
mi_stat_adjust(stat, -((int64_t)amount));
}
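A worked example of the compensation described above, with hypothetical sizes: suppose 64 KiB is about to be committed but 16 KiB of that range was already counted. Adjusting down first keeps the running totals honest:

// hypothetical caller, using the functions declared above
__mi_stat_adjust_decrease(&stats->committed, 16*1024);  // part already counted
__mi_stat_increase(&stats->committed, 64*1024);         // count the full commit
// net effect on `current` and `total`: +48 KiB, the newly committed memory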
// must be thread safe as it is called from stats_merge
static void mi_stat_count_add(mi_stat_count_t* stat, const mi_stat_count_t* src) {
@ -94,8 +123,8 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
}
#endif
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
mi_stat_count_add(&stats->page_bins[i], &src->page_bins[i]);
}
mi_stat_count_add(&stats->page_bins[i], &src->page_bins[i]);
}
}
#undef MI_STAT_COUNT
@ -109,7 +138,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
// unit == 0: count as decimal
// unit < 0 : count in binary
static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) {
char buf[32]; buf[0] = 0;
char buf[32]; _mi_memzero_var(buf);
int len = 32;
const char* suffix = (unit <= 0 ? " " : "B");
const int64_t base = (unit == 0 ? 1000 : 1024);
@ -274,7 +303,7 @@ static void mi_cdecl mi_buffered_out(const char* msg, void* arg) {
static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept {
// wrap the output function to be line buffered
char buf[256];
char buf[256]; _mi_memzero_var(buf);
buffered_t buffer = { out0, arg0, NULL, 0, 255 };
buffer.buf = buf;
mi_output_fun* out = &mi_buffered_out;
@ -302,15 +331,20 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
mi_stat_peak_print(&stats->reset, "reset", 1, out, arg );
mi_stat_peak_print(&stats->purged, "purged", 1, out, arg );
mi_stat_print_ex(&stats->page_committed, "touched", 1, out, arg, "");
mi_stat_print(&stats->segments, "segments", -1, out, arg);
mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg);
mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg);
// mi_stat_print(&stats->segments, "segments", -1, out, arg);
// mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg);
// mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg);
mi_stat_print(&stats->pages, "pages", -1, out, arg);
mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg);
mi_stat_counter_print(&stats->pages_reclaim_on_alloc, "-reclaima", out, arg);
mi_stat_counter_print(&stats->pages_reclaim_on_free, "-reclaimf", out, arg);
mi_stat_counter_print(&stats->pages_reabandon_full, "-reabandon", out, arg);
mi_stat_counter_print(&stats->pages_unabandon_busy_wait, "-waits", out, arg);
mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg);
mi_stat_counter_print(&stats->pages_retire, "-retire", out, arg);
mi_stat_counter_print(&stats->arena_count, "arenas", out, arg);
// mi_stat_counter_print(&stats->arena_crossover_count, "-crossover", out, arg);
// mi_stat_counter_print(&stats->arena_purges, "-purges", out, arg);
mi_stat_counter_print(&stats->arena_rollback_count, "-rollback", out, arg);
mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg);
mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
@ -343,36 +377,37 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
static mi_msecs_t mi_process_start; // = 0
static mi_stats_t* mi_stats_get_default(void) {
mi_heap_t* heap = mi_heap_get_default();
return &heap->tld->stats;
}
static void mi_stats_merge_from(mi_stats_t* stats) {
if (stats != &_mi_stats_main) {
mi_stats_add(&_mi_stats_main, stats);
memset(stats, 0, sizeof(mi_stats_t));
}
// return thread local stats
static mi_stats_t* mi_get_tld_stats(void) {
return &mi_heap_get_default()->tld->stats;
}
void mi_stats_reset(void) mi_attr_noexcept {
mi_stats_t* stats = mi_stats_get_default();
if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); }
memset(&_mi_stats_main, 0, sizeof(mi_stats_t));
mi_stats_t* stats = mi_get_tld_stats();
mi_subproc_t* subproc = _mi_subproc();
if (stats != &subproc->stats) { _mi_memzero(stats, sizeof(mi_stats_t)); }
_mi_memzero(&subproc->stats, sizeof(mi_stats_t));
if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); };
}
void mi_stats_merge(void) mi_attr_noexcept {
mi_stats_merge_from( mi_stats_get_default() );
void _mi_stats_merge_from(mi_stats_t* to, mi_stats_t* from) {
if (to != from) {
mi_stats_add(to, from);
_mi_memzero(from, sizeof(mi_stats_t));
}
}
void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done`
mi_stats_merge_from(stats);
_mi_stats_merge_from(&_mi_subproc()->stats, stats);
}
void mi_stats_merge(void) mi_attr_noexcept {
_mi_stats_done( mi_get_tld_stats() );
}
void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
mi_stats_merge_from(mi_stats_get_default());
_mi_stats_print(&_mi_stats_main, out, arg);
mi_stats_merge();
_mi_stats_print(&_mi_subproc()->stats, out, arg);
}
void mi_stats_print(void* out) mi_attr_noexcept {
@ -381,7 +416,7 @@ void mi_stats_print(void* out) mi_attr_noexcept {
}
void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
_mi_stats_print(mi_stats_get_default(), out, arg);
_mi_stats_print(mi_get_tld_stats(), out, arg);
}
@ -415,11 +450,12 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
{
mi_subproc_t* subproc = _mi_subproc();
mi_process_info_t pinfo;
_mi_memzero_var(pinfo);
pinfo.elapsed = _mi_clock_end(mi_process_start);
pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.current)));
pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.peak)));
pinfo.current_rss = pinfo.current_commit;
pinfo.peak_rss = pinfo.peak_commit;
pinfo.utime = 0;
@ -447,7 +483,7 @@ void mi_stats_get(size_t stats_size, mi_stats_t* stats) mi_attr_noexcept {
if (stats == NULL || stats_size == 0) return;
_mi_memzero(stats, stats_size);
const size_t size = (stats_size > sizeof(mi_stats_t) ? sizeof(mi_stats_t) : stats_size);
_mi_memcpy(stats, &_mi_stats_main, size);
_mi_memcpy(stats, &_mi_subproc()->stats, size);
stats->version = MI_STAT_VERSION;
}
@ -494,9 +530,9 @@ static void mi_heap_buf_print(mi_heap_buf_t* hbuf, const char* msg) {
static void mi_heap_buf_print_count_bin(mi_heap_buf_t* hbuf, const char* prefix, mi_stat_count_t* stat, size_t bin, bool add_comma) {
const size_t binsize = _mi_bin_size(bin);
const size_t pagesize = (binsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_SMALL_PAGE_SIZE :
(binsize <= MI_MEDIUM_OBJ_SIZE_MAX ? MI_MEDIUM_PAGE_SIZE :
(binsize <= MI_LARGE_OBJ_SIZE_MAX ? MI_LARGE_PAGE_SIZE : 0)));
const size_t pagesize = (binsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_SMALL_PAGE_SIZE :
(binsize <= MI_MEDIUM_MAX_OBJ_SIZE ? MI_MEDIUM_PAGE_SIZE :
(binsize <= MI_LARGE_MAX_OBJ_SIZE ? MI_LARGE_PAGE_SIZE : 0)));
char buf[128];
_mi_snprintf(buf, 128, "%s{ \"total\": %lld, \"peak\": %lld, \"current\": %lld, \"block_size\": %zu, \"page_size\": %zu }%s\n", prefix, stat->total, stat->peak, stat->current, binsize, pagesize, (add_comma ? "," : ""));
buf[127] = 0;
@ -576,7 +612,7 @@ char* mi_stats_get_json(size_t output_size, char* output_buf) mi_attr_noexcept {
mi_heap_buf_print(&hbuf, " },\n");
// statistics
mi_stats_t* stats = &_mi_stats_main;
mi_stats_t* stats = &_mi_subproc()->stats;
MI_STAT_FIELDS()
// size bins
@ -589,7 +625,7 @@ char* mi_stats_get_json(size_t output_size, char* output_buf) mi_attr_noexcept {
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
mi_heap_buf_print_count_bin(&hbuf, " ", &stats->page_bins[i], i, i!=MI_BIN_HUGE);
}
mi_heap_buf_print(&hbuf, " ]\n");
mi_heap_buf_print(&hbuf, " ]\n");
mi_heap_buf_print(&hbuf, "}\n");
return hbuf.buf;
}

View file

@ -12,4 +12,14 @@ std::string TestAllocInDll::GetString()
std::string r = test;
delete[] test;
return r;
}
#include <windows.h>
void TestAllocInDll::TestHeapAlloc()
{
HANDLE heap = GetProcessHeap();
int* p = (int*)HeapAlloc(heap, 0, sizeof(int));
*p = 42;
HeapFree(heap, 0, p);
}

View file

@ -8,4 +8,5 @@ class TestAllocInDll
{
public:
__declspec(dllexport) std::string GetString();
__declspec(dllexport) void TestHeapAlloc();
};

View file

@ -32,7 +32,7 @@ static void test_manage_os_memory(void);
int main() {
mi_version();
mi_stats_reset();
// mi_bins();
// test_manage_os_memory();
@ -84,7 +84,7 @@ int main() {
static void invalid_free() {
free((void*)0xBADBEEF);
realloc((void*)0xBADBEEF,10);
realloc((void*)0xBADBEEF, 10);
}
static void block_overflow1() {
@ -182,7 +182,7 @@ static void test_process_info(void) {
size_t peak_commit = 0;
size_t page_faults = 0;
for (int i = 0; i < 100000; i++) {
void* p = calloc(100,10);
void* p = calloc(100, 10);
free(p);
}
mi_process_info(&elapsed, &user_msecs, &system_msecs, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
@ -193,7 +193,7 @@ static void test_reserved(void) {
#define KiB 1024ULL
#define MiB (KiB*KiB)
#define GiB (MiB*KiB)
mi_reserve_os_memory(4*GiB, false, true);
mi_reserve_os_memory(3*GiB, false, true);
void* p1 = malloc(100);
void* p2 = malloc(100000);
void* p3 = malloc(2*GiB);
@ -240,8 +240,8 @@ static void test_heap_walk(void) {
}
static void test_canary_leak(void) {
char* p = mi_mallocn_tp(char,23);
for(int i = 0; i < 23; i++) {
char* p = mi_mallocn_tp(char, 22);
for (int i = 0; i < 22; i++) {
p[i] = '0'+i;
}
puts(p);
@ -251,7 +251,7 @@ static void test_canary_leak(void) {
#if _WIN32
static void test_manage_os_memory(void) {
size_t size = 256 * 1024 * 1024;
void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
mi_arena_id_t arena_id;
mi_manage_os_memory_ex(ptr, size, true /* committed */, true /* pinned */, false /* is zero */, -1 /* numa node */, true /* exclusive */, &arena_id);
mi_heap_t* cuda_heap = mi_heap_new_in_arena(arena_id); // you can do this in any thread
@ -260,11 +260,11 @@ static void test_manage_os_memory(void) {
void* p1 = mi_heap_malloc(cuda_heap, 8);
int* p2 = mi_heap_malloc_tp(cuda_heap, int);
*p2 = 42;
// and maybe set the cuda heap as the default heap? (but careful as now `malloc` will allocate in the cuda heap as well)
{
mi_heap_t* prev_default_heap = mi_heap_set_default(cuda_heap);
void* p3 = mi_malloc(8); // allocate in the cuda heap
void* p3 = mi_malloc(8); // allocate in the cuda heap
mi_free(p3);
}
mi_free(p1);
@ -287,15 +287,15 @@ static void test_manage_os_memory(void) {
static void test_large_pages(void) {
mi_memid_t memid;
#if 0
#if 0
size_t pages_reserved;
size_t page_size;
uint8_t* p = (uint8_t*)_mi_os_alloc_huge_os_pages(1, -1, 30000, &pages_reserved, &page_size, &memid);
const size_t req_size = pages_reserved * page_size;
#else
#else
const size_t req_size = 64*MI_MiB;
uint8_t* p = (uint8_t*)_mi_os_alloc(req_size,&memid,NULL);
#endif
uint8_t* p = (uint8_t*)_mi_os_alloc(req_size, &memid, NULL);
#endif
p[0] = 1;
@ -318,8 +318,8 @@ static void test_large_pages(void) {
#if 0
#include <stdint.h>
#include <stdbool.h>
#include <mimalloc/bits.h>
#define MI_INTPTR_SIZE 8
#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE)
#define MI_BIN_HUGE 100
@ -371,8 +371,6 @@ uint8_t _mi_bsr(uintptr_t x) {
#endif
}
static inline size_t _mi_wsize_from_size(size_t size) {
return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
}
@ -412,7 +410,9 @@ static inline size_t mi_bin(size_t wsize) {
#endif
wsize--;
// find the highest bit
const size_t b = _mi_bsr(wsize); // note: wsize != 0
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
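To make the top-3-bits comment concrete, here is a worked example; the binning formula is assumed to be the usual mimalloc one, bin = (b << 2) + ((wsize >> (b-2)) & 3) - 3, which sits just outside this hunk:

// worked example: a request of wsize = 100 words
//   wsize-1 = 99 = 0b1100011
//   highest bit:   b = 6
//   next two bits: (99 >> 4) & 3 = 2
//   bin = (6 << 2) + 2 - 3 = 23
// the leading bit plus the next two (the "top 3 bits") select the bin,
// splitting each power of two into 4 sub-bins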
@ -446,7 +446,9 @@ static inline uint8_t _mi_bin4(size_t size) {
bin = MI_BIN_HUGE;
}
else {
uint8_t b = mi_bsr32((uint32_t)wsize);
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3;
}
return bin;
@ -462,7 +464,9 @@ static size_t _mi_binx4(size_t wsize) {
bin = (uint8_t)wsize;
}
else {
uint8_t b = mi_bsr32((uint32_t)wsize);
size_t idx;
mi_bsr(wsize, &idx);
uint8_t b = (uint8_t)idx;
if (b <= 1) return wsize;
bin = ((b << 1) | (wsize >> (b - 1))&0x01) + 3;
}
@ -471,7 +475,9 @@ static size_t _mi_binx4(size_t wsize) {
static size_t _mi_binx8(size_t bsize) {
if (bsize<=1) return bsize;
uint8_t b = mi_bsr32((uint32_t)bsize);
size_t idx;
mi_bsr(bsize, &idx);
uint8_t b = (uint8_t)idx;
if (b <= 2) return bsize;
size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5;
return bin;
@ -489,8 +495,10 @@ static inline size_t mi_binx(size_t wsize) {
}
else {
wsize--;
assert(wsize>0);
// find the highest bit
uint8_t b = (uint8_t)mi_bsr32((uint32_t)wsize); // note: wsize != 0
uint8_t b = (uint8_t)(MI_SIZE_BITS - 1 - mi_clz(wsize));
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin

View file

@ -37,7 +37,7 @@ static void test_thread_local(); // issue #944
static void test_mixed1(); // issue #942
static void test_stl_allocators();
#if x_WIN32
#if _WIN32
#include "main-override-dep.h"
static void test_dep(); // issue #981: test overriding in another DLL
#else
@ -46,8 +46,8 @@ static void test_dep() { };
int main() {
mi_stats_reset(); // ignore earlier allocations
various_tests();
test_mixed1();
//various_tests();
//test_mixed1();
test_dep();
@ -145,11 +145,13 @@ static bool test_stl_allocator1() {
struct some_struct { int i; int j; double z; };
#if x_WIN32
#if _WIN32
static void test_dep()
{
TestAllocInDll t;
std::string s = t.GetString();
t.TestHeapAlloc();
}
#endif

View file

@ -34,7 +34,7 @@ we therefore test the API over various inputs. Please add more tests :-)
#include "mimalloc.h"
// #include "mimalloc/internal.h"
#include "mimalloc/types.h" // for MI_DEBUG and MI_BLOCK_ALIGNMENT_MAX
#include "mimalloc/types.h" // for MI_DEBUG and MI_PAGE_MAX_OVERALLOC_ALIGN
#include "testhelper.h"
@ -169,7 +169,7 @@ int main(void) {
/*
CHECK_BODY("malloc-aligned6") {
bool ok = true;
for (size_t align = 1; align <= MI_BLOCK_ALIGNMENT_MAX && ok; align *= 2) {
for (size_t align = 1; align <= MI_PAGE_MAX_OVERALLOC_ALIGN && ok; align *= 2) {
void* ps[8];
for (int i = 0; i < 8 && ok; i++) {
ps[i] = mi_malloc_aligned(align*13 // size
@ -186,16 +186,16 @@ int main(void) {
};
*/
CHECK_BODY("malloc-aligned7") {
void* p = mi_malloc_aligned(1024,MI_BLOCK_ALIGNMENT_MAX);
void* p = mi_malloc_aligned(1024,MI_PAGE_MAX_OVERALLOC_ALIGN);
mi_free(p);
result = ((uintptr_t)p % MI_BLOCK_ALIGNMENT_MAX) == 0;
result = ((uintptr_t)p % MI_PAGE_MAX_OVERALLOC_ALIGN) == 0;
};
CHECK_BODY("malloc-aligned8") {
bool ok = true;
for (int i = 0; i < 5 && ok; i++) {
int n = (1 << i);
void* p = mi_malloc_aligned(1024, n * MI_BLOCK_ALIGNMENT_MAX);
ok = ((uintptr_t)p % (n*MI_BLOCK_ALIGNMENT_MAX)) == 0;
void* p = mi_malloc_aligned(1024, n * MI_PAGE_MAX_OVERALLOC_ALIGN);
ok = ((uintptr_t)p % (n*MI_PAGE_MAX_OVERALLOC_ALIGN)) == 0;
mi_free(p);
}
result = ok;
@@ -203,7 +203,7 @@ int main(void) {
CHECK_BODY("malloc-aligned9") { // test large alignments
bool ok = true;
void* p[8];
size_t sizes[8] = { 8, 512, 1024 * 1024, MI_BLOCK_ALIGNMENT_MAX, MI_BLOCK_ALIGNMENT_MAX + 1, 2 * MI_BLOCK_ALIGNMENT_MAX, 8 * MI_BLOCK_ALIGNMENT_MAX, 0 };
size_t sizes[8] = { 8, 512, 1024 * 1024, MI_PAGE_MAX_OVERALLOC_ALIGN, MI_PAGE_MAX_OVERALLOC_ALIGN + 1, 2 * MI_PAGE_MAX_OVERALLOC_ALIGN, 8 * MI_PAGE_MAX_OVERALLOC_ALIGN, 0 };
for (int i = 0; i < 28 && ok; i++) {
int align = (1 << i);
for (int j = 0; j < 8 && ok; j++) {
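The rename reflects that alignments above `MI_PAGE_MAX_OVERALLOC_ALIGN` take a separate over-allocation path, but the property under test is unchanged: the returned pointer must be a multiple of the requested alignment. A minimal standalone check against the public API (sketch; the size and alignment range are arbitrary choices, not taken from the test):

#include <stdint.h>
#include <assert.h>
#include <mimalloc.h>

int main(void) {
  for (size_t align = 8; align <= ((size_t)1 << 20); align <<= 1) {
    void* p = mi_malloc_aligned(1024, align);        // size, alignment
    assert(p != NULL && ((uintptr_t)p % align) == 0);
    mi_free(p);
  }
  return 0;
}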


@@ -40,6 +40,19 @@ static int ITER = 20;
static int THREADS = 8;
static int SCALE = 10;
static int ITER = 10;
#elif 0
static int THREADS = 4;
static int SCALE = 10;
static int ITER = 20;
#elif 0
static int THREADS = 32;
static int SCALE = 50;
static int ITER = 50;
#elif 0
static int THREADS = 32;
static int SCALE = 25;
static int ITER = 50;
#define ALLOW_LARGE true
#else
static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor
@@ -50,7 +63,12 @@ static int ITER = 50; // N full iterations destructing and re-creating a
#define STRESS // undefine for leak test
static bool allow_large_objects = false; // allow very large objects? (set to `true` if SCALE>100)
#ifndef ALLOW_LARGE
#define ALLOW_LARGE false
#endif
static bool allow_large_objects = ALLOW_LARGE; // allow very large objects? (set to `true` if SCALE>100)
static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
static bool main_participates = false; // main thread participates as a worker too
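The `#ifndef ALLOW_LARGE` fallback lets any of the preset blocks above opt in to large objects while the default stays `false` — a standard define-with-default idiom. Sketched in isolation with a hypothetical `USE_FEATURE` flag:

#include <stdbool.h>

#if 0                        // a preset block may opt in by defining the flag...
#define USE_FEATURE true
#endif
#ifndef USE_FEATURE          // ...otherwise it falls back to the default
#define USE_FEATURE false
#endif
static const bool use_feature = USE_FEATURE;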
@@ -67,7 +85,7 @@ static bool main_participates = false; // main thread participates as a
#define custom_free(p) mi_free(p)
#ifndef NDEBUG
#define HEAP_WALK // walk the heap objects?
#define xHEAP_WALK // walk the heap objects?
#endif
#endif
@@ -242,9 +260,23 @@ static void test_stress(void) {
//mi_debug_show_arenas(true);
#endif
#if !defined(NDEBUG) || defined(MI_TSAN)
if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
if ((n + 1) % 10 == 0) {
printf("- iterations left: %3d\n", ITER - (n + 1));
#ifndef USE_STD_MALLOC
mi_debug_show_arenas();
#endif
//mi_collect(true);
//mi_debug_show_arenas();
}
#endif
}
// clean up
for (int i = 0; i < TRANSFERS; i++) {
void* p = atomic_exchange_ptr(&transfer[i], NULL);
if (p != NULL) {
free_items(p);
}
}
}
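The new cleanup drains every transfer slot exactly once: swapping NULL in atomically means whichever thread performs the exchange owns the old contents and may safely free them. A sketch of that handoff with C11 atomics (assuming `atomic_exchange_ptr` is a thin wrapper over `atomic_exchange`; the slot count here is illustrative):

#include <stdatomic.h>
#include <stddef.h>

#define TRANSFERS 8
static _Atomic(void*) transfer[TRANSFERS];

static void drain_all(void (*free_items)(void*)) {
  for (int i = 0; i < TRANSFERS; i++) {
    void* p = atomic_exchange(&transfer[i], NULL);  // claim the slot exclusively
    if (p != NULL) free_items(p);                   // only the claimer frees it
  }
}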
#ifndef STRESS
@@ -284,7 +316,12 @@ int main(int argc, char** argv) {
mi_option_enable(mi_option_visit_abandoned);
#endif
#if !defined(NDEBUG) && !defined(USE_STD_MALLOC)
mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
// mi_option_set(mi_option_arena_reserve, 32 * 1024 /* in kib = 32MiB */);
// mi_option_set(mi_option_purge_delay,1);
#endif
#if defined(NDEBUG) && !defined(USE_STD_MALLOC)
// mi_option_set(mi_option_purge_delay,-1);
mi_option_set(mi_option_page_reclaim_on_free, 0);
#endif
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
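Note that `mi_option_set`/`mi_option_enable` should run before the first allocation to take effect reliably; most options can also be set through `MIMALLOC_`-prefixed environment variables. A minimal sketch using a long-standing public option (the dev3-specific option names above may still change):

#include <mimalloc.h>

int main(void) {
  mi_option_set(mi_option_show_errors, 1);  // set before the first allocation
  void* p = mi_malloc(64);
  mi_free(p);
  return 0;
}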
@@ -359,9 +396,10 @@ static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
thread_entry_fun = fun;
DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
thandles[0] = GetCurrentThread(); // avoid lint warning
const size_t start = (main_participates ? 1 : 0);
for (size_t i = start; i < nthreads; i++) {
thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
thandles[i] = CreateThread(0, 8*1024L, &thread_entry, (void*)(i), 0, &tids[i]);
}
if (main_participates) fun(0); // run the main thread as well
for (size_t i = start; i < nthreads; i++) {
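The `8*1024L` change simply makes the constant's type explicit for CreateThread's SIZE_T stack-size parameter. For context, a sketch of the create/join shape of this helper (the real code additionally handles `main_participates` and allocates with `custom_calloc`):

#include <windows.h>
#include <stdlib.h>

static DWORD WINAPI worker(LPVOID arg) {
  (void)arg;                          // thread index arrives as (void*)(i)
  return 0;
}

static void run_and_join(size_t n) {
  DWORD tid;
  HANDLE* h = (HANDLE*)calloc(n, sizeof(HANDLE));
  for (size_t i = 0; i < n; i++) {
    h[i] = CreateThread(NULL, 8*1024L /* SIZE_T stack size */, &worker, (LPVOID)i, 0, &tid);
  }
  for (size_t i = 0; i < n; i++) {    // join and release each worker
    WaitForSingleObject(h[i], INFINITE);
    CloseHandle(h[i]);
  }
  free(h);
}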