Merge branch 'dev' into kile/stl

Daan 2020-01-16 15:33:13 -08:00 committed by GitHub
commit 97bd204c42
GPG key ID: 4AEE18F83AFDEB23
34 changed files with 3055 additions and 1497 deletions

View file

@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.0)
 project(libmimalloc C CXX)
-include("cmake/mimalloc-config-version.cmake")
 set(CMAKE_C_STANDARD 11)
 set(CMAKE_CXX_STANDARD 17)
@@ -14,9 +14,13 @@ option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanis
 option(MI_BUILD_TESTS "Build test executables" ON)
 option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF)
+include("cmake/mimalloc-config-version.cmake")
 set(mi_sources
     src/stats.c
+    src/random.c
     src/os.c
+    src/arena.c
     src/memory.c
     src/segment.c
     src/page.c
@@ -112,7 +116,7 @@ endif()
 # extra needed libraries
 if(WIN32)
-  list(APPEND mi_libraries psapi shell32 user32)
+  list(APPEND mi_libraries psapi shell32 user32 bcrypt)
 else()
   list(APPEND mi_libraries pthread)
   find_library(LIBRT rt)

View file

@@ -13,16 +13,31 @@ jobs:
   pool:
     vmImage:
       windows-2019
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+      Secure:
+        BuildType: secure
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
   steps:
   - task: CMake@1
     inputs:
-      workingDirectory: 'build'
-      cmakeArgs: ..
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
   - task: MSBuild@1
     inputs:
-      solution: build/libmimalloc.sln
+      solution: $(BuildType)/libmimalloc.sln
-  - upload: $(Build.SourcesDirectory)/build
-    artifact: windows
+  - script: |
+      cd $(BuildType)
+      ctest
+    displayName: CTest
+  - upload: $(Build.SourcesDirectory)/$(BuildType)
+    artifact: mimalloc-windows-$(BuildType)
 - job:
   displayName: Linux
@@ -61,32 +76,42 @@ jobs:
       CXX: clang++
       BuildType: secure-clang
       cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
   steps:
   - task: CMake@1
     inputs:
       workingDirectory: $(BuildType)
       cmakeArgs: .. $(cmakeExtraArgs)
   - script: make -j$(nproc) -C $(BuildType)
     displayName: Make
   - script: make test -C $(BuildType)
-    displayName: Ctest
+    displayName: CTest
   - upload: $(Build.SourcesDirectory)/$(BuildType)
-    artifact: ubuntu-$(BuildType)
+    artifact: mimalloc-ubuntu-$(BuildType)
 - job:
   displayName: macOS
   pool:
     vmImage:
       macOS-10.14
+  strategy:
+    matrix:
+      Debug:
+        BuildType: debug
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Debug -DMI_DEBUG_FULL=ON
+      Release:
+        BuildType: release
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release
+      Secure:
+        BuildType: secure
+        cmakeExtraArgs: -DCMAKE_BUILD_TYPE=Release -DMI_SECURE=ON
   steps:
   - task: CMake@1
     inputs:
-      workingDirectory: 'build'
-      cmakeArgs: ..
+      workingDirectory: $(BuildType)
+      cmakeArgs: .. $(cmakeExtraArgs)
-  - script: make -j$(sysctl -n hw.ncpu) -C build
-  - upload: $(Build.SourcesDirectory)/build
-    artifact: macos
+  - script: make -j$(sysctl -n hw.ncpu) -C $(BuildType)
+    displayName: Make
+  - script: make test -C $(BuildType)
+    displayName: CTest
+  - upload: $(Build.SourcesDirectory)/$(BuildType)
+    artifact: mimalloc-macos-$(BuildType)

View file

@@ -1,5 +1,5 @@
 set(mi_version_major 1)
-set(mi_version_minor 2)
+set(mi_version_minor 4)
 set(mi_version ${mi_version_major}.${mi_version_minor})
 set(PACKAGE_VERSION ${mi_version})

View file

@@ -129,7 +129,7 @@
       <CompileAs>Default</CompileAs>
     </ClCompile>
     <Link>
-      <AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <IgnoreSpecificDefaultLibraries>
       </IgnoreSpecificDefaultLibraries>
       <ModuleDefinitionFile>
@@ -195,7 +195,7 @@
     <Link>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
       <OptimizeReferences>true</OptimizeReferences>
-      <AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <AdditionalDependencies>$(ProjectDir)\..\..\bin\mimalloc-redirect.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
       <ModuleDefinitionFile>
       </ModuleDefinitionFile>
       <LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
@@ -231,6 +231,7 @@
     </ClCompile>
     <ClCompile Include="..\..\src\alloc-posix.c" />
     <ClCompile Include="..\..\src\alloc.c" />
+    <ClCompile Include="..\..\src\arena.c" />
     <ClCompile Include="..\..\src\heap.c" />
     <ClCompile Include="..\..\src\init.c" />
     <ClCompile Include="..\..\src\memory.c" />
@@ -243,6 +244,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="..\..\src\page.c" />
+    <ClCompile Include="..\..\src\random.c" />
     <ClCompile Include="..\..\src\segment.c" />
     <ClCompile Include="..\..\src\stats.c" />
   </ItemGroup>

View file

@@ -70,5 +70,11 @@
     <ClCompile Include="..\..\src\alloc-posix.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\random.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
 </Project>

View file

@@ -217,6 +217,7 @@
     </ClCompile>
     <ClCompile Include="..\..\src\alloc-posix.c" />
     <ClCompile Include="..\..\src\alloc.c" />
+    <ClCompile Include="..\..\src\arena.c" />
     <ClCompile Include="..\..\src\heap.c" />
     <ClCompile Include="..\..\src\init.c" />
     <ClCompile Include="..\..\src\memory.c" />
@@ -228,6 +229,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="..\..\src\page.c" />
+    <ClCompile Include="..\..\src\random.c" />
     <ClCompile Include="..\..\src\segment.c" />
     <ClCompile Include="..\..\src\os.c" />
     <ClCompile Include="..\..\src\stats.c" />

View file

@@ -53,6 +53,12 @@
     <ClCompile Include="..\..\src\alloc-posix.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\random.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View file

@@ -123,7 +123,7 @@
       <SDLCheck>true</SDLCheck>
       <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
+      <PreprocessorDefinitions>MI_DEBUG=3;MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <SupportJustMyCode>false</SupportJustMyCode>
       <CompileAs>Default</CompileAs>
@@ -231,6 +231,10 @@
     </ClCompile>
     <ClCompile Include="..\..\src\alloc-posix.c" />
     <ClCompile Include="..\..\src\alloc.c" />
+    <ClCompile Include="..\..\src\arena.c" />
+    <ClCompile Include="..\..\src\bitmap.inc.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
     <ClCompile Include="..\..\src\heap.c" />
     <ClCompile Include="..\..\src\init.c" />
     <ClCompile Include="..\..\src\memory.c" />
@@ -243,6 +247,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="..\..\src\page.c" />
+    <ClCompile Include="..\..\src\random.c" />
     <ClCompile Include="..\..\src\segment.c" />
     <ClCompile Include="..\..\src\stats.c" />
   </ItemGroup>

View file

@@ -40,6 +40,15 @@
     <ClCompile Include="..\..\src\stats.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\bitmap.inc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\random.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View file

@@ -116,7 +116,7 @@
       <SDLCheck>true</SDLCheck>
       <ConformanceMode>true</ConformanceMode>
       <AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>MI_DEBUG=1;%(PreprocessorDefinitions);</PreprocessorDefinitions>
+      <PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
       <CompileAs>CompileAsCpp</CompileAs>
       <SupportJustMyCode>false</SupportJustMyCode>
       <LanguageStandard>stdcpp17</LanguageStandard>
@@ -217,6 +217,10 @@
     </ClCompile>
     <ClCompile Include="..\..\src\alloc-posix.c" />
     <ClCompile Include="..\..\src\alloc.c" />
+    <ClCompile Include="..\..\src\arena.c" />
+    <ClCompile Include="..\..\src\bitmap.inc.c">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+    </ClCompile>
     <ClCompile Include="..\..\src\heap.c" />
     <ClCompile Include="..\..\src\init.c" />
     <ClCompile Include="..\..\src\memory.c" />
@@ -228,6 +232,7 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </ClCompile>
     <ClCompile Include="..\..\src\page.c" />
+    <ClCompile Include="..\..\src\random.c" />
     <ClCompile Include="..\..\src\segment.c" />
     <ClCompile Include="..\..\src\os.c" />
     <ClCompile Include="..\..\src\stats.c" />

View file

@@ -43,6 +43,15 @@
     <ClCompile Include="..\..\src\stats.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\arena.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\bitmap.inc.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\random.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="$(ProjectDir)..\..\include\mimalloc.h">

View file

@@ -36,6 +36,13 @@ static inline void mi_atomic_add64(volatile int64_t* p, int64_t add);
 // Atomically add a value; returns the previous value. Memory ordering is relaxed.
 static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add);
+// Atomically "and" a value; returns the previous value. Memory ordering is relaxed.
+static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x);
+// Atomically "or" a value; returns the previous value. Memory ordering is relaxed.
+static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x);
 // Atomically compare and exchange a value; returns `true` if successful.
 // May fail spuriously. Memory ordering as release on success, and relaxed on failure.
 // (Note: expected and desired are in opposite order from atomic_compare_exchange)
@@ -121,22 +128,28 @@ static inline void* mi_atomic_exchange_ptr(volatile _Atomic(void*)* p, void* exc
 #include <intrin.h>
 #ifdef _WIN64
 typedef LONG64 msc_intptr_t;
-#define RC64(f) f##64
+#define MI_64(f) f##64
 #else
 typedef LONG msc_intptr_t;
-#define RC64(f) f
+#define MI_64(f) f
 #endif
 static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) {
-  return (intptr_t)RC64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
+  return (intptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
 }
+static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
+}
+static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
+}
 static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
-  return (expected == (uintptr_t)RC64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
+  return (expected == (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)expected));
 }
 static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
   return mi_atomic_cas_strong(p,desired,expected);
 }
 static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
-  return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
+  return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
 }
 static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) {
   return *p;
@@ -177,6 +190,14 @@ static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add
   MI_USING_STD
   return atomic_fetch_add_explicit(p, add, memory_order_relaxed);
 }
+static inline uintptr_t mi_atomic_and(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  MI_USING_STD
+  return atomic_fetch_and_explicit(p, x, memory_order_relaxed);
+}
+static inline uintptr_t mi_atomic_or(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
+  MI_USING_STD
+  return atomic_fetch_or_explicit(p, x, memory_order_relaxed);
+}
 static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected) {
   MI_USING_STD
   return atomic_compare_exchange_weak_explicit(p, &expected, desired, memory_order_release, memory_order_relaxed);

View file

@@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include "mimalloc-types.h"
-#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__))
+#if defined(MI_MALLOC_OVERRIDE) && (defined(__APPLE__) || defined(__OpenBSD__) || defined(__DragonFly__))
 #define MI_TLS_RECURSE_GUARD
 #endif
@@ -33,8 +33,8 @@ terms of the MIT license. A copy of the license can be found in the file
 // "options.c"
-void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message);
-void _mi_fprintf(mi_output_fun* out, const char* fmt, ...);
+void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
+void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
 void _mi_error_message(const char* fmt, ...);
 void _mi_warning_message(const char* fmt, ...);
 void _mi_verbose_message(const char* fmt, ...);
@@ -42,12 +42,17 @@ void _mi_trace_message(const char* fmt, ...);
 void _mi_options_init(void);
 void _mi_fatal_error(const char* fmt, ...) mi_attr_noreturn;
-// "init.c"
+// random.c
+void _mi_random_init(mi_random_ctx_t* ctx);
+void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
+uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
+uintptr_t _mi_heap_random_next(mi_heap_t* heap);
+static inline uintptr_t _mi_random_shuffle(uintptr_t x);
+
+// init.c
 extern mi_stats_t _mi_stats_main;
 extern const mi_page_t _mi_page_empty;
 bool _mi_is_main_thread(void);
-uintptr_t _mi_random_shuffle(uintptr_t x);
-uintptr_t _mi_random_init(uintptr_t seed /* can be zero */);
 bool _mi_preloading(); // true while the C runtime is not ready
 // os.c
@@ -59,15 +64,15 @@ size_t _mi_os_good_alloc_size(size_t size);
 // memory.c
 void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* id, mi_os_tld_t* tld);
-void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats);
-bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats);
-bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
-bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld);
+bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld);
+bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
+bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld);
 bool _mi_mem_protect(void* addr, size_t size);
 bool _mi_mem_unprotect(void* addr, size_t size);
-void _mi_mem_collect(mi_stats_t* stats);
+void _mi_mem_collect(mi_os_tld_t* tld);
 // "segment.c"
 mi_page_t* _mi_segment_page_alloc(size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
@@ -75,7 +80,7 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t*
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
 bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
-uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size); // page start for any page
+uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page
 // "page.c"
 void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
@@ -85,8 +90,9 @@ void _mi_page_unfull(mi_page_t* page);
 void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
 void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
 void _mi_heap_delayed_free(mi_heap_t* heap);
+void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
-void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay);
+void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
 void _mi_deferred_free(mi_heap_t* heap, bool force);
@@ -100,13 +106,14 @@ uint8_t _mi_bsr(uintptr_t x); // bit-scan-right, used on BSD i
 // "heap.c"
 void _mi_heap_destroy_pages(mi_heap_t* heap);
 void _mi_heap_collect_abandon(mi_heap_t* heap);
-uintptr_t _mi_heap_random(mi_heap_t* heap);
 void _mi_heap_set_default_direct(mi_heap_t* heap);
 // "stats.c"
 void _mi_stats_done(mi_stats_t* stats);
-double _mi_clock_end(double start);
-double _mi_clock_start(void);
+mi_msecs_t _mi_clock_now(void);
+mi_msecs_t _mi_clock_end(mi_msecs_t start);
+mi_msecs_t _mi_clock_start(void);
 // "alloc.c"
 void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic`
@@ -159,7 +166,6 @@ bool _mi_page_is_valid(mi_page_t* page);
 // Overflow detecting multiply
-#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
 static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
 #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5
 #include <limits.h> // UINT_MAX, ULONG_MAX
@@ -171,6 +177,7 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
   return __builtin_umulll_overflow(count, size, total);
 #endif
 #else /* __builtin_umul_overflow is unavailable */
+#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
   *total = count * size;
   return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
     && size > 0 && (SIZE_MAX / size) < count);
@@ -184,6 +191,7 @@ static inline bool _mi_is_power_of_two(uintptr_t x) {
 // Align upwards
 static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
+  mi_assert_internal(alignment != 0);
   uintptr_t mask = alignment - 1;
   if ((alignment & mask) == 0) { // power of two?
     return ((sz + mask) & ~mask);
@@ -193,6 +201,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
   }
 }
+// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
+static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
+  mi_assert_internal(divider != 0);
+  return (divider == 0 ? size : ((size + divider - 1) / divider));
+}
+
 // Is memory zero initialized?
 static inline bool mi_mem_is_zero(void* p, size_t size) {
   for (size_t i = 0; i < size; i++) {
@@ -221,7 +235,7 @@ extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate
 static inline mi_heap_t* mi_get_default_heap(void) {
 #ifdef MI_TLS_RECURSE_GUARD
-  // on some platforms, like macOS, the dynamic loader calls `malloc`
+  // on some BSD platforms, like macOS, the dynamic loader calls `malloc`
   // to initialize thread local data. To avoid recursion, we need to avoid
   // accessing the thread local `_mi_default_heap` until our module is loaded
   // and use the statically allocated main heap until that time.
@@ -279,7 +293,7 @@ static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
 static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) {
   // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages
   ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
-  mi_assert_internal(diff >= 0 && diff < MI_SEGMENT_SIZE);
+  mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE);
   uintptr_t idx = (uintptr_t)diff >> segment->page_shift;
   mi_assert_internal(idx < segment->capacity);
   mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0);
@@ -294,7 +308,9 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const
 // Quick page start for initialized pages
 static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
-  return _mi_segment_page_start(segment, page, page->block_size, page_size);
+  const size_t bsize = page->xblock_size;
+  mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0);
+  return _mi_segment_page_start(segment, page, bsize, page_size, NULL);
 }
 // Get the page containing the pointer
@@ -302,7 +318,40 @@ static inline mi_page_t* _mi_ptr_page(void* p) {
   return _mi_segment_page_of(_mi_ptr_segment(p), p);
 }
+// Get the block size of a page (special cased for huge objects)
+static inline size_t mi_page_block_size(const mi_page_t* page) {
+  const size_t bsize = page->xblock_size;
+  mi_assert_internal(bsize > 0);
+  if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) {
+    return bsize;
+  }
+  else {
+    size_t psize;
+    _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL);
+    return psize;
+  }
+}
+
+// Thread free access
+static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
+  return (mi_block_t*)(mi_atomic_read_relaxed(&page->xthread_free) & ~3);
+}
+
+static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
+  return (mi_delayed_t)(mi_atomic_read_relaxed(&page->xthread_free) & 3);
+}
+
+// Heap access
+static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
+  return (mi_heap_t*)(mi_atomic_read_relaxed(&page->xheap));
+}
+
+static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
+  mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
+  mi_atomic_write(&page->xheap,(uintptr_t)heap);
+}
+
+// Thread free flag helpers
 static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
   return (mi_block_t*)(tf & ~0x03);
 }
@@ -322,7 +371,7 @@ static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t*
 // are all blocks in a page freed?
 static inline bool mi_page_all_free(const mi_page_t* page) {
   mi_assert_internal(page != NULL);
-  return (page->used - page->thread_freed == 0);
+  return (page->used == 0);
 }
 // are there immediately available blocks
@@ -333,8 +382,8 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) {
 // are there free blocks in this page?
 static inline bool mi_page_has_free(mi_page_t* page) {
   mi_assert_internal(page != NULL);
-  bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_tf_block(page->thread_free) != NULL));
-  mi_assert_internal(hasfree || page->used - page->thread_freed == page->capacity);
+  bool hasfree = (mi_page_immediate_available(page) || page->local_free != NULL || (mi_page_thread_free(page) != NULL));
+  mi_assert_internal(hasfree || page->used == page->capacity);
   return hasfree;
 }
@@ -348,7 +397,7 @@ static inline bool mi_page_all_used(mi_page_t* page) {
 static inline bool mi_page_mostly_used(const mi_page_t* page) {
   if (page==NULL) return true;
   uint16_t frac = page->reserved / 8U;
-  return (page->reserved - page->used + page->thread_freed <= frac);
+  return (page->reserved - page->used <= frac);
 }
 static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
@@ -377,12 +426,30 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
 }
-// -------------------------------------------------------------------
-// Encoding/Decoding the free list next pointers
-// Note: we pass a `null` value to be used as the `NULL` value for the
-// end of a free list. This is to prevent the cookie itself to ever
-// be present among user blocks (as `cookie^0==cookie`).
-// -------------------------------------------------------------------
+/* -------------------------------------------------------------------
+  Encoding/Decoding the free list next pointers
+
+  This is to protect against buffer overflow exploits where the
+  free list is mutated. Many hardened allocators xor the next pointer `p`
+  with a secret key `k1`, as `p^k1`. This prevents overwriting with known
+  values but might be still too weak: if the attacker can guess
+  the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
+  Moreover, if multiple blocks can be read as well, the attacker can
+  xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
+  about the pointers (and subsequently `k1`).
+
+  Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
+  Since these operations are not associative, the above approaches do not
+  work so well any more even if the `p` can be guesstimated. For example,
+  for the read case we can subtract two entries to discard the `+k1` term,
+  but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
+  We include the left-rotation since xor and addition are otherwise linear
+  in the lowest bit. Finally, both keys are unique per page which reduces
+  the re-use of keys by a large factor.
+
+  We also pass a separate `null` value to be used as `NULL` or otherwise
+  `(k2<<<k1)+k1` would appear (too) often as a sentinel value.
+------------------------------------------------------------------- */
 static inline bool mi_is_in_same_segment(const void* p, const void* q) {
   return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
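As a quick sanity check of the scheme described in the comment block above (and implemented by `mi_rotl`/`mi_rotr` and `mi_block_set_nextx`/`mi_block_nextx` in the next hunk): encoding is `((p^k2)<<<k1)+k1`, and decoding undoes the addition, the rotation, and the xor in reverse. Below is a minimal standalone sketch with made-up keys; it is not the mimalloc code itself, which draws its two keys per page from its RNG.

// Sketch only: standalone round-trip of the free-list encoding scheme.
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

#define PTR_BITS (sizeof(uintptr_t)*8)

static uintptr_t rotl(uintptr_t x, uintptr_t shift) {
  shift %= PTR_BITS;
  return (shift == 0 ? x : (x << shift) | (x >> (PTR_BITS - shift)));
}
static uintptr_t rotr(uintptr_t x, uintptr_t shift) {
  shift %= PTR_BITS;
  return (shift == 0 ? x : (x >> shift) | (x << (PTR_BITS - shift)));
}

// encode a next-pointer: ((p ^ k2) <<< k1) + k1
static uintptr_t encode(uintptr_t p, uintptr_t k1, uintptr_t k2) {
  return rotl(p ^ k2, k1) + k1;
}
// decode by undoing the +k1, the rotation, and the xor (in that order)
static uintptr_t decode(uintptr_t e, uintptr_t k1, uintptr_t k2) {
  return rotr(e - k1, k1) ^ k2;
}

int main(void) {
  const uintptr_t k1 = 0x9e3779b97f4a7c15u;  // made-up key values
  const uintptr_t k2 = 0xbf58476d1ce4e5b9u;
  const uintptr_t p  = 0x00007f0012345678u;  // pretend block address
  const uintptr_t e  = encode(p, k1, k2);
  assert(decode(e, k1, k2) == p);            // round-trips exactly
  printf("p=0x%zx encoded=0x%zx\n", (size_t)p, (size_t)e);
  return 0;
}

Because the addition and the rotation do not commute with xor, guessing `p` or xoring two encoded entries no longer cancels the keys the way it does for a plain `p^k1` scheme.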
@@ -397,52 +464,103 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) {
   return (idxp == idxq);
 }
+static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
+  shift %= MI_INTPTR_BITS;
+  return ((x << shift) | (x >> (MI_INTPTR_BITS - shift)));
+}
+static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
+  shift %= MI_INTPTR_BITS;
+  return ((x >> shift) | (x << (MI_INTPTR_BITS - shift)));
+}
+
-static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t cookie ) {
+static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, uintptr_t key1, uintptr_t key2 ) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_t* b = (mi_block_t*)(block->next ^ cookie);
+  mi_block_t* b = (mi_block_t*)(mi_rotr(block->next - key1, key1) ^ key2);
   if (mi_unlikely((void*)b==null)) { b = NULL; }
   return b;
 #else
-  UNUSED(cookie); UNUSED(null);
+  UNUSED(key1); UNUSED(key2); UNUSED(null);
   return (mi_block_t*)block->next;
 #endif
 }
-static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t cookie) {
+static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, uintptr_t key1, uintptr_t key2) {
 #ifdef MI_ENCODE_FREELIST
   if (mi_unlikely(next==NULL)) { next = (mi_block_t*)null; }
-  block->next = (mi_encoded_t)next ^ cookie;
+  block->next = mi_rotl((uintptr_t)next ^ key2, key1) + key1;
 #else
-  UNUSED(cookie); UNUSED(null);
+  UNUSED(key1); UNUSED(key2); UNUSED(null);
   block->next = (mi_encoded_t)next;
 #endif
 }
 static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_t* next = mi_block_nextx(page,block,page->cookie);
-  // check for free list corruption: is `next` at least in our segment range?
+  mi_block_t* next = mi_block_nextx(page,block,page->key[0],page->key[1]);
+  // check for free list corruption: is `next` at least in the same page?
   // TODO: check if `next` is `page->block_size` aligned?
-  if (next!=NULL && !mi_is_in_same_page(block, next)) {
-    _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", page->block_size, block, (uintptr_t)next);
+  if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
+    _mi_fatal_error("corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next);
     next = NULL;
   }
   return next;
 #else
   UNUSED(page);
-  return mi_block_nextx(page,block,0);
+  return mi_block_nextx(page,block,0,0);
 #endif
 }
 static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
 #ifdef MI_ENCODE_FREELIST
-  mi_block_set_nextx(page,block,next, page->cookie);
+  mi_block_set_nextx(page,block,next, page->key[0], page->key[1]);
 #else
   UNUSED(page);
-  mi_block_set_nextx(page,block, next,0);
+  mi_block_set_nextx(page,block, next,0,0);
 #endif
 }
+
+// -------------------------------------------------------------------
+// Fast "random" shuffle
+// -------------------------------------------------------------------
+
+static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
+  if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros
+#if (MI_INTPTR_SIZE==8)
+  // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
+  x ^= x >> 30;
+  x *= 0xbf58476d1ce4e5b9UL;
+  x ^= x >> 27;
+  x *= 0x94d049bb133111ebUL;
+  x ^= x >> 31;
+#elif (MI_INTPTR_SIZE==4)
+  // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
+  x ^= x >> 16;
+  x *= 0x7feb352dUL;
+  x ^= x >> 15;
+  x *= 0x846ca68bUL;
+  x ^= x >> 16;
+#endif
+  return x;
+}
+
+// -------------------------------------------------------------------
+// Optimize numa node access for the common case (= one node)
+// -------------------------------------------------------------------
+
+int _mi_os_numa_node_get(mi_os_tld_t* tld);
+size_t _mi_os_numa_node_count_get(void);
+
+extern size_t _mi_numa_node_count;
+static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
+  if (mi_likely(_mi_numa_node_count == 1)) return 0;
+  else return _mi_os_numa_node_get(tld);
+}
+static inline size_t _mi_os_numa_node_count(void) {
+  if (mi_likely(_mi_numa_node_count>0)) return _mi_numa_node_count;
+  else return _mi_os_numa_node_count_get();
+}
+
 // -------------------------------------------------------------------
 // Getting the thread id should be performant
 // as it is called in the fast path of `_mi_free`,

View file

@@ -76,6 +76,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 #define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
+#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
 #define KiB ((size_t)1024)
 #define MiB (KiB*KiB)
@@ -93,12 +94,12 @@
 #define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb
 // Derived constants
-#define MI_SEGMENT_SIZE (1<<MI_SEGMENT_SHIFT)
+#define MI_SEGMENT_SIZE (1UL<<MI_SEGMENT_SHIFT)
 #define MI_SEGMENT_MASK ((uintptr_t)MI_SEGMENT_SIZE - 1)
-#define MI_SMALL_PAGE_SIZE (1<<MI_SMALL_PAGE_SHIFT)
-#define MI_MEDIUM_PAGE_SIZE (1<<MI_MEDIUM_PAGE_SHIFT)
-#define MI_LARGE_PAGE_SIZE (1<<MI_LARGE_PAGE_SHIFT)
+#define MI_SMALL_PAGE_SIZE (1UL<<MI_SMALL_PAGE_SHIFT)
+#define MI_MEDIUM_PAGE_SIZE (1UL<<MI_MEDIUM_PAGE_SHIFT)
+#define MI_LARGE_PAGE_SIZE (1UL<<MI_LARGE_PAGE_SHIFT)
 #define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE)
 #define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
@@ -123,6 +124,9 @@
 #error "define more bins"
 #endif
+// Used as a special value to encode block sizes in 32 bits.
+#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX)
+
 // The free lists use encoded next fields
 // (Only actually encodes when MI_ENCODED_FREELIST is defined.)
 typedef uintptr_t mi_encoded_t;
@@ -135,10 +139,10 @@ typedef struct mi_block_s {
 // The delayed flags are used for efficient multi-threaded free-ing
 typedef enum mi_delayed_e {
-  MI_NO_DELAYED_FREE = 0,
-  MI_USE_DELAYED_FREE = 1,
-  MI_DELAYED_FREEING = 2,
-  MI_NEVER_DELAYED_FREE = 3
+  MI_USE_DELAYED_FREE = 0,   // push on the owning heap thread delayed list
+  MI_DELAYED_FREEING = 1,    // temporary: another thread is accessing the owning heap
+  MI_NO_DELAYED_FREE = 2,    // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
+  MI_NEVER_DELAYED_FREE = 3  // sticky, only resets on page reclaim
 } mi_delayed_t;
@@ -166,14 +170,28 @@ typedef uintptr_t mi_thread_free_t;
 // implement a monotonic heartbeat. The `thread_free` list is needed for
 // avoiding atomic operations in the common case.
 //
-// `used - thread_freed` == actual blocks that are in use (alive)
-// `used - thread_freed + |free| + |local_free| == capacity`
-//
-// note: we don't count `freed` (as |free|) instead of `used` to reduce
+// `used - |thread_free|` == actual blocks that are in use (alive)
+// `used - |thread_free| + |free| + |local_free| == capacity`
+//
+// We don't count `freed` (as |free|) but use `used` to reduce
 // the number of memory accesses in the `mi_page_all_free` function(s).
-// note: the funny layout here is due to:
-// - access is optimized for `mi_free` and `mi_page_alloc`
-// - using `uint16_t` does not seem to slow things down
+//
+// Notes:
+// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
+// - Using `uint16_t` does not seem to slow things down
+// - The size is 8 words on 64-bit which helps the page index calculations
+//   (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
+//   and 12 are still good for address calculation)
+// - To limit the structure size, the `xblock_size` is 32-bits only; for
+//   blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
+// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
+//   concurrent frees where only the first concurrent free adds to the owning
+//   heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
+//   The invariant is that no-delayed-free is only set if there is
+//   at least one block that will be added, or as already been added, to
+//   the owning heap `thread_delayed_free` list. This guarantees that pages
+//   will be freed correctly even if only other threads free blocks.
 typedef struct mi_page_s {
   // "owned" by the segment
   uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]`
@@ -186,29 +204,22 @@ typedef struct mi_page_s {
   uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
   uint16_t reserved; // number of blocks reserved in memory
   mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
-  bool is_zero; // `true` if the blocks in the free list are zero initialized
+  uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized
+  uint8_t retire_expire:7; // expiration count for retired blocks
   mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
 #ifdef MI_ENCODE_FREELIST
-  uintptr_t cookie; // random cookie to encode the free lists
+  uintptr_t key[2]; // two random keys to encode the free lists (see `_mi_block_next`)
 #endif
-  size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
+  uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
+  uint32_t xblock_size; // size available in each block (always `>0`)
   mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
-  volatile _Atomic(uintptr_t) thread_freed; // at least this number of blocks are in `thread_free`
-  volatile _Atomic(mi_thread_free_t) thread_free; // list of deferred free blocks freed by other threads
+  volatile _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
+  volatile _Atomic(uintptr_t) xheap;
-
-  // less accessed info
-  size_t block_size; // size available in each block (always `>0`)
-  mi_heap_t* heap; // the owning heap
   struct mi_page_s* next; // next page owned by this thread with the same `block_size`
   struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
-
-  // improve page index calculation
-  // without padding: 10 words on 64-bit, 11 on 32-bit. Secure adds one word
-#if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST))
-  void* padding[1]; // 12 words on 64-bit with cookie, 12 words on 32-bit plain
-#endif
 } mi_page_t;
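The note above about `thread_free` carrying delayed-free flags in its bottom bits corresponds to the `& ~3` / `& 3` accessors added in mimalloc-internal.h. A tiny standalone sketch of that packing, with illustrative names rather than the actual mimalloc helpers:

// Sketch only: packing a block pointer and a 2-bit delayed-free state into one word.
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

typedef enum delayed_e {            // illustrative stand-in for mi_delayed_t
  USE_DELAYED_FREE   = 0,
  DELAYED_FREEING    = 1,
  NO_DELAYED_FREE    = 2,
  NEVER_DELAYED_FREE = 3
} delayed_t;

typedef uintptr_t thread_free_t;    // block pointer | flag (bottom 2 bits)

static thread_free_t tf_make(void* block, delayed_t flag) {
  assert(((uintptr_t)block & 3) == 0);         // blocks are pointer-aligned,
  return (uintptr_t)block | (uintptr_t)flag;   // so the low 2 bits are free
}
static void*     tf_block(thread_free_t tf) { return (void*)(tf & ~(uintptr_t)3); }
static delayed_t tf_flag (thread_free_t tf) { return (delayed_t)(tf & 3); }

int main(void) {
  static uintptr_t dummy_block;     // stand-in for a freed block
  thread_free_t tf = tf_make(&dummy_block, DELAYED_FREEING);
  assert(tf_block(tf) == (void*)&dummy_block);
  assert(tf_flag(tf)  == DELAYED_FREEING);
  printf("flag=%d block=%p\n", (int)tf_flag(tf), tf_block(tf));
  return 0;
}

Since every block is at least pointer-aligned, the two low bits of its address are always zero, so the delayed-free state rides along in the same atomic word as the list head.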
@@ -232,13 +243,13 @@ typedef struct mi_segment_s {
   // segment fields
   struct mi_segment_s* next; // must be the first segment field -- see `segment.c:segment_alloc`
   struct mi_segment_s* prev;
-  volatile _Atomic(struct mi_segment_s*) abandoned_next;
+  struct mi_segment_s* abandoned_next;
   size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
   size_t used; // count of pages in use (`used <= capacity`)
   size_t capacity; // count of available pages (`#free + used`)
   size_t segment_size;// for huge pages this may be different from `MI_SEGMENT_SIZE`
   size_t segment_info_size; // space we are using from the first page for segment meta-data and possible guard pages.
-  uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
+  uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
   // layout like this to optimize access in `mi_free`
   size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
@@ -273,6 +284,14 @@ typedef struct mi_page_queue_s {
 #define MI_BIN_FULL (MI_BIN_HUGE+1)
+// Random context
+typedef struct mi_random_cxt_s {
+  uint32_t input[16];
+  uint32_t output[16];
+  int output_available;
+} mi_random_ctx_t;
+
 // A heap owns a set of pages.
 struct mi_heap_s {
   mi_tld_t* tld;
@@ -280,8 +299,9 @@ struct mi_heap_s {
   mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
   volatile _Atomic(mi_block_t*) thread_delayed_free;
   uintptr_t thread_id; // thread this heap belongs too
-  uintptr_t cookie;
-  uintptr_t random; // random number used for secure allocation
+  uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
+  uintptr_t key[2]; // twb random keys used to encode the `thread_delayed_free` list
+  mi_random_ctx_t random; // random number context used for secure allocation
   size_t page_count; // total number of pages in the `pages` queues.
   bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
 };
@@ -384,22 +404,29 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
 #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
 // ------------------------------------------------------
 // Thread Local data
 // ------------------------------------------------------
+typedef int64_t mi_msecs_t;
+
 // Queue of segments
 typedef struct mi_segment_queue_s {
   mi_segment_t* first;
   mi_segment_t* last;
 } mi_segment_queue_t;
+// OS thread local data
+typedef struct mi_os_tld_s {
+  size_t region_idx; // start point for next allocation
+  mi_stats_t* stats; // points to tld stats
+} mi_os_tld_t;
+
 // Segments thread local data
 typedef struct mi_segments_tld_s {
   mi_segment_queue_t small_free; // queue of segments with free small pages
   mi_segment_queue_t medium_free; // queue of segments with free medium pages
+  mi_page_queue_t pages_reset; // queue of freed pages that can be reset
   size_t count; // current number of segments;
   size_t peak_count; // peak number of segments
   size_t current_size; // current size of all segments
@@ -408,14 +435,9 @@ typedef struct mi_segments_tld_s {
   size_t cache_size; // total size of all segments in the cache
   mi_segment_t* cache; // (small) cache of segments
   mi_stats_t* stats; // points to tld stats
+  mi_os_tld_t* os; // points to os stats
 } mi_segments_tld_t;
-// OS thread local data
-typedef struct mi_os_tld_s {
-  size_t region_idx; // start point for next allocation
-  mi_stats_t* stats; // points to tld stats
-} mi_os_tld_t;
 // Thread local data
 struct mi_tld_s {
   unsigned long long heartbeat; // monotonic heartbeat count

View file

@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H #ifndef MIMALLOC_H
#define MIMALLOC_H #define MIMALLOC_H
#define MI_MALLOC_VERSION 120 // major + 2 digits minor #define MI_MALLOC_VERSION 140 // major + 2 digits minor
// ------------------------------------------------------ // ------------------------------------------------------
// Compiler specific attributes // Compiler specific attributes
@ -110,22 +110,23 @@ mi_decl_export mi_decl_allocator void* mi_reallocf(void* p, size_t newsize)
mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept; mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept;
mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept;
typedef void (mi_deferred_free_fun)(bool force, unsigned long long heartbeat); typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free) mi_attr_noexcept; mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept;
typedef void (mi_output_fun)(const char* msg); typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg);
mi_decl_export void mi_register_output(mi_output_fun* out) mi_attr_noexcept; mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept;
mi_decl_export void mi_collect(bool force) mi_attr_noexcept; mi_decl_export void mi_collect(bool force) mi_attr_noexcept;
mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept;
mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept;
mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept;
mi_decl_export void mi_stats_print(mi_output_fun* out) mi_attr_noexcept; mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL
mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
mi_decl_export void mi_process_init(void) mi_attr_noexcept; mi_decl_export void mi_process_init(void) mi_attr_noexcept;
mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
mi_decl_export void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
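As an illustration of the new callback shape above, a minimal sketch; the callback bodies and the use of stderr are example choices, not part of this change:

#include <mimalloc.h>
#include <stdbool.h>
#include <stdio.h>

static void my_output(const char* msg, void* arg) {
  fputs(msg, (FILE*)arg);                        // `arg` now carries user state, here a FILE*
}

static void my_deferred_free(bool force, unsigned long long heartbeat, void* arg) {
  (void)force; (void)heartbeat; (void)arg;       // release application-cached blocks here
}

int main(void) {
  mi_register_output(&my_output, stderr);
  mi_register_deferred_free(&my_deferred_free, NULL);
  mi_stats_print_out(&my_output, stderr);        // explicit-arg variant of mi_stats_print
  return 0;
}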
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
@ -230,9 +231,14 @@ mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_b
// Experimental // Experimental
mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept; mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
mi_decl_export bool mi_is_redirected() mi_attr_noexcept; mi_decl_export bool mi_is_redirected() mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
// deprecated
mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
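A brief usage sketch of the new reservation entry points; the page count and timeout below are arbitrary example values:

// reserve 4 x 1GiB huge OS pages, interleaved over the detected NUMA nodes,
// giving up after roughly 10 seconds; a non-zero return (ENOMEM) means the reservation failed
if (mi_reserve_huge_os_pages_interleave(4, 0 /* 0 = use detected node count */, 10000) != 0) {
  // reservation failed or timed out; allocation falls back to regular OS pages
}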
// ------------------------------------------------------ // ------------------------------------------------------
// Convenience // Convenience
// ------------------------------------------------------ // ------------------------------------------------------
@ -264,17 +270,20 @@ typedef enum mi_option_e {
// the following options are experimental // the following options are experimental
mi_option_eager_commit, mi_option_eager_commit,
mi_option_eager_region_commit, mi_option_eager_region_commit,
mi_option_reset_decommits,
mi_option_large_os_pages, // implies eager commit mi_option_large_os_pages, // implies eager commit
mi_option_reserve_huge_os_pages, mi_option_reserve_huge_os_pages,
mi_option_segment_cache, mi_option_segment_cache,
mi_option_page_reset, mi_option_page_reset,
mi_option_cache_reset, mi_option_abandoned_page_reset,
mi_option_reset_decommits,
mi_option_eager_commit_delay,
mi_option_segment_reset, mi_option_segment_reset,
mi_option_eager_commit_delay,
mi_option_reset_delay,
mi_option_use_numa_nodes,
mi_option_os_tag, mi_option_os_tag,
mi_option_max_errors, mi_option_max_errors,
_mi_option_last _mi_option_last,
mi_option_eager_page_commit = mi_option_eager_commit
} mi_option_t; } mi_option_t;
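For example, the new options can be tuned at startup. This sketch assumes the existing mi_option_set/mi_option_get accessors (declared elsewhere in mimalloc.h, not shown in this diff) and uses illustrative values:

mi_option_set(mi_option_reset_delay, 500);          // delay page resets by ~500ms
mi_option_set(mi_option_use_numa_nodes, 2);         // restrict interleaving to 2 NUMA nodes
long delay = mi_option_get(mi_option_reset_delay);  // reads back 500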

View file

@ -56,6 +56,8 @@ Enjoy!
### Releases ### Releases
* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and stronger
free list encoding in secure mode.
* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
* 2019-10-07, `v1.1.0`: stable release 1.1. * 2019-10-07, `v1.1.0`: stable release 1.1.

View file

@ -22,7 +22,7 @@ terms of the MIT license. A copy of the license can be found in the file
// Fast allocation in a page: just pop from the free list. // Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty. // Fall back to generic allocation only if the list is empty.
extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
mi_assert_internal(page->block_size==0||page->block_size >= size); mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
mi_block_t* block = page->free; mi_block_t* block = page->free;
if (mi_unlikely(block == NULL)) { if (mi_unlikely(block == NULL)) {
return _mi_malloc_generic(heap, size); // slow path return _mi_malloc_generic(heap, size); // slow path
@ -94,16 +94,16 @@ void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
// or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
UNUSED(size); UNUSED(size);
mi_assert_internal(p != NULL); mi_assert_internal(p != NULL);
mi_assert_internal(size > 0 && page->block_size >= size); mi_assert_internal(size > 0 && mi_page_block_size(page) >= size);
mi_assert_internal(_mi_ptr_page(p)==page); mi_assert_internal(_mi_ptr_page(p)==page);
if (page->is_zero) { if (page->is_zero) {
// already zero initialized memory? // already zero initialized memory?
((mi_block_t*)p)->next = 0; // clear the free list pointer ((mi_block_t*)p)->next = 0; // clear the free list pointer
mi_assert_expensive(mi_mem_is_zero(p,page->block_size)); mi_assert_expensive(mi_mem_is_zero(p, mi_page_block_size(page)));
} }
else { else {
// otherwise memset // otherwise memset
memset(p, 0, page->block_size); memset(p, 0, mi_page_block_size(page));
} }
} }
@ -139,31 +139,27 @@ static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, cons
return false; return false;
} }
static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block, const mi_block_t* n) { static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) {
size_t psize; // The decoded value is in the same page (or NULL).
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
if (n == NULL || ((uint8_t*)n >= pstart && (uint8_t*)n < (pstart + psize))) {
// Suspicious: the decoded value is in the same page (or NULL).
// Walk the free lists to verify positively if it is already freed // Walk the free lists to verify positively if it is already freed
if (mi_list_contains(page, page->free, block) || if (mi_list_contains(page, page->free, block) ||
mi_list_contains(page, page->local_free, block) || mi_list_contains(page, page->local_free, block) ||
mi_list_contains(page, (const mi_block_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&page->thread_free)), block)) mi_list_contains(page, mi_page_thread_free(page), block))
{ {
_mi_fatal_error("double free detected of block %p with size %zu\n", block, page->block_size); _mi_fatal_error("double free detected of block %p with size %zu\n", block, mi_page_block_size(page));
return true; return true;
} }
}
return false; return false;
} }
static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) {
mi_block_t* n = mi_block_nextx(page, block, page->cookie); // pretend it is freed, and get the decoded first field mi_block_t* n = mi_block_nextx(page, block, page->key[0], page->key[1]); // pretend it is freed, and get the decoded first field
if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer?
(n==NULL || mi_is_in_same_segment(block, n))) // quick check: in same segment or NULL? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL?
{ {
// Suspicious: decoded value in block is in the same segment (or NULL) -- maybe a double free? // Suspicious: the decoded value in the block is in the same page (or NULL) -- maybe a double free?
// (continue in separate function to improve code generation) // (continue in separate function to improve code generation)
return mi_check_is_double_freex(page, block, n); return mi_check_is_double_freex(page, block);
} }
return false; return false;
} }
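To make the check concrete: a deliberately erroneous double free like the sketch below is the case these two functions try to catch. Detection is best-effort and depends on the build settings (encoded free lists), so this is only an illustration:

void* p = mi_malloc(32);
mi_free(p);
mi_free(p);   // the decoded `next` of p now points into the same page -> may be reported as a double free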
@ -180,44 +176,50 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block
// Free // Free
// ------------------------------------------------------ // ------------------------------------------------------
// multi-threaded free // free huge block from another thread
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) static mi_decl_noinline void mi_free_huge_block_mt(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) {
{
mi_thread_free_t tfree;
mi_thread_free_t tfreex;
bool use_delayed;
mi_segment_t* segment = _mi_page_segment(page);
if (segment->page_kind==MI_PAGE_HUGE) {
// huge page segments are always abandoned and can be freed immediately // huge page segments are always abandoned and can be freed immediately
mi_assert_internal(segment->page_kind==MI_PAGE_HUGE);
mi_assert_internal(segment == _mi_page_segment(page));
mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0); mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL);
// claim it and free // claim it and free
mi_heap_t* heap = mi_get_default_heap(); mi_heap_t* heap = mi_get_default_heap();
// paranoia: if this is the last reference, the cas should always succeed // paranoia: if this is the last reference, the cas should always succeed
if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) { if (mi_atomic_cas_strong(&segment->thread_id, heap->thread_id, 0)) {
mi_block_set_next(page, block, page->free); mi_block_set_next(page, block, page->free);
page->free = block; page->free = block;
page->used--; page->used--;
page->is_zero = false; page->is_zero = false;
mi_assert(page->used == 0); mi_assert(page->used == 0);
mi_tld_t* tld = heap->tld; mi_tld_t* tld = heap->tld;
if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { const size_t bsize = mi_page_block_size(page);
_mi_stat_decrease(&tld->stats.giant, page->block_size); if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
_mi_stat_decrease(&tld->stats.giant, bsize);
} }
else { else {
_mi_stat_decrease(&tld->stats.huge, page->block_size); _mi_stat_decrease(&tld->stats.huge, bsize);
} }
_mi_segment_page_free(page,true,&tld->segments); _mi_segment_page_free(page, true, &tld->segments);
} }
}
// multi-threaded free
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// huge page segments are always abandoned and can be freed immediately
mi_segment_t* segment = _mi_page_segment(page);
if (segment->page_kind==MI_PAGE_HUGE) {
mi_free_huge_block_mt(segment, page, block);
return; return;
} }
mi_thread_free_t tfree;
mi_thread_free_t tfreex;
bool use_delayed;
do { do {
tfree = page->thread_free; tfree = mi_atomic_read_relaxed(&page->xthread_free);
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE || use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE);
(mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page
);
if (mi_unlikely(use_delayed)) { if (mi_unlikely(use_delayed)) {
// unlikely: this only happens on the first concurrent free in a page that is in the full list // unlikely: this only happens on the first concurrent free in a page that is in the full list
tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING);
@ -227,31 +229,27 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
mi_block_set_next(page, block, mi_tf_block(tfree)); mi_block_set_next(page, block, mi_tf_block(tfree));
tfreex = mi_tf_set_block(tfree,block); tfreex = mi_tf_set_block(tfree,block);
} }
} while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
if (mi_likely(!use_delayed)) { if (mi_unlikely(use_delayed)) {
// increment the thread free count and return
mi_atomic_increment(&page->thread_freed);
}
else {
// racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`)
mi_heap_t* heap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap)); mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(heap != NULL); mi_assert_internal(heap != NULL);
if (heap != NULL) { if (heap != NULL) {
// add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity)
mi_block_t* dfree; mi_block_t* dfree;
do { do {
dfree = (mi_block_t*)heap->thread_delayed_free; dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap,block,dfree, heap->cookie); mi_block_set_nextx(heap,block,dfree, heap->key[0], heap->key[1]);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
} }
// and reset the MI_DELAYED_FREEING flag // and reset the MI_DELAYED_FREEING flag
do { do {
tfreex = tfree = page->thread_free; tfreex = tfree = mi_atomic_read_relaxed(&page->xthread_free);
mi_assert_internal(mi_tf_delayed(tfree) == MI_NEVER_DELAYED_FREE || mi_tf_delayed(tfree) == MI_DELAYED_FREEING); mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING);
if (mi_tf_delayed(tfree) != MI_NEVER_DELAYED_FREE) tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE);
} while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
} }
} }
@ -260,13 +258,13 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block)
{ {
#if (MI_DEBUG) #if (MI_DEBUG)
memset(block, MI_DEBUG_FREED, page->block_size); memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
#endif #endif
// and push it on the free list // and push it on the free list
if (mi_likely(local)) { if (mi_likely(local)) {
// owning thread can free a block directly // owning thread can free a block directly
if (mi_check_is_double_free(page, block)) return; if (mi_unlikely(mi_check_is_double_free(page, block))) return;
mi_block_set_next(page, block, page->local_free); mi_block_set_next(page, block, page->local_free);
page->local_free = block; page->local_free = block;
page->used--; page->used--;
@ -287,7 +285,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) {
mi_assert_internal(page!=NULL && p!=NULL); mi_assert_internal(page!=NULL && p!=NULL);
size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL);
size_t adjust = (diff % page->block_size); size_t adjust = (diff % mi_page_block_size(page));
return (mi_block_t*)((uintptr_t)p - adjust); return (mi_block_t*)((uintptr_t)p - adjust);
} }
@ -332,8 +330,8 @@ void mi_free(void* p) mi_attr_noexcept
#if (MI_STAT>1) #if (MI_STAT>1)
mi_heap_t* heap = mi_heap_get_default(); mi_heap_t* heap = mi_heap_get_default();
mi_heap_stat_decrease(heap, malloc, mi_usable_size(p)); mi_heap_stat_decrease(heap, malloc, mi_usable_size(p));
if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { if (page->xblock_size <= MI_LARGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, normal[_mi_bin(page->block_size)], 1); mi_heap_stat_decrease(heap, normal[_mi_bin(page->xblock_size)], 1);
} }
// huge page stat is accounted for in `_mi_page_retire` // huge page stat is accounted for in `_mi_page_retire`
#endif #endif
@ -341,11 +339,13 @@ void mi_free(void* p) mi_attr_noexcept
if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks
// local, and not full or aligned // local, and not full or aligned
mi_block_t* block = (mi_block_t*)p; mi_block_t* block = (mi_block_t*)p;
if (mi_check_is_double_free(page,block)) return; if (mi_unlikely(mi_check_is_double_free(page,block))) return;
mi_block_set_next(page, block, page->local_free); mi_block_set_next(page, block, page->local_free);
page->local_free = block; page->local_free = block;
page->used--; page->used--;
if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } if (mi_unlikely(mi_page_all_free(page))) {
_mi_page_retire(page);
}
} }
else { else {
// non-local, aligned blocks, or a full page; use the more generic path // non-local, aligned blocks, or a full page; use the more generic path
@ -359,13 +359,19 @@ bool _mi_free_delayed_block(mi_block_t* block) {
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(_mi_thread_id() == segment->thread_id); mi_assert_internal(_mi_thread_id() == segment->thread_id);
mi_page_t* page = _mi_segment_page_of(segment, block); mi_page_t* page = _mi_segment_page_of(segment, block);
if (mi_tf_delayed(page->thread_free) == MI_DELAYED_FREEING) {
// we might already start delayed freeing while another thread has not yet // Clear the no-delayed flag so delayed freeing is used again for this page.
// reset the delayed_freeing flag; in that case don't free it quite yet if // This must be done before collecting the free lists on this page -- otherwise
// this is the last block remaining. // some blocks may end up in the page `thread_free` list with no blocks in the
if (page->used - page->thread_freed == 1) return false; // heap `thread_delayed_free` list which may cause the page to be never freed!
} // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`)
_mi_free_block(page,true,block); _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */);
// collect all other non-local frees to ensure up-to-date `used` count
_mi_page_free_collect(page, false);
// and free the block (possibly freeing the page as well since used is updated)
_mi_free_block(page, true, block);
return true; return true;
} }
@ -374,7 +380,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept {
if (p==NULL) return 0; if (p==NULL) return 0;
const mi_segment_t* segment = _mi_ptr_segment(p); const mi_segment_t* segment = _mi_ptr_segment(p);
const mi_page_t* page = _mi_segment_page_of(segment,p); const mi_page_t* page = _mi_segment_page_of(segment,p);
size_t size = page->block_size; size_t size = mi_page_block_size(page);
if (mi_unlikely(mi_page_has_aligned(page))) { if (mi_unlikely(mi_page_has_aligned(page))) {
ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p); ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)_mi_page_ptr_unalign(segment,page,p);
mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
@ -437,7 +443,12 @@ mi_decl_allocator void* mi_calloc(size_t count, size_t size) mi_attr_noexcept {
// Uninitialized `calloc` // Uninitialized `calloc`
extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { extern mi_decl_allocator void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept {
size_t total; size_t total;
if (mi_mul_overflow(count,size,&total)) return NULL; if (count==1) {
total = size;
}
else if (mi_mul_overflow(count, size, &total)) {
return NULL;
}
return mi_heap_malloc(heap, total); return mi_heap_malloc(heap, total);
} }
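The `count==1` fast path and the overflow guard behave as in this sketch, assuming the public mi_mallocn wrapper that forwards here (SIZE_MAX comes from <stdint.h>; values are illustrative):

void* ok  = mi_mallocn(1, 128);           // count==1: allocates 128 bytes directly
void* bad = mi_mallocn(SIZE_MAX / 2, 16); // count*size overflows -> returns NULL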

355
src/arena.c Normal file
View file

@ -0,0 +1,355 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
"Arenas" are fixed area's of OS memory from which we can allocate
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
In contrast to the rest of mimalloc, the arenas are shared between
threads and need to be accessed using atomic operations.
Currently arenas are only used for huge OS page (1GiB) reservations,
otherwise it delegates to direct allocation from the OS.
In the future, we can expose an API to manually add more kinds of arenas
which is sometimes needed for embedded devices or shared memory for example.
(We can also employ this with WASI or `sbrk` systems to reserve large arenas
on demand and be able to reuse them efficiently).
The arena allocation needs to be thread safe and we use an atomic
bitmap to allocate. The current implementation of the bitmap can
only do this within a field (`uintptr_t`) so we can allocate at most
blocks of 2GiB (64*32MiB) and no object can cross the boundary. This
can lead to fragmentation but fortunately most objects will be regions
of 256MiB in practice.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"
#include <string.h> // memset
#include "bitmap.inc.c" // atomic bitmap
// os.c
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
void _mi_os_free(void* p, size_t size, mi_stats_t* stats);
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
/* -----------------------------------------------------------
Arena allocation
----------------------------------------------------------- */
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_ARENA_BLOCK_SIZE (8*MI_SEGMENT_ALIGN) // 32MiB
#define MI_ARENA_MAX_OBJ_SIZE (MI_BITMAP_FIELD_BITS * MI_ARENA_BLOCK_SIZE) // 2GiB
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB
#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid)
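Spelled out for a 64-bit build, assuming the usual 4MiB MI_SEGMENT_SIZE (which is defined outside this file), the constants above work out as:

// MI_ARENA_BLOCK_SIZE   = 8 * 4MiB                        = 32MiB
// MI_ARENA_MAX_OBJ_SIZE = MI_BITMAP_FIELD_BITS(64) * 32MiB = 2GiB
// MI_ARENA_MIN_OBJ_SIZE = 32MiB / 2                        = 16MiB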
// A memory arena descriptor
typedef struct mi_arena_s {
uint8_t* start; // the start of the memory area
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
int numa_node; // associated NUMA node
bool is_zero_init; // is the arena zero initialized?
bool is_committed; // is the memory committed
bool is_large; // large OS page allocated
volatile _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed?
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
} mi_arena_t;
// The available arenas
static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static _Atomic(uintptr_t) mi_arena_count; // = 0
/* -----------------------------------------------------------
Arena allocations get a memory id where the lower 8 bits are
the arena index +1, and the upper bits the block index.
----------------------------------------------------------- */
// Use `0` as a special id for direct OS allocated memory.
#define MI_MEMID_OS 0
static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
mi_assert_internal(arena_index < 0xFE);
mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
}
static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
mi_assert_internal(memid != MI_MEMID_OS);
*arena_index = (memid & 0xFF) - 1;
*bitmap_index = (memid >> 8);
}
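A quick round-trip sketch of the encoding, with illustrative indices:

size_t memid = mi_arena_id_create(2, 37);     // (37 << 8) | (2+1) == 0x2503
size_t arena_index; mi_bitmap_index_t bitmap_index;
mi_arena_id_indices(memid, &arena_index, &bitmap_index);
// arena_index == 2 and bitmap_index == 37 again; memid 0 stays reserved as MI_MEMID_OS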
static size_t mi_block_count_of_size(size_t size) {
return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}
/* -----------------------------------------------------------
Thread safe allocation in an arena
----------------------------------------------------------- */
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
const size_t fcount = arena->field_count;
size_t idx = mi_atomic_read(&arena->search_idx); // start from last search
for (size_t visited = 0; visited < fcount; visited++, idx++) {
if (idx >= fcount) idx = 0; // wrap around
if (mi_bitmap_try_find_claim_field(arena->blocks_inuse, idx, blocks, bitmap_idx)) {
mi_atomic_write(&arena->search_idx, idx); // start search from here next time
return true;
}
}
return false;
}
/* -----------------------------------------------------------
Arena Allocation
----------------------------------------------------------- */
static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_bitmap_index_t bitmap_index;
if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;
// claimed it! set the dirty bits (todo: no need for an atomic op here?)
void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
*memid = mi_arena_id_create(arena_index, bitmap_index);
*is_zero = mi_bitmap_claim(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
*large = arena->is_large;
if (arena->is_committed) {
// always committed
*commit = true;
}
else if (commit) {
// ensure commit now
bool any_uncommitted;
mi_bitmap_claim(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
if (any_uncommitted) {
bool commit_zero;
_mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats);
if (commit_zero) *is_zero = true;
}
}
else {
// no need to commit, but check if already fully committed
*commit = mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
}
return p;
}
void* _mi_arena_alloc_aligned(size_t size, size_t alignment,
bool* commit, bool* large, bool* is_zero,
size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(commit != NULL && large != NULL && is_zero != NULL && memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
*memid = MI_MEMID_OS;
*is_zero = false;
// try to allocate in an arena if the alignment is small enough
// and the object is not too large or too small.
if (alignment <= MI_SEGMENT_ALIGN &&
size <= MI_ARENA_MAX_OBJ_SIZE &&
size >= MI_ARENA_MIN_OBJ_SIZE)
{
const size_t bcount = mi_block_count_of_size(size);
const int numa_node = _mi_os_numa_node(tld); // current numa node
mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
// try numa affine allocation
for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i]));
if (arena==NULL) break; // end reached
if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
}
}
// try from another numa node instead..
for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[i]));
if (arena==NULL) break; // end reached
if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_zero, memid, tld);
mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
}
}
}
// finally, fall back to the OS
*is_zero = true;
*memid = MI_MEMID_OS;
return _mi_os_alloc_aligned(size, alignment, *commit, large, tld);
}
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_zero, memid, tld);
}
/* -----------------------------------------------------------
Arena free
----------------------------------------------------------- */
void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats) {
mi_assert_internal(size > 0 && stats != NULL);
if (p==NULL) return;
if (size==0) return;
if (memid == MI_MEMID_OS) {
// was a direct OS allocation, pass through
_mi_os_free(p, size, stats);
}
else {
// allocated in an arena
size_t arena_idx;
size_t bitmap_idx;
mi_arena_id_indices(memid, &arena_idx, &bitmap_idx);
mi_assert_internal(arena_idx < MI_MAX_ARENAS);
mi_arena_t* arena = (mi_arena_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &mi_arenas[arena_idx]));
mi_assert_internal(arena != NULL);
if (arena == NULL) {
_mi_fatal_error("trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
_mi_fatal_error("trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
return;
}
const size_t blocks = mi_block_count_of_size(size);
bool ones = mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
if (!ones) {
_mi_fatal_error("trying to free an already freed block: %p, size %zu\n", p, size);
return;
};
}
}
/* -----------------------------------------------------------
Add an arena.
----------------------------------------------------------- */
static bool mi_arena_add(mi_arena_t* arena) {
mi_assert_internal(arena != NULL);
mi_assert_internal((uintptr_t)arena->start % MI_SEGMENT_ALIGN == 0);
mi_assert_internal(arena->block_count > 0);
uintptr_t i = mi_atomic_addu(&mi_arena_count,1);
if (i >= MI_MAX_ARENAS) {
mi_atomic_subu(&mi_arena_count, 1);
return false;
}
mi_atomic_write_ptr(mi_atomic_cast(void*,&mi_arenas[i]), arena);
return true;
}
/* -----------------------------------------------------------
Reserve a huge page arena.
----------------------------------------------------------- */
#include <errno.h> // ENOMEM
// reserve at a specific numa node
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
if (pages==0) return 0;
if (numa_node < -1) numa_node = -1;
if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
size_t hsize = 0;
size_t pages_reserved = 0;
void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
if (p==NULL || pages_reserved==0) {
_mi_warning_message("failed to reserve %zu gb huge pages\n", pages);
return ENOMEM;
}
_mi_verbose_message("reserved %zu gb huge pages (of the %zu gb requested)\n", pages_reserved, pages);
size_t bcount = mi_block_count_of_size(hsize);
size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
size_t asize = sizeof(mi_arena_t) + (2*fields*sizeof(mi_bitmap_field_t));
mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
if (arena == NULL) {
_mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
return ENOMEM;
}
arena->block_count = bcount;
arena->field_count = fields;
arena->start = (uint8_t*)p;
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = true;
arena->is_zero_init = true;
arena->is_committed = true;
arena->search_idx = 0;
arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
arena->blocks_committed = NULL;
// the bitmaps are already zero initialized due to os_alloc
// just claim leftover blocks if needed
ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
mi_assert_internal(post >= 0);
if (post > 0) {
// don't use leftover bits at the end
mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
}
mi_arena_add(arena);
return 0;
}
// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept {
if (pages == 0) return 0;
// pages per numa node
size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
if (numa_count <= 0) numa_count = 1;
const size_t pages_per = pages / numa_count;
const size_t pages_mod = pages % numa_count;
const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);
// reserve evenly among numa nodes
for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
size_t node_pages = pages_per; // can be 0
if (numa_node < pages_mod) node_pages++;
int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
if (err) return err;
if (pages < node_pages) {
pages = 0;
}
else {
pages -= node_pages;
}
}
return 0;
}
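Worked through with illustrative numbers: reserving 10 pages over 4 NUMA nodes with a 2000ms budget gives

// pages_per = 10 / 4 = 2,  pages_mod = 10 % 4 = 2,  timeout_per = 2000/4 + 50 = 550ms
// node 0: 3 pages, node 1: 3 pages, node 2: 2 pages, node 3: 2 pages   (total 10)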
int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
UNUSED(max_secs);
_mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
if (pages_reserved != NULL) *pages_reserved = 0;
int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
return err;
}

240
src/bitmap.inc.c Normal file
View file

@ -0,0 +1,240 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
/* ----------------------------------------------------------------------------
This file is meant to be included in other files for efficiency.
It implements a bitmap that can set/reset sequences of bits atomically
and is used to concurrently claim memory ranges.
A bitmap is an array of fields where each field is a machine word (`uintptr_t`)
A current limitation is that the bit sequences cannot cross fields
and that the sequence must be smaller or equal to the bits in a field.
---------------------------------------------------------------------------- */
#pragma once
#ifndef MI_BITMAP_C
#define MI_BITMAP_C
#include "mimalloc.h"
#include "mimalloc-internal.h"
/* -----------------------------------------------------------
Bitmap definition
----------------------------------------------------------- */
#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE)
#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set
// An atomic bitmap of `uintptr_t` fields
typedef volatile _Atomic(uintptr_t) mi_bitmap_field_t;
typedef mi_bitmap_field_t* mi_bitmap_t;
// A bitmap index is the index of the bit in a bitmap.
typedef size_t mi_bitmap_index_t;
// Create a bit index.
static inline mi_bitmap_index_t mi_bitmap_index_create(size_t idx, size_t bitidx) {
mi_assert_internal(bitidx < MI_BITMAP_FIELD_BITS);
return (idx*MI_BITMAP_FIELD_BITS) + bitidx;
}
// Get the field index from a bit index.
static inline size_t mi_bitmap_index_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx / MI_BITMAP_FIELD_BITS);
}
// Get the bit index in a bitmap field
static inline size_t mi_bitmap_index_bit_in_field(mi_bitmap_index_t bitmap_idx) {
return (bitmap_idx % MI_BITMAP_FIELD_BITS);
}
// Get the full bit index
static inline size_t mi_bitmap_index_bit(mi_bitmap_index_t bitmap_idx) {
return bitmap_idx;
}
// The bit mask for a given number of blocks at a specified bit index.
static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) {
mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS);
if (count == MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL;
return ((((uintptr_t)1 << count) - 1) << bitidx);
}
/* -----------------------------------------------------------
Use bit scan forward/reverse to quickly find the first zero bit if it is available
----------------------------------------------------------- */
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanForward)(&idx, x);
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
MI_64(_BitScanReverse)(&idx, x);
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#include <limits.h> // LONG_MAX
#define MI_HAVE_BITSCAN
#if (INTPTR_MAX == LONG_MAX)
# define MI_L(x) x##l
#else
# define MI_L(x) x##ll
#endif
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : MI_L(__builtin_ctz)(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - MI_L(__builtin_clz)(x));
}
#endif
/* -----------------------------------------------------------
Claim a bit sequence atomically
----------------------------------------------------------- */
// Try to atomically claim a sequence of `count` bits at `bitmap_idx`
// in the bitmap field. Returns `true` on success.
static inline bool mi_bitmap_try_claim_field(mi_bitmap_t bitmap, size_t bitmap_fields, const size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
mi_assert_internal(bitidx + count <= MI_BITMAP_FIELD_BITS);
mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]);
if ((field & mask) == 0) { // free?
if (mi_atomic_cas_strong(&bitmap[idx], (field|mask), field)) {
// claimed!
return true;
}
}
return false;
}
// Try to atomically claim a sequence of `count` bits in a single
// field at `idx` in `bitmap`. Returns `true` on success.
static inline bool mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_t count, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
volatile _Atomic(uintptr_t)* field = &bitmap[idx];
uintptr_t map = mi_atomic_read(field);
if (map==MI_BITMAP_FIELD_FULL) return false; // short cut
// search for 0-bit sequence of length count
const uintptr_t mask = mi_bitmap_mask_(count, 0);
const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count;
#ifdef MI_HAVE_BITSCAN
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
// scan linearly for a free range of zero bits
while (bitidx <= bitidx_max) {
if ((map & m) == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
const uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak(field, newmap, map)) { // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going
map = mi_atomic_read(field);
continue;
}
else {
// success, we claimed the bits!
*bitmap_idx = mi_bitmap_index_create(idx, bitidx);
return true;
}
}
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
const size_t shift = (count == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= count);
#else
const size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no bits found
return false;
}
// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success.
// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields.
static inline bool mi_bitmap_try_find_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t* bitmap_idx) {
for (size_t idx = 0; idx < bitmap_fields; idx++) {
if (mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) {
return true;
}
}
return false;
}
// Set `count` bits at `bitmap_idx` to 0 atomically
// Returns `true` if all `count` bits were 1 previously.
static inline bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
// mi_assert_internal((bitmap[idx] & mask) == mask);
uintptr_t prev = mi_atomic_and(&bitmap[idx], ~mask);
return ((prev & mask) == mask);
}
// Set `count` bits at `bitmap_idx` to 1 atomically
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
static inline bool mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
//mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0);
uintptr_t prev = mi_atomic_or(&bitmap[idx], mask);
if (any_zero != NULL) *any_zero = ((prev & mask) != mask);
return ((prev & mask) == 0);
}
// Returns `true` if all `count` bits were 1. `any_ones` is `true` if there was at least one bit set to one.
static inline bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) {
const size_t idx = mi_bitmap_index_field(bitmap_idx);
const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx);
const uintptr_t mask = mi_bitmap_mask_(count, bitidx);
mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields);
mi_bitmap_field_t field = mi_atomic_read_relaxed(&bitmap[idx]);
if (any_ones != NULL) *any_ones = ((field & mask) != 0);
return ((field & mask) == mask);
}
static inline bool mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL);
}
static inline bool mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
bool any_ones;
mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, &any_ones);
return any_ones;
}
#endif
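A minimal single-field usage sketch of the helpers above; the demo names are made up and it only exercises the claim/check/unclaim cycle:

static mi_bitmap_field_t demo_bitmap[1];                      // zero-initialized: all bits free

static void mi_bitmap_demo(void) {
  mi_bitmap_index_t idx;
  if (mi_bitmap_try_find_claim(demo_bitmap, 1, 4, &idx)) {    // claim 4 contiguous zero bits atomically
    mi_assert_internal(mi_bitmap_is_claimed(demo_bitmap, 1, 4, idx));
    bool all_set = mi_bitmap_unclaim(demo_bitmap, 1, 4, idx); // release; true since all 4 bits were set
    mi_assert_internal(all_set); (void)all_set;
  }
}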

View file

@ -34,7 +34,7 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
mi_page_t* page = pq->first; mi_page_t* page = pq->first;
while(page != NULL) { while(page != NULL) {
mi_page_t* next = page->next; // save next in case the page gets removed from the queue mi_page_t* next = page->next; // save next in case the page gets removed from the queue
mi_assert_internal(page->heap == heap); mi_assert_internal(mi_page_heap(page) == heap);
count++; count++;
if (!fn(heap, pq, page, arg1, arg2)) return false; if (!fn(heap, pq, page, arg1, arg2)) return false;
page = next; // and continue page = next; // and continue
@ -45,21 +45,22 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void
} }
#if MI_DEBUG>1 #if MI_DEBUG>=2
static bool _mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) {
UNUSED(arg1); UNUSED(arg1);
UNUSED(arg2); UNUSED(arg2);
UNUSED(pq); UNUSED(pq);
mi_assert_internal(page->heap == heap); mi_assert_internal(mi_page_heap(page) == heap);
mi_segment_t* segment = _mi_page_segment(page); mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(segment->thread_id == heap->thread_id); mi_assert_internal(segment->thread_id == heap->thread_id);
mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_expensive(_mi_page_is_valid(page));
return true; return true;
} }
#endif
#if MI_DEBUG>=3
static bool mi_heap_is_valid(mi_heap_t* heap) { static bool mi_heap_is_valid(mi_heap_t* heap) {
mi_assert_internal(heap!=NULL); mi_assert_internal(heap!=NULL);
mi_heap_visit_pages(heap, &_mi_heap_page_is_valid, NULL, NULL); mi_heap_visit_pages(heap, &mi_heap_page_is_valid, NULL, NULL);
return true; return true;
} }
#endif #endif
@ -84,6 +85,7 @@ typedef enum mi_collect_e {
static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
UNUSED(arg2); UNUSED(arg2);
UNUSED(heap); UNUSED(heap);
mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
mi_collect_t collect = *((mi_collect_t*)arg_collect); mi_collect_t collect = *((mi_collect_t*)arg_collect);
_mi_page_free_collect(page, collect >= ABANDON); _mi_page_free_collect(page, collect >= ABANDON);
if (mi_page_all_free(page)) { if (mi_page_all_free(page)) {
@ -102,7 +104,7 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq
UNUSED(arg2); UNUSED(arg2);
UNUSED(heap); UNUSED(heap);
UNUSED(pq); UNUSED(pq);
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
return true; // don't break return true; // don't break
} }
@ -117,13 +119,18 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
// this may free some segments (but also take ownership of abandoned pages) // this may free some segments (but also take ownership of abandoned pages)
_mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments); _mi_segment_try_reclaim_abandoned(heap, false, &heap->tld->segments);
} }
#if MI_DEBUG else if (
else if (collect == ABANDON && _mi_is_main_thread() && mi_heap_is_backing(heap)) { #ifdef NDEBUG
collect == FORCE
#else
collect >= FORCE
#endif
&& _mi_is_main_thread() && mi_heap_is_backing(heap))
{
// the main thread is abandoned, try to free all abandoned segments. // the main thread is abandoned, try to free all abandoned segments.
// if all memory is freed by now, all segments should be freed. // if all memory is freed by now, all segments should be freed.
_mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments); _mi_segment_try_reclaim_abandoned(heap, true, &heap->tld->segments);
} }
#endif
} }
// if abandoning, mark all pages to no longer add to delayed_free // if abandoning, mark all pages to no longer add to delayed_free
@ -149,7 +156,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
// collect regions // collect regions
if (collect >= FORCE && _mi_is_main_thread()) { if (collect >= FORCE && _mi_is_main_thread()) {
_mi_mem_collect(&heap->tld->stats); _mi_mem_collect(&heap->tld->os);
} }
} }
@ -184,12 +191,6 @@ mi_heap_t* mi_heap_get_backing(void) {
return bheap; return bheap;
} }
uintptr_t _mi_heap_random(mi_heap_t* heap) {
uintptr_t r = heap->random;
heap->random = _mi_random_shuffle(r);
return r;
}
mi_heap_t* mi_heap_new(void) { mi_heap_t* mi_heap_new(void) {
mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* bheap = mi_heap_get_backing();
mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t);
@ -197,12 +198,18 @@ mi_heap_t* mi_heap_new(void) {
memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t)); memcpy(heap, &_mi_heap_empty, sizeof(mi_heap_t));
heap->tld = bheap->tld; heap->tld = bheap->tld;
heap->thread_id = _mi_thread_id(); heap->thread_id = _mi_thread_id();
heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(bheap)) | 1; _mi_random_split(&bheap->random, &heap->random);
heap->random = _mi_heap_random(bheap); heap->cookie = _mi_heap_random_next(heap) | 1;
heap->key[0] = _mi_heap_random_next(heap);
heap->key[1] = _mi_heap_random_next(heap);
heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe
return heap; return heap;
} }
uintptr_t _mi_heap_random_next(mi_heap_t* heap) {
return _mi_random_next(&heap->random);
}
// zero out the page queues // zero out the page queues
static void mi_heap_reset_pages(mi_heap_t* heap) { static void mi_heap_reset_pages(mi_heap_t* heap) {
mi_assert_internal(mi_heap_is_initialized(heap)); mi_assert_internal(mi_heap_is_initialized(heap));
@ -241,28 +248,30 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_
UNUSED(pq); UNUSED(pq);
// ensure no more thread_delayed_free will be added // ensure no more thread_delayed_free will be added
_mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE); _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false);
// stats // stats
if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) { const size_t bsize = mi_page_block_size(page);
if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { if (bsize > MI_LARGE_OBJ_SIZE_MAX) {
_mi_stat_decrease(&heap->tld->stats.giant,page->block_size); if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
_mi_stat_decrease(&heap->tld->stats.giant, bsize);
} }
else { else {
_mi_stat_decrease(&heap->tld->stats.huge, page->block_size); _mi_stat_decrease(&heap->tld->stats.huge, bsize);
} }
} }
#if (MI_STAT>1) #if (MI_STAT>1)
size_t inuse = page->used - page->thread_freed; _mi_page_free_collect(page, false); // update used count
if (page->block_size <= MI_LARGE_OBJ_SIZE_MAX) { const size_t inuse = page->used;
mi_heap_stat_decrease(heap,normal[_mi_bin(page->block_size)], inuse); if (bsize <= MI_LARGE_OBJ_SIZE_MAX) {
mi_heap_stat_decrease(heap, normal[_mi_bin(bsize)], inuse);
} }
mi_heap_stat_decrease(heap,malloc, page->block_size * inuse); // todo: off for aligned blocks... mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks...
#endif #endif
// pretend it is all free now // pretend it is all free now
mi_assert_internal(page->thread_freed<=0xFFFF); mi_assert_internal(mi_page_thread_free(page) == NULL);
page->used = (uint16_t)page->thread_freed; page->used = 0;
// and free the page // and free the page
_mi_segment_page_free(page,false /* no force? */, &heap->tld->segments); _mi_segment_page_free(page,false /* no force? */, &heap->tld->segments);
@ -373,7 +382,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) {
bool valid = (_mi_ptr_cookie(segment) == segment->cookie); bool valid = (_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(valid); mi_assert_internal(valid);
if (mi_unlikely(!valid)) return NULL; if (mi_unlikely(!valid)) return NULL;
return _mi_segment_page_of(segment,p)->heap; return mi_page_heap(_mi_segment_page_of(segment,p));
} }
bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { bool mi_heap_contains_block(mi_heap_t* heap, const void* p) {
@ -389,7 +398,7 @@ static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
bool* found = (bool*)vfound; bool* found = (bool*)vfound;
mi_segment_t* segment = _mi_page_segment(page); mi_segment_t* segment = _mi_page_segment(page);
void* start = _mi_page_start(segment, page, NULL); void* start = _mi_page_start(segment, page, NULL);
void* end = (uint8_t*)start + (page->capacity * page->block_size); void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page));
*found = (p >= start && p < end); *found = (p >= start && p < end);
return (!*found); // continue if not found return (!*found); // continue if not found
} }
@ -431,13 +440,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
mi_assert_internal(page->local_free == NULL); mi_assert_internal(page->local_free == NULL);
if (page->used == 0) return true; if (page->used == 0) return true;
const size_t bsize = mi_page_block_size(page);
size_t psize; size_t psize;
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
if (page->capacity == 1) { if (page->capacity == 1) {
// optimize page with one block // optimize page with one block
mi_assert_internal(page->used == 1 && page->free == NULL); mi_assert_internal(page->used == 1 && page->free == NULL);
return visitor(page->heap, area, pstart, page->block_size, arg); return visitor(mi_page_heap(page), area, pstart, bsize, arg);
} }
// create a bitmap of free blocks. // create a bitmap of free blocks.
@ -450,8 +460,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
free_count++; free_count++;
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
size_t offset = (uint8_t*)block - pstart; size_t offset = (uint8_t*)block - pstart;
mi_assert_internal(offset % page->block_size == 0); mi_assert_internal(offset % bsize == 0);
size_t blockidx = offset / page->block_size; // Todo: avoid division? size_t blockidx = offset / bsize; // Todo: avoid division?
mi_assert_internal( blockidx < MI_MAX_BLOCKS); mi_assert_internal( blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / sizeof(uintptr_t)); size_t bitidx = (blockidx / sizeof(uintptr_t));
size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
@ -470,8 +480,8 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
} }
else if ((m & ((uintptr_t)1 << bit)) == 0) { else if ((m & ((uintptr_t)1 << bit)) == 0) {
used_count++; used_count++;
uint8_t* block = pstart + (i * page->block_size); uint8_t* block = pstart + (i * bsize);
if (!visitor(page->heap, area, block, page->block_size, arg)) return false; if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false;
} }
} }
mi_assert_internal(page->used == used_count); mi_assert_internal(page->used == used_count);
@ -486,12 +496,13 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
UNUSED(pq); UNUSED(pq);
mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
mi_heap_area_ex_t xarea; mi_heap_area_ex_t xarea;
const size_t bsize = mi_page_block_size(page);
xarea.page = page; xarea.page = page;
xarea.area.reserved = page->reserved * page->block_size; xarea.area.reserved = page->reserved * bsize;
xarea.area.committed = page->capacity * page->block_size; xarea.area.committed = page->capacity * bsize;
xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
xarea.area.used = page->used - page->thread_freed; // race is ok xarea.area.used = page->used;
xarea.area.block_size = page->block_size; xarea.area.block_size = bsize;
return fun(heap, &xarea, arg); return fun(heap, &xarea, arg);
} }
@ -524,4 +535,3 @@ bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_vis
mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; mi_visit_blocks_args_t args = { visit_blocks, visitor, arg };
return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args);
} }

View file

@ -12,19 +12,22 @@ terms of the MIT license. A copy of the license can be found in the file
// Empty page used to initialize the small free pages array // Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = { const mi_page_t _mi_page_empty = {
0, false, false, false, false, 0, 0, 0, false, false, false, false,
{ 0 }, false, 0, // capacity
0, // reserved capacity
{ 0 }, // flags
false, // is_zero
0, // retire_expire
NULL, // free NULL, // free
#if MI_ENCODE_FREELIST #if MI_ENCODE_FREELIST
0, { 0, 0 },
#endif #endif
0, // used 0, // used
NULL, 0, // xblock_size
ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(0), NULL, // local_free
0, NULL, NULL, NULL ATOMIC_VAR_INIT(0), // xthread_free
#if (MI_INTPTR_SIZE==8 && defined(MI_ENCODE_FREELIST)) || (MI_INTPTR_SIZE==4 && !defined(MI_ENCODE_FREELIST)) ATOMIC_VAR_INIT(0), // xheap
, { NULL } // padding NULL, NULL
#endif
}; };
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@ -83,10 +86,11 @@ const mi_heap_t _mi_heap_empty = {
MI_SMALL_PAGES_EMPTY, MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY, MI_PAGE_QUEUES_EMPTY,
ATOMIC_VAR_INIT(NULL), ATOMIC_VAR_INIT(NULL),
0, 0, // tid
0, 0, // cookie
0, { 0, 0 }, // keys
0, { {0}, {0}, 0 },
0, // page count
false false
}; };
@ -95,27 +99,34 @@ mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty;
#define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats))) #define tld_main_stats ((mi_stats_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,stats)))
#define tld_main_os ((mi_os_tld_t*)((uint8_t*)&tld_main + offsetof(mi_tld_t,os)))
static mi_tld_t tld_main = { static mi_tld_t tld_main = {
0, false, 0, false,
&_mi_heap_main, &_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0},
0, 0, 0, 0, 0, 0, NULL,
tld_main_stats, tld_main_os
}, // segments
{ 0, tld_main_stats }, // os { 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats { MI_STATS_NULL } // stats
}; };
#if MI_INTPTR_SIZE==8
#define MI_INIT_COOKIE (0xCDCDCDCDCDCDCDCDUL)
#else
#define MI_INIT_COOKIE (0xCDCDCDCDUL)
#endif
mi_heap_t _mi_heap_main = { mi_heap_t _mi_heap_main = {
&tld_main, &tld_main,
MI_SMALL_PAGES_EMPTY, MI_SMALL_PAGES_EMPTY,
MI_PAGE_QUEUES_EMPTY, MI_PAGE_QUEUES_EMPTY,
NULL, NULL,
0, // thread id 0, // thread id
#if MI_INTPTR_SIZE==8 // the cookie of the main heap can be fixed (unlike page cookies that need to be secure!) MI_INIT_COOKIE, // initial cookie
0xCDCDCDCDCDCDCDCDUL, { MI_INIT_COOKIE, MI_INIT_COOKIE }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
#else { {0}, {0}, 0 }, // random
0xCDCDCDCDUL,
#endif
0, // random
0, // page count 0, // page count
false // can reclaim false // can reclaim
}; };
@ -124,66 +135,6 @@ bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`.
mi_stats_t _mi_stats_main = { MI_STATS_NULL }; mi_stats_t _mi_stats_main = { MI_STATS_NULL };
/* -----------------------------------------------------------
Initialization of random numbers
----------------------------------------------------------- */
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
#include <time.h>
#endif
uintptr_t _mi_random_shuffle(uintptr_t x) {
#if (MI_INTPTR_SIZE==8)
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9UL;
x ^= x >> 27;
x *= 0x94d049bb133111ebUL;
x ^= x >> 31;
#elif (MI_INTPTR_SIZE==4)
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
x ^= x >> 16;
x *= 0x7feb352dUL;
x ^= x >> 15;
x *= 0x846ca68bUL;
x ^= x >> 16;
#endif
return x;
}
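As a side note on the (soon to be removed) mixer above: both variants are invertible bit mixers, and _mi_random_init below applies the shuffle a few times to a seed gathered from the thread id, a high-resolution clock, and ASLR. The sketch below only illustrates that pattern with a fixed stand-in seed and the same 64-bit constants; it is not the mimalloc API.

#include <stdint.h>
#include <stdio.h>

// Same constants and shifts as the 64-bit branch of _mi_random_shuffle above.
static uint64_t shuffle64(uint64_t x) {
  x ^= x >> 30; x *= 0xbf58476d1ce4e5b9ULL;
  x ^= x >> 27; x *= 0x94d049bb133111ebULL;
  x ^= x >> 31;
  return x;
}

int main(void) {
  uint64_t x = 0x12345678u;           // stand-in seed (the real code mixes in runtime entropy)
  for (int i = 0; i < 4; i++) {
    x = shuffle64(x);
    printf("%016llx\n", (unsigned long long)x);
  }
  return 0;
}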
uintptr_t _mi_random_init(uintptr_t seed /* can be zero */) {
#ifdef __wasi__ // no ASLR when using WebAssembly, and time granularity may be coarse
uintptr_t x;
arc4random_buf(&x, sizeof x);
#else
// Hopefully, ASLR makes our function address random
uintptr_t x = (uintptr_t)((void*)&_mi_random_init);
x ^= seed;
// xor with high res time
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
x ^= (uintptr_t)(pcount.QuadPart);
#elif defined(__APPLE__)
x ^= (uintptr_t)mach_absolute_time();
#else
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
x ^= (uintptr_t)time.tv_sec;
x ^= (uintptr_t)time.tv_nsec;
#endif
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {
x = _mi_random_shuffle(x);
}
#endif
return x;
}
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Initialization and freeing of the thread local heaps Initialization and freeing of the thread local heaps
@ -213,12 +164,15 @@ static bool _mi_heap_init(void) {
mi_heap_t* heap = &td->heap; mi_heap_t* heap = &td->heap;
memcpy(heap, &_mi_heap_empty, sizeof(*heap)); memcpy(heap, &_mi_heap_empty, sizeof(*heap));
heap->thread_id = _mi_thread_id(); heap->thread_id = _mi_thread_id();
heap->random = _mi_random_init(heap->thread_id); _mi_random_init(&heap->random);
heap->cookie = ((uintptr_t)heap ^ _mi_heap_random(heap)) | 1; heap->cookie = _mi_heap_random_next(heap) | 1;
heap->key[0] = _mi_heap_random_next(heap);
heap->key[1] = _mi_heap_random_next(heap);
heap->tld = tld; heap->tld = tld;
memset(tld, 0, sizeof(*tld)); memset(tld, 0, sizeof(*tld));
tld->heap_backing = heap; tld->heap_backing = heap;
tld->segments.stats = &tld->stats; tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
tld->os.stats = &tld->stats; tld->os.stats = &tld->stats;
_mi_heap_set_default_direct(heap); _mi_heap_set_default_direct(heap);
} }
@ -438,13 +392,7 @@ static void mi_process_load(void) {
const char* msg = NULL; const char* msg = NULL;
mi_allocator_init(&msg); mi_allocator_init(&msg);
if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) {
_mi_fputs(NULL,NULL,msg); _mi_fputs(NULL,NULL,NULL,msg);
}
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
double max_secs = (double)pages / 2.0; // 0.5s per page (1GiB)
mi_reserve_huge_os_pages(pages, max_secs, NULL);
} }
} }
@ -455,16 +403,17 @@ void mi_process_init(void) mi_attr_noexcept {
// access _mi_heap_default before setting _mi_process_is_initialized to ensure // access _mi_heap_default before setting _mi_process_is_initialized to ensure
// that the TLS slot is allocated without getting into recursion on macOS // that the TLS slot is allocated without getting into recursion on macOS
// when using dynamic linking with interpose. // when using dynamic linking with interpose.
mi_heap_t* h = mi_get_default_heap(); mi_get_default_heap();
_mi_process_is_initialized = true; _mi_process_is_initialized = true;
_mi_heap_main.thread_id = _mi_thread_id(); _mi_heap_main.thread_id = _mi_thread_id();
_mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id); _mi_verbose_message("process init: 0x%zx\n", _mi_heap_main.thread_id);
uintptr_t random = _mi_random_init(_mi_heap_main.thread_id) ^ (uintptr_t)h; _mi_random_init(&_mi_heap_main.random);
#ifndef __APPLE__ #ifndef __APPLE__ // TODO: fix this? cannot update cookie if allocation already happened..
_mi_heap_main.cookie = (uintptr_t)&_mi_heap_main ^ random; _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.key[1] = _mi_heap_random_next(&_mi_heap_main);
#endif #endif
_mi_heap_main.random = _mi_random_shuffle(random);
mi_process_setup_auto_thread_done(); mi_process_setup_auto_thread_done();
_mi_os_init(); _mi_os_init();
#if (MI_DEBUG) #if (MI_DEBUG)
@ -473,6 +422,11 @@ void mi_process_init(void) mi_attr_noexcept {
_mi_verbose_message("secure level: %d\n", MI_SECURE); _mi_verbose_message("secure level: %d\n", MI_SECURE);
mi_thread_init(); mi_thread_init();
mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL)
if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
size_t pages = mi_option_get(mi_option_reserve_huge_os_pages);
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
}
} }
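The huge-page reservation now happens at the end of mi_process_init with a 500ms-per-page timeout. The same facility is available to applications directly; a hedged usage sketch, assuming the public prototype matches the call above (pages, numa_nodes, timeout in milliseconds):

#include <mimalloc.h>

int main(void) {
  // reserve 4 x 1GiB huge pages, interleaved over all NUMA nodes (0 = use all),
  // allowing 500ms per page before giving up
  mi_reserve_huge_os_pages_interleave(4, 0, 4 * 500);
  void* p = mi_malloc(1 << 20);
  mi_free(p);
  return 0;
}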
// Called when the process is done (through `at_exit`) // Called when the process is done (through `at_exit`)
@ -16,10 +16,10 @@ We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order 1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively. to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of 2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
an OS allocation/free is still (much) too expensive relative to the accesses in that an OS allocation/free is still (much) too expensive relative to the accesses
object :-( (`malloc-large` tests this). This means we need a cheaper way to in that object :-( (`malloc-large` tests this). This means we need a cheaper
reuse memory. way to reuse memory.
3. This layer can help with a NUMA aware allocation in the future. 3. This layer allows for NUMA aware allocation.
Possible issues: Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much - (2) can potentially be addressed too with a small cache per thread which is much
@ -37,6 +37,8 @@ Possible issues:
#include <string.h> // memset #include <string.h> // memset
#include "bitmap.inc.c"
// Internal raw OS interface // Internal raw OS interface
size_t _mi_os_large_page_size(); size_t _mi_os_large_page_size();
bool _mi_os_protect(void* addr, size_t size); bool _mi_os_protect(void* addr, size_t size);
@ -45,56 +47,60 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_os_tld_t* tld);
void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); // arena.c
void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment); void _mi_arena_free(void* p, size_t size, size_t memid, mi_stats_t* stats);
bool _mi_os_is_huge_reserved(void* p); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
// Constants // Constants
#if (MI_INTPTR_SIZE==8) #if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE (256 * (1ULL << 30)) // 256GiB => 16KiB for the region map #define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 48KiB for the region map
#elif (MI_INTPTR_SIZE==4) #elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * (1UL << 30)) // 3GiB => 196 bytes for the region map #define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map
#else #else
#error "define the maximum heap space allowed for regions on this platform" #error "define the maximum heap space allowed for regions on this platform"
#endif #endif
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
#define MI_REGION_MAP_BITS (MI_INTPTR_SIZE * 8) #define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_REGION_MAP_BITS) #define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
#define MI_REGION_MAX_ALLOC_SIZE ((MI_REGION_MAP_BITS/4)*MI_SEGMENT_SIZE) // 64MiB #define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) #define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
#define MI_REGION_MAP_FULL UINTPTR_MAX #define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
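The comments on these constants follow from simple arithmetic; the check below spells it out for a 64-bit build, assuming MI_SEGMENT_SIZE is 4MiB and MI_BITMAP_FIELD_BITS is 64 (the values the comments imply). Illustrative only.

#include <assert.h>
#include <stdint.h>

int main(void) {
  const uint64_t MiB = 1024ULL * 1024;
  const uint64_t GiB = 1024ULL * MiB;
  uint64_t segment_size = 4 * MiB;                 // one block (assumed MI_SEGMENT_SIZE)
  uint64_t region_size  = segment_size * 64;       // one bit per block in a 64-bit field
  uint64_t region_max   = (256 * GiB) / region_size;
  uint64_t max_obj      = (64 / 4) * segment_size; // MI_REGION_MAX_OBJ_BLOCKS blocks
  assert(region_size == 256 * MiB);                // matches the MI_REGION_SIZE comment
  assert(region_max  == 1024);                     // matches the MI_REGION_MAX comment
  assert(max_obj     == 64 * MiB);                 // matches the 64MiB comment
  return 0;
}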
// Region info is a pointer to the memory region and two bits for
typedef uintptr_t mi_region_info_t; // its flags: is_large, and is_committed.
typedef union mi_region_info_u {
static inline mi_region_info_t mi_region_info_create(void* start, bool is_large, bool is_committed) { uintptr_t value;
return ((uintptr_t)start | ((uintptr_t)(is_large?1:0) << 1) | (is_committed?1:0)); struct {
} bool valid;
bool is_large;
static inline void* mi_region_info_read(mi_region_info_t info, bool* is_large, bool* is_committed) { short numa_node;
if (is_large) *is_large = ((info&0x02) != 0); };
if (is_committed) *is_committed = ((info&0x01) != 0); } mi_region_info_t;
return (void*)(info & ~0x03);
}
// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with // A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. // a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s { typedef struct mem_region_s {
volatile _Atomic(uintptr_t) map; // in-use bit per MI_SEGMENT_SIZE block volatile _Atomic(uintptr_t) info; // is_large, and associated numa node + 1 (so 0 is no association)
volatile _Atomic(mi_region_info_t) info; // start of virtual memory area, and flags volatile _Atomic(void*) start; // start of the memory area (and flags)
volatile _Atomic(uintptr_t) dirty_mask; // bit per block if the contents are not zero'd mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block (if `!info.is_committed))
mi_bitmap_field_t reset; // track reset per block
volatile _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena-
} mem_region_t; } mem_region_t;
// The region map
// The region map; 16KiB for a 256GiB HEAP_REGION_MAX
// TODO: in the future, maintain a map per NUMA node for numa aware allocation
static mem_region_t regions[MI_REGION_MAX]; static mem_region_t regions[MI_REGION_MAX];
static volatile _Atomic(uintptr_t) regions_count; // = 0; // allocated regions // Allocated regions
static volatile _Atomic(uintptr_t) regions_count; // = 0;
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
@ -103,257 +109,224 @@ Utility functions
// Blocks (of 4MiB) needed for the given size. // Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) { static size_t mi_region_block_count(size_t size) {
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); return _mi_divide_up(size, MI_SEGMENT_SIZE);
return (size + MI_SEGMENT_SIZE - 1) / MI_SEGMENT_SIZE;
}
// The bit mask for a given number of blocks at a specified bit index.
static uintptr_t mi_region_block_mask(size_t blocks, size_t bitidx) {
mi_assert_internal(blocks + bitidx <= MI_REGION_MAP_BITS);
return ((((uintptr_t)1 << blocks) - 1) << bitidx);
} }
/*
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. // Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) { static size_t mi_good_commit_size(size_t size) {
if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
return _mi_align_up(size, _mi_os_large_page_size()); return _mi_align_up(size, _mi_os_large_page_size());
} }
*/
// Return if a pointer points into a region reserved by us. // Return if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
if (p==NULL) return false; if (p==NULL) return false;
size_t count = mi_atomic_read_relaxed(&regions_count); size_t count = mi_atomic_read_relaxed(&regions_count);
for (size_t i = 0; i < count; i++) { for (size_t i = 0; i < count; i++) {
uint8_t* start = (uint8_t*)mi_region_info_read( mi_atomic_read_relaxed(&regions[i].info), NULL, NULL); uint8_t* start = (uint8_t*)mi_atomic_read_ptr_relaxed(&regions[i].start);
if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
} }
return false; return false;
} }
static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
void* start = mi_atomic_read_ptr(&region->start);
mi_assert_internal(start != NULL);
return ((uint8_t*)start + (bit_idx * MI_SEGMENT_SIZE));
}
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
size_t idx = region - regions;
mi_assert_internal(&regions[idx] == region);
return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}
static size_t mi_memid_create_from_arena(size_t arena_memid) {
return (arena_memid << 1) | 1;
}
static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
if ((id&1)==1) {
if (arena_memid != NULL) *arena_memid = (id>>1);
return true;
}
else {
size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
*bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
*region = &regions[idx];
return false;
}
}
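The memory id encodes where a block came from: the low bit distinguishes arena allocations from region allocations, and the remaining bits hold either the arena memid or region_index * field_bits + bit_index. A tiny round-trip check of that scheme, assuming 64 bits per bitmap field as on 64-bit builds (the concrete values are made up for illustration):

#include <assert.h>
#include <stddef.h>

int main(void) {
  const size_t field_bits = 64;
  // region-backed: region index 3, bit 5 of its in_use field
  size_t id = (3 * field_bits + 5) << 1;           // low bit 0 => region memory
  assert((id & 1) == 0);
  assert(((id >> 1) / field_bits) == 3);           // recovers the region index
  assert(((id >> 1) % field_bits) == 5);           // recovers the bit index
  // arena-backed: arena memid 7
  size_t arena_id = ((size_t)7 << 1) | 1;          // low bit 1 => arena memory
  assert((arena_id & 1) == 1 && (arena_id >> 1) == 7);
  return 0;
}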
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
Commit from a region Allocate a region from the OS (or an arena)
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
// Commit the `blocks` in `region` at `idx` and `bitidx` of a given `size`. static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bitidx, size_t blocks,
size_t size, bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
{ {
size_t mask = mi_region_block_mask(blocks,bitidx); // not out of regions yet?
mi_assert_internal(mask != 0); if (mi_atomic_read_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false;
mi_assert_internal((mask & mi_atomic_read_relaxed(&region->map)) == mask);
mi_assert_internal(&regions[idx] == region);
// ensure the region is reserved // try to allocate a fresh region from the OS
mi_region_info_t info = mi_atomic_read(&region->info); bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
if (info == 0) bool region_large = (commit && allow_large);
{ bool is_zero = false;
bool region_commit = mi_option_is_enabled(mi_option_eager_region_commit); size_t arena_memid = 0;
bool region_large = *allow_large; void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_zero, &arena_memid, tld);
void* start = NULL; if (start == NULL) return false;
if (region_large) { mi_assert_internal(!(region_large && !allow_large));
start = _mi_os_try_alloc_from_huge_reserved(MI_REGION_SIZE, MI_SEGMENT_ALIGN); mi_assert_internal(!region_large || region_commit);
if (start != NULL) { region_commit = true; }
}
if (start == NULL) {
start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, region_commit, &region_large, tld);
}
mi_assert_internal(!(region_large && !*allow_large));
if (start == NULL) { // claim a fresh slot
// failure to allocate from the OS! unclaim the blocks and fail const uintptr_t idx = mi_atomic_increment(&regions_count);
size_t map; if (idx >= MI_REGION_MAX) {
do { mi_atomic_decrement(&regions_count);
map = mi_atomic_read_relaxed(&region->map); _mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
} while (!mi_atomic_cas_weak(&region->map, map & ~mask, map));
return false; return false;
} }
// set the newly allocated region // allocated, initialize and claim the initial blocks
info = mi_region_info_create(start,region_large,region_commit); mem_region_t* r = &regions[idx];
if (mi_atomic_cas_strong(&region->info, info, 0)) { r->arena_memid = arena_memid;
// update the region count mi_atomic_write(&r->in_use, 0);
mi_atomic_increment(&regions_count); mi_atomic_write(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
} mi_atomic_write(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
else { mi_atomic_write(&r->reset, 0);
// failed, another thread allocated just before us! *bit_idx = 0;
// we assign it to a later slot instead (up to 4 tries). mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) { mi_atomic_write_ptr(&r->start, start);
if (mi_atomic_cas_strong(&regions[idx+i].info, info, 0)) {
mi_atomic_increment(&regions_count);
start = NULL;
break;
}
}
if (start != NULL) {
// free it if we didn't succeed to save it to some other region
_mi_os_free_ex(start, MI_REGION_SIZE, region_commit, tld->stats);
}
// and continue with the memory at our index
info = mi_atomic_read(&region->info);
}
}
mi_assert_internal(info == mi_atomic_read(&region->info));
mi_assert_internal(info != 0);
// Commit the blocks to memory // and share it
bool region_is_committed = false; mi_region_info_t info;
bool region_is_large = false; info.valid = true;
void* start = mi_region_info_read(info,&region_is_large,&region_is_committed); info.is_large = region_large;
mi_assert_internal(!(region_is_large && !*allow_large)); info.numa_node = _mi_os_numa_node(tld);
mi_assert_internal(start!=NULL); mi_atomic_write(&r->info, info.value); // now make it available to others
*region = r;
return true;
}
// set dirty bits /* ----------------------------------------------------------------------------
uintptr_t m; Try to claim blocks in suitable regions
do { -----------------------------------------------------------------------------*/
m = mi_atomic_read(&region->dirty_mask);
} while (!mi_atomic_cas_weak(&region->dirty_mask, m | mask, m));
*is_zero = ((m & mask) == 0); // no dirty bit set in our claimed range?
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE); static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) {
if (*commit && !region_is_committed) { // initialized at all?
mi_region_info_t info;
info.value = mi_atomic_read_relaxed(&region->info);
if (info.value==0) return false;
// numa correct
if (numa_node >= 0) { // use negative numa node to always succeed
int rnode = info.numa_node;
if (rnode >= 0 && rnode != numa_node) return false;
}
// check allow-large
if (!allow_large && info.is_large) return false;
return true;
}
static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
// try all regions for a free slot
const size_t count = mi_atomic_read(&regions_count);
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses?
for (size_t visited = 0; visited < count; visited++, idx++) {
if (idx >= count) idx = 0; // wrap around
mem_region_t* r = &regions[idx];
if (mi_region_is_suitable(r, numa_node, allow_large)) {
if (mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
tld->region_idx = idx; // remember the last found position
*region = r;
return true;
}
}
}
return false;
}
static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* is_large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
mem_region_t* region;
mi_bitmap_index_t bit_idx;
const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
// try to claim in existing regions
if (!mi_region_try_claim(numa_node, blocks, *is_large, &region, &bit_idx, tld)) {
// otherwise try to allocate a fresh region
if (!mi_region_try_alloc_os(blocks, *commit, *is_large, &region, &bit_idx, tld)) {
// out of regions or memory
return NULL;
}
}
// found a region and claimed `blocks` at `bit_idx`
mi_assert_internal(region != NULL);
mi_assert_internal(mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));
mi_region_info_t info;
info.value = mi_atomic_read(&region->info);
void* start = mi_atomic_read_ptr(&region->start);
mi_assert_internal(!(info.is_large && !*is_large));
mi_assert_internal(start != NULL);
*is_zero = mi_bitmap_unclaim(&region->dirty, 1, blocks, bit_idx);
*is_large = info.is_large;
*memid = mi_memid_create(region, bit_idx);
void* p = (uint8_t*)start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);
// commit
if (*commit) {
// ensure commit // ensure commit
bool commit_zero = false; bool any_uncommitted;
_mi_os_commit(blocks_start, mi_good_commit_size(size), &commit_zero, tld->stats); // only commit needed size (unless using large OS pages) mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted);
if (any_uncommitted) {
mi_assert_internal(!info.is_large);
bool commit_zero;
_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld);
if (commit_zero) *is_zero = true; if (commit_zero) *is_zero = true;
} }
else if (!*commit && region_is_committed) {
// but even when no commit is requested, we might have committed anyway (in a huge OS page for example)
*commit = true;
} }
else {
// no need to commit, but check if already fully committed
*commit = mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
}
mi_assert_internal(!*commit || mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));
// unreset reset blocks
if (mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
// some blocks are still reset
mi_assert_internal(!info.is_large);
mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
bool reset_zero = false;
_mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
if (reset_zero) *is_zero = true;
}
}
mi_assert_internal(!mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));
#if (MI_DEBUG>=2)
if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
// and return the allocation // and return the allocation
mi_assert_internal(blocks_start != NULL); mi_assert_internal(p != NULL);
*allow_large = region_is_large; return p;
*p = blocks_start;
*id = (idx*MI_REGION_MAP_BITS) + bitidx;
return true;
} }
// Use bit scan forward to quickly find the first zero bit if it is available
#if defined(_MSC_VER)
#define MI_HAVE_BITSCAN
#include <intrin.h>
static inline size_t mi_bsf(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
#if (MI_INTPTR_SIZE==8)
_BitScanForward64(&idx, x);
#else
_BitScanForward(&idx, x);
#endif
return idx;
}
static inline size_t mi_bsr(uintptr_t x) {
if (x==0) return 8*MI_INTPTR_SIZE;
DWORD idx;
#if (MI_INTPTR_SIZE==8)
_BitScanReverse64(&idx, x);
#else
_BitScanReverse(&idx, x);
#endif
return idx;
}
#elif defined(__GNUC__) || defined(__clang__)
#define MI_HAVE_BITSCAN
static inline size_t mi_bsf(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : __builtin_ctzl(x));
}
static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? 8*MI_INTPTR_SIZE : (8*MI_INTPTR_SIZE - 1) - __builtin_clzl(x));
}
#endif
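These helpers let the (old) allocation scan jump straight to the first candidate bit instead of walking the map bit by bit. A minimal illustration of that idea, using the GCC/Clang builtin directly (the helpers above additionally guard against a zero argument):

#include <assert.h>
#include <stdint.h>

int main(void) {
  uintptr_t map = 0x0F0F;                              // blocks 0-3 and 8-11 are in use
  size_t first_free = (size_t)__builtin_ctzl((unsigned long)~map); // first zero bit
  assert(first_free == 4);                             // scanning would start at block 4
  return 0;
}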
// Allocate `blocks` in a `region` at `idx` of a given `size`.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/SIZE_MAX before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t blocks, size_t size,
bool* commit, bool* allow_large, bool* is_zero, void** p, size_t* id, mi_os_tld_t* tld)
{
mi_assert_internal(p != NULL && id != NULL);
mi_assert_internal(blocks < MI_REGION_MAP_BITS);
const uintptr_t mask = mi_region_block_mask(blocks, 0);
const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
uintptr_t map = mi_atomic_read(&region->map);
if (map==MI_REGION_MAP_FULL) return true;
#ifdef MI_HAVE_BITSCAN
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
#else
size_t bitidx = 0; // otherwise start at 0
#endif
uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx
// scan linearly for a free range of zero bits
while(bitidx <= bitidx_max) {
if ((map & m) == 0) { // are the mask bits free at bitidx?
mi_assert_internal((m >> bitidx) == mask); // no overflow?
uintptr_t newmap = map | m;
mi_assert_internal((newmap^map) >> bitidx == mask);
if (!mi_atomic_cas_weak(&region->map, newmap, map)) { // TODO: use strong cas here?
// no success, another thread claimed concurrently.. keep going
map = mi_atomic_read(&region->map);
continue;
}
else {
// success, we claimed the bits
// now commit the block memory -- this can still fail
return mi_region_commit_blocks(region, idx, bitidx, blocks,
size, commit, allow_large, is_zero, p, id, tld);
}
}
else {
// on to the next bit range
#ifdef MI_HAVE_BITSCAN
size_t shift = (blocks == 1 ? 1 : mi_bsr(map & m) - bitidx + 1);
mi_assert_internal(shift > 0 && shift <= blocks);
#else
size_t shift = 1;
#endif
bitidx += shift;
m <<= shift;
}
}
// no error, but also no bits found
return true;
}
// Try to allocate `blocks` in a `region` at `idx` of a given `size`. Does a quick check before trying to claim.
// Returns `false` on an error (OOM); `true` otherwise. `p` and `id` are only written
// if the blocks were successfully claimed so ensure they are initialized to NULL/0 before the call.
// (not being able to claim is not considered an error so check for `p != NULL` afterwards).
static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size,
bool* commit, bool* allow_large, bool* is_zero,
void** p, size_t* id, mi_os_tld_t* tld)
{
// check if there are available blocks in the region..
mi_assert_internal(idx < MI_REGION_MAX);
mem_region_t* region = &regions[idx];
uintptr_t m = mi_atomic_read_relaxed(&region->map);
if (m != MI_REGION_MAP_FULL) { // some bits are zero
bool ok = (*commit || *allow_large); // committing or allow-large is always ok
if (!ok) {
// otherwise skip incompatible regions if possible.
// this is not guaranteed due to multiple threads allocating at the same time but
// that's ok. In secure mode, large is never allowed for any thread, so that works out;
// otherwise we might just not be able to reset/decommit individual pages sometimes.
mi_region_info_t info = mi_atomic_read_relaxed(&region->info);
bool is_large;
bool is_committed;
void* start = mi_region_info_read(info,&is_large,&is_committed);
ok = (start == NULL || (*commit || !is_committed) || (*allow_large || !is_large)); // Todo: test with one bitmap operation?
}
if (ok) {
return mi_region_alloc_blocks(region, idx, blocks, size, commit, allow_large, is_zero, p, id, tld);
}
}
return true; // no error, but no success either
}
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
Allocation Allocation
@ -361,59 +334,37 @@ static bool mi_region_try_alloc_blocks(size_t idx, size_t blocks, size_t size,
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. // Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) // (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
size_t* id, mi_os_tld_t* tld)
{ {
mi_assert_internal(id != NULL && tld != NULL); mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0); mi_assert_internal(size > 0);
*id = SIZE_MAX; *memid = 0;
*is_zero = false; *is_zero = false;
bool default_large = false; bool default_large = false;
if (large==NULL) large = &default_large; // ensure `large != NULL` if (large==NULL) large = &default_large; // ensure `large != NULL`
if (size == 0) return NULL;
// use direct OS allocation for huge blocks or alignment (with `id = SIZE_MAX`)
if (size > MI_REGION_MAX_ALLOC_SIZE || alignment > MI_SEGMENT_ALIGN) {
*is_zero = true;
return _mi_os_alloc_aligned(mi_good_commit_size(size), alignment, *commit, large, tld); // round up size
}
// always round size to OS page size multiple (so commit/decommit go over the entire range)
// TODO: use large OS page size here?
size = _mi_align_up(size, _mi_os_page_size()); size = _mi_align_up(size, _mi_os_page_size());
// calculate the number of needed blocks // allocate from regions if possible
size_t blocks = mi_region_block_count(size); size_t arena_memid;
mi_assert_internal(blocks > 0 && blocks <= 8*MI_INTPTR_SIZE); const size_t blocks = mi_region_block_count(size);
if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
// find a range of free blocks void* p = mi_region_try_alloc(blocks, commit, large, is_zero, memid, tld);
void* p = NULL; mi_assert_internal(p == NULL || (uintptr_t)p % alignment == 0);
size_t count = mi_atomic_read(&regions_count); if (p != NULL) {
size_t idx = tld->region_idx; // start at 0 to reuse low addresses? Or, use tld->region_idx to reduce contention? #if (MI_DEBUG>=2)
for (size_t visited = 0; visited < count; visited++, idx++) { if (*commit) { ((uint8_t*)p)[0] = 0; }
if (idx >= count) idx = 0; // wrap around #endif
if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error return p;
if (p != NULL) break;
} }
if (p == NULL) {
// no free range in existing regions -- try to extend beyond the count.. but at most 8 regions
for (idx = count; idx < mi_atomic_read_relaxed(&regions_count) + 8 && idx < MI_REGION_MAX; idx++) {
if (!mi_region_try_alloc_blocks(idx, blocks, size, commit, large, is_zero, &p, id, tld)) return NULL; // error
if (p != NULL) break;
}
}
if (p == NULL) {
// we could not find a place to allocate, fall back to the os directly
_mi_warning_message("unable to allocate from region: size %zu\n", size); _mi_warning_message("unable to allocate from region: size %zu\n", size);
*is_zero = true;
p = _mi_os_alloc_aligned(size, alignment, commit, large, tld);
}
else {
tld->region_idx = idx; // next start of search? currently not used as we use first-fit
} }
// and otherwise fall back to the OS
void* p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_zero, &arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0); mi_assert_internal( p == NULL || (uintptr_t)p % alignment == 0);
if (p != NULL && *commit) { ((uint8_t*)p)[0] = 0; }
return p; return p;
} }
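In short, the new allocation path routes small enough, normally aligned requests through the region bitmap and lets everything else fall through to the arena/OS path. A hypothetical helper summarizing that decision (the limits are taken from the constants section; the real code also rounds the size up to the OS page size and falls back to the arena if no region slot is found):

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

// Hypothetical, for illustration: would this request be served from a region?
static bool use_region_allocator(size_t size, size_t alignment) {
  const size_t segment_align = (size_t)4 << 20;   // assumed MI_SEGMENT_ALIGN (4 MiB)
  const size_t max_obj_size  = (size_t)64 << 20;  // assumed MI_REGION_MAX_OBJ_SIZE (64 MiB)
  return size <= max_obj_size && alignment <= segment_align;
}

int main(void) {
  assert( use_region_allocator((size_t)1 << 20, 1 << 16));    // 1 MiB: region path
  assert(!use_region_allocator((size_t)128 << 20, 1 << 16));  // 128 MiB: arena/OS path
  return 0;
}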
@ -424,67 +375,57 @@ Free
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id. // Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) { void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
mi_assert_internal(size > 0 && stats != NULL); mi_assert_internal(size > 0 && tld != NULL);
if (p==NULL) return; if (p==NULL) return;
if (size==0) return; if (size==0) return;
if (id == SIZE_MAX) { size = _mi_align_up(size, _mi_os_page_size());
// was a direct OS allocation, pass through
_mi_os_free(p, size, stats); size_t arena_memid = 0;
mi_bitmap_index_t bit_idx;
mem_region_t* region;
if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
// was a direct arena allocation, pass through
_mi_arena_free(p, size, arena_memid, tld->stats);
} }
else { else {
// allocated in a region // allocated in a region
mi_assert_internal(size <= MI_REGION_MAX_ALLOC_SIZE); if (size > MI_REGION_MAX_ALLOC_SIZE) return; mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
// we can align the size up to page size (as we allocate that way too) const size_t blocks = mi_region_block_count(size);
// this ensures we fully commit/decommit/reset mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
size = _mi_align_up(size, _mi_os_page_size()); mi_region_info_t info;
size_t idx = (id / MI_REGION_MAP_BITS); info.value = mi_atomic_read(&region->info);
size_t bitidx = (id % MI_REGION_MAP_BITS); mi_assert_internal(info.value != 0);
size_t blocks = mi_region_block_count(size); void* blocks_start = mi_region_blocks_start(region, bit_idx);
size_t mask = mi_region_block_mask(blocks, bitidx);
mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
mem_region_t* region = &regions[idx];
mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask ); // claimed?
mi_region_info_t info = mi_atomic_read(&region->info);
bool is_large;
bool is_eager_committed;
void* start = mi_region_info_read(info,&is_large,&is_eager_committed);
mi_assert_internal(start != NULL);
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
mi_assert_internal(blocks_start == p); // not a pointer in our area? mi_assert_internal(blocks_start == p); // not a pointer in our area?
mi_assert_internal(bitidx + blocks <= MI_REGION_MAP_BITS); mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
if (blocks_start != p || bitidx + blocks > MI_REGION_MAP_BITS) return; // or `abort`? if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?
// decommit (or reset) the blocks to reduce the working set. // committed?
// TODO: implement delayed decommit/reset as these calls are too expensive if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
// if the memory is reused soon. mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL);
// reset: 10x slowdown on malloc-large, decommit: 17x slowdown on malloc-large }
if (!is_large) {
if (mi_option_is_enabled(mi_option_segment_reset)) { if (any_reset) {
if (!is_eager_committed && // cannot reset large pages // set the is_reset bits if any pages were reset
(mi_option_is_enabled(mi_option_eager_commit) || // cannot reset halfway committed segments, use `option_page_reset` instead mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL);
mi_option_is_enabled(mi_option_reset_decommits))) // but we can decommit halfway committed segments }
// reset the blocks to reduce the working set.
if (!info.is_large && mi_option_is_enabled(mi_option_segment_reset)
&& (mi_option_is_enabled(mi_option_eager_commit) ||
mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
{ {
_mi_os_reset(p, size, stats); bool any_unreset;
//_mi_os_decommit(p, size, stats); // todo: and clear dirty bits? mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
if (any_unreset) {
_mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
} }
} }
}
if (!is_eager_committed) {
// adjust commit statistics as we commit again when re-using the same slot
_mi_stat_decrease(&stats->committed, mi_good_commit_size(size));
}
// TODO: should we free empty regions? currently only done _mi_mem_collect.
// this frees up virtual address space which might be useful on 32-bit systems?
// and unclaim // and unclaim
uintptr_t map; bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
uintptr_t newmap; mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed);
do {
map = mi_atomic_read_relaxed(&region->map);
newmap = map & ~mask;
} while (!mi_atomic_cas_weak(&region->map, newmap, map));
} }
} }
@ -492,49 +433,51 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
collection collection
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_stats_t* stats) { void _mi_mem_collect(mi_os_tld_t* tld) {
// free every region that has no segments in use. // free every region that has no segments in use.
for (size_t i = 0; i < regions_count; i++) { uintptr_t rcount = mi_atomic_read_relaxed(&regions_count);
for (size_t i = 0; i < rcount; i++) {
mem_region_t* region = &regions[i]; mem_region_t* region = &regions[i];
if (mi_atomic_read_relaxed(&region->map) == 0) { if (mi_atomic_read_relaxed(&region->info) != 0) {
// if no segments used, try to claim the whole region // if no segments used, try to claim the whole region
uintptr_t m; uintptr_t m;
do { do {
m = mi_atomic_read_relaxed(&region->map); m = mi_atomic_read_relaxed(&region->in_use);
} while(m == 0 && !mi_atomic_cas_weak(&region->map, ~((uintptr_t)0), 0 )); } while(m == 0 && !mi_atomic_cas_weak(&region->in_use, MI_BITMAP_FIELD_FULL, 0 ));
if (m == 0) { if (m == 0) {
// on success, free the whole region (unless it was huge reserved) // on success, free the whole region
bool is_eager_committed; void* start = mi_atomic_read_ptr(&regions[i].start);
void* start = mi_region_info_read(mi_atomic_read(&region->info), NULL, &is_eager_committed); size_t arena_memid = mi_atomic_read_relaxed(&regions[i].arena_memid);
if (start != NULL && !_mi_os_is_huge_reserved(start)) { memset(&regions[i], 0, sizeof(mem_region_t));
_mi_os_free_ex(start, MI_REGION_SIZE, is_eager_committed, stats); // and release the whole region
mi_atomic_write(&region->info, 0);
if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_arena_free(start, MI_REGION_SIZE, arena_memid, tld->stats);
} }
// and release
mi_atomic_write(&region->info,0);
mi_atomic_write(&region->map,0);
} }
} }
} }
} }
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
Other Other
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
return _mi_os_commit(p, size, is_zero, stats); return _mi_os_reset(p, size, tld->stats);
} }
bool _mi_mem_decommit(void* p, size_t size, mi_stats_t* stats) { bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
return _mi_os_decommit(p, size, stats); return _mi_os_unreset(p, size, is_zero, tld->stats);
} }
bool _mi_mem_reset(void* p, size_t size, mi_stats_t* stats) { bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
return _mi_os_reset(p, size, stats); return _mi_os_commit(p, size, is_zero, tld->stats);
} }
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats) { bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
return _mi_os_unreset(p, size, is_zero, stats); return _mi_os_decommit(p, size, tld->stats);
} }
bool _mi_mem_protect(void* p, size_t size) { bool _mi_mem_protect(void* p, size_t size) {
@ -56,20 +56,23 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(verbose) }, { 0, UNINIT, MI_OPTION(verbose) },
// the following options are experimental and not all combinations make sense. // the following options are experimental and not all combinations make sense.
{ 1, UNINIT, MI_OPTION(eager_commit) }, // note: needs to be on when eager_region_commit is enabled { 1, UNINIT, MI_OPTION(eager_commit) }, // commit on demand
#ifdef _WIN32 // and BSD? #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit?
{ 0, UNINIT, MI_OPTION(eager_region_commit) }, // don't commit too eagerly on windows (just for looks...) { 0, UNINIT, MI_OPTION(eager_region_commit) },
{ 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory
#else #else
{ 1, UNINIT, MI_OPTION(eager_region_commit) }, { 1, UNINIT, MI_OPTION(eager_region_commit) },
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED
#endif #endif
{ 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's
{ 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) },
{ 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(page_reset) }, { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
{ 0, UNINIT, MI_OPTION(cache_reset) }, { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(reset_decommits) }, // note: cannot enable this if secure is on
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
{ 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit)
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
{ 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds
{ 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output { 16, UNINIT, MI_OPTION(max_errors) } // maximum errors that are output
}; };
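These options can also be changed programmatically, and mimalloc's readme documents an equivalent MIMALLOC_<OPTION> environment-variable convention (e.g. MIMALLOC_VERBOSE=1). A hedged example using the public mi_option_* API with two of the options from the table above:

#include <mimalloc.h>

int main(void) {
  mi_option_set(mi_option_reset_delay, 200);   // reset delay in milliseconds
  mi_option_enable(mi_option_verbose);         // enable verbose messages
  void* p = mi_malloc(64);
  mi_free(p);
  return 0;
}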
@ -138,7 +141,8 @@ void mi_option_disable(mi_option_t option) {
} }
static void mi_out_stderr(const char* msg) { static void mi_out_stderr(const char* msg, void* arg) {
UNUSED(arg);
#ifdef _WIN32 #ifdef _WIN32
// on windows with redirection, the C runtime cannot handle locale dependent output // on windows with redirection, the C runtime cannot handle locale dependent output
// after the main thread closes so we use direct console output. // after the main thread closes so we use direct console output.
@ -158,7 +162,8 @@ static void mi_out_stderr(const char* msg) {
static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static char out_buf[MI_MAX_DELAY_OUTPUT+1];
static _Atomic(uintptr_t) out_len; static _Atomic(uintptr_t) out_len;
static void mi_out_buf(const char* msg) { static void mi_out_buf(const char* msg, void* arg) {
UNUSED(arg);
if (msg==NULL) return; if (msg==NULL) return;
if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; if (mi_atomic_read_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return;
size_t n = strlen(msg); size_t n = strlen(msg);
@ -173,14 +178,14 @@ static void mi_out_buf(const char* msg) {
memcpy(&out_buf[start], msg, n); memcpy(&out_buf[start], msg, n);
} }
static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) { static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) {
if (out==NULL) return; if (out==NULL) return;
// claim (if `no_more_buf == true`, no more output will be added after this point) // claim (if `no_more_buf == true`, no more output will be added after this point)
size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1)); size_t count = mi_atomic_addu(&out_len, (no_more_buf ? MI_MAX_DELAY_OUTPUT : 1));
// and output the current contents // and output the current contents
if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT; if (count>MI_MAX_DELAY_OUTPUT) count = MI_MAX_DELAY_OUTPUT;
out_buf[count] = 0; out_buf[count] = 0;
out(out_buf); out(out_buf,arg);
if (!no_more_buf) { if (!no_more_buf) {
out_buf[count] = '\n'; // if continue with the buffer, insert a newline out_buf[count] = '\n'; // if continue with the buffer, insert a newline
} }
@ -189,9 +194,9 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf) {
// Once this module is loaded, switch to this routine // Once this module is loaded, switch to this routine
// which outputs to stderr and the delayed output buffer. // which outputs to stderr and the delayed output buffer.
static void mi_out_buf_stderr(const char* msg) { static void mi_out_buf_stderr(const char* msg, void* arg) {
mi_out_stderr(msg); mi_out_stderr(msg,arg);
mi_out_buf(msg); mi_out_buf(msg,arg);
} }
@ -204,60 +209,65 @@ static void mi_out_buf_stderr(const char* msg) {
// For now, don't register output from multiple threads. // For now, don't register output from multiple threads.
#pragma warning(suppress:4180) #pragma warning(suppress:4180)
static mi_output_fun* volatile mi_out_default; // = NULL static mi_output_fun* volatile mi_out_default; // = NULL
static volatile _Atomic(void*) mi_out_arg; // = NULL
static mi_output_fun* mi_out_get_default(void) { static mi_output_fun* mi_out_get_default(void** parg) {
if (parg != NULL) { *parg = mi_atomic_read_ptr(&mi_out_arg); }
mi_output_fun* out = mi_out_default; mi_output_fun* out = mi_out_default;
return (out == NULL ? &mi_out_buf : out); return (out == NULL ? &mi_out_buf : out);
} }
void mi_register_output(mi_output_fun* out) mi_attr_noexcept { void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept {
mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer mi_out_default = (out == NULL ? &mi_out_stderr : out); // stop using the delayed output buffer
if (out!=NULL) mi_out_buf_flush(out,true); // output all the delayed output now mi_atomic_write_ptr(&mi_out_arg, arg);
if (out!=NULL) mi_out_buf_flush(out,true,arg); // output all the delayed output now
} }
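Since mi_register_output now takes a user argument that is passed back to the callback, redirecting all mimalloc messages to an arbitrary sink becomes straightforward. A usage sketch, assuming mi_output_fun has the (const char* msg, void* arg) shape used throughout this file:

#include <stdio.h>
#include <mimalloc.h>

static void log_to_file(const char* msg, void* arg) {
  fputs(msg, (FILE*)arg);                      // arg is the FILE* registered below
}

int main(void) {
  mi_register_output(log_to_file, stderr);     // route messages (and the delayed buffer) here
  mi_option_enable(mi_option_verbose);         // produce some output to see it working
  void* p = mi_malloc(32);
  mi_free(p);
  return 0;
}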
// add stderr to the delayed output after the module is loaded // add stderr to the delayed output after the module is loaded
static void mi_add_stderr_output() { static void mi_add_stderr_output() {
mi_out_buf_flush(&mi_out_stderr, false); // flush current contents to stderr mi_assert_internal(mi_out_default == NULL);
mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr
mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output
} }
// -------------------------------------------------------- // --------------------------------------------------------
// Messages, all end up calling `_mi_fputs`. // Messages, all end up calling `_mi_fputs`.
// -------------------------------------------------------- // --------------------------------------------------------
#define MAX_ERROR_COUNT (10)
static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings static volatile _Atomic(uintptr_t) error_count; // = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings
// When overriding malloc, we may recurse into mi_vfprintf if an allocation // When overriding malloc, we may recurse into mi_vfprintf if an allocation
// inside the C runtime causes another message. // inside the C runtime causes another message.
static mi_decl_thread bool recurse = false; static mi_decl_thread bool recurse = false;
void _mi_fputs(mi_output_fun* out, const char* prefix, const char* message) { void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) {
if (recurse) return; if (recurse) return;
if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) out = mi_out_get_default(); if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr?
out = mi_out_get_default(&arg);
}
recurse = true; recurse = true;
if (prefix != NULL) out(prefix); if (prefix != NULL) out(prefix,arg);
out(message); out(message,arg);
recurse = false; recurse = false;
return; return;
} }
// Define our own limited `fprintf` that avoids memory allocation. // Define our own limited `fprintf` that avoids memory allocation.
// We do this using `snprintf` with a limited buffer. // We do this using `snprintf` with a limited buffer.
static void mi_vfprintf( mi_output_fun* out, const char* prefix, const char* fmt, va_list args ) { static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) {
char buf[512]; char buf[512];
if (fmt==NULL) return; if (fmt==NULL) return;
if (recurse) return; if (recurse) return;
recurse = true; recurse = true;
vsnprintf(buf,sizeof(buf)-1,fmt,args); vsnprintf(buf,sizeof(buf)-1,fmt,args);
recurse = false; recurse = false;
_mi_fputs(out,prefix,buf); _mi_fputs(out,arg,prefix,buf);
} }
void _mi_fprintf( mi_output_fun* out, const char* fmt, ... ) { void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) {
va_list args; va_list args;
va_start(args,fmt); va_start(args,fmt);
mi_vfprintf(out,NULL,fmt,args); mi_vfprintf(out,arg,NULL,fmt,args);
va_end(args); va_end(args);
} }
@ -265,7 +275,7 @@ void _mi_trace_message(const char* fmt, ...) {
if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
mi_vfprintf(NULL, "mimalloc: ", fmt, args); mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args);
va_end(args); va_end(args);
} }
@ -273,7 +283,7 @@ void _mi_verbose_message(const char* fmt, ...) {
if (!mi_option_is_enabled(mi_option_verbose)) return; if (!mi_option_is_enabled(mi_option_verbose)) return;
va_list args; va_list args;
va_start(args,fmt); va_start(args,fmt);
mi_vfprintf(NULL, "mimalloc: ", fmt, args); mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args);
va_end(args); va_end(args);
} }
@ -282,7 +292,7 @@ void _mi_error_message(const char* fmt, ...) {
if (mi_atomic_increment(&error_count) > mi_max_error_count) return; if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
va_list args; va_list args;
va_start(args,fmt); va_start(args,fmt);
mi_vfprintf(NULL, "mimalloc: error: ", fmt, args); mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args);
va_end(args); va_end(args);
mi_assert(false); mi_assert(false);
} }
@ -292,14 +302,14 @@ void _mi_warning_message(const char* fmt, ...) {
if (mi_atomic_increment(&error_count) > mi_max_error_count) return; if (mi_atomic_increment(&error_count) > mi_max_error_count) return;
va_list args; va_list args;
va_start(args,fmt); va_start(args,fmt);
mi_vfprintf(NULL, "mimalloc: warning: ", fmt, args); mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args);
va_end(args); va_end(args);
} }
#if MI_DEBUG #if MI_DEBUG
void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) { void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, const char* func ) {
_mi_fprintf(NULL,"mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion); _mi_fprintf(NULL, NULL, "mimalloc: assertion failed: at \"%s\":%u, %s\n assertion: \"%s\"\n", fname, line, (func==NULL?"":func), assertion);
abort(); abort();
} }
#endif #endif
@ -307,7 +317,7 @@ void _mi_assert_fail(const char* assertion, const char* fname, unsigned line, co
mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) { mi_attr_noreturn void _mi_fatal_error(const char* fmt, ...) {
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
mi_vfprintf(NULL, "mimalloc: fatal: ", fmt, args); mi_vfprintf(NULL, NULL, "mimalloc: fatal: ", fmt, args);
va_end(args); va_end(args);
#if (MI_SECURE>=0) #if (MI_SECURE>=0)
abort(); abort();
src/os.c
@ -36,8 +36,6 @@ terms of the MIT license. A copy of the license can be found in the file
large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). large OS pages (if MIMALLOC_LARGE_OS_PAGES is true).
----------------------------------------------------------- */ ----------------------------------------------------------- */
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
bool _mi_os_is_huge_reserved(void* p);
void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment);
static void* mi_align_up_ptr(void* p, size_t alignment) { static void* mi_align_up_ptr(void* p, size_t alignment) {
return (void*)_mi_align_up((uintptr_t)p, alignment); return (void*)_mi_align_up((uintptr_t)p, alignment);
@ -182,7 +180,7 @@ void _mi_os_init() {
static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats)
{ {
if (addr == NULL || size == 0 || _mi_os_is_huge_reserved(addr)) return true; if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr)
bool err = false; bool err = false;
#if defined(_WIN32) #if defined(_WIN32)
err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
@ -207,31 +205,6 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size);
#ifdef _WIN32 #ifdef _WIN32
static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) {
#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
// on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
if ((size % ((uintptr_t)1 << 30)) == 0 /* 1GiB multiple */
&& (flags & MEM_LARGE_PAGES) != 0 && (flags & MEM_COMMIT) != 0 && (flags & MEM_RESERVE) != 0
&& (addr != NULL || try_alignment == 0 || try_alignment % _mi_os_page_size() == 0)
&& pNtAllocateVirtualMemoryEx != NULL)
{
#ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE
#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10)
#endif
MEM_EXTENDED_PARAMETER param = { 0, 0 };
param.Type = 5; // == MemExtendedParameterAttributeFlags;
param.ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
SIZE_T psize = size;
void* base = addr;
NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, &param, 1);
if (err == 0) {
return base;
}
else {
// else fall back to regular large OS pages
_mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error 0x%lx)\n", err);
}
}
#endif
#if (MI_INTPTR_SIZE >= 8) #if (MI_INTPTR_SIZE >= 8)
// on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations
void* hint; void* hint;
@ -325,7 +298,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
#if !defined(MAP_ANONYMOUS) #if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON #define MAP_ANONYMOUS MAP_ANON
#endif #endif
int flags = MAP_PRIVATE | MAP_ANONYMOUS; #if !defined(MAP_NORESERVE)
#define MAP_NORESERVE 0
#endif
int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
int fd = -1; int fd = -1;
#if defined(MAP_ALIGNED) // BSD #if defined(MAP_ALIGNED) // BSD
if (try_alignment > 0) { if (try_alignment > 0) {
@ -355,7 +331,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok); mi_atomic_cas_weak(&large_page_try_ok, try_ok - 1, try_ok);
} }
else { else {
int lflags = flags; int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux
int lfd = fd; int lfd = fd;
#ifdef MAP_ALIGNED_SUPER #ifdef MAP_ALIGNED_SUPER
lflags |= MAP_ALIGNED_SUPER; lflags |= MAP_ALIGNED_SUPER;
@ -364,7 +340,8 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
lflags |= MAP_HUGETLB; lflags |= MAP_HUGETLB;
#endif #endif
#ifdef MAP_HUGE_1GB #ifdef MAP_HUGE_1GB
if ((size % ((uintptr_t)1 << 30)) == 0) { static bool mi_huge_pages_available = true;
if ((size % GiB) == 0 && mi_huge_pages_available) {
lflags |= MAP_HUGE_1GB; lflags |= MAP_HUGE_1GB;
} }
else else
@ -383,6 +360,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd);
#ifdef MAP_HUGE_1GB #ifdef MAP_HUGE_1GB
if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) {
mi_huge_pages_available = false; // don't try huge 1GiB pages again
_mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno);
lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB);
p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd);
@ -400,10 +378,10 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd);
#if defined(MADV_HUGEPAGE) #if defined(MADV_HUGEPAGE)
// Many Linux systems don't allow MAP_HUGETLB but they support instead // Many Linux systems don't allow MAP_HUGETLB but they support instead
// transparent huge pages (TPH). It is not required to call `madvise` with MADV_HUGE // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE
// though since properly aligned allocations will already use large pages if available // though since properly aligned allocations will already use large pages if available
// in that case -- in particular for our large regions (in `memory.c`). // in that case -- in particular for our large regions (in `memory.c`).
// However, some systems only allow TPH if called with explicit `madvise`, so // However, some systems only allow THP if called with explicit `madvise`, so
// when large OS pages are enabled for mimalloc, we call `madvise` anyway. // when large OS pages are enabled for mimalloc, we call `madvise` anyway.
if (allow_large && use_large_os_page(size, try_alignment)) { if (allow_large && use_large_os_page(size, try_alignment)) {
if (madvise(p, size, MADV_HUGEPAGE) == 0) { if (madvise(p, size, MADV_HUGEPAGE) == 0) {
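
As a standalone illustration of the transparent-huge-page advice discussed in the comments above (not mimalloc code; Linux-style mmap/madvise assumed, alignment of the returned pointer ignored since mimalloc handles that via its address hints):

#include <sys/mman.h>
#include <stddef.h>

// Map an anonymous read/write region and advise the kernel that it may back
// it with transparent huge pages. MADV_HUGEPAGE is purely advisory: the
// allocation still succeeds if the kernel ignores the hint.
static void* alloc_with_thp_hint(size_t size) {
  void* p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) return NULL;
#if defined(MADV_HUGEPAGE)
  (void)madvise(p, size, MADV_HUGEPAGE);  // best effort only
#endif
  return p;
}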
@ -429,8 +407,8 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages) if (hint == 0 || hint > ((intptr_t)30<<40)) { // try to wrap around after 30TiB (area after 32TiB is used for huge OS pages)
intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area intptr_t init = ((intptr_t)4 << 40); // start at 4TiB area
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode
uintptr_t r = _mi_random_init((uintptr_t)&mi_os_get_aligned_hint ^ hint); uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFF)); // (randomly 0-64k)*4MiB == 0 to 256GiB init = init + (MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)); // (randomly 20 bits)*4MiB == 0 to 4TiB
#endif #endif
mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size); mi_atomic_cas_strong(mi_atomic_cast(uintptr_t, &aligned_base), init, hint + size);
hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all hint = mi_atomic_add(&aligned_base, size); // this may still give 0 or > 30TiB but that is ok, it is a hint after all
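
For the arithmetic behind the widened mask: the old `(r>>17) & 0xFFFF` kept 16 random bits, so the slide past the 4TiB base was at most 65535 * 4MiB, roughly 256GiB, while the new `(r>>17) & 0xFFFFF` keeps 20 bits, at most (2^20 - 1) * 4MiB, roughly 4TiB -- still safely inside the 30TiB wrap-around window checked above (assuming the usual 4MiB MI_SEGMENT_SIZE).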
@ -618,15 +596,27 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t*
return mi_os_page_align_areax(true, addr, size, newsize); return mi_os_page_align_areax(true, addr, size, newsize);
} }
static void mi_mprotect_hint(int err) {
#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page
if (err == ENOMEM) {
_mi_warning_message("the previous warning may have been caused by a low memory map limit.\n"
" On Linux this is controlled by the vm.max_map_count. For example:\n"
" > sudo sysctl -w vm.max_map_count=262144\n");
}
#else
UNUSED(err);
#endif
}
// Commit/Decommit memory. // Commit/Decommit memory.
// Usually commit is aligned liberally, while decommit is aligned conservatively. // Usually commit is aligned liberally, while decommit is aligned conservatively.
// (but not for the reset version where we want commit to be conservative as well) // (but not for the reset version where we want commit to be conservative as well)
static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) {
// page align in the range, commit liberally, decommit conservative // page align in the range, commit liberally, decommit conservative
*is_zero = false; if (is_zero != NULL) { *is_zero = false; }
size_t csize; size_t csize;
void* start = mi_os_page_align_areax(conservative, addr, size, &csize); void* start = mi_os_page_align_areax(conservative, addr, size, &csize);
if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr))
int err = 0; int err = 0;
if (commit) { if (commit) {
_mi_stat_increase(&stats->committed, csize); _mi_stat_increase(&stats->committed, csize);
@ -649,31 +639,42 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
} }
#elif defined(__wasi__) #elif defined(__wasi__)
// WebAssembly guests can't control memory protection // WebAssembly guests can't control memory protection
#elif defined(MAP_FIXED)
if (!commit) {
// use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge)
void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0);
if (p != start) { err = errno; }
}
else {
// for commit, just change the protection
err = mprotect(start, csize, (PROT_READ | PROT_WRITE));
if (err != 0) { err = errno; }
}
#else #else
err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE));
if (err != 0) { err = errno; } if (err != 0) { err = errno; }
#endif #endif
if (err != 0) { if (err != 0) {
_mi_warning_message("commit/decommit error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); _mi_warning_message("%s error: start: 0x%p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err);
mi_mprotect_hint(err);
} }
mi_assert_internal(err == 0); mi_assert_internal(err == 0);
return (err == 0); return (err == 0);
} }
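
The MAP_FIXED branch above relies on a common POSIX idiom: re-mapping an already-mapped range as anonymous PROT_NONE memory atomically replaces its pages, dropping both contents and commit charge while keeping the address range reserved, and a later mprotect back to read/write makes it usable (and zero-filled) again. A minimal sketch of that idiom on its own, assuming Linux-style MAP_NORESERVE and page-aligned arguments (as mi_os_page_align_areax guarantees above):

#include <sys/mman.h>
#include <errno.h>
#include <stddef.h>

// Drop contents and commit charge of [addr, addr+size) but keep the range reserved.
static int os_discard(void* addr, size_t size) {
  void* p = mmap(addr, size, PROT_NONE,
                 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  return (p == addr ? 0 : errno);
}

// Make the range accessible again; its pages read back as zero.
static int os_recommit(void* addr, size_t size) {
  return (mprotect(addr, size, PROT_READ | PROT_WRITE) == 0 ? 0 : errno);
}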
bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) {
return mi_os_commitx(addr, size, true, false /* conservative? */, is_zero, stats); return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats);
} }
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) { bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats) {
bool is_zero; bool is_zero;
return mi_os_commitx(addr, size, false, true /* conservative? */, &is_zero, stats); return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats);
} }
bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) {
return mi_os_commitx(addr, size, true, true /* conservative? */, is_zero, stats); return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats);
} }
// Signal to the OS that the address range is no longer in use // Signal to the OS that the address range is no longer in use
// but may be used later again. This will release physical memory // but may be used later again. This will release physical memory
// pages and reduce swapping while keeping the memory committed. // pages and reduce swapping while keeping the memory committed.
@ -682,7 +683,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
// page align conservatively within the range // page align conservatively within the range
size_t csize; size_t csize;
void* start = mi_os_page_align_area_conservative(addr, size, &csize); void* start = mi_os_page_align_area_conservative(addr, size, &csize);
if (csize == 0 || _mi_os_is_huge_reserved(addr)) return true; if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)
if (reset) _mi_stat_increase(&stats->reset, csize); if (reset) _mi_stat_increase(&stats->reset, csize);
else _mi_stat_decrease(&stats->reset, csize); else _mi_stat_decrease(&stats->reset, csize);
if (!reset) return true; // nothing to do on unreset! if (!reset) return true; // nothing to do on unreset!
@ -732,7 +733,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
// We page align to a conservative area inside the range to reset. // We page align to a conservative area inside the range to reset.
bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) {
if (mi_option_is_enabled(mi_option_reset_decommits)) { if (mi_option_is_enabled(mi_option_reset_decommits)) {
return _mi_os_decommit(addr,size,stats); return _mi_os_decommit(addr, size, stats);
} }
else { else {
return mi_os_resetx(addr, size, true, stats); return mi_os_resetx(addr, size, true, stats);
@ -756,9 +757,11 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
size_t csize = 0; size_t csize = 0;
void* start = mi_os_page_align_area_conservative(addr, size, &csize); void* start = mi_os_page_align_area_conservative(addr, size, &csize);
if (csize == 0) return false; if (csize == 0) return false;
/*
if (_mi_os_is_huge_reserved(addr)) { if (_mi_os_is_huge_reserved(addr)) {
_mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n");
} }
*/
int err = 0; int err = 0;
#ifdef _WIN32 #ifdef _WIN32
DWORD oldprotect = 0; DWORD oldprotect = 0;
@ -772,6 +775,7 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) {
#endif #endif
if (err != 0) { if (err != 0) {
_mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err); _mi_warning_message("mprotect error: start: 0x%p, csize: 0x%x, err: %i\n", start, csize, err);
mi_mprotect_hint(err);
} }
return (err == 0); return (err == 0);
} }
@ -808,141 +812,267 @@ bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) {
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
Support for huge OS pages (1GiB) that are reserved up-front and never Support for allocating huge OS pages (1GiB) that are reserved up-front
released. Only regions are allocated in here (see `memory.c`) so the memory and possibly associated with a specific NUMA node. (use `numa_node>=0`)
will be reused.
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
#define MI_HUGE_OS_PAGE_SIZE ((size_t)1 << 30) // 1GiB #define MI_HUGE_OS_PAGE_SIZE (GiB)
typedef struct mi_huge_info_s { #if defined(WIN32) && (MI_INTPTR_SIZE >= 8)
volatile _Atomic(void*) start; // start of huge page area (32TiB) static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
volatile _Atomic(size_t) reserved; // total reserved size
volatile _Atomic(size_t) used; // currently allocated
} mi_huge_info_t;
static mi_huge_info_t os_huge_reserved = { NULL, 0, ATOMIC_VAR_INIT(0) };
bool _mi_os_is_huge_reserved(void* p) {
return (mi_atomic_read_ptr(&os_huge_reserved.start) != NULL &&
p >= mi_atomic_read_ptr(&os_huge_reserved.start) &&
(uint8_t*)p < (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + mi_atomic_read(&os_huge_reserved.reserved));
}
void* _mi_os_try_alloc_from_huge_reserved(size_t size, size_t try_alignment)
{ {
// only allow large aligned allocations (e.g. regions) mi_assert_internal(size%GiB == 0);
if (size < MI_SEGMENT_SIZE || (size % MI_SEGMENT_SIZE) != 0) return NULL; mi_assert_internal(addr != NULL);
if (try_alignment > MI_SEGMENT_SIZE) return NULL; const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
if (mi_atomic_read_ptr(&os_huge_reserved.start)==NULL) return NULL;
if (mi_atomic_read(&os_huge_reserved.used) >= mi_atomic_read(&os_huge_reserved.reserved)) return NULL; // already full
// always aligned mi_win_enable_large_os_pages();
mi_assert_internal(mi_atomic_read(&os_huge_reserved.used) % MI_SEGMENT_SIZE == 0 );
mi_assert_internal( (uintptr_t)mi_atomic_read_ptr(&os_huge_reserved.start) % MI_SEGMENT_SIZE == 0 );
// try to reserve space #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS)
size_t base = mi_atomic_addu( &os_huge_reserved.used, size ); MEM_EXTENDED_PARAMETER params[3] = { {0,0},{0,0},{0,0} };
if ((base + size) > os_huge_reserved.reserved) { // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
// "free" our over-allocation static bool mi_huge_pages_available = true;
mi_atomic_subu( &os_huge_reserved.used, size); if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) {
return NULL; #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE
#define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10)
#endif
params[0].Type = 5; // == MemExtendedParameterAttributeFlags;
params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
ULONG param_count = 1;
if (numa_node >= 0) {
param_count++;
params[1].Type = MemExtendedParameterNumaNode;
params[1].ULong = (unsigned)numa_node;
} }
SIZE_T psize = size;
// success! void* base = addr;
uint8_t* p = (uint8_t*)mi_atomic_read_ptr(&os_huge_reserved.start) + base; NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count);
mi_assert_internal( (uintptr_t)p % MI_SEGMENT_SIZE == 0 ); if (err == 0 && base != NULL) {
return p; return base;
}
else {
// fall back to regular large pages
mi_huge_pages_available = false; // don't try further huge pages
_mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err);
}
}
// on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation
if (pVirtualAlloc2 != NULL && numa_node >= 0) {
params[0].Type = MemExtendedParameterNumaNode;
params[0].ULong = (unsigned)numa_node;
return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
}
#endif
// otherwise use regular virtual alloc on older windows
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
} }
/* #elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8)
static void mi_os_free_huge_reserved() { #include <sys/syscall.h>
uint8_t* addr = os_huge_reserved.start; #ifndef MPOL_PREFERRED
size_t total = os_huge_reserved.reserved; #define MPOL_PREFERRED 1
os_huge_reserved.reserved = 0; #endif
os_huge_reserved.start = NULL; #if defined(SYS_mbind)
for( size_t current = 0; current < total; current += MI_HUGE_OS_PAGE_SIZE) { static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
_mi_os_free(addr + current, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags);
}
}
*/
#if !(MI_INTPTR_SIZE >= 8 && (defined(_WIN32) || defined(MI_OS_USE_MMAP)))
int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
UNUSED(pages); UNUSED(max_secs);
if (pages_reserved != NULL) *pages_reserved = 0;
return ENOMEM;
} }
#else #else
int mi_reserve_huge_os_pages( size_t pages, double max_secs, size_t* pages_reserved ) mi_attr_noexcept static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) {
{ UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags);
if (pages_reserved != NULL) *pages_reserved = 0;
if (max_secs==0) return ETIMEDOUT; // timeout
if (pages==0) return 0; // ok
if (!mi_atomic_cas_ptr_strong(&os_huge_reserved.start,(void*)1,NULL)) return ETIMEDOUT; // already reserved
// Set the start address after the 32TiB area
uint8_t* start = (uint8_t*)((uintptr_t)32 << 40); // 32TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode
uintptr_t r = _mi_random_init((uintptr_t)&mi_reserve_huge_os_pages);
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x3FF)); // (randomly 0-1024)*1GiB == 0 to 1TiB
#endif
// Allocate one page at a time but try to place them contiguously
// We allocate one page at a time to be able to abort if it takes too long
double start_t = _mi_clock_start();
uint8_t* addr = start; // current top of the allocations
for (size_t page = 0; page < pages; page++, addr += MI_HUGE_OS_PAGE_SIZE ) {
// allocate a page
void* p = NULL;
bool is_large = true;
#ifdef _WIN32
if (page==0) { mi_win_enable_large_os_pages(); }
p = mi_win_virtual_alloc(addr, MI_HUGE_OS_PAGE_SIZE, 0, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE, true, true, &is_large);
#elif defined(MI_OS_USE_MMAP)
p = mi_unix_mmap(addr, MI_HUGE_OS_PAGE_SIZE, 0, PROT_READ | PROT_WRITE, true, true, &is_large);
#else
// always fail
#endif
// Did we succeed at a contiguous address?
if (p != addr) {
// no success, issue a warning and return with an error
if (p != NULL) {
_mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr);
_mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main );
}
else {
#ifdef _WIN32
int err = GetLastError();
#else
int err = errno;
#endif
_mi_warning_message("could not allocate huge page %zu at 0x%p, error: %i\n", page, addr, err);
}
return ENOMEM;
}
// success, record it
if (page==0) {
mi_atomic_write_ptr(&os_huge_reserved.start, addr); // don't switch the order of these writes
mi_atomic_write(&os_huge_reserved.reserved, MI_HUGE_OS_PAGE_SIZE);
}
else {
mi_atomic_addu(&os_huge_reserved.reserved,MI_HUGE_OS_PAGE_SIZE);
}
_mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
_mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
if (pages_reserved != NULL) { *pages_reserved = page + 1; }
// check for timeout
double elapsed = _mi_clock_end(start_t);
if (elapsed > max_secs) return ETIMEDOUT;
if (page >= 1) {
double estimate = ((elapsed / (double)(page+1)) * (double)pages);
if (estimate > 1.5*max_secs) return ETIMEDOUT; // seems like we are going to timeout
}
}
_mi_verbose_message("reserved %zu huge pages\n", pages);
return 0; return 0;
} }
#endif #endif
static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
mi_assert_internal(size%GiB == 0);
bool is_large = true;
void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large);
if (p == NULL) return NULL;
if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes
uintptr_t numa_mask = (1UL << numa_node);
// TODO: does `mbind` work correctly for huge OS pages? should we
// use `set_mempolicy` before calling mmap instead?
// see: <https://lkml.org/lkml/2017/2/9/875>
long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0);
if (err != 0) {
_mi_warning_message("failed to bind huge (1GiB) pages to NUMA node %d: %s\n", numa_node, strerror(errno));
}
}
return p;
}
#else
static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) {
return NULL;
}
#endif
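
For reference, `MPOL_PREFERRED` with the single-bit nodemask built above (`1UL << numa_node`) asks the kernel to satisfy faults from that node first but to fall back to other nodes when it is out of memory, so a failed or ineffective bind costs locality rather than correctness -- which is why the code only emits a warning when `mbind` fails.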
#if (MI_INTPTR_SIZE >= 8)
// To ensure proper alignment, use our own area for huge OS pages
static _Atomic(uintptr_t) mi_huge_start; // = 0
// Claim an aligned address range for huge pages
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
if (total_size != NULL) *total_size = 0;
const size_t size = pages * MI_HUGE_OS_PAGE_SIZE;
uintptr_t start = 0;
uintptr_t end = 0;
uintptr_t expected;
do {
start = expected = mi_atomic_read_relaxed(&mi_huge_start);
if (start == 0) {
// Initialize the start address after the 32TiB area
start = ((uintptr_t)32 << 40); // 32TiB virtual start address
#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode
uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12 bits)*1GiB == 0 to 4TiB
#endif
}
end = start + size;
mi_assert_internal(end % MI_SEGMENT_SIZE == 0);
} while (!mi_atomic_cas_strong(&mi_huge_start, end, expected));
if (total_size != NULL) *total_size = size;
return (uint8_t*)start;
}
#else
static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
if (total_size != NULL) *total_size = 0;
return NULL;
}
#endif
// Allocate MI_SEGMENT_SIZE aligned huge pages
void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) {
if (psize != NULL) *psize = 0;
if (pages_reserved != NULL) *pages_reserved = 0;
size_t size = 0;
uint8_t* start = mi_os_claim_huge_pages(pages, &size);
if (start == NULL) return NULL; // e.g. on 32-bit systems
// Allocate one page at a time but try to place them contiguously
// We allocate one page at a time to be able to abort if it takes too long
// or to at least allocate as many as available on the system.
mi_msecs_t start_t = _mi_clock_start();
size_t page;
for (page = 0; page < pages; page++) {
// allocate a page
void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE);
void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node);
// Did we succeed at a contiguous address?
if (p != addr) {
// no success, issue a warning and break
if (p != NULL) {
_mi_warning_message("could not allocate contiguous huge page %zu at 0x%p\n", page, addr);
_mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main);
}
break;
}
// success, record it
_mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE);
_mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE);
// check for timeout
if (max_msecs > 0) {
mi_msecs_t elapsed = _mi_clock_end(start_t);
if (page >= 1) {
mi_msecs_t estimate = ((elapsed / (page+1)) * pages);
if (estimate > 2*max_msecs) { // seems like we are going to timeout, break
elapsed = max_msecs + 1;
}
}
if (elapsed > max_msecs) {
_mi_warning_message("huge page allocation timed out\n");
break;
}
}
}
mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size);
if (pages_reserved != NULL) *pages_reserved = page;
if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE;
return (page == 0 ? NULL : start);
}
// free every huge page in a range individually (as we allocated per page)
// note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems.
void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
if (p==NULL || size==0) return;
uint8_t* base = (uint8_t*)p;
while (size >= MI_HUGE_OS_PAGE_SIZE) {
_mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats);
size -= MI_HUGE_OS_PAGE_SIZE;
}
}
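
From the application side this machinery is reached through the reservation API (and, if the option plumbing is unchanged, the `MIMALLOC_RESERVE_HUGE_OS_PAGES` environment variable). A hedged usage sketch via the long-standing `mi_reserve_huge_os_pages(pages, max_secs, &reserved)` entry point, assuming its public signature is not altered by this patch:

#include <stdio.h>
#include <mimalloc.h>

int main(void) {
  size_t reserved = 0;
  // Try to reserve 4 x 1GiB huge OS pages up-front, giving up after ~2 seconds.
  int err = mi_reserve_huge_os_pages(4, 2.0, &reserved);
  if (err != 0) {
    fprintf(stderr, "only %zu huge page(s) reserved (err=%d)\n", reserved, err);
  }
  return 0;
}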
/* ----------------------------------------------------------------------------
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
#ifdef WIN32
static size_t mi_os_numa_nodex() {
PROCESSOR_NUMBER pnum;
USHORT numa_node = 0;
GetCurrentProcessorNumberEx(&pnum);
GetNumaProcessorNodeEx(&pnum,&numa_node);
return numa_node;
}
static size_t mi_os_numa_node_countx(void) {
ULONG numa_max = 0;
GetNumaHighestNodeNumber(&numa_max);
return (numa_max + 1);
}
#elif defined(__linux__)
#include <sys/syscall.h> // getcpu
#include <stdio.h> // access
static size_t mi_os_numa_nodex(void) {
#ifdef SYS_getcpu
unsigned long node = 0;
unsigned long ncpu = 0;
long err = syscall(SYS_getcpu, &ncpu, &node, NULL);
if (err != 0) return 0;
return node;
#else
return 0;
#endif
}
static size_t mi_os_numa_node_countx(void) {
char buf[128];
unsigned node = 0;
for(node = 0; node < 256; node++) {
// enumerate node entries -- todo: is there a more efficient way to do this? (but ensure there is no allocation)
snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1);
if (access(buf,R_OK) != 0) break;
}
return (node+1);
}
#else
static size_t mi_os_numa_nodex(void) {
return 0;
}
static size_t mi_os_numa_node_countx(void) {
return 1;
}
#endif
size_t _mi_numa_node_count = 0; // cache the node count
size_t _mi_os_numa_node_count_get(void) {
if (mi_unlikely(_mi_numa_node_count <= 0)) {
long ncount = mi_option_get(mi_option_use_numa_nodes); // given explicitly?
if (ncount <= 0) ncount = (long)mi_os_numa_node_countx(); // or detect dynamically
_mi_numa_node_count = (size_t)(ncount <= 0 ? 1 : ncount);
_mi_verbose_message("using %zd numa regions\n", _mi_numa_node_count);
}
mi_assert_internal(_mi_numa_node_count >= 1);
return _mi_numa_node_count;
}
int _mi_os_numa_node_get(mi_os_tld_t* tld) {
UNUSED(tld);
size_t numa_count = _mi_os_numa_node_count();
if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0
// never more than the node count and >= 0
size_t numa_node = mi_os_numa_nodex();
if (numa_node >= numa_count) { numa_node = numa_node % numa_count; }
return (int)numa_node;
}
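
Since the detected node count is cached and can be pinned through `mi_option_use_numa_nodes` (typically also settable as the `MIMALLOC_USE_NUMA_NODES` environment variable, assuming the usual option naming), a small sketch of overriding it explicitly:

#include <mimalloc.h>

// Hypothetical override: treat the machine as having 2 NUMA "regions",
// skipping the /sys (or GetNumaHighestNodeNumber) probing; must run before
// the first allocation that triggers NUMA detection.
void limit_numa_regions(void) {
  mi_option_set(mi_option_use_numa_nodes, 2);
}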

src/page-queue.c

@ -178,20 +178,20 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t*
#endif #endif
static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) {
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
mi_heap_t* heap = page->heap; mi_heap_t* heap = mi_page_heap(page);
mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL);
mi_page_queue_t* pq = &heap->pages[bin]; mi_page_queue_t* pq = &heap->pages[bin];
mi_assert_internal(bin >= MI_BIN_HUGE || page->block_size == pq->block_size); mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size);
mi_assert_expensive(mi_page_queue_contains(pq, page)); mi_assert_expensive(mi_page_queue_contains(pq, page));
return pq; return pq;
} }
static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->block_size)); uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
mi_assert_internal(bin <= MI_BIN_FULL); mi_assert_internal(bin <= MI_BIN_FULL);
mi_page_queue_t* pq = &heap->pages[bin]; mi_page_queue_t* pq = &heap->pages[bin];
mi_assert_internal(mi_page_is_in_full(page) || page->block_size == pq->block_size); mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size);
return pq; return pq;
} }
@ -246,35 +246,35 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) {
static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page != NULL); mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(queue, page)); mi_assert_expensive(mi_page_queue_contains(queue, page));
mi_assert_internal(page->block_size == queue->block_size || (page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_heap_t* heap = mi_page_heap(page);
if (page->prev != NULL) page->prev->next = page->next; if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev; if (page->next != NULL) page->next->prev = page->prev;
if (page == queue->last) queue->last = page->prev; if (page == queue->last) queue->last = page->prev;
if (page == queue->first) { if (page == queue->first) {
queue->first = page->next; queue->first = page->next;
// update first // update first
mi_heap_t* heap = page->heap;
mi_assert_internal(mi_heap_contains_queue(heap, queue)); mi_assert_internal(mi_heap_contains_queue(heap, queue));
mi_heap_queue_first_update(heap,queue); mi_heap_queue_first_update(heap,queue);
} }
page->heap->page_count--; heap->page_count--;
page->next = NULL; page->next = NULL;
page->prev = NULL; page->prev = NULL;
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL);
mi_page_set_in_full(page,false); mi_page_set_in_full(page,false);
} }
static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(page->heap == NULL); mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(!mi_page_queue_contains(queue, page)); mi_assert_internal(!mi_page_queue_contains(queue, page));
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
mi_assert_internal(page->block_size == queue->block_size || mi_assert_internal(page->xblock_size == queue->block_size ||
(page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); (mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
mi_page_set_in_full(page, mi_page_queue_is_full(queue)); mi_page_set_in_full(page, mi_page_queue_is_full(queue));
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); // mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap);
page->next = queue->first; page->next = queue->first;
page->prev = NULL; page->prev = NULL;
if (queue->first != NULL) { if (queue->first != NULL) {
@ -296,19 +296,19 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
mi_assert_internal(page != NULL); mi_assert_internal(page != NULL);
mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(mi_page_queue_contains(from, page));
mi_assert_expensive(!mi_page_queue_contains(to, page)); mi_assert_expensive(!mi_page_queue_contains(to, page));
mi_assert_internal((page->block_size == to->block_size && page->block_size == from->block_size) || mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) ||
(page->block_size == to->block_size && mi_page_queue_is_full(from)) || (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) ||
(page->block_size == from->block_size && mi_page_queue_is_full(to)) || (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) ||
(page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) ||
(page->block_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to)));
mi_heap_t* heap = mi_page_heap(page);
if (page->prev != NULL) page->prev->next = page->next; if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev; if (page->next != NULL) page->next->prev = page->prev;
if (page == from->last) from->last = page->prev; if (page == from->last) from->last = page->prev;
if (page == from->first) { if (page == from->first) {
from->first = page->next; from->first = page->next;
// update first // update first
mi_heap_t* heap = page->heap;
mi_assert_internal(mi_heap_contains_queue(heap, from)); mi_assert_internal(mi_heap_contains_queue(heap, from));
mi_heap_queue_first_update(heap, from); mi_heap_queue_first_update(heap, from);
} }
@ -316,14 +316,14 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
page->prev = to->last; page->prev = to->last;
page->next = NULL; page->next = NULL;
if (to->last != NULL) { if (to->last != NULL) {
mi_assert_internal(page->heap == to->last->heap); mi_assert_internal(heap == mi_page_heap(to->last));
to->last->next = page; to->last->next = page;
to->last = page; to->last = page;
} }
else { else {
to->first = page; to->first = page;
to->last = page; to->last = page;
mi_heap_queue_first_update(page->heap, to); mi_heap_queue_first_update(heap, to);
} }
mi_page_set_in_full(page, mi_page_queue_is_full(to)); mi_page_set_in_full(page, mi_page_queue_is_full(to));
@ -338,7 +338,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
// set append pages to new heap and count // set append pages to new heap and count
size_t count = 0; size_t count = 0;
for (mi_page_t* page = append->first; page != NULL; page = page->next) { for (mi_page_t* page = append->first; page != NULL; page = page->next) {
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), heap); mi_page_set_heap(page,heap);
count++; count++;
} }

src/page.c

@ -29,16 +29,17 @@ terms of the MIT license. A copy of the license can be found in the file
----------------------------------------------------------- */ ----------------------------------------------------------- */
// Index a block in a page // Index a block in a page
static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t i) { static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) {
UNUSED(page);
mi_assert_internal(page != NULL); mi_assert_internal(page != NULL);
mi_assert_internal(i <= page->reserved); mi_assert_internal(i <= page->reserved);
return (mi_block_t*)((uint8_t*)page_start + (i * page->block_size)); return (mi_block_t*)((uint8_t*)page_start + (i * block_size));
} }
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_stats_t* stats); static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t size, mi_tld_t* tld);
#if (MI_DEBUG>1) #if (MI_DEBUG>=3)
static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) { static size_t mi_page_list_count(mi_page_t* page, mi_block_t* head) {
size_t count = 0; size_t count = 0;
while (head != NULL) { while (head != NULL) {
@ -69,13 +70,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) {
} }
static bool mi_page_is_valid_init(mi_page_t* page) { static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->block_size > 0); mi_assert_internal(page->xblock_size > 0);
mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->used <= page->capacity);
mi_assert_internal(page->capacity <= page->reserved); mi_assert_internal(page->capacity <= page->reserved);
const size_t bsize = mi_page_block_size(page);
mi_segment_t* segment = _mi_page_segment(page); mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = _mi_page_start(segment,page,NULL); uint8_t* start = _mi_page_start(segment,page,NULL);
mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL));
//mi_assert_internal(start + page->capacity*page->block_size == page->top); //mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free)); mi_assert_internal(mi_page_list_is_valid(page,page->free));
@ -89,10 +91,10 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
} }
#endif #endif
mi_block_t* tfree = mi_tf_block(page->thread_free); mi_block_t* tfree = mi_page_thread_free(page);
mi_assert_internal(mi_page_list_is_valid(page, tfree)); mi_assert_internal(mi_page_list_is_valid(page, tfree));
size_t tfree_count = mi_page_list_count(page, tfree); //size_t tfree_count = mi_page_list_count(page, tfree);
mi_assert_internal(tfree_count <= page->thread_freed + 1); //mi_assert_internal(tfree_count <= page->thread_freed + 1);
size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free);
mi_assert_internal(page->used + free_count == page->capacity); mi_assert_internal(page->used + free_count == page->capacity);
@ -103,42 +105,43 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
bool _mi_page_is_valid(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) {
mi_assert_internal(mi_page_is_valid_init(page)); mi_assert_internal(mi_page_is_valid_init(page));
#if MI_SECURE #if MI_SECURE
mi_assert_internal(page->cookie != 0); mi_assert_internal(page->key != 0);
#endif #endif
if (page->heap!=NULL) { if (mi_page_heap(page)!=NULL) {
mi_segment_t* segment = _mi_page_segment(page); mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id || segment->thread_id==0); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0);
if (segment->page_kind != MI_PAGE_HUGE) { if (segment->page_kind != MI_PAGE_HUGE) {
mi_page_queue_t* pq = mi_page_queue_of(page); mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page)); mi_assert_internal(mi_page_queue_contains(pq, page));
mi_assert_internal(pq->block_size==page->block_size || page->block_size > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page));
mi_assert_internal(mi_heap_contains_queue(page->heap,pq)); mi_assert_internal(mi_heap_contains_queue(mi_page_heap(page),pq));
} }
} }
return true; return true;
} }
#endif #endif
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) {
mi_thread_free_t tfree; mi_thread_free_t tfree;
mi_thread_free_t tfreex; mi_thread_free_t tfreex;
mi_delayed_t old_delay;
do { do {
tfreex = tfree = page->thread_free; tfree = mi_atomic_read(&page->xthread_free);
if (mi_unlikely(mi_tf_delayed(tfree) < MI_DELAYED_FREEING)) { tfreex = mi_tf_set_delayed(tfree, delay);
tfreex = mi_tf_set_delayed(tfree,delay); old_delay = mi_tf_delayed(tfree);
if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) {
// mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail
} }
else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { else if (delay == old_delay) {
mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. break; // avoid atomic operation if already equal
continue; // and try again
} }
else if (!override_never && old_delay == MI_NEVER_DELAYED_FREE) {
break; // leave never-delayed flag set
} }
while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree));
} }
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Page collect the `local_free` and `thread_free` lists Page collect the `local_free` and `thread_free` lists
----------------------------------------------------------- */ ----------------------------------------------------------- */
@ -153,17 +156,17 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
mi_thread_free_t tfree; mi_thread_free_t tfree;
mi_thread_free_t tfreex; mi_thread_free_t tfreex;
do { do {
tfree = page->thread_free; tfree = mi_atomic_read_relaxed(&page->xthread_free);
head = mi_tf_block(tfree); head = mi_tf_block(tfree);
tfreex = mi_tf_set_block(tfree,NULL); tfreex = mi_tf_set_block(tfree,NULL);
} while (!mi_atomic_cas_weak(mi_atomic_cast(uintptr_t,&page->thread_free), tfreex, tfree)); } while (!mi_atomic_cas_weak(&page->xthread_free, tfreex, tfree));
// return if the list is empty // return if the list is empty
if (head == NULL) return; if (head == NULL) return;
// find the tail -- also to get a proper count (without data races) // find the tail -- also to get a proper count (without data races)
uintptr_t max_count = page->capacity; // cannot collect more than capacity uint32_t max_count = page->capacity; // cannot collect more than capacity
uintptr_t count = 1; uint32_t count = 1;
mi_block_t* tail = head; mi_block_t* tail = head;
mi_block_t* next; mi_block_t* next;
while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) {
@ -181,7 +184,6 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
page->local_free = head; page->local_free = head;
// update counts now // update counts now
mi_atomic_subu(&page->thread_freed, count);
page->used -= count; page->used -= count;
} }
@ -189,7 +191,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
mi_assert_internal(page!=NULL); mi_assert_internal(page!=NULL);
// collect the thread free list // collect the thread free list
if (force || mi_tf_block(page->thread_free) != NULL) { // quick test to avoid an atomic operation if (force || mi_page_thread_free(page) != NULL) { // quick test to avoid an atomic operation
_mi_page_thread_free_collect(page); _mi_page_thread_free_collect(page);
} }
@ -227,10 +229,11 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
// called from segments when reclaiming abandoned pages // called from segments when reclaiming abandoned pages
void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_expensive(mi_page_is_valid_init(page));
mi_assert_internal(page->heap == NULL); mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
_mi_page_free_collect(page,false); mi_assert_internal(!page->is_reset);
mi_page_queue_t* pq = mi_page_queue(heap, page->block_size); mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
mi_page_queue_push(heap, pq, page); mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_expensive(_mi_page_is_valid(page));
} }
@ -241,7 +244,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
if (page == NULL) return NULL; if (page == NULL) return NULL;
mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE);
mi_page_init(heap, page, block_size, &heap->tld->stats); mi_page_init(heap, page, block_size, heap->tld);
_mi_stat_increase( &heap->tld->stats.pages, 1); _mi_stat_increase( &heap->tld->stats.pages, 1);
if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_expensive(_mi_page_is_valid(page));
@ -265,8 +268,8 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
// otherwise allocate the page // otherwise allocate the page
page = mi_page_fresh_alloc(heap, pq, pq->block_size); page = mi_page_fresh_alloc(heap, pq, pq->block_size);
if (page==NULL) return NULL; if (page==NULL) return NULL;
mi_assert_internal(pq->block_size==page->block_size); mi_assert_internal(pq->block_size==mi_page_block_size(page));
mi_assert_internal(pq==mi_page_queue(heap,page->block_size)); mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
return page; return page;
} }
@ -283,7 +286,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
// and free them all // and free them all
while(block != NULL) { while(block != NULL) {
mi_block_t* next = mi_block_nextx(heap,block, heap->cookie); mi_block_t* next = mi_block_nextx(heap,block, heap->key[0], heap->key[1]);
// use internal free instead of regular one to keep stats etc correct // use internal free instead of regular one to keep stats etc correct
if (!_mi_free_delayed_block(block)) { if (!_mi_free_delayed_block(block)) {
// we might already start delayed freeing while another thread has not yet // we might already start delayed freeing while another thread has not yet
@ -291,9 +294,8 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
mi_block_t* dfree; mi_block_t* dfree;
do { do {
dfree = (mi_block_t*)heap->thread_delayed_free; dfree = (mi_block_t*)heap->thread_delayed_free;
mi_block_set_nextx(heap, block, dfree, heap->cookie); mi_block_set_nextx(heap, block, dfree, heap->key[0], heap->key[1]);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree)); } while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&heap->thread_delayed_free), block, dfree));
} }
block = next; block = next;
} }
@ -308,11 +310,9 @@ void _mi_page_unfull(mi_page_t* page) {
mi_assert_internal(page != NULL); mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(mi_page_is_in_full(page)); mi_assert_internal(mi_page_is_in_full(page));
_mi_page_use_delayed_free(page, MI_NO_DELAYED_FREE);
if (!mi_page_is_in_full(page)) return; if (!mi_page_is_in_full(page)) return;
mi_heap_t* heap = page->heap; mi_heap_t* heap = mi_page_heap(page);
mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL]; mi_page_queue_t* pqfull = &heap->pages[MI_BIN_FULL];
mi_page_set_in_full(page, false); // to get the right queue mi_page_set_in_full(page, false); // to get the right queue
mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
@ -325,10 +325,8 @@ static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(!mi_page_immediate_available(page)); mi_assert_internal(!mi_page_immediate_available(page));
mi_assert_internal(!mi_page_is_in_full(page)); mi_assert_internal(!mi_page_is_in_full(page));
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE);
if (mi_page_is_in_full(page)) return; if (mi_page_is_in_full(page)) return;
mi_page_queue_enqueue_from(&mi_page_heap(page)->pages[MI_BIN_FULL], pq, page);
mi_page_queue_enqueue_from(&page->heap->pages[MI_BIN_FULL], pq, page);
_mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set _mi_page_free_collect(page,false); // try to collect right away in case another thread freed just before MI_USE_DELAYED_FREE was set
} }
@ -341,28 +339,27 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) {
mi_assert_internal(page != NULL); mi_assert_internal(page != NULL);
mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(page->heap != NULL); mi_assert_internal(mi_page_heap(page) != NULL);
#if MI_DEBUG > 1 mi_heap_t* pheap = mi_page_heap(page);
mi_heap_t* pheap = (mi_heap_t*)mi_atomic_read_ptr(mi_atomic_cast(void*, &page->heap));
#endif
// remove from our page list // remove from our page list
mi_segments_tld_t* segments_tld = &page->heap->tld->segments; mi_segments_tld_t* segments_tld = &pheap->tld->segments;
mi_page_queue_remove(pq, page); mi_page_queue_remove(pq, page);
// page is no longer associated with our heap // page is no longer associated with our heap
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
mi_page_set_heap(page, NULL);
#if MI_DEBUG>1 #if MI_DEBUG>1
// check there are no references left.. // check there are no references left..
for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->cookie)) { for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->key[0], pheap->key[1])) {
mi_assert_internal(_mi_ptr_page(block) != page); mi_assert_internal(_mi_ptr_page(block) != page);
} }
#endif #endif
// and abandon it // and abandon it
mi_assert_internal(page->heap == NULL); mi_assert_internal(mi_page_heap(page) == NULL);
_mi_segment_page_abandon(page,segments_tld); _mi_segment_page_abandon(page,segments_tld);
} }
@ -373,33 +370,18 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_expensive(_mi_page_is_valid(page));
mi_assert_internal(pq == mi_page_queue_of(page)); mi_assert_internal(pq == mi_page_queue_of(page));
mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(mi_page_all_free(page));
#if MI_DEBUG>1 mi_assert_internal(mi_page_thread_free_flag(page)!=MI_DELAYED_FREEING);
// check if we can safely free
mi_thread_free_t free = mi_tf_set_delayed(page->thread_free,MI_NEVER_DELAYED_FREE);
free = mi_atomic_exchange(&page->thread_free, free);
mi_assert_internal(mi_tf_delayed(free) != MI_DELAYED_FREEING);
#endif
// no more aligned blocks in here
mi_page_set_has_aligned(page, false); mi_page_set_has_aligned(page, false);
// account for huge pages here
// (note: no longer necessary as huge pages are always abandoned)
if (page->block_size > MI_LARGE_OBJ_SIZE_MAX) {
if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) {
_mi_stat_decrease(&page->heap->tld->stats.giant, page->block_size);
}
else {
_mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
}
}
// remove from the page list // remove from the page list
// (no need to do _mi_heap_delayed_free first as all blocks are already free) // (no need to do _mi_heap_delayed_free first as all blocks are already free)
mi_segments_tld_t* segments_tld = &page->heap->tld->segments; mi_segments_tld_t* segments_tld = &mi_page_heap(page)->tld->segments;
mi_page_queue_remove(pq, page); mi_page_queue_remove(pq, page);
// and free it // and free it
mi_assert_internal(page->heap == NULL); mi_page_set_heap(page,NULL);
_mi_segment_page_free(page, force, segments_tld); _mi_segment_page_free(page, force, segments_tld);
} }
@ -423,17 +405,37 @@ void _mi_page_retire(mi_page_t* page) {
// how to check this efficiently though... // how to check this efficiently though...
// for now, we don't retire if it is the only page left of this size class. // for now, we don't retire if it is the only page left of this size class.
mi_page_queue_t* pq = mi_page_queue_of(page); mi_page_queue_t* pq = mi_page_queue_of(page);
if (mi_likely(page->block_size <= (MI_SMALL_SIZE_MAX/4))) { if (mi_likely(page->xblock_size <= MI_SMALL_SIZE_MAX && !mi_page_is_in_full(page))) {
// if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { if (pq->last==page && pq->first==page) { // the only page in the queue?
if (pq->last==page && pq->first==page) {
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
return; // don't retire after all page->retire_expire = 4;
mi_assert_internal(mi_page_all_free(page));
return; // don't free after all
} }
} }
_mi_page_free(page, pq, false); _mi_page_free(page, pq, false);
} }
// free retired pages: we don't need to look at the entire queues
// since we only retire pages that are the last one in a queue.
void _mi_heap_collect_retired(mi_heap_t* heap, bool force) {
for(mi_page_queue_t* pq = heap->pages; pq->block_size <= MI_SMALL_SIZE_MAX; pq++) {
mi_page_t* page = pq->first;
if (page != NULL && page->retire_expire != 0) {
if (mi_page_all_free(page)) {
page->retire_expire--;
if (force || page->retire_expire == 0) {
_mi_page_free(pq->first, pq, force);
}
}
else {
page->retire_expire = 0;
}
}
}
}
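
The net effect of the `retire_expire` countdown is that a fully-free page which is the only one in its small size class lingers for a few retirement sweeps before being handed back to the segment, avoiding thrash when a size class repeatedly drains and refills. Passing `force=true` skips the countdown; a hedged sketch of triggering that from application code, assuming `mi_collect(true)` reaches this path:

#include <mimalloc.h>

// Hypothetical trim point after a temporary allocation burst: force a collection
// so retired-but-cached pages are released without waiting for the countdown.
void trim_after_burst(void) {
  mi_collect(true /* force */);
}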
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Initialize the initial free list in a page. Initialize the initial free list in a page.
@@ -445,15 +447,15 @@ void _mi_page_retire(mi_page_t* page) {
#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES (2) #define MI_MIN_SLICES (2)
static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t extend, mi_stats_t* const stats) { static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) {
UNUSED(stats); UNUSED(stats);
#if (MI_SECURE<=2) #if (MI_SECURE<=2)
mi_assert_internal(page->free == NULL); mi_assert_internal(page->free == NULL);
mi_assert_internal(page->local_free == NULL); mi_assert_internal(page->local_free == NULL);
#endif #endif
mi_assert_internal(page->capacity + extend <= page->reserved); mi_assert_internal(page->capacity + extend <= page->reserved);
mi_assert_internal(bsize == mi_page_block_size(page));
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
const size_t bsize = page->block_size;
// initialize a randomized free list // initialize a randomized free list
// set up `slice_count` slices to alternate between // set up `slice_count` slices to alternate between
@@ -467,18 +469,19 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
size_t counts[MI_MAX_SLICES]; // available objects in the slice size_t counts[MI_MAX_SLICES]; // available objects in the slice
for (size_t i = 0; i < slice_count; i++) { for (size_t i = 0; i < slice_count; i++) {
blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); blocks[i] = mi_page_block_at(page, page_area, bsize, page->capacity + i*slice_extend);
counts[i] = slice_extend; counts[i] = slice_extend;
} }
counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?)
// and initialize the free list by randomly threading through them // and initialize the free list by randomly threading through them
// set up first element // set up first element
size_t current = _mi_heap_random(heap) % slice_count; const uintptr_t r = _mi_heap_random_next(heap);
size_t current = r % slice_count;
counts[current]--; counts[current]--;
mi_block_t* const free_start = blocks[current]; mi_block_t* const free_start = blocks[current];
// and iterate through the rest // and iterate through the rest; use `random_shuffle` for performance
uintptr_t rnd = heap->random; uintptr_t rnd = _mi_random_shuffle(r|1); // ensure not 0
for (size_t i = 1; i < extend; i++) { for (size_t i = 1; i < extend; i++) {
// call random_shuffle only every INTPTR_SIZE rounds // call random_shuffle only every INTPTR_SIZE rounds
const size_t round = i%MI_INTPTR_SIZE; const size_t round = i%MI_INTPTR_SIZE;
@@ -499,10 +502,9 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co
// prepend to the free list (usually NULL) // prepend to the free list (usually NULL)
mi_block_set_next(page, blocks[current], page->free); // end of the list mi_block_set_next(page, blocks[current], page->free); // end of the list
page->free = free_start; page->free = free_start;
heap->random = _mi_random_shuffle(rnd);
} }
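To make heap layouts harder to predict, the secure extension above splits the newly reserved blocks into up to `MI_MAX_SLICES` slices and threads the free list by repeatedly hopping to a pseudo-randomly chosen slice. A compact standalone sketch of the same slicing idea, printing one possible visit order of `n` block indices (it uses `rand` for brevity; not the mimalloc shuffle):

#include <stdio.h>
#include <stdlib.h>

#define SLICES 4

// print one randomized visit order of `n` blocks split over SLICES slices;
// a real free-list extension would link the blocks in exactly this order
static void randomized_order(size_t n, unsigned seed) {
  if (n < SLICES) return;                 // sketch assumes at least one block per slice
  size_t start[SLICES], count[SLICES];
  const size_t per = n / SLICES;
  for (size_t s = 0; s < SLICES; s++) {
    start[s] = s * per;                   // current position inside slice `s`
    count[s] = per;                       // blocks still available in slice `s`
  }
  count[SLICES-1] += n % SLICES;          // the last slice takes the remainder
  srand(seed);
  for (size_t left = n; left > 0; left--) {
    size_t s = (size_t)rand() % SLICES;          // pick a random slice...
    while (count[s] == 0) s = (s + 1) % SLICES;  // ...skipping exhausted ones
    printf("%zu ", start[s]);
    start[s]++;
    count[s]--;
  }
  printf("\n");
}

int main(void) {
  randomized_order(16, 42);   // interleaves indices from the ranges 0-3, 4-7, 8-11, 12-15
  return 0;
}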
static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t extend, mi_stats_t* const stats) static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats)
{ {
UNUSED(stats); UNUSED(stats);
#if (MI_SECURE <= 2) #if (MI_SECURE <= 2)
@@ -510,12 +512,13 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
mi_assert_internal(page->local_free == NULL); mi_assert_internal(page->local_free == NULL);
#endif #endif
mi_assert_internal(page->capacity + extend <= page->reserved); mi_assert_internal(page->capacity + extend <= page->reserved);
mi_assert_internal(bsize == mi_page_block_size(page));
void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
const size_t bsize = page->block_size;
mi_block_t* const start = mi_page_block_at(page, page_area, page->capacity); mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity);
// initialize a sequential free list // initialize a sequential free list
mi_block_t* const last = mi_page_block_at(page, page_area, page->capacity + extend - 1); mi_block_t* const last = mi_page_block_at(page, page_area, bsize, page->capacity + extend - 1);
mi_block_t* block = start; mi_block_t* block = start;
while(block <= last) { while(block <= last) {
mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
@@ -543,8 +546,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
// Note: we also experimented with "bump" allocation on the first // Note: we also experimented with "bump" allocation on the first
// allocations but this did not speed up any benchmark (due to an // allocations but this did not speed up any benchmark (due to an
// extra test in malloc? or cache effects?) // extra test in malloc? or cache effects?)
static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* stats) { static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) {
UNUSED(stats);
mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_expensive(mi_page_is_valid_init(page));
#if (MI_SECURE<=2) #if (MI_SECURE<=2)
mi_assert(page->free == NULL); mi_assert(page->free == NULL);
@@ -554,12 +556,13 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
if (page->capacity >= page->reserved) return; if (page->capacity >= page->reserved) return;
size_t page_size; size_t page_size;
_mi_page_start(_mi_page_segment(page), page, &page_size); uint8_t* page_start = _mi_page_start(_mi_page_segment(page), page, &page_size);
mi_stat_counter_increase(stats->pages_extended, 1); mi_stat_counter_increase(tld->stats.pages_extended, 1);
// calculate the extend count // calculate the extend count
const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size);
size_t extend = page->reserved - page->capacity; size_t extend = page->reserved - page->capacity;
size_t max_extend = (page->block_size >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)page->block_size); size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND;
if (extend > max_extend) { if (extend > max_extend) {
@@ -571,16 +574,22 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved);
mi_assert_internal(extend < (1UL<<16)); mi_assert_internal(extend < (1UL<<16));
// commit on-demand for large and huge pages?
if (_mi_page_segment(page)->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) {
uint8_t* start = page_start + (page->capacity * bsize);
_mi_mem_commit(start, extend * bsize, NULL, &tld->os);
}
// and append the extend to the free list // and append the extend to the free list
if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) { if (extend < MI_MIN_SLICES || MI_SECURE==0) { //!mi_option_is_enabled(mi_option_secure)) {
mi_page_free_list_extend(page, extend, stats ); mi_page_free_list_extend(page, bsize, extend, &tld->stats );
} }
else { else {
mi_page_free_list_extend_secure(heap, page, extend, stats); mi_page_free_list_extend_secure(heap, page, bsize, extend, &tld->stats);
} }
// enable the new free list // enable the new free list
page->capacity += (uint16_t)extend; page->capacity += (uint16_t)extend;
mi_stat_increase(stats->page_committed, extend * page->block_size); mi_stat_increase(tld->stats.page_committed, extend * bsize);
// extension into zero initialized memory preserves the zero'd free list // extension into zero initialized memory preserves the zero'd free list
if (!page->is_zero_init) { if (!page->is_zero_init) {
@@ -590,37 +599,40 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
} }
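The extend count computed above bounds how much free-list initialization is done per call: at most roughly one `MI_MAX_EXTEND_SIZE` worth of blocks, and at least `MI_MIN_EXTEND` blocks. A small worked example of that formula, assuming for illustration a 4 KiB extend budget and a one-block minimum (the real constants live in src/page.c):

#include <stdio.h>

// illustrative stand-ins for MI_MAX_EXTEND_SIZE / MI_MIN_EXTEND (assumed values)
#define MAX_EXTEND_SIZE (4*1024)
#define MIN_EXTEND      (1)

// how many new blocks does one extend call set up for a given block size?
static size_t extend_count(size_t bsize, size_t reserved, size_t capacity) {
  size_t extend     = reserved - capacity;   // blocks still uninitialized in the page
  size_t max_extend = (bsize >= MAX_EXTEND_SIZE ? MIN_EXTEND : MAX_EXTEND_SIZE / bsize);
  if (max_extend < MIN_EXTEND) max_extend = MIN_EXTEND;
  return (extend > max_extend ? max_extend : extend);
}

int main(void) {
  printf("%zu\n", extend_count(64, 1024, 0));    // 64-byte blocks: 4096/64 = 64 blocks per call
  printf("%zu\n", extend_count(8*1024, 2, 0));   // large blocks: one block per call
  return 0;
}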
// Initialize a fresh page // Initialize a fresh page
static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_stats_t* stats) { static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi_tld_t* tld) {
mi_assert(page != NULL); mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page); mi_segment_t* segment = _mi_page_segment(page);
mi_assert(segment != NULL); mi_assert(segment != NULL);
mi_assert_internal(block_size > 0); mi_assert_internal(block_size > 0);
// set fields // set fields
mi_page_set_heap(page, heap);
size_t page_size; size_t page_size;
_mi_segment_page_start(segment, page, block_size, &page_size); _mi_segment_page_start(segment, page, block_size, &page_size, NULL);
page->block_size = block_size; page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE);
mi_assert_internal(page_size / block_size < (1L<<16)); mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size); page->reserved = (uint16_t)(page_size / block_size);
#ifdef MI_ENCODE_FREELIST #ifdef MI_ENCODE_FREELIST
page->cookie = _mi_heap_random(heap) | 1; page->key[0] = _mi_heap_random_next(heap);
page->key[1] = _mi_heap_random_next(heap);
#endif #endif
page->is_zero = page->is_zero_init; page->is_zero = page->is_zero_init;
mi_assert_internal(page->capacity == 0); mi_assert_internal(page->capacity == 0);
mi_assert_internal(page->free == NULL); mi_assert_internal(page->free == NULL);
mi_assert_internal(page->used == 0); mi_assert_internal(page->used == 0);
mi_assert_internal(page->thread_free == 0); mi_assert_internal(page->xthread_free == 0);
mi_assert_internal(page->thread_freed == 0);
mi_assert_internal(page->next == NULL); mi_assert_internal(page->next == NULL);
mi_assert_internal(page->prev == NULL); mi_assert_internal(page->prev == NULL);
mi_assert_internal(page->retire_expire == 0);
mi_assert_internal(!mi_page_has_aligned(page)); mi_assert_internal(!mi_page_has_aligned(page));
#if (MI_ENCODE_FREELIST) #if (MI_ENCODE_FREELIST)
mi_assert_internal(page->cookie != 0); mi_assert_internal(page->key[0] != 0);
mi_assert_internal(page->key[1] != 0);
#endif #endif
mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_expensive(mi_page_is_valid_init(page));
// initialize an initial free list // initialize an initial free list
mi_page_extend_free(heap,page,stats); mi_page_extend_free(heap,page,tld);
mi_assert(mi_page_immediate_available(page)); mi_assert(mi_page_immediate_available(page));
} }
@@ -633,39 +645,24 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq) static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq)
{ {
// search through the pages in "next fit" order // search through the pages in "next fit" order
mi_page_t* rpage = NULL;
size_t count = 0; size_t count = 0;
size_t page_free_count = 0;
mi_page_t* page = pq->first; mi_page_t* page = pq->first;
while( page != NULL) while (page != NULL)
{ {
mi_page_t* next = page->next; // remember next mi_page_t* next = page->next; // remember next
count++; count++;
// 0. collect freed blocks by us and other threads // 0. collect freed blocks by us and other threads
_mi_page_free_collect(page,false); _mi_page_free_collect(page, false);
// 1. if the page contains free blocks, we are done // 1. if the page contains free blocks, we are done
if (mi_page_immediate_available(page)) { if (mi_page_immediate_available(page)) {
// If all blocks are free, we might retire this page instead.
// do this at most 8 times to bound allocation time.
// (note: this can happen if a page was earlier not retired due
// to having neighbours that were mostly full or due to concurrent frees)
if (page_free_count < 8 && mi_page_all_free(page)) {
page_free_count++;
if (rpage != NULL) _mi_page_free(rpage,pq,false);
rpage = page;
page = next;
continue; // and keep looking
}
else {
break; // pick this one break; // pick this one
} }
}
// 2. Try to extend // 2. Try to extend
if (page->capacity < page->reserved) { if (page->capacity < page->reserved) {
mi_page_extend_free(heap, page, &heap->tld->stats); mi_page_extend_free(heap, page, heap->tld);
mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_immediate_available(page));
break; break;
} }
@@ -673,46 +670,44 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
// 3. If the page is completely full, move it to the `mi_pages_full` // 3. If the page is completely full, move it to the `mi_pages_full`
// queue so we don't visit long-lived pages too often. // queue so we don't visit long-lived pages too often.
mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
mi_page_to_full(page,pq); mi_page_to_full(page, pq);
page = next; page = next;
} // for each page } // for each page
mi_stat_counter_increase(heap->tld->stats.searches,count); mi_stat_counter_increase(heap->tld->stats.searches, count);
if (page == NULL) {
page = rpage;
rpage = NULL;
}
if (rpage != NULL) {
_mi_page_free(rpage,pq,false);
}
if (page == NULL) { if (page == NULL) {
page = mi_page_fresh(heap, pq); page = mi_page_fresh(heap, pq);
} }
else { else {
mi_assert(pq->first == page); mi_assert(pq->first == page);
page->retire_expire = 0;
} }
mi_assert_internal(page == NULL || mi_page_immediate_available(page)); mi_assert_internal(page == NULL || mi_page_immediate_available(page));
// finally collect retired pages
_mi_heap_collect_retired(heap, false);
return page; return page;
} }
// Find a page with free blocks of `size`. // Find a page with free blocks of `size`.
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_queue_t* pq = mi_page_queue(heap,size);
mi_page_t* page = pq->first; mi_page_t* page = pq->first;
if (page != NULL) { if (page != NULL) {
if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random(heap) & 1) == 1)) { if ((MI_SECURE >= 3) && page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
// in secure mode, we extend half the time to increase randomness // in secure mode, we extend half the time to increase randomness
mi_page_extend_free(heap, page, &heap->tld->stats); mi_page_extend_free(heap, page, heap->tld);
mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_immediate_available(page));
} }
else { else {
_mi_page_free_collect(page,false); _mi_page_free_collect(page,false);
} }
if (mi_page_immediate_available(page)) { if (mi_page_immediate_available(page)) {
page->retire_expire = 0;
return page; // fast path return page; // fast path
} }
} }
@@ -728,18 +723,20 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
----------------------------------------------------------- */ ----------------------------------------------------------- */
static mi_deferred_free_fun* volatile deferred_free = NULL; static mi_deferred_free_fun* volatile deferred_free = NULL;
static volatile _Atomic(void*) deferred_arg; // = NULL
void _mi_deferred_free(mi_heap_t* heap, bool force) { void _mi_deferred_free(mi_heap_t* heap, bool force) {
heap->tld->heartbeat++; heap->tld->heartbeat++;
if (deferred_free != NULL && !heap->tld->recurse) { if (deferred_free != NULL && !heap->tld->recurse) {
heap->tld->recurse = true; heap->tld->recurse = true;
deferred_free(force, heap->tld->heartbeat); deferred_free(force, heap->tld->heartbeat, mi_atomic_read_ptr_relaxed(&deferred_arg));
heap->tld->recurse = false; heap->tld->recurse = false;
} }
} }
void mi_register_deferred_free(mi_deferred_free_fun* fn) mi_attr_noexcept { void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noexcept {
deferred_free = fn; deferred_free = fn;
mi_atomic_write_ptr(&deferred_arg, arg);
} }
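With the extra argument added above, a registered hook now receives its own state pointer on every heartbeat. A hedged usage sketch; the callback signature is inferred from the call site above (`force`, `heartbeat`, `arg`), and `my_state_t` is made up for illustration:

#include <mimalloc.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct my_state_s {
  unsigned long long last_heartbeat;   // hypothetical application state
} my_state_t;

// invoked by the allocator on its slow path; `arg` is what we registered below
static void my_deferred_free(bool force, unsigned long long heartbeat, void* arg) {
  my_state_t* st = (my_state_t*)arg;
  st->last_heartbeat = heartbeat;
  if (force) {
    // the allocator is under pressure: release any blocks we were holding back
  }
}

int main(void) {
  static my_state_t state;
  mi_register_deferred_free(&my_deferred_free, &state);  // new two-argument form
  void* p = mi_malloc(100);
  mi_free(p);
  // last_heartbeat is updated whenever the generic allocation path ran
  printf("heartbeat seen: %llu\n", state.last_heartbeat);
  return 0;
}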
@@ -756,14 +753,15 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE);
mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size);
if (page != NULL) { if (page != NULL) {
const size_t bsize = mi_page_block_size(page);
mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(page->block_size == block_size); mi_assert_internal(bsize >= size);
mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1); mi_assert_internal(_mi_page_segment(page)->used==1);
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_atomic_write_ptr(mi_atomic_cast(void*, &page->heap), NULL); mi_page_set_heap(page, NULL);
if (page->block_size > MI_HUGE_OBJ_SIZE_MAX) { if (bsize > MI_HUGE_OBJ_SIZE_MAX) {
_mi_stat_increase(&heap->tld->stats.giant, block_size); _mi_stat_increase(&heap->tld->stats.giant, block_size);
_mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1);
} }
@@ -811,7 +809,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
if (page == NULL) return NULL; // out of memory if (page == NULL) return NULL; // out of memory
mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(page->block_size >= size); mi_assert_internal(mi_page_block_size(page) >= size);
// and try again, this time succeeding! (i.e. this should never recurse) // and try again, this time succeeding! (i.e. this should never recurse)
return _mi_page_malloc(heap, page, size); return _mi_page_malloc(heap, page, size);

src/random.c Normal file
View file

@@ -0,0 +1,328 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2019, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include <string.h> // memset
/* ----------------------------------------------------------------------------
We use our own PRNG to keep the performance of random number generation predictable,
and to avoid implementations that use a lock. We only use the OS-provided
random source to initialize the initial seeds. Since we do not need ultimate
performance but do rely on security (for the secret cookies in secure mode),
we use a cryptographically secure generator (chacha20).
-----------------------------------------------------------------------------*/
#define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance?
/* ----------------------------------------------------------------------------
ChaCha20 implementation, following the original algorithm with a 64-bit nonce
and counter: https://en.wikipedia.org/wiki/Salsa20
The input matrix has sixteen 32-bit values:
Position 0 to 3: constant key
Position 4 to 11: the key
Position 12 to 13: the counter.
Position 14 to 15: the nonce.
The implementation uses regular C code which compiles very well on modern compilers.
(gcc x64 has no register spills, and clang 6+ uses SSE instructions)
-----------------------------------------------------------------------------*/
static inline uint32_t rotl(uint32_t x, uint32_t shift) {
return (x << shift) | (x >> (32 - shift));
}
static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 16);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 12);
x[a] += x[b]; x[d] = rotl(x[d] ^ x[a], 8);
x[c] += x[d]; x[b] = rotl(x[b] ^ x[c], 7);
}
static void chacha_block(mi_random_ctx_t* ctx)
{
// scramble into `x`
uint32_t x[16];
for (size_t i = 0; i < 16; i++) {
x[i] = ctx->input[i];
}
for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) {
qround(x, 0, 4, 8, 12);
qround(x, 1, 5, 9, 13);
qround(x, 2, 6, 10, 14);
qround(x, 3, 7, 11, 15);
qround(x, 0, 5, 10, 15);
qround(x, 1, 6, 11, 12);
qround(x, 2, 7, 8, 13);
qround(x, 3, 4, 9, 14);
}
// add scrambled data to the initial state
for (size_t i = 0; i < 16; i++) {
ctx->output[i] = x[i] + ctx->input[i];
}
ctx->output_available = 16;
// increment the counter for the next round
ctx->input[12] += 1;
if (ctx->input[12] == 0) {
ctx->input[13] += 1;
if (ctx->input[13] == 0) { // and keep increasing into the nonce
ctx->input[14] += 1;
}
}
}
static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
if (ctx->output_available <= 0) {
chacha_block(ctx);
ctx->output_available = 16; // (assign again to suppress static analysis warning)
}
const uint32_t x = ctx->output[16 - ctx->output_available];
ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
ctx->output_available--;
return x;
}
static inline uint32_t read32(const uint8_t* p, size_t idx32) {
const size_t i = 4*idx32;
return ((uint32_t)p[i+0] | (uint32_t)p[i+1] << 8 | (uint32_t)p[i+2] << 16 | (uint32_t)p[i+3] << 24);
}
static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
{
// since we only use chacha for randomness (and not encryption) we
// do not _need_ to read 32-bit values as little endian, but we do anyway
// just to stay compatible :-)
memset(ctx, 0, sizeof(*ctx));
for (size_t i = 0; i < 4; i++) {
const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
ctx->input[i] = read32(sigma,i);
}
for (size_t i = 0; i < 8; i++) {
ctx->input[i + 4] = read32(key,i);
}
ctx->input[12] = 0;
ctx->input[13] = 0;
ctx->input[14] = (uint32_t)nonce;
ctx->input[15] = (uint32_t)(nonce >> 32);
}
static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
memset(ctx_new, 0, sizeof(*ctx_new));
memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
ctx_new->input[12] = 0;
ctx_new->input[13] = 0;
ctx_new->input[14] = (uint32_t)nonce;
ctx_new->input[15] = (uint32_t)(nonce >> 32);
mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces!
chacha_block(ctx_new);
}
/* ----------------------------------------------------------------------------
Random interface
-----------------------------------------------------------------------------*/
#if MI_DEBUG>1
static bool mi_random_is_initialized(mi_random_ctx_t* ctx) {
return (ctx != NULL && ctx->input[0] != 0);
}
#endif
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
mi_assert_internal(mi_random_is_initialized(ctx));
mi_assert_internal(ctx != ctx_new);
chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new);
}
uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
mi_assert_internal(mi_random_is_initialized(ctx));
#if MI_INTPTR_SIZE <= 4
return chacha_next32(ctx);
#elif MI_INTPTR_SIZE == 8
return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
#else
# error "define mi_random_next for this platform"
#endif
}
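The internal interface is small: `_mi_random_init` seeds a context from the OS (or the weak fallback below), `_mi_random_next` draws a full `uintptr_t`, and `_mi_random_split` forks an independent stream with a distinct nonce. A minimal sketch of how the rest of the allocator is expected to use it (these are internal functions, so this only compiles inside the library sources):

#include "mimalloc-internal.h"

static void random_usage_sketch(void) {
  mi_random_ctx_t ctx;
  _mi_random_init(&ctx);                      // seed from BCryptGenRandom/getrandom/arc4random

  uintptr_t key0 = _mi_random_next(&ctx);     // e.g. free-list encoding keys or cookies
  uintptr_t key1 = _mi_random_next(&ctx);
  (void)key0; (void)key1;

  mi_random_ctx_t heap_ctx;
  _mi_random_split(&ctx, &heap_ctx);          // independent stream for another heap/thread
  uintptr_t r = _mi_random_next(&heap_ctx);
  (void)r;
}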
/* ----------------------------------------------------------------------------
To initialize a fresh random context we rely on the OS:
- Windows : BCryptGenRandom
- osX,bsd,wasi: arc4random_buf
- Linux : getrandom,/dev/urandom
If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
-----------------------------------------------------------------------------*/
#if defined(_WIN32)
#pragma comment (lib,"bcrypt.lib")
#include <bcrypt.h>
static bool os_random_buf(void* buf, size_t buf_len) {
return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
}
/*
#define SystemFunction036 NTAPI SystemFunction036
#include <NTSecAPI.h>
#undef SystemFunction036
static bool os_random_buf(void* buf, size_t buf_len) {
RtlGenRandom(buf, (ULONG)buf_len);
return true;
}
*/
#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__DragonFly__) || \
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__wasi__)
#include <stdlib.h>
static bool os_random_buf(void* buf, size_t buf_len) {
arc4random_buf(buf, buf_len);
return true;
}
#elif defined(__linux__)
#include <sys/syscall.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
static bool os_random_buf(void* buf, size_t buf_len) {
// Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
// and for the latter the actual `getrandom` call is not always defined.
// (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>)
// We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
#ifdef SYS_getrandom
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK (1)
#endif
static volatile _Atomic(uintptr_t) no_getrandom; // = 0
if (mi_atomic_read(&no_getrandom)==0) {
ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
if (ret >= 0) return (buf_len == (size_t)ret);
if (ret != ENOSYS) return false;
mi_atomic_write(&no_getrandom,1); // don't call again, and fall back to /dev/urandom
}
#endif
int flags = O_RDONLY;
#if defined(O_CLOEXEC)
flags |= O_CLOEXEC;
#endif
int fd = open("/dev/urandom", flags, 0);
if (fd < 0) return false;
size_t count = 0;
while(count < buf_len) {
ssize_t ret = read(fd, (char*)buf + count, buf_len - count);
if (ret<=0) {
if (errno!=EAGAIN && errno!=EINTR) break;
}
else {
count += ret;
}
}
close(fd);
return (count==buf_len);
}
#else
static bool os_random_buf(void* buf, size_t buf_len) {
return false;
}
#endif
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
#include <time.h>
#endif
static uintptr_t os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&os_random_weak ^ extra_seed; // ASLR makes the address random
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
x ^= (uintptr_t)(pcount.QuadPart);
#elif defined(__APPLE__)
x ^= (uintptr_t)mach_absolute_time();
#else
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
x ^= (uintptr_t)time.tv_sec;
x ^= (uintptr_t)time.tv_nsec;
#endif
// and do a few randomization steps
uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
for (uintptr_t i = 0; i < max; i++) {
x = _mi_random_shuffle(x);
}
mi_assert_internal(x != 0);
return x;
}
void _mi_random_init(mi_random_ctx_t* ctx) {
uint8_t key[32];
if (!os_random_buf(key, sizeof(key))) {
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
_mi_warning_message("unable to use secure randomness\n");
uintptr_t x = os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;
}
}
chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ );
}
/* --------------------------------------------------------
test vectors from <https://tools.ietf.org/html/rfc8439>
----------------------------------------------------------- */
/*
static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {
for (size_t i = 0; i < n; i++) {
if (x[i] != y[i]) return false;
}
return true;
}
static void chacha_test(void)
{
uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 };
uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb };
qround(x, 0, 1, 2, 3);
mi_assert_internal(array_equals(x, x_out, 4));
uint32_t y[16] = {
0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a,
0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c,
0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963,
0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 };
uint32_t y_out[16] = {
0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a,
0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2,
0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963,
0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 };
qround(y, 2, 7, 8, 13);
mi_assert_internal(array_equals(y, y_out, 16));
mi_random_ctx_t r = {
{ 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c,
0x00000001, 0x09000000, 0x4a000000, 0x00000000 },
{0},
0
};
uint32_t r_out[16] = {
0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3,
0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3,
0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9,
0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 };
chacha_block(&r);
mi_assert_internal(array_equals(r.output, r_out, 16));
}
*/

View file

@@ -13,6 +13,8 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_PAGE_HUGE_ALIGN (256*1024) #define MI_PAGE_HUGE_ALIGN (256*1024)
static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size);
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Segment allocation Segment allocation
We allocate pages inside big OS allocated "segments" We allocate pages inside big OS allocated "segments"
@@ -40,9 +42,8 @@ terms of the MIT license. A copy of the license can be found in the file
Queue of segments containing free pages Queue of segments containing free pages
----------------------------------------------------------- */ ----------------------------------------------------------- */
#if (MI_DEBUG>=3)
#if (MI_DEBUG>1) static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_segment_t* segment) {
static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, mi_segment_t* segment) {
mi_assert_internal(segment != NULL); mi_assert_internal(segment != NULL);
mi_segment_t* list = queue->first; mi_segment_t* list = queue->first;
while (list != NULL) { while (list != NULL) {
@@ -89,7 +90,7 @@ static mi_segment_queue_t* mi_segment_free_queue_of_kind(mi_page_kind_t kind, mi
else return NULL; else return NULL;
} }
static mi_segment_queue_t* mi_segment_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { static mi_segment_queue_t* mi_segment_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) {
return mi_segment_free_queue_of_kind(segment->page_kind, tld); return mi_segment_free_queue_of_kind(segment->page_kind, tld);
} }
@@ -111,8 +112,8 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t
Invariant checking Invariant checking
----------------------------------------------------------- */ ----------------------------------------------------------- */
#if (MI_DEBUG > 1) #if (MI_DEBUG>=2)
static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t* tld) { static bool mi_segment_is_in_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld);
bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment));
if (in_queue) { if (in_queue) {
@@ -120,50 +121,268 @@ static bool mi_segment_is_in_free_queue(mi_segment_t* segment, mi_segments_tld_t
} }
return in_queue; return in_queue;
} }
#endif
static size_t mi_segment_pagesize(mi_segment_t* segment) { static size_t mi_segment_page_size(const mi_segment_t* segment) {
if (segment->capacity > 1) {
mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM);
return ((size_t)1 << segment->page_shift); return ((size_t)1 << segment->page_shift);
}
else {
mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE);
return segment->segment_size;
}
} }
static bool mi_segment_is_valid(mi_segment_t* segment) {
#if (MI_DEBUG>=2)
static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) {
mi_page_t* p = tld->pages_reset.first;
while (p != NULL) {
if (p == page) return true;
p = p->next;
}
return false;
}
#endif
#if (MI_DEBUG>=3)
static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment != NULL); mi_assert_internal(segment != NULL);
mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie);
mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->used <= segment->capacity);
mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->abandoned <= segment->used);
size_t nfree = 0; size_t nfree = 0;
for (size_t i = 0; i < segment->capacity; i++) { for (size_t i = 0; i < segment->capacity; i++) {
if (!segment->pages[i].segment_in_use) nfree++; const mi_page_t* const page = &segment->pages[i];
if (!page->segment_in_use) {
nfree++;
}
if (page->segment_in_use || page->is_reset) {
mi_assert_expensive(!mi_pages_reset_contains(page, tld));
}
} }
mi_assert_internal(nfree + segment->used == segment->capacity); mi_assert_internal(nfree + segment->used == segment->capacity);
mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0
mi_assert_internal(segment->page_kind == MI_PAGE_HUGE || mi_assert_internal(segment->page_kind == MI_PAGE_HUGE ||
(mi_segment_pagesize(segment) * segment->capacity == segment->segment_size)); (mi_segment_page_size(segment) * segment->capacity == segment->segment_size));
return true; return true;
} }
#endif #endif
static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(page != NULL);
if (page->next != NULL || page->prev != NULL) {
mi_assert_internal(mi_pages_reset_contains(page, tld));
return false;
}
else {
// both next and prev are NULL, check for singleton list
return (tld->pages_reset.first != page && tld->pages_reset.last != page);
}
}
/* -----------------------------------------------------------
Guard pages
----------------------------------------------------------- */
static void mi_segment_protect_range(void* p, size_t size, bool protect) {
if (protect) {
_mi_mem_protect(p, size);
}
else {
_mi_mem_unprotect(p, size);
}
}
static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* tld) {
// add/remove guard pages
if (MI_SECURE != 0) {
// in secure mode, we set up a protected page in between the segment info and the page data
const size_t os_page_size = _mi_os_page_size();
mi_assert_internal((segment->segment_info_size - os_page_size) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t))));
mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_page_size == 0);
mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_page_size, os_page_size, protect);
if (MI_SECURE <= 1 || segment->capacity == 1) {
// and protect the last (or only) page too
mi_assert_internal(segment->page_kind >= MI_PAGE_LARGE);
uint8_t* start = (uint8_t*)segment + segment->segment_size - os_page_size;
if (protect && !mi_option_is_enabled(mi_option_eager_page_commit)) {
// ensure secure page is committed
_mi_mem_commit(start, os_page_size, NULL, tld);
}
mi_segment_protect_range(start, os_page_size, protect);
}
else {
// or protect every page
const size_t page_size = mi_segment_page_size(segment);
for (size_t i = 0; i < segment->capacity; i++) {
if (segment->pages[i].is_committed) {
mi_segment_protect_range((uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size, protect);
}
}
}
}
}
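For orientation, the protection scheme above can be pictured roughly as follows (a sketch only; G marks one protected OS page, sizes not to scale):

/*
  MI_SECURE == 1 (or a single-page segment): one guard at the end of the segment
  info area and one at the very end of the segment:

    [ segment info |G ][ page 0 ][ page 1 ] ... [ page N-1 |G ]

  MI_SECURE >= 2 with multiple pages: a guard at the end of every committed page
  instead of only the segment end:

    [ segment info |G ][ page 0 |G ][ page 1 |G ] ... [ page N-1 |G ]
*/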
/* -----------------------------------------------------------
Page reset
----------------------------------------------------------- */
static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) {
if (!mi_option_is_enabled(mi_option_page_reset)) return;
if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return;
size_t psize;
void* start = mi_segment_raw_page_start(segment, page, &psize);
page->is_reset = true;
mi_assert_internal(size <= psize);
size_t reset_size = (size == 0 || size > psize ? psize : size);
if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) {
mi_assert_internal(page->xblock_size > 0);
reset_size = page->capacity * mi_page_block_size(page);
}
_mi_mem_reset(start, reset_size, tld->os);
}
static void mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld)
{
mi_assert_internal(page->is_reset);
mi_assert_internal(!segment->mem_is_fixed);
page->is_reset = false;
size_t psize;
uint8_t* start = mi_segment_raw_page_start(segment, page, &psize);
size_t unreset_size = (size == 0 || size > psize ? psize : size);
if (size == 0 && segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) {
mi_assert_internal(page->xblock_size > 0);
unreset_size = page->capacity * mi_page_block_size(page);
}
bool is_zero = false;
_mi_mem_unreset(start, unreset_size, &is_zero, tld->os);
if (is_zero) page->is_zero_init = true;
}
/* -----------------------------------------------------------
The free page queue
----------------------------------------------------------- */
// we re-use the `used` field for the expiration counter. Since this is a
// 32-bit field while the clock is always 64-bit, we need to guard against
// overflow; we use subtraction to check for expiry, which works as long as
// the reset delay is under (2^30 - 1) milliseconds (~12 days)
static void mi_page_reset_set_expire(mi_page_t* page) {
uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_reset_delay);
page->used = expire;
}
static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) {
int32_t expire = (int32_t)(page->used);
return (((int32_t)now - expire) >= 0);
}
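The check above works modulo 2^32: `used` stores the low 32 bits of the expiry time, and the signed difference stays meaningful across a wrap as long as delays stay far below 2^31 ms. A tiny standalone test of that property; the subtraction here is done on unsigned values first, a well-defined variant of the same idiom:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

// "has `expire` passed?" on a 32-bit truncated clock, wrap-around safe
static int is_expired(int64_t now, uint32_t expire) {
  return ((int32_t)((uint32_t)now - expire) >= 0);
}

int main(void) {
  // near the 32-bit wrap: `now` is just past UINT32_MAX, `expire` was set 1500 ms earlier
  int64_t  now    = (int64_t)UINT32_MAX + 500;
  uint32_t expire = UINT32_MAX - 1000;
  assert(is_expired(now, expire) == 1);                  // correctly seen as expired
  assert(is_expired(now, (uint32_t)now + 100000) == 0);  // 100 s in the future: not yet
  printf("wrap-around expiry check ok\n");
  return 0;
}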
static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(!page->segment_in_use);
mi_assert_internal(mi_page_not_in_queue(page,tld));
mi_assert_expensive(!mi_pages_reset_contains(page, tld));
mi_assert_internal(_mi_page_segment(page)==segment);
if (!mi_option_is_enabled(mi_option_page_reset)) return;
if (segment->mem_is_fixed || page->segment_in_use || page->is_reset) return;
if (mi_option_get(mi_option_reset_delay) == 0) {
// reset immediately?
mi_page_reset(segment, page, 0, tld);
}
else {
// otherwise push on the delayed page reset queue
mi_page_queue_t* pq = &tld->pages_reset;
// push on top
mi_page_reset_set_expire(page);
page->next = pq->first;
page->prev = NULL;
if (pq->first == NULL) {
mi_assert_internal(pq->last == NULL);
pq->first = pq->last = page;
}
else {
pq->first->prev = page;
pq->first = page;
}
}
}
static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) {
if (mi_page_not_in_queue(page,tld)) return;
mi_page_queue_t* pq = &tld->pages_reset;
mi_assert_internal(pq!=NULL);
mi_assert_internal(!page->segment_in_use);
mi_assert_internal(mi_pages_reset_contains(page, tld));
if (page->prev != NULL) page->prev->next = page->next;
if (page->next != NULL) page->next->prev = page->prev;
if (page == pq->last) pq->last = page->prev;
if (page == pq->first) pq->first = page->next;
page->next = page->prev = NULL;
page->used = 0;
}
static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool force_reset, mi_segments_tld_t* tld) {
if (segment->mem_is_fixed) return; // never reset in huge OS pages
for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (!page->segment_in_use && !page->is_reset) {
mi_pages_reset_remove(page, tld);
if (force_reset) {
mi_page_reset(segment, page, 0, tld);
}
}
else {
mi_assert_internal(mi_page_not_in_queue(page,tld));
}
}
}
static void mi_reset_delayed(mi_segments_tld_t* tld) {
if (!mi_option_is_enabled(mi_option_page_reset)) return;
mi_msecs_t now = _mi_clock_now();
mi_page_queue_t* pq = &tld->pages_reset;
// from oldest up to the first that has not expired yet
mi_page_t* page = pq->last;
while (page != NULL && mi_page_reset_is_expired(page,now)) {
mi_page_t* const prev = page->prev; // save previous field
mi_page_reset(_mi_page_segment(page), page, 0, tld);
page->used = 0;
page->prev = page->next = NULL;
page = prev;
}
// discard the reset pages from the queue
pq->last = page;
if (page != NULL){
page->next = NULL;
}
else {
pq->first = NULL;
}
}
/* ----------------------------------------------------------- /* -----------------------------------------------------------
Segment size calculations Segment size calculations
----------------------------------------------------------- */ ----------------------------------------------------------- */
// Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) // Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set)
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size) // The raw start is not taking aligned block allocation into consideration.
{ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift);
uint8_t* p = (uint8_t*)segment + page->segment_idx*psize; uint8_t* p = (uint8_t*)segment + page->segment_idx * psize;
if (page->segment_idx == 0) { if (page->segment_idx == 0) {
// the first page starts after the segment info (and possible guard page) // the first page starts after the segment info (and possible guard page)
p += segment->segment_info_size; p += segment->segment_info_size;
psize -= segment->segment_info_size; psize -= segment->segment_info_size;
// for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore)
if (block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) {
size_t adjust = block_size - ((uintptr_t)p % block_size);
if (adjust < block_size) {
p += adjust;
psize -= adjust;
}
mi_assert_internal((uintptr_t)p % block_size == 0);
}
} }
if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) { if (MI_SECURE > 1 || (MI_SECURE == 1 && page->segment_idx == segment->capacity - 1)) {
@@ -173,19 +392,36 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa
} }
if (page_size != NULL) *page_size = psize; if (page_size != NULL) *page_size = psize;
mi_assert_internal(_mi_ptr_page(p) == page); mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page);
mi_assert_internal(_mi_ptr_segment(p) == segment); mi_assert_internal(_mi_ptr_segment(p) == segment);
return p; return p;
} }
static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { // Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set)
/* uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size)
if (mi_option_is_enabled(mi_option_secure)) { {
// always reserve maximally so the protection falls on size_t psize;
// the same address area, as we need to reuse them from the caches interchangably. uint8_t* p = mi_segment_raw_page_start(segment, page, &psize);
capacity = MI_SMALL_PAGES_PER_SEGMENT; if (pre_size != NULL) *pre_size = 0;
if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) {
// for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore)
size_t adjust = block_size - ((uintptr_t)p % block_size);
if (adjust < block_size) {
p += adjust;
psize -= adjust;
if (pre_size != NULL) *pre_size = adjust;
} }
*/ mi_assert_internal((uintptr_t)p % block_size == 0);
}
if (page_size != NULL) *page_size = psize;
mi_assert_internal(page->xblock_size==0 || _mi_ptr_page(p) == page);
mi_assert_internal(_mi_ptr_segment(p) == segment);
return p;
}
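The adjustment above (from PR #66) moves the start of the first page forward so that block 0 is aligned to the block size, reporting the skipped bytes through `pre_size`. A small standalone example of the `adjust` computation with made-up addresses:

#include <stdint.h>
#include <stdio.h>

// how far must `p` move forward to become a multiple of `block_size`?
static uintptr_t align_adjust(uintptr_t p, uintptr_t block_size) {
  uintptr_t adjust = block_size - (p % block_size);
  return (adjust < block_size ? adjust : 0);   // already aligned: no adjustment
}

int main(void) {
  // e.g. the first page's data starts 0x260 bytes into the segment, blocks are 0x300 bytes
  uintptr_t p = 0x10000260, bsize = 0x300;
  uintptr_t adjust = align_adjust(p, bsize);
  printf("adjust = %#lx, aligned start = %#lx\n",
         (unsigned long)adjust, (unsigned long)(p + adjust));
  // (p + adjust) % bsize == 0; the page gives up `adjust` bytes (reported as pre_size)
  return 0;
}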
static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size)
{
const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */;
size_t guardsize = 0; size_t guardsize = 0;
size_t isize = 0; size_t isize = 0;
@@ -230,9 +466,24 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
mi_segments_track_size(-((long)segment_size),tld); mi_segments_track_size(-((long)segment_size),tld);
if (MI_SECURE != 0) { if (MI_SECURE != 0) {
mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!segment->mem_is_fixed);
_mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set
} }
_mi_mem_free(segment, segment_size, segment->memid, tld->stats);
bool any_reset = false;
bool fully_committed = true;
for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (!page->is_committed) { fully_committed = false; }
if (page->is_reset) { any_reset = true; }
}
if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) {
fully_committed = false;
}
if (segment->page_kind >= MI_PAGE_LARGE && !mi_option_is_enabled(mi_option_eager_page_commit)) {
fully_committed = false;
}
_mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os);
} }
@@ -254,7 +505,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t
static bool mi_segment_cache_full(mi_segments_tld_t* tld) static bool mi_segment_cache_full(mi_segments_tld_t* tld)
{ {
if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread
size_t max_cache = mi_option_get(mi_option_segment_cache); size_t max_cache = mi_option_get(mi_option_segment_cache);
if (tld->cache_count < max_cache if (tld->cache_count < max_cache
&& tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache
@@ -278,9 +529,6 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld)
return false; return false;
} }
mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
if (!segment->mem_is_fixed && mi_option_is_enabled(mi_option_cache_reset)) {
_mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats);
}
segment->next = tld->cache; segment->next = tld->cache;
tld->cache = segment; tld->cache = segment;
tld->cache_count++; tld->cache_count++;
@@ -296,6 +544,8 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
} }
mi_assert_internal(tld->cache_count == 0); mi_assert_internal(tld->cache_count == 0);
mi_assert_internal(tld->cache == NULL); mi_assert_internal(tld->cache == NULL);
mi_assert_internal(tld->pages_reset.first == NULL);
mi_assert_internal(tld->pages_reset.last == NULL);
} }
@@ -323,47 +573,57 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
size_t pre_size; size_t pre_size;
size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size);
mi_assert_internal(segment_size >= required); mi_assert_internal(segment_size >= required);
size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift);
// Initialize parameters
const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit);
bool commit = eager; // || (page_kind >= MI_PAGE_LARGE);
bool pages_still_good = false;
bool is_zero = false;
// Try to get it from our thread local cache first // Try to get it from our thread local cache first
bool eager_delay = (tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
bool commit = eager || (page_kind > MI_PAGE_MEDIUM);
bool protection_still_good = false;
bool is_zero = false;
mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld);
if (segment != NULL) { if (segment != NULL) {
if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) {
pages_still_good = true;
}
else
{
if (MI_SECURE!=0) { if (MI_SECURE!=0) {
mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!segment->mem_is_fixed);
if (segment->page_kind != page_kind) { mi_segment_protect(segment, false, tld->os); // reset protection if the page kind differs
_mi_mem_unprotect(segment, segment->segment_size); // reset protection if the page kind differs }
// different page kinds; unreset any reset pages, and unprotect
// TODO: optimize cache pop to return fitting pages if possible?
for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (page->is_reset) {
if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) {
page->is_reset = false;
} }
else { else {
protection_still_good = true; // otherwise, the guard pages are still in place mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page)
} }
} }
if (!segment->mem_is_committed && page_kind > MI_PAGE_MEDIUM) {
mi_assert_internal(!segment->mem_is_fixed);
_mi_mem_commit(segment, segment->segment_size, &is_zero, tld->stats);
segment->mem_is_committed = true;
} }
if (!segment->mem_is_fixed && // ensure the initial info is committed
(mi_option_is_enabled(mi_option_cache_reset) || mi_option_is_enabled(mi_option_page_reset))) { if (segment->capacity < capacity) {
bool reset_zero = false; bool commit_zero = false;
_mi_mem_unreset(segment, segment->segment_size, &reset_zero, tld->stats); _mi_mem_commit(segment, pre_size, &commit_zero, tld->os);
if (reset_zero) is_zero = true; if (commit_zero) is_zero = true;
}
} }
} }
else { else {
// Allocate the segment from the OS // Allocate the segment from the OS
size_t memid; size_t memid;
bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld); segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_zero, &memid, os_tld);
if (segment == NULL) return NULL; // failed to allocate if (segment == NULL) return NULL; // failed to allocate
if (!commit) { if (!commit) {
// ensure the initial info is committed // ensure the initial info is committed
bool commit_zero = false; bool commit_zero = false;
_mi_mem_commit(segment, info_size, &commit_zero, tld->stats); _mi_mem_commit(segment, pre_size, &commit_zero, tld->os);
if (commit_zero) is_zero = true; if (commit_zero) is_zero = true;
} }
segment->memid = memid; segment->memid = memid;
@@ -373,27 +633,23 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
} }
mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
if (!pages_still_good) {
// zero the segment info (but not the `mem` fields) // zero the segment info (but not the `mem` fields)
ptrdiff_t ofs = offsetof(mi_segment_t,next); ptrdiff_t ofs = offsetof(mi_segment_t, next);
memset((uint8_t*)segment + ofs, 0, info_size - ofs); memset((uint8_t*)segment + ofs, 0, info_size - ofs);
// guard pages // initialize pages info
if ((MI_SECURE != 0) && !protection_still_good) { for (uint8_t i = 0; i < capacity; i++) {
// in secure mode, we set up a protected page in between the segment info segment->pages[i].segment_idx = i;
// and the page data segment->pages[i].is_reset = false;
mi_assert_internal( info_size == pre_size - _mi_os_page_size() && info_size % _mi_os_page_size() == 0); segment->pages[i].is_committed = commit;
_mi_mem_protect( (uint8_t*)segment + info_size, (pre_size - info_size) ); segment->pages[i].is_zero_init = is_zero;
size_t os_page_size = _mi_os_page_size(); }
if (MI_SECURE <= 1) {
// and protect the last page too
_mi_mem_protect( (uint8_t*)segment + segment_size - os_page_size, os_page_size );
} }
else { else {
// protect every page // zero the segment info but not the pages info (and mem fields)
for (size_t i = 0; i < capacity; i++) { ptrdiff_t ofs = offsetof(mi_segment_t, next);
_mi_mem_protect( (uint8_t*)segment + (i+1)*page_size - os_page_size, os_page_size ); memset((uint8_t*)segment + ofs, 0, offsetof(mi_segment_t,pages) - ofs);
}
}
} }
// initialize // initialize
@@ -404,13 +660,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
segment->segment_info_size = pre_size; segment->segment_info_size = pre_size;
segment->thread_id = _mi_thread_id(); segment->thread_id = _mi_thread_id();
segment->cookie = _mi_ptr_cookie(segment); segment->cookie = _mi_ptr_cookie(segment);
for (uint8_t i = 0; i < segment->capacity; i++) { // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size);
segment->pages[i].segment_idx = i;
segment->pages[i].is_reset = false; // set protection
segment->pages[i].is_committed = commit; mi_segment_protect(segment, true, tld->os);
segment->pages[i].is_zero_init = is_zero;
}
_mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size);
//fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment);
return segment; return segment;
} }
@@ -418,8 +672,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
UNUSED(force); UNUSED(force);
//fprintf(stderr,"mimalloc: free segment at %p\n", (void*)segment);
mi_assert(segment != NULL); mi_assert(segment != NULL);
// note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse)
bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset));
mi_pages_reset_remove_all_in_segment(segment, force_reset, tld);
mi_segment_remove_from_free_queue(segment,tld); mi_segment_remove_from_free_queue(segment,tld);
mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment));
@@ -428,17 +684,6 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
mi_assert(segment->prev == NULL); mi_assert(segment->prev == NULL);
_mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);
// update reset memory statistics
/*
for (uint8_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i];
if (page->is_reset) {
page->is_reset = false;
mi_stat_decrease( tld->stats->reset,mi_page_size(page));
}
}
*/
if (!force && mi_segment_cache_push(segment, tld)) { if (!force && mi_segment_cache_push(segment, tld)) {
// it is put in our cache // it is put in our cache
} }
@ -457,35 +702,38 @@ static bool mi_segment_has_free(const mi_segment_t* segment) {
return (segment->used < segment->capacity); return (segment->used < segment->capacity);
} }
static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats) { static void mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert_internal(mi_segment_has_free(segment)); mi_assert_internal(_mi_page_segment(page) == segment);
mi_assert_expensive(mi_segment_is_valid(segment)); mi_assert_internal(!page->segment_in_use);
for (size_t i = 0; i < segment->capacity; i++) { // set in-use before doing unreset to prevent delayed reset
mi_page_t* page = &segment->pages[i]; mi_pages_reset_remove(page, tld);
if (!page->segment_in_use) { page->segment_in_use = true;
if (page->is_reset || !page->is_committed) { segment->used++;
size_t psize;
uint8_t* start = _mi_page_start(segment, page, &psize);
if (!page->is_committed) { if (!page->is_committed) {
mi_assert_internal(!segment->mem_is_fixed); mi_assert_internal(!segment->mem_is_fixed);
mi_assert_internal(!page->is_reset);
page->is_committed = true; page->is_committed = true;
if (segment->page_kind < MI_PAGE_LARGE
|| !mi_option_is_enabled(mi_option_eager_page_commit)) {
size_t psize;
uint8_t* start = mi_segment_raw_page_start(segment, page, &psize);
bool is_zero = false; bool is_zero = false;
_mi_mem_commit(start,psize,&is_zero,stats); const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0);
if (is_zero) page->is_zero_init = true; _mi_mem_commit(start, psize + gsize, &is_zero, tld->os);
if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); }
if (is_zero) { page->is_zero_init = true; }
}
} }
if (page->is_reset) { if (page->is_reset) {
mi_assert_internal(!segment->mem_is_fixed); mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset?
page->is_reset = false;
bool is_zero = false;
_mi_mem_unreset(start, psize, &is_zero, stats);
if (is_zero) page->is_zero_init = true;
} }
mi_assert_internal(page->segment_in_use);
mi_assert_internal(segment->used <= segment->capacity);
if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) {
// if no more free pages, remove from the queue
mi_assert_internal(!mi_segment_has_free(segment));
mi_segment_remove_from_free_queue(segment, tld);
} }
return page;
}
}
mi_assert(false);
return NULL;
} }
@ -495,39 +743,50 @@ static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_stats_t* stats)
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld);
static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_stats_t* stats) { static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) {
UNUSED(stats);
mi_assert_internal(page->segment_in_use); mi_assert_internal(page->segment_in_use);
mi_assert_internal(mi_page_all_free(page)); mi_assert_internal(mi_page_all_free(page));
mi_assert_internal(page->is_committed); mi_assert_internal(page->is_committed);
size_t inuse = page->capacity * page->block_size; mi_assert_internal(mi_page_not_in_queue(page, tld));
_mi_stat_decrease(&stats->page_committed, inuse);
_mi_stat_decrease(&stats->pages, 1); size_t inuse = page->capacity * mi_page_block_size(page);
_mi_stat_decrease(&tld->stats->page_committed, inuse);
_mi_stat_decrease(&tld->stats->pages, 1);
// calculate the used size from the raw (non-aligned) start of the page
//size_t pre_size;
//_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size);
//size_t used_size = pre_size + (page->capacity * page->block_size);
page->is_zero_init = false;
page->segment_in_use = false;
// reset the page memory to reduce memory pressure? // reset the page memory to reduce memory pressure?
if (!segment->mem_is_fixed && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { // note: must come after setting `segment_in_use` to false but before block_size becomes 0
size_t psize; //mi_page_reset(segment, page, 0 /*used_size*/, tld);
uint8_t* start = _mi_page_start(segment, page, &psize);
page->is_reset = true;
_mi_mem_reset(start, psize, stats);
}
// zero the page data, but not the segment fields // zero the page data, but not the segment fields and block_size (for page size calculations)
page->is_zero_init = false; uint32_t block_size = page->xblock_size;
ptrdiff_t ofs = offsetof(mi_page_t,capacity); ptrdiff_t ofs = offsetof(mi_page_t,capacity);
memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs);
page->segment_in_use = false; page->xblock_size = block_size;
segment->used--; segment->used--;
// add to the free page list for reuse/reset
if (segment->page_kind <= MI_PAGE_MEDIUM) {
mi_pages_reset_add(segment, page, tld);
}
} }
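
mi_segment_page_clear above relies on a small but easy-to-get-wrong pattern: wipe the page struct from `capacity` onward, but save and restore `xblock_size` so later page-size calculations still work. A stand-alone sketch of the same pattern with an illustrative struct (not mimalloc's mi_page_t):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef struct page_s {
  uint8_t  segment_idx;    // segment-owned fields that must survive the clear
  uint8_t  flags;
  uint16_t capacity;       // everything from here on is wiped...
  uint32_t xblock_size;    // ...except this one, which is saved and restored
  void*    free;
} page_t;

// Zero the page from `capacity` onward, but keep `xblock_size` for size calculations.
static void page_clear(page_t* page) {
  uint32_t block_size = page->xblock_size;
  ptrdiff_t ofs = offsetof(page_t, capacity);
  memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs);
  page->xblock_size = block_size;
}
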
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
{ {
mi_assert(page != NULL); mi_assert(page != NULL);
mi_segment_t* segment = _mi_page_segment(page); mi_segment_t* segment = _mi_page_segment(page);
mi_assert_expensive(mi_segment_is_valid(segment)); mi_assert_expensive(mi_segment_is_valid(segment,tld));
mi_reset_delayed(tld);
// mark it as free now // mark it as free now
mi_segment_page_clear(segment, page, tld->stats); mi_segment_page_clear(segment, page, tld);
if (segment->used == 0) { if (segment->used == 0) {
// no more used pages; remove from the free list and free the segment // no more used pages; remove from the free list and free the segment
@ -556,34 +815,57 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld)
// are "abandoned" and will be reclaimed by other threads to // are "abandoned" and will be reclaimed by other threads to
// reuse their pages and/or free them eventually // reuse their pages and/or free them eventually
static volatile _Atomic(mi_segment_t*) abandoned; // = NULL; static volatile _Atomic(mi_segment_t*) abandoned; // = NULL;
static volatile _Atomic(uintptr_t) abandoned_count; // = 0; static volatile _Atomic(uintptr_t) abandoned_count; // = 0; approximate count of abandoned segments
// prepend a list of abandoned segments atomically to the global abandoned list; O(n)
static void mi_segments_prepend_abandoned(mi_segment_t* first) {
if (first == NULL) return;
// first try if the abandoned list happens to be NULL
if (mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, NULL)) return;
// if not, find the end of the list
mi_segment_t* last = first;
while (last->abandoned_next != NULL) {
last = last->abandoned_next;
}
// and atomically prepend
mi_segment_t* next;
do {
next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned));
last->abandoned_next = next;
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*, &abandoned), first, next));
}
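
The new mi_segments_prepend_abandoned above is a lock-free prepend onto a global singly-linked list: try a fast CAS against NULL, otherwise walk to the tail of the chain and CAS the whole chain in front of the current head. A minimal stand-alone sketch of the same pattern using C11 atomics (illustrative node_t/prepend_chain names, not mimalloc's types):

#include <stdatomic.h>
#include <stddef.h>

typedef struct node_s {
  struct node_s* next;
} node_t;

static _Atomic(node_t*) list_head;   // global lock-free stack of abandoned items

// Prepend the whole chain starting at `first` onto the global list; O(n) to find the tail.
static void prepend_chain(node_t* first) {
  if (first == NULL) return;
  // fast path: the list happens to be empty
  node_t* expected = NULL;
  if (atomic_compare_exchange_weak(&list_head, &expected, first)) return;
  // otherwise find the tail of the chain and CAS it in front of the current head
  node_t* last = first;
  while (last->next != NULL) last = last->next;
  expected = atomic_load_explicit(&list_head, memory_order_relaxed);
  do {
    last->next = expected;            // a failed CAS refreshes `expected`, so just re-link
  } while (!atomic_compare_exchange_weak(&list_head, &expected, first));
}

The relaxed pre-read is fine here because the CAS re-validates the head on every attempt.
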
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0); mi_assert_internal(segment->used > 0);
mi_assert_internal(segment->abandoned_next == NULL); mi_assert_internal(segment->abandoned_next == NULL);
mi_assert_expensive(mi_segment_is_valid(segment)); mi_assert_expensive(mi_segment_is_valid(segment,tld));
// remove the segment from the free page queue if needed // remove the segment from the free page queue if needed
mi_segment_remove_from_free_queue(segment,tld); mi_reset_delayed(tld);
mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld);
mi_segment_remove_from_free_queue(segment, tld);
mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL);
// all pages in the segment are abandoned; add it to the abandoned list // all pages in the segment are abandoned; add it to the abandoned list
_mi_stat_increase(&tld->stats->segments_abandoned, 1); _mi_stat_increase(&tld->stats->segments_abandoned, 1);
mi_segments_track_size(-((long)segment->segment_size), tld); mi_segments_track_size(-((long)segment->segment_size), tld);
segment->thread_id = 0; segment->thread_id = 0;
mi_segment_t* next; segment->abandoned_next = NULL;
do { mi_segments_prepend_abandoned(segment); // prepend one-element list
next = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&abandoned)); mi_atomic_increment(&abandoned_count); // keep approximate count
mi_atomic_write_ptr(mi_atomic_cast(void*,&segment->abandoned_next), next);
} while (!mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), segment, next));
mi_atomic_increment(&abandoned_count);
} }
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
mi_assert(page != NULL); mi_assert(page != NULL);
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
mi_assert_internal(mi_page_heap(page) == NULL);
mi_segment_t* segment = _mi_page_segment(page); mi_segment_t* segment = _mi_page_segment(page);
mi_assert_expensive(mi_segment_is_valid(segment)); mi_assert_expensive(!mi_pages_reset_contains(page, tld));
mi_assert_expensive(mi_segment_is_valid(segment,tld));
segment->abandoned++; segment->abandoned++;
_mi_stat_increase(&tld->stats->pages_abandoned, 1); _mi_stat_increase(&tld->stats->pages_abandoned, 1);
mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->abandoned <= segment->used);
@ -594,24 +876,35 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) {
} }
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) { bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld) {
uintptr_t reclaimed = 0; // To avoid the A-B-A problem, grab the entire list atomically
uintptr_t atmost; mi_segment_t* segment = (mi_segment_t*)mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*, &abandoned)); // pre-read to avoid expensive atomic operations
if (try_all) { if (segment == NULL) return false;
atmost = abandoned_count+16; // close enough segment = (mi_segment_t*)mi_atomic_exchange_ptr(mi_atomic_cast(void*, &abandoned), NULL);
} if (segment == NULL) return false;
else {
atmost = abandoned_count/8; // at most 1/8th of all outstanding (estimated) // we got a non-empty list
if (!try_all) {
// take at most 1/8th of the list and append the rest back to the abandoned list again
// this is O(n) but simplifies the code a lot (as we don't have an A-B-A problem)
    // and probably ok since the list length tends to stay small.
uintptr_t atmost = mi_atomic_read(&abandoned_count)/8; // at most 1/8th of all outstanding (estimated)
if (atmost < 8) atmost = 8; // but at least 8 if (atmost < 8) atmost = 8; // but at least 8
// find the split point
mi_segment_t* last = segment;
while (last->abandoned_next != NULL && atmost > 0) {
last = last->abandoned_next;
atmost--;
}
// split the list and push back the remaining segments
mi_segment_t* next = last->abandoned_next;
last->abandoned_next = NULL;
mi_segments_prepend_abandoned(next);
} }
// for `atmost` `reclaimed` abandoned segments... // reclaim all segments that we kept
while(atmost > reclaimed) { while(segment != NULL) {
// try to claim the head of the abandoned segments mi_segment_t* const next = segment->abandoned_next; // save the next segment
mi_segment_t* segment;
do {
segment = (mi_segment_t*)abandoned;
} while(segment != NULL && !mi_atomic_cas_ptr_weak(mi_atomic_cast(void*,&abandoned), (mi_segment_t*)segment->abandoned_next, segment));
if (segment==NULL) break; // stop early if no more segments available
// got it. // got it.
mi_atomic_decrement(&abandoned_count); mi_atomic_decrement(&abandoned_count);
@ -619,7 +912,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
segment->abandoned_next = NULL; segment->abandoned_next = NULL;
mi_segments_track_size((long)segment->segment_size,tld); mi_segments_track_size((long)segment->segment_size,tld);
mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_internal(segment->next == NULL && segment->prev == NULL);
mi_assert_expensive(mi_segment_is_valid(segment)); mi_assert_expensive(mi_segment_is_valid(segment,tld));
_mi_stat_decrease(&tld->stats->segments_abandoned,1); _mi_stat_decrease(&tld->stats->segments_abandoned,1);
// add its abandoned pages to the current thread // add its abandoned pages to the current thread
@ -627,15 +920,24 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
for (size_t i = 0; i < segment->capacity; i++) { for (size_t i = 0; i < segment->capacity; i++) {
mi_page_t* page = &segment->pages[i]; mi_page_t* page = &segment->pages[i];
if (page->segment_in_use) { if (page->segment_in_use) {
mi_assert_internal(!page->is_reset);
mi_assert_internal(page->is_committed);
mi_assert_internal(mi_page_not_in_queue(page, tld));
mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE);
mi_assert_internal(mi_page_heap(page) == NULL);
segment->abandoned--; segment->abandoned--;
mi_assert(page->next == NULL); mi_assert(page->next == NULL);
_mi_stat_decrease(&tld->stats->pages_abandoned, 1); _mi_stat_decrease(&tld->stats->pages_abandoned, 1);
// set the heap again and allow delayed free again
mi_page_set_heap(page, heap);
_mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set)
_mi_page_free_collect(page, false); // ensure used count is up to date
if (mi_page_all_free(page)) { if (mi_page_all_free(page)) {
// if everything free by now, free the page // if everything free already, clear the page directly
mi_segment_page_clear(segment,page,tld->stats); mi_segment_page_clear(segment,page,tld);
} }
else { else {
// otherwise reclaim it // otherwise reclaim it into the heap
_mi_page_reclaim(heap,page); _mi_page_reclaim(heap,page);
} }
} }
@ -645,14 +947,17 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
mi_segment_free(segment,false,tld); mi_segment_free(segment,false,tld);
} }
else { else {
reclaimed++;
      // add its free pages to the current thread's free small segment queue      // add its free pages to the current thread's free small segment queue
if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) { if (segment->page_kind <= MI_PAGE_MEDIUM && mi_segment_has_free(segment)) {
mi_segment_insert_in_free_queue(segment,tld); mi_segment_insert_in_free_queue(segment,tld);
} }
} }
// go on
segment = next;
} }
return (reclaimed>0);
return true;
} }
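
The reclaim path above avoids the A-B-A hazard by never popping individual nodes: it swaps the whole abandoned list out with a single atomic exchange, keeps roughly 1/8th of it (at least 8) for reclamation, and prepends the remainder back. A compact sketch under the same assumptions (illustrative names and a simple node type rather than mi_segment_t):

#include <stdatomic.h>
#include <stddef.h>

typedef struct seg_s { struct seg_s* next; } seg_t;

static _Atomic(seg_t*) abandoned_list;   // illustrative stand-ins, not mimalloc's variables
static _Atomic(size_t) abandoned_n;      // approximate count, maintained elsewhere

// Swap the whole list out in one step (no per-node pops, hence no A-B-A problem),
// keep roughly 1/8th of it (at least 8 nodes), and push the remainder back.
static seg_t* reclaim_some(void) {
  seg_t* chain = atomic_exchange(&abandoned_list, (seg_t*)NULL);
  if (chain == NULL) return NULL;
  size_t keep = atomic_load(&abandoned_n) / 8;
  if (keep < 8) keep = 8;
  seg_t* last = chain;
  while (last->next != NULL && keep > 1) { last = last->next; keep--; }
  seg_t* rest = last->next;                // [chain..last] is kept by the caller
  last->next = NULL;
  if (rest != NULL) {                      // prepend `rest` back onto the shared list
    seg_t* tail = rest;
    while (tail->next != NULL) tail = tail->next;
    seg_t* expected = atomic_load_explicit(&abandoned_list, memory_order_relaxed);
    do { tail->next = expected; }
    while (!atomic_compare_exchange_weak(&abandoned_list, &expected, rest));
  }
  return chain;
}

The split is O(n), but as the comments above note, the list tends to stay short and the single-exchange grab keeps the logic simple.
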
@ -660,31 +965,44 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
Small page allocation Small page allocation
----------------------------------------------------------- */ ----------------------------------------------------------- */
// Allocate a small page inside a segment.
// Requires that the page has free pages static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(mi_segment_has_free(segment));
mi_assert_expensive(mi_segment_is_valid(segment, tld));
for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search?
mi_page_t* page = &segment->pages[i];
if (!page->segment_in_use) {
mi_segment_page_claim(segment, page, tld);
return page;
}
}
mi_assert(false);
return NULL;
}
// Allocate a page inside a segment. Requires that the page has free pages
static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) { static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(mi_segment_has_free(segment)); mi_assert_internal(mi_segment_has_free(segment));
mi_page_t* page = mi_segment_find_free(segment, tld->stats); return mi_segment_find_free(segment, tld);
page->segment_in_use = true;
segment->used++;
mi_assert_internal(segment->used <= segment->capacity);
if (segment->used == segment->capacity) {
// if no more free pages, remove from the queue
mi_assert_internal(!mi_segment_has_free(segment));
mi_segment_remove_from_free_queue(segment,tld);
}
return page;
} }
static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { static mi_page_t* mi_segment_page_alloc(mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
  mi_segment_queue_t* free_queue = mi_segment_free_queue_of_kind(kind,tld); // find an available segment in the segment free queue
mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld);
if (mi_segment_queue_is_empty(free_queue)) { if (mi_segment_queue_is_empty(free_queue)) {
mi_segment_t* segment = mi_segment_alloc(0,kind,page_shift,tld,os_tld); // possibly allocate a fresh segment
if (segment == NULL) return NULL; mi_segment_t* segment = mi_segment_alloc(0, kind, page_shift, tld, os_tld);
if (segment == NULL) return NULL; // return NULL if out-of-memory
mi_segment_enqueue(free_queue, segment); mi_segment_enqueue(free_queue, segment);
} }
mi_assert_internal(free_queue->first != NULL); mi_assert_internal(free_queue->first != NULL);
return mi_segment_page_alloc_in(free_queue->first,tld); mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld);
mi_assert_internal(page != NULL);
#if MI_DEBUG>=2
// verify it is committed
_mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0;
#endif
return page;
} }
static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { static mi_page_t* mi_segment_small_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
@ -702,9 +1020,11 @@ static mi_page_t* mi_segment_medium_page_alloc(mi_segments_tld_t* tld, mi_os_tld
static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld); mi_segment_t* segment = mi_segment_alloc(0,MI_PAGE_LARGE,MI_LARGE_PAGE_SHIFT,tld,os_tld);
if (segment == NULL) return NULL; if (segment == NULL) return NULL;
segment->used = 1; mi_page_t* page = mi_segment_find_free(segment, tld);
mi_page_t* page = &segment->pages[0]; mi_assert_internal(page != NULL);
page->segment_in_use = true; #if MI_DEBUG>=2
_mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0;
#endif
return page; return page;
} }
@ -712,11 +1032,10 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
{ {
mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld);
if (segment == NULL) return NULL; if (segment == NULL) return NULL;
mi_assert_internal(segment->segment_size - segment->segment_info_size >= size); mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size);
segment->used = 1;
segment->thread_id = 0; // huge pages are immediately abandoned segment->thread_id = 0; // huge pages are immediately abandoned
mi_page_t* page = &segment->pages[0]; mi_page_t* page = mi_segment_find_free(segment, tld);
page->segment_in_use = true; mi_assert_internal(page != NULL);
return page; return page;
} }
@ -738,6 +1057,9 @@ mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_
else { else {
page = mi_segment_huge_page_alloc(block_size,tld,os_tld); page = mi_segment_huge_page_alloc(block_size,tld,os_tld);
} }
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page))); mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size);
mi_reset_delayed(tld);
mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld));
return page; return page;
} }

View file

@ -14,7 +14,9 @@ terms of the MIT license. A copy of the license can be found in the file
// it will override all the standard library allocation // it will override all the standard library allocation
// functions (on Unix's). // functions (on Unix's).
#include "stats.c" #include "stats.c"
#include "random.c"
#include "os.c" #include "os.c"
#include "arena.c"
#include "memory.c" #include "memory.c"
#include "segment.c" #include "segment.c"
#include "page.c" #include "page.c"

View file

@ -126,90 +126,96 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
// unit > 0 : size in binary bytes // unit > 0 : size in binary bytes
// unit == 0: count as decimal // unit == 0: count as decimal
// unit < 0 : count in binary // unit < 0 : count in binary
static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, const char* fmt) { static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) {
char buf[32]; char buf[32];
int len = 32; int len = 32;
const char* suffix = (unit <= 0 ? " " : "b"); const char* suffix = (unit <= 0 ? " " : "b");
double base = (unit == 0 ? 1000.0 : 1024.0); const int64_t base = (unit == 0 ? 1000 : 1024);
if (unit>0) n *= unit; if (unit>0) n *= unit;
double pos = (double)(n < 0 ? -n : n); const int64_t pos = (n < 0 ? -n : n);
if (pos < base) if (pos < base) {
snprintf(buf,len, "%d %s ", (int)n, suffix); snprintf(buf, len, "%d %s ", (int)n, suffix);
else if (pos < base*base) }
snprintf(buf, len, "%.1f k%s", (double)n / base, suffix); else {
else if (pos < base*base*base) int64_t divider = base;
snprintf(buf, len, "%.1f m%s", (double)n / (base*base), suffix); const char* magnitude = "k";
else if (pos >= divider*base) { divider *= base; magnitude = "m"; }
snprintf(buf, len, "%.1f g%s", (double)n / (base*base*base), suffix); if (pos >= divider*base) { divider *= base; magnitude = "g"; }
const int64_t tens = (n / (divider/10));
_mi_fprintf(out, (fmt==NULL ? "%11s" : fmt), buf); const long whole = (long)(tens/10);
const long frac1 = (long)(tens%10);
snprintf(buf, len, "%ld.%ld %s%s", whole, frac1, magnitude, suffix);
}
_mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf);
} }
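
The rewritten mi_printf_amount formats sizes with one decimal digit using only integer arithmetic: pick a divider (1024, 1024^2, ...), compute tens = n/(divider/10), and split that into a whole part and one fractional digit. A quick stand-alone check of the arithmetic (not part of mimalloc):

#include <stdint.h>
#include <stdio.h>

// Worked example: 2621440 bytes with base 1024 selects divider 1024*1024 and magnitude "m".
int main(void) {
  int64_t n = 2621440, divider = 1024 * 1024;
  int64_t tens  = n / (divider / 10);   // 2621440 / 104857 == 25
  long    whole = (long)(tens / 10);    // 2
  long    frac1 = (long)(tens % 10);    // 5
  printf("%ld.%ld mb\n", whole, frac1); // prints "2.5 mb", i.e. 2621440/1048576 = 2.5
  return 0;
}
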
static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out) { static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg) {
mi_printf_amount(n,unit,out,NULL); mi_printf_amount(n,unit,out,arg,NULL);
} }
static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out) { static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) {
if (unit==1) _mi_fprintf(out,"%11s"," "); if (unit==1) _mi_fprintf(out, arg, "%11s"," ");
else mi_print_amount(n,0,out); else mi_print_amount(n,0,out,arg);
} }
static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out ) { static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) {
_mi_fprintf(out,"%10s:", msg); _mi_fprintf(out, arg,"%10s:", msg);
if (unit>0) { if (unit>0) {
mi_print_amount(stat->peak, unit, out); mi_print_amount(stat->peak, unit, out, arg);
mi_print_amount(stat->allocated, unit, out); mi_print_amount(stat->allocated, unit, out, arg);
mi_print_amount(stat->freed, unit, out); mi_print_amount(stat->freed, unit, out, arg);
mi_print_amount(unit, 1, out); mi_print_amount(unit, 1, out, arg);
mi_print_count(stat->allocated, unit, out); mi_print_count(stat->allocated, unit, out, arg);
if (stat->allocated > stat->freed) if (stat->allocated > stat->freed)
_mi_fprintf(out, " not all freed!\n"); _mi_fprintf(out, arg, " not all freed!\n");
else else
_mi_fprintf(out, " ok\n"); _mi_fprintf(out, arg, " ok\n");
} }
else if (unit<0) { else if (unit<0) {
mi_print_amount(stat->peak, -1, out); mi_print_amount(stat->peak, -1, out, arg);
mi_print_amount(stat->allocated, -1, out); mi_print_amount(stat->allocated, -1, out, arg);
mi_print_amount(stat->freed, -1, out); mi_print_amount(stat->freed, -1, out, arg);
if (unit==-1) { if (unit==-1) {
_mi_fprintf(out, "%22s", ""); _mi_fprintf(out, arg, "%22s", "");
} }
else { else {
mi_print_amount(-unit, 1, out); mi_print_amount(-unit, 1, out, arg);
mi_print_count((stat->allocated / -unit), 0, out); mi_print_count((stat->allocated / -unit), 0, out, arg);
} }
if (stat->allocated > stat->freed) if (stat->allocated > stat->freed)
_mi_fprintf(out, " not all freed!\n"); _mi_fprintf(out, arg, " not all freed!\n");
else else
_mi_fprintf(out, " ok\n"); _mi_fprintf(out, arg, " ok\n");
} }
else { else {
mi_print_amount(stat->peak, 1, out); mi_print_amount(stat->peak, 1, out, arg);
mi_print_amount(stat->allocated, 1, out); mi_print_amount(stat->allocated, 1, out, arg);
_mi_fprintf(out, "\n"); _mi_fprintf(out, arg, "\n");
} }
} }
static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out ) { static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) {
_mi_fprintf(out, "%10s:", msg); _mi_fprintf(out, arg, "%10s:", msg);
mi_print_amount(stat->total, -1, out); mi_print_amount(stat->total, -1, out, arg);
_mi_fprintf(out, "\n"); _mi_fprintf(out, arg, "\n");
} }
static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out) { static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) {
double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count));
_mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); const long avg_whole = (long)(avg_tens/10);
const long avg_frac1 = (long)(avg_tens%10);
_mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1);
} }
static void mi_print_header(mi_output_fun* out ) { static void mi_print_header(mi_output_fun* out, void* arg ) {
_mi_fprintf(out,"%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count "); _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "unit ", "count ");
} }
#if MI_STAT>1 #if MI_STAT>1
static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out) { static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bins, size_t max, const char* fmt, mi_output_fun* out, void* arg) {
bool found = false; bool found = false;
char buf[64]; char buf[64];
for (size_t i = 0; i <= max; i++) { for (size_t i = 0; i <= max; i++) {
@ -218,75 +224,73 @@ static void mi_stats_print_bins(mi_stat_count_t* all, const mi_stat_count_t* bin
int64_t unit = _mi_bin_size((uint8_t)i); int64_t unit = _mi_bin_size((uint8_t)i);
snprintf(buf, 64, "%s %3zu", fmt, i); snprintf(buf, 64, "%s %3zu", fmt, i);
mi_stat_add(all, &bins[i], unit); mi_stat_add(all, &bins[i], unit);
mi_stat_print(&bins[i], buf, unit, out); mi_stat_print(&bins[i], buf, unit, out, arg);
} }
} }
//snprintf(buf, 64, "%s all", fmt); //snprintf(buf, 64, "%s all", fmt);
//mi_stat_print(all, buf, 1); //mi_stat_print(all, buf, 1);
if (found) { if (found) {
_mi_fprintf(out, "\n"); _mi_fprintf(out, arg, "\n");
mi_print_header(out); mi_print_header(out, arg);
} }
} }
#endif #endif
static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit); static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit);
static void _mi_stats_print(mi_stats_t* stats, double secs, mi_output_fun* out) mi_attr_noexcept { static void _mi_stats_print(mi_stats_t* stats, mi_msecs_t elapsed, mi_output_fun* out, void* arg) mi_attr_noexcept {
mi_print_header(out); mi_print_header(out,arg);
#if MI_STAT>1 #if MI_STAT>1
mi_stat_count_t normal = { 0,0,0,0 }; mi_stat_count_t normal = { 0,0,0,0 };
mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out); mi_stats_print_bins(&normal, stats->normal, MI_BIN_HUGE, "normal",out,arg);
mi_stat_print(&normal, "normal", 1, out); mi_stat_print(&normal, "normal", 1, out, arg);
mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out); mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg);
mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out); mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg);
mi_stat_count_t total = { 0,0,0,0 }; mi_stat_count_t total = { 0,0,0,0 };
mi_stat_add(&total, &normal, 1); mi_stat_add(&total, &normal, 1);
mi_stat_add(&total, &stats->huge, 1); mi_stat_add(&total, &stats->huge, 1);
mi_stat_add(&total, &stats->giant, 1); mi_stat_add(&total, &stats->giant, 1);
mi_stat_print(&total, "total", 1, out); mi_stat_print(&total, "total", 1, out, arg);
_mi_fprintf(out, "malloc requested: "); _mi_fprintf(out, arg, "malloc requested: ");
mi_print_amount(stats->malloc.allocated, 1, out); mi_print_amount(stats->malloc.allocated, 1, out, arg);
_mi_fprintf(out, "\n\n"); _mi_fprintf(out, arg, "\n\n");
#endif #endif
mi_stat_print(&stats->reserved, "reserved", 1, out); mi_stat_print(&stats->reserved, "reserved", 1, out, arg);
mi_stat_print(&stats->committed, "committed", 1, out); mi_stat_print(&stats->committed, "committed", 1, out, arg);
mi_stat_print(&stats->reset, "reset", 1, out); mi_stat_print(&stats->reset, "reset", 1, out, arg);
mi_stat_print(&stats->page_committed, "touched", 1, out); mi_stat_print(&stats->page_committed, "touched", 1, out, arg);
mi_stat_print(&stats->segments, "segments", -1, out); mi_stat_print(&stats->segments, "segments", -1, out, arg);
mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg);
mi_stat_print(&stats->segments_cache, "-cached", -1, out); mi_stat_print(&stats->segments_cache, "-cached", -1, out, arg);
mi_stat_print(&stats->pages, "pages", -1, out); mi_stat_print(&stats->pages, "pages", -1, out, arg);
mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out); mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg);
mi_stat_counter_print(&stats->pages_extended, "-extended", out); mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg);
mi_stat_counter_print(&stats->page_no_retire, "-noretire", out); mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg);
mi_stat_counter_print(&stats->mmap_calls, "mmaps", out); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg);
mi_stat_counter_print(&stats->commit_calls, "commits", out); mi_stat_counter_print(&stats->commit_calls, "commits", out, arg);
mi_stat_print(&stats->threads, "threads", -1, out); mi_stat_print(&stats->threads, "threads", -1, out, arg);
mi_stat_counter_print_avg(&stats->searches, "searches", out); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
_mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count());
if (elapsed > 0) _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000);
if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); mi_msecs_t user_time;
mi_msecs_t sys_time;
double user_time;
double sys_time;
size_t peak_rss; size_t peak_rss;
size_t page_faults; size_t page_faults;
size_t page_reclaim; size_t page_reclaim;
size_t peak_commit; size_t peak_commit;
mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit); mi_process_info(&user_time, &sys_time, &peak_rss, &page_faults, &page_reclaim, &peak_commit);
_mi_fprintf(out,"%10s: user: %.3f s, system: %.3f s, faults: %lu, reclaims: %lu, rss: ", "process", user_time, sys_time, (unsigned long)page_faults, (unsigned long)page_reclaim ); _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, reclaims: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults, (unsigned long)page_reclaim );
mi_printf_amount((int64_t)peak_rss, 1, out, "%s"); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s");
if (peak_commit > 0) { if (peak_commit > 0) {
_mi_fprintf(out,", commit charge: "); _mi_fprintf(out, arg, ", commit charge: ");
mi_printf_amount((int64_t)peak_commit, 1, out, "%s"); mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s");
} }
_mi_fprintf(out,"\n"); _mi_fprintf(out, arg, "\n");
} }
double _mi_clock_end(double start); static mi_msecs_t mi_time_start; // = 0
double _mi_clock_start(void);
static double mi_time_start = 0.0;
static mi_stats_t* mi_stats_get_default(void) { static mi_stats_t* mi_stats_get_default(void) {
mi_heap_t* heap = mi_heap_get_default(); mi_heap_t* heap = mi_heap_get_default();
@ -315,72 +319,73 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done`
mi_stats_merge_from(stats); mi_stats_merge_from(stats);
} }
void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
static void mi_stats_print_ex(mi_stats_t* stats, double secs, mi_output_fun* out) { mi_msecs_t elapsed = _mi_clock_end(mi_time_start);
mi_stats_merge_from(stats); mi_stats_merge_from(mi_stats_get_default());
_mi_stats_print(&_mi_stats_main, secs, out); _mi_stats_print(&_mi_stats_main, elapsed, out, arg);
} }
void mi_stats_print(mi_output_fun* out) mi_attr_noexcept { void mi_stats_print(void* out) mi_attr_noexcept {
mi_stats_print_ex(mi_stats_get_default(),_mi_clock_end(mi_time_start),out); // for compatibility there is an `out` parameter (which can be `stdout` or `stderr`)
mi_stats_print_out((mi_output_fun*)out, NULL);
} }
void mi_thread_stats_print(mi_output_fun* out) mi_attr_noexcept { void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
_mi_stats_print(mi_stats_get_default(), _mi_clock_end(mi_time_start), out); mi_msecs_t elapsed = _mi_clock_end(mi_time_start);
_mi_stats_print(mi_stats_get_default(), elapsed, out, arg);
} }
// ----------------------------------------------------------------
// -------------------------------------------------------- // Basic timer for convenience; use milliseconds to avoid doubles
// Basic timer for convenience // ----------------------------------------------------------------
// --------------------------------------------------------
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
static double mi_to_seconds(LARGE_INTEGER t) { static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) {
static double freq = 0.0; static LARGE_INTEGER mfreq; // = 0
if (freq <= 0.0) { if (mfreq.QuadPart == 0LL) {
LARGE_INTEGER f; LARGE_INTEGER f;
QueryPerformanceFrequency(&f); QueryPerformanceFrequency(&f);
freq = (double)(f.QuadPart); mfreq.QuadPart = f.QuadPart/1000LL;
if (mfreq.QuadPart == 0) mfreq.QuadPart = 1;
} }
return ((double)(t.QuadPart) / freq); return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart);
} }
static double mi_clock_now(void) { mi_msecs_t _mi_clock_now(void) {
LARGE_INTEGER t; LARGE_INTEGER t;
QueryPerformanceCounter(&t); QueryPerformanceCounter(&t);
return mi_to_seconds(t); return mi_to_msecs(t);
} }
#else #else
#include <time.h> #include <time.h>
#ifdef CLOCK_REALTIME #ifdef CLOCK_REALTIME
static double mi_clock_now(void) { mi_msecs_t _mi_clock_now(void) {
struct timespec t; struct timespec t;
clock_gettime(CLOCK_REALTIME, &t); clock_gettime(CLOCK_REALTIME, &t);
return (double)t.tv_sec + (1.0e-9 * (double)t.tv_nsec); return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000);
} }
#else #else
// low resolution timer // low resolution timer
static double mi_clock_now(void) { mi_msecs_t _mi_clock_now(void) {
return ((double)clock() / (double)CLOCKS_PER_SEC); return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000));
} }
#endif #endif
#endif #endif
static double mi_clock_diff = 0.0; static mi_msecs_t mi_clock_diff;
double _mi_clock_start(void) { mi_msecs_t _mi_clock_start(void) {
if (mi_clock_diff == 0.0) { if (mi_clock_diff == 0.0) {
double t0 = mi_clock_now(); mi_msecs_t t0 = _mi_clock_now();
mi_clock_diff = mi_clock_now() - t0; mi_clock_diff = _mi_clock_now() - t0;
} }
return mi_clock_now(); return _mi_clock_now();
} }
double _mi_clock_end(double start) { mi_msecs_t _mi_clock_end(mi_msecs_t start) {
double end = mi_clock_now(); mi_msecs_t end = _mi_clock_now();
return (end - start - mi_clock_diff); return (end - start - mi_clock_diff);
} }
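
The timer now works in integer milliseconds so elapsed and process times can be printed without floating point. A small stand-alone sketch (POSIX clock_gettime branch only; msecs_t and clock_now_ms are illustrative names, not mimalloc's) showing both the conversion and the "%ld.%03ld s" second/millisecond formatting used by _mi_stats_print:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

typedef int64_t msecs_t;   // stand-in for mi_msecs_t (illustrative only)

static msecs_t clock_now_ms(void) {
  struct timespec t;
  clock_gettime(CLOCK_REALTIME, &t);
  return ((msecs_t)t.tv_sec * 1000) + ((msecs_t)t.tv_nsec / 1000000);
}

int main(void) {
  msecs_t start = clock_now_ms();
  // ... the work being timed would go here ...
  msecs_t elapsed = clock_now_ms() - start;
  // print seconds with millisecond precision without doubles
  printf("elapsed: %ld.%03ld s\n", (long)(elapsed / 1000), (long)(elapsed % 1000));
  return 0;
}
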
@ -394,21 +399,21 @@ double _mi_clock_end(double start) {
#include <psapi.h> #include <psapi.h>
#pragma comment(lib,"psapi.lib") #pragma comment(lib,"psapi.lib")
static double filetime_secs(const FILETIME* ftime) { static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
ULARGE_INTEGER i; ULARGE_INTEGER i;
i.LowPart = ftime->dwLowDateTime; i.LowPart = ftime->dwLowDateTime;
i.HighPart = ftime->dwHighDateTime; i.HighPart = ftime->dwHighDateTime;
double secs = (double)(i.QuadPart) * 1.0e-7; // FILETIME is in 100 nano seconds mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds
return secs; return msecs;
} }
static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
FILETIME ct; FILETIME ct;
FILETIME ut; FILETIME ut;
FILETIME st; FILETIME st;
FILETIME et; FILETIME et;
GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
*utime = filetime_secs(&ut); *utime = filetime_msecs(&ut);
*stime = filetime_secs(&st); *stime = filetime_msecs(&st);
PROCESS_MEMORY_COUNTERS info; PROCESS_MEMORY_COUNTERS info;
GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
@ -427,11 +432,11 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size
#include <mach/mach.h> #include <mach/mach.h>
#endif #endif
static double timeval_secs(const struct timeval* tv) { static mi_msecs_t timeval_secs(const struct timeval* tv) {
return (double)tv->tv_sec + ((double)tv->tv_usec * 1.0e-6); return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
} }
static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
struct rusage rusage; struct rusage rusage;
getrusage(RUSAGE_SELF, &rusage); getrusage(RUSAGE_SELF, &rusage);
#if defined(__APPLE__) && defined(__MACH__) #if defined(__APPLE__) && defined(__MACH__)
@ -452,12 +457,12 @@ static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size
#pragma message("define a way to get process info") #pragma message("define a way to get process info")
#endif #endif
static void mi_process_info(double* utime, double* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) { static void mi_process_info(mi_msecs_t* utime, mi_msecs_t* stime, size_t* peak_rss, size_t* page_faults, size_t* page_reclaim, size_t* peak_commit) {
*peak_rss = 0; *peak_rss = 0;
*page_faults = 0; *page_faults = 0;
*page_reclaim = 0; *page_reclaim = 0;
*peak_commit = 0; *peak_commit = 0;
*utime = 0.0; *utime = 0;
*stime = 0.0; *stime = 0;
} }
#endif #endif

View file

@ -13,7 +13,7 @@ if (NOT CMAKE_BUILD_TYPE)
endif() endif()
# Import mimalloc (if installed) # Import mimalloc (if installed)
find_package(mimalloc 1.2 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) find_package(mimalloc 1.4 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH)
message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}")
# overriding with a dynamic library # overriding with a dynamic library

View file

@ -24,7 +24,7 @@ public:
int main() { int main() {
//mi_stats_reset(); // ignore earlier allocations mi_stats_reset(); // ignore earlier allocations
atexit(free_p); atexit(free_p);
void* p1 = malloc(78); void* p1 = malloc(78);
void* p2 = mi_malloc_aligned(16,24); void* p2 = mi_malloc_aligned(16,24);

View file

@ -5,8 +5,13 @@ terms of the MIT license.
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
/* This is a stress test for the allocator, using multiple threads and /* This is a stress test for the allocator, using multiple threads and
transferring objects between threads. This is not a typical workload transferring objects between threads. It tries to reflect real-world workloads:
but uses a random linear size distribution. Timing can also depend on - allocation size is distributed linearly in powers of two
- with some fraction extra large (and some extra extra large)
- the allocations are initialized and read again at free
- pointers transfer between threads
- threads are terminated and recreated with some objects surviving in between
- uses deterministic "randomness", but execution can still depend on
(random) thread scheduling. Do not use this test as a benchmark! (random) thread scheduling. Do not use this test as a benchmark!
*/ */
@ -21,14 +26,14 @@ terms of the MIT license.
// //
// argument defaults // argument defaults
static int THREADS = 32; // more repeatable if THREADS <= #processors static int THREADS = 32; // more repeatable if THREADS <= #processors
static int SCALE = 50; // scaling factor static int SCALE = 10; // scaling factor
static int ITER = 10; // N full iterations re-creating all threads static int ITER = 50; // N full iterations destructing and re-creating all threads
// static int THREADS = 8; // more repeatable if THREADS <= #processors // static int THREADS = 8; // more repeatable if THREADS <= #processors
// static int SCALE = 100; // scaling factor // static int SCALE = 100; // scaling factor
static bool allow_large_objects = true; // allow very large objects? static bool allow_large_objects = true; // allow very large objects?
static size_t use_one_size = 0; // use single object size of N uintptr_t? static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
#ifdef USE_STD_MALLOC #ifdef USE_STD_MALLOC
@ -132,7 +137,7 @@ static void stress(intptr_t tid) {
data_size += 100000; data_size += 100000;
data = (void**)custom_realloc(data, data_size * sizeof(void*)); data = (void**)custom_realloc(data, data_size * sizeof(void*));
} }
data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r); data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r);
} }
else { else {
// 25% retain // 25% retain
@ -185,7 +190,7 @@ int main(int argc, char** argv) {
long n = (strtol(argv[3], &end, 10)); long n = (strtol(argv[3], &end, 10));
if (n > 0) ITER = n; if (n > 0) ITER = n;
} }
printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER); printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
//int res = mi_reserve_huge_os_pages(4,1); //int res = mi_reserve_huge_os_pages(4,1);
//printf("(reserve huge: %i\n)", res); //printf("(reserve huge: %i\n)", res);
@ -204,7 +209,7 @@ int main(int argc, char** argv) {
} }
mi_collect(false); mi_collect(false);
#ifndef NDEBUG #ifndef NDEBUG
if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); } if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
#endif #endif
} }