mirror of https://github.com/microsoft/mimalloc.git (synced 2025-05-09 08:49:31 +03:00)

commit 19163c7097: Merge branch 'dev-win' into dev-exp
17 changed files with 945 additions and 166 deletions
|
@@ -35,7 +35,6 @@
|
|||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
|
@@ -46,7 +45,6 @@
|
|||
<ConfigurationType>DynamicLibrary</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
|
@@ -70,25 +68,25 @@
|
|||
<OutDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<TargetName>mimalloc</TargetName>
|
||||
<TargetName>mimalloc-override</TargetName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<OutDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<TargetName>mimalloc</TargetName>
|
||||
<TargetName>mimalloc-override</TargetName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<OutDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<TargetName>mimalloc</TargetName>
|
||||
<TargetName>mimalloc-override</TargetName>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<OutDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
<TargetExt>.dll</TargetExt>
|
||||
<TargetName>mimalloc</TargetName>
|
||||
<TargetName>mimalloc-override</TargetName>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
|
@@ -100,15 +98,17 @@
|
|||
<PreprocessorDefinitions>MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<SupportJustMyCode>false</SupportJustMyCode>
|
||||
<CompileAs>CompileAsCpp</CompileAs>
|
||||
<CompileAs>Default</CompileAs>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<IgnoreSpecificDefaultLibraries>
|
||||
</IgnoreSpecificDefaultLibraries>
|
||||
<ModuleDefinitionFile>
|
||||
</ModuleDefinitionFile>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
<EntryPointSymbol>DllEntry</EntryPointSymbol>
|
||||
<IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
|
@@ -121,15 +121,17 @@
|
|||
<PreprocessorDefinitions>MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<SupportJustMyCode>false</SupportJustMyCode>
|
||||
<CompileAs>CompileAsCpp</CompileAs>
|
||||
<CompileAs>Default</CompileAs>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>../../bin/mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<IgnoreSpecificDefaultLibraries>
|
||||
</IgnoreSpecificDefaultLibraries>
|
||||
<ModuleDefinitionFile>
|
||||
</ModuleDefinitionFile>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
<EntryPointSymbol>DllEntry</EntryPointSymbol>
|
||||
<IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
|
||||
</Link>
|
||||
<PostBuildEvent>
|
||||
<Command>COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath)</Command>
|
||||
|
@@ -152,15 +154,17 @@
|
|||
<AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
|
||||
<WholeProgramOptimization>false</WholeProgramOptimization>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<CompileAs>CompileAsCpp</CompileAs>
|
||||
<CompileAs>Default</CompileAs>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<ModuleDefinitionFile>
|
||||
</ModuleDefinitionFile>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
<EntryPointSymbol>DllEntry</EntryPointSymbol>
|
||||
<IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
|
@@ -177,15 +181,17 @@
|
|||
<AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
|
||||
<WholeProgramOptimization>false</WholeProgramOptimization>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<CompileAs>CompileAsCpp</CompileAs>
|
||||
<CompileAs>Default</CompileAs>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalDependencies>../../bin/mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<ModuleDefinitionFile>
|
||||
</ModuleDefinitionFile>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
<EntryPointSymbol>DllEntry</EntryPointSymbol>
|
||||
<IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
|
||||
</Link>
|
||||
<PostBuildEvent>
|
||||
<Command>COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath)</Command>
|
||||
|
@@ -208,6 +214,7 @@
|
|||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\src\alloc-override-win.c" />
|
||||
<ClCompile Include="..\..\src\alloc-override.c">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
|
||||
|
|
|
@@ -67,5 +67,8 @@
|
|||
<ClCompile Include="..\..\src\alloc-posix.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\src\alloc-override-win.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
|
@@ -67,19 +67,19 @@
|
|||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
|
|
|
@@ -67,19 +67,19 @@
|
|||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
|
||||
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
|
||||
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
|
|
|
@@ -307,13 +307,23 @@ static inline bool mi_page_all_used(mi_page_t* page) {
|
|||
static inline bool mi_page_mostly_used(const mi_page_t* page) {
|
||||
if (page==NULL) return true;
|
||||
uint16_t frac = page->reserved / 8U;
|
||||
return (page->reserved - page->used + page->thread_freed < frac);
|
||||
return (page->reserved - page->used + page->thread_freed <= frac);
|
||||
}
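// Worked example (illustration only, not part of the commit): with `reserved == 128` the
// fraction is 16, so a neighbour page now counts as "mostly used" when at most 16 of its
// blocks are not in use; before this change the threshold was strictly fewer than 16.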
|
||||
|
||||
static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
|
||||
return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
|
||||
}
|
||||
|
||||
static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
|
||||
return (page->flags.xthread_id << MI_PAGE_FLAGS_BITS);
|
||||
}
|
||||
|
||||
static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
|
||||
page->flags.value = 0;
|
||||
page->flags.xthread_id = (thread_id >> MI_PAGE_FLAGS_BITS);
|
||||
mi_assert(page->flags.value == thread_id);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Encoding/Decoding the free list next pointers
|
||||
// -------------------------------------------------------------------
|
||||
|
|
|
@@ -91,19 +91,19 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
|
||||
#define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
|
||||
|
||||
#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64kb on 64-bit
|
||||
#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit
|
||||
#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit
|
||||
#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1Mb on 64-bit
|
||||
#define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT)
|
||||
|
||||
|
||||
// Maximum number of size classes. (spaced exponentially in 16.7% increments)
|
||||
#define MI_BIN_HUGE (64U)
|
||||
|
||||
// Minimal alignment necessary. On most platforms 16 bytes are needed
|
||||
// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
|
||||
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
|
||||
|
||||
#if (MI_LARGE_WSIZE_MAX > 131072)
|
||||
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
|
||||
#define MI_BIN_HUGE (70U)
|
||||
|
||||
#if (MI_LARGE_WSIZE_MAX > 393216)
|
||||
#error "define more bins"
|
||||
#endif
|
||||
|
||||
|
@@ -123,14 +123,26 @@ typedef enum mi_delayed_e {
|
|||
} mi_delayed_t;
|
||||
|
||||
|
||||
// Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags
|
||||
// This allows a single test in `mi_free` to check for unlikely cases
|
||||
// (namely, non-local free, aligned free, or freeing in a full page)
|
||||
#define MI_PAGE_FLAGS_BITS (2)
|
||||
#define MI_PAGE_FLAGS_TID_BITS (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS)
|
||||
typedef union mi_page_flags_u {
|
||||
uint16_t value;
|
||||
uintptr_t value;
|
||||
struct {
|
||||
bool has_aligned;
|
||||
bool in_full;
|
||||
#ifdef MI_BIG_ENDIAN
|
||||
uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
|
||||
#endif
|
||||
uintptr_t in_full : 1;
|
||||
uintptr_t has_aligned : 1;
|
||||
#ifndef MI_BIG_ENDIAN
|
||||
uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
|
||||
#endif
|
||||
};
|
||||
} mi_page_flags_t;
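// The single test that this layout enables looks roughly like the alloc.c hunk further
// below (sketch, assuming the thread id has its low MI_PAGE_FLAGS_BITS bits clear, which
// `mi_page_init_flags` asserts):
//
//   uintptr_t tid = _mi_thread_id();
//   if (mi_likely(tid == page->flags.value)) {
//     // local free on a page that is neither full nor contains aligned blocks
//   }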
|
||||
|
||||
|
||||
// Thread free list.
|
||||
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
|
||||
typedef uintptr_t mi_thread_free_t;
|
||||
|
@@ -161,15 +173,15 @@ typedef struct mi_page_s {
|
|||
bool is_committed:1; // `true` if the page virtual memory is committed
|
||||
|
||||
// layout like this to optimize access in `mi_malloc` and `mi_free`
|
||||
mi_page_flags_t flags;
|
||||
uint16_t capacity; // number of blocks committed
|
||||
uint16_t reserved; // number of blocks reserved in memory
|
||||
|
||||
// 16 bits padding
|
||||
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
|
||||
#if MI_SECURE
|
||||
uintptr_t cookie; // random cookie to encode the free lists
|
||||
#endif
|
||||
size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
|
||||
mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1
|
||||
|
||||
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
|
||||
volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free`
|
||||
|
@@ -182,10 +194,10 @@ typedef struct mi_page_s {
|
|||
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
|
||||
|
||||
// improve page index calculation
|
||||
#if MI_INTPTR_SIZE==8
|
||||
//void* padding[1]; // 12 words on 64-bit
|
||||
#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
|
||||
void* padding[1]; // 12 words on 64-bit
|
||||
#elif MI_INTPTR_SIZE==4
|
||||
void* padding[1]; // 12 words on 32-bit
|
||||
// void* padding[1]; // 12 words on 32-bit
|
||||
#endif
|
||||
} mi_page_t;
|
||||
|
||||
|
@@ -215,7 +227,7 @@ typedef struct mi_segment_s {
|
|||
|
||||
// layout like this to optimize access in `mi_free`
|
||||
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
|
||||
uintptr_t thread_id; // unique id of the thread owning this segment
|
||||
volatile uintptr_t thread_id; // unique id of the thread owning this segment
|
||||
mi_page_kind_t page_kind; // kind of pages: small, large, or huge
|
||||
mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
|
||||
} mi_segment_t;
|
||||
|
@@ -324,12 +336,12 @@ typedef struct mi_stats_s {
|
|||
mi_stat_count_t pages_abandoned;
|
||||
mi_stat_count_t pages_extended;
|
||||
mi_stat_count_t mmap_calls;
|
||||
mi_stat_count_t mmap_right_align;
|
||||
mi_stat_count_t mmap_ensure_aligned;
|
||||
mi_stat_count_t commit_calls;
|
||||
mi_stat_count_t threads;
|
||||
mi_stat_count_t huge;
|
||||
mi_stat_count_t malloc;
|
||||
mi_stat_count_t segments_cache;
|
||||
mi_stat_counter_t page_no_retire;
|
||||
mi_stat_counter_t searches;
|
||||
#if MI_STAT>1
|
||||
mi_stat_count_t normal[MI_BIN_HUGE+1];
|
||||
|
|
714
src/alloc-override-win.c
Normal file
|
@@ -0,0 +1,714 @@
|
|||
/* ----------------------------------------------------------------------------
|
||||
Copyright (c) 2018, Microsoft Research, Daan Leijen
|
||||
This is free software; you can redistribute it and/or modify it under the
|
||||
terms of the MIT license. A copy of the license can be found in the file
|
||||
"LICENSE" at the root of this distribution.
|
||||
-----------------------------------------------------------------------------*/
|
||||
|
||||
#include "mimalloc.h"
|
||||
#include "mimalloc-internal.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#error "this file should only be included on Windows"
|
||||
#endif
|
||||
|
||||
#include <windows.h>
|
||||
#include <psapi.h>
|
||||
|
||||
#include <stdlib.h> // getenv
|
||||
#include <string.h> // strstr
|
||||
|
||||
|
||||
/*
|
||||
To override the C runtime `malloc` on Windows we need to patch the allocation
|
||||
functions at runtime initialization. Unfortunately we can never patch before the
|
||||
runtime initializes itself, because as soon as we call `GetProcAddress` on the
|
||||
runtime module (a DLL or EXE in Windows speak), it will first load and initialize
|
||||
(by the OS calling `DllMain` on it).
|
||||
|
||||
This means that some things might be already allocated by the C runtime itself
|
||||
(and possibly other DLL's) before we get to resolve runtime addresses. This is
|
||||
no problem if everyone unwinds in order: when we unload, we unpatch and restore
|
||||
the original crt `free` routines and crt malloc'd memory is freed correctly.
|
||||
|
||||
But things go wrong if such early CRT alloc'd memory is freed or re-allocated
|
||||
_after_ we patch, but _before_ we unload (and unpatch), or if any memory allocated
|
||||
by us is freed after we unpatched.
|
||||
|
||||
There are two tricky situations to deal with:
|
||||
|
||||
1. The Thread Local Storage (TLS): when the main thread stops it will call registered
|
||||
callbacks on TLS entries (allocated by `FlsAlloc`). This is done by the OS
|
||||
before any DLL's are unloaded. Unfortunately, the C runtime registers such
|
||||
TLS entries with CRT allocated memory which is freed in the callback.
|
||||
|
||||
2. Inside the CRT:
|
||||
a. Some variables might get initialized by patched allocated
|
||||
blocks but freed during CRT unloading after we unpatched
|
||||
(like temporary file buffers).
|
||||
b. Some blocks are allocated at CRT and freed by the CRT (like the
|
||||
environment storage).
|
||||
c. And some blocks are allocated by the CRT and then reallocated
|
||||
while patched, and finally freed after unpatching! This
|
||||
happens with the `atexit` functions for example to grow the array
|
||||
of registered functions.
|
||||
|
||||
In principle situation 2 is hopeless: since we cannot patch before CRT initialization,
|
||||
we can never be sure how to free or reallocate a pointer during CRT unloading.
|
||||
However, in practice there is a good solution: when terminating, we just patch
|
||||
the reallocation and free routines to no-ops -- we are winding down anyway! This leaves
|
||||
just the reallocation problem of CRT alloc'd memory once we are patched. Here, a study of the
|
||||
CRT reveals that there seem to be just three such situations:
|
||||
|
||||
1. When registering `atexit` routines (to grow the exit function table),
|
||||
2. When calling `_setmaxstdio` (to grow the file handle table),
|
||||
3. and `_popen`/`_wpopen` (to grow handle pairs). These turn out not to be
|
||||
a problem as these are NULL initialized.
|
||||
|
||||
We fix these by providing wrappers:
|
||||
|
||||
1. We first register a _global_ `atexit` routine ourselves (`mi_patches_at_exit`) before patching,
|
||||
and then patch the `_crt_atexit` function to implement our own global exit list (and the
|
||||
same for `_crt_at_quick_exit`). All module local lists are no problem since they are always fully
|
||||
(un)patched from initialization to end. We can register in the global list by dynamically
|
||||
getting the global `_crt_atexit` entry from `ucrtbase.dll`.
|
||||
|
||||
2. The `_setmaxstdio` is _detoured_: we patch it by a stub that unpatches first,
|
||||
calls the original routine and repatches again.
|
||||
|
||||
That leaves us to reliably shut down and enter "termination mode":
|
||||
|
||||
1. Using our trick to get the global exit list entry point, we register an exit function `mi_patches_atexit`
|
||||
that first runs our home-brew list of exit functions, and then enters a _termination_
|
||||
phase that patches realloc/free variants with no-ops. Patching later again with special no-ops for
|
||||
`free` also improves efficiency during the program run since no flags need to be checked.
|
||||
|
||||
2. That is not quite good enough yet: after the exit routines that run after ours on the
global exit list (registered by the CRT), the OS starts to unwind the TLS callbacks, and
we would like callbacks registered after our DLL was loaded to still run in patched mode.
So, we also allocate a TLS entry when our DLL is loaded; when its callback is called, we
re-enable the original patches again. Since TLS entries are destroyed in FIFO order, this
runs any callbacks in later DLL's in patched mode.
|
||||
|
||||
3. Finally the DLL's get unloaded by the OS in order (still patched) until our DLL gets unloaded
|
||||
and then we start a termination phase again, and patch realloc/free with no-ops for good this time.
|
||||
|
||||
*/
|
||||
|
||||
static int __cdecl mi_setmaxstdio(int newmax);
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Microsoft allocation extensions
|
||||
// ------------------------------------------------------
|
||||
|
||||
|
||||
typedef size_t mi_nothrow_t;
|
||||
|
||||
static void mi_free_nothrow(void* p, mi_nothrow_t tag) {
|
||||
UNUSED(tag);
|
||||
mi_free(p);
|
||||
}
|
||||
|
||||
// Versions of `free`, `realloc`, `recalloc`, `expand` and `msize`
|
||||
// that are used during termination and are no-ops.
|
||||
static void mi_free_term(void* p) {
|
||||
UNUSED(p);
|
||||
}
|
||||
|
||||
static void mi_free_size_term(void* p, size_t size) {
|
||||
UNUSED(size);
|
||||
UNUSED(p);
|
||||
}
|
||||
|
||||
static void mi_free_nothrow_term(void* p, mi_nothrow_t tag) {
|
||||
UNUSED(tag);
|
||||
UNUSED(p);
|
||||
}
|
||||
|
||||
static void* mi_realloc_term(void* p, size_t newsize) {
|
||||
UNUSED(p); UNUSED(newsize);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void* mi__recalloc_term(void* p, size_t newcount, size_t newsize) {
|
||||
UNUSED(p); UNUSED(newcount); UNUSED(newsize);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void* mi__expand_term(void* p, size_t newsize) {
|
||||
UNUSED(p); UNUSED(newsize);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static size_t mi__msize_term(void* p) {
|
||||
UNUSED(p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void* mi__malloc_dbg(size_t size, int block_type, const char* fname, int line) {
|
||||
UNUSED(block_type); UNUSED(fname); UNUSED(line);
|
||||
return _malloc_base(size);
|
||||
}
|
||||
|
||||
static void* mi__calloc_dbg(size_t count, size_t size, int block_type, const char* fname, int line) {
|
||||
UNUSED(block_type); UNUSED(fname); UNUSED(line);
|
||||
return _calloc_base(count, size);
|
||||
}
|
||||
|
||||
static void* mi__realloc_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
|
||||
UNUSED(block_type); UNUSED(fname); UNUSED(line);
|
||||
return _realloc_base(p, size);
|
||||
}
|
||||
|
||||
static void mi__free_dbg(void* p, int block_type) {
|
||||
UNUSED(block_type);
|
||||
_free_base(p);
|
||||
}
|
||||
|
||||
|
||||
// the `recalloc`,`expand`, and `msize` don't have base versions and thus need a separate term version
|
||||
|
||||
static void* mi__recalloc_dbg(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
|
||||
UNUSED(block_type); UNUSED(fname); UNUSED(line);
|
||||
return mi_recalloc(p, count, size);
|
||||
}
|
||||
|
||||
static void* mi__expand_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
|
||||
UNUSED(block_type); UNUSED(fname); UNUSED(line);
|
||||
return mi__expand(p, size);
|
||||
}
|
||||
|
||||
static size_t mi__msize_dbg(void* p, int block_type) {
|
||||
UNUSED(block_type);
|
||||
return mi_usable_size(p);
|
||||
}
|
||||
|
||||
static void* mi__recalloc_dbg_term(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
|
||||
UNUSED(block_type); UNUSED(fname); UNUSED(line);
|
||||
return mi__recalloc_term(p, count, size);
|
||||
}
|
||||
|
||||
static void* mi__expand_dbg_term(void* p, size_t size, int block_type, const char* fname, int line) {
|
||||
UNUSED(block_type); UNUSED(fname); UNUSED(line);
|
||||
return mi__expand_term(p, size);
|
||||
}
|
||||
|
||||
static size_t mi__msize_dbg_term(void* p, int block_type) {
|
||||
UNUSED(block_type);
|
||||
return mi__msize_term(p);
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// implement our own global atexit handler
|
||||
// ------------------------------------------------------
|
||||
typedef void (cbfun_t)(void);
|
||||
typedef int (atexit_fun_t)(cbfun_t* fn);
|
||||
typedef uintptr_t encoded_t;
|
||||
|
||||
typedef struct exit_list_s {
|
||||
encoded_t functions; // encoded pointer to array of encoded function pointers
|
||||
size_t count;
|
||||
size_t capacity;
|
||||
} exit_list_t;
|
||||
|
||||
#define MI_EXIT_INC (64)
|
||||
|
||||
static exit_list_t atexit_list = { 0, 0, 0 };
|
||||
static exit_list_t at_quick_exit_list = { 0, 0, 0 };
|
||||
static CRITICAL_SECTION atexit_lock;
|
||||
|
||||
// encode/decode function pointers with a random canary for security
|
||||
static encoded_t canary;
|
||||
|
||||
static inline void *decode(encoded_t x) {
|
||||
return (void*)(x^canary);
|
||||
}
|
||||
|
||||
static inline encoded_t encode(void* p) {
|
||||
return ((uintptr_t)p ^ canary);
|
||||
}
|
||||
|
||||
|
||||
static void init_canary()
|
||||
{
|
||||
canary = _mi_random_init(0);
|
||||
atexit_list.functions = at_quick_exit_list.functions = encode(NULL);
|
||||
}
|
||||
|
||||
|
||||
// initialize the list
|
||||
static void mi_initialize_atexit(void) {
|
||||
InitializeCriticalSection(&atexit_lock);
|
||||
init_canary();
|
||||
}
|
||||
|
||||
// register an exit function
|
||||
static int mi_register_atexit(exit_list_t* list, cbfun_t* fn) {
|
||||
if (fn == NULL) return EINVAL;
|
||||
EnterCriticalSection(&atexit_lock);
|
||||
encoded_t* functions = (encoded_t*)decode(list->functions);
|
||||
if (list->count >= list->capacity) { // at first `functions == decode(0) == NULL`
|
||||
encoded_t* newf = (encoded_t*)mi_recalloc(functions, list->capacity + MI_EXIT_INC, sizeof(cbfun_t*));
|
||||
if (newf != NULL) {
|
||||
list->capacity += MI_EXIT_INC;
|
||||
list->functions = encode(newf);
|
||||
functions = newf;
|
||||
}
|
||||
}
|
||||
int result;
|
||||
if (list->count < list->capacity && functions != NULL) {
|
||||
functions[list->count] = encode(fn);
|
||||
list->count++;
|
||||
result = 0; // success
|
||||
}
|
||||
else {
|
||||
result = ENOMEM;
|
||||
}
|
||||
LeaveCriticalSection(&atexit_lock);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Register a global `atexit` function
|
||||
static int mi_atexit(cbfun_t* fn) {
|
||||
return mi_register_atexit(&atexit_list,fn);
|
||||
}
|
||||
|
||||
static int mi_at_quick_exit(cbfun_t* fn) {
|
||||
return mi_register_atexit(&at_quick_exit_list,fn);
|
||||
}
|
||||
|
||||
static int mi_register_onexit(void* table, cbfun_t* fn) {
|
||||
// TODO: how can we distinguish a quick_exit from atexit?
|
||||
return mi_atexit(fn);
|
||||
}
|
||||
|
||||
// Execute exit functions in a list
|
||||
static void mi_execute_exit_list(exit_list_t* list) {
|
||||
// copy and zero the list structure
|
||||
EnterCriticalSection(&atexit_lock);
|
||||
exit_list_t clist = *list;
|
||||
memset(list,0,sizeof(*list));
|
||||
LeaveCriticalSection(&atexit_lock);
|
||||
|
||||
// now execute the functions outside of the lock
|
||||
encoded_t* functions = (encoded_t*)decode(clist.functions);
|
||||
if (functions != NULL) {
|
||||
for (size_t i = clist.count; i > 0; i--) { // careful with unsigned count down..
|
||||
cbfun_t* fn = (cbfun_t*)decode(functions[i-1]);
|
||||
if (fn==NULL) break; // corrupted!
|
||||
fn();
|
||||
}
|
||||
mi_free(functions);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Jump assembly instructions for patches
|
||||
// ------------------------------------------------------
|
||||
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
|
||||
#define MI_JUMP_SIZE 14 // at most 2+4+8 for a long jump or 1+5 for a short one
|
||||
|
||||
typedef struct mi_jump_s {
|
||||
uint8_t opcodes[MI_JUMP_SIZE];
|
||||
} mi_jump_t;
|
||||
|
||||
void mi_jump_restore(void* current, const mi_jump_t* saved) {
|
||||
memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
|
||||
}
|
||||
|
||||
void mi_jump_write(void* current, void* target, mi_jump_t* save) {
|
||||
if (save != NULL) {
|
||||
memcpy(&save->opcodes, current, MI_JUMP_SIZE);
|
||||
}
|
||||
uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
|
||||
ptrdiff_t diff = (uint8_t*)target - (uint8_t*)current;
|
||||
uint32_t ofs32 = (uint32_t)diff;
|
||||
#ifdef _M_X64
|
||||
uint64_t ofs64 = (uint64_t)diff;
|
||||
if (ofs64 != (uint64_t)ofs32) {
|
||||
// use long jump
|
||||
opcodes[0] = 0xFF;
|
||||
opcodes[1] = 0x25;
|
||||
*((uint32_t*)&opcodes[2]) = 0;
|
||||
*((uint64_t*)&opcodes[6]) = (uint64_t)target;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
// use short jump
|
||||
opcodes[0] = 0xE9;
|
||||
*((uint32_t*)&opcodes[1]) = ofs32 - 5 /* size of the short jump instruction */;
|
||||
}
|
||||
}
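// For reference (standard x86-64 encodings, not specific to this commit): the long form
// `FF 25 00000000` is `jmp qword ptr [rip+0]`, jumping through the 8-byte absolute address
// stored directly after the instruction (2+4+8 = 14 bytes); the short form `E9 rel32` takes
// an offset relative to the end of the 5-byte instruction, hence the `ofs32 - 5` adjustment.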
|
||||
|
||||
#elif defined(_M_ARM64)
|
||||
|
||||
#define MI_JUMP_SIZE 16
|
||||
|
||||
typedef struct mi_jump_s {
|
||||
uint8_t opcodes[MI_JUMP_SIZE];
|
||||
} mi_jump_t;
|
||||
|
||||
void mi_jump_restore(void* current, const mi_jump_t* saved) {
|
||||
memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
|
||||
}
|
||||
|
||||
void mi_jump_write(void* current, void* target, mi_jump_t* save) {
|
||||
if (save != NULL) {
|
||||
memcpy(&save->opcodes, current, MI_JUMP_SIZE);
|
||||
}
|
||||
uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
|
||||
uint64_t diff = (uint8_t*)target - (uint8_t*)current;
|
||||
|
||||
// 0x50 0x00 0x00 0x58 ldr x16, .+8 # load PC relative +8
|
||||
// 0x00 0x02 0x3F 0xD6 blr x16 # and jump
|
||||
// <address>
|
||||
// <address>
|
||||
static const uint8_t jump_opcodes[8] = { 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x3F, 0xD6 };
|
||||
memcpy(&opcodes[0], jump_opcodes, sizeof(jump_opcodes));
|
||||
*((uint64_t*)&opcodes[8]) = diff;
|
||||
}
|
||||
|
||||
#else
|
||||
#error "define jump instructions for this platform"
|
||||
#endif
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Patches
|
||||
// ------------------------------------------------------
|
||||
typedef enum patch_apply_e {
|
||||
PATCH_NONE,
|
||||
PATCH_TARGET,
|
||||
PATCH_TARGET_TERM
|
||||
} patch_apply_t;
|
||||
|
||||
#define MAX_ENTRIES 4 // maximum number of patched entry points (like `malloc` in ucrtbase and msvcrt)
|
||||
|
||||
typedef struct mi_patch_s {
|
||||
const char* name; // name of the function to patch
|
||||
void* target; // the address of the new target (never NULL)
|
||||
void* target_term; // the address of the target during termination (or NULL)
|
||||
patch_apply_t applied; // what target has been applied?
|
||||
void* originals[MAX_ENTRIES]; // the resolved addresses of the function (or NULLs)
|
||||
mi_jump_t saves[MAX_ENTRIES]; // the saved instructions in case it was applied
|
||||
} mi_patch_t;
|
||||
|
||||
#define MI_PATCH_NAME3(name,target,term) { name, &target, &term, PATCH_NONE, {NULL,NULL,NULL,NULL} }
|
||||
#define MI_PATCH_NAME2(name,target) { name, &target, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
|
||||
#define MI_PATCH3(name,target,term) MI_PATCH_NAME3(#name, target, term)
|
||||
#define MI_PATCH2(name,target) MI_PATCH_NAME2(#name, target)
|
||||
#define MI_PATCH1(name) MI_PATCH2(name,mi_##name)
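// For example, MI_PATCH2(malloc, mi_malloc) expands to
//   { "malloc", &mi_malloc, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
// leaving the `saves` member zero-initialized.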
|
||||
|
||||
static mi_patch_t patches[] = {
|
||||
// we implement our own global exit handler (as the CRT versions do a realloc internally)
|
||||
//MI_PATCH2(_crt_atexit, mi_atexit),
|
||||
//MI_PATCH2(_crt_at_quick_exit, mi_at_quick_exit),
|
||||
MI_PATCH2(_setmaxstdio, mi_setmaxstdio),
|
||||
MI_PATCH2(_register_onexit_function, mi_register_onexit),
|
||||
|
||||
// override higher level atexit functions so we can implement at_quick_exit correctly
|
||||
MI_PATCH2(atexit, mi_atexit),
|
||||
MI_PATCH2(at_quick_exit, mi_at_quick_exit),
|
||||
|
||||
// regular entries
|
||||
MI_PATCH2(malloc, mi_malloc),
|
||||
MI_PATCH2(calloc, mi_calloc),
|
||||
MI_PATCH3(realloc, mi_realloc,mi_realloc_term),
|
||||
MI_PATCH3(free, mi_free,mi_free_term),
|
||||
|
||||
// extended api
|
||||
MI_PATCH2(_strdup, mi_strdup),
|
||||
MI_PATCH2(_strndup, mi_strndup),
|
||||
MI_PATCH3(_expand, mi__expand,mi__expand_term),
|
||||
MI_PATCH3(_recalloc, mi_recalloc,mi__recalloc_term),
|
||||
MI_PATCH3(_msize, mi_usable_size,mi__msize_term),
|
||||
|
||||
// base versions
|
||||
MI_PATCH2(_malloc_base, mi_malloc),
|
||||
MI_PATCH2(_calloc_base, mi_calloc),
|
||||
MI_PATCH3(_realloc_base, mi_realloc,mi_realloc_term),
|
||||
MI_PATCH3(_free_base, mi_free,mi_free_term),
|
||||
|
||||
// these base versions are in the crt but without import records
|
||||
MI_PATCH_NAME3("_recalloc_base", mi_recalloc,mi__recalloc_term),
|
||||
MI_PATCH_NAME3("_msize_base", mi_usable_size,mi__msize_term),
|
||||
|
||||
// debug
|
||||
MI_PATCH2(_malloc_dbg, mi__malloc_dbg),
|
||||
MI_PATCH2(_realloc_dbg, mi__realloc_dbg),
|
||||
MI_PATCH2(_calloc_dbg, mi__calloc_dbg),
|
||||
MI_PATCH2(_free_dbg, mi__free_dbg),
|
||||
|
||||
MI_PATCH3(_expand_dbg, mi__expand_dbg, mi__expand_dbg_term),
|
||||
MI_PATCH3(_recalloc_dbg, mi__recalloc_dbg, mi__recalloc_dbg_term),
|
||||
MI_PATCH3(_msize_dbg, mi__msize_dbg, mi__msize_dbg_term),
|
||||
|
||||
#if 0
|
||||
// override new/delete variants for efficiency (?)
|
||||
#ifdef _WIN64
|
||||
// 64 bit new/delete
|
||||
MI_PATCH_NAME2("??2@YAPEAX_K@Z", mi_new),
|
||||
MI_PATCH_NAME2("??_U@YAPEAX_K@Z", mi_new),
|
||||
MI_PATCH_NAME3("??3@YAXPEAX@Z", mi_free, mi_free_term),
|
||||
MI_PATCH_NAME3("??_V@YAXPEAX@Z", mi_free, mi_free_term),
|
||||
MI_PATCH_NAME3("??3@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized
|
||||
MI_PATCH_NAME3("??_V@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized
|
||||
MI_PATCH_NAME2("??2@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new),
|
||||
MI_PATCH_NAME2("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new),
|
||||
MI_PATCH_NAME3("??3@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
|
||||
MI_PATCH_NAME3("??_V@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
|
||||
|
||||
|
||||
#else
|
||||
// 32 bit new/delete
|
||||
MI_PATCH_NAME2("??2@YAPAXI@Z", mi_new),
|
||||
MI_PATCH_NAME2("??_U@YAPAXI@Z", mi_new),
|
||||
MI_PATCH_NAME3("??3@YAXPAX@Z", mi_free, mi_free_term),
|
||||
MI_PATCH_NAME3("??_V@YAXPAX@Z", mi_free, mi_free_term),
|
||||
MI_PATCH_NAME3("??3@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized
|
||||
MI_PATCH_NAME3("??_V@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized
|
||||
|
||||
MI_PATCH_NAME2("??2@YAPAXIABUnothrow_t@std@@@Z", mi_new),
|
||||
MI_PATCH_NAME2("??_U@YAPAXIABUnothrow_t@std@@@Z", mi_new),
|
||||
MI_PATCH_NAME3("??3@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
|
||||
MI_PATCH_NAME3("??_V@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
|
||||
|
||||
#endif
|
||||
#endif
|
||||
{ NULL, NULL, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
|
||||
};
|
||||
|
||||
|
||||
// Apply a patch
|
||||
static bool mi_patch_apply(mi_patch_t* patch, patch_apply_t apply)
|
||||
{
|
||||
if (patch->originals[0] == NULL) return true; // unresolved
|
||||
if (apply == PATCH_TARGET_TERM && patch->target_term == NULL) apply = PATCH_TARGET; // avoid re-applying non-term variants
|
||||
if (patch->applied == apply) return false;
|
||||
|
||||
for (int i = 0; i < MAX_ENTRIES; i++) {
|
||||
void* original = patch->originals[i];
|
||||
if (original == NULL) break; // no more
|
||||
|
||||
DWORD protect = PAGE_READWRITE;
|
||||
if (!VirtualProtect(original, MI_JUMP_SIZE, PAGE_EXECUTE_READWRITE, &protect)) return false;
|
||||
if (apply == PATCH_NONE) {
|
||||
mi_jump_restore(original, &patch->saves[i]);
|
||||
}
|
||||
else {
|
||||
void* target = (apply == PATCH_TARGET ? patch->target : patch->target_term);
|
||||
mi_assert_internal(target != NULL);
|
||||
if (target != NULL) mi_jump_write(original, target, &patch->saves[i]);
|
||||
}
|
||||
VirtualProtect(original, MI_JUMP_SIZE, protect, &protect);
|
||||
}
|
||||
patch->applied = apply;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Apply all patches
|
||||
static bool _mi_patches_apply(patch_apply_t apply, patch_apply_t* previous) {
|
||||
static patch_apply_t current = PATCH_NONE;
|
||||
if (previous != NULL) *previous = current;
|
||||
if (current == apply) return true;
|
||||
current = apply;
|
||||
bool ok = true;
|
||||
for (size_t i = 0; patches[i].name != NULL; i++) {
|
||||
if (!mi_patch_apply(&patches[i], apply)) ok = false;
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
// Export the following three functions just in case
|
||||
// a user needs that level of control.
|
||||
|
||||
// Disable all patches
|
||||
mi_decl_export void mi_patches_disable(void) {
|
||||
_mi_patches_apply(PATCH_NONE, NULL);
|
||||
}
|
||||
|
||||
// Enable all patches normally
|
||||
mi_decl_export bool mi_patches_enable(void) {
|
||||
return _mi_patches_apply( PATCH_TARGET, NULL );
|
||||
}
|
||||
|
||||
// Enable all patches in termination phase where free is a no-op
|
||||
mi_decl_export bool mi_patches_enable_term(void) {
|
||||
return _mi_patches_apply(PATCH_TARGET_TERM, NULL);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Stub for _setmaxstdio
|
||||
// ------------------------------------------------------
|
||||
|
||||
static int __cdecl mi_setmaxstdio(int newmax) {
|
||||
patch_apply_t previous;
|
||||
_mi_patches_apply(PATCH_NONE, &previous); // disable patches
|
||||
int result = _setmaxstdio(newmax); // call original function (that calls original CRT recalloc)
|
||||
_mi_patches_apply(previous,NULL); // and re-enable patches
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Resolve addresses dynamically
|
||||
// ------------------------------------------------------
|
||||
|
||||
// Try to resolve patches for a given module (DLL)
|
||||
static void mi_module_resolve(const char* fname, HMODULE mod, int priority) {
|
||||
// see if any patches apply
|
||||
for (size_t i = 0; patches[i].name != NULL; i++) {
|
||||
mi_patch_t* patch = &patches[i];
|
||||
if (patch->applied == PATCH_NONE) {
|
||||
// find an available entry
|
||||
int i = 0;
|
||||
while (i < MAX_ENTRIES && patch->originals[i] != NULL) i++;
|
||||
if (i < MAX_ENTRIES) {
|
||||
void* addr = GetProcAddress(mod, patch->name);
|
||||
if (addr != NULL) {
|
||||
// found it! set the address
|
||||
patch->originals[i] = addr;
|
||||
_mi_trace_message(" found %s at %s!%p (entry %i)\n", patch->name, fname, addr, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define MIMALLOC_NAME "mimalloc-override.dll"
|
||||
#define UCRTBASE_NAME "ucrtbase.dll"
|
||||
#define UCRTBASED_NAME "ucrtbased.dll"
|
||||
|
||||
// Resolve addresses of all patches by inspecting the loaded modules
|
||||
static atexit_fun_t* crt_atexit = NULL;
|
||||
static atexit_fun_t* crt_at_quick_exit = NULL;
|
||||
|
||||
|
||||
static bool mi_patches_resolve(void) {
|
||||
// get all loaded modules
|
||||
HANDLE process = GetCurrentProcess(); // always -1, no need to release
|
||||
DWORD needed = 0;
|
||||
HMODULE modules[400]; // try to stay under 4k to not trigger the guard page
|
||||
EnumProcessModules(process, modules, sizeof(modules), &needed);
|
||||
if (needed == 0) return false;
|
||||
int count = needed / sizeof(HMODULE);
|
||||
int ucrtbase_index = 0;
|
||||
int mimalloc_index = 0;
|
||||
// iterate through the loaded modules
|
||||
for (int i = 0; i < count; i++) {
|
||||
HMODULE mod = modules[i];
|
||||
char filename[MAX_PATH] = { 0 };
|
||||
DWORD slen = GetModuleFileName(mod, filename, MAX_PATH);
|
||||
if (slen > 0 && slen < MAX_PATH) {
|
||||
// filter out potential crt modules only
|
||||
filename[slen] = 0;
|
||||
const char* lastsep = strrchr(filename, '\\');
|
||||
const char* basename = (lastsep==NULL ? filename : lastsep+1);
|
||||
_mi_trace_message(" %i: dynamic module %s\n", i, filename);
|
||||
|
||||
// remember indices so we can check load order (in debug mode)
|
||||
if (_stricmp(basename, MIMALLOC_NAME) == 0) mimalloc_index = i;
|
||||
if (_stricmp(basename, UCRTBASE_NAME) == 0) ucrtbase_index = i;
|
||||
if (_stricmp(basename, UCRTBASED_NAME) == 0) ucrtbase_index = i;
|
||||
|
||||
// see if we potentially patch in this module
|
||||
int priority = 0;
|
||||
if (i == 0) priority = 2; // main module to allow static crt linking
|
||||
else if (_strnicmp(basename, "ucrt", 4) == 0) priority = 3; // new ucrtbase.dll in windows 10
|
||||
// NOTE: don't override msvcr -- leads to crashes in setlocale (needs more testing)
|
||||
// else if (_strnicmp(basename, "msvcr", 5) == 0) priority = 1; // older runtimes
|
||||
|
||||
if (priority > 0) {
|
||||
// probably found a crt module, try to patch it
|
||||
mi_module_resolve(basename,mod,priority);
|
||||
|
||||
// try to find the atexit functions for the main process (in `ucrtbase.dll`)
|
||||
if (crt_atexit==NULL) crt_atexit = (atexit_fun_t*)GetProcAddress(mod, "_crt_atexit");
|
||||
if (crt_at_quick_exit == NULL) crt_at_quick_exit = (atexit_fun_t*)GetProcAddress(mod, "_crt_at_quick_exit");
|
||||
}
|
||||
}
|
||||
}
|
||||
int diff = mimalloc_index - ucrtbase_index;
|
||||
if (diff > 1) {
|
||||
_mi_warning_message("warning: the \"mimalloc-override\" DLL seems not to load before or right after the C runtime (\"ucrtbase\").\n"
|
||||
" Try to fix this by changing the linking order.\n");
|
||||
}
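// (Illustration: if ucrtbase.dll is found at module index 3 but mimalloc-override.dll only
//  at index 7, diff == 4 and the warning above is printed.)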
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------
|
||||
// Dll Entry
|
||||
// ------------------------------------------------------
|
||||
|
||||
extern BOOL WINAPI _DllMainCRTStartup(HINSTANCE inst, DWORD reason, LPVOID reserved);
|
||||
|
||||
static DWORD mi_fls_unwind_entry;
|
||||
static void NTAPI mi_fls_unwind(PVOID value) {
|
||||
if (value != NULL) mi_patches_enable(); // and re-enable normal patches again for DLL's loaded after us
|
||||
return;
|
||||
}
|
||||
|
||||
static void mi_patches_atexit(void) {
|
||||
mi_execute_exit_list(&atexit_list);
|
||||
mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op
|
||||
}
|
||||
|
||||
static void mi_patches_at_quick_exit(void) {
|
||||
mi_execute_exit_list(&at_quick_exit_list);
|
||||
mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op
|
||||
}
|
||||
|
||||
__declspec(dllexport) BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID reserved) {
|
||||
if (reason == DLL_PROCESS_ATTACH) {
|
||||
__security_init_cookie();
|
||||
}
|
||||
else if (reason == DLL_PROCESS_DETACH) {
|
||||
// enter termination phase for good now
|
||||
mi_patches_enable_term();
|
||||
}
|
||||
// C runtime main
|
||||
BOOL ok = _DllMainCRTStartup(inst, reason, reserved);
|
||||
if (reason == DLL_PROCESS_ATTACH && ok) {
|
||||
// initialize at exit lists
|
||||
mi_initialize_atexit();
|
||||
|
||||
// Now resolve patches
|
||||
ok = mi_patches_resolve();
|
||||
if (ok) {
|
||||
// check if patching is not disabled
|
||||
#pragma warning(suppress:4996)
|
||||
const char* s = getenv("MIMALLOC_DISABLE_OVERRIDE");
|
||||
bool enabled = (s == NULL || !(strstr("1;TRUE;YES;ON", s) != NULL));
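// (Usage note, not from the commit: e.g. `set MIMALLOC_DISABLE_OVERRIDE=1` in the shell
//  before starting the program keeps the CRT allocator unpatched; a value that does not
//  occur in "1;TRUE;YES;ON" leaves the override enabled.)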
|
||||
if (!enabled) {
|
||||
_mi_verbose_message("override is disabled\n");
|
||||
}
|
||||
else {
|
||||
// and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from GetProcAddress)
|
||||
mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind);
|
||||
if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) {
|
||||
FlsSetValue(mi_fls_unwind_entry, (void*)1);
|
||||
}
|
||||
|
||||
// register our patch disabler in the global exit list
|
||||
if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit);
|
||||
if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit);
|
||||
|
||||
// and patch ! this also redirects the `atexit` handling for the global exit list
|
||||
mi_patches_enable();
|
||||
_mi_verbose_message("override is enabled\n");
|
||||
|
||||
// hide internal allocation
|
||||
mi_stats_reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
return ok;
|
||||
}
|
|
@@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
#error "this file should be included from 'alloc.c' (so aliases can work)"
|
||||
#endif
|
||||
|
||||
#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL) && defined(_WIN64))
|
||||
#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL))
|
||||
#error "It is only possible to override "malloc" on Windows when building as a 64-bit DLL (and linking the C runtime as a DLL)"
|
||||
#endif
|
||||
|
||||
|
|
|
@@ -38,7 +38,9 @@ size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
|
|||
}
|
||||
|
||||
void mi_cfree(void* p) mi_attr_noexcept {
|
||||
mi_free(p);
|
||||
if (mi_is_in_heap_region(p)) {
|
||||
mi_free(p);
|
||||
}
|
||||
}
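// Note: the region check means pointers that were not allocated by mimalloc are now
// ignored rather than freed (rationale inferred, not stated in the commit).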
|
||||
|
||||
int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept {
|
||||
|
|
30
src/alloc.c
|
@@ -57,6 +57,7 @@ extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept {
|
|||
return mi_heap_malloc_small(mi_get_default_heap(), size);
|
||||
}
|
||||
|
||||
|
||||
// zero initialized small block
|
||||
void* mi_zalloc_small(size_t size) mi_attr_noexcept {
|
||||
void* p = mi_malloc_small(size);
|
||||
|
@@ -71,7 +72,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep
|
|||
void* p;
|
||||
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
|
||||
p = mi_heap_malloc_small(heap, size);
|
||||
}
|
||||
}
|
||||
else {
|
||||
p = _mi_malloc_generic(heap, size);
|
||||
}
|
||||
|
@@ -223,8 +224,7 @@ void mi_free(void* p) mi_attr_noexcept
|
|||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool local = (_mi_thread_id() == segment->thread_id); // preload, note: putting the thread_id in the page->flags does not improve performance
|
||||
|
||||
mi_page_t* page = _mi_segment_page_of(segment, p);
|
||||
|
||||
#if (MI_STAT>1)
|
||||
|
@@ -236,24 +236,18 @@ void mi_free(void* p) mi_attr_noexcept
|
|||
// huge page stat is accounted for in `_mi_page_retire`
|
||||
#endif
|
||||
|
||||
// adjust if it might be an un-aligned block
|
||||
if (mi_likely(page->flags.value==0)) { // not full or aligned
|
||||
uintptr_t tid = _mi_thread_id();
|
||||
if (mi_likely(tid == page->flags.value)) {
|
||||
// local, and not full or aligned
|
||||
mi_block_t* block = (mi_block_t*)p;
|
||||
if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance
|
||||
// owning thread can free a block directly
|
||||
mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
|
||||
page->local_free = block;
|
||||
page->used--;
|
||||
if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
|
||||
}
|
||||
else {
|
||||
// use atomic operations for a multi-threaded free
|
||||
_mi_free_block_mt(page, block);
|
||||
}
|
||||
mi_block_set_next(page, block, page->local_free);
|
||||
page->local_free = block;
|
||||
page->used--;
|
||||
if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
|
||||
}
|
||||
else {
|
||||
// aligned blocks, or a full page; use the more generic path
|
||||
mi_free_generic(segment, page, local, p);
|
||||
// non-local, aligned blocks, or a full page; use the more generic path
|
||||
mi_free_generic(segment, page, tid == mi_page_thread_id(page), p);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
39
src/init.c
|
@@ -12,15 +12,16 @@ terms of the MIT license. A copy of the license can be found in the file
|
|||
|
||||
// Empty page used to initialize the small free pages array
|
||||
const mi_page_t _mi_page_empty = {
|
||||
0, false, false, false, {0}, 0, 0,
|
||||
NULL, 0, // free, used
|
||||
0, false, false, false, 0, 0,
|
||||
NULL, // free
|
||||
#if MI_SECURE
|
||||
0,
|
||||
#endif
|
||||
0, {0}, // used, flags
|
||||
NULL, 0, 0,
|
||||
0, NULL, NULL, NULL
|
||||
#if (MI_INTPTR_SIZE==4)
|
||||
, { NULL }
|
||||
#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
|
||||
, { NULL }
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@@ -33,22 +34,23 @@ const mi_page_t _mi_page_empty = {
|
|||
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
|
||||
#define MI_PAGE_QUEUES_EMPTY \
|
||||
{ QNULL(1), \
|
||||
QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \
|
||||
QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \
|
||||
QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \
|
||||
QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \
|
||||
QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \
|
||||
QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \
|
||||
QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \
|
||||
QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \
|
||||
QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \
|
||||
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
|
||||
QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \
|
||||
QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \
|
||||
QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \
|
||||
QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \
|
||||
QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
|
||||
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
|
||||
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
|
||||
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \
|
||||
QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \
|
||||
QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
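// (The trailing /* 8 */ ... /* 69 */ markers give the array index of the last queue on each
//  row; the Huge queue then sits at index 70, matching MI_BIN_HUGE, and the Full queue at 71.)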
|
||||
|
||||
#define MI_STAT_COUNT_NULL() {0,0,0,0}
|
||||
|
||||
// Empty statistics
|
||||
#if MI_STAT>1
|
||||
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) }
|
||||
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) }
|
||||
#else
|
||||
#define MI_STAT_COUNT_END_NULL()
|
||||
#endif
|
||||
|
@@ -61,7 +63,8 @@ const mi_page_t _mi_page_empty = {
|
|||
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
|
||||
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
|
||||
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
|
||||
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
|
||||
MI_STAT_COUNT_NULL(), \
|
||||
{ 0, 0 }, \
|
||||
{ 0, 0 } \
|
||||
MI_STAT_COUNT_END_NULL()
|
||||
|
||||
|
@@ -95,8 +98,8 @@ static mi_tld_t tld_main = {
|
|||
0,
|
||||
&_mi_heap_main,
|
||||
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
|
||||
{ 0, NULL, NULL, 0, tld_main_stats }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
{ 0, NULL, NULL, 0, tld_main_stats }, // os
|
||||
{ MI_STATS_NULL } // stats
|
||||
};
|
||||
|
||||
mi_heap_t _mi_heap_main = {
|
||||
|
@@ -385,7 +388,7 @@ bool _mi_preloading() {
|
|||
}
|
||||
|
||||
// Communicate with the redirection module on Windows
|
||||
#if defined(_WIN32) && defined(MI_SHARED_LIB)
|
||||
#if 0
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
|
|
@@ -106,6 +106,7 @@ static size_t mi_good_commit_size(size_t size) {
|
|||
|
||||
// Return if a pointer points into a region reserved by us.
|
||||
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
|
||||
if (p==NULL) return false;
|
||||
size_t count = mi_atomic_read(&regions_count);
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
uint8_t* start = (uint8_t*)mi_atomic_read_ptr(&regions[i].start);
|
||||
|
|
|
@@ -97,7 +97,7 @@ uint8_t _mi_bsr(uintptr_t x) {
|
|||
// Returns MI_BIN_HUGE if the size is too large.
|
||||
// We use `wsize` for the size in "machine word sizes",
|
||||
// i.e. byte size == `wsize*sizeof(void*)`.
|
||||
inline uint8_t _mi_bin(size_t size) {
|
||||
extern inline uint8_t _mi_bin(size_t size) {
|
||||
size_t wsize = _mi_wsize_from_size(size);
|
||||
uint8_t bin;
|
||||
if (wsize <= 1) {
|
||||
|
@@ -120,13 +120,13 @@ inline uint8_t _mi_bin(size_t size) {
|
|||
bin = MI_BIN_HUGE;
|
||||
}
|
||||
else {
|
||||
#if defined(MI_ALIGN4W)
|
||||
#if defined(MI_ALIGN4W)
|
||||
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
|
||||
#endif
|
||||
wsize--;
|
||||
// find the highest bit
|
||||
uint8_t b = mi_bsr32((uint32_t)wsize);
|
||||
// and use the top 3 bits to determine the bin (~16% worst internal fragmentation).
|
||||
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
|
||||
// - adjust with 3 because we do not round the first 8 sizes
|
||||
// which each get an exact bin
|
||||
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
|
||||
|
|
148
src/page.c
|
@@ -71,10 +71,11 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
|
|||
mi_assert_internal(page->block_size > 0);
|
||||
mi_assert_internal(page->used <= page->capacity);
|
||||
mi_assert_internal(page->capacity <= page->reserved);
|
||||
|
||||
|
||||
mi_segment_t* segment = _mi_page_segment(page);
|
||||
uint8_t* start = _mi_page_start(segment,page,NULL);
|
||||
mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
|
||||
mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page));
|
||||
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
|
||||
|
||||
mi_assert_internal(mi_page_list_is_valid(page,page->free));
|
||||
|
@@ -216,7 +217,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
|
|||
mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
|
||||
if (page == NULL) return NULL;
|
||||
mi_page_init(heap, page, block_size, &heap->tld->stats);
|
||||
mi_heap_stat_increase( heap, pages, 1);
|
||||
_mi_stat_increase( &heap->tld->stats.pages, 1);
|
||||
mi_page_queue_push(heap, pq, page);
|
||||
mi_assert_expensive(_mi_page_is_valid(page));
|
||||
return page;
|
||||
|
@ -352,7 +353,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {

  // account for huge pages here
  if (page->block_size > MI_LARGE_SIZE_MAX) {
    mi_heap_stat_decrease(page->heap, huge, page->block_size);
    _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
  }

  // remove from the page list
@ -384,8 +385,9 @@ void _mi_page_retire(mi_page_t* page) {
  // is the only page left with free blocks. It is not clear
  // how to check this efficiently though... for now we just check
  // if its neighbours are almost fully used.
  if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
  if (mi_likely(page->block_size <= MI_MEDIUM_SIZE_MAX)) {
    if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
      _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1);
      return; // dont't retire after all
    }
  }
@ -404,7 +406,60 @@ void _mi_page_retire(mi_page_t* page) {
#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES (2)

static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats)
static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) {
  UNUSED(stats);
  mi_assert_internal(page->free == NULL);
  mi_assert_internal(page->local_free == NULL);
  mi_assert_internal(page->capacity + extend <= page->reserved);
  void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
  size_t bsize = page->block_size;

  // initialize a randomized free list
  // set up `slice_count` slices to alternate between
  size_t shift = MI_MAX_SLICE_SHIFT;
  while ((extend >> shift) == 0) {
    shift--;
  }
  size_t slice_count = (size_t)1U << shift;
  size_t slice_extend = extend / slice_count;
  mi_assert_internal(slice_extend >= 1);
  mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
  size_t counts[MI_MAX_SLICES]; // available objects in the slice
  for (size_t i = 0; i < slice_count; i++) {
    blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend);
    counts[i] = slice_extend;
  }
  counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?)

  // and initialize the free list by randomly threading through them
  // set up first element
  size_t current = _mi_heap_random(heap) % slice_count;
  counts[current]--;
  page->free = blocks[current];
  // and iterate through the rest
  uintptr_t rnd = heap->random;
  for (size_t i = 1; i < extend; i++) {
    // call random_shuffle only every INTPTR_SIZE rounds
    size_t round = i%MI_INTPTR_SIZE;
    if (round == 0) rnd = _mi_random_shuffle(rnd);
    // select a random next slice index
    size_t next = ((rnd >> 8*round) & (slice_count-1));
    while (counts[next]==0) { // ensure it still has space
      next++;
      if (next==slice_count) next = 0;
    }
    // and link the current block to it
    counts[next]--;
    mi_block_t* block = blocks[current];
    blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
    mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
    current = next;
  }
  mi_block_set_next(page, blocks[current], NULL); // end of the list
  heap->random = _mi_random_shuffle(rnd);
}

static void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
{
  UNUSED(stats);
  mi_assert_internal(page->free == NULL);
@ -413,66 +468,17 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
  void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
  size_t bsize = page->block_size;
  mi_block_t* start = mi_page_block_at(page, page_area, page->capacity);
  if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
    // initialize a sequential free list
    mi_block_t* end = mi_page_block_at(page, page_area, page->capacity + extend - 1);
    mi_block_t* block = start;
    for (size_t i = 0; i < extend; i++) {
      mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
      mi_block_set_next(page,block,next);
      block = next;
    }
    mi_block_set_next(page, end, NULL);
    page->free = start;
  }
  else {
    // initialize a randomized free list
    // set up `slice_count` slices to alternate between
    size_t shift = MI_MAX_SLICE_SHIFT;
    while ((extend >> shift) == 0) {
      shift--;
    }
    size_t slice_count = (size_t)1U << shift;
    size_t slice_extend = extend / slice_count;
    mi_assert_internal(slice_extend >= 1);
    mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
    size_t counts[MI_MAX_SLICES]; // available objects in the slice
    for (size_t i = 0; i < slice_count; i++) {
      blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend);
      counts[i] = slice_extend;
    }
    counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?)

    // and initialize the free list by randomly threading through them
    // set up first element
    size_t current = _mi_heap_random(heap) % slice_count;
    counts[current]--;
    page->free = blocks[current];
    // and iterate through the rest
    uintptr_t rnd = heap->random;
    for (size_t i = 1; i < extend; i++) {
      // call random_shuffle only every INTPTR_SIZE rounds
      size_t round = i%MI_INTPTR_SIZE;
      if (round == 0) rnd = _mi_random_shuffle(rnd);
      // select a random next slice index
      size_t next = ((rnd >> 8*round) & (slice_count-1));
      while (counts[next]==0) { // ensure it still has space
        next++;
        if (next==slice_count) next = 0;
      }
      // and link the current block to it
      counts[next]--;
      mi_block_t* block = blocks[current];
      blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
      mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
      current = next;
    }
    mi_block_set_next( page, blocks[current], NULL); // end of the list
    heap->random = _mi_random_shuffle(rnd);
  // initialize a sequential free list
  mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
  mi_block_t* block = start;
  while(block <= last) {
    mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
    mi_block_set_next(page,block,next);
    block = next;
  }
  // enable the new free list
  page->capacity += (uint16_t)extend;
  _mi_stat_increase(&stats->page_committed, extend * page->block_size);
  mi_block_set_next(page, last, NULL);
  page->free = start;
}

/* -----------------------------------------------------------
@ -518,7 +524,15 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
  mi_assert_internal(extend < (1UL<<16));

  // and append the extend the free list
  mi_page_free_list_extend(heap, page, extend, stats );
  if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
    mi_page_free_list_extend(page, extend, stats );
  }
  else {
    mi_page_free_list_extend_secure(heap, page, extend, stats);
  }
  // enable the new free list
  page->capacity += (uint16_t)extend;
  _mi_stat_increase(&stats->page_committed, extend * page->block_size);

  mi_assert_expensive(mi_page_is_valid_init(page));
}
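In secure mode the extension above threads the new blocks through randomly chosen slices instead of laying them out sequentially, so the order of blocks on the free list is hard to predict. A toy, self-contained sketch of that slice-based threading; the xorshift generator and the fixed NBLOCKS/NSLICES values are stand-ins for the heap's random state and the computed slice_count, and the "blocks" are plain array slots rather than mimalloc pages:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NBLOCKS  64               // number of new blocks to thread onto the free list
#define NSLICES   8               // power of two; NBLOCKS is a multiple of it here

typedef struct block_s { struct block_s* next; } block_t;

static uint64_t rnd_state = 0x9E3779B97F4A7C15ULL;
static uint64_t rnd_next(void) {  // simple xorshift, stand-in for the heap RNG
  rnd_state ^= rnd_state << 13;
  rnd_state ^= rnd_state >> 7;
  rnd_state ^= rnd_state << 17;
  return rnd_state;
}

int main(void) {
  static block_t blocks[NBLOCKS];
  block_t* slice[NSLICES];        // current start of each slice
  size_t   count[NSLICES];        // blocks still available in each slice
  size_t per_slice = NBLOCKS / NSLICES;
  for (size_t i = 0; i < NSLICES; i++) {
    slice[i] = &blocks[i * per_slice];
    count[i] = per_slice;
  }

  // pick a random starting slice, then keep hopping to random slices,
  // always consuming the next sequential block inside the chosen slice
  size_t cur = (size_t)(rnd_next() % NSLICES);
  block_t* head = slice[cur];
  count[cur]--;
  for (size_t i = 1; i < NBLOCKS; i++) {
    size_t next = (size_t)(rnd_next() & (NSLICES - 1));
    while (count[next] == 0) { next = (next + 1) % NSLICES; }  // skip exhausted slices
    count[next]--;
    block_t* b = slice[cur];
    slice[cur] = b + 1;           // bump to the following block in that slice
    b->next = slice[next];        // link to the next slice's head (cur may equal next)
    cur = next;
  }
  slice[cur]->next = NULL;        // terminate the list

  for (block_t* b = head; b != NULL; b = b->next) {
    printf("%ld ", (long)(b - blocks));   // print the randomized block order
  }
  printf("\n");
  return 0;
}

Every block appears exactly once, but consecutive list entries usually come from different slices, so the address returned by the next allocation is much harder to predict.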
@ -688,7 +702,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
  if (page != NULL) {
    mi_assert_internal(mi_page_immediate_available(page));
    mi_assert_internal(page->block_size == block_size);
    mi_heap_stat_increase( heap, huge, block_size);
    _mi_stat_increase( &heap->tld->stats.huge, block_size);
  }
  return page;
}
@ -708,10 +722,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept

  // call potential deferred free routines
  _mi_deferred_free(heap, false);

  // free delayed frees from other threads
  _mi_heap_delayed_free(heap);

  // huge allocation?
  mi_page_t* page;
  if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {
@ -226,6 +226,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {

static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
  segment->thread_id = 0;
  mi_segments_track_size(-((long)segment_size),tld);
  if (mi_option_is_enabled(mi_option_secure)) {
    _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set
@ -235,8 +236,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se

// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
// and no more than 2.
#define MI_SEGMENT_CACHE_MAX (2)
// and no more than 4.
#define MI_SEGMENT_CACHE_MAX (4)
#define MI_SEGMENT_CACHE_FRACTION (8)

// note: returned segment may be partially reset
@ -248,17 +249,19 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t
  tld->cache = segment->next;
  segment->next = NULL;
  mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
  _mi_stat_decrease(&tld->stats->segments_cache, 1);
  return segment;
}

static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
  if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
      tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache
  if (tld->cache_count < MI_SEGMENT_CACHE_MAX
      && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))
     ) { // always allow 1 element cache
    return false;
  }
  // take the opportunity to reduce the segment cache if it is too large (now)
  // TODO: this never happens as we check against peak usage, should we use current usage instead?
  while (tld->cache_count > (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) {
  while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) {
    mi_segment_t* segment = mi_segment_cache_pop(0,tld);
    mi_assert_internal(segment != NULL);
    if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld);
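With this change the thread-local segment cache accepts a segment only while it holds fewer than MI_SEGMENT_CACHE_MAX (now 4) segments and fewer than 1 + peak/MI_SEGMENT_CACHE_FRACTION. A small sketch of that admission rule with a few worked values; cache_has_room is an illustrative helper, not the mimalloc function:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MI_SEGMENT_CACHE_MAX      (4)
#define MI_SEGMENT_CACHE_FRACTION (8)

static bool cache_has_room(size_t cache_count, size_t peak_count) {
  return cache_count < MI_SEGMENT_CACHE_MAX
      && cache_count < (1 + (peak_count / MI_SEGMENT_CACHE_FRACTION)); // always allow one
}

int main(void) {
  printf("%d\n", cache_has_room(0, 0));    // 1: an empty cache always admits one segment
  printf("%d\n", cache_has_room(1, 4));    // 0: 1 + 4/8 = 1 and one segment is already cached
  printf("%d\n", cache_has_room(3, 40));   // 1: 1 + 40/8 = 6, still under the hard cap of 4
  printf("%d\n", cache_has_room(4, 100));  // 0: the hard cap of 4 is reached
  return 0;
}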
@ -269,7 +272,9 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) {
  mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld));
  mi_assert_internal(segment->next == NULL);
  if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) return false;
  if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) {
    return false;
  }
  mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
  if (mi_option_is_enabled(mi_option_cache_reset)) {
    _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats);
@ -277,6 +282,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld)
  segment->next = tld->cache;
  tld->cache = segment;
  tld->cache_count++;
  _mi_stat_increase(&tld->stats->segments_cache,1);
  return true;
}
@ -407,8 +413,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
  mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment));
  mi_assert(segment->next == NULL);
  mi_assert(segment->prev == NULL);
  _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);
  segment->thread_id = 0;
  _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);

  // update reset memory statistics
  /*
@ -613,6 +618,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
    }
    else {
      // otherwise reclaim it
      mi_page_init_flags(page,segment->thread_id);
      _mi_page_reclaim(heap,page);
    }
  }
@ -643,6 +649,7 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl
  mi_assert_internal(mi_segment_has_free(segment));
  mi_page_t* page = mi_segment_find_free(segment, tld->stats);
  page->segment_in_use = true;
  mi_page_init_flags(page,segment->thread_id);
  segment->used++;
  mi_assert_internal(segment->used <= segment->capacity);
  if (segment->used == segment->capacity) {
@ -682,6 +689,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_
  segment->used = 1;
  mi_page_t* page = &segment->pages[0];
  page->segment_in_use = true;
  mi_page_init_flags(page,segment->thread_id);
  return page;
}
@ -693,22 +701,27 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
  segment->used = 1;
  mi_page_t* page = &segment->pages[0];
  page->segment_in_use = true;
  mi_page_init_flags(page,segment->thread_id);
  return page;
}

/* -----------------------------------------------------------
   Page allocation and free
----------------------------------------------------------- */
static bool mi_is_good_fit(size_t bsize, size_t size) {
  // good fit if no more than 25% wasted
  return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4));
}

mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
  mi_page_t* page;
  if (block_size <= (MI_SMALL_PAGE_SIZE/4)) {
  if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) {
    page = mi_segment_small_page_alloc(tld,os_tld);
  }
  else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) {
  else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
    page = mi_segment_medium_page_alloc(tld, os_tld);
  }
  else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {
  else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
    page = mi_segment_large_page_alloc(tld, os_tld);
  }
  else {
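The dispatch above tiers a block size into a small, medium, large, or huge page (the mi_is_good_fit fallback is omitted here). A sketch of that tiering; the threshold values below are illustrative placeholders, not mimalloc's actual constants:

#include <stddef.h>
#include <stdio.h>

typedef enum { PAGE_SMALL, PAGE_MEDIUM, PAGE_LARGE, PAGE_HUGE } page_kind_t;

#define SMALL_SIZE_MAX_DEMO   (8u * 1024)          /* placeholder threshold */
#define MEDIUM_SIZE_MAX_DEMO  (128u * 1024)        /* placeholder threshold */
#define LARGE_SIZE_MAX_DEMO   (4u * 1024 * 1024)   /* placeholder threshold */

static page_kind_t page_kind_for(size_t block_size) {
  if (block_size <= SMALL_SIZE_MAX_DEMO)       return PAGE_SMALL;
  else if (block_size <= MEDIUM_SIZE_MAX_DEMO) return PAGE_MEDIUM;
  else if (block_size <  LARGE_SIZE_MAX_DEMO)  return PAGE_LARGE;
  else                                         return PAGE_HUGE;  // gets a dedicated segment
}

int main(void) {
  const char* names[] = { "small", "medium", "large", "huge" };
  size_t sizes[] = { 64, 10u*1024, 200u*1024, 8u*1024*1024 };
  for (size_t i = 0; i < sizeof(sizes)/sizeof(sizes[0]); i++) {
    printf("%8zu bytes -> %s page\n", sizes[i], names[page_kind_for(sizes[i])]);
  }
  return 0;
}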
19  src/stats.c
@ -99,14 +99,14 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
  mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1);
  mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1);
  mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1);
  mi_stat_add(&stats->mmap_ensure_aligned, &src->mmap_ensure_aligned, 1);
  mi_stat_add(&stats->mmap_right_align, &src->mmap_right_align, 1);
  mi_stat_add(&stats->commit_calls, &src->commit_calls, 1);
  mi_stat_add(&stats->threads, &src->threads, 1);
  mi_stat_add(&stats->pages_extended, &src->pages_extended, 1);

  mi_stat_add(&stats->malloc, &src->malloc, 1);
  mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
  mi_stat_add(&stats->huge, &src->huge, 1);
  mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
  mi_stat_counter_add(&stats->searches, &src->searches, 1);
#if MI_STAT>1
  for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
@ -172,10 +172,15 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t
}

static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, FILE* out ) {
  double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count);
  _mi_fprintf(out,"%10s: %7.1f avg\n", msg, avg);
  _mi_fprintf(out, "%10s:", msg);
  mi_print_amount(stat->total, -1, out);
  _mi_fprintf(out, "\n");
}

static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, FILE* out) {
  double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count);
  _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg);
}

static void mi_print_header( FILE* out ) {
@ -229,15 +234,15 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
  mi_stat_print(&stats->page_committed, "touched", 1, out);
  mi_stat_print(&stats->segments, "segments", -1, out);
  mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out);
  mi_stat_print(&stats->segments_cache, "-cached", -1, out);
  mi_stat_print(&stats->pages, "pages", -1, out);
  mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out);
  mi_stat_print(&stats->pages_extended, "-extended", 0, out);
  mi_stat_counter_print(&stats->page_no_retire, "-noretire", out);
  mi_stat_print(&stats->mmap_calls, "mmaps", 0, out);
  mi_stat_print(&stats->mmap_right_align, "mmap fast", 0, out);
  mi_stat_print(&stats->mmap_ensure_aligned, "mmap slow", 0, out);
  mi_stat_print(&stats->commit_calls, "commits", 0, out);
  mi_stat_print(&stats->threads, "threads", 0, out);
  mi_stat_counter_print(&stats->searches, "searches", out);
  mi_stat_counter_print_avg(&stats->searches, "searches", out);

  if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs);
@ -6,6 +6,7 @@
#include <mimalloc.h>
#include <mimalloc-override.h>  // redefines malloc etc.


int main() {
  mi_version();
  void* p1 = malloc(78);
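The test above relies on mimalloc-override.h remapping malloc, free and friends to the mi_ equivalents. A slightly fuller sketch of such a test program, with the standard headers included before the override header as in the test file; this is illustrative and not part of the commit:

#include <stdio.h>
#include <stdlib.h>
#include <mimalloc.h>
#include <mimalloc-override.h>   // redefines malloc etc. to the mi_ versions

int main(void) {
  printf("mimalloc version: %d\n", mi_version());
  void* p1 = malloc(78);      // routed to mi_malloc by the override header
  void* p2 = calloc(4, 32);   // routed to mi_calloc
  p1 = realloc(p1, 256);      // routed to mi_realloc
  free(p1);                   // routed to mi_free
  free(p2);
  return 0;
}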