Merge branch 'dev-win' into dev-exp

This commit is contained in:
daan 2019-08-09 15:32:23 -07:00
commit 19163c7097
17 changed files with 945 additions and 166 deletions

View file

@ -35,7 +35,6 @@
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
@ -46,7 +45,6 @@
<ConfigurationType>DynamicLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v141</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
@ -70,25 +68,25 @@
<OutDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
<TargetExt>.dll</TargetExt>
<TargetName>mimalloc</TargetName>
<TargetName>mimalloc-override</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<OutDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
<TargetExt>.dll</TargetExt>
<TargetName>mimalloc</TargetName>
<TargetName>mimalloc-override</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<OutDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
<TargetExt>.dll</TargetExt>
<TargetName>mimalloc</TargetName>
<TargetName>mimalloc-override</TargetName>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<OutDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
<TargetExt>.dll</TargetExt>
<TargetName>mimalloc</TargetName>
<TargetName>mimalloc-override</TargetName>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
@ -100,15 +98,17 @@
<PreprocessorDefinitions>MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<SupportJustMyCode>false</SupportJustMyCode>
<CompileAs>CompileAsCpp</CompileAs>
<CompileAs>Default</CompileAs>
</ClCompile>
<Link>
<AdditionalDependencies>../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
<IgnoreSpecificDefaultLibraries>
</IgnoreSpecificDefaultLibraries>
<ModuleDefinitionFile>
</ModuleDefinitionFile>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
<EntryPointSymbol>DllEntry</EntryPointSymbol>
<IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@ -121,15 +121,17 @@
<PreprocessorDefinitions>MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<SupportJustMyCode>false</SupportJustMyCode>
<CompileAs>CompileAsCpp</CompileAs>
<CompileAs>Default</CompileAs>
</ClCompile>
<Link>
<AdditionalDependencies>../../bin/mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
<IgnoreSpecificDefaultLibraries>
</IgnoreSpecificDefaultLibraries>
<ModuleDefinitionFile>
</ModuleDefinitionFile>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
<EntryPointSymbol>DllEntry</EntryPointSymbol>
<IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
</Link>
<PostBuildEvent>
<Command>COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath)</Command>
@ -152,15 +154,17 @@
<AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
<WholeProgramOptimization>false</WholeProgramOptimization>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<CompileAs>CompileAsCpp</CompileAs>
<CompileAs>Default</CompileAs>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
<ModuleDefinitionFile>
</ModuleDefinitionFile>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
<EntryPointSymbol>DllEntry</EntryPointSymbol>
<IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@ -177,15 +181,17 @@
<AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
<WholeProgramOptimization>false</WholeProgramOptimization>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<CompileAs>CompileAsCpp</CompileAs>
<CompileAs>Default</CompileAs>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>../../bin/mimalloc-redirect.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
<ModuleDefinitionFile>
</ModuleDefinitionFile>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
<EntryPointSymbol>DllEntry</EntryPointSymbol>
<IgnoreAllDefaultLibraries>false</IgnoreAllDefaultLibraries>
</Link>
<PostBuildEvent>
<Command>COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath)</Command>
@ -208,6 +214,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override-win.c" />
<ClCompile Include="..\..\src\alloc-override.c">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>

View file

@ -67,5 +67,8 @@
<ClCompile Include="..\..\src\alloc-posix.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\alloc-override-win.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View file

@ -67,19 +67,19 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">

View file

@ -67,19 +67,19 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</OutDir>
<OutDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\</IntDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">

View file

@ -307,13 +307,23 @@ static inline bool mi_page_all_used(mi_page_t* page) {
static inline bool mi_page_mostly_used(const mi_page_t* page) {
if (page==NULL) return true;
uint16_t frac = page->reserved / 8U;
return (page->reserved - page->used + page->thread_freed < frac);
return (page->reserved - page->used + page->thread_freed <= frac);
}
static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
}
static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
return (page->flags.xthread_id << MI_PAGE_FLAGS_BITS);
}
static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
page->flags.value = 0;
page->flags.xthread_id = (thread_id >> MI_PAGE_FLAGS_BITS);
mi_assert(page->flags.value == thread_id);
}
// -------------------------------------------------------------------
// Encoding/Decoding the free list next pointers
// -------------------------------------------------------------------

View file

@ -91,19 +91,19 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
#define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64kb on 64-bit
#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit
#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit
#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1Mb on 64-bit
#define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT)
// Maximum number of size classes. (spaced exponentially in 16.7% increments)
#define MI_BIN_HUGE (64U)
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
#if (MI_LARGE_WSIZE_MAX > 131072)
// Maximum number of size classes. (spaced exponentially in 12.5% increments)
#define MI_BIN_HUGE (70U)
#if (MI_LARGE_WSIZE_MAX > 393216)
#error "define more bins"
#endif
@ -123,14 +123,26 @@ typedef enum mi_delayed_e {
} mi_delayed_t;
// Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags
// This allows a single test in `mi_free` to check for unlikely cases
// (namely, non-local free, aligned free, or freeing in a full page)
#define MI_PAGE_FLAGS_BITS (2)
#define MI_PAGE_FLAGS_TID_BITS (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS)
typedef union mi_page_flags_u {
uint16_t value;
uintptr_t value;
struct {
bool has_aligned;
bool in_full;
#ifdef MI_BIG_ENDIAN
uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
#endif
uintptr_t in_full : 1;
uintptr_t has_aligned : 1;
#ifndef MI_BIG_ENDIAN
uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
#endif
};
} mi_page_flags_t;
// Thread free list.
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
typedef uintptr_t mi_thread_free_t;
@ -161,15 +173,15 @@ typedef struct mi_page_s {
bool is_committed:1; // `true` if the page virtual memory is committed
// layout like this to optimize access in `mi_malloc` and `mi_free`
mi_page_flags_t flags;
uint16_t capacity; // number of blocks committed
uint16_t reserved; // number of blocks reserved in memory
// 16 bits padding
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
#if MI_SECURE
uintptr_t cookie; // random cookie to encode the free lists
#endif
size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free`
@ -182,10 +194,10 @@ typedef struct mi_page_s {
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// improve page index calculation
#if MI_INTPTR_SIZE==8
//void* padding[1]; // 12 words on 64-bit
#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
void* padding[1]; // 12 words on 64-bit
#elif MI_INTPTR_SIZE==4
void* padding[1]; // 12 words on 32-bit
// void* padding[1]; // 12 words on 32-bit
#endif
} mi_page_t;
@ -215,7 +227,7 @@ typedef struct mi_segment_s {
// layout like this to optimize access in `mi_free`
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
uintptr_t thread_id; // unique id of the thread owning this segment
volatile uintptr_t thread_id; // unique id of the thread owning this segment
mi_page_kind_t page_kind; // kind of pages: small, large, or huge
mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
} mi_segment_t;
@ -324,12 +336,12 @@ typedef struct mi_stats_s {
mi_stat_count_t pages_abandoned;
mi_stat_count_t pages_extended;
mi_stat_count_t mmap_calls;
mi_stat_count_t mmap_right_align;
mi_stat_count_t mmap_ensure_aligned;
mi_stat_count_t commit_calls;
mi_stat_count_t threads;
mi_stat_count_t huge;
mi_stat_count_t malloc;
mi_stat_count_t segments_cache;
mi_stat_counter_t page_no_retire;
mi_stat_counter_t searches;
#if MI_STAT>1
mi_stat_count_t normal[MI_BIN_HUGE+1];

714
src/alloc-override-win.c Normal file
View file

@ -0,0 +1,714 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#if !defined(_WIN32)
#error "this file should only be included on Windows"
#endif
#include <windows.h>
#include <psapi.h>
#include <stdlib.h> // getenv
#include <string.h> // strstr
/*
To override the C runtime `malloc` on Windows we need to patch the allocation
functions at runtime initialization. Unfortunately we can never patch before the
runtime initializes itself, because as soon as we call `GetProcAddress` on the
runtime module (a DLL or EXE in Windows speak), it will first load and initialize
(by the OS calling `DllMain` on it).
This means that some things might be already allocated by the C runtime itself
(and possibly other DLL's) before we get to resolve runtime addresses. This is
no problem if everyone unwinds in order: when we unload, we unpatch and restore
the original crt `free` routines and crt malloc'd memory is freed correctly.
But things go wrong if such early CRT alloc'd memory is freed or re-allocated
_after_ we patch, but _before_ we unload (and unpatch), or if any memory allocated
by us is freed after we unpatched.
There are two tricky situations to deal with:
1. The Thread Local Storage (TLS): when the main thread stops it will call registered
callbacks on TLS entries (allocated by `FlsAlloc`). This is done by the OS
before any DLL's are unloaded. Unfortunately, the C runtime registers such
TLS entries with CRT allocated memory which is freed in the callback.
2. Inside the CRT:
a. Some variables might get initialized by patched allocated
blocks but freed during CRT unloading after we unpatched
(like temporary file buffers).
b. Some blocks are allocated at CRT and freed by the CRT (like the
environment storage).
c. And some blocks are allocated by the CRT and then reallocated
while patched, and finally freed after unpatching! This
happens with the `atexit` functions for example to grow the array
of registered functions.
In principle situation 2 is hopeless: since we cannot patch before CRT initialization,
we can never be sure how to free or reallocate a pointer during CRT unloading.
However, in practice there is a good solution: when terminating, we just patch
the reallocation and free routines to no-ops -- we are winding down anyway! This leaves
just the reallocation problem of CRT alloc'd memory once we are patched. Here, a study of the
CRT reveals that there seem to be just three such situations:
1. When registering `atexit` routines (to grow the exit function table),
2. When calling `_setmaxstdio` (to grow the file handle table),
3. and `_popen`/`_wpopen` (to grow handle pairs). These turn out not to be
a problem as these are NULL initialized.
We fix these by providing wrappers:
1. We first register a _global_ `atexit` routine ourselves (`mi_patches_at_exit`) before patching,
and then patch the `_crt_atexit` function to implement our own global exit list (and the
same for `_crt_at_quick_exit`). All module local lists are no problem since they are always fully
(un)patched from initialization to end. We can register in the global list by dynamically
getting the global `_crt_atexit` entry from `ucrtbase.dll`.
2. The `_setmaxstdio` is _detoured_: we patch it by a stub that unpatches first,
calls the original routine and repatches again.
That leaves us to reliably shutdown and enter "termination mode":
1. Using our trick to get the global exit list entry point, we register an exit function `mi_patches_atexit`
that first executes all our home brew list of exit functions, and then enters a _termination_
phase that patches realloc/free variants with no-ops. Patching later again with special no-ops for
`free` also improves efficiency during the program run since no flags need to be checked.
2. That is not quite good enough yet since after executing exit routines after us on the
global exit list (registered by the CRT),
the OS starts to unwind the TLS callbacks and we would like to run callbacks registered after loading
our DLL to be done in patched mode. So, we also allocate a TLS entry when our DLL is loaded and when its
callback is called, we re-enable the original patches again. Since TLS is destroyed in FIFO order
this runs any callbacks in later DLL's in patched mode.
3. Finally the DLL's get unloaded by the OS in order (still patched) until our DLL gets unloaded
and then we start a termination phase again, and patch realloc/free with no-ops for good this time.
*/
static int __cdecl mi_setmaxstdio(int newmax);
// ------------------------------------------------------
// Microsoft allocation extensions
// ------------------------------------------------------
// Tag type taken by the MSVC `nothrow` delete overloads; the tag itself
// carries no information that we need.
typedef size_t mi_nothrow_t;

// Replacement for the nothrow `delete` entry points: ignore the tag and
// forward to `mi_free`.
static void mi_free_nothrow(void* p, mi_nothrow_t tag) {
  (void)tag;  // unused
  mi_free(p);
}
// Versions of `free` (and friends below) that are installed during
// termination: by then the heap may be winding down, so they do nothing.

// termination-mode `free`: intentionally a no-op
static void mi_free_term(void* p) {
  (void)p;
}

// termination-mode sized `free`: intentionally a no-op
static void mi_free_size_term(void* p, size_t size) {
  (void)p;
  (void)size;
}
// termination-mode nothrow `delete`: intentionally a no-op
static void mi_free_nothrow_term(void* p, mi_nothrow_t tag) {
  (void)p;
  (void)tag;
}
// termination-mode `realloc`: never reallocates, reports failure
static void* mi_realloc_term(void* p, size_t newsize) {
  (void)p; (void)newsize;
  return NULL;
}

// termination-mode `_recalloc`: never reallocates, reports failure
static void* mi__recalloc_term(void* p, size_t newcount, size_t newsize) {
  (void)p; (void)newcount; (void)newsize;
  return NULL;
}

// termination-mode `_expand`: never succeeds
static void* mi__expand_term(void* p, size_t newsize) {
  (void)p; (void)newsize;
  return NULL;
}

// termination-mode `_msize`: report zero usable size
static size_t mi__msize_term(void* p) {
  (void)p;
  return 0;
}
// ------------------------------------------------------
// CRT debug-heap wrappers (`*_dbg`). The debug variants receive extra
// block-type/file/line arguments which we ignore. The signatures must
// match the CRT exactly since these are installed as jump-patch targets.
// ------------------------------------------------------

// `_malloc_dbg` replacement: drop the debug arguments, call the CRT base allocator.
static void* mi__malloc_dbg(size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _malloc_base(size);
}
// `_calloc_dbg` replacement: forwards to `_calloc_base`.
static void* mi__calloc_dbg(size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _calloc_base(count, size);
}
// `_realloc_dbg` replacement: forwards to `_realloc_base`.
static void* mi__realloc_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return _realloc_base(p, size);
}
// `_free_dbg` replacement: forwards to `_free_base`.
static void mi__free_dbg(void* p, int block_type) {
UNUSED(block_type);
_free_base(p);
}
// the `recalloc`,`expand`, and `msize` don't have base versions and thus need a separate term version
// `_recalloc_dbg` replacement: forwards to mimalloc's recalloc.
static void* mi__recalloc_dbg(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi_recalloc(p, count, size);
}
// `_expand_dbg` replacement; `mi__expand` is defined elsewhere in the
// override layer — NOTE(review): confirm it exists in this build.
static void* mi__expand_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__expand(p, size);
}
// `_msize_dbg` replacement: report mimalloc's usable size.
static size_t mi__msize_dbg(void* p, int block_type) {
UNUSED(block_type);
return mi_usable_size(p);
}
// Termination variants of the three above: forward to the no-op `_term` stubs.
static void* mi__recalloc_dbg_term(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__recalloc_term(p, count, size);
}
static void* mi__expand_dbg_term(void* p, size_t size, int block_type, const char* fname, int line) {
UNUSED(block_type); UNUSED(fname); UNUSED(line);
return mi__expand_term(p, size);
}
static size_t mi__msize_dbg_term(void* p, int block_type) {
UNUSED(block_type);
return mi__msize_term(p);
}
// ------------------------------------------------------
// Implement our own global atexit handler, since the CRT versions
// perform a realloc internally (see the file header comment).
// ------------------------------------------------------
typedef void (cbfun_t)(void);          // exit callback signature
typedef int (atexit_fun_t)(cbfun_t* fn); // signature of a CRT atexit-style registrar
typedef uintptr_t encoded_t;           // canary-encoded pointer (see encode/decode below)

// A growable list of exit callbacks. Both the array pointer and each
// entry are stored XOR-encoded with the canary.
typedef struct exit_list_s {
encoded_t functions; // encoded pointer to array of encoded function pointers
size_t count;
size_t capacity;
} exit_list_t;

#define MI_EXIT_INC (64)  // growth increment for the callback arrays

static exit_list_t atexit_list = { 0, 0, 0 };
static exit_list_t at_quick_exit_list = { 0, 0, 0 };
static CRITICAL_SECTION atexit_lock;  // guards both lists above
// Function pointers kept in the exit lists are XOR-ed with a random
// canary, so a heap overwrite cannot trivially plant a callable address.
static encoded_t canary;

// Invert `encode`: recover the raw pointer from its encoded form.
static inline void* decode(encoded_t x) {
  return (void*)(x ^ canary);
}

// Encode a raw pointer by XOR-ing it with the canary.
static inline encoded_t encode(void* p) {
  return ((uintptr_t)p ^ canary);
}
// Seed the pointer-encoding canary from mimalloc's random source and
// store the encoded-NULL sentinel in both exit lists (an encoded 0 is
// not NULL once the canary is non-zero, so the lists must be set here).
// Fix: use a proper `(void)` prototype — the original `()` declared an
// unprototyped function in C, unlike the rest of this file.
static void init_canary(void)
{
canary = _mi_random_init(0);
atexit_list.functions = at_quick_exit_list.functions = encode(NULL);
}
// One-time setup of the atexit machinery: create the lock that guards
// both exit lists, then randomize the pointer-encoding canary.
static void mi_initialize_atexit(void) {
InitializeCriticalSection(&atexit_lock);
init_canary();
}
// Register exit callback `fn` on `list`.
// Returns 0 on success, EINVAL for a NULL callback, ENOMEM if the
// table could not grow. The whole update runs under `atexit_lock`.
static int mi_register_atexit(exit_list_t* list, cbfun_t* fn) {
if (fn == NULL) return EINVAL;
EnterCriticalSection(&atexit_lock);
encoded_t* functions = (encoded_t*)decode(list->functions);
if (list->count >= list->capacity) { // at first `functions == decode(0) == NULL`
// grow by MI_EXIT_INC entries; mi_recalloc preserves the old entries
encoded_t* newf = (encoded_t*)mi_recalloc(functions, list->capacity + MI_EXIT_INC, sizeof(cbfun_t*));
if (newf != NULL) {
list->capacity += MI_EXIT_INC;
list->functions = encode(newf);
functions = newf;
}
}
int result;
if (list->count < list->capacity && functions != NULL) {
// store the callback canary-encoded, like the array pointer itself
functions[list->count] = encode(fn);
list->count++;
result = 0; // success
}
else {
result = ENOMEM; // the grow step above failed
}
LeaveCriticalSection(&atexit_lock);
return result;
}
// Register a callback on our own global `atexit` list.
static int mi_atexit(cbfun_t* fn) {
  return mi_register_atexit(&atexit_list, fn);
}

// Register a callback on our own global `at_quick_exit` list.
static int mi_at_quick_exit(cbfun_t* fn) {
  return mi_register_atexit(&at_quick_exit_list, fn);
}

// Replacement for the CRT's `_register_onexit_function`.
// TODO: how can we distinguish a quick_exit from atexit?
static int mi_register_onexit(void* table, cbfun_t* fn) {
  (void)table;  // the CRT-private onexit table is ignored
  return mi_atexit(fn);
}
// Execute all exit functions in `list`, newest-first (LIFO, matching
// atexit semantics), then free the callback array. The list is detached
// under the lock but the callbacks run outside it, so a callback may
// itself register new exit functions without deadlocking.
static void mi_execute_exit_list(exit_list_t* list) {
// copy and zero the list structure
EnterCriticalSection(&atexit_lock);
exit_list_t clist = *list;
memset(list,0,sizeof(*list));
LeaveCriticalSection(&atexit_lock);
// now execute the functions outside of the lock
encoded_t* functions = (encoded_t*)decode(clist.functions);
if (functions != NULL) {
for (size_t i = clist.count; i > 0; i--) { // careful with unsigned count down..
cbfun_t* fn = (cbfun_t*)decode(functions[i-1]);
if (fn==NULL) break; // corrupted! decoded entries should never be NULL
fn();
}
mi_free(functions);
}
}
// ------------------------------------------------------
// Jump assembly instructions for patches: `mi_jump_write` overwrites the
// first MI_JUMP_SIZE bytes of a function with a jump to `target` (saving
// the original bytes in `save`), and `mi_jump_restore` puts them back.
// Callers must make the code page writable first (see mi_patch_apply).
// ------------------------------------------------------
#if defined(_M_IX86) || defined(_M_X64)
#define MI_JUMP_SIZE 14 // at most 2+4+8 for a long jump or 1+5 for a short one
typedef struct mi_jump_s {
uint8_t opcodes[MI_JUMP_SIZE];
} mi_jump_t;

// Restore the original instruction bytes previously saved by mi_jump_write.
void mi_jump_restore(void* current, const mi_jump_t* saved) {
memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
}

// Write a jump from `current` to `target`; when `save` is non-NULL the
// overwritten bytes are preserved so the patch can be undone.
void mi_jump_write(void* current, void* target, mi_jump_t* save) {
if (save != NULL) {
memcpy(&save->opcodes, current, MI_JUMP_SIZE);
}
uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
ptrdiff_t diff = (uint8_t*)target - (uint8_t*)current;
uint32_t ofs32 = (uint32_t)diff;
#ifdef _M_X64
uint64_t ofs64 = (uint64_t)diff;
if (ofs64 != (uint64_t)ofs32) {
// use long jump: FF 25 00000000 <abs64> = jmp [rip+0], 8-byte absolute address follows
opcodes[0] = 0xFF;
opcodes[1] = 0x25;
*((uint32_t*)&opcodes[2]) = 0;
*((uint64_t*)&opcodes[6]) = (uint64_t)target;
}
else
#endif
{
// use short jump: E9 rel32, relative to the end of this 5-byte instruction
opcodes[0] = 0xE9;
*((uint32_t*)&opcodes[1]) = ofs32 - 5 /* size of the short jump instruction */;
}
}
#elif defined(_M_ARM64)
#define MI_JUMP_SIZE 16
typedef struct mi_jump_s {
uint8_t opcodes[MI_JUMP_SIZE];
} mi_jump_t;
void mi_jump_restore(void* current, const mi_jump_t* saved) {
memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
}
void mi_jump_write(void* current, void* target, mi_jump_t* save) {
if (save != NULL) {
memcpy(&save->opcodes, current, MI_JUMP_SIZE);
}
uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
uint64_t diff = (uint8_t*)target - (uint8_t*)current;
// 0x50 0x00 0x00 0x58 ldr x16, .+8 # load PC relative +8
// 0x00 0x02 0x3F 0xD6 blr x16 # and jump
// <address>
// <address>
static const uint8_t jump_opcodes[8] = { 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x3F, 0xD6 };
memcpy(&opcodes[0], jump_opcodes, sizeof(jump_opcodes));
// NOTE(review): the comment above says an <address> follows, yet the raw
// `diff` is stored; `ldr`+`blr` jump to the loaded value as an absolute
// address, so this looks like it should store `(uint64_t)target` —
// verify on an ARM64 build.
*((uint64_t*)&opcodes[8]) = diff;
}
#else
#error "define jump instructions for this platform"
#endif
// ------------------------------------------------------
// Patches: the table of CRT entry points we overwrite with jumps.
// ------------------------------------------------------

// Which target a patch currently points at.
typedef enum patch_apply_e {
PATCH_NONE,
PATCH_TARGET,
PATCH_TARGET_TERM
} patch_apply_t;

#define MAX_ENTRIES 4 // maximum number of patched entry points (like `malloc` in ucrtbase and msvcrt)

typedef struct mi_patch_s {
const char* name; // name of the function to patch
void* target; // the address of the new target (never NULL)
void* target_term; // the address of the target during termination (or NULL)
patch_apply_t applied; // what target has been applied?
void* originals[MAX_ENTRIES]; // the resolved addresses of the function (or NULLs)
mi_jump_t saves[MAX_ENTRIES]; // the saved instructions in case it was applied
} mi_patch_t;

// Initializer helpers: NAME3 takes a separate termination target, NAME2 has none;
// the non-NAME variants stringize the symbol. (`saves` is left zero-initialized.)
#define MI_PATCH_NAME3(name,target,term) { name, &target, &term, PATCH_NONE, {NULL,NULL,NULL,NULL} }
#define MI_PATCH_NAME2(name,target) { name, &target, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
#define MI_PATCH3(name,target,term) MI_PATCH_NAME3(#name, target, term)
#define MI_PATCH2(name,target) MI_PATCH_NAME2(#name, target)
#define MI_PATCH1(name) MI_PATCH2(name,mi_##name)

// NULL-name terminated table of all entry points to patch.
static mi_patch_t patches[] = {
// we implement our own global exit handler (as the CRT versions do a realloc internally)
//MI_PATCH2(_crt_atexit, mi_atexit),
//MI_PATCH2(_crt_at_quick_exit, mi_at_quick_exit),
MI_PATCH2(_setmaxstdio, mi_setmaxstdio),
MI_PATCH2(_register_onexit_function, mi_register_onexit),
// override higher level atexit functions so we can implement at_quick_exit correctly
MI_PATCH2(atexit, mi_atexit),
MI_PATCH2(at_quick_exit, mi_at_quick_exit),
// regular entries
MI_PATCH2(malloc, mi_malloc),
MI_PATCH2(calloc, mi_calloc),
MI_PATCH3(realloc, mi_realloc,mi_realloc_term),
MI_PATCH3(free, mi_free,mi_free_term),
// extended api
MI_PATCH2(_strdup, mi_strdup),
MI_PATCH2(_strndup, mi_strndup),
MI_PATCH3(_expand, mi__expand,mi__expand_term),
MI_PATCH3(_recalloc, mi_recalloc,mi__recalloc_term),
MI_PATCH3(_msize, mi_usable_size,mi__msize_term),
// base versions
MI_PATCH2(_malloc_base, mi_malloc),
MI_PATCH2(_calloc_base, mi_calloc),
MI_PATCH3(_realloc_base, mi_realloc,mi_realloc_term),
MI_PATCH3(_free_base, mi_free,mi_free_term),
// these base versions are in the crt but without import records
MI_PATCH_NAME3("_recalloc_base", mi_recalloc,mi__recalloc_term),
MI_PATCH_NAME3("_msize_base", mi_usable_size,mi__msize_term),
// debug
MI_PATCH2(_malloc_dbg, mi__malloc_dbg),
MI_PATCH2(_realloc_dbg, mi__realloc_dbg),
MI_PATCH2(_calloc_dbg, mi__calloc_dbg),
MI_PATCH2(_free_dbg, mi__free_dbg),
MI_PATCH3(_expand_dbg, mi__expand_dbg, mi__expand_dbg_term),
MI_PATCH3(_recalloc_dbg, mi__recalloc_dbg, mi__recalloc_dbg_term),
MI_PATCH3(_msize_dbg, mi__msize_dbg, mi__msize_dbg_term),
#if 0
// override new/delete variants for efficiency (?)
#ifdef _WIN64
// 64 bit new/delete
MI_PATCH_NAME2("??2@YAPEAX_K@Z", mi_new),
MI_PATCH_NAME2("??_U@YAPEAX_K@Z", mi_new),
MI_PATCH_NAME3("??3@YAXPEAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??_V@YAXPEAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??3@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized
MI_PATCH_NAME3("??_V@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized
MI_PATCH_NAME2("??2@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new),
MI_PATCH_NAME2("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new),
MI_PATCH_NAME3("??3@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
MI_PATCH_NAME3("??_V@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
#else
// 32 bit new/delete
MI_PATCH_NAME2("??2@YAPAXI@Z", mi_new),
MI_PATCH_NAME2("??_U@YAPAXI@Z", mi_new),
MI_PATCH_NAME3("??3@YAXPAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??_V@YAXPAX@Z", mi_free, mi_free_term),
MI_PATCH_NAME3("??3@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized
MI_PATCH_NAME3("??_V@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized
MI_PATCH_NAME2("??2@YAPAXIABUnothrow_t@std@@@Z", mi_new),
MI_PATCH_NAME2("??_U@YAPAXIABUnothrow_t@std@@@Z", mi_new),
MI_PATCH_NAME3("??3@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
MI_PATCH_NAME3("??_V@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
#endif
#endif
// sentinel: iteration stops at the NULL name
{ NULL, NULL, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
};
// Apply (or un-apply, for PATCH_NONE) a single patch to all of its
// resolved entry points. Returns true when the requested state was
// reached (or the patch is unresolved), false if it was already in the
// requested state or a VirtualProtect call failed.
static bool mi_patch_apply(mi_patch_t* patch, patch_apply_t apply)
{
if (patch->originals[0] == NULL) return true; // unresolved
if (apply == PATCH_TARGET_TERM && patch->target_term == NULL) apply = PATCH_TARGET; // avoid re-applying non-term variants
if (patch->applied == apply) return false;
for (int i = 0; i < MAX_ENTRIES; i++) {
void* original = patch->originals[i];
if (original == NULL) break; // no more
// make the code page writable, patch, then restore the old protection
DWORD protect = PAGE_READWRITE;
if (!VirtualProtect(original, MI_JUMP_SIZE, PAGE_EXECUTE_READWRITE, &protect)) return false;
if (apply == PATCH_NONE) {
mi_jump_restore(original, &patch->saves[i]);
}
else {
void* target = (apply == PATCH_TARGET ? patch->target : patch->target_term);
mi_assert_internal(target != NULL);
if (target != NULL) mi_jump_write(original, target, &patch->saves[i]);
}
VirtualProtect(original, MI_JUMP_SIZE, protect, &protect);
}
patch->applied = apply;
return true;
}
// Transition every patch in the table to the state `apply`.
// If `previous` is non-NULL it receives the state that was active before.
// Returns `true` when all individual patches reached the requested state.
static bool _mi_patches_apply(patch_apply_t apply, patch_apply_t* previous) {
  static patch_apply_t current = PATCH_NONE;  // current global patch state
  if (previous != NULL) { *previous = current; }
  if (current == apply) { return true; }      // nothing to change
  current = apply;
  bool all_ok = true;
  size_t idx = 0;
  while (patches[idx].name != NULL) {
    // note: always invoke mi_patch_apply so every patch transitions,
    // even after an earlier failure
    all_ok = mi_patch_apply(&patches[idx], apply) && all_ok;
    idx++;
  }
  return all_ok;
}
// Export the following three functions just in case
// a user needs that level of control.

// Disable all patches: restore every patched entry point to its original code.
mi_decl_export void mi_patches_disable(void) {
  _mi_patches_apply(PATCH_NONE, NULL);
}

// Enable all patches normally (malloc/free etc. redirect to mimalloc).
mi_decl_export bool mi_patches_enable(void) {
  return _mi_patches_apply( PATCH_TARGET, NULL );
}

// Enable all patches in termination phase where free is a no-op
// (so late CRT/atexit code cannot free through a torn-down heap).
mi_decl_export bool mi_patches_enable_term(void) {
  return _mi_patches_apply(PATCH_TARGET_TERM, NULL);
}
// ------------------------------------------------------
// Stub for _setmaxstdio
// ------------------------------------------------------

// Replacement for `_setmaxstdio`: the CRT implementation reallocates its
// internal handle table with the CRT allocator, so lift the patches around
// the call and restore the previous patch state afterwards.
static int __cdecl mi_setmaxstdio(int newmax) {
  patch_apply_t saved;
  _mi_patches_apply(PATCH_NONE, &saved);  // disable patches, remembering the prior state
  const int ret = _setmaxstdio(newmax);   // call original function (that calls original CRT recalloc)
  _mi_patches_apply(saved, NULL);         // and re-enable patches
  return ret;
}
// ------------------------------------------------------
// Resolve addresses dynamically
// ------------------------------------------------------

// Try to resolve patches for a given module (DLL).
// For every patch that is not yet applied, look its name up in `mod`; a
// found address is stored in the first free slot of `patch->originals`.
// `fname` is used for tracing only.
// NOTE(review): `priority` is currently unused in this function -- confirm
// whether it was meant to influence resolution order.
static void mi_module_resolve(const char* fname, HMODULE mod, int priority) {
  // see if any patches apply
  for (size_t i = 0; patches[i].name != NULL; i++) {
    mi_patch_t* patch = &patches[i];
    if (patch->applied == PATCH_NONE) {
      // find an available entry slot
      // (fix: this index used to be another `i`, shadowing the outer loop
      // variable; renamed to `entry` to remove the shadowing hazard)
      int entry = 0;
      while (entry < MAX_ENTRIES && patch->originals[entry] != NULL) entry++;
      if (entry < MAX_ENTRIES) {
        void* addr = GetProcAddress(mod, patch->name);
        if (addr != NULL) {
          // found it! set the address
          patch->originals[entry] = addr;
          _mi_trace_message(" found %s at %s!%p (entry %i)\n", patch->name, fname, addr, entry);
        }
      }
    }
  }
}
#define MIMALLOC_NAME "mimalloc-override.dll"
#define UCRTBASE_NAME "ucrtbase.dll"
#define UCRTBASED_NAME "ucrtbased.dll"

// Resolve addresses of all patches by inspecting the loaded modules

// CRT atexit registration functions, captured from the CRT module below so
// the DLL entry can hook the process exit lists.
static atexit_fun_t* crt_atexit = NULL;
static atexit_fun_t* crt_at_quick_exit = NULL;

// Enumerate the modules loaded in the current process, resolve the patch
// table against each CRT-like module, and pick up `_crt_atexit` /
// `_crt_at_quick_exit`. Returns `false` only when enumeration fails.
static bool mi_patches_resolve(void) {
  // get all loaded modules
  HANDLE process = GetCurrentProcess(); // always -1, no need to release
  DWORD needed = 0;
  HMODULE modules[400]; // try to stay under 4k to not trigger the guard page
  EnumProcessModules(process, modules, sizeof(modules), &needed);
  if (needed == 0) return false;
  // fix: when more than 400 modules are loaded, EnumProcessModules sets
  // `needed` larger than the buffer it filled; clamp so the loop below
  // cannot read past the end of `modules`.
  if (needed > sizeof(modules)) needed = sizeof(modules);
  int count = needed / sizeof(HMODULE);
  int ucrtbase_index = 0;
  int mimalloc_index = 0;
  // iterate through the loaded modules
  for (int i = 0; i < count; i++) {
    HMODULE mod = modules[i];
    char filename[MAX_PATH] = { 0 };
    DWORD slen = GetModuleFileName(mod, filename, MAX_PATH);
    if (slen > 0 && slen < MAX_PATH) {
      // filter out potential crt modules only
      filename[slen] = 0;
      const char* lastsep = strrchr(filename, '\\');
      const char* basename = (lastsep==NULL ? filename : lastsep+1);
      _mi_trace_message(" %i: dynamic module %s\n", i, filename);
      // remember indices so we can check load order (in debug mode)
      if (_stricmp(basename, MIMALLOC_NAME) == 0) mimalloc_index = i;
      if (_stricmp(basename, UCRTBASE_NAME) == 0) ucrtbase_index = i;
      if (_stricmp(basename, UCRTBASED_NAME) == 0) ucrtbase_index = i;
      // see if we potentially patch in this module
      int priority = 0;
      if (i == 0) priority = 2; // main module to allow static crt linking
      else if (_strnicmp(basename, "ucrt", 4) == 0) priority = 3; // new ucrtbase.dll in windows 10
      // NOTE: don't override msvcr -- leads to crashes in setlocale (needs more testing)
      // else if (_strnicmp(basename, "msvcr", 5) == 0) priority = 1; // older runtimes
      if (priority > 0) {
        // probably found a crt module, try to patch it
        mi_module_resolve(basename,mod,priority);
        // try to find the atexit functions for the main process (in `ucrtbase.dll`)
        if (crt_atexit==NULL) crt_atexit = (atexit_fun_t*)GetProcAddress(mod, "_crt_atexit");
        if (crt_at_quick_exit == NULL) crt_at_quick_exit = (atexit_fun_t*)GetProcAddress(mod, "_crt_at_quick_exit");
      }
    }
  }
  // warn when our DLL loads after (not right next to) the C runtime,
  // since patches may then miss allocations done in between
  int diff = mimalloc_index - ucrtbase_index;
  if (diff > 1) {
    _mi_warning_message("warning: the \"mimalloc-override\" DLL seems not to load before or right after the C runtime (\"ucrtbase\").\n"
                        " Try to fix this by changing the linking order.\n");
  }
  return true;
}
// ------------------------------------------------------
// Dll Entry
// ------------------------------------------------------
extern BOOL WINAPI _DllMainCRTStartup(HINSTANCE inst, DWORD reason, LPVOID reserved);

// FLS index allocated in DllEntry; its callback fires during fiber/thread
// unwinding (value set to a non-NULL marker in DllEntry).
static DWORD mi_fls_unwind_entry;

// FLS callback: invoked with the slot value during unwinding.
static void NTAPI mi_fls_unwind(PVOID value) {
  if (value != NULL) mi_patches_enable(); // and re-enable normal patches again for DLL's loaded after us
  return;
}

// Registered via `_crt_atexit` in DllEntry: run our captured atexit list,
// then switch all patches to their termination variants so realloc/free
// become no-ops for the rest of shutdown.
static void mi_patches_atexit(void) {
  mi_execute_exit_list(&atexit_list);
  mi_patches_enable_term();  // enter termination phase and patch realloc/free with a no-op
}

// Registered via `_crt_at_quick_exit` in DllEntry: same as above for the
// `quick_exit` path.
static void mi_patches_at_quick_exit(void) {
  mi_execute_exit_list(&at_quick_exit_list);
  mi_patches_enable_term();  // enter termination phase and patch realloc/free with a no-op
}
// Map an ASCII lowercase letter to uppercase (avoids a <ctype.h> dependency).
static char mi_ascii_toupper(char c) {
  return ((c >= 'a' && c <= 'z') ? (char)(c - 'a' + 'A') : c);
}

// Case-insensitive exact string equality (ASCII only).
static bool mi_str_equals_nocase(const char* s, const char* t) {
  while (*s != 0 && *t != 0) {
    if (mi_ascii_toupper(*s) != mi_ascii_toupper(*t)) return false;
    s++; t++;
  }
  return (*s == 0 && *t == 0);
}

// Does the MIMALLOC_DISABLE_OVERRIDE value `s` request disabling?
// (fix: this used to be `strstr("1;TRUE;YES;ON", s) != NULL`, a substring
// test, so values like "E", ";", or even the empty string disabled the
// override while lowercase "true" did not; now only an exact
// case-insensitive "1"/"TRUE"/"YES"/"ON" disables it.)
static bool mi_env_disables_override(const char* s) {
  if (s == NULL) return false;
  return (mi_str_equals_nocase(s, "1")   || mi_str_equals_nocase(s, "TRUE") ||
          mi_str_equals_nocase(s, "YES") || mi_str_equals_nocase(s, "ON"));
}

// DLL entry point: initializes the security cookie, runs the CRT startup,
// and -- on a successful process attach -- resolves and applies the malloc
// patches unless MIMALLOC_DISABLE_OVERRIDE requests otherwise. On process
// detach the patches are switched to their termination (no-op free) variants.
__declspec(dllexport) BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID reserved) {
  if (reason == DLL_PROCESS_ATTACH) {
    __security_init_cookie();  // must run before any other CRT use
  }
  else if (reason == DLL_PROCESS_DETACH) {
    // enter termination phase for good now
    mi_patches_enable_term();
  }
  // C runtime main
  BOOL ok = _DllMainCRTStartup(inst, reason, reserved);
  if (reason == DLL_PROCESS_ATTACH && ok) {
    // initialize at exit lists
    mi_initialize_atexit();
    // Now resolve patches
    ok = mi_patches_resolve();
    if (ok) {
      // check if patching is not disabled
      #pragma warning(suppress:4996)
      const char* s = getenv("MIMALLOC_DISABLE_OVERRIDE");
      bool enabled = !mi_env_disables_override(s);
      if (!enabled) {
        _mi_verbose_message("override is disabled\n");
      }
      else {
        // and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from GetProcAddress)
        mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind);
        if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) {
          FlsSetValue(mi_fls_unwind_entry, (void*)1);
        }
        // register our patch disabler in the global exit list
        if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit);
        if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit);
        // and patch ! this also redirects the `atexit` handling for the global exit list
        mi_patches_enable();
        _mi_verbose_message("override is enabled\n");
        // hide internal allocation
        mi_stats_reset();
      }
    }
  }
  return ok;
}

View file

@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file
#error "this file should be included from 'alloc.c' (so aliases can work)"
#endif
#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL) && defined(_WIN64))
#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL))
#error "It is only possible to override "malloc" on Windows when building as a 64-bit DLL (and linking the C runtime as a DLL)"
#endif

View file

@ -38,7 +38,9 @@ size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
}
void mi_cfree(void* p) mi_attr_noexcept {
mi_free(p);
if (mi_is_in_heap_region(p)) {
mi_free(p);
}
}
int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept {

View file

@ -57,6 +57,7 @@ extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small(mi_get_default_heap(), size);
}
// zero initialized small block
void* mi_zalloc_small(size_t size) mi_attr_noexcept {
void* p = mi_malloc_small(size);
@ -71,7 +72,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep
void* p;
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
p = mi_heap_malloc_small(heap, size);
}
}
else {
p = _mi_malloc_generic(heap, size);
}
@ -223,8 +224,7 @@ void mi_free(void* p) mi_attr_noexcept
return;
}
#endif
bool local = (_mi_thread_id() == segment->thread_id); // preload, note: putting the thread_id in the page->flags does not improve performance
mi_page_t* page = _mi_segment_page_of(segment, p);
#if (MI_STAT>1)
@ -236,24 +236,18 @@ void mi_free(void* p) mi_attr_noexcept
// huge page stat is accounted for in `_mi_page_retire`
#endif
// adjust if it might be an un-aligned block
if (mi_likely(page->flags.value==0)) { // not full or aligned
uintptr_t tid = _mi_thread_id();
if (mi_likely(tid == page->flags.value)) {
// local, and not full or aligned
mi_block_t* block = (mi_block_t*)p;
if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance
// owning thread can free a block directly
mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
page->local_free = block;
page->used--;
if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
}
else {
// use atomic operations for a multi-threaded free
_mi_free_block_mt(page, block);
}
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
page->used--;
if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
}
else {
// aligned blocks, or a full page; use the more generic path
mi_free_generic(segment, page, local, p);
// non-local, aligned blocks, or a full page; use the more generic path
mi_free_generic(segment, page, tid == mi_page_thread_id(page), p);
}
}

View file

@ -12,15 +12,16 @@ terms of the MIT license. A copy of the license can be found in the file
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
0, false, false, false, {0}, 0, 0,
NULL, 0, // free, used
0, false, false, false, 0, 0,
NULL, // free
#if MI_SECURE
0,
#endif
0, {0}, // used, flags
NULL, 0, 0,
0, NULL, NULL, NULL
#if (MI_INTPTR_SIZE==4)
, { NULL }
#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
, { NULL }
#endif
};
@ -33,22 +34,23 @@ const mi_page_t _mi_page_empty = {
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
#define MI_PAGE_QUEUES_EMPTY \
{ QNULL(1), \
QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \
QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \
QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \
QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \
QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \
QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \
QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \
QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \
QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \
QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \
QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \
QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \
QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \
QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \
QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \
QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
#define MI_STAT_COUNT_NULL() {0,0,0,0}
// Empty statistics
#if MI_STAT>1
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) }
#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) }
#else
#define MI_STAT_COUNT_END_NULL()
#endif
@ -61,7 +63,8 @@ const mi_page_t _mi_page_empty = {
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), \
{ 0, 0 }, \
{ 0, 0 } \
MI_STAT_COUNT_END_NULL()
@ -95,8 +98,8 @@ static mi_tld_t tld_main = {
0,
&_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
{ 0, NULL, NULL, 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
{ 0, NULL, NULL, 0, tld_main_stats }, // os
{ MI_STATS_NULL } // stats
};
mi_heap_t _mi_heap_main = {
@ -385,7 +388,7 @@ bool _mi_preloading() {
}
// Communicate with the redirection module on Windows
#if defined(_WIN32) && defined(MI_SHARED_LIB)
#if 0
#ifdef __cplusplus
extern "C" {
#endif

View file

@ -106,6 +106,7 @@ static size_t mi_good_commit_size(size_t size) {
// Return if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
if (p==NULL) return false;
size_t count = mi_atomic_read(&regions_count);
for (size_t i = 0; i < count; i++) {
uint8_t* start = (uint8_t*)mi_atomic_read_ptr(&regions[i].start);

View file

@ -97,7 +97,7 @@ uint8_t _mi_bsr(uintptr_t x) {
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
inline uint8_t _mi_bin(size_t size) {
extern inline uint8_t _mi_bin(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
uint8_t bin;
if (wsize <= 1) {
@ -120,13 +120,13 @@ inline uint8_t _mi_bin(size_t size) {
bin = MI_BIN_HUGE;
}
else {
#if defined(MI_ALIGN4W)
#if defined(MI_ALIGN4W)
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
// and use the top 3 bits to determine the bin (~16% worst internal fragmentation).
// and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we use do not round the first 8 sizes
// which each get an exact bin
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;

View file

@ -71,10 +71,11 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->block_size > 0);
mi_assert_internal(page->used <= page->capacity);
mi_assert_internal(page->capacity <= page->reserved);
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = _mi_page_start(segment,page,NULL);
mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page));
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
@ -216,7 +217,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
if (page == NULL) return NULL;
mi_page_init(heap, page, block_size, &heap->tld->stats);
mi_heap_stat_increase( heap, pages, 1);
_mi_stat_increase( &heap->tld->stats.pages, 1);
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
return page;
@ -352,7 +353,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
// account for huge pages here
if (page->block_size > MI_LARGE_SIZE_MAX) {
mi_heap_stat_decrease(page->heap, huge, page->block_size);
_mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
}
// remove from the page list
@ -384,8 +385,9 @@ void _mi_page_retire(mi_page_t* page) {
// is the only page left with free blocks. It is not clear
// how to check this efficiently though... for now we just check
// if its neighbours are almost fully used.
if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
if (mi_likely(page->block_size <= MI_MEDIUM_SIZE_MAX)) {
if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
_mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1);
return; // dont't retire after all
}
}
@ -404,7 +406,60 @@ void _mi_page_retire(mi_page_t* page) {
#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES (2)
static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats)
// Extend the free list of `page` with `extend` fresh blocks in a
// *randomized* order (secure mode): the new area is split into up to
// MI_MAX_SLICES slices and the free list is threaded randomly between
// them, so consecutive allocations do not sit at predictable adjacent
// addresses. Preconditions: both free lists are empty and `extend` blocks
// still fit in the reserved area. Advances `heap->random` as a side effect.
static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) {
  UNUSED(stats);
  mi_assert_internal(page->free == NULL);
  mi_assert_internal(page->local_free == NULL);
  mi_assert_internal(page->capacity + extend <= page->reserved);
  void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
  size_t bsize = page->block_size;
  // initialize a randomized free list
  // set up `slice_count` slices to alternate between
  // (largest power of two <= extend, capped at MI_MAX_SLICE_SHIFT)
  size_t shift = MI_MAX_SLICE_SHIFT;
  while ((extend >> shift) == 0) {
    shift--;
  }
  size_t slice_count = (size_t)1U << shift;
  size_t slice_extend = extend / slice_count;  // blocks per slice
  mi_assert_internal(slice_extend >= 1);
  mi_block_t* blocks[MI_MAX_SLICES];  // current start of the slice
  size_t counts[MI_MAX_SLICES];       // available objects in the slice
  for (size_t i = 0; i < slice_count; i++) {
    blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend);
    counts[i] = slice_extend;
  }
  counts[slice_count-1] += (extend % slice_count);  // final slice holds the modulus too (todo: distribute evenly?)
  // and initialize the free list by randomly threading through them
  // set up first element
  size_t current = _mi_heap_random(heap) % slice_count;
  counts[current]--;
  page->free = blocks[current];
  // and iterate through the rest
  uintptr_t rnd = heap->random;
  for (size_t i = 1; i < extend; i++) {
    // call random_shuffle only every INTPTR_SIZE rounds
    // (each round consumes 8 bits of `rnd`)
    size_t round = i%MI_INTPTR_SIZE;
    if (round == 0) rnd = _mi_random_shuffle(rnd);
    // select a random next slice index
    size_t next = ((rnd >> 8*round) & (slice_count-1));
    while (counts[next]==0) {  // ensure it still has space
      next++;
      if (next==slice_count) next = 0;
    }
    // and link the current block to it
    counts[next]--;
    mi_block_t* block = blocks[current];
    blocks[current] = (mi_block_t*)((uint8_t*)block + bsize);  // bump to the following block
    mi_block_set_next(page, block, blocks[next]);  // and set next; note: we may have `current == next`
    current = next;
  }
  mi_block_set_next(page, blocks[current], NULL);  // end of the list
  heap->random = _mi_random_shuffle(rnd);  // persist the advanced random state
}
static void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
{
UNUSED(stats);
mi_assert_internal(page->free == NULL);
@ -413,66 +468,17 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
size_t bsize = page->block_size;
mi_block_t* start = mi_page_block_at(page, page_area, page->capacity);
if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
// initialize a sequential free list
mi_block_t* end = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* block = start;
for (size_t i = 0; i < extend; i++) {
mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
mi_block_set_next(page,block,next);
block = next;
}
mi_block_set_next(page, end, NULL);
page->free = start;
}
else {
// initialize a randomized free list
// set up `slice_count` slices to alternate between
size_t shift = MI_MAX_SLICE_SHIFT;
while ((extend >> shift) == 0) {
shift--;
}
size_t slice_count = (size_t)1U << shift;
size_t slice_extend = extend / slice_count;
mi_assert_internal(slice_extend >= 1);
mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
size_t counts[MI_MAX_SLICES]; // available objects in the slice
for (size_t i = 0; i < slice_count; i++) {
blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend);
counts[i] = slice_extend;
}
counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?)
// and initialize the free list by randomly threading through them
// set up first element
size_t current = _mi_heap_random(heap) % slice_count;
counts[current]--;
page->free = blocks[current];
// and iterate through the rest
uintptr_t rnd = heap->random;
for (size_t i = 1; i < extend; i++) {
// call random_shuffle only every INTPTR_SIZE rounds
size_t round = i%MI_INTPTR_SIZE;
if (round == 0) rnd = _mi_random_shuffle(rnd);
// select a random next slice index
size_t next = ((rnd >> 8*round) & (slice_count-1));
while (counts[next]==0) { // ensure it still has space
next++;
if (next==slice_count) next = 0;
}
// and link the current block to it
counts[next]--;
mi_block_t* block = blocks[current];
blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
current = next;
}
mi_block_set_next( page, blocks[current], NULL); // end of the list
heap->random = _mi_random_shuffle(rnd);
// initialize a sequential free list
mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
mi_block_t* block = start;
while(block <= last) {
mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
mi_block_set_next(page,block,next);
block = next;
}
// enable the new free list
page->capacity += (uint16_t)extend;
_mi_stat_increase(&stats->page_committed, extend * page->block_size);
mi_block_set_next(page, last, NULL);
page->free = start;
}
/* -----------------------------------------------------------
@ -518,7 +524,15 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
mi_assert_internal(extend < (1UL<<16));
// and append the extend the free list
mi_page_free_list_extend(heap, page, extend, stats );
if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
mi_page_free_list_extend(page, extend, stats );
}
else {
mi_page_free_list_extend_secure(heap, page, extend, stats);
}
// enable the new free list
page->capacity += (uint16_t)extend;
_mi_stat_increase(&stats->page_committed, extend * page->block_size);
mi_assert_expensive(mi_page_is_valid_init(page));
}
@ -688,7 +702,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
if (page != NULL) {
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(page->block_size == block_size);
mi_heap_stat_increase( heap, huge, block_size);
_mi_stat_increase( &heap->tld->stats.huge, block_size);
}
return page;
}
@ -708,10 +722,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
// call potential deferred free routines
_mi_deferred_free(heap, false);
// free delayed frees from other threads
_mi_heap_delayed_free(heap);
// huge allocation?
mi_page_t* page;
if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {

View file

@ -226,6 +226,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
segment->thread_id = 0;
mi_segments_track_size(-((long)segment_size),tld);
if (mi_option_is_enabled(mi_option_secure)) {
_mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set
@ -235,8 +236,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
// and no more than 2.
#define MI_SEGMENT_CACHE_MAX (2)
// and no more than 4.
#define MI_SEGMENT_CACHE_MAX (4)
#define MI_SEGMENT_CACHE_FRACTION (8)
// note: returned segment may be partially reset
@ -248,17 +249,19 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t
tld->cache = segment->next;
segment->next = NULL;
mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
_mi_stat_decrease(&tld->stats->segments_cache, 1);
return segment;
}
static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache
if (tld->cache_count < MI_SEGMENT_CACHE_MAX
&& tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))
) { // always allow 1 element cache
return false;
}
// take the opportunity to reduce the segment cache if it is too large (now)
// TODO: this never happens as we check against peak usage, should we use current usage instead?
while (tld->cache_count > (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) {
while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) {
mi_segment_t* segment = mi_segment_cache_pop(0,tld);
mi_assert_internal(segment != NULL);
if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld);
@ -269,7 +272,9 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld));
mi_assert_internal(segment->next == NULL);
if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) return false;
if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) {
return false;
}
mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
if (mi_option_is_enabled(mi_option_cache_reset)) {
_mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats);
@ -277,6 +282,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld)
segment->next = tld->cache;
tld->cache = segment;
tld->cache_count++;
_mi_stat_increase(&tld->stats->segments_cache,1);
return true;
}
@ -407,8 +413,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment));
mi_assert(segment->next == NULL);
mi_assert(segment->prev == NULL);
_mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);
segment->thread_id = 0;
_mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);
// update reset memory statistics
/*
@ -613,6 +618,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
}
else {
// otherwise reclaim it
mi_page_init_flags(page,segment->thread_id);
_mi_page_reclaim(heap,page);
}
}
@ -643,6 +649,7 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl
mi_assert_internal(mi_segment_has_free(segment));
mi_page_t* page = mi_segment_find_free(segment, tld->stats);
page->segment_in_use = true;
mi_page_init_flags(page,segment->thread_id);
segment->used++;
mi_assert_internal(segment->used <= segment->capacity);
if (segment->used == segment->capacity) {
@ -682,6 +689,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_
segment->used = 1;
mi_page_t* page = &segment->pages[0];
page->segment_in_use = true;
mi_page_init_flags(page,segment->thread_id);
return page;
}
@ -693,22 +701,27 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
segment->used = 1;
mi_page_t* page = &segment->pages[0];
page->segment_in_use = true;
mi_page_init_flags(page,segment->thread_id);
return page;
}
/* -----------------------------------------------------------
Page allocation and free
----------------------------------------------------------- */
// Is carving blocks of `bsize` bytes out of an area of `size` bytes a good
// fit? "Good" means the unusable tail (`size % bsize`) wastes at most 25%
// of `size`.
// (fix: the old expression `(size - (size % bsize)) < (size/4)` compared the
// *usable* portion against size/4, which is false for every 0 < bsize < size,
// so the heuristic could never report a good fit.)
static bool mi_is_good_fit(size_t bsize, size_t size) {
  // good fit if no more than 25% wasted
  return (bsize > 0 && size > 0 && bsize < size && (size % bsize) <= (size/4));
}
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page;
if (block_size <= (MI_SMALL_PAGE_SIZE/4)) {
if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) {
page = mi_segment_small_page_alloc(tld,os_tld);
}
else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) {
else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
page = mi_segment_medium_page_alloc(tld, os_tld);
}
else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {
else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
page = mi_segment_large_page_alloc(tld, os_tld);
}
else {

View file

@ -99,14 +99,14 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1);
mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1);
mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1);
mi_stat_add(&stats->mmap_ensure_aligned, &src->mmap_ensure_aligned, 1);
mi_stat_add(&stats->mmap_right_align, &src->mmap_right_align, 1);
mi_stat_add(&stats->commit_calls, &src->commit_calls, 1);
mi_stat_add(&stats->threads, &src->threads, 1);
mi_stat_add(&stats->pages_extended, &src->pages_extended, 1);
mi_stat_add(&stats->malloc, &src->malloc, 1);
mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
mi_stat_add(&stats->huge, &src->huge, 1);
mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
mi_stat_counter_add(&stats->searches, &src->searches, 1);
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
@ -172,10 +172,15 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t
}
static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, FILE* out ) {
double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count);
_mi_fprintf(out,"%10s: %7.1f avg\n", msg, avg);
_mi_fprintf(out, "%10s:", msg);
mi_print_amount(stat->total, -1, out);
_mi_fprintf(out, "\n");
}
static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, FILE* out) {
double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count);
_mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg);
}
static void mi_print_header( FILE* out ) {
@ -229,15 +234,15 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
mi_stat_print(&stats->page_committed, "touched", 1, out);
mi_stat_print(&stats->segments, "segments", -1, out);
mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out);
mi_stat_print(&stats->segments_cache, "-cached", -1, out);
mi_stat_print(&stats->pages, "pages", -1, out);
mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out);
mi_stat_print(&stats->pages_extended, "-extended", 0, out);
mi_stat_counter_print(&stats->page_no_retire, "-noretire", out);
mi_stat_print(&stats->mmap_calls, "mmaps", 0, out);
mi_stat_print(&stats->mmap_right_align, "mmap fast", 0, out);
mi_stat_print(&stats->mmap_ensure_aligned, "mmap slow", 0, out);
mi_stat_print(&stats->commit_calls, "commits", 0, out);
mi_stat_print(&stats->threads, "threads", 0, out);
mi_stat_counter_print(&stats->searches, "searches", out);
mi_stat_counter_print_avg(&stats->searches, "searches", out);
if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs);

View file

@ -6,6 +6,7 @@
#include <mimalloc.h>
#include <mimalloc-override.h> // redefines malloc etc.
int main() {
mi_version();
void* p1 = malloc(78);