diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj
index f41b2efc..7d452b55 100644
--- a/ide/vs2017/mimalloc-override.vcxproj
+++ b/ide/vs2017/mimalloc-override.vcxproj
@@ -35,7 +35,6 @@
DynamicLibrary
false
v141
- true
DynamicLibrary
@@ -46,7 +45,6 @@
DynamicLibrary
false
v141
- true
@@ -70,25 +68,25 @@
$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
.dll
- mimalloc
+ mimalloc-override
$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
.dll
- mimalloc
+ mimalloc-override
$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
.dll
- mimalloc
+ mimalloc-override
$(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
.dll
- mimalloc
+ mimalloc-override
@@ -100,15 +98,17 @@
MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);
MultiThreadedDebugDLL
false
- CompileAsCpp
+ Default
- ../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies)
+ %(AdditionalDependencies)
Default
+ DllEntry
+ false
@@ -121,15 +121,17 @@
MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions);
MultiThreadedDebugDLL
false
- CompileAsCpp
+ Default
- ../../bin/mimalloc-redirect.lib;%(AdditionalDependencies)
+ %(AdditionalDependencies)
Default
+ DllEntry
+ false
COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath)
@@ -152,15 +154,17 @@
$(IntDir)
false
MultiThreadedDLL
- CompileAsCpp
+ Default
true
true
- ../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies)
+ %(AdditionalDependencies)
Default
+ DllEntry
+ false
@@ -177,15 +181,17 @@
$(IntDir)
false
MultiThreadedDLL
- CompileAsCpp
+ Default
true
true
- ../../bin/mimalloc-redirect.lib;%(AdditionalDependencies)
+ %(AdditionalDependencies)
Default
+ DllEntry
+ false
COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath)
@@ -208,6 +214,7 @@
false
false
+
true
true
diff --git a/ide/vs2017/mimalloc-override.vcxproj.filters b/ide/vs2017/mimalloc-override.vcxproj.filters
index ffabddac..df0bf5ed 100644
--- a/ide/vs2017/mimalloc-override.vcxproj.filters
+++ b/ide/vs2017/mimalloc-override.vcxproj.filters
@@ -67,5 +67,8 @@
Source Files
+
+ Source Files
+
\ No newline at end of file
diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj b/ide/vs2017/mimalloc-test-stress.vcxproj
index e8cc5045..b8267d0b 100644
--- a/ide/vs2017/mimalloc-test-stress.vcxproj
+++ b/ide/vs2017/mimalloc-test-stress.vcxproj
@@ -67,19 +67,19 @@
- $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
+ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
- $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
+ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
- $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
+ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
- $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
+ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
diff --git a/ide/vs2017/mimalloc-test.vcxproj b/ide/vs2017/mimalloc-test.vcxproj
index c1539aeb..27c7bb6e 100644
--- a/ide/vs2017/mimalloc-test.vcxproj
+++ b/ide/vs2017/mimalloc-test.vcxproj
@@ -67,19 +67,19 @@
- $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
+ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
- $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
+ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
- $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
+ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
- $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
+ $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\
$(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index e261dba2..ad9b3ecf 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -307,13 +307,23 @@ static inline bool mi_page_all_used(mi_page_t* page) {
static inline bool mi_page_mostly_used(const mi_page_t* page) {
if (page==NULL) return true;
uint16_t frac = page->reserved / 8U;
- return (page->reserved - page->used + page->thread_freed < frac);
+ return (page->reserved - page->used + page->thread_freed <= frac);
}
static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
}
+static inline uintptr_t mi_page_thread_id(const mi_page_t* page) {
+ return (page->flags.xthread_id << MI_PAGE_FLAGS_BITS);
+}
+
+static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) {
+ page->flags.value = 0;
+ page->flags.xthread_id = (thread_id >> MI_PAGE_FLAGS_BITS);
+ mi_assert(page->flags.value == thread_id);
+}
+
// -------------------------------------------------------------------
// Encoding/Decoding the free list next pointers
// -------------------------------------------------------------------
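
[Aside: a minimal standalone sketch of the packed-flags trick introduced above; it is not part of this patch. Assumptions: a little-endian target, C11 anonymous struct members, and `FLAGS_BITS` mirroring `MI_PAGE_FLAGS_BITS`. With the thread id in the top bits and `in_full`/`has_aligned` in the low 2 bits, the single compare `flags.value == tid` in `mi_free` holds only for a local free from a page that is neither full nor contains aligned blocks.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FLAGS_BITS 2  // mirrors MI_PAGE_FLAGS_BITS (assumption)

typedef union page_flags_u {
  uintptr_t value;
  struct {
    uintptr_t in_full     : 1;
    uintptr_t has_aligned : 1;
    uintptr_t xthread_id  : (sizeof(uintptr_t)*8 - FLAGS_BITS);
  };
} page_flags_t;

int main(void) {
  // thread ids are assumed to have their low 2 bits clear
  // (the assert in mi_page_init_flags relies on the same property)
  uintptr_t tid = (uintptr_t)0x7f12345678 & ~(uintptr_t)3;
  page_flags_t flags = { 0 };
  flags.xthread_id = tid >> FLAGS_BITS;
  assert(flags.value == tid);                    // round-trips while both flags are 0
  printf("fast path: %d\n", flags.value == tid); // 1: local, not full, no aligned blocks
  flags.in_full = 1;
  printf("fast path: %d\n", flags.value == tid); // 0: falls through to the generic path
  return 0;
}
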
diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h
index 5c14ffd4..4002c12c 100644
--- a/include/mimalloc-types.h
+++ b/include/mimalloc-types.h
@@ -91,19 +91,19 @@ terms of the MIT license. A copy of the license can be found in the file
#define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE)
#define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE)
-#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64kb on 64-bit
-#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit
+#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit
+#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1Mb on 64-bit
#define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT)
-// Maximum number of size classes. (spaced exponentially in 16.7% increments)
-#define MI_BIN_HUGE (64U)
-
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE`
#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
-#if (MI_LARGE_WSIZE_MAX > 131072)
+// Maximum number of size classes. (spaced exponentially in 12.5% increments)
+#define MI_BIN_HUGE (70U)
+
+#if (MI_LARGE_WSIZE_MAX > 393216)
#error "define more bins"
#endif
@@ -123,14 +123,26 @@ typedef enum mi_delayed_e {
} mi_delayed_t;
+// Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags
+// This allows a single test in `mi_free` to check for unlikely cases
+// (namely, non-local free, aligned free, or freeing in a full page)
+#define MI_PAGE_FLAGS_BITS (2)
+#define MI_PAGE_FLAGS_TID_BITS (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS)
typedef union mi_page_flags_u {
- uint16_t value;
+ uintptr_t value;
struct {
- bool has_aligned;
- bool in_full;
+ #ifdef MI_BIG_ENDIAN
+ uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
+ #endif
+ uintptr_t in_full : 1;
+ uintptr_t has_aligned : 1;
+ #ifndef MI_BIG_ENDIAN
+ uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS;
+ #endif
};
} mi_page_flags_t;
+
// Thread free list.
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
typedef uintptr_t mi_thread_free_t;
@@ -161,15 +173,15 @@ typedef struct mi_page_s {
bool is_committed:1; // `true` if the page virtual memory is committed
// layout like this to optimize access in `mi_malloc` and `mi_free`
- mi_page_flags_t flags;
uint16_t capacity; // number of blocks committed
uint16_t reserved; // number of blocks reserved in memory
-
+ // 16 bits padding
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
#if MI_SECURE
uintptr_t cookie; // random cookie to encode the free lists
#endif
size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
+ mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free`
@@ -182,10 +194,10 @@ typedef struct mi_page_s {
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// improve page index calculation
-#if MI_INTPTR_SIZE==8
- //void* padding[1]; // 12 words on 64-bit
+#if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
+ void* padding[1]; // 12 words on 64-bit
#elif MI_INTPTR_SIZE==4
- void* padding[1]; // 12 words on 32-bit
+ // void* padding[1]; // 12 words on 32-bit
#endif
} mi_page_t;
@@ -215,7 +227,7 @@ typedef struct mi_segment_s {
// layout like this to optimize access in `mi_free`
size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
- uintptr_t thread_id; // unique id of the thread owning this segment
+ volatile uintptr_t thread_id; // unique id of the thread owning this segment
mi_page_kind_t page_kind; // kind of pages: small, large, or huge
mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
} mi_segment_t;
@@ -324,12 +336,12 @@ typedef struct mi_stats_s {
mi_stat_count_t pages_abandoned;
mi_stat_count_t pages_extended;
mi_stat_count_t mmap_calls;
- mi_stat_count_t mmap_right_align;
- mi_stat_count_t mmap_ensure_aligned;
mi_stat_count_t commit_calls;
mi_stat_count_t threads;
mi_stat_count_t huge;
mi_stat_count_t malloc;
+ mi_stat_count_t segments_cache;
+ mi_stat_counter_t page_no_retire;
mi_stat_counter_t searches;
#if MI_STAT>1
mi_stat_count_t normal[MI_BIN_HUGE+1];
diff --git a/src/alloc-override-win.c b/src/alloc-override-win.c
new file mode 100644
index 00000000..d1d51b9a
--- /dev/null
+++ b/src/alloc-override-win.c
@@ -0,0 +1,714 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+
+#if !defined(_WIN32)
+#error "this file should only be included on Windows"
+#endif
+
+#include <windows.h>
+#include <psapi.h>
+
+#include <stdlib.h> // getenv
+#include <string.h> // strstr
+
+
+/*
+To override the C runtime `malloc` on Windows we need to patch the allocation
+functions at runtime initialization. Unfortunately we can never patch before the
+runtime initializes itself, because as soon as we call `GetProcAddress` on the
+runtime module (a DLL or EXE in Windows speak), it will first load and initialize
+(by the OS calling `DllMain` on it).
+
+This means that some things might be already allocated by the C runtime itself
+(and possibly other DLL's) before we get to resolve runtime addresses. This is
+no problem if everyone unwinds in order: when we unload, we unpatch and restore
+the original crt `free` routines and crt malloc'd memory is freed correctly.
+
+But things go wrong if such early CRT alloc'd memory is freed or re-allocated
+_after_ we patch, but _before_ we unload (and unpatch), or if any memory allocated
+by us is freed after we unpatched.
+
+There are two tricky situations to deal with:
+
+1. The Thread Local Storage (TLS): when the main thread stops it will call registered
+ callbacks on TLS entries (allocated by `FlsAlloc`). This is done by the OS
+ before any DLL's are unloaded. Unfortunately, the C runtime registers such
+ TLS entries with CRT allocated memory which is freed in the callback.
+
+2. Inside the CRT:
+ a. Some variables might get initialized by patched allocated
+ blocks but freed during CRT unloading after we unpatched
+ (like temporary file buffers).
+ b. Some blocks are allocated at CRT and freed by the CRT (like the
+ environment storage).
+ c. And some blocks are allocated by the CRT and then reallocated
+ while patched, and finally freed after unpatching! This
+ happens with the `atexit` functions for example to grow the array
+ of registered functions.
+
+In principle situation 2 is hopeless: since we cannot patch before CRT initialization,
+we can never be sure how to free or reallocate a pointer during CRT unloading.
+However, in practice there is a good solution: when terminating, we just patch
+the reallocation and free routines to no-ops -- we are winding down anyway! This leaves
+just the reallocation problem of CRT alloc'd memory once we are patched. Here, a study of the
+CRT reveals that there seem to be just three such situations:
+
+1. When registering `atexit` routines (to grow the exit function table),
+2. When calling `_setmaxstdio` (to grow the file handle table),
+3. and `_popen`/`_wpopen` (to grow handle pairs). These turn out not to be
+ a problem as these are NULL initialized.
+
+We fix these by providing wrappers:
+
+1. We first register a _global_ `atexit` routine ourselves (`mi_patches_at_exit`) before patching,
+ and then patch the `_crt_atexit` function to implement our own global exit list (and the
+ same for `_crt_at_quick_exit`). All module local lists are no problem since they are always fully
+ (un)patched from initialization to end. We can register in the global list by dynamically
+ getting the global `_crt_atexit` entry from `ucrtbase.dll`.
+
+2. The `_setmaxstdio` is _detoured_: we patch it by a stub that unpatches first,
+ calls the original routine and repatches again.
+
+That leaves us to reliably shutdown and enter "termination mode":
+
+1. Using our trick to get the global exit list entry point, we register an exit function `mi_patches_atexit`
+   that first executes our home-brew list of exit functions, and then enters a _termination_
+   phase that patches the realloc/free variants with no-ops. Patching `free` later again with special
+   no-ops also improves efficiency during the program run since no flags need to be checked.
+
+2. That is still not quite enough: after the exit routines registered after us on the
+   global exit list (by the CRT) have run, the OS starts to unwind the TLS callbacks,
+   and we would like callbacks registered after our DLL was loaded to still run in patched mode.
+   So, we also allocate a TLS entry when our DLL is loaded and, when its callback is called,
+   we re-enable the original patches again. Since TLS entries are destroyed in FIFO order,
+   this runs any callbacks in later DLL's in patched mode.
+
+3. Finally the DLL's get unloaded by the OS in order (still patched) until our DLL gets unloaded
+ and then we start a termination phase again, and patch realloc/free with no-ops for good this time.
+
+*/
+
+static int __cdecl mi_setmaxstdio(int newmax);
+
+// ------------------------------------------------------
+// Microsoft allocation extensions
+// ------------------------------------------------------
+
+
+typedef size_t mi_nothrow_t;
+
+static void mi_free_nothrow(void* p, mi_nothrow_t tag) {
+ UNUSED(tag);
+ mi_free(p);
+}
+
+// Versions of `free`, `realloc`, `recalloc`, `expand` and `msize`
+// that are used during termination and are no-ops.
+static void mi_free_term(void* p) {
+ UNUSED(p);
+}
+
+static void mi_free_size_term(void* p, size_t size) {
+ UNUSED(size);
+ UNUSED(p);
+}
+
+static void mi_free_nothrow_term(void* p, mi_nothrow_t tag) {
+ UNUSED(tag);
+ UNUSED(p);
+}
+
+static void* mi_realloc_term(void* p, size_t newsize) {
+ UNUSED(p); UNUSED(newsize);
+ return NULL;
+}
+
+static void* mi__recalloc_term(void* p, size_t newcount, size_t newsize) {
+ UNUSED(p); UNUSED(newcount); UNUSED(newsize);
+ return NULL;
+}
+
+static void* mi__expand_term(void* p, size_t newsize) {
+ UNUSED(p); UNUSED(newsize);
+ return NULL;
+}
+
+static size_t mi__msize_term(void* p) {
+ UNUSED(p);
+ return 0;
+}
+
+
+static void* mi__malloc_dbg(size_t size, int block_type, const char* fname, int line) {
+ UNUSED(block_type); UNUSED(fname); UNUSED(line);
+ return _malloc_base(size);
+}
+
+static void* mi__calloc_dbg(size_t count, size_t size, int block_type, const char* fname, int line) {
+ UNUSED(block_type); UNUSED(fname); UNUSED(line);
+ return _calloc_base(count, size);
+}
+
+static void* mi__realloc_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
+ UNUSED(block_type); UNUSED(fname); UNUSED(line);
+ return _realloc_base(p, size);
+}
+
+static void mi__free_dbg(void* p, int block_type) {
+ UNUSED(block_type);
+ _free_base(p);
+}
+
+
+// the `recalloc`, `expand`, and `msize` functions don't have base versions and thus need a separate term version
+
+static void* mi__recalloc_dbg(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
+ UNUSED(block_type); UNUSED(fname); UNUSED(line);
+ return mi_recalloc(p, count, size);
+}
+
+static void* mi__expand_dbg(void* p, size_t size, int block_type, const char* fname, int line) {
+ UNUSED(block_type); UNUSED(fname); UNUSED(line);
+ return mi__expand(p, size);
+}
+
+static size_t mi__msize_dbg(void* p, int block_type) {
+ UNUSED(block_type);
+ return mi_usable_size(p);
+}
+
+static void* mi__recalloc_dbg_term(void* p, size_t count, size_t size, int block_type, const char* fname, int line) {
+ UNUSED(block_type); UNUSED(fname); UNUSED(line);
+ return mi__recalloc_term(p, count, size);
+}
+
+static void* mi__expand_dbg_term(void* p, size_t size, int block_type, const char* fname, int line) {
+ UNUSED(block_type); UNUSED(fname); UNUSED(line);
+ return mi__expand_term(p, size);
+}
+
+static size_t mi__msize_dbg_term(void* p, int block_type) {
+ UNUSED(block_type);
+ return mi__msize_term(p);
+}
+
+
+// ------------------------------------------------------
+// implement our own global atexit handler
+// ------------------------------------------------------
+typedef void (cbfun_t)(void);
+typedef int (atexit_fun_t)(cbfun_t* fn);
+typedef uintptr_t encoded_t;
+
+typedef struct exit_list_s {
+ encoded_t functions; // encoded pointer to array of encoded function pointers
+ size_t count;
+ size_t capacity;
+} exit_list_t;
+
+#define MI_EXIT_INC (64)
+
+static exit_list_t atexit_list = { 0, 0, 0 };
+static exit_list_t at_quick_exit_list = { 0, 0, 0 };
+static CRITICAL_SECTION atexit_lock;
+
+// encode/decode function pointers with a random canary for security
+static encoded_t canary;
+
+static inline void *decode(encoded_t x) {
+ return (void*)(x^canary);
+}
+
+static inline encoded_t encode(void* p) {
+ return ((uintptr_t)p ^ canary);
+}
+
+
+static void init_canary()
+{
+ canary = _mi_random_init(0);
+ atexit_list.functions = at_quick_exit_list.functions = encode(NULL);
+}
+
+
+// initialize the list
+static void mi_initialize_atexit(void) {
+ InitializeCriticalSection(&atexit_lock);
+ init_canary();
+}
+
+// register an exit function
+static int mi_register_atexit(exit_list_t* list, cbfun_t* fn) {
+ if (fn == NULL) return EINVAL;
+ EnterCriticalSection(&atexit_lock);
+ encoded_t* functions = (encoded_t*)decode(list->functions);
+ if (list->count >= list->capacity) { // at first `functions == decode(0) == NULL`
+ encoded_t* newf = (encoded_t*)mi_recalloc(functions, list->capacity + MI_EXIT_INC, sizeof(cbfun_t*));
+ if (newf != NULL) {
+ list->capacity += MI_EXIT_INC;
+ list->functions = encode(newf);
+ functions = newf;
+ }
+ }
+ int result;
+ if (list->count < list->capacity && functions != NULL) {
+ functions[list->count] = encode(fn);
+ list->count++;
+ result = 0; // success
+ }
+ else {
+ result = ENOMEM;
+ }
+ LeaveCriticalSection(&atexit_lock);
+ return result;
+}
+
+// Register a global `atexit` function
+static int mi_atexit(cbfun_t* fn) {
+ return mi_register_atexit(&atexit_list,fn);
+}
+
+static int mi_at_quick_exit(cbfun_t* fn) {
+ return mi_register_atexit(&at_quick_exit_list,fn);
+}
+
+static int mi_register_onexit(void* table, cbfun_t* fn) {
+ // TODO: how can we distinguish a quick_exit from atexit?
+ return mi_atexit(fn);
+}
+
+// Execute exit functions in a list
+static void mi_execute_exit_list(exit_list_t* list) {
+ // copy and zero the list structure
+ EnterCriticalSection(&atexit_lock);
+ exit_list_t clist = *list;
+ memset(list,0,sizeof(*list));
+ LeaveCriticalSection(&atexit_lock);
+
+ // now execute the functions outside of the lock
+ encoded_t* functions = (encoded_t*)decode(clist.functions);
+ if (functions != NULL) {
+ for (size_t i = clist.count; i > 0; i--) { // careful with unsigned count down..
+ cbfun_t* fn = (cbfun_t*)decode(functions[i-1]);
+ if (fn==NULL) break; // corrupted!
+ fn();
+ }
+ mi_free(functions);
+ }
+}
+
+
+
+// ------------------------------------------------------
+// Jump assembly instructions for patches
+// ------------------------------------------------------
+
+#if defined(_M_IX86) || defined(_M_X64)
+
+#define MI_JUMP_SIZE 14 // at most 2+4+8 for a long jump or 1+5 for a short one
+
+typedef struct mi_jump_s {
+ uint8_t opcodes[MI_JUMP_SIZE];
+} mi_jump_t;
+
+void mi_jump_restore(void* current, const mi_jump_t* saved) {
+ memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
+}
+
+void mi_jump_write(void* current, void* target, mi_jump_t* save) {
+ if (save != NULL) {
+ memcpy(&save->opcodes, current, MI_JUMP_SIZE);
+ }
+ uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
+ ptrdiff_t diff = (uint8_t*)target - (uint8_t*)current;
+ uint32_t ofs32 = (uint32_t)diff;
+ #ifdef _M_X64
+ uint64_t ofs64 = (uint64_t)diff;
+ if (ofs64 != (uint64_t)ofs32) {
+    // use a long jump: FF 25 encodes `jmp qword ptr [rip+0]`, with the 8-byte absolute target following
+ opcodes[0] = 0xFF;
+ opcodes[1] = 0x25;
+ *((uint32_t*)&opcodes[2]) = 0;
+ *((uint64_t*)&opcodes[6]) = (uint64_t)target;
+ }
+ else
+ #endif
+ {
+ // use short jump
+ opcodes[0] = 0xE9;
+ *((uint32_t*)&opcodes[1]) = ofs32 - 5 /* size of the short jump instruction */;
+ }
+}
+
+#elif defined(_M_ARM64)
+
+#define MI_JUMP_SIZE 16
+
+typedef struct mi_jump_s {
+ uint8_t opcodes[MI_JUMP_SIZE];
+} mi_jump_t;
+
+void mi_jump_restore(void* current, const mi_jump_t* saved) {
+ memcpy(current, &saved->opcodes, MI_JUMP_SIZE);
+}
+
+void mi_jump_write(void* current, void* target, mi_jump_t* save) {
+ if (save != NULL) {
+ memcpy(&save->opcodes, current, MI_JUMP_SIZE);
+ }
+ uint8_t* opcodes = ((mi_jump_t*)current)->opcodes;
+
+  // 0x50 0x00 0x00 0x58   ldr x16, .+8   # load the 8-byte address that follows
+  // 0x00 0x02 0x3F 0xD6   blr x16        # and jump to it
+  // <8-byte absolute target address>
+  static const uint8_t jump_opcodes[8] = { 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x3F, 0xD6 };
+  memcpy(&opcodes[0], jump_opcodes, sizeof(jump_opcodes));
+  *((uint64_t*)&opcodes[8]) = (uint64_t)target; // store the absolute address, not a relative offset
+}
+
+#else
+#error "define jump instructions for this platform"
+#endif
+
+
+// ------------------------------------------------------
+// Patches
+// ------------------------------------------------------
+typedef enum patch_apply_e {
+ PATCH_NONE,
+ PATCH_TARGET,
+ PATCH_TARGET_TERM
+} patch_apply_t;
+
+#define MAX_ENTRIES 4 // maximum number of patched entry points (like `malloc` in ucrtbase and msvcrt)
+
+typedef struct mi_patch_s {
+ const char* name; // name of the function to patch
+ void* target; // the address of the new target (never NULL)
+ void* target_term; // the address of the target during termination (or NULL)
+ patch_apply_t applied; // what target has been applied?
+ void* originals[MAX_ENTRIES]; // the resolved addresses of the function (or NULLs)
+ mi_jump_t saves[MAX_ENTRIES]; // the saved instructions in case it was applied
+} mi_patch_t;
+
+#define MI_PATCH_NAME3(name,target,term) { name, &target, &term, PATCH_NONE, {NULL,NULL,NULL,NULL} }
+#define MI_PATCH_NAME2(name,target) { name, &target, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
+#define MI_PATCH3(name,target,term) MI_PATCH_NAME3(#name, target, term)
+#define MI_PATCH2(name,target) MI_PATCH_NAME2(#name, target)
+#define MI_PATCH1(name) MI_PATCH2(name,mi_##name)
+
+static mi_patch_t patches[] = {
+ // we implement our own global exit handler (as the CRT versions do a realloc internally)
+ //MI_PATCH2(_crt_atexit, mi_atexit),
+ //MI_PATCH2(_crt_at_quick_exit, mi_at_quick_exit),
+ MI_PATCH2(_setmaxstdio, mi_setmaxstdio),
+ MI_PATCH2(_register_onexit_function, mi_register_onexit),
+
+  // override higher-level atexit functions so we can implement at_quick_exit correctly
+ MI_PATCH2(atexit, mi_atexit),
+ MI_PATCH2(at_quick_exit, mi_at_quick_exit),
+
+ // regular entries
+ MI_PATCH2(malloc, mi_malloc),
+ MI_PATCH2(calloc, mi_calloc),
+ MI_PATCH3(realloc, mi_realloc,mi_realloc_term),
+ MI_PATCH3(free, mi_free,mi_free_term),
+
+ // extended api
+ MI_PATCH2(_strdup, mi_strdup),
+ MI_PATCH2(_strndup, mi_strndup),
+ MI_PATCH3(_expand, mi__expand,mi__expand_term),
+ MI_PATCH3(_recalloc, mi_recalloc,mi__recalloc_term),
+ MI_PATCH3(_msize, mi_usable_size,mi__msize_term),
+
+ // base versions
+ MI_PATCH2(_malloc_base, mi_malloc),
+ MI_PATCH2(_calloc_base, mi_calloc),
+ MI_PATCH3(_realloc_base, mi_realloc,mi_realloc_term),
+ MI_PATCH3(_free_base, mi_free,mi_free_term),
+
+ // these base versions are in the crt but without import records
+ MI_PATCH_NAME3("_recalloc_base", mi_recalloc,mi__recalloc_term),
+ MI_PATCH_NAME3("_msize_base", mi_usable_size,mi__msize_term),
+
+ // debug
+ MI_PATCH2(_malloc_dbg, mi__malloc_dbg),
+ MI_PATCH2(_realloc_dbg, mi__realloc_dbg),
+ MI_PATCH2(_calloc_dbg, mi__calloc_dbg),
+ MI_PATCH2(_free_dbg, mi__free_dbg),
+
+ MI_PATCH3(_expand_dbg, mi__expand_dbg, mi__expand_dbg_term),
+ MI_PATCH3(_recalloc_dbg, mi__recalloc_dbg, mi__recalloc_dbg_term),
+ MI_PATCH3(_msize_dbg, mi__msize_dbg, mi__msize_dbg_term),
+
+#if 0
+ // override new/delete variants for efficiency (?)
+#ifdef _WIN64
+ // 64 bit new/delete
+ MI_PATCH_NAME2("??2@YAPEAX_K@Z", mi_new),
+ MI_PATCH_NAME2("??_U@YAPEAX_K@Z", mi_new),
+ MI_PATCH_NAME3("??3@YAXPEAX@Z", mi_free, mi_free_term),
+ MI_PATCH_NAME3("??_V@YAXPEAX@Z", mi_free, mi_free_term),
+ MI_PATCH_NAME3("??3@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized
+ MI_PATCH_NAME3("??_V@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized
+ MI_PATCH_NAME2("??2@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new),
+ MI_PATCH_NAME2("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new),
+ MI_PATCH_NAME3("??3@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
+ MI_PATCH_NAME3("??_V@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
+
+
+#else
+ // 32 bit new/delete
+ MI_PATCH_NAME2("??2@YAPAXI@Z", mi_new),
+ MI_PATCH_NAME2("??_U@YAPAXI@Z", mi_new),
+ MI_PATCH_NAME3("??3@YAXPAX@Z", mi_free, mi_free_term),
+ MI_PATCH_NAME3("??_V@YAXPAX@Z", mi_free, mi_free_term),
+ MI_PATCH_NAME3("??3@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized
+ MI_PATCH_NAME3("??_V@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized
+
+ MI_PATCH_NAME2("??2@YAPAXIABUnothrow_t@std@@@Z", mi_new),
+ MI_PATCH_NAME2("??_U@YAPAXIABUnothrow_t@std@@@Z", mi_new),
+ MI_PATCH_NAME3("??3@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
+ MI_PATCH_NAME3("??_V@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term),
+
+#endif
+#endif
+ { NULL, NULL, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} }
+};
+
+
+// Apply a patch
+static bool mi_patch_apply(mi_patch_t* patch, patch_apply_t apply)
+{
+ if (patch->originals[0] == NULL) return true; // unresolved
+ if (apply == PATCH_TARGET_TERM && patch->target_term == NULL) apply = PATCH_TARGET; // avoid re-applying non-term variants
+ if (patch->applied == apply) return false;
+
+ for (int i = 0; i < MAX_ENTRIES; i++) {
+ void* original = patch->originals[i];
+ if (original == NULL) break; // no more
+
+ DWORD protect = PAGE_READWRITE;
+ if (!VirtualProtect(original, MI_JUMP_SIZE, PAGE_EXECUTE_READWRITE, &protect)) return false;
+ if (apply == PATCH_NONE) {
+ mi_jump_restore(original, &patch->saves[i]);
+ }
+ else {
+ void* target = (apply == PATCH_TARGET ? patch->target : patch->target_term);
+ mi_assert_internal(target != NULL);
+ if (target != NULL) mi_jump_write(original, target, &patch->saves[i]);
+ }
+ VirtualProtect(original, MI_JUMP_SIZE, protect, &protect);
+ }
+ patch->applied = apply;
+ return true;
+}
+
+// Apply all patches
+static bool _mi_patches_apply(patch_apply_t apply, patch_apply_t* previous) {
+ static patch_apply_t current = PATCH_NONE;
+ if (previous != NULL) *previous = current;
+ if (current == apply) return true;
+ current = apply;
+ bool ok = true;
+ for (size_t i = 0; patches[i].name != NULL; i++) {
+ if (!mi_patch_apply(&patches[i], apply)) ok = false;
+ }
+ return ok;
+}
+
+// Export the following three functions just in case
+// a user needs that level of control.
+
+// Disable all patches
+mi_decl_export void mi_patches_disable(void) {
+ _mi_patches_apply(PATCH_NONE, NULL);
+}
+
+// Enable all patches normally
+mi_decl_export bool mi_patches_enable(void) {
+ return _mi_patches_apply( PATCH_TARGET, NULL );
+}
+
+// Enable all patches in termination phase where free is a no-op
+mi_decl_export bool mi_patches_enable_term(void) {
+ return _mi_patches_apply(PATCH_TARGET_TERM, NULL);
+}
+
+// ------------------------------------------------------
+// Stub for _setmaxstdio
+// ------------------------------------------------------
+
+static int __cdecl mi_setmaxstdio(int newmax) {
+ patch_apply_t previous;
+ _mi_patches_apply(PATCH_NONE, &previous); // disable patches
+ int result = _setmaxstdio(newmax); // call original function (that calls original CRT recalloc)
+ _mi_patches_apply(previous,NULL); // and re-enable patches
+ return result;
+}
+
+
+// ------------------------------------------------------
+// Resolve addresses dynamically
+// ------------------------------------------------------
+
+// Try to resolve patches for a given module (DLL)
+static void mi_module_resolve(const char* fname, HMODULE mod, int priority) {
+ // see if any patches apply
+ for (size_t i = 0; patches[i].name != NULL; i++) {
+ mi_patch_t* patch = &patches[i];
+ if (patch->applied == PATCH_NONE) {
+      // find an available entry slot
+      int entry = 0;
+      while (entry < MAX_ENTRIES && patch->originals[entry] != NULL) entry++;
+      if (entry < MAX_ENTRIES) {
+        void* addr = GetProcAddress(mod, patch->name);
+        if (addr != NULL) {
+          // found it! set the address
+          patch->originals[entry] = addr;
+          _mi_trace_message(" found %s at %s!%p (entry %i)\n", patch->name, fname, addr, entry);
+        }
+ }
+ }
+ }
+ }
+}
+
+#define MIMALLOC_NAME "mimalloc-override.dll"
+#define UCRTBASE_NAME "ucrtbase.dll"
+#define UCRTBASED_NAME "ucrtbased.dll"
+
+// Resolve addresses of all patches by inspecting the loaded modules
+static atexit_fun_t* crt_atexit = NULL;
+static atexit_fun_t* crt_at_quick_exit = NULL;
+
+
+static bool mi_patches_resolve(void) {
+ // get all loaded modules
+ HANDLE process = GetCurrentProcess(); // always -1, no need to release
+ DWORD needed = 0;
+ HMODULE modules[400]; // try to stay under 4k to not trigger the guard page
+ EnumProcessModules(process, modules, sizeof(modules), &needed);
+ if (needed == 0) return false;
+ int count = needed / sizeof(HMODULE);
+ int ucrtbase_index = 0;
+ int mimalloc_index = 0;
+ // iterate through the loaded modules
+ for (int i = 0; i < count; i++) {
+ HMODULE mod = modules[i];
+ char filename[MAX_PATH] = { 0 };
+ DWORD slen = GetModuleFileName(mod, filename, MAX_PATH);
+ if (slen > 0 && slen < MAX_PATH) {
+ // filter out potential crt modules only
+ filename[slen] = 0;
+ const char* lastsep = strrchr(filename, '\\');
+ const char* basename = (lastsep==NULL ? filename : lastsep+1);
+ _mi_trace_message(" %i: dynamic module %s\n", i, filename);
+
+ // remember indices so we can check load order (in debug mode)
+ if (_stricmp(basename, MIMALLOC_NAME) == 0) mimalloc_index = i;
+ if (_stricmp(basename, UCRTBASE_NAME) == 0) ucrtbase_index = i;
+ if (_stricmp(basename, UCRTBASED_NAME) == 0) ucrtbase_index = i;
+
+ // see if we potentially patch in this module
+ int priority = 0;
+ if (i == 0) priority = 2; // main module to allow static crt linking
+ else if (_strnicmp(basename, "ucrt", 4) == 0) priority = 3; // new ucrtbase.dll in windows 10
+ // NOTE: don't override msvcr -- leads to crashes in setlocale (needs more testing)
+ // else if (_strnicmp(basename, "msvcr", 5) == 0) priority = 1; // older runtimes
+
+ if (priority > 0) {
+ // probably found a crt module, try to patch it
+ mi_module_resolve(basename,mod,priority);
+
+ // try to find the atexit functions for the main process (in `ucrtbase.dll`)
+ if (crt_atexit==NULL) crt_atexit = (atexit_fun_t*)GetProcAddress(mod, "_crt_atexit");
+ if (crt_at_quick_exit == NULL) crt_at_quick_exit = (atexit_fun_t*)GetProcAddress(mod, "_crt_at_quick_exit");
+ }
+ }
+ }
+ int diff = mimalloc_index - ucrtbase_index;
+ if (diff > 1) {
+    _mi_warning_message("warning: the \"mimalloc-override\" DLL does not seem to load before, or right after, the C runtime (\"ucrtbase\").\n"
+                        " Try to fix this by changing the linking order.\n");
+ }
+ return true;
+}
+
+
+// ------------------------------------------------------
+// Dll Entry
+// ------------------------------------------------------
+
+extern BOOL WINAPI _DllMainCRTStartup(HINSTANCE inst, DWORD reason, LPVOID reserved);
+
+static DWORD mi_fls_unwind_entry;
+static void NTAPI mi_fls_unwind(PVOID value) {
+ if (value != NULL) mi_patches_enable(); // and re-enable normal patches again for DLL's loaded after us
+ return;
+}
+
+static void mi_patches_atexit(void) {
+ mi_execute_exit_list(&atexit_list);
+ mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op
+}
+
+static void mi_patches_at_quick_exit(void) {
+ mi_execute_exit_list(&at_quick_exit_list);
+ mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op
+}
+
+__declspec(dllexport) BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID reserved) {
+ if (reason == DLL_PROCESS_ATTACH) {
+ __security_init_cookie();
+ }
+ else if (reason == DLL_PROCESS_DETACH) {
+ // enter termination phase for good now
+ mi_patches_enable_term();
+ }
+ // C runtime main
+ BOOL ok = _DllMainCRTStartup(inst, reason, reserved);
+ if (reason == DLL_PROCESS_ATTACH && ok) {
+ // initialize at exit lists
+ mi_initialize_atexit();
+
+ // Now resolve patches
+ ok = mi_patches_resolve();
+ if (ok) {
+ // check if patching is not disabled
+ #pragma warning(suppress:4996)
+ const char* s = getenv("MIMALLOC_DISABLE_OVERRIDE");
+      bool enabled = (s == NULL || strstr("1;TRUE;YES;ON", s) == NULL);
+ if (!enabled) {
+ _mi_verbose_message("override is disabled\n");
+ }
+ else {
+ // and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from GetProcAddress)
+ mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind);
+ if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) {
+ FlsSetValue(mi_fls_unwind_entry, (void*)1);
+ }
+
+ // register our patch disabler in the global exit list
+ if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit);
+ if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit);
+
+ // and patch ! this also redirects the `atexit` handling for the global exit list
+ mi_patches_enable();
+ _mi_verbose_message("override is enabled\n");
+
+ // hide internal allocation
+ mi_stats_reset();
+ }
+ }
+ }
+ return ok;
+}
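
[Aside, not part of the patch: the three exported controls above can also bracket a call that must run against the original CRT allocator, mirroring the `mi_setmaxstdio` detour. A hedged sketch; `call_into_crt` is a hypothetical callback:]

#include <stdbool.h>

// exported by alloc-override-win.c (see above)
extern void mi_patches_disable(void);
extern bool mi_patches_enable(void);

static void run_unpatched(void (*call_into_crt)(void)) {
  mi_patches_disable();  // restore the original CRT entry points
  call_into_crt();       // any malloc/free in here uses the CRT allocator again
  mi_patches_enable();   // and re-apply the mimalloc patches
}
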
diff --git a/src/alloc-override.c b/src/alloc-override.c
index 345d396c..e5eeaab2 100644
--- a/src/alloc-override.c
+++ b/src/alloc-override.c
@@ -9,7 +9,7 @@ terms of the MIT license. A copy of the license can be found in the file
#error "this file should be included from 'alloc.c' (so aliases can work)"
#endif
-#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL) && defined(_WIN64))
+#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL))
-#error "It is only possible to override "malloc" on Windows when building as a 64-bit DLL (and linking the C runtime as a DLL)"
+#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
#endif
diff --git a/src/alloc-posix.c b/src/alloc-posix.c
index 1f55b3a8..672b73b3 100644
--- a/src/alloc-posix.c
+++ b/src/alloc-posix.c
@@ -38,7 +38,9 @@ size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
}
void mi_cfree(void* p) mi_attr_noexcept {
- mi_free(p);
+ if (mi_is_in_heap_region(p)) {
+ mi_free(p);
+ }
}
int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept {
diff --git a/src/alloc.c b/src/alloc.c
index 6a91c0ad..bfb37d19 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -57,6 +57,7 @@ extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small(mi_get_default_heap(), size);
}
+
// zero initialized small block
void* mi_zalloc_small(size_t size) mi_attr_noexcept {
void* p = mi_malloc_small(size);
@@ -71,7 +72,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep
void* p;
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
p = mi_heap_malloc_small(heap, size);
- }
+ }
else {
p = _mi_malloc_generic(heap, size);
}
@@ -223,8 +224,7 @@ void mi_free(void* p) mi_attr_noexcept
return;
}
#endif
-
- bool local = (_mi_thread_id() == segment->thread_id); // preload, note: putting the thread_id in the page->flags does not improve performance
+
mi_page_t* page = _mi_segment_page_of(segment, p);
#if (MI_STAT>1)
@@ -236,24 +236,18 @@ void mi_free(void* p) mi_attr_noexcept
// huge page stat is accounted for in `_mi_page_retire`
#endif
- // adjust if it might be an un-aligned block
- if (mi_likely(page->flags.value==0)) { // not full or aligned
+ uintptr_t tid = _mi_thread_id();
+ if (mi_likely(tid == page->flags.value)) {
+ // local, and not full or aligned
mi_block_t* block = (mi_block_t*)p;
- if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance
- // owning thread can free a block directly
- mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
- page->local_free = block;
- page->used--;
- if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
- }
- else {
- // use atomic operations for a multi-threaded free
- _mi_free_block_mt(page, block);
- }
+ mi_block_set_next(page, block, page->local_free);
+ page->local_free = block;
+ page->used--;
+ if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); }
}
else {
- // aligned blocks, or a full page; use the more generic path
- mi_free_generic(segment, page, local, p);
+ // non-local, aligned blocks, or a full page; use the more generic path
+ mi_free_generic(segment, page, tid == mi_page_thread_id(page), p);
}
}
diff --git a/src/init.c b/src/init.c
index 44e3c9cb..f807d74a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -12,15 +12,16 @@ terms of the MIT license. A copy of the license can be found in the file
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
- 0, false, false, false, {0}, 0, 0,
- NULL, 0, // free, used
+ 0, false, false, false, 0, 0,
+ NULL, // free
#if MI_SECURE
0,
#endif
+ 0, {0}, // used, flags
NULL, 0, 0,
0, NULL, NULL, NULL
- #if (MI_INTPTR_SIZE==4)
- , { NULL }
+ #if (MI_INTPTR_SIZE==8 && MI_SECURE==0)
+ , { NULL }
#endif
};
@@ -33,22 +34,23 @@ const mi_page_t _mi_page_empty = {
#define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) }
#define MI_PAGE_QUEUES_EMPTY \
{ QNULL(1), \
- QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \
- QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \
- QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \
- QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \
- QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \
- QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \
- QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \
- QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \
- QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \
+ QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \
+ QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \
+ QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \
+ QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \
+ QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \
+ QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \
+ QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \
+ QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \
+ QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \
+ QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \
QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ }
#define MI_STAT_COUNT_NULL() {0,0,0,0}
// Empty statistics
#if MI_STAT>1
-#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) }
+#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) }
#else
#define MI_STAT_COUNT_END_NULL()
#endif
@@ -61,7 +63,8 @@ const mi_page_t _mi_page_empty = {
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
- MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \
+ MI_STAT_COUNT_NULL(), \
+ { 0, 0 }, \
{ 0, 0 } \
MI_STAT_COUNT_END_NULL()
@@ -95,8 +98,8 @@ static mi_tld_t tld_main = {
0,
&_mi_heap_main,
{ { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments
- { 0, NULL, NULL, 0, tld_main_stats }, // os
- { MI_STATS_NULL } // stats
+ { 0, NULL, NULL, 0, tld_main_stats }, // os
+ { MI_STATS_NULL } // stats
};
mi_heap_t _mi_heap_main = {
@@ -385,7 +388,7 @@ bool _mi_preloading() {
}
// Communicate with the redirection module on Windows
-#if defined(_WIN32) && defined(MI_SHARED_LIB)
+#if 0
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/memory.c b/src/memory.c
index e7d1887e..7f8cfb14 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -106,6 +106,7 @@ static size_t mi_good_commit_size(size_t size) {
// Return if a pointer points into a region reserved by us.
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+ if (p==NULL) return false;
size_t count = mi_atomic_read(&regions_count);
for (size_t i = 0; i < count; i++) {
uint8_t* start = (uint8_t*)mi_atomic_read_ptr(&regions[i].start);
diff --git a/src/page-queue.c b/src/page-queue.c
index fd388113..a386f8a1 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -97,7 +97,7 @@ uint8_t _mi_bsr(uintptr_t x) {
// Returns MI_BIN_HUGE if the size is too large.
// We use `wsize` for the size in "machine word sizes",
// i.e. byte size == `wsize*sizeof(void*)`.
-inline uint8_t _mi_bin(size_t size) {
+extern inline uint8_t _mi_bin(size_t size) {
size_t wsize = _mi_wsize_from_size(size);
uint8_t bin;
if (wsize <= 1) {
@@ -120,13 +120,13 @@ inline uint8_t _mi_bin(size_t size) {
bin = MI_BIN_HUGE;
}
else {
- #if defined(MI_ALIGN4W)
+ #if defined(MI_ALIGN4W)
if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes
#endif
wsize--;
// find the highest bit
uint8_t b = mi_bsr32((uint32_t)wsize);
- // and use the top 3 bits to determine the bin (~16% worst internal fragmentation).
+ // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation).
// - adjust with 3 because we do not round the first 8 sizes
// which each get an exact bin
bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3;
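
[Aside: a worked instance of the bin formula above, not part of the patch; 64-bit (8-byte) words assumed. An 800-byte request is `wsize = 100`; after `wsize--`, `b = bsr32(99) = 6`, so `bin = (6 << 2) + ((99 >> 4) & 3) - 3 = 23`, and queue 23 in `MI_PAGE_QUEUES_EMPTY` holds 112-word (896-byte) blocks: about 10.7% waste, within the ~12.5% bound.]

#include <stdint.h>
#include <stdio.h>

static uint8_t bsr32(uint32_t x) {  // index of the highest set bit (x > 0)
  uint8_t b = 0;
  while (x >>= 1) b++;
  return b;
}

int main(void) {
  size_t wsize = 100;                  // 800 bytes in 8-byte words
  wsize--;
  uint8_t b = bsr32((uint32_t)wsize);  // 6
  uint8_t bin = (uint8_t)((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03) - 3);
  printf("bin = %d\n", bin);           // prints: bin = 23
  return 0;
}
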
diff --git a/src/page.c b/src/page.c
index 69d32bfe..e6be8df6 100644
--- a/src/page.c
+++ b/src/page.c
@@ -71,10 +71,11 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
mi_assert_internal(page->block_size > 0);
mi_assert_internal(page->used <= page->capacity);
mi_assert_internal(page->capacity <= page->reserved);
-
+
mi_segment_t* segment = _mi_page_segment(page);
uint8_t* start = _mi_page_start(segment,page,NULL);
mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL));
+ mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page));
//mi_assert_internal(start + page->capacity*page->block_size == page->top);
mi_assert_internal(mi_page_list_is_valid(page,page->free));
@@ -216,7 +217,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os);
if (page == NULL) return NULL;
mi_page_init(heap, page, block_size, &heap->tld->stats);
- mi_heap_stat_increase( heap, pages, 1);
+ _mi_stat_increase( &heap->tld->stats.pages, 1);
mi_page_queue_push(heap, pq, page);
mi_assert_expensive(_mi_page_is_valid(page));
return page;
@@ -352,7 +353,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
// account for huge pages here
if (page->block_size > MI_LARGE_SIZE_MAX) {
- mi_heap_stat_decrease(page->heap, huge, page->block_size);
+ _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size);
}
// remove from the page list
@@ -384,8 +385,9 @@ void _mi_page_retire(mi_page_t* page) {
// is the only page left with free blocks. It is not clear
// how to check this efficiently though... for now we just check
// if its neighbours are almost fully used.
- if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) {
+ if (mi_likely(page->block_size <= MI_MEDIUM_SIZE_MAX)) {
if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) {
+ _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1);
return; // don't retire after all
}
}
@@ -404,7 +406,60 @@ void _mi_page_retire(mi_page_t* page) {
#define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT)
#define MI_MIN_SLICES (2)
-static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats)
+static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) {
+ UNUSED(stats);
+ mi_assert_internal(page->free == NULL);
+ mi_assert_internal(page->local_free == NULL);
+ mi_assert_internal(page->capacity + extend <= page->reserved);
+ void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL);
+ size_t bsize = page->block_size;
+
+ // initialize a randomized free list
+ // set up `slice_count` slices to alternate between
+ size_t shift = MI_MAX_SLICE_SHIFT;
+ while ((extend >> shift) == 0) {
+ shift--;
+ }
+ size_t slice_count = (size_t)1U << shift;
+ size_t slice_extend = extend / slice_count;
+ mi_assert_internal(slice_extend >= 1);
+ mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
+ size_t counts[MI_MAX_SLICES]; // available objects in the slice
+ for (size_t i = 0; i < slice_count; i++) {
+ blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend);
+ counts[i] = slice_extend;
+ }
+ counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?)
+
+ // and initialize the free list by randomly threading through them
+ // set up first element
+ size_t current = _mi_heap_random(heap) % slice_count;
+ counts[current]--;
+ page->free = blocks[current];
+ // and iterate through the rest
+ uintptr_t rnd = heap->random;
+ for (size_t i = 1; i < extend; i++) {
+ // call random_shuffle only every INTPTR_SIZE rounds
+ size_t round = i%MI_INTPTR_SIZE;
+ if (round == 0) rnd = _mi_random_shuffle(rnd);
+ // select a random next slice index
+ size_t next = ((rnd >> 8*round) & (slice_count-1));
+ while (counts[next]==0) { // ensure it still has space
+ next++;
+ if (next==slice_count) next = 0;
+ }
+ // and link the current block to it
+ counts[next]--;
+ mi_block_t* block = blocks[current];
+ blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
+ mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
+ current = next;
+ }
+ mi_block_set_next(page, blocks[current], NULL); // end of the list
+ heap->random = _mi_random_shuffle(rnd);
+}
+
+static void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats)
{
UNUSED(stats);
mi_assert_internal(page->free == NULL);
@@ -413,66 +468,17 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e
void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL );
size_t bsize = page->block_size;
mi_block_t* start = mi_page_block_at(page, page_area, page->capacity);
- if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
- // initialize a sequential free list
- mi_block_t* end = mi_page_block_at(page, page_area, page->capacity + extend - 1);
- mi_block_t* block = start;
- for (size_t i = 0; i < extend; i++) {
- mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
- mi_block_set_next(page,block,next);
- block = next;
- }
- mi_block_set_next(page, end, NULL);
- page->free = start;
- }
- else {
- // initialize a randomized free list
- // set up `slice_count` slices to alternate between
- size_t shift = MI_MAX_SLICE_SHIFT;
- while ((extend >> shift) == 0) {
- shift--;
- }
- size_t slice_count = (size_t)1U << shift;
- size_t slice_extend = extend / slice_count;
- mi_assert_internal(slice_extend >= 1);
- mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice
- size_t counts[MI_MAX_SLICES]; // available objects in the slice
- for (size_t i = 0; i < slice_count; i++) {
- blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend);
- counts[i] = slice_extend;
- }
- counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?)
- // and initialize the free list by randomly threading through them
- // set up first element
- size_t current = _mi_heap_random(heap) % slice_count;
- counts[current]--;
- page->free = blocks[current];
- // and iterate through the rest
- uintptr_t rnd = heap->random;
- for (size_t i = 1; i < extend; i++) {
- // call random_shuffle only every INTPTR_SIZE rounds
- size_t round = i%MI_INTPTR_SIZE;
- if (round == 0) rnd = _mi_random_shuffle(rnd);
- // select a random next slice index
- size_t next = ((rnd >> 8*round) & (slice_count-1));
- while (counts[next]==0) { // ensure it still has space
- next++;
- if (next==slice_count) next = 0;
- }
- // and link the current block to it
- counts[next]--;
- mi_block_t* block = blocks[current];
- blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block
- mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next`
- current = next;
- }
- mi_block_set_next( page, blocks[current], NULL); // end of the list
- heap->random = _mi_random_shuffle(rnd);
+ // initialize a sequential free list
+ mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1);
+ mi_block_t* block = start;
+ while(block <= last) {
+ mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize);
+ mi_block_set_next(page,block,next);
+ block = next;
}
- // enable the new free list
- page->capacity += (uint16_t)extend;
- _mi_stat_increase(&stats->page_committed, extend * page->block_size);
+ mi_block_set_next(page, last, NULL);
+ page->free = start;
}
/* -----------------------------------------------------------
@@ -518,7 +524,15 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
mi_assert_internal(extend < (1UL<<16));
// and append the extend the free list
- mi_page_free_list_extend(heap, page, extend, stats );
+ if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) {
+ mi_page_free_list_extend(page, extend, stats );
+ }
+ else {
+ mi_page_free_list_extend_secure(heap, page, extend, stats);
+ }
+ // enable the new free list
+ page->capacity += (uint16_t)extend;
+ _mi_stat_increase(&stats->page_committed, extend * page->block_size);
mi_assert_expensive(mi_page_is_valid_init(page));
}
@@ -688,7 +702,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
if (page != NULL) {
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(page->block_size == block_size);
- mi_heap_stat_increase( heap, huge, block_size);
+ _mi_stat_increase( &heap->tld->stats.huge, block_size);
}
return page;
}
@@ -708,10 +722,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
// call potential deferred free routines
_mi_deferred_free(heap, false);
-
+
// free delayed frees from other threads
_mi_heap_delayed_free(heap);
-
+
// huge allocation?
mi_page_t* page;
if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) {
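
[Aside: a much-simplified sketch of the randomized free-list threading that `mi_page_free_list_extend_secure` above performs; not part of the patch. Assumptions: 4 slices of one block each, and plain `rand()` standing in for `_mi_random_shuffle`. Each step links the current block to a randomly chosen slice that still has blocks left, producing a shuffled list.]

#include <stdio.h>
#include <stdlib.h>

int main(void) {
  enum { SLICES = 4 };
  int next_of[SLICES];                    // next_of[b] = block linked after b (-1 = end)
  size_t counts[SLICES] = { 1, 1, 1, 1 }; // blocks left per slice
  size_t current = (size_t)rand() % SLICES;
  size_t head = current;
  counts[current]--;
  for (int i = 1; i < SLICES; i++) {
    size_t next = (size_t)rand() % SLICES;
    while (counts[next] == 0) { next = (next + 1) % SLICES; } // ensure it still has space
    counts[next]--;
    next_of[current] = (int)next;
    current = next;
  }
  next_of[current] = -1;                  // end of the list
  for (int b = (int)head; b != -1; b = next_of[b]) printf("%d ", b); // a permutation of 0..3
  printf("\n");
  return 0;
}
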
diff --git a/src/segment.c b/src/segment.c
index 8f254a26..736345bf 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -226,6 +226,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) {
+ segment->thread_id = 0;
mi_segments_track_size(-((long)segment_size),tld);
if (mi_option_is_enabled(mi_option_secure)) {
_mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set
@@ -235,8 +236,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
-// and no more than 2.
-#define MI_SEGMENT_CACHE_MAX (2)
+// and no more than 4.
+#define MI_SEGMENT_CACHE_MAX (4)
#define MI_SEGMENT_CACHE_FRACTION (8)
// note: returned segment may be partially reset
@@ -248,17 +249,19 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t
tld->cache = segment->next;
segment->next = NULL;
mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
+ _mi_stat_decrease(&tld->stats->segments_cache, 1);
return segment;
}
static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
- if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
- tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache
+ if (tld->cache_count < MI_SEGMENT_CACHE_MAX
+ && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))
+ ) { // always allow 1 element cache
return false;
}
// take the opportunity to reduce the segment cache if it is too large (now)
// TODO: this never happens as we check against peak usage, should we use current usage instead?
- while (tld->cache_count > (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) {
+ while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) {
mi_segment_t* segment = mi_segment_cache_pop(0,tld);
mi_assert_internal(segment != NULL);
if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld);
@@ -269,7 +272,9 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld));
mi_assert_internal(segment->next == NULL);
- if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) return false;
+ if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) {
+ return false;
+ }
mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE);
if (mi_option_is_enabled(mi_option_cache_reset)) {
_mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats);
@@ -277,6 +282,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld)
segment->next = tld->cache;
tld->cache = segment;
tld->cache_count++;
+ _mi_stat_increase(&tld->stats->segments_cache,1);
return true;
}
@@ -407,8 +413,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t
mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment));
mi_assert(segment->next == NULL);
mi_assert(segment->prev == NULL);
- _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);
- segment->thread_id = 0;
+ _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size);
// update reset memory statistics
/*
@@ -613,6 +618,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen
}
else {
// otherwise reclaim it
+ mi_page_init_flags(page,segment->thread_id);
_mi_page_reclaim(heap,page);
}
}
@@ -643,6 +649,7 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl
mi_assert_internal(mi_segment_has_free(segment));
mi_page_t* page = mi_segment_find_free(segment, tld->stats);
page->segment_in_use = true;
+ mi_page_init_flags(page,segment->thread_id);
segment->used++;
mi_assert_internal(segment->used <= segment->capacity);
if (segment->used == segment->capacity) {
@@ -682,6 +689,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_
segment->used = 1;
mi_page_t* page = &segment->pages[0];
page->segment_in_use = true;
+ mi_page_init_flags(page,segment->thread_id);
return page;
}
@@ -693,22 +701,27 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
segment->used = 1;
mi_page_t* page = &segment->pages[0];
page->segment_in_use = true;
+ mi_page_init_flags(page,segment->thread_id);
return page;
}
/* -----------------------------------------------------------
Page allocation and free
----------------------------------------------------------- */
+static bool mi_is_good_fit(size_t bsize, size_t size) {
+  // good fit if no more than 25% is wasted
+  return (bsize > 0 && size > 0 && bsize < size && (size % bsize) < (size/4));
+}
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page;
- if (block_size <= (MI_SMALL_PAGE_SIZE/4)) {
+ if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) {
page = mi_segment_small_page_alloc(tld,os_tld);
}
- else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) {
+ else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) {
page = mi_segment_medium_page_alloc(tld, os_tld);
}
- else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {
+ else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) {
page = mi_segment_large_page_alloc(tld, os_tld);
}
else {
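
[Aside: a worked instance of `mi_is_good_fit` above, with the waste computed as `size % bsize`; not part of the patch. A 1200-byte block in a 64 KiB small page wastes `65536 % 1200 = 736` bytes, far under the 25% bound (16384), so it is still served from a small page even though it exceeds `MI_SMALL_SIZE_MAX`; a 40000-byte block would waste 25536 bytes (~39%) and is not a good fit.]

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool is_good_fit(size_t bsize, size_t size) {
  // good fit if no more than 25% is wasted
  return (bsize > 0 && size > 0 && bsize < size && (size % bsize) < (size / 4));
}

int main(void) {
  printf("%d\n", is_good_fit(1200, 64 * 1024));   // 1: 736 bytes (~1%) wasted
  printf("%d\n", is_good_fit(40000, 64 * 1024));  // 0: 25536 bytes (~39%) wasted
  return 0;
}
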
diff --git a/src/stats.c b/src/stats.c
index 2b15bf9e..8725e48c 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -99,14 +99,14 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1);
mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1);
mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1);
- mi_stat_add(&stats->mmap_ensure_aligned, &src->mmap_ensure_aligned, 1);
- mi_stat_add(&stats->mmap_right_align, &src->mmap_right_align, 1);
mi_stat_add(&stats->commit_calls, &src->commit_calls, 1);
mi_stat_add(&stats->threads, &src->threads, 1);
mi_stat_add(&stats->pages_extended, &src->pages_extended, 1);
mi_stat_add(&stats->malloc, &src->malloc, 1);
+ mi_stat_add(&stats->segments_cache, &src->segments_cache, 1);
mi_stat_add(&stats->huge, &src->huge, 1);
+ mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1);
mi_stat_counter_add(&stats->searches, &src->searches, 1);
#if MI_STAT>1
for (size_t i = 0; i <= MI_BIN_HUGE; i++) {
@@ -172,10 +172,15 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t
}
static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, FILE* out ) {
- double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count);
- _mi_fprintf(out,"%10s: %7.1f avg\n", msg, avg);
+ _mi_fprintf(out, "%10s:", msg);
+ mi_print_amount(stat->total, -1, out);
+ _mi_fprintf(out, "\n");
}
+static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, FILE* out) {
+ double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count);
+ _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg);
+}
static void mi_print_header( FILE* out ) {
@@ -229,15 +234,15 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n
mi_stat_print(&stats->page_committed, "touched", 1, out);
mi_stat_print(&stats->segments, "segments", -1, out);
mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out);
+ mi_stat_print(&stats->segments_cache, "-cached", -1, out);
mi_stat_print(&stats->pages, "pages", -1, out);
mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out);
mi_stat_print(&stats->pages_extended, "-extended", 0, out);
+ mi_stat_counter_print(&stats->page_no_retire, "-noretire", out);
mi_stat_print(&stats->mmap_calls, "mmaps", 0, out);
- mi_stat_print(&stats->mmap_right_align, "mmap fast", 0, out);
- mi_stat_print(&stats->mmap_ensure_aligned, "mmap slow", 0, out);
mi_stat_print(&stats->commit_calls, "commits", 0, out);
mi_stat_print(&stats->threads, "threads", 0, out);
- mi_stat_counter_print(&stats->searches, "searches", out);
+ mi_stat_counter_print_avg(&stats->searches, "searches", out);
if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs);
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 6ddf4f37..94891cc3 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -6,6 +6,7 @@
#include <mimalloc.h>
#include <mimalloc-override.h> // redefines malloc etc.
+
int main() {
mi_version();
void* p1 = malloc(78);