From afd981d0088772b1d248b66fc171cc3c78161a34 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 7 Jul 2019 12:56:40 +0800 Subject: [PATCH 01/23] Use checked unsigned multiplication extension of GCC/Clang Most processors have carry flags which they set on addition overflow, so it is a good idea to access them whenever possible. Most of them also have widening multiply instructions that can be used to detect overflow of the non-widening version. Both GCC and Clang offer a way to detect an overflow for security critical applications. Reference: https://clang.llvm.org/docs/LanguageExtensions.html#checked-arithmetic-builtins --- include/mimalloc-internal.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 3a3f5c4d..a1d1d835 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -109,6 +109,9 @@ bool _mi_page_is_valid(mi_page_t* page); #define mi_likely(x) (x) #endif +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif #if defined(_MSC_VER) #define mi_decl_noinline __declspec(noinline) @@ -141,9 +144,17 @@ bool _mi_page_is_valid(mi_page_t* page); // Overflow detecting multiply #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) static inline bool mi_mul_overflow(size_t size, size_t count, size_t* total) { +#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 +#if (MI_INTPTR_SIZE == 4) + return __builtin_umul_overflow(size, count, total); +#else + return __builtin_umull_overflow(size, count, total); +#endif +#else /* __builtin_umul_overflow is unavailable */ *total = size * count; return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); +#endif } // Align a byte size to a size in _machine words_, From a215049b4a7382665084410b7be775e1580a5370 Mon Sep 17 00:00:00 2001 From: caixiangyue Date: Fri, 19 Jul 2019 16:23:14 +0800 Subject: [PATCH 02/23] fix typo --- src/os.c | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index f9705992..68a23143 100644 --- a/src/os.c +++ b/src/os.c @@ -195,7 +195,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, void* p = NULL; if (use_large_os_page(size, try_alignment)) { if (large_page_try_ok > 0) { - // if a large page page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. large_page_try_ok--; } From f0de0b6f6852a345c0cb86fc264679accbcf6b5f Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 19 Jul 2019 09:33:14 -0700 Subject: [PATCH 03/23] remove old windows override code --- src/alloc-override-win.c | 702 --------------------------------------- 1 file changed, 702 deletions(-) delete mode 100644 src/alloc-override-win.c diff --git a/src/alloc-override-win.c b/src/alloc-override-win.c deleted file mode 100644 index 7b0fe69f..00000000 --- a/src/alloc-override-win.c +++ /dev/null @@ -1,702 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -#include "mimalloc.h" -#include "mimalloc-internal.h" - -#if !defined(_WIN32) -#error "this file should only be included on Windows" -#endif - -#include -#include - - -/* -To override the C runtime `malloc` on Windows we need to patch the allocation -functions at runtime initialization. 
Unfortunately we can never patch before the -runtime initializes itself, because as soon as we call `GetProcAddress` on the -runtime module (a DLL or EXE in Windows speak), it will first load and initialize -(by the OS calling `DllMain` on it). - -This means that some things might be already allocated by the C runtime itself -(and possibly other DLL's) before we get to resolve runtime adresses. This is -no problem if everyone unwinds in order: when we unload, we unpatch and restore -the original crt `free` routines and crt malloc'd memory is freed correctly. - -But things go wrong if such early CRT alloc'd memory is freed or re-allocated -_after_ we patch, but _before_ we unload (and unpatch), or if any memory allocated -by us is freed after we unpatched. - -There are two tricky situations to deal with: - -1. The Thread Local Storage (TLS): when the main thread stops it will call registered - callbacks on TLS entries (allocated by `FlsAlloc`). This is done by the OS - before any DLL's are unloaded. Unfortunately, the C runtime registers such - TLS entries with CRT allocated memory which is freed in the callback. - -2. Inside the CRT: - a. Some variables might get initialized by patched allocated - blocks but freed during CRT unloading after we unpatched - (like temporary file buffers). - b. Some blocks are allocated at CRT and freed by the CRT (like the - environment storage). - c. And some blocks are allocated by the CRT and then reallocated - while patched, and finally freed after unpatching! This - happens with the `atexit` functions for example to grow the array - of registered functions. - -In principle situation 2 is hopeless: since we cannot patch before CRT initialization, -we can never be sure how to free or reallocate a pointer during CRT unloading. -However, in practice there is a good solution: when terminating, we just patch -the reallocation and free routines to no-ops -- we are winding down anyway! 
This leaves -just the reallocation problm of CRT alloc'd memory once we are patched. Here, a study of the -CRT reveals that there seem to be just three such situations: - -1. When registering `atexit` routines (to grow the exit function table), -2. When calling `_setmaxstdio` (to grow the file handle table), -3. and `_popen`/`_wpopen` (to grow handle pairs). These turn out not to be - a problem as these are NULL initialized. - -We fix these by providing wrappers: - -1. We first register a _global_ `atexit` routine ourselves (`mi_patches_at_exit`) before patching, - and then patch the `_crt_atexit` function to implement our own global exit list (and the - same for `_crt_at_quick_exit`). All module local lists are no problem since they are always fully - (un)patched from initialization to end. We can register in the global list by dynamically - getting the global `_crt_atexit` entry from `ucrtbase.dll`. - -2. The `_setmaxstdio` is _detoured_: we patch it by a stub that unpatches first, - calls the original routine and repatches again. - -That leaves us to reliably shutdown and enter "termination mode": - -1. Using our trick to get the global exit list entry point, we register an exit function `mi_patches_atexit` - that first executes all our home brew list of exit functions, and then enters a _termination_ - phase that patches realloc/free variants with no-ops. Patching later again with special no-ops for - `free` also improves efficiency during the program run since no flags need to be checked. - -2. That is not quite good enough yet since after executing exit routines after us on the - global exit list (registered by the CRT), - the OS starts to unwind the TLS callbacks and we would like to run callbacks registered after loading - our DLL to be done in patched mode. So, we also allocate a TLS entry when our DLL is loaded and when its - callback is called, we re-enable the original patches again. 
Since TLS is destroyed in FIFO order - this runs any callbacks in later DLL's in patched mode. - -3. Finally the DLL's get unloaded by the OS in order (still patched) until our DLL gets unloaded - and then we start a termination phase again, and patch realloc/free with no-ops for good this time. - -*/ - -static int __cdecl mi_setmaxstdio(int newmax); - -// ------------------------------------------------------ -// Microsoft allocation extensions -// ------------------------------------------------------ - - -typedef size_t mi_nothrow_t; - -static void mi_free_nothrow(void* p, mi_nothrow_t tag) { - UNUSED(tag); - mi_free(p); -} - -// Versions of `free`, `realloc`, `recalloc`, `expand` and `msize` -// that are used during termination and are no-ops. -static void mi_free_term(void* p) { - UNUSED(p); -} - -static void mi_free_size_term(void* p, size_t size) { - UNUSED(size); - UNUSED(p); -} - -static void mi_free_nothrow_term(void* p, mi_nothrow_t tag) { - UNUSED(tag); - UNUSED(p); -} - -static void* mi_realloc_term(void* p, size_t newsize) { - UNUSED(p); UNUSED(newsize); - return NULL; -} - -static void* mi__recalloc_term(void* p, size_t newcount, size_t newsize) { - UNUSED(p); UNUSED(newcount); UNUSED(newsize); - return NULL; -} - -static void* mi__expand_term(void* p, size_t newsize) { - UNUSED(p); UNUSED(newsize); - return NULL; -} - -static size_t mi__msize_term(void* p) { - UNUSED(p); - return 0; -} - - -// Debug versions, forward to base versions (that get patched) - -static void* mi__malloc_dbg(size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return _malloc_base(size); -} - -static void* mi__calloc_dbg(size_t count, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return _calloc_base(count, size); -} - -static void* mi__realloc_dbg(void* p, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); 
UNUSED(line); - return _realloc_base(p, size); -} - -static void mi__free_dbg(void* p, int block_type) { - UNUSED(block_type); - _free_base(p); -} - - -// the `recalloc`,`expand`, and `msize` don't have base versions and thus need a separate term version - -static void* mi__recalloc_dbg(void* p, size_t count, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return mi_recalloc(p, count, size); -} - -static void* mi__expand_dbg(void* p, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return mi__expand(p, size); -} - -static size_t mi__msize_dbg(void* p, int block_type) { - UNUSED(block_type); - return mi_usable_size(p); -} - -static void* mi__recalloc_dbg_term(void* p, size_t count, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return mi__recalloc_term(p, count, size); -} - -static void* mi__expand_dbg_term(void* p, size_t size, int block_type, const char* fname, int line) { - UNUSED(block_type); UNUSED(fname); UNUSED(line); - return mi__expand_term(p, size); -} - -static size_t mi__msize_dbg_term(void* p, int block_type) { - UNUSED(block_type); - return mi__msize_term(p); -} - - -// ------------------------------------------------------ -// implement our own global atexit handler -// ------------------------------------------------------ -typedef void (cbfun_t)(void); -typedef int (atexit_fun_t)(cbfun_t* fn); -typedef uintptr_t encoded_t; - -typedef struct exit_list_s { - encoded_t functions; // encoded pointer to array of encoded function pointers - size_t count; - size_t capacity; -} exit_list_t; - -#define MI_EXIT_INC (64) - -static exit_list_t atexit_list = { 0, 0, 0 }; -static exit_list_t at_quick_exit_list = { 0, 0, 0 }; -static CRITICAL_SECTION atexit_lock; - -// encode/decode function pointers with a random canary for security -static encoded_t canary; - -static 
inline void *decode(encoded_t x) { - return (void*)(x^canary); -} - -static inline encoded_t encode(void* p) { - return ((uintptr_t)p ^ canary); -} - - -static void init_canary() -{ - canary = _mi_random_init(0); - atexit_list.functions = at_quick_exit_list.functions = encode(NULL); -} - - -// initialize the list -static void mi_initialize_atexit(void) { - InitializeCriticalSection(&atexit_lock); - init_canary(); -} - -// register an exit function -static int mi_register_atexit(exit_list_t* list, cbfun_t* fn) { - if (fn == NULL) return EINVAL; - EnterCriticalSection(&atexit_lock); - encoded_t* functions = (encoded_t*)decode(list->functions); - if (list->count >= list->capacity) { // at first `functions == decode(0) == NULL` - encoded_t* newf = (encoded_t*)mi_recalloc(functions, list->capacity + MI_EXIT_INC, sizeof(cbfun_t*)); - if (newf != NULL) { - list->capacity += MI_EXIT_INC; - list->functions = encode(newf); - functions = newf; - } - } - int result; - if (list->count < list->capacity && functions != NULL) { - functions[list->count] = encode(fn); - list->count++; - result = 0; // success - } - else { - result = ENOMEM; - } - LeaveCriticalSection(&atexit_lock); - return result; -} - -// Register a global `atexit` function -static int mi_atexit(cbfun_t* fn) { - return mi_register_atexit(&atexit_list,fn); -} - -static int mi_at_quick_exit(cbfun_t* fn) { - return mi_register_atexit(&at_quick_exit_list,fn); -} - -static int mi_register_onexit(void* table, cbfun_t* fn) { - // TODO: how can we distinguish a quick_exit from atexit? 
- return mi_atexit(fn); -} - -// Execute exit functions in a list -static void mi_execute_exit_list(exit_list_t* list) { - // copy and zero the list structure - EnterCriticalSection(&atexit_lock); - exit_list_t clist = *list; - memset(list,0,sizeof(*list)); - LeaveCriticalSection(&atexit_lock); - - // now execute the functions outside of the lock - encoded_t* functions = (encoded_t*)decode(clist.functions); - if (functions != NULL) { - for (size_t i = clist.count; i > 0; i--) { // careful with unsigned count down.. - cbfun_t* fn = (cbfun_t*)decode(functions[i-1]); - if (fn==NULL) break; // corrupted! - fn(); - } - mi_free(functions); - } -} - - - -// ------------------------------------------------------ -// Jump assembly instructions for patches -// ------------------------------------------------------ - -#if defined(_M_IX86) || defined(_M_X64) - -#define MI_JUMP_SIZE 14 // at most 2+4+8 for a long jump or 1+5 for a short one - -typedef struct mi_jump_s { - uint8_t opcodes[MI_JUMP_SIZE]; -} mi_jump_t; - -void mi_jump_restore(void* current, const mi_jump_t* saved) { - memcpy(current, &saved->opcodes, MI_JUMP_SIZE); -} - -void mi_jump_write(void* current, void* target, mi_jump_t* save) { - if (save != NULL) { - memcpy(&save->opcodes, current, MI_JUMP_SIZE); - } - uint8_t* opcodes = ((mi_jump_t*)current)->opcodes; - ptrdiff_t diff = (uint8_t*)target - (uint8_t*)current; - uint32_t ofs32 = (uint32_t)diff; - #ifdef _M_X64 - uint64_t ofs64 = (uint64_t)diff; - if (ofs64 != (uint64_t)ofs32) { - // use long jump - opcodes[0] = 0xFF; - opcodes[1] = 0x25; - *((uint32_t*)&opcodes[2]) = 0; - *((uint64_t*)&opcodes[6]) = (uint64_t)target; - } - else - #endif - { - // use short jump - opcodes[0] = 0xE9; - *((uint32_t*)&opcodes[1]) = ofs32 - 5 /* size of the short jump instruction */; - } -} - -#elif defined(_M_ARM64) - -#define MI_JUMP_SIZE 16 - -typedef struct mi_jump_s { - uint8_t opcodes[MI_JUMP_SIZE]; -} mi_jump_t; - -void mi_jump_restore(void* current, const mi_jump_t* 
saved) { - memcpy(current, &saved->opcodes, MI_JUMP_SIZE); -} - -void mi_jump_write(void* current, void* target, mi_jump_t* save) { - if (save != NULL) { - memcpy(&save->opcodes, current, MI_JUMP_SIZE); - } - uint8_t* opcodes = ((mi_jump_t*)current)->opcodes; - uint64_t diff = (uint8_t*)target - (uint8_t*)current; - - // 0x50 0x00 0x00 0x58 ldr x16, .+8 # load PC relative +8 - // 0x00 0x02 0x3F 0xD6 blr x16 # and jump - //
- //
- static const uint8_t jump_opcodes[8] = { 0x50, 0x00, 0x00, 0x58, 0x00, 0x02, 0x3F, 0xD6 }; - memcpy(&opcodes[0], jump_opcodes, sizeof(jump_opcodes)); - *((uint64_t*)&opcodes[8]) = diff; -} - -#else -#error "define jump instructions for this platform" -#endif - - -// ------------------------------------------------------ -// Patches -// ------------------------------------------------------ -typedef enum patch_apply_e { - PATCH_NONE, - PATCH_TARGET, - PATCH_TARGET_TERM -} patch_apply_t; - -#define MAX_ENTRIES 4 // maximum number of patched entry points (like `malloc` in ucrtbase and msvcrt) - -typedef struct mi_patch_s { - const char* name; // name of the function to patch - void* target; // the address of the new target (never NULL) - void* target_term; // the address of the target during termination (or NULL) - patch_apply_t applied; // what target has been applied? - void* originals[MAX_ENTRIES]; // the resolved addresses of the function (or NULLs) - mi_jump_t saves[MAX_ENTRIES]; // the saved instructions in case it was applied -} mi_patch_t; - -#define MI_PATCH_NAME3(name,target,term) { name, &target, &term, PATCH_NONE, {NULL,NULL,NULL,NULL} } -#define MI_PATCH_NAME2(name,target) { name, &target, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} } -#define MI_PATCH3(name,target,term) MI_PATCH_NAME3(#name, target, term) -#define MI_PATCH2(name,target) MI_PATCH_NAME2(#name, target) -#define MI_PATCH1(name) MI_PATCH2(name,mi_##name) - -static mi_patch_t patches[] = { - // we implement our own global exit handler (as the CRT versions do a realloc internally) - //MI_PATCH2(_crt_atexit, mi_atexit), - //MI_PATCH2(_crt_at_quick_exit, mi_at_quick_exit), - MI_PATCH2(_setmaxstdio, mi_setmaxstdio), - MI_PATCH2(_register_onexit_function, mi_register_onexit), - - // override higher level atexit functions so we can implement at_quick_exit correcty - MI_PATCH2(atexit, mi_atexit), - MI_PATCH2(at_quick_exit, mi_at_quick_exit), - - // regular entries - MI_PATCH2(malloc, mi_malloc), - 
MI_PATCH2(calloc, mi_calloc), - MI_PATCH3(realloc, mi_realloc,mi_realloc_term), - MI_PATCH3(free, mi_free,mi_free_term), - - // extended api - MI_PATCH2(_strdup, mi_strdup), - MI_PATCH2(_strndup, mi_strndup), - MI_PATCH3(_expand, mi__expand,mi__expand_term), - MI_PATCH3(_recalloc, mi_recalloc,mi__recalloc_term), - MI_PATCH3(_msize, mi_usable_size,mi__msize_term), - - // base versions - MI_PATCH2(_malloc_base, mi_malloc), - MI_PATCH2(_calloc_base, mi_calloc), - MI_PATCH3(_realloc_base, mi_realloc,mi_realloc_term), - MI_PATCH3(_free_base, mi_free,mi_free_term), - - // these base versions are in the crt but without import records - MI_PATCH_NAME3("_recalloc_base", mi_recalloc,mi__recalloc_term), - MI_PATCH_NAME3("_msize_base", mi_usable_size,mi__msize_term), - - // debug - MI_PATCH2(_malloc_dbg, mi__malloc_dbg), - MI_PATCH2(_realloc_dbg, mi__realloc_dbg), - MI_PATCH2(_calloc_dbg, mi__calloc_dbg), - MI_PATCH2(_free_dbg, mi__free_dbg), - - MI_PATCH3(_expand_dbg, mi__expand_dbg, mi__expand_dbg_term), - MI_PATCH3(_recalloc_dbg, mi__recalloc_dbg, mi__recalloc_dbg_term), - MI_PATCH3(_msize_dbg, mi__msize_dbg, mi__msize_dbg_term), - -#if 0 - // override new/delete variants for efficiency (?) 
-#ifdef _WIN64 - // 64 bit new/delete - MI_PATCH_NAME2("??2@YAPEAX_K@Z", mi_new), - MI_PATCH_NAME2("??_U@YAPEAX_K@Z", mi_new), - MI_PATCH_NAME3("??3@YAXPEAX@Z", mi_free, mi_free_term), - MI_PATCH_NAME3("??_V@YAXPEAX@Z", mi_free, mi_free_term), - MI_PATCH_NAME3("??3@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized - MI_PATCH_NAME3("??_V@YAXPEAX_K@Z", mi_free_size, mi_free_size_term), // delete sized - MI_PATCH_NAME2("??2@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new), - MI_PATCH_NAME2("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", mi_new), - MI_PATCH_NAME3("??3@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term), - MI_PATCH_NAME3("??_V@YAXPEAXAEBUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term), - - -#else - // 32 bit new/delete - MI_PATCH_NAME2("??2@YAPAXI@Z", mi_new), - MI_PATCH_NAME2("??_U@YAPAXI@Z", mi_new), - MI_PATCH_NAME3("??3@YAXPAX@Z", mi_free, mi_free_term), - MI_PATCH_NAME3("??_V@YAXPAX@Z", mi_free, mi_free_term), - MI_PATCH_NAME3("??3@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized - MI_PATCH_NAME3("??_V@YAXPAXI@Z", mi_free_size, mi_free_size_term), // delete sized - - MI_PATCH_NAME2("??2@YAPAXIABUnothrow_t@std@@@Z", mi_new), - MI_PATCH_NAME2("??_U@YAPAXIABUnothrow_t@std@@@Z", mi_new), - MI_PATCH_NAME3("??3@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term), - MI_PATCH_NAME3("??_V@YAXPAXABUnothrow_t@std@@@Z", mi_free_nothrow, mi_free_nothrow_term), - -#endif -#endif - { NULL, NULL, NULL, PATCH_NONE, {NULL,NULL,NULL,NULL} } -}; - - -// Apply a patch -static bool mi_patch_apply(mi_patch_t* patch, patch_apply_t apply) -{ - if (patch->originals[0] == NULL) return true; // unresolved - if (apply == PATCH_TARGET_TERM && patch->target_term == NULL) apply = PATCH_TARGET; // avoid re-applying non-term variants - if (patch->applied == apply) return false; - - for (int i = 0; i < MAX_ENTRIES; i++) { - void* original = patch->originals[i]; - if (original == NULL) break; // no more - - DWORD protect = 
PAGE_READWRITE; - if (!VirtualProtect(original, MI_JUMP_SIZE, PAGE_EXECUTE_READWRITE, &protect)) return false; - if (apply == PATCH_NONE) { - mi_jump_restore(original, &patch->saves[i]); - } - else { - void* target = (apply == PATCH_TARGET ? patch->target : patch->target_term); - mi_assert_internal(target != NULL); - if (target != NULL) mi_jump_write(original, target, &patch->saves[i]); - } - VirtualProtect(original, MI_JUMP_SIZE, protect, &protect); - } - patch->applied = apply; - return true; -} - -// Apply all patches -static bool _mi_patches_apply(patch_apply_t apply, patch_apply_t* previous) { - static patch_apply_t current = PATCH_NONE; - if (previous != NULL) *previous = current; - if (current == apply) return true; - current = apply; - bool ok = true; - for (size_t i = 0; patches[i].name != NULL; i++) { - if (!mi_patch_apply(&patches[i], apply)) ok = false; - } - return ok; -} - -// Export the following three functions just in case -// a user needs that level of control. - -// Disable all patches -mi_decl_export void mi_patches_disable(void) { - _mi_patches_apply(PATCH_NONE, NULL); -} - -// Enable all patches normally -mi_decl_export bool mi_patches_enable(void) { - return _mi_patches_apply( PATCH_TARGET, NULL ); -} - -// Enable all patches in termination phase where free is a no-op -mi_decl_export bool mi_patches_enable_term(void) { - return _mi_patches_apply(PATCH_TARGET_TERM, NULL); -} - -// ------------------------------------------------------ -// Stub for _setmaxstdio -// ------------------------------------------------------ - -static int __cdecl mi_setmaxstdio(int newmax) { - patch_apply_t previous; - _mi_patches_apply(PATCH_NONE, &previous); // disable patches - int result = _setmaxstdio(newmax); // call original function (that calls original CRT recalloc) - _mi_patches_apply(previous,NULL); // and re-enable patches - return result; -} - - -// ------------------------------------------------------ -// Resolve addresses dynamically -// 
------------------------------------------------------ - -// Try to resolve patches for a given module (DLL) -static void mi_module_resolve(const char* fname, HMODULE mod, int priority) { - // see if any patches apply - for (size_t i = 0; patches[i].name != NULL; i++) { - mi_patch_t* patch = &patches[i]; - if (patch->applied == PATCH_NONE) { - // find an available entry - int i = 0; - while (i < MAX_ENTRIES && patch->originals[i] != NULL) i++; - if (i < MAX_ENTRIES) { - void* addr = GetProcAddress(mod, patch->name); - if (addr != NULL) { - // found it! set the address - patch->originals[i] = addr; - _mi_trace_message(" override %s at %s!%p (entry %i)\n", patch->name, fname, addr, i); - } - } - } - } -} - -#define MIMALLOC_NAME "mimalloc-override.dll" -#define UCRTBASE_NAME "ucrtbase.dll" -#define UCRTBASED_NAME "ucrtbased.dll" - -// Resolve addresses of all patches by inspecting the loaded modules -static atexit_fun_t* crt_atexit = NULL; -static atexit_fun_t* crt_at_quick_exit = NULL; - - -static bool mi_patches_resolve(void) { - // get all loaded modules - HANDLE process = GetCurrentProcess(); // always -1, no need to release - DWORD needed = 0; - HMODULE modules[400]; // try to stay under 4k to not trigger the guard page - EnumProcessModules(process, modules, sizeof(modules), &needed); - if (needed == 0) return false; - int count = needed / sizeof(HMODULE); - int ucrtbase_index = 0; - int mimalloc_index = 0; - // iterate through the loaded modules - _mi_trace_message("overriding malloc dynamically...\n"); - for (int i = 0; i < count; i++) { - HMODULE mod = modules[i]; - char filename[MAX_PATH] = { 0 }; - DWORD slen = GetModuleFileName(mod, filename, MAX_PATH); - if (slen > 0 && slen < MAX_PATH) { - // filter out potential crt modules only - filename[slen] = 0; - const char* lastsep = strrchr(filename, '\\'); - const char* basename = (lastsep==NULL ? 
filename : lastsep+1); - _mi_trace_message(" %i: dynamic module %s\n", i, filename); - - // remember indices so we can check load order (in debug mode) - if (_stricmp(basename, MIMALLOC_NAME) == 0) mimalloc_index = i; - if (_stricmp(basename, UCRTBASE_NAME) == 0) ucrtbase_index = i; - if (_stricmp(basename, UCRTBASED_NAME) == 0) ucrtbase_index = i; - - // see if we potentially patch in this module - int priority = 0; - if (i == 0) priority = 2; // main module to allow static crt linking - else if (_strnicmp(basename, "ucrt", 4) == 0) priority = 3; // new ucrtbase.dll in windows 10 - // NOTE: don't override msvcr -- leads to crashes in setlocale (needs more testing) - // else if (_strnicmp(basename, "msvcr", 5) == 0) priority = 1; // older runtimes - - if (priority > 0) { - // probably found a crt module, try to patch it - mi_module_resolve(basename,mod,priority); - - // try to find the atexit functions for the main process (in `ucrtbase.dll`) - if (crt_atexit==NULL) crt_atexit = (atexit_fun_t*)GetProcAddress(mod, "_crt_atexit"); - if (crt_at_quick_exit == NULL) crt_at_quick_exit = (atexit_fun_t*)GetProcAddress(mod, "_crt_at_quick_exit"); - } - } - } - int diff = mimalloc_index - ucrtbase_index; - if (diff > 1) { - _mi_warning_message("warning: the \"mimalloc-override\" DLL seems not to load before or right after the C runtime (\"ucrtbase\").\n" - " Try to fix this by changing the linking order.\n"); - } - return true; -} - - -// ------------------------------------------------------ -// Dll Entry -// ------------------------------------------------------ - -extern BOOL WINAPI _DllMainCRTStartup(HINSTANCE inst, DWORD reason, LPVOID reserved); - -static DWORD mi_fls_unwind_entry; -static void NTAPI mi_fls_unwind(PVOID value) { - if (value != NULL) mi_patches_enable(); // and re-enable normal patches again for DLL's loaded after us - return; -} - -static void mi_patches_atexit(void) { - mi_execute_exit_list(&atexit_list); - mi_patches_enable_term(); // enter 
termination phase and patch realloc/free with a no-op -} - -static void mi_patches_at_quick_exit(void) { - mi_execute_exit_list(&at_quick_exit_list); - mi_patches_enable_term(); // enter termination phase and patch realloc/free with a no-op -} - -__declspec(dllexport) BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID reserved) { - if (reason == DLL_PROCESS_ATTACH) { - __security_init_cookie(); - } - else if (reason == DLL_PROCESS_DETACH) { - // enter termination phase for good now - mi_patches_enable_term(); - } - // C runtime main - BOOL ok = _DllMainCRTStartup(inst, reason, reserved); - if (reason == DLL_PROCESS_ATTACH && ok) { - // Now resolve patches - ok = mi_patches_resolve(); - if (ok) { - // and register our unwind entry (this must be after resolving due to possible delayed DLL initialization from GetProcAddress) - mi_fls_unwind_entry = FlsAlloc(&mi_fls_unwind); - if (mi_fls_unwind_entry != FLS_OUT_OF_INDEXES) { - FlsSetValue(mi_fls_unwind_entry, (void*)1); - } - - // register our patch disabler in the global exit list - mi_initialize_atexit(); - if (crt_atexit != NULL) (*crt_atexit)(&mi_patches_atexit); - if (crt_at_quick_exit != NULL) (*crt_at_quick_exit)(&mi_patches_at_quick_exit); - - // and patch ! this also redirects the `atexit` handling for the global exit list - mi_patches_enable(); - - // hide internal allocation - mi_stats_reset(); - } - } - return ok; -} From 1ffa48cc61f5cd382566b68a2c9ff572e2a741a5 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 21 Jul 2019 21:42:00 +0800 Subject: [PATCH 04/23] Add branch prediction hint for mi_option_get mi_option_get is called frequently in stress tests, and the patch adds extra hint to the compiler to emit instructions that will cause branch prediction to favour the "likely" side of a jump instruction. 
--- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 0f588740..84b9d2bf 100644 --- a/src/options.c +++ b/src/options.c @@ -51,7 +51,7 @@ static void mi_option_init(mi_option_desc_t* desc); long mi_option_get(mi_option_t option) { mi_assert(option >= 0 && option < _mi_option_last); mi_option_desc_t* desc = &options[option]; - if (desc->init == UNINIT) { + if (mi_unlikely(desc->init == UNINIT)) { mi_option_init(desc); if (option != mi_option_verbose) { _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); From c382c72cf23eb0d4bbcf4b0b32bba7158898ebb9 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 21 Jul 2019 22:20:05 +0800 Subject: [PATCH 05/23] Avoid using strlen function in loop --- src/options.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/options.c b/src/options.c index 0f588740..358d143e 100644 --- a/src/options.c +++ b/src/options.c @@ -176,7 +176,8 @@ static void mi_option_init(mi_option_desc_t* desc) { #pragma warning(suppress:4996) char* s = getenv(buf); if (s == NULL) { - for (size_t i = 0; i < strlen(buf); i++) { + size_t buf_size = strlen(buf); + for (size_t i = 0; i < buf_size; i++) { buf[i] = toupper(buf[i]); } #pragma warning(suppress:4996) @@ -184,7 +185,8 @@ static void mi_option_init(mi_option_desc_t* desc) { } if (s != NULL) { mi_strlcpy(buf, s, sizeof(buf)); - for (size_t i = 0; i < strlen(buf); i++) { + size_t buf_size = strlen(buf); // TODO: use strnlen? 
+ for (size_t i = 0; i < buf_size; i++) { buf[i] = toupper(buf[i]); } if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { From 40cb631683bc5864a704b088021350b422af9f8c Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 13:03:51 -0700 Subject: [PATCH 06/23] re-add missing thread_init needed when running in debug mode --- src/heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heap.c b/src/heap.c index 48bb9830..e55fdf34 100644 --- a/src/heap.c +++ b/src/heap.c @@ -172,7 +172,7 @@ void mi_collect(bool force) mi_attr_noexcept { ----------------------------------------------------------- */ mi_heap_t* mi_heap_get_default(void) { - // mi_thread_init(); + mi_thread_init(); return mi_get_default_heap(); } From df33efb19a3d6498e727441f0fd119f556df44c9 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 13:09:34 -0700 Subject: [PATCH 07/23] improved debug warning for freeing invalid pointers --- src/alloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index bac925ee..099bdbad 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -211,8 +211,11 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_DEBUG>0) if (mi_unlikely(!mi_is_in_heap_region(p))) { - _mi_warning_message("possibly trying to mi_free a pointer that does not point to a valid heap region: %p\n" + _mi_warning_message("possibly trying to mi_free a pointer that does not point to a valid heap region: 0x%p\n" "(this may still be a valid very large allocation (over 64MiB))\n", p); + if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { + _mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p); + } } if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { _mi_error_message("trying to mi_free a pointer that does not point to a valid heap space: %p\n", p); From 146a753d1efac8ac552e02190aaf9f9346765492 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Mon, 22 Jul 2019 04:45:40 +0800 Subject: [PATCH 
08/23] Fix path name in documentation about macOS --- doc/mimalloc-doc.h | 2 +- docs/overrides.html | 2 +- readme.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 16327d2c..57b7dd4c 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -808,7 +808,7 @@ library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. - `env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram` (on Linux, BSD, etc.) -- `env DYLD_INSERT_LIBRARIES=usr/lib/libmimalloc.dylib myprogram` (On macOS) +- `env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram` (On macOS) Note certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). diff --git a/docs/overrides.html b/docs/overrides.html index 16400375..2360a936 100644 --- a/docs/overrides.html +++ b/docs/overrides.html @@ -109,7 +109,7 @@ $(document).ready(function(){initNavTree('overrides.html','');});

On these systems we preload the mimalloc shared library so all calls to the standard malloc interface are resolved to the mimalloc library.

  • env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram (on Linux, BSD, etc.)
  • -
  • env DYLD_INSERT_LIBRARIES=usr/lib/libmimalloc.dylib myprogram (On macOS)

    +
  • env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram (On macOS)

    Note certain security restrictions may apply when doing this from the shell.

diff --git a/readme.md b/readme.md index 85234c24..8ff19deb 100644 --- a/readme.md +++ b/readme.md @@ -191,7 +191,7 @@ library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. - `env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram` (on Linux, BSD, etc.) -- `env DYLD_INSERT_LIBRARIES=usr/lib/libmimalloc.dylib myprogram` (On macOS) +- `env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram` (On macOS) Note certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). From 1e27cef873f6db6b8548a2278323f34e665d52d2 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 21 Jul 2019 23:21:14 +0800 Subject: [PATCH 09/23] Enforce strict include-what-you-use policy The include-what-you-use (IWYU) policy is beneficial to faster compilation and fewer recompilations. Many build tools, such as GNU make, provide a mechanism for automatically figuring out what .h files a .cc file depends on. These mechanisms typically look at #include lines. When unnecessary #includes are listed, the build system is more likely to recompile in cases where it is not necessary. With the enforcement, header file <mimalloc.h> no longer includes <stdlib.h>. Reference: https://github.com/include-what-you-use/include-what-you-use/blob/master/docs/WhyIWYU.md --- include/mimalloc-types.h | 1 - include/mimalloc.h | 1 - src/alloc-aligned.c | 2 +- src/alloc.c | 5 +++-- src/init.c | 3 ++- src/options.c | 3 ++- src/os.c | 2 +- src/page.c | 2 -- 8 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 2f16020f..613aae82 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -8,7 +8,6 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_TYPES_H #define MIMALLOC_TYPES_H -#include // size_t etc. 
#include // ptrdiff_t #include // uintptr_t, uint16_t, etc diff --git a/include/mimalloc.h b/include/mimalloc.h index ec09f119..e8a40ab6 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -69,7 +69,6 @@ terms of the MIT license. A copy of the license can be found in the file // Includes // ------------------------------------------------------ -#include // size_t, malloc etc. #include // bool #include // FILE diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 3ef93c83..4fcf433a 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#include // memset +#include // memset, memcpy // ------------------------------------------------------ // Aligned Allocation diff --git a/src/alloc.c b/src/alloc.c index d5050b03..a5078dc5 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -8,7 +8,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include // memset +#include // memset, memcpy, strlen +#include // malloc, exit #define MI_IN_ALLOC_C #include "alloc-override.c" @@ -463,7 +464,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) } } #else -#include +#include // pathconf static size_t mi_path_max() { static size_t path_max = 0; if (path_max <= 0) { diff --git a/src/init.c b/src/init.c index 5b2d3c8e..12dcd5dc 100644 --- a/src/init.c +++ b/src/init.c @@ -7,7 +7,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#include // memcpy +#include // memcpy, memset +#include // atexit // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { diff --git a/src/options.c b/src/options.c index 0f588740..02ca4800 100644 --- a/src/options.c +++ b/src/options.c @@ -8,7 +8,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include -#include // strcmp +#include // strtol +#include // strncpy, strncat, strlen, strstr #include // toupper #include diff --git a/src/os.c b/src/os.c index f9705992..950676ed 100644 --- a/src/os.c +++ b/src/os.c @@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#include // memset +#include // strerror #include #if defined(_WIN32) diff --git a/src/page.c b/src/page.c index fc7c4f01..1ac026f3 100644 --- a/src/page.c +++ b/src/page.c @@ -15,8 +15,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include // memset, memcpy - /* ----------------------------------------------------------- Definition of page queues for each block size ----------------------------------------------------------- */ From 069f52523f148c5d5d157f54481024befdba74b9 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 17:08:09 -0700 Subject: [PATCH 10/23] use hinted address to mmap to reduce mmap calls --- src/os.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/os.c b/src/os.c index 3527c94d..3b92e641 100644 --- a/src/os.c +++ b/src/os.c @@ -10,6 +10,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" +#include "mimalloc-atomic.h" #include // memset #include @@ -242,6 +243,23 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { return (void*) aligned_base; } #else +static void* mi_unix_mmapx(size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + void* p = NULL; + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use a special area for 4MiB aligned allocations + static volatile intptr_t aligned_base = ((intptr_t)1 << 42); // starting at 4TiB + if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0 && (aligned_base%try_alignment)==0) { + intptr_t hint = mi_atomic_add(&aligned_base,size) - size; + p = mmap((void*)hint,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + } + #endif + if (p==NULL) { + p = mmap(NULL,size,protect_flags,flags,fd,0); + } + return p; +} + static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) { void* p = NULL; #if !defined(MAP_ANONYMOUS) @@ -278,12 +296,12 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) // try large page allocation // TODO: if always failing due to permissions or no huge pages, try to avoid repeatedly trying? // Should we check this in _mi_os_init? (as on Windows) - p = mmap(NULL, size, protect_flags, lflags, fd, 0); + p = mi_unix_mmapx(size, try_alignment, protect_flags, lflags, fd); if (p == MAP_FAILED) p = NULL; // fall back to regular mmap if large is exhausted or no permission } } if (p == NULL) { - p = mmap(NULL, size, protect_flags, flags, -1, 0); + p = mi_unix_mmapx(size, try_alignment, protect_flags, flags, -1); if (p == MAP_FAILED) p = NULL; } return p; @@ -439,7 +457,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -// Commit/Decommit memory. 
+// Commit/Decommit memory. // Usuelly commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, mi_stats_t* stats) { @@ -503,7 +521,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! - #if (MI_DEBUG>1) + #if (MI_DEBUG>1) if (!mi_option_is_enabled(mi_option_secure)) { memset(start, 0, csize); // pretend it is eagerly reset } @@ -521,7 +539,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); if (p != start) return false; - } + } #else #if defined(MADV_FREE) static int advice = MADV_FREE; From 272a8e03e4ac01d517de36ecf0026e3153eb3187 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 17:13:36 -0700 Subject: [PATCH 11/23] use atomic ops to guard large page tries on windows --- src/os.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/os.c b/src/os.c index 3b92e641..78e638c2 100644 --- a/src/os.c +++ b/src/os.c @@ -206,20 +206,21 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment } static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags) { - static size_t large_page_try_ok = 0; + static volatile uintptr_t large_page_try_ok = 0; void* p = NULL; if (use_large_os_page(size, try_alignment)) { - if (large_page_try_ok > 0) { + uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + if (try_ok > 0) { // if a large page page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. 
- large_page_try_ok--; + mi_atomic_compare_exchange(&large_page_try_ok, try_ok - 1, try_ok); } else { // large OS pages must always reserve and commit. p = mi_win_virtual_allocx(addr, size, try_alignment, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE | flags); // fall back to non-large page allocation on error (`p == NULL`). if (p == NULL) { - large_page_try_ok = 10; // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } From 7091670c22f5c5e0bbd42a9b2cbf1c98df7cfc94 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 17:14:13 -0700 Subject: [PATCH 12/23] trailing id after #endif --- include/mimalloc-override.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-override.h b/include/mimalloc-override.h index f6149514..d81063ab 100644 --- a/include/mimalloc-override.h +++ b/include/mimalloc-override.h @@ -75,7 +75,7 @@ including this header is not necessary. // ------------------------------------------------------ #ifdef __cplusplus #include - + void operator delete(void* p) noexcept { mi_free(p); }; void operator delete[](void* p) noexcept { mi_free(p); }; @@ -103,4 +103,4 @@ including this header is not necessary. 
#endif #endif -#endif MIMALLOC_OVERRIDE_H +#endif // MIMALLOC_OVERRIDE_H From e4caee5f55f8a71e07890577d2095248911aec33 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 01:36:16 -0700 Subject: [PATCH 13/23] update test files and overriding --- ide/vs2017/mimalloc-override-test.vcxproj | 15 ++------- .../mimalloc-override-test.vcxproj.filters | 3 -- ide/vs2017/mimalloc-test.vcxproj | 6 ++-- ide/vs2017/mimalloc-test.vcxproj.filters | 2 +- include/mimalloc-override.h | 12 ++++--- test/CMakeLists.txt | 19 ++++++++---- test/main-override-static.c | 31 +++++++++++++++++++ test/main-override.c | 7 ++--- test/main-override.cpp | 13 +++----- 9 files changed, 67 insertions(+), 41 deletions(-) create mode 100644 test/main-override-static.c diff --git a/ide/vs2017/mimalloc-override-test.vcxproj b/ide/vs2017/mimalloc-override-test.vcxproj index 77752890..7df1e79a 100644 --- a/ide/vs2017/mimalloc-override-test.vcxproj +++ b/ide/vs2017/mimalloc-override-test.vcxproj @@ -172,23 +172,14 @@ COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) - - - true - true - true - true - - - false - false - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + + diff --git a/ide/vs2017/mimalloc-override-test.vcxproj.filters b/ide/vs2017/mimalloc-override-test.vcxproj.filters index 80f1c9c0..eb5e70b7 100644 --- a/ide/vs2017/mimalloc-override-test.vcxproj.filters +++ b/ide/vs2017/mimalloc-override-test.vcxproj.filters @@ -18,8 +18,5 @@ Source Files - - Source Files - \ No newline at end of file diff --git a/ide/vs2017/mimalloc-test.vcxproj b/ide/vs2017/mimalloc-test.vcxproj index 8e61a97f..c1539aeb 100644 --- a/ide/vs2017/mimalloc-test.vcxproj +++ b/ide/vs2017/mimalloc-test.vcxproj @@ -144,14 +144,14 @@ Console - - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + + diff --git a/ide/vs2017/mimalloc-test.vcxproj.filters b/ide/vs2017/mimalloc-test.vcxproj.filters index eb5e70b7..fca75e1c 100644 --- a/ide/vs2017/mimalloc-test.vcxproj.filters +++ b/ide/vs2017/mimalloc-test.vcxproj.filters 
@@ -15,7 +15,7 @@ - + Source Files diff --git a/include/mimalloc-override.h b/include/mimalloc-override.h index d81063ab..c3348068 100644 --- a/include/mimalloc-override.h +++ b/include/mimalloc-override.h @@ -69,11 +69,15 @@ including this header is not necessary. #define _aligned_offset_recalloc(p,s,n,a,o) mi_recalloc_aligned_at(p,s,n,a,o) -// ------------------------------------------------------ -// With a C++ compiler we override the new/delete operators. +// ----------------------------------------------------------------- +// With a C++ compiler we can override all the new/delete operators +// by defining 'MIMALLOC_DEFINE_NEW_DELETE' in some source file and +// then including this header file. This is not needed when linking +// statically with the mimalloc library, but it can be more performant +// on Windows when using dynamic overiding as well. // see -// ------------------------------------------------------ -#ifdef __cplusplus +// ----------------------------------------------------------------- +#if defined(__cplusplus) && defined(MIMALLOC_DEFINE_NEW_DELETE) #include void operator delete(void* p) noexcept { mi_free(p); }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 42d4a2f4..8a830073 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,14 +24,21 @@ target_link_libraries(dynamic-override PUBLIC mimalloc) add_executable(dynamic-override-cxx main-override.cpp) target_link_libraries(dynamic-override-cxx PUBLIC mimalloc) -# with a static library +# overriding with a static object file works reliable as the symbols in the +# object file have priority over those in library files +add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o) +target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include) +target_link_libraries(static-override-obj PUBLIC pthread) + +# overriding with a static library works too if using the `mimalloc-override.h` +# header to redefine malloc/free. 
+add_executable(static-override-static main-override-static.c) +target_link_libraries(static-override-static PUBLIC mimalloc-static) + + +# overriding with a static library: this may not work if the library is linked too late add_executable(static-override main-override.c) target_link_libraries(static-override PUBLIC mimalloc-static) add_executable(static-override-cxx main-override.cpp) target_link_libraries(static-override-cxx PUBLIC mimalloc-static) - -# and with a static object file -add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o) -target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include) -target_link_libraries(static-override-obj PUBLIC pthread) diff --git a/test/main-override-static.c b/test/main-override-static.c new file mode 100644 index 00000000..6ddf4f37 --- /dev/null +++ b/test/main-override-static.c @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +#include +#include // redefines malloc etc. + +int main() { + mi_version(); + void* p1 = malloc(78); + void* p2 = malloc(24); + free(p1); + p1 = malloc(8); + //char* s = strdup("hello\n"); + free(p2); + p2 = malloc(16); + p1 = realloc(p1, 32); + free(p1); + free(p2); + //free(s); + //mi_collect(true); + + /* now test if override worked by allocating/freeing across the api's*/ + //p1 = mi_malloc(32); + //free(p1); + //p2 = malloc(32); + //mi_free(p2); + mi_stats_print(NULL); + return 0; +} diff --git a/test/main-override.c b/test/main-override.c index ddb2f16e..1bec1179 100644 --- a/test/main-override.c +++ b/test/main-override.c @@ -3,11 +3,10 @@ #include #include -//#include - +#include int main() { - //mi_stats_reset(); + mi_version(); // ensure mimalloc library is linked void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -26,6 +25,6 @@ int main() { //free(p1); //p2 = malloc(32); //mi_free(p2); - + mi_stats_print(NULL); return 0; } diff --git a/test/main-override.cpp b/test/main-override.cpp index 8f47dcd1..fb7ab7a1 100644 
--- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -4,8 +4,6 @@ #include #include -#include - #include static void* p = malloc(8); @@ -24,16 +22,15 @@ public: }; -int main() { - //mi_malloc_override(); - mi_stats_reset(); +int main() { + mi_version(); atexit(free_p); void* p1 = malloc(78); - void* p2 = _aligned_malloc(24,16); + void* p2 = mi_malloc_aligned(16,24); free(p1); p1 = malloc(8); - char* s = _strdup("hello\n"); - _aligned_free(p2); + char* s = mi_strdup("hello\n"); + mi_free(p2); p2 = malloc(16); p1 = realloc(p1, 32); free(p1); From e90938fb4bf9f7a3ce259d98f3e85576297a0a72 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 10:10:45 -0700 Subject: [PATCH 14/23] merge --- src/alloc-posix.c | 4 ++-- test/test-stress.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 4e844ba3..1f55b3a8 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -19,6 +19,7 @@ terms of the MIT license. A copy of the license can be found in the file #include #include // memcpy +#include // getenv #ifndef EINVAL #define EINVAL 22 @@ -115,7 +116,7 @@ int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { #pragma warning(suppress:4996) char* p = getenv(name); if (p==NULL) { - *buf = NULL; + *buf = NULL; } else { *buf = mi_strdup(p); @@ -146,4 +147,3 @@ int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) return 0; #endif } - diff --git a/test/test-stress.c b/test/test-stress.c index b26dfd04..298e5a17 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -180,7 +180,7 @@ static DWORD WINAPI thread_entry(LPVOID param) { static void run_os_threads(size_t nthreads) { DWORD* tids = (DWORD*)malloc(nthreads * sizeof(DWORD)); HANDLE* thandles = (HANDLE*)malloc(nthreads * sizeof(HANDLE)); - for (intptr_t i = 0; i < nthreads; i++) { + for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } 
for (int i = 0; i < nthreads; i++) { From 86cadca059b2bf80cd94aa05d6f04813d144c482 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 16:11:06 -0700 Subject: [PATCH 15/23] more comments --- test/CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8a830073..8bf36521 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,29 +14,31 @@ endif() # Import mimalloc (if installed) find_package(mimalloc 1.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") -message(STATUS "${MIMALLOC_INCLUDE_DIR}") - -# Tests +# overriding with a dynamic library add_executable(dynamic-override main-override.c) target_link_libraries(dynamic-override PUBLIC mimalloc) add_executable(dynamic-override-cxx main-override.cpp) target_link_libraries(dynamic-override-cxx PUBLIC mimalloc) + # overriding with a static object file works reliable as the symbols in the # object file have priority over those in library files add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o) target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include) target_link_libraries(static-override-obj PUBLIC pthread) + # overriding with a static library works too if using the `mimalloc-override.h` -# header to redefine malloc/free. +# header to redefine malloc/free. 
(the library already overrides new/delete) add_executable(static-override-static main-override-static.c) target_link_libraries(static-override-static PUBLIC mimalloc-static) # overriding with a static library: this may not work if the library is linked too late +# on the command line after the C runtime library; but we cannot control that well in CMake add_executable(static-override main-override.c) target_link_libraries(static-override PUBLIC mimalloc-static) From 7c26ce9280e306d9d476bbbc26fa2a95ad71cadc Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 10:27:14 -0700 Subject: [PATCH 16/23] ensure C++ compilation on windows --- ide/vs2017/mimalloc-override.vcxproj | 8 ++++---- ide/vs2017/mimalloc.vcxproj | 4 ++-- include/mimalloc-override.h | 2 +- src/init.c | 6 ++++++ test/main-override.cpp | 6 +++--- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 3ca8158a..f41b2efc 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -100,7 +100,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false - Default + CompileAsCpp ../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies) @@ -121,7 +121,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false - Default + CompileAsCpp ../../bin/mimalloc-redirect.lib;%(AdditionalDependencies) @@ -152,7 +152,7 @@ $(IntDir) false MultiThreadedDLL - Default + CompileAsCpp true @@ -177,7 +177,7 @@ $(IntDir) false MultiThreadedDLL - Default + CompileAsCpp true diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index a8cb7566..3e453471 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -154,7 +154,7 @@ Neither false false - Default + CompileAsCpp true @@ -185,7 +185,7 @@ Neither false false - Default + CompileAsCpp true diff --git 
a/include/mimalloc-override.h b/include/mimalloc-override.h index c3348068..56b41e6b 100644 --- a/include/mimalloc-override.h +++ b/include/mimalloc-override.h @@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file This header can be used to statically redirect malloc/free and new/delete to the mimalloc variants. This can be useful if one can include this file on each source file in a project (but be careful when using external code to -not accidentally mix pointer from different allocators). +not accidentally mix pointers from different allocators). On windows it can still be good to always try to include this header even when dynamically overriding since this will give better performance especially diff --git a/src/init.c b/src/init.c index d00d7c05..152e906b 100644 --- a/src/init.c +++ b/src/init.c @@ -384,12 +384,18 @@ bool _mi_preloading() { // Communicate with the redirection module on Windows #if defined(_WIN32) && defined(MI_SHARED_LIB) +#ifdef __cplusplus +extern "C" { +#endif mi_decl_export void _mi_redirect_init() { // called on redirection mi_redirected = true; } __declspec(dllimport) bool mi_allocator_init(const char** message); __declspec(dllimport) void mi_allocator_done(); +#ifdef __cplusplus +} +#endif #else static bool mi_allocator_init(const char** message) { if (message != NULL) *message = NULL; diff --git a/test/main-override.cpp b/test/main-override.cpp index fb7ab7a1..6c7fc0d5 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -23,14 +23,14 @@ public: int main() { - mi_version(); + mi_stats_reset(); // ignore earlier allocations atexit(free_p); void* p1 = malloc(78); void* p2 = mi_malloc_aligned(16,24); free(p1); p1 = malloc(8); char* s = mi_strdup("hello\n"); - mi_free(p2); + free(p2); p2 = malloc(16); p1 = realloc(p1, 32); free(p1); @@ -39,7 +39,7 @@ int main() { Test* t = new Test(42); delete t; t = new (std::nothrow) Test(42); - delete t; + delete t; return 0; } From 
f0530b6a83b6b060a36f92d7e82f2d72264af646 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 20:51:12 -0700 Subject: [PATCH 17/23] small optimizations, use bitwise align --- CMakeLists.txt | 1 + include/mimalloc-internal.h | 26 +++++++++++++++++++++++++- include/mimalloc-types.h | 11 ++++++----- include/mimalloc.h | 6 +++--- src/alloc.c | 6 +++--- src/init.c | 8 +++++--- src/os.c | 7 ------- src/page.c | 14 ++++++++++---- src/segment.c | 12 ++++++------ 9 files changed, 59 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d86d096b..ec0fd99a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,6 +87,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") if(CMAKE_C_COMPILER_ID MATCHES "GNU") list(APPEND mi_cflags -Wno-invalid-memory-model) list(APPEND mi_cflags -fvisibility=hidden) + list(APPEND mi_cflags -fbranch-target-load-optimize ) endif() endif() diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cbed5909..e261dba2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -39,7 +39,6 @@ bool _mi_preloading(); // true while the C runtime is not ready // os.c size_t _mi_os_page_size(void); -uintptr_t _mi_align_up(uintptr_t sz, size_t alignment); void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data @@ -165,6 +164,20 @@ static inline bool mi_mul_overflow(size_t size, size_t count, size_t* total) { #endif } +// Align upwards +static inline uintptr_t _mi_is_power_of_two(uintptr_t x) { + return ((x & (x - 1)) == 0); +} +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? 
+ return ((sz + mask) & ~mask); + } + else { + return (((sz + mask)/alignment)*alignment); + } +} + // Align a byte size to a size in _machine words_, // i.e. byte size == `wsize*sizeof(void*)`. static inline size_t _mi_wsize_from_size(size_t size) { @@ -324,12 +337,23 @@ static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_bl } static inline mi_block_t* mi_block_next(mi_page_t* page, mi_block_t* block) { + #if MI_SECURE return mi_block_nextx(page->cookie,block); + #else + UNUSED(page); + return mi_block_nextx(0, block); + #endif } static inline void mi_block_set_next(mi_page_t* page, mi_block_t* block, mi_block_t* next) { + #if MI_SECURE mi_block_set_nextx(page->cookie,block,next); + #else + UNUSED(page); + mi_block_set_nextx(0, block, next); + #endif } + // ------------------------------------------------------------------- // Getting the thread id should be performant // as it is called in the fast path of `_mi_free`, diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 7221f5b8..5c14ffd4 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,10 +132,9 @@ typedef union mi_page_flags_u { } mi_page_flags_t; // Thread free list. -// We use bottom 2 bits of the pointer for mi_delayed_t flags +// We use the bottom 2 bits of the pointer for mi_delayed_t flags typedef uintptr_t mi_thread_free_t; - // A page contains blocks of one specific size (`block_size`). 
// Each page has three list of free blocks: // `free` for blocks that can be allocated, @@ -165,9 +164,11 @@ typedef struct mi_page_s { mi_page_flags_t flags; uint16_t capacity; // number of blocks committed uint16_t reserved; // number of blocks reserved in memory - + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + #if MI_SECURE uintptr_t cookie; // random cookie to encode the free lists + #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) @@ -182,9 +183,9 @@ typedef struct mi_page_s { // improve page index calculation #if MI_INTPTR_SIZE==8 - //void* padding[1]; // 10 words on 64-bit + //void* padding[1]; // 12 words on 64-bit #elif MI_INTPTR_SIZE==4 - void* padding[1]; // 12 words on 32-bit + void* padding[1]; // 12 words on 32-bit #endif } mi_page_t; diff --git a/include/mimalloc.h b/include/mimalloc.h index e7e83791..c6b7b5f8 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -52,8 +52,8 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_alloc_size2(s1,s2) #else #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) - #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) - #define mi_cdecl // leads to warnings... __attribute__((cdecl)) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_cdecl // leads to warnings... __attribute__((cdecl)) #endif #else #define mi_decl_thread __thread @@ -62,7 +62,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) - #define mi_cdecl + #define mi_cdecl #endif // ------------------------------------------------------ diff --git a/src/alloc.c b/src/alloc.c index 649b6e95..6a91c0ad 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -237,9 +237,9 @@ void mi_free(void* p) mi_attr_noexcept #endif // adjust if it might be an un-aligned block - if (mi_likely(page->flags.value==0)) { // note: merging both tests (local | value) does not matter for performance + if (mi_likely(page->flags.value==0)) { // not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_likely(local)) { + if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance // owning thread can free a block directly mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance page->local_free = block; @@ -248,7 +248,7 @@ void mi_free(void* p) mi_attr_noexcept } else { // use atomic operations for a multi-threaded free - _mi_free_block_mt(page, block); + _mi_free_block_mt(page, block); } } else { diff --git a/src/init.c b/src/init.c index 152e906b..44e3c9cb 100644 --- a/src/init.c +++ b/src/init.c @@ -12,9 +12,11 @@ terms of the MIT license. A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, {0}, - 0, 0, - NULL, 0, 0, // free, used, cookie + 0, false, false, false, {0}, 0, 0, + NULL, 0, // free, used + #if MI_SECURE + 0, + #endif NULL, 0, 0, 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==4) diff --git a/src/os.c b/src/os.c index bcdd0ea4..b0eab87d 100644 --- a/src/os.c +++ b/src/os.c @@ -34,13 +34,6 @@ terms of the MIT license. 
A copy of the license can be found in the file ----------------------------------------------------------- */ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { - uintptr_t x = (sz / alignment) * alignment; - if (x < sz) x += alignment; - if (x < sz) return 0; // overflow - return x; -} - static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); } diff --git a/src/page.c b/src/page.c index b0c0b382..d46a5aad 100644 --- a/src/page.c +++ b/src/page.c @@ -93,7 +93,9 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); + #if MI_SECURE mi_assert_internal(page->cookie != 0); + #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); @@ -119,7 +121,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. continue; // and try again - } + } } while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); @@ -258,7 +260,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* next = mi_block_nextx(heap->cookie,block); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { - // we might already start delayed freeing while another thread has not yet + // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting. 
mi_block_t* dfree; do { @@ -498,7 +500,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st if (page->capacity >= page->reserved) return; size_t page_size; - _mi_page_start(_mi_page_segment(page), page, &page_size); + _mi_page_start(_mi_page_segment(page), page, &page_size); _mi_stat_increase(&stats->pages_extended, 1); // calculate the extend count @@ -533,7 +535,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); + #if MI_SECURE page->cookie = _mi_heap_random(heap) | 1; + #endif mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); @@ -543,7 +547,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); mi_assert_internal(page->flags.has_aligned == false); + #if MI_SECURE mi_assert_internal(page->cookie != 0); + #endif mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list @@ -683,7 +689,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(page->block_size == block_size); mi_heap_stat_increase( heap, huge, block_size); - } + } return page; } diff --git a/src/segment.c b/src/segment.c index 7f7bedd7..8f254a26 100644 --- a/src/segment.c +++ b/src/segment.c @@ -235,8 +235,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -// and no more than 4. -#define MI_SEGMENT_CACHE_MAX (4) +// and no more than 2. 
+#define MI_SEGMENT_CACHE_MAX (2) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -252,7 +252,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t } static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->cache_count < MI_SEGMENT_CACHE_MAX && + if (tld->cache_count < MI_SEGMENT_CACHE_MAX && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache return false; } @@ -318,7 +318,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); + bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { @@ -702,10 +702,10 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= (MI_SMALL_PAGE_SIZE/16)*3) { + if (block_size <= (MI_SMALL_PAGE_SIZE/4)) { page = mi_segment_small_page_alloc(tld,os_tld); } - else if (block_size <= (MI_MEDIUM_PAGE_SIZE/16)*3) { + else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) { page = mi_segment_medium_page_alloc(tld, os_tld); } else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) { From 6ca8b3fd89a5192ae794a8ea9483695c94b0d1e7 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 21:25:57 -0700 Subject: [PATCH 18/23] remove old comment --- src/page.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/page.c b/src/page.c index d46a5aad..69d32bfe 100644 --- a/src/page.c +++ b/src/page.c @@ -733,11 +733,4 @@ void* 
_mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // and try again, this time succeeding! (i.e. this should never recurse) return _mi_page_malloc(heap, page, size); - /* - if (page->used == page->reserved) { - // needed for huge pages to free reliably from other threads. - mi_page_to_full(page,mi_page_queue_of(page)); - } - return p; - */ } From 2b911b0b10d2c4b36d5c15b0f21e3a80f0a48a76 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 09:36:58 -0700 Subject: [PATCH 19/23] fix cmake build on windows --- CMakeLists.txt | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ec0fd99a..45d5f988 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,14 +121,28 @@ add_library(mimalloc SHARED ${mi_sources}) set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} NO_SONAME "YES" OUTPUT_NAME ${mi_basename} ) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) target_compile_options(mimalloc PRIVATE ${mi_cflags}) +target_link_libraries(mimalloc PUBLIC ${mi_libraries}) target_include_directories(mimalloc PUBLIC $ $ ) -target_link_libraries(mimalloc PUBLIC ${mi_libraries}) +if(WIN32) + # On windows copy the mimalloc redirection dll too. 
+ target_link_libraries(mimalloc PRIVATE ../../bin/mimalloc-redirect) + add_custom_command(TARGET mimalloc POST_BUILD + COMMAND "${CMAKE_COMMAND}" -E copy "../../bin/mimalloc-redirect.dll" $ + COMMENT "Copy mimalloc-redirect.dll to output directory") +endif() # static library add_library(mimalloc-static STATIC ${mi_sources}) +target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) +target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) +target_link_libraries(mimalloc-static PUBLIC ${mi_libraries}) +target_include_directories(mimalloc-static PUBLIC + $ + $ +) if(WIN32) # When building both static and shared libraries on Windows, a static library should use a # different output name to avoid the conflict with the import library of a shared one. @@ -137,14 +151,6 @@ if(WIN32) else() set_target_properties(mimalloc-static PROPERTIES OUTPUT_NAME ${mi_basename}) endif() -target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) -target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) - -target_include_directories(mimalloc-static PUBLIC - $ - $ -) -target_link_libraries(mimalloc-static PUBLIC ${mi_libraries}) # install static and shared library, and the include files install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_dir} LIBRARY NAMELINK_SKIP) From 48a3d0c6e288ee7c0ed3d6fdd9d917b01bc4000a Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 09:37:36 -0700 Subject: [PATCH 20/23] fix 32-bit build of stress test --- test/test-stress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 298e5a17..511679ac 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -27,7 +27,7 @@ static int N = 10; // scaling factor static volatile void* transfer[TRANSFERS]; -#if (INTPTR_MAX != UINT32_MAX) +#if (UINTPTR_MAX != UINT32_MAX) const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; #else const uintptr_t cookie = 0x1ce4e5b9UL; @@ -39,7 +39,7 @@ 
typedef uintptr_t* random_t; static uintptr_t pick(random_t r) { uintptr_t x = *r; - #if (INTPTR_MAX > UINT32_MAX) + #if (UINTPTR_MAX > UINT32_MAX) // by Sebastiano Vigna, see: x ^= x >> 30; x *= 0xbf58476d1ce4e5b9UL; @@ -183,7 +183,7 @@ static void run_os_threads(size_t nthreads) { for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } - for (int i = 0; i < nthreads; i++) { + for (size_t i = 0; i < nthreads; i++) { WaitForSingleObject(thandles[i], INFINITE); } } From 663e63aac20639ecf0af27c2d26e66821f942a0b Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 09:59:20 -0700 Subject: [PATCH 21/23] ensure cmake uses C++ compilation with MSVC --- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45d5f988..980ab542 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,6 @@ set(mi_sources src/options.c src/init.c) - # Set default build type if (NOT CMAKE_BUILD_TYPE) if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$") @@ -44,6 +43,11 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") set(MI_SECURE "ON") endif() +if(CMAKE_C_COMPILER_ID MATCHES "MSVC") + set(MI_USE_CXX "ON") +endif() + + # Options if(MI_OVERRIDE MATCHES "ON") message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") @@ -78,7 +82,7 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-stress.c PROPERTIES LANGUAGE CXX ) endif() # Compiler flags From 0a81d26c83b4a5cceb47bc212826df6f34940559 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 12:40:42 -0700 Subject: [PATCH 22/23] fix mimalloc-redirect path on windows cmake build --- CMakeLists.txt | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 980ab542..8b37e579 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,9 +132,9 @@ target_include_directories(mimalloc PUBLIC ) if(WIN32) # On windows copy the mimalloc redirection dll too. - target_link_libraries(mimalloc PRIVATE ../../bin/mimalloc-redirect) + target_link_libraries(mimalloc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect.lib) add_custom_command(TARGET mimalloc POST_BUILD - COMMAND "${CMAKE_COMMAND}" -E copy "../../bin/mimalloc-redirect.dll" $ + COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect.dll" $ COMMENT "Copy mimalloc-redirect.dll to output directory") endif() From 095a87be2eab8b58dd24df87bcb7df26dcbc46de Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 17:57:27 -0700 Subject: [PATCH 23/23] add recursion guard for overriding in windows --- src/options.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/options.c b/src/options.c index 919884bd..01620e75 100644 --- a/src/options.c +++ b/src/options.c @@ -111,25 +111,34 @@ void mi_option_enable_default(mi_option_t option, bool enable) { #define MAX_ERROR_COUNT (10) static uintptr_t error_count = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings +// When overriding malloc, we may recurse into mi_vfprintf if an allocation +// inside the C runtime causes another message. +static mi_decl_thread bool recurse = false; + // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. 
static void mi_vfprintf( FILE* out, const char* prefix, const char* fmt, va_list args ) { char buf[256]; if (fmt==NULL) return; + if (_mi_preloading() || recurse) return; + recurse = true; if (out==NULL) out = stdout; - if (_mi_preloading()) return; vsnprintf(buf,sizeof(buf)-1,fmt,args); #ifdef _WIN32 - // on windows with redirection, the C runtime uses us and we cannot call `fputs` - // while called from the C runtime itself, so use a non-locking option + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so use direct console output. if (out==stderr) { if (prefix != NULL) _cputs(prefix); _cputs(buf); - return; } + else #endif - if (prefix != NULL) fputs(prefix,out); - fputs(buf,out); + { + if (prefix != NULL) fputs(prefix,out); + fputs(buf,out); + } + recurse = false; + return; } void _mi_fprintf( FILE* out, const char* fmt, ... ) {