From bd6abd246330ed2870764b99c0c442ec71bad876 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 10:07:23 -0700 Subject: [PATCH 01/36] add comment about large pages --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index b27f714f..47fba61a 100644 --- a/src/options.c +++ b/src/options.c @@ -35,7 +35,7 @@ static mi_option_desc_t options[_mi_option_last] = { { 0, UNINIT, "page_reset" }, { 0, UNINIT, "cache_reset" }, { 0, UNINIT, "pool_commit" }, - { 0, UNINIT, "eager_commit" }, // secure must have eager commit + { 0, UNINIT, "eager_commit" }, // secure and large pages must have eager commit { 0, UNINIT, "large_os_pages" }, // use large OS pages { 0, UNINIT, "reset_decommits" }, { 0, UNINIT, "reset_discards" }, From 46b11fa0a48ed34bb753470668bff5c6551dfab9 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 15 Jul 2019 10:07:23 -0700 Subject: [PATCH 02/36] add comment about large pages --- src/options.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/options.c b/src/options.c index 46f0a36e..5c4548c2 100644 --- a/src/options.c +++ b/src/options.c @@ -36,6 +36,7 @@ typedef struct mi_option_desc_s { const char* name; // option name without `mimalloc_` prefix } mi_option_desc_t; +<<<<<<< HEAD static mi_option_desc_t options[_mi_option_last] = { // stable options @@ -43,6 +44,16 @@ static mi_option_desc_t options[_mi_option_last] = { MI_DEBUG, UNINIT, "show_errors" }, { 0, UNINIT, "verbose" }, +======= +static mi_option_desc_t options[_mi_option_last] = { + { 0, UNINIT, "page_reset" }, + { 0, UNINIT, "cache_reset" }, + { 0, UNINIT, "pool_commit" }, + { 0, UNINIT, "eager_commit" }, // secure and large pages must have eager commit + { 0, UNINIT, "large_os_pages" }, // use large OS pages + { 0, UNINIT, "reset_decommits" }, + { 0, UNINIT, "reset_discards" }, +>>>>>>> add comment about large pages #if MI_SECURE { MI_SECURE, INITIALIZED, "secure" }, // in a secure build the environment setting is ignored #else From 8dba36bcecf33aaa743e75baf5996bfb9f898ead Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 7 Jul 2019 12:56:40 +0800 Subject: [PATCH 03/36] Use checked unsigned multiplication extension of GCC/Clang Most processors have carry flags which they set on addition overflow, so it is a good idea to access them whenever possible. Most of them also have widening multiply instructions that can be used to detect overflow of the non-widening version. Both GCC and Clang offer a way to detect an overflow for security critical applications. 
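For illustration only (not part of this patch), a minimal standalone sketch of the checked-multiply builtin this change builds on; it assumes GCC 5+ or Clang and a 64-bit Unix-like target where size_t is unsigned long:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Returns true when size*count overflows; the wrapped product is
   stored in *total either way (see the Clang docs referenced below). */
static bool checked_mul(size_t size, size_t count, size_t* total) {
  return __builtin_umull_overflow(size, count, total);
}

int main(void) {
  size_t total;
  bool ovf = checked_mul((size_t)1 << 40, (size_t)1 << 40, &total);
  printf("overflow: %s\n", ovf ? "yes" : "no"); /* prints "yes": 2^80 does not fit in 64 bits */
  return 0;
}
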
Reference: https://clang.llvm.org/docs/LanguageExtensions.html#checked-arithmetic-builtins --- include/mimalloc-internal.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 1d380e8f..cbed5909 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -117,6 +117,9 @@ bool _mi_page_is_valid(mi_page_t* page); #define mi_likely(x) (x) #endif +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif #if defined(_MSC_VER) #define mi_decl_noinline __declspec(noinline) @@ -149,9 +152,17 @@ bool _mi_page_is_valid(mi_page_t* page); // Overflow detecting multiply #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) static inline bool mi_mul_overflow(size_t size, size_t count, size_t* total) { +#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 +#if (MI_INTPTR_SIZE == 4) + return __builtin_umul_overflow(size, count, total); +#else + return __builtin_umull_overflow(size, count, total); +#endif +#else /* __builtin_umul_overflow is unavailable */ *total = size * count; return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); +#endif } // Align a byte size to a size in _machine words_, From 7266e7006a8a8d8a79d1fc99796599f1f5967daf Mon Sep 17 00:00:00 2001 From: caixiangyue Date: Fri, 19 Jul 2019 16:23:14 +0800 Subject: [PATCH 04/36] fix typo --- src/os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/os.c b/src/os.c index 3527c94d..6b99e9a8 100644 --- a/src/os.c +++ b/src/os.c @@ -209,7 +209,7 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, void* p = NULL; if (use_large_os_page(size, try_alignment)) { if (large_page_try_ok > 0) { - // if a large page page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. large_page_try_ok--; } From 3e9c953eea501d9ab1796f7dbc50ab4957d3b25f Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 21 Jul 2019 21:42:00 +0800 Subject: [PATCH 05/36] Add branch prediction hint for mi_option_get mi_option_get is called frequently in stress tests, and the patch adds extra hint to the compiler to emit instructions that will cause branch prediction to favour the "likely" side of a jump instruction. 
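As a sketch (assuming GCC or Clang; the actual macros are defined in mimalloc-internal.h, with a plain fallback shown earlier in this series), the hint boils down to __builtin_expect with a no-op definition on other compilers:

#if defined(__GNUC__) || defined(__clang__)
#define mi_unlikely(x)  __builtin_expect((x), 0)   /* branch expected to be false */
#define mi_likely(x)    __builtin_expect((x), 1)   /* branch expected to be true  */
#else
#define mi_unlikely(x)  (x)
#define mi_likely(x)    (x)
#endif

/* Usage as in this patch: initialization happens at most once, so the
   already-initialized fast path becomes the straight-line code:
   if (mi_unlikely(desc->init == UNINIT)) { mi_option_init(desc); } */
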
--- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 5c4548c2..826df946 100644 --- a/src/options.c +++ b/src/options.c @@ -79,7 +79,7 @@ static void mi_option_init(mi_option_desc_t* desc); long mi_option_get(mi_option_t option) { mi_assert(option >= 0 && option < _mi_option_last); mi_option_desc_t* desc = &options[option]; - if (desc->init == UNINIT) { + if (mi_unlikely(desc->init == UNINIT)) { mi_option_init(desc); if (option != mi_option_verbose) { _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); From 28b874129ad3ce0eab03385c797d16245a5af9fb Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 21 Jul 2019 22:20:05 +0800 Subject: [PATCH 06/36] Avoid using strlen function in loop --- src/options.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index 826df946..63f470ea 100644 --- a/src/options.c +++ b/src/options.c @@ -214,10 +214,16 @@ static const char* mi_getenv(const char* name) { #pragma warning(suppress:4996) const char* s = getenv(name); if (s == NULL) { +<<<<<<< HEAD char buf[64+1]; mi_strlcpy(buf,name,64); for (size_t i = 0; i < strlen(buf); i++) { buf[i] = toupper(name[i]); +======= + size_t buf_size = strlen(buf); + for (size_t i = 0; i < buf_size; i++) { + buf[i] = toupper(buf[i]); +>>>>>>> Avoid using strlen function in loop } #pragma warning(suppress:4996) s = getenv(buf); @@ -234,7 +240,8 @@ static void mi_option_init(mi_option_desc_t* desc) { const char* s = mi_getenv(buf); if (s != NULL) { mi_strlcpy(buf, s, sizeof(buf)); - for (size_t i = 0; i < strlen(buf); i++) { + size_t buf_size = strlen(buf); // TODO: use strnlen? + for (size_t i = 0; i < buf_size; i++) { buf[i] = toupper(buf[i]); } if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { From 2eb607d03fe57f74da253c397461daef035b2bde Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 13:03:51 -0700 Subject: [PATCH 07/36] re-add missing thread_init needed when running in debug mode --- src/heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heap.c b/src/heap.c index 48bb9830..e55fdf34 100644 --- a/src/heap.c +++ b/src/heap.c @@ -172,7 +172,7 @@ void mi_collect(bool force) mi_attr_noexcept { ----------------------------------------------------------- */ mi_heap_t* mi_heap_get_default(void) { - // mi_thread_init(); + mi_thread_init(); return mi_get_default_heap(); } From 65037d051bf257435d95b21a606fe6bdf5f91cf3 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 13:09:34 -0700 Subject: [PATCH 08/36] improved debug warning for freeing invalid pointers --- src/alloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/alloc.c b/src/alloc.c index bac925ee..099bdbad 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -211,8 +211,11 @@ void mi_free(void* p) mi_attr_noexcept #if (MI_DEBUG>0) if (mi_unlikely(!mi_is_in_heap_region(p))) { - _mi_warning_message("possibly trying to mi_free a pointer that does not point to a valid heap region: %p\n" + _mi_warning_message("possibly trying to mi_free a pointer that does not point to a valid heap region: 0x%p\n" "(this may still be a valid very large allocation (over 64MiB))\n", p); + if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { + _mi_warning_message("(yes, the previous pointer 0x%p was valid after all)\n", p); + } } if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { _mi_error_message("trying to mi_free a pointer that does not point to a valid heap space: %p\n", 
p); From fe229f8fad24eca5dc4953c93bbc87795e6d819a Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Mon, 22 Jul 2019 04:45:40 +0800 Subject: [PATCH 09/36] Fix path name in documentation about macOS --- doc/mimalloc-doc.h | 2 +- docs/overrides.html | 2 +- readme.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/mimalloc-doc.h b/doc/mimalloc-doc.h index 16327d2c..57b7dd4c 100644 --- a/doc/mimalloc-doc.h +++ b/doc/mimalloc-doc.h @@ -808,7 +808,7 @@ library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. - `env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram` (on Linux, BSD, etc.) -- `env DYLD_INSERT_LIBRARIES=usr/lib/libmimalloc.dylib myprogram` (On macOS) +- `env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram` (On macOS) Note certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). diff --git a/docs/overrides.html b/docs/overrides.html index 16400375..2360a936 100644 --- a/docs/overrides.html +++ b/docs/overrides.html @@ -109,7 +109,7 @@ $(document).ready(function(){initNavTree('overrides.html','');});

On these systems we preload the mimalloc shared library so all calls to the standard malloc interface are resolved to the mimalloc library.

  • env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram (on Linux, BSD, etc.)
- • env DYLD_INSERT_LIBRARIES=usr/lib/libmimalloc.dylib myprogram (On macOS)
+ • env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram (On macOS)

    Note certain security restrictions may apply when doing this from the shell.

diff --git a/readme.md b/readme.md index 85234c24..8ff19deb 100644 --- a/readme.md +++ b/readme.md @@ -191,7 +191,7 @@ library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. - `env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram` (on Linux, BSD, etc.) -- `env DYLD_INSERT_LIBRARIES=usr/lib/libmimalloc.dylib myprogram` (On macOS) +- `env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram` (On macOS) Note certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). From b5e26bedb1be3a8bd110b451bd1427fa737be1bb Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 21 Jul 2019 23:21:14 +0800 Subject: [PATCH 10/36] Enforce strict include-what-you-use policy The include-what-you-use (IWYU) policy is beneficial to faster compilation and fewer recompilations. Many build tools, such as GNU make, provide a mechanism for automatically figuring out what .h files a .cc file depends on. These mechanisms typically look at #include lines. When unnecessary #includes are listed, the build system is more likely to recompile in cases where it is not necessary. With the enforcement, header file no longer includes . Reference: https://github.com/include-what-you-use/include-what-you-use/blob/master/docs/WhyIWYU.md --- include/mimalloc-types.h | 1 - include/mimalloc.h | 1 - src/alloc-aligned.c | 2 +- src/alloc.c | 5 +++-- src/init.c | 3 ++- src/options.c | 3 ++- src/os.c | 2 +- src/page.c | 2 -- 8 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index d591ff86..7221f5b8 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -8,7 +8,6 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_TYPES_H #define MIMALLOC_TYPES_H -#include // size_t etc. #include // ptrdiff_t #include // uintptr_t, uint16_t, etc diff --git a/include/mimalloc.h b/include/mimalloc.h index d92c2866..e7e83791 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -69,7 +69,6 @@ terms of the MIT license. A copy of the license can be found in the file // Includes // ------------------------------------------------------ -#include // size_t, malloc etc. #include // bool #include // FILE diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 175fa3e3..2f44f317 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#include // memset +#include // memset, memcpy // ------------------------------------------------------ // Aligned Allocation diff --git a/src/alloc.c b/src/alloc.c index 099bdbad..649b6e95 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -8,7 +8,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include // memset +#include // memset, memcpy, strlen +#include // malloc, exit #define MI_IN_ALLOC_C #include "alloc-override.c" @@ -465,7 +466,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) } } #else -#include +#include // pathconf static size_t mi_path_max() { static size_t path_max = 0; if (path_max <= 0) { diff --git a/src/init.c b/src/init.c index 06aa28c5..d00d7c05 100644 --- a/src/init.c +++ b/src/init.c @@ -7,7 +7,8 @@ terms of the MIT license. 
A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#include // memcpy +#include // memcpy, memset +#include // atexit // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { diff --git a/src/options.c b/src/options.c index 63f470ea..349599ad 100644 --- a/src/options.c +++ b/src/options.c @@ -9,7 +9,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-atomic.h" #include -#include // strcmp +#include // strtol +#include // strncpy, strncat, strlen, strstr #include // toupper #include diff --git a/src/os.c b/src/os.c index 6b99e9a8..52fd4206 100644 --- a/src/os.c +++ b/src/os.c @@ -11,7 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" -#include // memset +#include // strerror #include #if defined(_WIN32) diff --git a/src/page.c b/src/page.c index 685b6b4a..b0c0b382 100644 --- a/src/page.c +++ b/src/page.c @@ -15,8 +15,6 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" -#include // memset, memcpy - /* ----------------------------------------------------------- Definition of page queues for each block size ----------------------------------------------------------- */ From 7b7c36c8c7511d6d8a718cd5d2e36db5d96a4812 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 17:08:09 -0700 Subject: [PATCH 11/36] use hinted address to mmap to reduce mmap calls --- src/os.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/os.c b/src/os.c index 52fd4206..a5f12c17 100644 --- a/src/os.c +++ b/src/os.c @@ -10,6 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc.h" #include "mimalloc-internal.h" +#include "mimalloc-atomic.h" #include // strerror #include @@ -242,6 +243,23 @@ static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { return (void*) aligned_base; } #else +static void* mi_unix_mmapx(size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + void* p = NULL; + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use a special area for 4MiB aligned allocations + static volatile intptr_t aligned_base = ((intptr_t)1 << 42); // starting at 4TiB + if (try_alignment <= MI_SEGMENT_SIZE && (size%MI_SEGMENT_SIZE)==0 && (aligned_base%try_alignment)==0) { + intptr_t hint = mi_atomic_add(&aligned_base,size) - size; + p = mmap((void*)hint,size,protect_flags,flags,fd,0); + if (p==MAP_FAILED) p = NULL; // fall back to regular mmap + } + #endif + if (p==NULL) { + p = mmap(NULL,size,protect_flags,flags,fd,0); + } + return p; +} + static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) { void* p = NULL; #if !defined(MAP_ANONYMOUS) @@ -278,12 +296,12 @@ static void* mi_unix_mmap(size_t size, size_t try_alignment, int protect_flags) // try large page allocation // TODO: if always failing due to permissions or no huge pages, try to avoid repeatedly trying? // Should we check this in _mi_os_init? 
(as on Windows) - p = mmap(NULL, size, protect_flags, lflags, fd, 0); + p = mi_unix_mmapx(size, try_alignment, protect_flags, lflags, fd); if (p == MAP_FAILED) p = NULL; // fall back to regular mmap if large is exhausted or no permission } } if (p == NULL) { - p = mmap(NULL, size, protect_flags, flags, -1, 0); + p = mi_unix_mmapx(size, try_alignment, protect_flags, flags, -1); if (p == MAP_FAILED) p = NULL; } return p; @@ -439,7 +457,7 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -// Commit/Decommit memory. +// Commit/Decommit memory. // Usuelly commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, mi_stats_t* stats) { @@ -503,7 +521,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! - #if (MI_DEBUG>1) + #if (MI_DEBUG>1) if (!mi_option_is_enabled(mi_option_secure)) { memset(start, 0, csize); // pretend it is eagerly reset } @@ -521,7 +539,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == start); if (p != start) return false; - } + } #else #if defined(MADV_FREE) static int advice = MADV_FREE; From b611c7fb34a14ccea4038c1955e169f550ecdab5 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 17:13:36 -0700 Subject: [PATCH 12/36] use atomic ops to guard large page tries on windows --- src/os.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/os.c b/src/os.c index a5f12c17..f7b36258 100644 --- a/src/os.c +++ b/src/os.c @@ -206,20 +206,26 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment } static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags) { - static size_t large_page_try_ok = 0; + static volatile uintptr_t large_page_try_ok = 0; void* p = NULL; if (use_large_os_page(size, try_alignment)) { +<<<<<<< HEAD if (large_page_try_ok > 0) { // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. +======= + uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); + if (try_ok > 0) { + // if a large page page allocation fails, it seems the calls to VirtualAlloc get very expensive. +>>>>>>> use atomic ops to guard large page tries on windows // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - large_page_try_ok--; + mi_atomic_compare_exchange(&large_page_try_ok, try_ok - 1, try_ok); } else { // large OS pages must always reserve and commit. p = mi_win_virtual_allocx(addr, size, try_alignment, MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE | flags); // fall back to non-large page allocation on error (`p == NULL`). 
if (p == NULL) { - large_page_try_ok = 10; // on error, don't try again for the next N allocations + mi_atomic_write(&large_page_try_ok,10); // on error, don't try again for the next N allocations } } } From 6d9fab5af41878deb98889e3e4ab7d59c4ae3c46 Mon Sep 17 00:00:00 2001 From: daan Date: Sun, 21 Jul 2019 17:14:13 -0700 Subject: [PATCH 13/36] trailing id after #endif --- include/mimalloc-override.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-override.h b/include/mimalloc-override.h index f6149514..d81063ab 100644 --- a/include/mimalloc-override.h +++ b/include/mimalloc-override.h @@ -75,7 +75,7 @@ including this header is not necessary. // ------------------------------------------------------ #ifdef __cplusplus #include - + void operator delete(void* p) noexcept { mi_free(p); }; void operator delete[](void* p) noexcept { mi_free(p); }; @@ -103,4 +103,4 @@ including this header is not necessary. #endif #endif -#endif MIMALLOC_OVERRIDE_H +#endif // MIMALLOC_OVERRIDE_H From 219d46ff0c38e83a0153876fe19ae8dbf994dfdc Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 01:36:16 -0700 Subject: [PATCH 14/36] update test files and overriding --- ide/vs2017/mimalloc-override-test.vcxproj | 15 ++------- .../mimalloc-override-test.vcxproj.filters | 3 -- ide/vs2017/mimalloc-test.vcxproj | 6 ++-- ide/vs2017/mimalloc-test.vcxproj.filters | 2 +- include/mimalloc-override.h | 12 ++++--- test/CMakeLists.txt | 19 ++++++++---- test/main-override-static.c | 31 +++++++++++++++++++ test/main-override.c | 7 ++--- test/main-override.cpp | 13 +++----- 9 files changed, 67 insertions(+), 41 deletions(-) create mode 100644 test/main-override-static.c diff --git a/ide/vs2017/mimalloc-override-test.vcxproj b/ide/vs2017/mimalloc-override-test.vcxproj index 77752890..7df1e79a 100644 --- a/ide/vs2017/mimalloc-override-test.vcxproj +++ b/ide/vs2017/mimalloc-override-test.vcxproj @@ -172,23 +172,14 @@ COPY /Y $(SolutionDir)..\..\bin\mimalloc-redirect.dll $(OutputPath) - - - true - true - true - true - - - false - false - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + + diff --git a/ide/vs2017/mimalloc-override-test.vcxproj.filters b/ide/vs2017/mimalloc-override-test.vcxproj.filters index 80f1c9c0..eb5e70b7 100644 --- a/ide/vs2017/mimalloc-override-test.vcxproj.filters +++ b/ide/vs2017/mimalloc-override-test.vcxproj.filters @@ -18,8 +18,5 @@ Source Files - - Source Files - \ No newline at end of file diff --git a/ide/vs2017/mimalloc-test.vcxproj b/ide/vs2017/mimalloc-test.vcxproj index 8e61a97f..c1539aeb 100644 --- a/ide/vs2017/mimalloc-test.vcxproj +++ b/ide/vs2017/mimalloc-test.vcxproj @@ -144,14 +144,14 @@ Console - - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + + diff --git a/ide/vs2017/mimalloc-test.vcxproj.filters b/ide/vs2017/mimalloc-test.vcxproj.filters index eb5e70b7..fca75e1c 100644 --- a/ide/vs2017/mimalloc-test.vcxproj.filters +++ b/ide/vs2017/mimalloc-test.vcxproj.filters @@ -15,7 +15,7 @@ - + Source Files diff --git a/include/mimalloc-override.h b/include/mimalloc-override.h index d81063ab..c3348068 100644 --- a/include/mimalloc-override.h +++ b/include/mimalloc-override.h @@ -69,11 +69,15 @@ including this header is not necessary. #define _aligned_offset_recalloc(p,s,n,a,o) mi_recalloc_aligned_at(p,s,n,a,o) -// ------------------------------------------------------ -// With a C++ compiler we override the new/delete operators. 
+// ----------------------------------------------------------------- +// With a C++ compiler we can override all the new/delete operators +// by defining 'MIMALLOC_DEFINE_NEW_DELETE' in some source file and +// then including this header file. This is not needed when linking +// statically with the mimalloc library, but it can be more performant +// on Windows when using dynamic overiding as well. // see -// ------------------------------------------------------ -#ifdef __cplusplus +// ----------------------------------------------------------------- +#if defined(__cplusplus) && defined(MIMALLOC_DEFINE_NEW_DELETE) #include void operator delete(void* p) noexcept { mi_free(p); }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 42d4a2f4..8a830073 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,14 +24,21 @@ target_link_libraries(dynamic-override PUBLIC mimalloc) add_executable(dynamic-override-cxx main-override.cpp) target_link_libraries(dynamic-override-cxx PUBLIC mimalloc) -# with a static library +# overriding with a static object file works reliable as the symbols in the +# object file have priority over those in library files +add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o) +target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include) +target_link_libraries(static-override-obj PUBLIC pthread) + +# overriding with a static library works too if using the `mimalloc-override.h` +# header to redefine malloc/free. +add_executable(static-override-static main-override-static.c) +target_link_libraries(static-override-static PUBLIC mimalloc-static) + + +# overriding with a static library: this may not work if the library is linked too late add_executable(static-override main-override.c) target_link_libraries(static-override PUBLIC mimalloc-static) add_executable(static-override-cxx main-override.cpp) target_link_libraries(static-override-cxx PUBLIC mimalloc-static) - -# and with a static object file -add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o) -target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include) -target_link_libraries(static-override-obj PUBLIC pthread) diff --git a/test/main-override-static.c b/test/main-override-static.c new file mode 100644 index 00000000..6ddf4f37 --- /dev/null +++ b/test/main-override-static.c @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +#include +#include // redefines malloc etc. 
+ +int main() { + mi_version(); + void* p1 = malloc(78); + void* p2 = malloc(24); + free(p1); + p1 = malloc(8); + //char* s = strdup("hello\n"); + free(p2); + p2 = malloc(16); + p1 = realloc(p1, 32); + free(p1); + free(p2); + //free(s); + //mi_collect(true); + + /* now test if override worked by allocating/freeing across the api's*/ + //p1 = mi_malloc(32); + //free(p1); + //p2 = malloc(32); + //mi_free(p2); + mi_stats_print(NULL); + return 0; +} diff --git a/test/main-override.c b/test/main-override.c index ddb2f16e..1bec1179 100644 --- a/test/main-override.c +++ b/test/main-override.c @@ -3,11 +3,10 @@ #include #include -//#include - +#include int main() { - //mi_stats_reset(); + mi_version(); // ensure mimalloc library is linked void* p1 = malloc(78); void* p2 = malloc(24); free(p1); @@ -26,6 +25,6 @@ int main() { //free(p1); //p2 = malloc(32); //mi_free(p2); - + mi_stats_print(NULL); return 0; } diff --git a/test/main-override.cpp b/test/main-override.cpp index 8f47dcd1..fb7ab7a1 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -4,8 +4,6 @@ #include #include -#include - #include static void* p = malloc(8); @@ -24,16 +22,15 @@ public: }; -int main() { - //mi_malloc_override(); - mi_stats_reset(); +int main() { + mi_version(); atexit(free_p); void* p1 = malloc(78); - void* p2 = _aligned_malloc(24,16); + void* p2 = mi_malloc_aligned(16,24); free(p1); p1 = malloc(8); - char* s = _strdup("hello\n"); - _aligned_free(p2); + char* s = mi_strdup("hello\n"); + mi_free(p2); p2 = malloc(16); p1 = realloc(p1, 32); free(p1); From 0b4d74a5668b2869701a66a398f27d9bf13d163f Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 10:10:45 -0700 Subject: [PATCH 15/36] merge --- src/alloc-posix.c | 4 ++-- test/test-stress.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 4e844ba3..1f55b3a8 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -19,6 +19,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include #include // memcpy +#include // getenv #ifndef EINVAL #define EINVAL 22 @@ -115,7 +116,7 @@ int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { #pragma warning(suppress:4996) char* p = getenv(name); if (p==NULL) { - *buf = NULL; + *buf = NULL; } else { *buf = mi_strdup(p); @@ -146,4 +147,3 @@ int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) return 0; #endif } - diff --git a/test/test-stress.c b/test/test-stress.c index b26dfd04..298e5a17 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -180,7 +180,7 @@ static DWORD WINAPI thread_entry(LPVOID param) { static void run_os_threads(size_t nthreads) { DWORD* tids = (DWORD*)malloc(nthreads * sizeof(DWORD)); HANDLE* thandles = (HANDLE*)malloc(nthreads * sizeof(HANDLE)); - for (intptr_t i = 0; i < nthreads; i++) { + for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } for (int i = 0; i < nthreads; i++) { From 598ed19c614cdf8c3a2b967df8b4b5793111c95e Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 16:11:06 -0700 Subject: [PATCH 16/36] more comments --- test/CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8a830073..8bf36521 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,29 +14,31 @@ endif() # Import mimalloc (if installed) find_package(mimalloc 1.0 REQUIRED NO_SYSTEM_ENVIRONMENT_PATH) +message(STATUS "Found mimalloc installed at: ${MIMALLOC_TARGET_DIR}") -message(STATUS "${MIMALLOC_INCLUDE_DIR}") - -# Tests +# overriding with a dynamic library add_executable(dynamic-override main-override.c) target_link_libraries(dynamic-override PUBLIC mimalloc) add_executable(dynamic-override-cxx main-override.cpp) target_link_libraries(dynamic-override-cxx PUBLIC mimalloc) + # overriding with a static object file works reliable as the symbols in the # object file have priority over those in library files add_executable(static-override-obj main-override.c ${MIMALLOC_TARGET_DIR}/mimalloc.o) target_include_directories(static-override-obj PUBLIC ${MIMALLOC_TARGET_DIR}/include) target_link_libraries(static-override-obj PUBLIC pthread) + # overriding with a static library works too if using the `mimalloc-override.h` -# header to redefine malloc/free. +# header to redefine malloc/free. 
(the library already overrides new/delete) add_executable(static-override-static main-override-static.c) target_link_libraries(static-override-static PUBLIC mimalloc-static) # overriding with a static library: this may not work if the library is linked too late +# on the command line after the C runtime library; but we cannot control that well in CMake add_executable(static-override main-override.c) target_link_libraries(static-override PUBLIC mimalloc-static) From 66b8c37ab396553ad43f332ebf6d1abc55c98ef3 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 10:27:14 -0700 Subject: [PATCH 17/36] ensure C++ compilation on windows --- ide/vs2017/mimalloc-override.vcxproj | 8 ++++---- ide/vs2017/mimalloc.vcxproj | 4 ++-- include/mimalloc-override.h | 2 +- src/init.c | 6 ++++++ test/main-override.cpp | 6 +++--- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index 3ca8158a..f41b2efc 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -100,7 +100,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false - Default + CompileAsCpp ../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies) @@ -121,7 +121,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false - Default + CompileAsCpp ../../bin/mimalloc-redirect.lib;%(AdditionalDependencies) @@ -152,7 +152,7 @@ $(IntDir) false MultiThreadedDLL - Default + CompileAsCpp true @@ -177,7 +177,7 @@ $(IntDir) false MultiThreadedDLL - Default + CompileAsCpp true diff --git a/ide/vs2017/mimalloc.vcxproj b/ide/vs2017/mimalloc.vcxproj index a8cb7566..3e453471 100644 --- a/ide/vs2017/mimalloc.vcxproj +++ b/ide/vs2017/mimalloc.vcxproj @@ -154,7 +154,7 @@ Neither false false - Default + CompileAsCpp true @@ -185,7 +185,7 @@ Neither false false - Default + CompileAsCpp true diff --git a/include/mimalloc-override.h b/include/mimalloc-override.h index c3348068..56b41e6b 100644 --- a/include/mimalloc-override.h +++ b/include/mimalloc-override.h @@ -12,7 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file This header can be used to statically redirect malloc/free and new/delete to the mimalloc variants. This can be useful if one can include this file on each source file in a project (but be careful when using external code to -not accidentally mix pointer from different allocators). +not accidentally mix pointers from different allocators). 
On windows it can still be good to always try to include this header even when dynamically overriding since this will give better performance especially diff --git a/src/init.c b/src/init.c index d00d7c05..152e906b 100644 --- a/src/init.c +++ b/src/init.c @@ -384,12 +384,18 @@ bool _mi_preloading() { // Communicate with the redirection module on Windows #if defined(_WIN32) && defined(MI_SHARED_LIB) +#ifdef __cplusplus +extern "C" { +#endif mi_decl_export void _mi_redirect_init() { // called on redirection mi_redirected = true; } __declspec(dllimport) bool mi_allocator_init(const char** message); __declspec(dllimport) void mi_allocator_done(); +#ifdef __cplusplus +} +#endif #else static bool mi_allocator_init(const char** message) { if (message != NULL) *message = NULL; diff --git a/test/main-override.cpp b/test/main-override.cpp index fb7ab7a1..6c7fc0d5 100644 --- a/test/main-override.cpp +++ b/test/main-override.cpp @@ -23,14 +23,14 @@ public: int main() { - mi_version(); + mi_stats_reset(); // ignore earlier allocations atexit(free_p); void* p1 = malloc(78); void* p2 = mi_malloc_aligned(16,24); free(p1); p1 = malloc(8); char* s = mi_strdup("hello\n"); - mi_free(p2); + free(p2); p2 = malloc(16); p1 = realloc(p1, 32); free(p1); @@ -39,7 +39,7 @@ int main() { Test* t = new Test(42); delete t; t = new (std::nothrow) Test(42); - delete t; + delete t; return 0; } From 189ad0f81dc3b029c05a3b7cf19d8ba78c4e0429 Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 20:51:12 -0700 Subject: [PATCH 18/36] small optimizations, use bitwise aligne --- CMakeLists.txt | 1 + include/mimalloc-internal.h | 26 +++++++++++++++++++++++++- include/mimalloc-types.h | 11 ++++++----- include/mimalloc.h | 6 +++--- src/alloc.c | 6 +++--- src/init.c | 8 +++++--- src/os.c | 7 ------- src/page.c | 14 ++++++++++---- src/segment.c | 12 ++++++------ 9 files changed, 59 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d86d096b..ec0fd99a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,6 +87,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") if(CMAKE_C_COMPILER_ID MATCHES "GNU") list(APPEND mi_cflags -Wno-invalid-memory-model) list(APPEND mi_cflags -fvisibility=hidden) + list(APPEND mi_cflags -fbranch-target-load-optimize ) endif() endif() diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cbed5909..e261dba2 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -39,7 +39,6 @@ bool _mi_preloading(); // true while the C runtime is not ready // os.c size_t _mi_os_page_size(void); -uintptr_t _mi_align_up(uintptr_t sz, size_t alignment); void _mi_os_init(void); // called from process init void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data @@ -165,6 +164,20 @@ static inline bool mi_mul_overflow(size_t size, size_t count, size_t* total) { #endif } +// Align upwards +static inline uintptr_t _mi_is_power_of_two(uintptr_t x) { + return ((x & (x - 1)) == 0); +} +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return ((sz + mask) & ~mask); + } + else { + return (((sz + mask)/alignment)*alignment); + } +} + // Align a byte size to a size in _machine words_, // i.e. byte size == `wsize*sizeof(void*)`. 
static inline size_t _mi_wsize_from_size(size_t size) { @@ -324,12 +337,23 @@ static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_bl } static inline mi_block_t* mi_block_next(mi_page_t* page, mi_block_t* block) { + #if MI_SECURE return mi_block_nextx(page->cookie,block); + #else + UNUSED(page); + return mi_block_nextx(0, block); + #endif } static inline void mi_block_set_next(mi_page_t* page, mi_block_t* block, mi_block_t* next) { + #if MI_SECURE mi_block_set_nextx(page->cookie,block,next); + #else + UNUSED(page); + mi_block_set_nextx(0, block, next); + #endif } + // ------------------------------------------------------------------- // Getting the thread id should be performant // as it is called in the fast path of `_mi_free`, diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 7221f5b8..5c14ffd4 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -132,10 +132,9 @@ typedef union mi_page_flags_u { } mi_page_flags_t; // Thread free list. -// We use bottom 2 bits of the pointer for mi_delayed_t flags +// We use the bottom 2 bits of the pointer for mi_delayed_t flags typedef uintptr_t mi_thread_free_t; - // A page contains blocks of one specific size (`block_size`). // Each page has three list of free blocks: // `free` for blocks that can be allocated, @@ -165,9 +164,11 @@ typedef struct mi_page_s { mi_page_flags_t flags; uint16_t capacity; // number of blocks committed uint16_t reserved; // number of blocks reserved in memory - + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + #if MI_SECURE uintptr_t cookie; // random cookie to encode the free lists + #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) @@ -182,9 +183,9 @@ typedef struct mi_page_s { // improve page index calculation #if MI_INTPTR_SIZE==8 - //void* padding[1]; // 10 words on 64-bit + //void* padding[1]; // 12 words on 64-bit #elif MI_INTPTR_SIZE==4 - void* padding[1]; // 12 words on 32-bit + void* padding[1]; // 12 words on 32-bit #endif } mi_page_t; diff --git a/include/mimalloc.h b/include/mimalloc.h index e7e83791..c6b7b5f8 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -52,8 +52,8 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_alloc_size2(s1,s2) #else #define mi_attr_alloc_size(s) __attribute__((alloc_size(s))) - #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) - #define mi_cdecl // leads to warnings... __attribute__((cdecl)) + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2))) + #define mi_cdecl // leads to warnings... __attribute__((cdecl)) #endif #else #define mi_decl_thread __thread @@ -62,7 +62,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_attr_malloc #define mi_attr_alloc_size(s) #define mi_attr_alloc_size2(s1,s2) - #define mi_cdecl + #define mi_cdecl #endif // ------------------------------------------------------ diff --git a/src/alloc.c b/src/alloc.c index 649b6e95..6a91c0ad 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -237,9 +237,9 @@ void mi_free(void* p) mi_attr_noexcept #endif // adjust if it might be an un-aligned block - if (mi_likely(page->flags.value==0)) { // note: merging both tests (local | value) does not matter for performance + if (mi_likely(page->flags.value==0)) { // not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_likely(local)) { + if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance // owning thread can free a block directly mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance page->local_free = block; @@ -248,7 +248,7 @@ void mi_free(void* p) mi_attr_noexcept } else { // use atomic operations for a multi-threaded free - _mi_free_block_mt(page, block); + _mi_free_block_mt(page, block); } } else { diff --git a/src/init.c b/src/init.c index 152e906b..44e3c9cb 100644 --- a/src/init.c +++ b/src/init.c @@ -12,9 +12,11 @@ terms of the MIT license. A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, {0}, - 0, 0, - NULL, 0, 0, // free, used, cookie + 0, false, false, false, {0}, 0, 0, + NULL, 0, // free, used + #if MI_SECURE + 0, + #endif NULL, 0, 0, 0, NULL, NULL, NULL #if (MI_INTPTR_SIZE==4) diff --git a/src/os.c b/src/os.c index f7b36258..f6358912 100644 --- a/src/os.c +++ b/src/os.c @@ -34,13 +34,6 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { - uintptr_t x = (sz / alignment) * alignment; - if (x < sz) x += alignment; - if (x < sz) return 0; // overflow - return x; -} - static void* mi_align_up_ptr(void* p, size_t alignment) { return (void*)_mi_align_up((uintptr_t)p, alignment); } diff --git a/src/page.c b/src/page.c index b0c0b382..d46a5aad 100644 --- a/src/page.c +++ b/src/page.c @@ -93,7 +93,9 @@ static bool mi_page_is_valid_init(mi_page_t* page) { bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); + #if MI_SECURE mi_assert_internal(page->cookie != 0); + #endif if (page->heap!=NULL) { mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id); @@ -119,7 +121,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) { else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) { mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. 
continue; // and try again - } + } } while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal !mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree)); @@ -258,7 +260,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { mi_block_t* next = mi_block_nextx(heap->cookie,block); // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { - // we might already start delayed freeing while another thread has not yet + // we might already start delayed freeing while another thread has not yet // reset the delayed_freeing flag; in that case delay it further by reinserting. mi_block_t* dfree; do { @@ -498,7 +500,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st if (page->capacity >= page->reserved) return; size_t page_size; - _mi_page_start(_mi_page_segment(page), page, &page_size); + _mi_page_start(_mi_page_segment(page), page, &page_size); _mi_stat_increase(&stats->pages_extended, 1); // calculate the extend count @@ -533,7 +535,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->block_size = block_size; mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); + #if MI_SECURE page->cookie = _mi_heap_random(heap) | 1; + #endif mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); @@ -543,7 +547,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->next == NULL); mi_assert_internal(page->prev == NULL); mi_assert_internal(page->flags.has_aligned == false); + #if MI_SECURE mi_assert_internal(page->cookie != 0); + #endif mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list @@ -683,7 +689,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(page->block_size == block_size); mi_heap_stat_increase( heap, huge, block_size); - } + } return page; } diff --git a/src/segment.c b/src/segment.c index 7f7bedd7..8f254a26 100644 --- a/src/segment.c +++ b/src/segment.c @@ -235,8 +235,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -// and no more than 4. -#define MI_SEGMENT_CACHE_MAX (4) +// and no more than 2. +#define MI_SEGMENT_CACHE_MAX (2) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -252,7 +252,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t } static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->cache_count < MI_SEGMENT_CACHE_MAX && + if (tld->cache_count < MI_SEGMENT_CACHE_MAX && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache return false; } @@ -318,7 +318,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_size = (page_kind == MI_PAGE_HUGE ? 
segment_size : (size_t)1 << page_shift); // Try to get it from our thread local cache first - bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); + bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM); bool protection_still_good = false; mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld); if (segment != NULL) { @@ -702,10 +702,10 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= (MI_SMALL_PAGE_SIZE/16)*3) { + if (block_size <= (MI_SMALL_PAGE_SIZE/4)) { page = mi_segment_small_page_alloc(tld,os_tld); } - else if (block_size <= (MI_MEDIUM_PAGE_SIZE/16)*3) { + else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) { page = mi_segment_medium_page_alloc(tld, os_tld); } else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) { From a02204970513fa48183e8f89736e148cd837cbda Mon Sep 17 00:00:00 2001 From: daan Date: Mon, 22 Jul 2019 21:25:57 -0700 Subject: [PATCH 19/36] remove old comment --- src/page.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/page.c b/src/page.c index d46a5aad..69d32bfe 100644 --- a/src/page.c +++ b/src/page.c @@ -733,11 +733,4 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // and try again, this time succeeding! (i.e. this should never recurse) return _mi_page_malloc(heap, page, size); - /* - if (page->used == page->reserved) { - // needed for huge pages to free reliably from other threads. - mi_page_to_full(page,mi_page_queue_of(page)); - } - return p; - */ } From f2f45ad5df2d138beee086e46931bf454cc11527 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 09:36:58 -0700 Subject: [PATCH 20/36] fix cmake build on windows --- CMakeLists.txt | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ec0fd99a..45d5f988 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,14 +121,28 @@ add_library(mimalloc SHARED ${mi_sources}) set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} NO_SONAME "YES" OUTPUT_NAME ${mi_basename} ) target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT) target_compile_options(mimalloc PRIVATE ${mi_cflags}) +target_link_libraries(mimalloc PUBLIC ${mi_libraries}) target_include_directories(mimalloc PUBLIC $ $ ) -target_link_libraries(mimalloc PUBLIC ${mi_libraries}) +if(WIN32) + # On windows copy the mimalloc redirection dll too. + target_link_libraries(mimalloc PRIVATE ../../bin/mimalloc-redirect) + add_custom_command(TARGET mimalloc POST_BUILD + COMMAND "${CMAKE_COMMAND}" -E copy "../../bin/mimalloc-redirect.dll" $ + COMMENT "Copy mimalloc-redirect.dll to output directory") +endif() # static library add_library(mimalloc-static STATIC ${mi_sources}) +target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) +target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) +target_link_libraries(mimalloc-static PUBLIC ${mi_libraries}) +target_include_directories(mimalloc-static PUBLIC + $ + $ +) if(WIN32) # When building both static and shared libraries on Windows, a static library should use a # different output name to avoid the conflict with the import library of a shared one. 
@@ -137,14 +151,6 @@ if(WIN32) else() set_target_properties(mimalloc-static PROPERTIES OUTPUT_NAME ${mi_basename}) endif() -target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB) -target_compile_options(mimalloc-static PRIVATE ${mi_cflags}) - -target_include_directories(mimalloc-static PUBLIC - $ - $ -) -target_link_libraries(mimalloc-static PUBLIC ${mi_libraries}) # install static and shared library, and the include files install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_dir} LIBRARY NAMELINK_SKIP) From 13364b50b8017b1c7357db1e6bb3a97fa313cd63 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 09:37:36 -0700 Subject: [PATCH 21/36] fix 32-bit build of stress test --- test/test-stress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test-stress.c b/test/test-stress.c index 298e5a17..511679ac 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -27,7 +27,7 @@ static int N = 10; // scaling factor static volatile void* transfer[TRANSFERS]; -#if (INTPTR_MAX != UINT32_MAX) +#if (UINTPTR_MAX != UINT32_MAX) const uintptr_t cookie = 0xbf58476d1ce4e5b9UL; #else const uintptr_t cookie = 0x1ce4e5b9UL; @@ -39,7 +39,7 @@ typedef uintptr_t* random_t; static uintptr_t pick(random_t r) { uintptr_t x = *r; - #if (INTPTR_MAX > UINT32_MAX) + #if (UINTPTR_MAX > UINT32_MAX) // by Sebastiano Vigna, see: x ^= x >> 30; x *= 0xbf58476d1ce4e5b9UL; @@ -183,7 +183,7 @@ static void run_os_threads(size_t nthreads) { for (uintptr_t i = 0; i < nthreads; i++) { thandles[i] = CreateThread(0, 4096, &thread_entry, (void*)(i), 0, &tids[i]); } - for (int i = 0; i < nthreads; i++) { + for (size_t i = 0; i < nthreads; i++) { WaitForSingleObject(thandles[i], INFINITE); } } From c02a0c9b49771cdfef95fde4a577c8d92423de15 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 09:59:20 -0700 Subject: [PATCH 22/36] ensure cmake uses C++ compilation with MSVC --- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45d5f988..980ab542 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,6 @@ set(mi_sources src/options.c src/init.c) - # Set default build type if (NOT CMAKE_BUILD_TYPE) if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$") @@ -44,6 +43,11 @@ if("${CMAKE_BINARY_DIR}" MATCHES ".*(S|s)ecure$") set(MI_SECURE "ON") endif() +if(CMAKE_C_COMPILER_ID MATCHES "MSVC") + set(MI_USE_CXX "ON") +endif() + + # Options if(MI_OVERRIDE MATCHES "ON") message(STATUS "Override standard malloc (MI_OVERRIDE=ON)") @@ -78,7 +82,7 @@ endif() if(MI_USE_CXX MATCHES "ON") message(STATUS "Use the C++ compiler to compile (MI_USE_CXX=ON)") set_source_files_properties(${mi_sources} PROPERTIES LANGUAGE CXX ) - set_source_files_properties(src/static.c test/test-api.c PROPERTIES LANGUAGE CXX ) + set_source_files_properties(src/static.c test/test-api.c test/test-stress.c PROPERTIES LANGUAGE CXX ) endif() # Compiler flags From ab022e4271c51fb164ae31cb530f65f6884ca17d Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 12:40:42 -0700 Subject: [PATCH 23/36] fix mimalloc-redirect path on windows cmake build --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 980ab542..8b37e579 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,9 +132,9 @@ target_include_directories(mimalloc PUBLIC ) if(WIN32) # On windows copy the mimalloc redirection dll too. 
- target_link_libraries(mimalloc PRIVATE ../../bin/mimalloc-redirect) + target_link_libraries(mimalloc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect.lib) add_custom_command(TARGET mimalloc POST_BUILD - COMMAND "${CMAKE_COMMAND}" -E copy "../../bin/mimalloc-redirect.dll" $ + COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect.dll" $ COMMENT "Copy mimalloc-redirect.dll to output directory") endif() From fa5dc176213b557bcd0bc0b2c7bd953b576547f1 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 15:09:54 -0700 Subject: [PATCH 24/36] fix merge conflicts --- src/options.c | 20 ++------------------ src/os.c | 5 ----- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/src/options.c b/src/options.c index 349599ad..56fb4851 100644 --- a/src/options.c +++ b/src/options.c @@ -37,7 +37,6 @@ typedef struct mi_option_desc_s { const char* name; // option name without `mimalloc_` prefix } mi_option_desc_t; -<<<<<<< HEAD static mi_option_desc_t options[_mi_option_last] = { // stable options @@ -45,16 +44,6 @@ static mi_option_desc_t options[_mi_option_last] = { MI_DEBUG, UNINIT, "show_errors" }, { 0, UNINIT, "verbose" }, -======= -static mi_option_desc_t options[_mi_option_last] = { - { 0, UNINIT, "page_reset" }, - { 0, UNINIT, "cache_reset" }, - { 0, UNINIT, "pool_commit" }, - { 0, UNINIT, "eager_commit" }, // secure and large pages must have eager commit - { 0, UNINIT, "large_os_pages" }, // use large OS pages - { 0, UNINIT, "reset_decommits" }, - { 0, UNINIT, "reset_discards" }, ->>>>>>> add comment about large pages #if MI_SECURE { MI_SECURE, INITIALIZED, "secure" }, // in a secure build the environment setting is ignored #else @@ -215,16 +204,11 @@ static const char* mi_getenv(const char* name) { #pragma warning(suppress:4996) const char* s = getenv(name); if (s == NULL) { -<<<<<<< HEAD - char buf[64+1]; - mi_strlcpy(buf,name,64); - for (size_t i = 0; i < strlen(buf); i++) { - buf[i] = toupper(name[i]); -======= + char buf[64]; + mi_strlcpy(buf,name,sizeof(buf)); size_t buf_size = strlen(buf); for (size_t i = 0; i < buf_size; i++) { buf[i] = toupper(buf[i]); ->>>>>>> Avoid using strlen function in loop } #pragma warning(suppress:4996) s = getenv(buf); diff --git a/src/os.c b/src/os.c index f6358912..56abd55d 100644 --- a/src/os.c +++ b/src/os.c @@ -202,14 +202,9 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static volatile uintptr_t large_page_try_ok = 0; void* p = NULL; if (use_large_os_page(size, try_alignment)) { -<<<<<<< HEAD - if (large_page_try_ok > 0) { - // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. -======= uintptr_t try_ok = mi_atomic_read(&large_page_try_ok); if (try_ok > 0) { // if a large page page allocation fails, it seems the calls to VirtualAlloc get very expensive. ->>>>>>> use atomic ops to guard large page tries on windows // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. 
mi_atomic_compare_exchange(&large_page_try_ok, try_ok - 1, try_ok); } From 89da085b6718ec55ffb263c4d7aae1f162180f2f Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 15:10:25 -0700 Subject: [PATCH 25/36] rename to mimalloc-override.dll and use C compilation --- ide/vs2017/mimalloc-override.vcxproj | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ide/vs2017/mimalloc-override.vcxproj b/ide/vs2017/mimalloc-override.vcxproj index f41b2efc..82f0b432 100644 --- a/ide/vs2017/mimalloc-override.vcxproj +++ b/ide/vs2017/mimalloc-override.vcxproj @@ -70,25 +70,25 @@ $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .dll - mimalloc + mimalloc-override $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .dll - mimalloc + mimalloc-override $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .dll - mimalloc + mimalloc-override $(SolutionDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(SolutionDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ .dll - mimalloc + mimalloc-override @@ -100,7 +100,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false - CompileAsCpp + Default ../../bin/mimalloc-redirect32.lib;%(AdditionalDependencies) @@ -121,7 +121,7 @@ MI_SHARED_LIB;MI_SHARED_LIB_EXPORT;MI_MALLOC_OVERRIDE;%(PreprocessorDefinitions); MultiThreadedDebugDLL false - CompileAsCpp + Default ../../bin/mimalloc-redirect.lib;%(AdditionalDependencies) @@ -152,7 +152,7 @@ $(IntDir) false MultiThreadedDLL - CompileAsCpp + Default true @@ -177,7 +177,7 @@ $(IntDir) false MultiThreadedDLL - CompileAsCpp + Default true From 3d6feead60daf63decdba9c5a0ae4a7dee0f037b Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 15:54:47 -0700 Subject: [PATCH 26/36] add heap region check to cfree --- src/alloc-posix.c | 4 +++- src/memory.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/alloc-posix.c b/src/alloc-posix.c index 1f55b3a8..672b73b3 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -38,7 +38,9 @@ size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { } void mi_cfree(void* p) mi_attr_noexcept { - mi_free(p); + if (mi_is_in_heap_region(p)) { + mi_free(p); + } } int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept { diff --git a/src/memory.c b/src/memory.c index e7d1887e..7f8cfb14 100644 --- a/src/memory.c +++ b/src/memory.c @@ -106,6 +106,7 @@ static size_t mi_good_commit_size(size_t size) { // Return if a pointer points into a region reserved by us. 
bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + if (p==NULL) return false; size_t count = mi_atomic_read(®ions_count); for (size_t i = 0; i < count; i++) { uint8_t* start = (uint8_t*)mi_atomic_read_ptr(®ions[i].start); From 613d0c1993c1b00c4b4039e4144a791cfbf0d57f Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 23 Jul 2019 17:57:27 -0700 Subject: [PATCH 27/36] merge 095a87b --- src/options.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/options.c b/src/options.c index 56fb4851..a68ddbdf 100644 --- a/src/options.c +++ b/src/options.c @@ -111,25 +111,34 @@ void mi_option_enable_default(mi_option_t option, bool enable) { #define MAX_ERROR_COUNT (10) static uintptr_t error_count = 0; // when MAX_ERROR_COUNT stop emitting errors and warnings +// When overriding malloc, we may recurse into mi_vfprintf if an allocation +// inside the C runtime causes another message. +static mi_decl_thread bool recurse = false; + // Define our own limited `fprintf` that avoids memory allocation. // We do this using `snprintf` with a limited buffer. static void mi_vfprintf( FILE* out, const char* prefix, const char* fmt, va_list args ) { char buf[256]; if (fmt==NULL) return; + if (_mi_preloading() || recurse) return; + recurse = true; if (out==NULL) out = stdout; - if (_mi_preloading()) return; vsnprintf(buf,sizeof(buf)-1,fmt,args); #ifdef _WIN32 - // on windows with redirection, the C runtime uses us and we cannot call `fputs` - // while called from the C runtime itself, so use a non-locking option - if (out==stderr) { - if (prefix != NULL) _cputs(prefix); - _cputs(buf); - return; + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so use direct console output. + if (out==stderr) { + if (prefix != NULL) _cputs(prefix); + _cputs(buf); } + else #endif - if (prefix != NULL) fputs(prefix,out); - fputs(buf,out); + { + if (prefix != NULL) fputs(prefix,out); + fputs(buf,out); + } + recurse = false; + return; } void _mi_fprintf( FILE* out, const char* fmt, ... 
) { From 0e8241c1400004ba7a4b0a780aba747207559e8d Mon Sep 17 00:00:00 2001 From: Jakub Szymanski Date: Tue, 30 Jul 2019 10:36:00 -0700 Subject: [PATCH 28/36] change from MIMALLOC_OVERRIDE to MIMALLOC_DISABLE_OVERRIDE --- src/alloc-override-win.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alloc-override-win.c b/src/alloc-override-win.c index 93886982..d1d51b9a 100644 --- a/src/alloc-override-win.c +++ b/src/alloc-override-win.c @@ -685,8 +685,8 @@ __declspec(dllexport) BOOL WINAPI DllEntry(HINSTANCE inst, DWORD reason, LPVOID if (ok) { // check if patching is not disabled #pragma warning(suppress:4996) - const char* s = getenv("MIMALLOC_OVERRIDE"); - bool enabled = (s == NULL || strstr("1;TRUE;YES;ON", s) != NULL); + const char* s = getenv("MIMALLOC_DISABLE_OVERRIDE"); + bool enabled = (s == NULL || !(strstr("1;TRUE;YES;ON", s) != NULL)); if (!enabled) { _mi_verbose_message("override is disabled\n"); } From 6313c21d936c7368930837bf9f1501fcebdb3e45 Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 6 Aug 2019 18:25:57 -0700 Subject: [PATCH 29/36] fix output directory in test projects --- ide/vs2017/mimalloc-test-stress.vcxproj | 8 ++++---- ide/vs2017/mimalloc-test.vcxproj | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ide/vs2017/mimalloc-test-stress.vcxproj b/ide/vs2017/mimalloc-test-stress.vcxproj index e8cc5045..b8267d0b 100644 --- a/ide/vs2017/mimalloc-test-stress.vcxproj +++ b/ide/vs2017/mimalloc-test-stress.vcxproj @@ -67,19 +67,19 @@ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ diff --git a/ide/vs2017/mimalloc-test.vcxproj b/ide/vs2017/mimalloc-test.vcxproj index c1539aeb..27c7bb6e 100644 --- a/ide/vs2017/mimalloc-test.vcxproj +++ b/ide/vs2017/mimalloc-test.vcxproj @@ -67,19 +67,19 @@ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ - $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ + $(ProjectDir)..\..\out\msvc-$(Platform)\$(Configuration)\ $(ProjectDir)..\..\out\msvc-$(Platform)\$(ProjectName)\$(Configuration)\ From 56778fe7d210ffbb95e830e2bbcba04eb84351af Mon Sep 17 00:00:00 2001 From: daan Date: Tue, 6 Aug 2019 18:57:53 -0700 Subject: [PATCH 30/36] split normal and 
secure extend in a separate routine --- src/page.c | 130 +++++++++++++++++++++++++++++------------------------ 1 file changed, 71 insertions(+), 59 deletions(-) diff --git a/src/page.c b/src/page.c index 69d32bfe..b2bbc2ad 100644 --- a/src/page.c +++ b/src/page.c @@ -404,6 +404,59 @@ void _mi_page_retire(mi_page_t* page) { #define MI_MAX_SLICES (1UL << MI_MAX_SLICE_SHIFT) #define MI_MIN_SLICES (2) +static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) { + UNUSED(stats); + mi_assert_internal(page->free == NULL); + mi_assert_internal(page->local_free == NULL); + mi_assert_internal(page->capacity + extend <= page->reserved); + void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL); + size_t bsize = page->block_size; + + // initialize a randomized free list + // set up `slice_count` slices to alternate between + size_t shift = MI_MAX_SLICE_SHIFT; + while ((extend >> shift) == 0) { + shift--; + } + size_t slice_count = (size_t)1U << shift; + size_t slice_extend = extend / slice_count; + mi_assert_internal(slice_extend >= 1); + mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice + size_t counts[MI_MAX_SLICES]; // available objects in the slice + for (size_t i = 0; i < slice_count; i++) { + blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); + counts[i] = slice_extend; + } + counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) + + // and initialize the free list by randomly threading through them + // set up first element + size_t current = _mi_heap_random(heap) % slice_count; + counts[current]--; + page->free = blocks[current]; + // and iterate through the rest + uintptr_t rnd = heap->random; + for (size_t i = 1; i < extend; i++) { + // call random_shuffle only every INTPTR_SIZE rounds + size_t round = i%MI_INTPTR_SIZE; + if (round == 0) rnd = _mi_random_shuffle(rnd); + // select a random next slice index + size_t next = ((rnd >> 8*round) & (slice_count-1)); + while (counts[next]==0) { // ensure it still has space + next++; + if (next==slice_count) next = 0; + } + // and link the current block to it + counts[next]--; + mi_block_t* block = blocks[current]; + blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block + mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` + current = next; + } + mi_block_set_next(page, blocks[current], NULL); // end of the list + heap->random = _mi_random_shuffle(rnd); +} + static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) { UNUSED(stats); @@ -413,66 +466,17 @@ static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t e void* page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); size_t bsize = page->block_size; mi_block_t* start = mi_page_block_at(page, page_area, page->capacity); - if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) { - // initialize a sequential free list - mi_block_t* end = mi_page_block_at(page, page_area, page->capacity + extend - 1); - mi_block_t* block = start; - for (size_t i = 0; i < extend; i++) { - mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); - mi_block_set_next(page,block,next); - block = next; - } - mi_block_set_next(page, end, NULL); - page->free = start; - } - else { - // initialize a randomized free list - // set up `slice_count` slices to alternate between - size_t 
shift = MI_MAX_SLICE_SHIFT; - while ((extend >> shift) == 0) { - shift--; - } - size_t slice_count = (size_t)1U << shift; - size_t slice_extend = extend / slice_count; - mi_assert_internal(slice_extend >= 1); - mi_block_t* blocks[MI_MAX_SLICES]; // current start of the slice - size_t counts[MI_MAX_SLICES]; // available objects in the slice - for (size_t i = 0; i < slice_count; i++) { - blocks[i] = mi_page_block_at(page, page_area, page->capacity + i*slice_extend); - counts[i] = slice_extend; - } - counts[slice_count-1] += (extend % slice_count); // final slice holds the modulus too (todo: distribute evenly?) - // and initialize the free list by randomly threading through them - // set up first element - size_t current = _mi_heap_random(heap) % slice_count; - counts[current]--; - page->free = blocks[current]; - // and iterate through the rest - uintptr_t rnd = heap->random; - for (size_t i = 1; i < extend; i++) { - // call random_shuffle only every INTPTR_SIZE rounds - size_t round = i%MI_INTPTR_SIZE; - if (round == 0) rnd = _mi_random_shuffle(rnd); - // select a random next slice index - size_t next = ((rnd >> 8*round) & (slice_count-1)); - while (counts[next]==0) { // ensure it still has space - next++; - if (next==slice_count) next = 0; - } - // and link the current block to it - counts[next]--; - mi_block_t* block = blocks[current]; - blocks[current] = (mi_block_t*)((uint8_t*)block + bsize); // bump to the following block - mi_block_set_next(page, block, blocks[next]); // and set next; note: we may have `current == next` - current = next; - } - mi_block_set_next( page, blocks[current], NULL); // end of the list - heap->random = _mi_random_shuffle(rnd); + // initialize a sequential free list + mi_block_t* last = mi_page_block_at(page, page_area, page->capacity + extend - 1); + mi_block_t* block = start; + while(block <= last) { + mi_block_t* next = (mi_block_t*)((uint8_t*)block + bsize); + mi_block_set_next(page,block,next); + block = next; } - // enable the new free list - page->capacity += (uint16_t)extend; - _mi_stat_increase(&stats->page_committed, extend * page->block_size); + mi_block_set_next(page, last, NULL); + page->free = start; } /* ----------------------------------------------------------- @@ -518,7 +522,15 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st mi_assert_internal(extend < (1UL<<16)); // and append the extend the free list - mi_page_free_list_extend(heap, page, extend, stats ); + if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) { + mi_page_free_list_extend(heap, page, extend, stats ); + } + else { + mi_page_free_list_extend_secure(heap, page, extend, stats); + } + // enable the new free list + page->capacity += (uint16_t)extend; + _mi_stat_increase(&stats->page_committed, extend * page->block_size); mi_assert_expensive(mi_page_is_valid_init(page)); } From 55778d2fe41fe78dea4c3da3e89c5b7fd15c624a Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 8 Aug 2019 11:36:13 -0700 Subject: [PATCH 31/36] improved stats --- include/mimalloc-internal.h | 2 +- include/mimalloc-types.h | 4 ++-- src/init.c | 3 ++- src/page.c | 7 ++++--- src/segment.c | 13 +++++++++---- src/stats.c | 19 ++++++++++++------- 6 files changed, 30 insertions(+), 18 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index e261dba2..cc487a21 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -307,7 +307,7 @@ static inline bool mi_page_all_used(mi_page_t* page) { static inline bool 
mi_page_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; - return (page->reserved - page->used + page->thread_freed < frac); + return (page->reserved - page->used + page->thread_freed <= frac); } static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) { diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 5c14ffd4..6c094091 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -324,12 +324,12 @@ typedef struct mi_stats_s { mi_stat_count_t pages_abandoned; mi_stat_count_t pages_extended; mi_stat_count_t mmap_calls; - mi_stat_count_t mmap_right_align; - mi_stat_count_t mmap_ensure_aligned; mi_stat_count_t commit_calls; mi_stat_count_t threads; mi_stat_count_t huge; mi_stat_count_t malloc; + mi_stat_count_t segments_cache; + mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; #if MI_STAT>1 mi_stat_count_t normal[MI_BIN_HUGE+1]; diff --git a/src/init.c b/src/init.c index 8075ea35..3b060fa4 100644 --- a/src/init.c +++ b/src/init.c @@ -61,7 +61,8 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ + MI_STAT_COUNT_NULL(), \ + { 0, 0 }, \ { 0, 0 } \ MI_STAT_COUNT_END_NULL() diff --git a/src/page.c b/src/page.c index b2bbc2ad..aa8a8415 100644 --- a/src/page.c +++ b/src/page.c @@ -216,7 +216,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size mi_page_t* page = _mi_segment_page_alloc(block_size, &heap->tld->segments, &heap->tld->os); if (page == NULL) return NULL; mi_page_init(heap, page, block_size, &heap->tld->stats); - mi_heap_stat_increase( heap, pages, 1); + _mi_stat_increase( &heap->tld->stats.pages, 1); mi_page_queue_push(heap, pq, page); mi_assert_expensive(_mi_page_is_valid(page)); return page; @@ -352,7 +352,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { // account for huge pages here if (page->block_size > MI_LARGE_SIZE_MAX) { - mi_heap_stat_decrease(page->heap, huge, page->block_size); + _mi_stat_decrease(&page->heap->tld->stats.huge, page->block_size); } // remove from the page list @@ -386,6 +386,7 @@ void _mi_page_retire(mi_page_t* page) { // if its neighbours are almost fully used. 
if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { + _mi_stat_counter_increase(&page->heap->tld->stats.page_no_retire,1); return; // dont't retire after all } } @@ -700,7 +701,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { if (page != NULL) { mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(page->block_size == block_size); - mi_heap_stat_increase( heap, huge, block_size); + _mi_stat_increase( &heap->tld->stats.huge, block_size); } return page; } diff --git a/src/segment.c b/src/segment.c index 8f254a26..a86c3bc0 100644 --- a/src/segment.c +++ b/src/segment.c @@ -248,17 +248,19 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t tld->cache = segment->next; segment->next = NULL; mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); + _mi_stat_decrease(&tld->stats->segments_cache, 1); return segment; } static bool mi_segment_cache_full(mi_segments_tld_t* tld) { - if (tld->cache_count < MI_SEGMENT_CACHE_MAX && - tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache + if (tld->cache_count < MI_SEGMENT_CACHE_MAX + && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) + ) { // always allow 1 element cache return false; } // take the opportunity to reduce the segment cache if it is too large (now) // TODO: this never happens as we check against peak usage, should we use current usage instead? - while (tld->cache_count > (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { + while (tld->cache_count > MI_SEGMENT_CACHE_MAX ) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { mi_segment_t* segment = mi_segment_cache_pop(0,tld); mi_assert_internal(segment != NULL); if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); @@ -269,7 +271,9 @@ static bool mi_segment_cache_full(mi_segments_tld_t* tld) { static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); mi_assert_internal(segment->next == NULL); - if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) return false; + if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { + return false; + } mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); if (mi_option_is_enabled(mi_option_cache_reset)) { _mi_mem_reset((uint8_t*)segment + segment->segment_info_size, segment->segment_size - segment->segment_info_size, tld->stats); @@ -277,6 +281,7 @@ static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) segment->next = tld->cache; tld->cache = segment; tld->cache_count++; + _mi_stat_increase(&tld->stats->segments_cache,1); return true; } diff --git a/src/stats.c b/src/stats.c index 2b15bf9e..8725e48c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -99,14 +99,14 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); mi_stat_add(&stats->segments_abandoned, &src->segments_abandoned, 1); mi_stat_add(&stats->mmap_calls, &src->mmap_calls, 1); - mi_stat_add(&stats->mmap_ensure_aligned, &src->mmap_ensure_aligned, 1); - mi_stat_add(&stats->mmap_right_align, &src->mmap_right_align, 1); mi_stat_add(&stats->commit_calls, &src->commit_calls, 1); mi_stat_add(&stats->threads, &src->threads, 1); mi_stat_add(&stats->pages_extended, &src->pages_extended, 1); 
mi_stat_add(&stats->malloc, &src->malloc, 1); + mi_stat_add(&stats->segments_cache, &src->segments_cache, 1); mi_stat_add(&stats->huge, &src->huge, 1); + mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); #if MI_STAT>1 for (size_t i = 0; i <= MI_BIN_HUGE; i++) { @@ -172,10 +172,15 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t } static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, FILE* out ) { - double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); - _mi_fprintf(out,"%10s: %7.1f avg\n", msg, avg); + _mi_fprintf(out, "%10s:", msg); + mi_print_amount(stat->total, -1, out); + _mi_fprintf(out, "\n"); } +static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, FILE* out) { + double avg = (stat->count == 0 ? 0.0 : (double)stat->total / (double)stat->count); + _mi_fprintf(out, "%10s: %7.1f avg\n", msg, avg); +} static void mi_print_header( FILE* out ) { @@ -229,15 +234,15 @@ static void _mi_stats_print(mi_stats_t* stats, double secs, FILE* out) mi_attr_n mi_stat_print(&stats->page_committed, "touched", 1, out); mi_stat_print(&stats->segments, "segments", -1, out); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out); + mi_stat_print(&stats->segments_cache, "-cached", -1, out); mi_stat_print(&stats->pages, "pages", -1, out); mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out); mi_stat_print(&stats->pages_extended, "-extended", 0, out); + mi_stat_counter_print(&stats->page_no_retire, "-noretire", out); mi_stat_print(&stats->mmap_calls, "mmaps", 0, out); - mi_stat_print(&stats->mmap_right_align, "mmap fast", 0, out); - mi_stat_print(&stats->mmap_ensure_aligned, "mmap slow", 0, out); mi_stat_print(&stats->commit_calls, "commits", 0, out); mi_stat_print(&stats->threads, "threads", 0, out); - mi_stat_counter_print(&stats->searches, "searches", out); + mi_stat_counter_print_avg(&stats->searches, "searches", out); if (secs >= 0.0) _mi_fprintf(out, "%10s: %9.3f s\n", "elapsed", secs); From 6596e970a52d28a5e449e9765c33f88787a96bb5 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 8 Aug 2019 15:23:18 -0700 Subject: [PATCH 32/36] move in_full and has_aligned into page threadid for a single test in mi_free --- include/mimalloc-internal.h | 4 ++++ include/mimalloc-types.h | 32 ++++++++++++++++++++++---------- src/alloc.c | 25 +++++++++---------------- src/init.c | 9 +++++---- src/page.c | 7 ++++--- src/segment.c | 8 ++++++-- 6 files changed, 50 insertions(+), 35 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index cc487a21..2c3ccffd 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -314,6 +314,10 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) return &((mi_heap_t*)heap)->pages[_mi_bin(size)]; } +static inline uintptr_t mi_page_thread_id(const mi_page_t* page) { + return (page->flags.padding << MI_PAGE_FLAGS_BITS); +} + // ------------------------------------------------------------------- // Encoding/Decoding the free list next pointers // ------------------------------------------------------------------- diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 6c094091..073d23d3 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -123,14 +123,26 @@ typedef enum mi_delayed_e { } mi_delayed_t; +// Use the lowest two bits of a thread id for the `in_full` and 
`has_aligned` flags +// This allows a single test in `mi_free` to check for unlikely cases +// (namely, non-local free, aligned free, or freeing in a full page) +#define MI_PAGE_FLAGS_BITS (2) typedef union mi_page_flags_u { - uint16_t value; + uintptr_t threadidx; struct { - bool has_aligned; - bool in_full; + #ifdef MI_BIG_ENDIAN + uintptr_t padding : (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS); + uintptr_t in_full : 1; + uintptr_t has_aligned : 1; + #else + uintptr_t in_full : 1; + uintptr_t has_aligned : 1; + uintptr_t padding : (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS); + #endif }; } mi_page_flags_t; + // Thread free list. // We use the bottom 2 bits of the pointer for mi_delayed_t flags typedef uintptr_t mi_thread_free_t; @@ -160,16 +172,16 @@ typedef struct mi_page_s { bool is_reset:1; // `true` if the page memory was reset bool is_committed:1; // `true` if the page virtual memory is committed - // layout like this to optimize access in `mi_malloc` and `mi_free` - mi_page_flags_t flags; + // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed uint16_t reserved; // number of blocks reserved in memory - + // 16 bits padding mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #if MI_SECURE uintptr_t cookie; // random cookie to encode the free lists #endif size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + mi_page_flags_t flags; // threadid:62 | has_aligned:1 | in_full:1 mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) volatile uintptr_t thread_freed; // at least this number of blocks are in `thread_free` @@ -182,10 +194,10 @@ typedef struct mi_page_s { struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // improve page index calculation -#if MI_INTPTR_SIZE==8 - //void* padding[1]; // 12 words on 64-bit +#if (MI_INTPTR_SIZE==8 && MI_SECURE==0) + void* padding[1]; // 12 words on 64-bit #elif MI_INTPTR_SIZE==4 - void* padding[1]; // 12 words on 32-bit + // void* padding[1]; // 12 words on 32-bit #endif } mi_page_t; @@ -215,7 +227,7 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). 
- uintptr_t thread_id; // unique id of the thread owning this segment + volatile uintptr_t thread_id; // unique id of the thread owning this segment mi_page_kind_t page_kind; // kind of pages: small, large, or huge mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; diff --git a/src/alloc.c b/src/alloc.c index 6a91c0ad..f23ed896 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -223,8 +223,7 @@ void mi_free(void* p) mi_attr_noexcept return; } #endif - - bool local = (_mi_thread_id() == segment->thread_id); // preload, note: putting the thread_id in the page->flags does not improve performance + mi_page_t* page = _mi_segment_page_of(segment, p); #if (MI_STAT>1) @@ -237,23 +236,17 @@ void mi_free(void* p) mi_attr_noexcept #endif // adjust if it might be an un-aligned block - if (mi_likely(page->flags.value==0)) { // not full or aligned + uintptr_t tid = _mi_thread_id(); + if (mi_likely(tid == page->flags.threadidx)) { // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance - // owning thread can free a block directly - mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance - page->local_free = block; - page->used--; - if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } - } - else { - // use atomic operations for a multi-threaded free - _mi_free_block_mt(page, block); - } + mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance + page->local_free = block; + page->used--; + if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } } else { - // aligned blocks, or a full page; use the more generic path - mi_free_generic(segment, page, local, p); + // non-local, aligned blocks, or a full page; use the more generic path + mi_free_generic(segment, page, tid == mi_page_thread_id(page), p); } } diff --git a/src/init.c b/src/init.c index 3b060fa4..13ed9561 100644 --- a/src/init.c +++ b/src/init.c @@ -12,15 +12,16 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, {0}, 0, 0, - NULL, 0, // free, used + 0, false, false, false, 0, 0, + NULL, // free #if MI_SECURE 0, #endif + 0, {0}, // used, flags NULL, 0, 0, 0, NULL, NULL, NULL - #if (MI_INTPTR_SIZE==4) - , { NULL } + #if (MI_INTPTR_SIZE==8 && MI_SECURE==0) + , { NULL } #endif }; diff --git a/src/page.c b/src/page.c index aa8a8415..4ff797c0 100644 --- a/src/page.c +++ b/src/page.c @@ -71,10 +71,11 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(page->block_size > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); - + mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); + mi_assert_internal(segment->thread_id == mi_page_thread_id(page)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -458,7 +459,7 @@ static void mi_page_free_list_extend_secure(mi_heap_t* heap, mi_page_t* page, si heap->random = _mi_random_shuffle(rnd); } -static void mi_page_free_list_extend( mi_heap_t* heap, mi_page_t* page, size_t extend, mi_stats_t* stats) +static void mi_page_free_list_extend( mi_page_t* page, size_t extend, mi_stats_t* stats) { UNUSED(stats); mi_assert_internal(page->free == NULL); @@ -524,7 +525,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st // and append the extend the free list if (extend < MI_MIN_SLICES || !mi_option_is_enabled(mi_option_secure)) { - mi_page_free_list_extend(heap, page, extend, stats ); + mi_page_free_list_extend(page, extend, stats ); } else { mi_page_free_list_extend_secure(heap, page, extend, stats); diff --git a/src/segment.c b/src/segment.c index a86c3bc0..256c30eb 100644 --- a/src/segment.c +++ b/src/segment.c @@ -226,6 +226,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { + segment->thread_id = 0; mi_segments_track_size(-((long)segment_size),tld); if (mi_option_is_enabled(mi_option_secure)) { _mi_mem_unprotect(segment, segment->segment_size); // ensure no more guard pages are set @@ -412,8 +413,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); mi_assert(segment->prev == NULL); - _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - segment->thread_id = 0; + _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); // update reset memory statistics /* @@ -618,6 +618,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } else { // otherwise reclaim it + page->flags.threadidx = segment->thread_id; _mi_page_reclaim(heap,page); } } @@ -648,6 +649,7 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld->stats); page->segment_in_use = true; + page->flags.threadidx = segment->thread_id; segment->used++; mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { @@ 
-687,6 +689,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_ segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; + page->flags.threadidx = segment->thread_id; return page; } @@ -698,6 +701,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; + page->flags.threadidx = segment->thread_id; return page; } From 5e56b40fe6446bd6f1bb583805b6fb8230911ab6 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 8 Aug 2019 17:18:49 -0700 Subject: [PATCH 33/36] improve page flags handling --- include/mimalloc-internal.h | 8 +++++++- include/mimalloc-types.h | 14 +++++++------- src/alloc.c | 2 +- src/page.c | 4 ++-- src/segment.c | 8 ++++---- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 2c3ccffd..ad9b3ecf 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -315,7 +315,13 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) } static inline uintptr_t mi_page_thread_id(const mi_page_t* page) { - return (page->flags.padding << MI_PAGE_FLAGS_BITS); + return (page->flags.xthread_id << MI_PAGE_FLAGS_BITS); +} + +static inline void mi_page_init_flags(mi_page_t* page, uintptr_t thread_id) { + page->flags.value = 0; + page->flags.xthread_id = (thread_id >> MI_PAGE_FLAGS_BITS); + mi_assert(page->flags.value == thread_id); } // ------------------------------------------------------------------- diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 073d23d3..3af664cf 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -126,18 +126,18 @@ typedef enum mi_delayed_e { // Use the lowest two bits of a thread id for the `in_full` and `has_aligned` flags // This allows a single test in `mi_free` to check for unlikely cases // (namely, non-local free, aligned free, or freeing in a full page) -#define MI_PAGE_FLAGS_BITS (2) +#define MI_PAGE_FLAGS_BITS (2) +#define MI_PAGE_FLAGS_TID_BITS (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS) typedef union mi_page_flags_u { - uintptr_t threadidx; + uintptr_t value; struct { #ifdef MI_BIG_ENDIAN - uintptr_t padding : (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS); + uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS; + #endif uintptr_t in_full : 1; uintptr_t has_aligned : 1; - #else - uintptr_t in_full : 1; - uintptr_t has_aligned : 1; - uintptr_t padding : (MI_INTPTR_SIZE*8 - MI_PAGE_FLAGS_BITS); + #ifndef MI_BIG_ENDIAN + uintptr_t xthread_id : MI_PAGE_FLAGS_TID_BITS; #endif }; } mi_page_flags_t; diff --git a/src/alloc.c b/src/alloc.c index f23ed896..fe9d5fb0 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -237,7 +237,7 @@ void mi_free(void* p) mi_attr_noexcept // adjust if it might be an un-aligned block uintptr_t tid = _mi_thread_id(); - if (mi_likely(tid == page->flags.threadidx)) { // local, and not full or aligned + if (mi_likely(tid == page->flags.value)) { // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance page->local_free = block; diff --git a/src/page.c b/src/page.c index 4ff797c0..9be0372d 100644 --- a/src/page.c +++ b/src/page.c @@ -75,7 +75,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_segment_t* segment = _mi_page_segment(page); uint8_t* start = _mi_page_start(segment,page,NULL); 
mi_assert_internal(start == _mi_segment_page_start(segment,page,page->block_size,NULL)); - mi_assert_internal(segment->thread_id == mi_page_thread_id(page)); + mi_assert_internal(segment->thread_id==0 || segment->thread_id == mi_page_thread_id(page)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); @@ -387,7 +387,7 @@ void _mi_page_retire(mi_page_t* page) { // if its neighbours are almost fully used. if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { - _mi_stat_counter_increase(&page->heap->tld->stats.page_no_retire,1); + _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1); return; // dont't retire after all } } diff --git a/src/segment.c b/src/segment.c index 256c30eb..f2fd09ad 100644 --- a/src/segment.c +++ b/src/segment.c @@ -618,7 +618,7 @@ bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segmen } else { // otherwise reclaim it - page->flags.threadidx = segment->thread_id; + mi_page_init_flags(page,segment->thread_id); _mi_page_reclaim(heap,page); } } @@ -649,7 +649,7 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl mi_assert_internal(mi_segment_has_free(segment)); mi_page_t* page = mi_segment_find_free(segment, tld->stats); page->segment_in_use = true; - page->flags.threadidx = segment->thread_id; + mi_page_init_flags(page,segment->thread_id); segment->used++; mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity) { @@ -689,7 +689,7 @@ static mi_page_t* mi_segment_large_page_alloc(mi_segments_tld_t* tld, mi_os_tld_ segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; - page->flags.threadidx = segment->thread_id; + mi_page_init_flags(page,segment->thread_id); return page; } @@ -701,7 +701,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld segment->used = 1; mi_page_t* page = &segment->pages[0]; page->segment_in_use = true; - page->flags.threadidx = segment->thread_id; + mi_page_init_flags(page,segment->thread_id); return page; } From 442bad91904532bdc7fe45370e676c4aa46fc7f7 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 9 Aug 2019 11:18:38 -0700 Subject: [PATCH 34/36] add good-fit for allowing larger blocks in smaller segments --- include/mimalloc-types.h | 26 +++++-- src/alloc.c | 9 ++- src/init.c | 37 ++++++--- src/page-queue.c | 11 ++- src/page.c | 6 +- src/segment.c | 14 ++-- test/main-override-static.c | 146 ++++++++++++++++++++++++++++++++++++ 7 files changed, 214 insertions(+), 35 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 3af664cf..45307c15 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -91,21 +91,31 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE) #define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE) -#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/8) // 64kb on 64-bit -#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/8) // 512kb on 64-bit +#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 64kb on 64-bit +#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 512kb on 64-bit #define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT) -// Maximum number of size classes. 
(spaced exponentially in 16.7% increments) -#define MI_BIN_HUGE (64U) - // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) -#if (MI_LARGE_WSIZE_MAX > 131072) +#define MI_BIN4 +#ifdef MI_BIN4 +// Maximum number of size classes. (spaced exponentially in 25% increments) +#define MI_BIN_HUGE (40U) + +#if (MI_LARGE_WSIZE_MAX > 524287) #error "define more bins" #endif +#else +// Maximum number of size classes. (spaced exponentially in 12.5% increments) +#define MI_BIN_HUGE (70U) + +#if (MI_LARGE_WSIZE_MAX > 393216) +#error "define more bins" +#endif +#endif typedef uintptr_t mi_encoded_t; @@ -172,10 +182,10 @@ typedef struct mi_page_s { bool is_reset:1; // `true` if the page memory was reset bool is_committed:1; // `true` if the page virtual memory is committed - // layout like this to optimize access in `mi_malloc` and `mi_free` + // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed uint16_t reserved; // number of blocks reserved in memory - // 16 bits padding + // 16 bits padding mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) #if MI_SECURE uintptr_t cookie; // random cookie to encode the free lists diff --git a/src/alloc.c b/src/alloc.c index fe9d5fb0..bfb37d19 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -57,6 +57,7 @@ extern inline void* mi_malloc_small(size_t size) mi_attr_noexcept { return mi_heap_malloc_small(mi_get_default_heap(), size); } + // zero initialized small block void* mi_zalloc_small(size_t size) mi_attr_noexcept { void* p = mi_malloc_small(size); @@ -71,7 +72,7 @@ extern inline void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcep void* p; if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { p = mi_heap_malloc_small(heap, size); - } + } else { p = _mi_malloc_generic(heap, size); } @@ -235,11 +236,11 @@ void mi_free(void* p) mi_attr_noexcept // huge page stat is accounted for in `_mi_page_retire` #endif - // adjust if it might be an un-aligned block uintptr_t tid = _mi_thread_id(); - if (mi_likely(tid == page->flags.value)) { // local, and not full or aligned + if (mi_likely(tid == page->flags.value)) { + // local, and not full or aligned mi_block_t* block = (mi_block_t*)p; - mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance + mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; if (mi_unlikely(mi_page_all_free(page))) { _mi_page_retire(page); } diff --git a/src/init.c b/src/init.c index 13ed9561..1ea510b2 100644 --- a/src/init.c +++ b/src/init.c @@ -32,24 +32,37 @@ const mi_page_t _mi_page_empty = { // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } +#ifdef MI_BIN4 #define MI_PAGE_QUEUES_EMPTY \ { QNULL(1), \ - QNULL(1), QNULL(2), QNULL(3), QNULL(4), QNULL(5), QNULL(6), QNULL(7), QNULL(8), \ - QNULL(10), QNULL(12), QNULL(14), QNULL(16), QNULL(20), QNULL(24), QNULL(28), QNULL(32), \ - QNULL(40), QNULL(48), QNULL(56), QNULL(64), QNULL(80), QNULL(96), QNULL(112), QNULL(128), \ - QNULL(160), QNULL(192), QNULL(224), QNULL(256), QNULL(320), QNULL(384), QNULL(448), QNULL(512), \ - QNULL(640), QNULL(768), QNULL(896), QNULL(1024), QNULL(1280), QNULL(1536), QNULL(1792), QNULL(2048), \ - QNULL(2560), QNULL(3072), QNULL(3584), QNULL(4096), QNULL(5120), QNULL(6144), QNULL(7168), QNULL(8192), \ - 
QNULL(10240), QNULL(12288), QNULL(14336), QNULL(16384), QNULL(20480), QNULL(24576), QNULL(28672), QNULL(32768), \ - QNULL(40960), QNULL(49152), QNULL(57344), QNULL(65536), QNULL(81920), QNULL(98304), QNULL(114688), \ - QNULL(MI_LARGE_WSIZE_MAX + 1 /*131072, Huge queue */), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 11), QNULL( 15), QNULL( 23), QNULL( 31), QNULL( 47), QNULL( 63), QNULL( 95), QNULL( 127), /* 16 */ \ + QNULL( 191), QNULL( 255), QNULL( 383), QNULL( 511), QNULL( 767), QNULL( 1023), QNULL( 1535), QNULL( 2047), /* 24 */ \ + QNULL( 3071), QNULL( 4095), QNULL( 6143), QNULL( 8191), QNULL( 12287), QNULL( 16383), QNULL( 24575), QNULL( 32767), /* 32 */ \ + QNULL( 49151), QNULL( 65535), QNULL( 98303), QNULL(131071), QNULL(196607), QNULL(262143), QNULL(393215), /* 39 */ \ + QNULL(MI_LARGE_WSIZE_MAX + 1 /* 524287, Huge queue */), \ QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ } +#else +#define MI_PAGE_QUEUES_EMPTY \ + { QNULL(1), \ + QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ + QNULL( 10), QNULL( 12), QNULL( 14), QNULL( 16), QNULL( 20), QNULL( 24), QNULL( 28), QNULL( 32), /* 16 */ \ + QNULL( 40), QNULL( 48), QNULL( 56), QNULL( 64), QNULL( 80), QNULL( 96), QNULL( 112), QNULL( 128), /* 24 */ \ + QNULL( 160), QNULL( 192), QNULL( 224), QNULL( 256), QNULL( 320), QNULL( 384), QNULL( 448), QNULL( 512), /* 32 */ \ + QNULL( 640), QNULL( 768), QNULL( 896), QNULL( 1024), QNULL( 1280), QNULL( 1536), QNULL( 1792), QNULL( 2048), /* 40 */ \ + QNULL( 2560), QNULL( 3072), QNULL( 3584), QNULL( 4096), QNULL( 5120), QNULL( 6144), QNULL( 7168), QNULL( 8192), /* 48 */ \ + QNULL( 10240), QNULL( 12288), QNULL( 14336), QNULL( 16384), QNULL( 20480), QNULL( 24576), QNULL( 28672), QNULL( 32768), /* 56 */ \ + QNULL( 40960), QNULL( 49152), QNULL( 57344), QNULL( 65536), QNULL( 81920), QNULL( 98304), QNULL(114688), QNULL(131072), /* 64 */ \ + QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \ + QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \ + QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ } +#endif #define MI_STAT_COUNT_NULL() {0,0,0,0} // Empty statistics #if MI_STAT>1 -#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT64(MI_STAT_COUNT_NULL) } +#define MI_STAT_COUNT_END_NULL() , { MI_STAT_COUNT_NULL(), MI_INIT32(MI_STAT_COUNT_NULL) } #else #define MI_STAT_COUNT_END_NULL() #endif @@ -97,8 +110,8 @@ static mi_tld_t tld_main = { 0, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, 0, 0, 0, 0, 0, 0, NULL, tld_main_stats }, // segments - { 0, NULL, NULL, 0, tld_main_stats }, // os - { MI_STATS_NULL } // stats + { 0, NULL, NULL, 0, tld_main_stats }, // os + { MI_STATS_NULL } // stats }; mi_heap_t _mi_heap_main = { diff --git a/src/page-queue.c b/src/page-queue.c index fd388113..69ebcc75 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -97,7 +97,7 @@ uint8_t _mi_bsr(uintptr_t x) { // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. 
-inline uint8_t _mi_bin(size_t size) { +extern inline uint8_t _mi_bin(size_t size) { size_t wsize = _mi_wsize_from_size(size); uint8_t bin; if (wsize <= 1) { @@ -120,16 +120,21 @@ inline uint8_t _mi_bin(size_t size) { bin = MI_BIN_HUGE; } else { - #if defined(MI_ALIGN4W) + #if defined(MI_ALIGN4W) if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes #endif + #ifdef MI_BIN4 + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + #else wsize--; // find the highest bit uint8_t b = mi_bsr32((uint32_t)wsize); - // and use the top 3 bits to determine the bin (~16% worst internal fragmentation). + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + #endif } mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE); return bin; diff --git a/src/page.c b/src/page.c index 9be0372d..e6be8df6 100644 --- a/src/page.c +++ b/src/page.c @@ -385,7 +385,7 @@ void _mi_page_retire(mi_page_t* page) { // is the only page left with free blocks. It is not clear // how to check this efficiently though... for now we just check // if its neighbours are almost fully used. - if (mi_likely(page->block_size <= MI_SMALL_SIZE_MAX)) { + if (mi_likely(page->block_size <= MI_MEDIUM_SIZE_MAX)) { if (mi_page_mostly_used(page->prev) && mi_page_mostly_used(page->next)) { _mi_stat_counter_increase(&_mi_stats_main.page_no_retire,1); return; // dont't retire after all @@ -722,10 +722,10 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept // call potential deferred free routines _mi_deferred_free(heap, false); - + // free delayed frees from other threads _mi_heap_delayed_free(heap); - + // huge allocation? mi_page_t* page; if (mi_unlikely(size > MI_LARGE_SIZE_MAX)) { diff --git a/src/segment.c b/src/segment.c index f2fd09ad..736345bf 100644 --- a/src/segment.c +++ b/src/segment.c @@ -236,8 +236,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se // The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -// and no more than 2. -#define MI_SEGMENT_CACHE_MAX (2) +// and no more than 4. 
+#define MI_SEGMENT_CACHE_MAX (4) #define MI_SEGMENT_CACHE_FRACTION (8) // note: returned segment may be partially reset @@ -708,16 +708,20 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld /* ----------------------------------------------------------- Page allocation and free ----------------------------------------------------------- */ +static bool mi_is_good_fit(size_t bsize, size_t size) { + // good fit if no more than 25% wasted + return (bsize > 0 && size > 0 && bsize < size && (size - (size % bsize)) < (size/4)); +} mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= (MI_SMALL_PAGE_SIZE/4)) { + if (block_size <= MI_SMALL_SIZE_MAX || mi_is_good_fit(block_size,MI_SMALL_PAGE_SIZE)) { page = mi_segment_small_page_alloc(tld,os_tld); } - else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) { + else if (block_size <= MI_MEDIUM_SIZE_MAX || mi_is_good_fit(block_size, MI_MEDIUM_PAGE_SIZE)) { page = mi_segment_medium_page_alloc(tld, os_tld); } - else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) { + else if (block_size < MI_LARGE_SIZE_MAX || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t))) { page = mi_segment_large_page_alloc(tld, os_tld); } else { diff --git a/test/main-override-static.c b/test/main-override-static.c index 6ddf4f37..83aa388a 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -6,8 +6,154 @@ #include #include // redefines malloc etc. +#include +#include + +#define MI_INTPTR_SIZE 8 +#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) + +#define MI_BIN_HUGE 100 +//#define MI_ALIGN2W + +// Bit scan reverse: return the index of the highest bit. +static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; + #if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); + #elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); + #else + # error "define bsr for non-32 or 64-bit platforms" + #endif +} + +static inline size_t _mi_wsize_from_size(size_t size) { + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. 
+extern inline uint8_t _mi_bin8(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + #if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } + #endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + #if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes + #endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). + // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + +extern inline uint8_t _mi_bin4(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } + #if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } + #else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } + #endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + } + return bin; +} + +void mi_bins() { + printf(" QNULL(1), /* 0 */ \\\n "); + size_t last_bin = 0; + for (size_t size = 1; size < (MI_INTPTR_SIZE*MI_LARGE_WSIZE_MAX); size++) { + size_t bin = _mi_bin4(size); + if (bin != last_bin) { + size_t wsize = (size-1)/sizeof(intptr_t); + // printf("size: %6zd, wsize: %6d, bin: %6zd\n", size - 1, (size-1)/sizeof(intptr_t), last_bin); + printf("QNULL(%6zd), ", wsize); + if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + last_bin = bin; + } + } +} + + + int main() { mi_version(); + mi_bins(); void* p1 = malloc(78); void* p2 = malloc(24); free(p1); From 7b16aa9302e104ebc4fc74ad8bea450ca58f6829 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 9 Aug 2019 11:22:38 -0700 Subject: [PATCH 35/36] remove accidental commit of different size bins experiment --- include/mimalloc-types.h | 10 --- src/init.c | 12 --- src/page-queue.c | 5 -- test/main-override-static.c | 145 ------------------------------------ 4 files changed, 172 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 45307c15..6aa82210 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -100,22 +100,12 @@ terms of the MIT license. A copy of the license can be found in the file // due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) -#define MI_BIN4 -#ifdef MI_BIN4 -// Maximum number of size classes. (spaced exponentially in 25% increments) -#define MI_BIN_HUGE (40U) - -#if (MI_LARGE_WSIZE_MAX > 524287) -#error "define more bins" -#endif -#else // Maximum number of size classes. 
(spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (70U) #if (MI_LARGE_WSIZE_MAX > 393216) #error "define more bins" #endif -#endif typedef uintptr_t mi_encoded_t; diff --git a/src/init.c b/src/init.c index 1ea510b2..f807d74a 100644 --- a/src/init.c +++ b/src/init.c @@ -32,17 +32,6 @@ const mi_page_t _mi_page_empty = { // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } -#ifdef MI_BIN4 -#define MI_PAGE_QUEUES_EMPTY \ - { QNULL(1), \ - QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ - QNULL( 11), QNULL( 15), QNULL( 23), QNULL( 31), QNULL( 47), QNULL( 63), QNULL( 95), QNULL( 127), /* 16 */ \ - QNULL( 191), QNULL( 255), QNULL( 383), QNULL( 511), QNULL( 767), QNULL( 1023), QNULL( 1535), QNULL( 2047), /* 24 */ \ - QNULL( 3071), QNULL( 4095), QNULL( 6143), QNULL( 8191), QNULL( 12287), QNULL( 16383), QNULL( 24575), QNULL( 32767), /* 32 */ \ - QNULL( 49151), QNULL( 65535), QNULL( 98303), QNULL(131071), QNULL(196607), QNULL(262143), QNULL(393215), /* 39 */ \ - QNULL(MI_LARGE_WSIZE_MAX + 1 /* 524287, Huge queue */), \ - QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ } -#else #define MI_PAGE_QUEUES_EMPTY \ { QNULL(1), \ QNULL( 1), QNULL( 2), QNULL( 3), QNULL( 4), QNULL( 5), QNULL( 6), QNULL( 7), QNULL( 8), /* 8 */ \ @@ -56,7 +45,6 @@ const mi_page_t _mi_page_empty = { QNULL(163840), QNULL(196608), QNULL(229376), QNULL(262144), QNULL(327680), /* 69 */ \ QNULL(MI_LARGE_WSIZE_MAX + 1 /* 393216, Huge queue */), \ QNULL(MI_LARGE_WSIZE_MAX + 2) /* Full queue */ } -#endif #define MI_STAT_COUNT_NULL() {0,0,0,0} diff --git a/src/page-queue.c b/src/page-queue.c index 69ebcc75..a386f8a1 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -123,10 +123,6 @@ extern inline uint8_t _mi_bin(size_t size) { #if defined(MI_ALIGN4W) if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes #endif - #ifdef MI_BIN4 - uint8_t b = mi_bsr32((uint32_t)wsize); - bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; - #else wsize--; // find the highest bit uint8_t b = mi_bsr32((uint32_t)wsize); @@ -134,7 +130,6 @@ extern inline uint8_t _mi_bin(size_t size) { // - adjust with 3 because we use do not round the first 8 sizes // which each get an exact bin bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - #endif } mi_assert_internal(bin > 0 && bin <= MI_BIN_HUGE); return bin; diff --git a/test/main-override-static.c b/test/main-override-static.c index 83aa388a..94891cc3 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -6,154 +6,9 @@ #include #include // redefines malloc etc. -#include -#include - -#define MI_INTPTR_SIZE 8 -#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) - -#define MI_BIN_HUGE 100 -//#define MI_ALIGN2W - -// Bit scan reverse: return the index of the highest bit. 
-static inline uint8_t mi_bsr32(uint32_t x); - -#if defined(_MSC_VER) -#include -#include -static inline uint8_t mi_bsr32(uint32_t x) { - uint32_t idx; - _BitScanReverse((DWORD*)&idx, x); - return idx; -} -#elif defined(__GNUC__) || defined(__clang__) -static inline uint8_t mi_bsr32(uint32_t x) { - return (31 - __builtin_clz(x)); -} -#else -static inline uint8_t mi_bsr32(uint32_t x) { - // de Bruijn multiplication, see - static const uint8_t debruijn[32] = { - 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, - 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, - }; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return debruijn[(x*0x076be629) >> 27]; -} -#endif - -// Bit scan reverse: return the index of the highest bit. -uint8_t _mi_bsr(uintptr_t x) { - if (x == 0) return 0; - #if MI_INTPTR_SIZE==8 - uint32_t hi = (x >> 32); - return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); - #elif MI_INTPTR_SIZE==4 - return mi_bsr32(x); - #else - # error "define bsr for non-32 or 64-bit platforms" - #endif -} - -static inline size_t _mi_wsize_from_size(size_t size) { - return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); -} - -// Return the bin for a given field size. -// Returns MI_BIN_HUGE if the size is too large. -// We use `wsize` for the size in "machine word sizes", -// i.e. byte size == `wsize*sizeof(void*)`. -extern inline uint8_t _mi_bin8(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - #if defined(MI_ALIGN4W) - if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes - #endif - wsize--; - // find the highest bit - uint8_t b = mi_bsr32((uint32_t)wsize); - // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
- // - adjust with 3 because we use do not round the first 8 sizes - // which each get an exact bin - bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; - } - return bin; -} - -extern inline uint8_t _mi_bin4(size_t size) { - size_t wsize = _mi_wsize_from_size(size); - uint8_t bin; - if (wsize <= 1) { - bin = 1; - } - #if defined(MI_ALIGN4W) - else if (wsize <= 4) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #elif defined(MI_ALIGN2W) - else if (wsize <= 8) { - bin = (uint8_t)((wsize+1)&~1); // round to double word sizes - } - #else - else if (wsize <= 8) { - bin = (uint8_t)wsize; - } - #endif - else if (wsize > MI_LARGE_WSIZE_MAX) { - bin = MI_BIN_HUGE; - } - else { - uint8_t b = mi_bsr32((uint32_t)wsize); - bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; - } - return bin; -} - -void mi_bins() { - printf(" QNULL(1), /* 0 */ \\\n "); - size_t last_bin = 0; - for (size_t size = 1; size < (MI_INTPTR_SIZE*MI_LARGE_WSIZE_MAX); size++) { - size_t bin = _mi_bin4(size); - if (bin != last_bin) { - size_t wsize = (size-1)/sizeof(intptr_t); - // printf("size: %6zd, wsize: %6d, bin: %6zd\n", size - 1, (size-1)/sizeof(intptr_t), last_bin); - printf("QNULL(%6zd), ", wsize); - if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); - last_bin = bin; - } - } -} - - int main() { mi_version(); - mi_bins(); void* p1 = malloc(78); void* p2 = malloc(24); free(p1); From b1938530afcb70ed3567922c4d7005a76ecd9b69 Mon Sep 17 00:00:00 2001 From: daan Date: Fri, 9 Aug 2019 14:31:45 -0700 Subject: [PATCH 36/36] fix comment --- include/mimalloc-types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index 6aa82210..4002c12c 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -91,8 +91,8 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE) #define MI_LARGE_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_LARGE_PAGE_SIZE) -#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 64kb on 64-bit -#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 512kb on 64-bit +#define MI_MEDIUM_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb on 64-bit +#define MI_LARGE_SIZE_MAX (MI_LARGE_PAGE_SIZE/4) // 1Mb on 64-bit #define MI_LARGE_WSIZE_MAX (MI_LARGE_SIZE_MAX>>MI_INTPTR_SHIFT)
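For reference, here is a small self-contained C sketch of the size-class mapping that patch 35 settles on (dropping the experimental 25%-spaced "MI_BIN4" bins added in patch 34 and keeping the finer-grained scheme). It mirrors the shape of `_mi_bin` as shown in the hunks above; `WSIZE_MAX`, `BIN_HUGE`, `bin_of` and the portable `bsr32` loop are illustrative stand-ins for this sketch only, not mimalloc's actual definitions or API.

// bin_sketch.c -- standalone illustration of the kept _mi_bin logic
// (constants hard-coded for a 64-bit build; names are not mimalloc's)
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define WSIZE_MAX (512*1024)  // stand-in upper bound before the huge bin (assumption)
#define BIN_HUGE  70U         // mirrors MI_BIN_HUGE in the kept code path

// index of the highest set bit (x > 0); portable fallback for mi_bsr32
static uint8_t bsr32(uint32_t x) {
  uint8_t b = 0;
  while (x >>= 1) b++;
  return b;
}

// size in machine words, rounded up
static size_t wsize_from_size(size_t size) {
  return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
}

// exact bins up to 8 words, then the top 3 bits of (wsize-1),
// adjusted by 3 so that bins 1..8 stay exact (same shape as _mi_bin)
static uint8_t bin_of(size_t size) {
  size_t wsize = wsize_from_size(size);
  if (wsize <= 1) return 1;
  if (wsize <= 8) return (uint8_t)wsize;
  if (wsize > WSIZE_MAX) return BIN_HUGE;
  wsize--;
  uint8_t b = bsr32((uint32_t)wsize);
  return (uint8_t)(((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3);
}

int main(void) {
  // a 1000-byte request is 125 words -> bin 24, whose queue in the
  // MI_PAGE_QUEUES_EMPTY table above holds 128-word (1024-byte) blocks,
  // i.e. 24 bytes (2.4%) of internal waste for this particular size
  printf("size  1000 -> bin %u\n", (unsigned)bin_of(1000));
  for (size_t sz = 8; sz <= 4096; sz *= 2) {
    printf("size %5zu -> bin %u\n", sz, (unsigned)bin_of(sz));
  }
  return 0;
}

As a sanity check against the queue table in src/init.c: sizes of 8..64 bytes map to the exact bins 1..8, a 128-byte request lands in bin 12 (16 words) and a 256-byte request in bin 16 (32 words), matching the QNULL entries above.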