From cf89fc6338945fb9e1be4532cc9e98c58b77f121 Mon Sep 17 00:00:00 2001 From: Biswapriyo Nath Date: Fri, 18 Feb 2022 12:06:08 +0530 Subject: [PATCH 01/17] Fix strict function prototype warnings Fix warning: function declaration isn't a prototype [-Wstrict-prototypes] In C int foo() and int foo(void) are different functions. --- src/os.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/os.c b/src/os.c index 757e8cab..e458946c 100644 --- a/src/os.c +++ b/src/os.c @@ -107,7 +107,7 @@ bool _mi_os_has_overcommit(void) { } // OS (small) page size -size_t _mi_os_page_size() { +size_t _mi_os_page_size(void) { return os_page_size; } @@ -159,7 +159,7 @@ static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; -static bool mi_win_enable_large_os_pages() +static bool mi_win_enable_large_os_pages(void) { if (large_os_page_size > 0) return true; @@ -230,7 +230,7 @@ void _mi_os_init(void) } } #elif defined(__wasi__) -void _mi_os_init() { +void _mi_os_init(void) { os_overcommit = false; os_page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB os_alloc_granularity = 16; @@ -261,7 +261,7 @@ static void os_detect_overcommit(void) { #endif } -void _mi_os_init() { +void _mi_os_init(void) { // get the page size long result = sysconf(_SC_PAGESIZE); if (result > 0) { @@ -1302,7 +1302,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { Support NUMA aware allocation -----------------------------------------------------------------------------*/ #ifdef _WIN32 -static size_t mi_os_numa_nodex() { +static size_t mi_os_numa_nodex(void) { USHORT numa_node = 0; if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { // Extended API is supported From 1f8138a4dc08c906e34b047f4e462d1c292a35ee Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Mon, 28 Feb 2022 09:25:56 +0200 Subject: [PATCH 02/17] Fix wrong reporting of area used bytes Fixes #552 --- src/heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heap.c b/src/heap.c index d560fbc6..7cb4e5d1 100644 --- a/src/heap.c +++ b/src/heap.c @@ -530,7 +530,7 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa xarea.area.reserved = page->reserved * bsize; xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used; + xarea.area.used = page->used * bsize; xarea.area.block_size = bsize; return fun(heap, &xarea, arg); } From c027c27c0aad8d0389e1b0f1fbc01c789b3835ae Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 30 Mar 2022 18:45:22 +0100 Subject: [PATCH 03/17] update the docs to clarify the proper C++ override usage. --- docs/overrides.html | 2 +- docs/using.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/overrides.html b/docs/overrides.html index 0e7fd0ec..fc0ad591 100644 --- a/docs/overrides.html +++ b/docs/overrides.html @@ -118,7 +118,7 @@ $(document).ready(function(){initNavTree('overrides.html',''); initResizable();

Windows

Overriding on Windows is robust and has the particular advantage to be able to redirect all malloc/free calls that go through the (dynamic) C runtime allocator, including those from other DLL's or libraries.

The overriding on Windows requires that you link your program explicitly with the mimalloc DLL and use the C-runtime library as a DLL (using the /MD or /MDd switch). Also, the mimalloc-redirect.dll (or mimalloc-redirect32.dll) must be available in the same folder as the main mimalloc-override.dll at runtime (as it is a dependency). The redirection DLL ensures that all calls to the C runtime malloc API get redirected to mimalloc (in mimalloc-override.dll).

-

To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example on how to use this. For best performance on Windows with C++, it is also recommended to also override the new/delete operations (by including mimalloc-new-delete.h a single(!) source file in your project).

+

To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some call to the mimalloc API in the main function, like mi_version() (or use the /INCLUDE:mi_version switch on the linker). See the mimalloc-override-test project for an example of how to use this. For best performance on Windows with C++, it is recommended to also override the new/delete operations (by including mimalloc-new-delete.h in a single(!) source file in your project without linking to the mimalloc library).
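As a minimal sketch (not part of the original docs; it assumes a console program compiled with /MD and linked against the mimalloc DLL as described above, with hypothetical file names), this could look like:

```cpp
// main.cpp -- hypothetical example: reference the mimalloc API so that
// mimalloc-override.dll (and thus mimalloc-redirect.dll) is loaded at start-up.
#include <cstdio>
#include <cstdlib>
#include <mimalloc.h>

int main() {
  std::printf("mimalloc version: %d\n", mi_version());  // forces the DLL dependency
  void* p = std::malloc(128);  // redirected to mimalloc by the redirection DLL
  std::free(p);
  return 0;
}
```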

The environment variable MIMALLOC_DISABLE_REDIRECT=1 can be used to disable dynamic overriding at run-time. Use MIMALLOC_VERBOSE=1 to check if mimalloc was successfully redirected.
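Redirection status can also be checked programmatically; a small sketch (assuming the mi_is_redirected() API declared in mimalloc.h, which reports whether the redirection is active) might be:

```cpp
#include <cstdio>
#include <mimalloc.h>

int main() {
  if (mi_is_redirected()) {
    std::printf("malloc/free are redirected to mimalloc\n");
  }
  else {
    std::printf("using the standard C runtime allocator (redirection disabled or failed)\n");
  }
  return 0;
}
```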

(Note: in principle, it is possible to even patch existing executables without any recompilation if they are linked with the dynamic C runtime (ucrtbase.dll) – just put the mimalloc-override.dll into the import table (and put mimalloc-redirect.dll in the same folder); such patching can be done for example with CFF Explorer).

Static override

diff --git a/docs/using.html b/docs/using.html index e6aad1a2..140f0c5c 100644 --- a/docs/using.html +++ b/docs/using.html @@ -105,7 +105,7 @@ $(document).ready(function(){initNavTree('using.html',''); initResizable(); });

to link with the shared (dynamic) library, or:

target_link_libraries(myapp PUBLIC mimalloc-static)

to link with the static library. See test\CMakeLists.txt for an example.

C++

-

For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project.

+

For best performance in C++ programs, it is also recommended to override the global new and delete operators. For convenience, mimalloc provides mimalloc-new-delete.h which does this for you – just include it in a single(!) source file in your project without linking to the mimalloc library.
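A minimal sketch of this setup (not from the original docs; the file name is hypothetical and the header is included in exactly one translation unit) could be:

```cpp
// new-delete-override.cpp -- include the header in exactly one source file of the project
#include <mimalloc-new-delete.h>  // overrides the global operator new/delete with mimalloc

#include <string>

int main() {
  // ordinary new/delete now allocate through mimalloc
  std::string* s = new std::string("allocated via mimalloc");
  delete s;
  return 0;
}
```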

In C++, mimalloc also provides the mi_stl_allocator struct which implements the std::allocator interface. For example:

std::vector<some_struct, mi_stl_allocator<some_struct>> vec;
vec.push_back(some_struct());

Statistics

From 3c7ce7d3c6cd6a71db65cdd06881f45ab0848078 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Thu, 7 Apr 2022 19:09:31 -0700 Subject: [PATCH 04/17] improve mi_realloc codepath --- include/mimalloc-internal.h | 6 ++-- src/alloc.c | 59 ++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h index 16be1251..79adc231 100644 --- a/include/mimalloc-internal.h +++ b/include/mimalloc-internal.h @@ -138,8 +138,8 @@ mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` -void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero); -void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero); +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size); @@ -945,7 +945,7 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); - memcpy(adst, asrc, n); + _mi_memcpy(adst, asrc, n); } #else // Default fallback on `_mi_memcpy` diff --git a/src/alloc.c b/src/alloc.c index 5f150f24..62e76e23 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -147,7 +147,7 @@ mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { return p; } -void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) { +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { void* p = mi_heap_malloc(heap,size); if (zero && p != NULL) { _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again? 
@@ -530,20 +530,25 @@ bool _mi_free_delayed_block(mi_block_t* block) { } // Bytes available in a block -static size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { - const mi_segment_t* const segment = mi_checked_ptr_segment(p,msg); - if (segment==NULL) return 0; - const mi_page_t* const page = _mi_segment_page_of(segment, p); - const mi_block_t* block = (const mi_block_t*)p; - if (mi_unlikely(mi_page_has_aligned(page))) { - block = _mi_page_ptr_unalign(segment, page, p); - size_t size = mi_page_usable_size_of(page, block); - ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)block; - mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); - return (size - adjust); +mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept { + const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p); + const size_t size = mi_page_usable_size_of(page, block); + const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; + mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); + return (size - adjust); +} + +static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { + const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); + if (segment==NULL) return 0; // also returns 0 if `p == NULL` + const mi_page_t* const page = _mi_segment_page_of(segment, p); + if (mi_likely(!mi_page_has_aligned(page))) { + const mi_block_t* block = (const mi_block_t*)p; + return mi_page_usable_size_of(page, block); } else { - return mi_page_usable_size_of(page, block); + // split out to separate routine for improved code generation + return mi_page_usable_aligned_size_of(segment, page, p); } } @@ -612,35 +617,43 @@ mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { return mi_heap_mallocn(mi_get_default_heap(),count,size); } -// Expand in place or fail +// Expand (or shrink) in place (or fail) void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { + #if MI_PADDING + // we do not shrink/expand with padding enabled + MI_UNUSED(p); MI_UNUSED(newsize); + return NULL; + #else if (p == NULL) return NULL; - size_t size = _mi_usable_size(p,"mi_expand"); + const size_t size = _mi_usable_size(p,"mi_expand"); if (newsize > size) return NULL; return p; // it fits + #endif } -void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) { - if (p == NULL) return _mi_heap_malloc_zero(heap,newsize,zero); - size_t size = _mi_usable_size(p,"mi_realloc"); - if (newsize <= size && newsize >= (size / 2)) { +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept { + const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL + if (mi_unlikely(newsize <= size && newsize >= (size / 2))) { + // todo: adjust potential padding to reflect the new size? return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); if (mi_likely(newp != NULL)) { if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized - size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); memset((uint8_t*)newp + start, 0, newsize - start); } - _mi_memcpy_aligned(newp, p, (newsize > size ? 
size : newsize)); - mi_free(p); // only free if successful + if (mi_likely(p != NULL)) { + _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); + mi_free(p); // only free the original pointer if successful + } } return newp; } void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { - return _mi_heap_realloc_zero(heap, p, newsize, false); + return _mi_heap_realloc_zero(heap, p, newsize, false); } void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { From 6e5788d076f78b25d19c1afbc3b9a3434c38dcfa Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Apr 2022 20:17:48 -0700 Subject: [PATCH 05/17] add small cache for thread metadata for programs that create/destroy many OS threads --- src/init.c | 86 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 15 deletions(-) diff --git a/src/init.c b/src/init.c index 854a2228..72c56b42 100644 --- a/src/init.c +++ b/src/init.c @@ -165,6 +165,68 @@ typedef struct mi_thread_data_s { mi_tld_t tld; } mi_thread_data_t; + +// Thread meta-data is allocated directly from the OS. For +// some programs that do not use thread pools and allocate and +// destroy many OS threads, this may causes too much overhead +// per thread so we maintain a small cache of recently freed metadata. + +#define TD_CACHE_SIZE (8) +static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; + +static mi_thread_data_t* mi_thread_data_alloc(void) { + // try to find thread metadata in the cache + mi_thread_data_t* td; + for (int i = 0; i < TD_CACHE_SIZE; i++) { + td = mi_atomic_load_ptr_relaxed(mi_thread_data_t*, &td_cache[i]); + if (td != NULL) { + mi_thread_data_t* expected = td; + if (mi_atomic_cas_weak_acq_rel(&td_cache[i], &expected, NULL)) { + return td; + } + } + } + // if that fails, allocate directly from the OS + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + if (td == NULL) { + // if this fails, try once more. 
(issue #257) + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + if (td == NULL) { + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + } + } + return td; +} + +static void mi_thread_data_free( mi_thread_data_t* tdfree ) { + // try to add the thread metadata to the cache + for (int i = 0; i < TD_CACHE_SIZE; i++) { + mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t*, &td_cache[i]); + if (td == NULL) { + mi_thread_data_t* expected = NULL; + if (mi_atomic_cas_weak_acq_rel(&td_cache[i], &expected, tdfree)) { + return; + } + } + } + // if that fails, just free it directly + _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); +} + +static void mi_thread_data_collect(void) { + // free all thread metadata from the cache + for (int i = 0; i < TD_CACHE_SIZE; i++) { + mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t*, &td_cache[i]); + if (td != NULL) { + mi_thread_data_t* expected = td; + if (mi_atomic_cas_weak_acq_rel(&td_cache[i], &expected, NULL)) { + _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); + } + } + } +} + // Initialize the thread local default heap, called from `mi_thread_init` static bool _mi_heap_init(void) { if (mi_heap_is_initialized(mi_get_default_heap())) return true; @@ -177,16 +239,9 @@ static bool _mi_heap_init(void) { } else { // use `_mi_os_alloc` to allocate directly from the OS - mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); // Todo: more efficient allocation? - if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); - if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); - return false; - } - } + mi_thread_data_t* td = mi_thread_data_alloc(); + if (td == NULL) return false; + // OS allocated so already zero initialized mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; @@ -242,16 +297,17 @@ static bool _mi_heap_done(mi_heap_t* heap) { // free if not the main thread if (heap != &_mi_heap_main) { mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); - _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main); + mi_thread_data_free((mi_thread_data_t*)heap); } -#if 0 - // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, - // there may still be delete/free calls after the mi_fls_done is called. Issue #207 else { + mi_thread_data_collect(); // free cached thread metadata + #if 0 + // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, + // there may still be delete/free calls after the mi_fls_done is called. 
Issue #207 _mi_heap_destroy_pages(heap); mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); + #endif } -#endif return false; } From 185f296513da2bfcdb413f7c4821bf238c35e035 Mon Sep 17 00:00:00 2001 From: daan Date: Thu, 7 Apr 2022 20:26:35 -0700 Subject: [PATCH 06/17] improve atomic operations for the thread metadata cache --- src/init.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/init.c b/src/init.c index 72c56b42..32523abf 100644 --- a/src/init.c +++ b/src/init.c @@ -178,10 +178,10 @@ static mi_thread_data_t* mi_thread_data_alloc(void) { // try to find thread metadata in the cache mi_thread_data_t* td; for (int i = 0; i < TD_CACHE_SIZE; i++) { - td = mi_atomic_load_ptr_relaxed(mi_thread_data_t*, &td_cache[i]); + td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { - mi_thread_data_t* expected = td; - if (mi_atomic_cas_weak_acq_rel(&td_cache[i], &expected, NULL)) { + td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); + if (td != NULL) { return td; } } @@ -202,10 +202,10 @@ static mi_thread_data_t* mi_thread_data_alloc(void) { static void mi_thread_data_free( mi_thread_data_t* tdfree ) { // try to add the thread metadata to the cache for (int i = 0; i < TD_CACHE_SIZE; i++) { - mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t*, &td_cache[i]); + mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td == NULL) { mi_thread_data_t* expected = NULL; - if (mi_atomic_cas_weak_acq_rel(&td_cache[i], &expected, tdfree)) { + if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) { return; } } @@ -217,10 +217,10 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) { static void mi_thread_data_collect(void) { // free all thread metadata from the cache for (int i = 0; i < TD_CACHE_SIZE; i++) { - mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t*, &td_cache[i]); + mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { - mi_thread_data_t* expected = td; - if (mi_atomic_cas_weak_acq_rel(&td_cache[i], &expected, NULL)) { + td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); + if (td != NULL) { _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); } } From b7677b6f8482daad8374b2cf5430be2156308063 Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 8 Apr 2022 14:09:38 -0700 Subject: [PATCH 07/17] fix atomic warnings on clang14 (issue #571) --- include/mimalloc-atomic.h | 8 +++++++- src/init.c | 10 +++++----- src/os.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h index e07df84d..7ad5da58 100644 --- a/include/mimalloc-atomic.h +++ b/include/mimalloc-atomic.h @@ -23,10 +23,15 @@ terms of the MIT license. A copy of the license can be found in the file #define _Atomic(tp) std::atomic #define mi_atomic(name) std::atomic_##name #define mi_memory_order(name) std::memory_order_##name +#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571 + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif #elif defined(_MSC_VER) // Use MSVC C wrapper for C11 atomics #define _Atomic(tp) tp -#define ATOMIC_VAR_INIT(x) x +#define MI_ATOMIC_VAR_INIT(x) x #define mi_atomic(name) mi_atomic_##name #define mi_memory_order(name) mi_memory_order_##name #else @@ -34,6 +39,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include #define mi_atomic(name) atomic_##name #define mi_memory_order(name) memory_order_##name +#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) #endif // Various defines for all used memory orders in mimalloc diff --git a/src/init.c b/src/init.c index 854a2228..ad1e4d45 100644 --- a/src/init.c +++ b/src/init.c @@ -25,8 +25,8 @@ const mi_page_t _mi_page_empty = { 0, // used 0, // xblock_size NULL, // local_free - ATOMIC_VAR_INIT(0), // xthread_free - ATOMIC_VAR_INIT(0), // xheap + MI_ATOMIC_VAR_INIT(0), // xthread_free + MI_ATOMIC_VAR_INIT(0), // xheap NULL, NULL }; @@ -91,7 +91,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - ATOMIC_VAR_INIT(NULL), + MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie { 0, 0 }, // keys @@ -123,7 +123,7 @@ mi_heap_t _mi_heap_main = { &tld_main, MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - ATOMIC_VAR_INIT(NULL), + MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) @@ -325,7 +325,7 @@ bool _mi_is_main_thread(void) { return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); } -static _Atomic(size_t) thread_count = ATOMIC_VAR_INIT(1); +static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); size_t _mi_current_thread_count(void) { return mi_atomic_load_relaxed(&thread_count); diff --git a/src/os.c b/src/os.c index 52939faa..f36f8480 100644 --- a/src/os.c +++ b/src/os.c @@ -983,7 +983,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) if (p != start) return false; #else #if defined(MADV_FREE) - static _Atomic(size_t) advice = ATOMIC_VAR_INIT(MADV_FREE); + static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; From e18a8cd72eeb024e64d2929948cc9bd8a4d9bf79 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 8 Apr 2022 16:58:32 -0700 Subject: [PATCH 08/17] add heap walk test --- include/mimalloc.h | 1 + src/heap.c | 9 +- test/main-override-static.c | 205 +++++++++++++++++++++++++++++++++++- 3 files changed, 209 insertions(+), 6 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 91ad352b..c9d48c0e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -256,6 +256,7 @@ typedef struct mi_heap_area_s { size_t committed; // current available bytes for this area size_t used; // number of allocated blocks size_t block_size; // size in bytes of each block + size_t full_block_size; // size in bytes of a full block including padding and metadata. 
} mi_heap_area_t; typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg); diff --git a/src/heap.c b/src/heap.c index 7cb4e5d1..f4654464 100644 --- a/src/heap.c +++ b/src/heap.c @@ -470,13 +470,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v if (page->used == 0) return true; const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); // without padding size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(mi_page_heap(page), area, pstart, bsize, arg); + return visitor(mi_page_heap(page), area, pstart, ubsize, arg); } // create a bitmap of free blocks. @@ -510,7 +511,7 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v else if ((m & ((uintptr_t)1 << bit)) == 0) { used_count++; uint8_t* block = pstart + (i * bsize); - if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false; + if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; } } mi_assert_internal(page->used == used_count); @@ -526,12 +527,14 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); xarea.page = page; xarea.area.reserved = page->reserved * bsize; xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); xarea.area.used = page->used * bsize; - xarea.area.block_size = bsize; + xarea.area.block_size = ubsize; + xarea.area.full_block_size = bsize; return fun(heap, &xarea, arg); } diff --git a/test/main-override-static.c b/test/main-override-static.c index 071e4248..e64b987b 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -16,6 +16,9 @@ static void test_aslr(void); static void test_process_info(void); static void test_reserved(void); static void negative_stat(void); +static void alloc_huge(void); +static void test_heap_walk(void); + int main() { mi_version(); @@ -29,6 +32,8 @@ int main() { // invalid_free(); // test_reserved(); // negative_stat(); + test_heap_walk(); + // alloc_huge(); void* p1 = malloc(78); void* p2 = malloc(24); @@ -48,8 +53,10 @@ int main() { //free(p1); //p2 = malloc(32); //mi_free(p2); - mi_collect(true); - mi_stats_print(NULL); + + //mi_collect(true); + //mi_stats_print(NULL); + // test_process_info(); return 0; } @@ -179,4 +186,196 @@ static void negative_stat(void) { *p = 100; mi_free(p); mi_stats_print_out(NULL, NULL); -} \ No newline at end of file +} + +static void alloc_huge(void) { + void* p = mi_malloc(67108872); + mi_free(p); +} + +static bool test_visit(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { + printf("I'm visiting a block of size %zu, allocated size %zu\n", block_size, mi_usable_size(block)); + return true; +} + +static void test_heap_walk(void) { + mi_heap_t* heap = mi_heap_new(); + //mi_heap_malloc(heap, 2097152); + mi_heap_malloc(heap, 2067152); + mi_heap_malloc(heap, 2097160); + mi_heap_malloc(heap, 24576); + mi_heap_visit_blocks(heap, true, &test_visit, NULL); +} + +// ---------------------------- +// bin size experiments +// 
------------------------------ + +#if 0 +#include +#include + +#define MI_INTPTR_SIZE 8 +#define MI_LARGE_WSIZE_MAX (4*1024*1024 / MI_INTPTR_SIZE) + +#define MI_BIN_HUGE 100 +//#define MI_ALIGN2W + +// Bit scan reverse: return the index of the highest bit. +static inline uint8_t mi_bsr32(uint32_t x); + +#if defined(_MSC_VER) +#include +#include +static inline uint8_t mi_bsr32(uint32_t x) { + uint32_t idx; + _BitScanReverse((DWORD*)&idx, x); + return idx; +} +#elif defined(__GNUC__) || defined(__clang__) +static inline uint8_t mi_bsr32(uint32_t x) { + return (31 - __builtin_clz(x)); +} +#else +static inline uint8_t mi_bsr32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 0, 22, 1, 28, 23, 18, 2, 29, 26, 24, 10, 19, 7, 3, 12, + 30, 21, 27, 17, 25, 9, 6, 11, 20, 16, 8, 5, 15, 4, 14, 13, + }; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return debruijn[(x*0x076be629) >> 27]; +} +#endif + +/* +// Bit scan reverse: return the index of the highest bit. +uint8_t _mi_bsr(uintptr_t x) { + if (x == 0) return 0; + #if MI_INTPTR_SIZE==8 + uint32_t hi = (x >> 32); + return (hi == 0 ? mi_bsr32((uint32_t)x) : 32 + mi_bsr32(hi)); + #elif MI_INTPTR_SIZE==4 + return mi_bsr32(x); + #else + # error "define bsr for non-32 or 64-bit platforms" + #endif +} +*/ + + +static inline size_t _mi_wsize_from_size(size_t size) { + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); +} + +// Return the bin for a given field size. +// Returns MI_BIN_HUGE if the size is too large. +// We use `wsize` for the size in "machine word sizes", +// i.e. byte size == `wsize*sizeof(void*)`. +extern inline uint8_t _mi_bin8(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { +#if defined(MI_ALIGN4W) + if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes +#endif + wsize--; + // find the highest bit + uint8_t b = mi_bsr32((uint32_t)wsize); + // and use the top 3 bits to determine the bin (~12.5% worst internal fragmentation). 
+ // - adjust with 3 because we use do not round the first 8 sizes + // which each get an exact bin + bin = ((b << 2) + (uint8_t)((wsize >> (b - 2)) & 0x03)) - 3; + } + return bin; +} + +static inline uint8_t _mi_bin4(size_t size) { + size_t wsize = _mi_wsize_from_size(size); + uint8_t bin; + if (wsize <= 1) { + bin = 1; + } +#if defined(MI_ALIGN4W) + else if (wsize <= 4) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#elif defined(MI_ALIGN2W) + else if (wsize <= 8) { + bin = (uint8_t)((wsize+1)&~1); // round to double word sizes + } +#else + else if (wsize <= 8) { + bin = (uint8_t)wsize; + } +#endif + else if (wsize > MI_LARGE_WSIZE_MAX) { + bin = MI_BIN_HUGE; + } + else { + uint8_t b = mi_bsr32((uint32_t)wsize); + bin = ((b << 1) + (uint8_t)((wsize >> (b - 1)) & 0x01)) + 3; + } + return bin; +} + +static size_t _mi_binx4(size_t bsize) { + if (bsize==0) return 0; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 1) return bsize; + size_t bin = ((b << 1) | (bsize >> (b - 1))&0x01); + return bin; +} + +static size_t _mi_binx8(size_t bsize) { + if (bsize<=1) return bsize; + uint8_t b = mi_bsr32((uint32_t)bsize); + if (b <= 2) return bsize; + size_t bin = ((b << 2) | (bsize >> (b - 2))&0x03) - 5; + return bin; +} + +static void mi_bins(void) { + //printf(" QNULL(1), /* 0 */ \\\n "); + size_t last_bin = 0; + size_t min_bsize = 0; + size_t last_bsize = 0; + for (size_t bsize = 1; bsize < 2*1024; bsize++) { + size_t size = bsize * 64 * 1024; + size_t bin = _mi_binx8(bsize); + if (bin != last_bin) { + printf("min bsize: %6zd, max bsize: %6zd, bin: %6zd\n", min_bsize, last_bsize, last_bin); + //printf("QNULL(%6zd), ", wsize); + //if (last_bin%8 == 0) printf("/* %i */ \\\n ", last_bin); + last_bin = bin; + min_bsize = bsize; + } + last_bsize = bsize; + } +} +#endif + From adc8b3187c4ec19fe4e8e8bb2f5aae4e9eb4bba3 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Apr 2022 13:48:16 -0700 Subject: [PATCH 09/17] fix Windows C++ compilation in combination with dynamic overriding by preferring RtlGenRandom --- src/random.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/random.c b/src/random.c index 5057a623..d474a53a 100644 --- a/src/random.c +++ b/src/random.c @@ -168,16 +168,10 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim #if defined(_WIN32) -#if !defined(MI_USE_RTLGENRANDOM) -// We prefer to use BCryptGenRandom instead of RtlGenRandom but it can lead to a deadlock -// under the VS debugger when using dynamic overriding. -#pragma comment (lib,"bcrypt.lib") -#include -static bool os_random_buf(void* buf, size_t buf_len) { - return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); -} -#else -// Use (unofficial) RtlGenRandom +#if defined(MI_USE_RTLGENRANDOM) || defined(__cplusplus) +// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using +// dynamic overriding, we observed it can raise an exception when compiled with C++, and +// sometimes deadlocks when also running under the VS debugger. 
#pragma comment (lib,"advapi32.lib") #define RtlGenRandom SystemFunction036 #ifdef __cplusplus @@ -190,6 +184,12 @@ BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); static bool os_random_buf(void* buf, size_t buf_len) { return (RtlGenRandom(buf, (ULONG)buf_len) != 0); } +#else +#pragma comment (lib,"bcrypt.lib") +#include +static bool os_random_buf(void* buf, size_t buf_len) { + return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} #endif #elif defined(__APPLE__) From 016b2ad5352de399af167a1acb6042cee31f8fbe Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Apr 2022 14:08:27 -0700 Subject: [PATCH 10/17] nicer heap walk test --- test/main-override-static.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/main-override-static.c b/test/main-override-static.c index e64b987b..a5088d3a 100644 --- a/test/main-override-static.c +++ b/test/main-override-static.c @@ -194,7 +194,12 @@ static void alloc_huge(void) { } static bool test_visit(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { - printf("I'm visiting a block of size %zu, allocated size %zu\n", block_size, mi_usable_size(block)); + if (block == NULL) { + printf("visiting an area with blocks of size %zu (including padding)\n", area->full_block_size); + } + else { + printf(" block of size %zu (allocated size is %zu)\n", block_size, mi_usable_size(block)); + } return true; } From 6c91c75b140f650845e918005979a05ac0da7d5e Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Apr 2022 14:33:20 -0700 Subject: [PATCH 11/17] remove thread local segment cache --- include/mimalloc-types.h | 3 -- include/mimalloc.h | 2 +- src/init.c | 2 +- src/options.c | 2 +- src/segment.c | 91 ++++------------------------------------ 5 files changed, 10 insertions(+), 90 deletions(-) diff --git a/include/mimalloc-types.h b/include/mimalloc-types.h index be3cf503..a3fad92d 100644 --- a/include/mimalloc-types.h +++ b/include/mimalloc-types.h @@ -521,9 +521,6 @@ typedef struct mi_segments_tld_s { size_t peak_count; // peak number of segments size_t current_size; // current size of all segments size_t peak_size; // peak size of all segments - size_t cache_count; // number of segments in the cache - size_t cache_size; // total size of all segments in the cache - mi_segment_t* cache; // (small) cache of segments mi_stats_t* stats; // points to tld stats mi_os_tld_t* os; // points to os stats } mi_segments_tld_t; diff --git a/include/mimalloc.h b/include/mimalloc.h index c9d48c0e..2707bc16 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -313,7 +313,7 @@ typedef enum mi_option_e { mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup - mi_option_segment_cache, + mi_option_deprecated_segment_cache, mi_option_page_reset, mi_option_abandoned_page_reset, mi_option_segment_reset, diff --git a/src/init.c b/src/init.c index ce43e3a6..f910c287 100644 --- a/src/init.c +++ b/src/init.c @@ -112,7 +112,7 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, - 0, 0, 0, 0, 0, 0, NULL, + 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments { 0, &tld_main.stats }, // os diff --git a/src/options.c b/src/options.c index e1944a19..4f857ec6 100644 --- a/src/options.c +++ 
b/src/options.c @@ -78,7 +78,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, - { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) diff --git a/src/segment.c b/src/segment.c index 8a83ceed..a98edcfd 100644 --- a/src/segment.c +++ b/src/segment.c @@ -110,17 +110,6 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t Invariant checking ----------------------------------------------------------- */ -#if (MI_DEBUG>=2) -static bool mi_segment_is_in_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); - bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); - if (in_queue) { - mi_assert_expensive(mi_segment_queue_contains(queue, segment)); - } - return in_queue; -} -#endif - static size_t mi_segment_page_size(const mi_segment_t* segment) { if (segment->capacity > 1) { mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); @@ -483,64 +472,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); } - -// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -#define MI_SEGMENT_CACHE_FRACTION (8) - -// note: returned segment may be partially reset -static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t* tld) { - if (segment_size != 0 && segment_size != MI_SEGMENT_SIZE) return NULL; - mi_segment_t* segment = tld->cache; - if (segment == NULL) return NULL; - tld->cache_count--; - tld->cache = segment->next; - segment->next = NULL; - mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - _mi_stat_decrease(&tld->stats->segments_cache, 1); - return segment; -} - -static bool mi_segment_cache_full(mi_segments_tld_t* tld) -{ - // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread - size_t max_cache = mi_option_get(mi_option_segment_cache); - if (tld->cache_count < max_cache - && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache - ) { - return false; - } - // take the opportunity to reduce the segment cache if it is too large (now) - // TODO: this never happens as we check against peak usage, should we use current usage instead? 
- while (tld->cache_count > max_cache) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { - mi_segment_t* segment = mi_segment_cache_pop(0,tld); - mi_assert_internal(segment != NULL); - if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); - } - return true; -} - -static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); - if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { - return false; - } - mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - segment->next = tld->cache; - tld->cache = segment; - tld->cache_count++; - _mi_stat_increase(&tld->stats->segments_cache,1); - return true; -} - // called by threads that are terminating to free cached segments -void _mi_segment_thread_collect(mi_segments_tld_t* tld) { - mi_segment_t* segment; - while ((segment = mi_segment_cache_pop(0,tld)) != NULL) { - mi_segment_os_free(segment, segment->segment_size, tld); - } - mi_assert_internal(tld->cache_count == 0); - mi_assert_internal(tld->cache == NULL); +void _mi_segment_thread_collect(mi_segments_tld_t* tld) { #if MI_DEBUG>=2 if (!_mi_is_main_thread()) { mi_assert_internal(tld->pages_reset.first == NULL); @@ -712,13 +645,8 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert(segment->prev == NULL); _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - if (!force && mi_segment_cache_push(segment, tld)) { - // it is put in our cache - } - else { - // otherwise return it to the OS - mi_segment_os_free(segment, segment->segment_size, tld); - } + // return it to the OS + mi_segment_os_free(segment, segment->segment_size, tld); } /* ----------------------------------------------------------- @@ -1217,15 +1145,10 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s { mi_assert_internal(page_kind <= MI_PAGE_LARGE); mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); - // 1. try to get a segment from our cache - mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld); - if (segment != NULL) { - mi_segment_init(segment, 0, page_kind, page_shift, tld, os_tld); - return segment; - } - // 2. try to reclaim an abandoned segment + + // 1. try to reclaim an abandoned segment bool reclaimed; - segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); + mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); if (reclaimed) { // reclaimed the right page right into the heap mi_assert_internal(segment != NULL && segment->page_kind == page_kind && page_kind <= MI_PAGE_LARGE); @@ -1235,7 +1158,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s // reclaimed a segment with empty pages (of `page_kind`) in it return segment; } - // 3. otherwise allocate a fresh segment + // 2. 
otherwise allocate a fresh segment return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); } From faca422b718625e656012391d94c564f5a67d977 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Apr 2022 14:48:30 -0700 Subject: [PATCH 12/17] fix msvc warnings at level 4 --- src/alloc-posix.c | 26 +++++++++++++------------- src/init.c | 2 +- src/options.c | 3 +++ src/segment.c | 7 ++++++- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/alloc-posix.c b/src/alloc-posix.c index ee5babe1..176e7ec3 100644 --- a/src/alloc-posix.c +++ b/src/alloc-posix.c @@ -32,17 +32,17 @@ terms of the MIT license. A copy of the license can be found in the file #endif -size_t mi_malloc_size(const void* p) mi_attr_noexcept { +mi_decl_nodiscard size_t mi_malloc_size(const void* p) mi_attr_noexcept { //if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } -size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { +mi_decl_nodiscard size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { //if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } -size_t mi_malloc_good_size(size_t size) mi_attr_noexcept { +mi_decl_nodiscard size_t mi_malloc_good_size(size_t size) mi_attr_noexcept { return mi_good_size(size); } @@ -65,24 +65,24 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept return 0; } -mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { void* p = mi_malloc_aligned(size, alignment); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; } -mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept { return mi_memalign( _mi_os_page_size(), size ); } -mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept { size_t psize = _mi_os_page_size(); if (size >= SIZE_MAX - psize) return NULL; // overflow size_t asize = _mi_align_up(size, psize); return mi_malloc_aligned(asize, psize); } -mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { if (mi_unlikely((size&(alignment-1)) != 0)) { // C11 requires alignment>0 && integral multiple, see #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment); @@ -95,13 +95,13 @@ mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_n return p; } -void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD +mi_decl_nodiscard void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD void* newp = mi_reallocn(p,count,size); if (newp==NULL) { errno = ENOMEM; } return newp; } -int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD +mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD mi_assert(p != NULL); if (p == NULL) { errno = EINVAL; @@ -120,7 +120,7 @@ void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft return res; } -mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict 
unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { if (s==NULL) return NULL; size_t len; for(len = 0; s[len] != 0; len++) { } @@ -132,7 +132,7 @@ mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noex return p; } -mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { return (unsigned char*)mi_strdup((const char*)s); } @@ -172,10 +172,10 @@ int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) #endif } -void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft +mi_decl_nodiscard void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft return mi_recalloc_aligned_at(p, newcount, size, alignment, offset); } -void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft +mi_decl_nodiscard void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft return mi_recalloc_aligned(p, newcount, size, alignment); } diff --git a/src/init.c b/src/init.c index f910c287..e8913818 100644 --- a/src/init.c +++ b/src/init.c @@ -458,7 +458,7 @@ bool _mi_preloading(void) { return os_preloading; } -bool mi_is_redirected(void) mi_attr_noexcept { +mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { return mi_redirected; } diff --git a/src/options.c b/src/options.c index 4f857ec6..8700fc76 100644 --- a/src/options.c +++ b/src/options.c @@ -116,6 +116,7 @@ void _mi_options_init(void) { mi_decl_nodiscard long mi_option_get(mi_option_t option) { mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return 0; mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option if (mi_unlikely(desc->init == UNINIT)) { @@ -126,6 +127,7 @@ mi_decl_nodiscard long mi_option_get(mi_option_t option) { void mi_option_set(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return; mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option desc->value = value; @@ -134,6 +136,7 @@ void mi_option_set(mi_option_t option, long value) { void mi_option_set_default(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return; mi_option_desc_t* desc = &options[option]; if (desc->init != INITIALIZED) { desc->value = value; diff --git a/src/segment.c b/src/segment.c index a98edcfd..bd36f627 100644 --- a/src/segment.c +++ b/src/segment.c @@ -191,7 +191,10 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_assert_internal((segment->segment_info_size - os_psize) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t)))); mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_psize == 0); mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_psize, os_psize, protect); - if (MI_SECURE <= 1 || segment->capacity == 1) { + #if (MI_SECURE >= 2) + if (segment->capacity == 1) + #endif + { // and protect the last (or only) page too mi_assert_internal(MI_SECURE <= 1 || segment->page_kind >= 
MI_PAGE_LARGE); uint8_t* start = (uint8_t*)segment + segment->segment_size - os_psize; @@ -207,6 +210,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_segment_protect_range(start, os_psize, protect); } } + #if (MI_SECURE >= 2) else { // or protect every page const size_t page_size = mi_segment_page_size(segment); @@ -216,6 +220,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* } } } + #endif } } From c8258514043176929b93113518d961db5b094864 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Apr 2022 15:59:05 -0700 Subject: [PATCH 13/17] define MEM_EXTENDED_PARAMETER structure ourselves on Windows in order to compile with older SDK's --- src/os.c | 73 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/src/os.c b/src/os.c index 860de82e..6a303a85 100644 --- a/src/os.c +++ b/src/os.c @@ -141,20 +141,41 @@ size_t _mi_os_good_alloc_size(size_t size) { // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. // So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) -// -// We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. +// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. +typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { + MiMemExtendedParameterInvalidType = 0, + MiMemExtendedParameterAddressRequirements, + MiMemExtendedParameterNumaNode, + MiMemExtendedParameterPartitionHandle, + MiMemExtendedParameterUserPhysicalHandle, + MiMemExtendedParameterAttributeFlags, + MiMemExtendedParameterMax +} MI_MEM_EXTENDED_PARAMETER_TYPE; + +typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { + struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; + union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; +} MI_MEM_EXTENDED_PARAMETER; + +typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { + PVOID LowestStartingAddress; + PVOID HighestEndingAddress; + SIZE_T Alignment; +} MI_MEM_ADDRESS_REQUIREMENTS; + +#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 + #include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); static PVirtualAlloc2 pVirtualAlloc2 = NULL; static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; // Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 -#if (_WIN32_WINNT < 0x601) // before Win7 -typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; -#endif -typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(PPROCESSOR_NUMBER ProcNumber); -typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(PPROCESSOR_NUMBER Processor, PUSHORT NodeNumber); +typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; + +typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* 
ProcNumber); +typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; @@ -348,20 +369,18 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment } } #endif -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) // on modern Windows try use VirtualAlloc2 for aligned allocation if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { - MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; + MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; reqs.Alignment = try_alignment; - MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; - param.Type = MemExtendedParameterAddressRequirements; - param.Pointer = &reqs; + MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; + param.Type.Type = MiMemExtendedParameterAddressRequirements; + param.Arg.Pointer = &reqs; void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); if (p != NULL) return p; _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); // fall through on error } -#endif // last resort return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -1109,21 +1128,17 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) mi_win_enable_large_os_pages(); - #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; + MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages static bool mi_huge_pages_available = true; if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif - params[0].Type = 5; // == MemExtendedParameterAttributeFlags; - params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + params[0].Type.Type = MiMemExtendedParameterAttributeFlags; + params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; ULONG param_count = 1; if (numa_node >= 0) { param_count++; - params[1].Type = MemExtendedParameterNumaNode; - params[1].ULong = (unsigned)numa_node; + params[1].Type.Type = MiMemExtendedParameterNumaNode; + params[1].Arg.ULong = (unsigned)numa_node; } SIZE_T psize = size; void* base = addr; @@ -1139,13 +1154,11 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) } // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation if (pVirtualAlloc2 != NULL && numa_node >= 0) { - params[0].Type = MemExtendedParameterNumaNode; - params[0].ULong = (unsigned)numa_node; + params[0].Type.Type = MiMemExtendedParameterNumaNode; + params[0].Arg.ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - #else - MI_UNUSED(numa_node); - #endif + // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -1299,7 +1312,7 @@ static size_t mi_os_numa_nodex(void) { USHORT numa_node = 0; if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { // Extended API is supported - 
PROCESSOR_NUMBER pnum; + MI_PROCESSOR_NUMBER pnum; (*pGetCurrentProcessorNumberEx)(&pnum); USHORT nnode = 0; BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); From 96bf3a803901aa6ca12690685e88ce3e41b2befa Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Apr 2022 16:16:11 -0700 Subject: [PATCH 14/17] fix warning --- src/os.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/os.c b/src/os.c index 6a303a85..f6a3cc86 100644 --- a/src/os.c +++ b/src/os.c @@ -347,7 +347,7 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats return !err; } -#if !(defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED)) +#if !(defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED) || MI_INTPTR_SIZE < 8) static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); #endif @@ -691,7 +691,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) if (hint%try_alignment != 0) return NULL; return (void*)hint; } -#elif defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED) +#elif defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED) || (MI_INTPTR_SIZE < 8) // no need for mi_os_get_aligned_hint #else static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { From f4b7ea9e9ee5648975266c532e90ebfc9530045b Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sat, 9 Apr 2022 16:20:27 -0700 Subject: [PATCH 15/17] fix compile warnings --- src/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/segment.c b/src/segment.c index bd36f627..75265073 100644 --- a/src/segment.c +++ b/src/segment.c @@ -110,6 +110,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t Invariant checking ----------------------------------------------------------- */ +#if (MI_DEBUG >= 2) || (MI_SECURE >= 2) static size_t mi_segment_page_size(const mi_segment_t* segment) { if (segment->capacity > 1) { mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); @@ -120,7 +121,7 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) { return segment->segment_size; } } - +#endif #if (MI_DEBUG>=2) static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { @@ -479,6 +480,7 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se // called by threads that are terminating to free cached segments void _mi_segment_thread_collect(mi_segments_tld_t* tld) { + MI_UNUSED_RELEASE(tld); #if MI_DEBUG>=2 if (!_mi_is_main_thread()) { mi_assert_internal(tld->pages_reset.first == NULL); From 2ab70f3c84a8ca66c4ffdbc19da0ad62c39765ef Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Sun, 10 Apr 2022 12:55:36 -0700 Subject: [PATCH 16/17] remove ifdefs around mi_os_aligned_hint --- src/os.c | 104 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 53 insertions(+), 51 deletions(-) diff --git a/src/os.c b/src/os.c index f6a3cc86..69eb49df 100644 --- a/src/os.c +++ b/src/os.c @@ -308,7 +308,57 @@ static int mi_madvise(void* addr, size_t length, int advice) { /* ----------------------------------------------------------- - free memory + aligned hinting +-------------------------------------------------------------- */ + +// On 64-bit systems, we can do efficient aligned allocation by using +// the 2TiB to 30TiB area to allocate those. +#if (MI_INTPTR_SIZE >= 8) +static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; + +// Return a MI_SEGMENT_SIZE aligned address that is probably available. 
From 2ab70f3c84a8ca66c4ffdbc19da0ad62c39765ef Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Sun, 10 Apr 2022 12:55:36 -0700
Subject: [PATCH 16/17] remove ifdefs around mi_os_aligned_hint

---
 src/os.c | 104 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 53 insertions(+), 51 deletions(-)

diff --git a/src/os.c b/src/os.c
index f6a3cc86..69eb49df 100644
--- a/src/os.c
+++ b/src/os.c
@@ -308,7 +308,57 @@ static int mi_madvise(void* addr, size_t length, int advice) {

 /* -----------------------------------------------------------
-  free memory
+  aligned hinting
 -------------------------------------------------------------- */
+
+// On 64-bit systems, we can do efficient aligned allocation by using
+// the 2TiB to 30TiB area to allocate those.
+#if (MI_INTPTR_SIZE >= 8)
+static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
+
+// Return a MI_SEGMENT_SIZE aligned address that is probably available.
+// If this returns NULL, the OS will determine the address but on some OS's that may not be
+// properly aligned which can be more costly as it needs to be adjusted afterwards.
+// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
+// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
+// in the middle of the 2TiB - 6TiB address range (see issue #372))
+
+#define MI_HINT_BASE ((uintptr_t)2 << 40)  // 2TiB start
+#define MI_HINT_AREA ((uintptr_t)4 << 40)  // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
+#define MI_HINT_MAX  ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
+
+static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
+{
+  if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
+  size = _mi_align_up(size, MI_SEGMENT_SIZE);
+  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
+  #if (MI_SECURE>0)
+  size += MI_SEGMENT_SIZE;  // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
+  #endif
+
+  uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
+  if (hint == 0 || hint > MI_HINT_MAX) {  // wrap or initialize
+    uintptr_t init = MI_HINT_BASE;
+    #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of aligned allocations unless in debug mode
+    uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
+    init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA);  // (randomly 20 bits)*4MiB == 0 to 4TiB
+    #endif
+    uintptr_t expected = hint + size;
+    mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
+    hint = mi_atomic_add_acq_rel(&aligned_base, size);  // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
+  }
+  if (hint%try_alignment != 0) return NULL;
+  return (void*)hint;
+}
+#else
+static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
+  MI_UNUSED(try_alignment); MI_UNUSED(size);
+  return NULL;
+}
+#endif
+
+/* -----------------------------------------------------------
+  Free memory
 -------------------------------------------------------------- */

 static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats)
@@ -347,9 +397,6 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
   return !err;
 }

-#if !(defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED) || MI_INTPTR_SIZE < 8)
-static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size);
-#endif

 /* -----------------------------------------------------------
   Raw allocation on Windows (VirtualAlloc)
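The hunk above concentrates the hinting logic in one unconditional block: a global atomic cursor (aligned_base) is advanced by the segment-aligned request size, so successive mappings receive distinct MI_SEGMENT_SIZE-aligned hint addresses in the 2TiB-30TiB window, and the cursor is re-seeded (optionally randomized under MI_SECURE) once it runs past MI_HINT_MAX. The stand-alone sketch below mirrors that arithmetic with plain C11 atomics instead of mimalloc's mi_atomic_* wrappers and omits the randomization and the MI_SECURE gap padding; the constants and control flow follow the hunk, the rest is illustrative only.

    #include <stdint.h>
    #include <stddef.h>
    #include <stdatomic.h>

    #define SEGMENT_SIZE  ((uintptr_t)4 << 20)   // 4MiB segments (MI_SEGMENT_SIZE)
    #define HINT_BASE     ((uintptr_t)2 << 40)   // start hinting at 2TiB
    #define HINT_MAX      ((uintptr_t)30 << 40)  // wrap once the cursor passes 30TiB
    #define ONE_GIB       ((uintptr_t)1 << 30)

    static _Atomic(uintptr_t) aligned_base;      // shared cursor, starts at 0

    void* get_aligned_hint(size_t try_alignment, size_t size) {
      if (try_alignment <= 1 || try_alignment > SEGMENT_SIZE) return NULL;
      uintptr_t sz = ((uintptr_t)size + SEGMENT_SIZE - 1) & ~(SEGMENT_SIZE - 1);  // round up to whole segments
      if (sz > ONE_GIB) return NULL;                        // very large areas stay fully ASLR-randomized
      uintptr_t hint = atomic_fetch_add(&aligned_base, sz); // returns the previous cursor value
      if (hint == 0 || hint > HINT_MAX) {                   // first use, or wrapped past 30TiB
        uintptr_t expected = hint + sz;
        atomic_compare_exchange_strong(&aligned_base, &expected, HINT_BASE);  // re-seed the cursor
        hint = atomic_fetch_add(&aligned_base, sz);         // may still race; it is only a hint
      }
      if (hint % try_alignment != 0) return NULL;
      return (void*)hint;
    }

The returned address is only ever passed as the address hint to mmap or VirtualAlloc; if the OS cannot honor it, the allocation still succeeds and, as the comment in the hunk notes, the result may have to be re-aligned afterwards at extra cost.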
@@ -652,53 +699,6 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 }
 #endif

-// On 64-bit systems, we can do efficient aligned allocation by using
-// the 2TiB to 30TiB area to allocate them.
-#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || defined(MI_OS_USE_MMAP))
-static mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
-
-// Return a 4MiB aligned address that is probably available.
-// If this returns NULL, the OS will determine the address but on some OS's that may not be
-// properly aligned which can be more costly as it needs to be adjusted afterwards.
-// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
-// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
-// in the middle of the 2TiB - 6TiB address range (see issue #372))
-
-#define MI_HINT_BASE ((uintptr_t)2 << 40)  // 2TiB start
-#define MI_HINT_AREA ((uintptr_t)4 << 40)  // upto 6TiB (since before win8 there is "only" 8TiB available to processes)
-#define MI_HINT_MAX  ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages)
-
-static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size)
-{
-  if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL;
-  size = _mi_align_up(size, MI_SEGMENT_SIZE);
-  if (size > 1*MI_GiB) return NULL;  // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096.
-  #if (MI_SECURE>0)
-  size += MI_SEGMENT_SIZE;  // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas.
-  #endif
-
-  uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size);
-  if (hint == 0 || hint > MI_HINT_MAX) {  // wrap or initialize
-    uintptr_t init = MI_HINT_BASE;
-    #if (MI_SECURE>0 || MI_DEBUG==0)  // security: randomize start of aligned allocations unless in debug mode
-    uintptr_t r = _mi_heap_random_next(mi_get_default_heap());
-    init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA);  // (randomly 20 bits)*4MiB == 0 to 4TiB
-    #endif
-    uintptr_t expected = hint + size;
-    mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init);
-    hint = mi_atomic_add_acq_rel(&aligned_base, size);  // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all
-  }
-  if (hint%try_alignment != 0) return NULL;
-  return (void*)hint;
-}
-#elif defined(__wasi__) || defined(MI_USE_SBRK) || defined(MAP_ALIGNED) || (MI_INTPTR_SIZE < 8)
-// no need for mi_os_get_aligned_hint
-#else
-static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
-  MI_UNUSED(try_alignment); MI_UNUSED(size);
-  return NULL;
-}
-#endif

 /* -----------------------------------------------------------
   Primitive allocation from the OS.
@@ -799,6 +799,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
   return p;
 }

+
 /* -----------------------------------------------------------
   OS API: alloc, free, alloc_aligned
 ----------------------------------------------------------- */
@@ -826,6 +827,7 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* stats) {

 void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats)
 {
+  MI_UNUSED(&mi_os_get_aligned_hint);  // suppress unused warnings
   MI_UNUSED(tld_stats);
   if (size == 0) return NULL;
   size = _mi_os_good_alloc_size(size);
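With the #ifdef maze gone, mi_os_get_aligned_hint is always compiled but is not referenced on every platform or configuration, so the last hunk of the patch above keeps the compiler quiet by taking the function's address inside MI_UNUSED. A tiny stand-alone illustration of that trick (hypothetical names, not mimalloc code):

    #include <stdio.h>

    #define UNUSED(x)  (void)(x)

    static int helper(int x) { return x + 1; }  // may end up unreferenced in some configurations

    int main(void) {
      UNUSED(&helper);   // taking the address counts as a use, so "defined but not used" stays silent
      printf("ok\n");
      return 0;
    }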
From 9afdf762a6f6909040903963b756f600cbea967e Mon Sep 17 00:00:00 2001
From: Daan Leijen
Date: Sun, 10 Apr 2022 12:55:59 -0700
Subject: [PATCH 17/17] fix c++ compilation warning for an unused parameter

---
 src/alloc-override.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/alloc-override.c b/src/alloc-override.c
index 12e9e0d6..e29cb4b2 100644
--- a/src/alloc-override.c
+++ b/src/alloc-override.c
@@ -166,8 +166,8 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
   void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
   void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
   void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
-  void operator delete (void* p, std::align_val_t al, const std::nothrow_t& tag) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
-  void operator delete[](void* p, std::align_val_t al, const std::nothrow_t& tag) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
+  void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
+  void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }

   void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
   void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
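The final patch avoids -Wunused-parameter in the C++ overrides simply by dropping the parameter name: the std::nothrow_t& argument must stay in the signature so overload resolution keeps working, but nothing in the body reads it. A small stand-alone illustration of the same idiom in C++ (hypothetical example, not mimalloc code):

    #include <new>      // std::nothrow_t, std::nothrow
    #include <cstdio>

    // The nothrow_t argument only selects the overload; it is never read,
    // so it is left unnamed instead of being called 'tag'.
    static void release(void* p) noexcept                        { std::printf("release %p\n", p); }
    static void release(void* p, const std::nothrow_t&) noexcept { release(p); }

    int main() {
      int x = 0;
      release(&x);
      release(&x, std::nothrow);  // picks the tag-dispatched overload; the tag itself is unused
      return 0;
    }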