diff --git a/include/mimalloc/prim.h b/include/mimalloc/prim.h index 8a627438..65f65376 100644 --- a/include/mimalloc/prim.h +++ b/include/mimalloc/prim.h @@ -117,7 +117,8 @@ void _mi_prim_thread_done_auto_done(void); // Called when the default heap for a thread changes void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); - +// Is this thread part of a thread pool? +bool _mi_prim_thread_is_in_threadpool(void); //------------------------------------------------------------------- // Thread id: `_mi_prim_thread_id()` diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index d883ec52..e10786a0 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -399,7 +399,7 @@ struct mi_heap_s { size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) size_t page_retired_max; // largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread - bool no_reclaim; // `true` if this heap should not reclaim abandoned pages + bool allow_page_reclaim; // `true` if this heap can reclaim abandoned pages bool allow_page_abandon; // `true` if this heap can abandon pages to reduce memory footprint uint8_t tag; // custom tag, can be used for separating heaps based on the object types #if MI_GUARDED @@ -568,14 +568,15 @@ typedef struct mi_os_tld_s { // Thread local data struct mi_tld_s { - unsigned long long heartbeat; // monotonic heartbeat count - bool recurse; // true if deferred was called; used to prevent infinite recursion. - mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) - mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) - mi_subproc_t* subproc; // sub-process this thread belongs to. 
- size_t tseq; // thread sequence id - mi_os_tld_t os; // os tld - mi_stats_t stats; // statistics + unsigned long long heartbeat; // monotonic heartbeat count + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) + mi_subproc_t* subproc; // sub-process this thread belongs to. + size_t tseq; // thread sequence id + bool recurse; // true if deferred was called; used to prevent infinite recursion. + bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) + mi_os_tld_t os; // os tld + mi_stats_t stats; // statistics }; #endif diff --git a/src/arena.c b/src/arena.c index f6c0f0a3..fa7d53ed 100644 --- a/src/arena.c +++ b/src/arena.c @@ -585,21 +585,25 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment)); - // claimed free slices: initialize the page partly + // claimed free slices: initialize the page partly if (!memid.initially_zero) { + mi_track_mem_undefined(page, slice_count * MI_ARENA_SLICE_SIZE); _mi_memzero_aligned(page, sizeof(*page)); } - #if MI_DEBUG > 1 else { + mi_track_mem_defined(page, slice_count * MI_ARENA_SLICE_SIZE); + } + #if MI_DEBUG > 1 + if (memid.initially_zero) { if (!mi_mem_is_zero(page, mi_size_of_slices(slice_count))) { - _mi_error_message(EFAULT, "page memory was not zero initialized!\n"); + _mi_error_message(EFAULT, "internal error: page memory was not zero initialized.\n"); memid.initially_zero = false; _mi_memzero_aligned(page, sizeof(*page)); } } #endif if (MI_PAGE_INFO_SIZE < _mi_align_up(sizeof(*page), MI_PAGE_MIN_BLOCK_ALIGN)) { - _mi_error_message(EFAULT, "fatal internal error: MI_PAGE_INFO_SIZE is too small\n"); + _mi_error_message(EFAULT, "fatal internal error: MI_PAGE_INFO_SIZE is 
too small.\n"); }; const size_t block_start = (os_align ? MI_PAGE_ALIGN : MI_PAGE_INFO_SIZE); const size_t reserved = (os_align ? 1 : (mi_size_of_slices(slice_count) - block_start) / block_size); diff --git a/src/bitmap.h b/src/bitmap.h index 7d6d8f97..40c4df42 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2023 Microsoft Research, Daan Leijen +Copyright (c) 2019-2024 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. diff --git a/src/free.c b/src/free.c index ece55599..d45507e7 100644 --- a/src/free.c +++ b/src/free.c @@ -230,7 +230,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { { mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag); if ((tagheap != NULL) && // don't reclaim across heap object types - (!tagheap->no_reclaim) && // we are allowed to reclaim abandoned pages + (tagheap->allow_page_reclaim) && // we are allowed to reclaim abandoned pages (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? ) (_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?) ) diff --git a/src/heap.c b/src/heap.c index d687f25e..3bf8b976 100644 --- a/src/heap.c +++ b/src/heap.c @@ -128,7 +128,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) #else collect >= MI_FORCE #endif - && is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim) + && is_main_thread && mi_heap_is_backing(heap) && heap->allow_page_reclaim) { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. 
// if all memory is freed by now, all segments should be freed. @@ -192,23 +192,14 @@ void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool heap->tld = tld; heap->thread_id = _mi_thread_id(); heap->arena_id = arena_id; - heap->no_reclaim = noreclaim; + heap->allow_page_reclaim = !noreclaim; heap->allow_page_abandon = (!noreclaim && mi_option_get(mi_option_full_page_retain) >= 0); heap->tag = tag; - - #if defined(WIN32) && (MI_ARCH_X64 || MI_ARCH_X86) - // disallow reclaim for threads running in the windows threadpool - const DWORD winVersion = GetVersion(); - const DWORD winMajorVersion = (DWORD)(LOBYTE(LOWORD(winVersion))); - if (winMajorVersion >= 6) { - _TEB* const teb = NtCurrentTeb(); - void* const poolData = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778))); - if (poolData != NULL) { - heap->no_reclaim = true; - } + if (tld->is_in_threadpool) { + // if we run as part of a thread pool it is better to not arbitrarily reclaim abandoned pages into our heap. 
+ // (but abandoning is good in this case) + heap->allow_page_reclaim = false; } - #endif - if (heap == tld->heap_backing) { _mi_random_init(&heap->random); } @@ -364,7 +355,8 @@ static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_he void mi_heap_destroy(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); - mi_assert(heap->no_reclaim); + mi_assert(!heap->allow_page_reclaim); + mi_assert(!heap->allow_page_abandon); mi_assert_expensive(mi_heap_is_valid(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return; #if MI_GUARDED @@ -372,9 +364,9 @@ void mi_heap_destroy(mi_heap_t* heap) { mi_heap_delete(heap); return; #else - if (!heap->no_reclaim) { + if (heap->allow_page_reclaim) { _mi_warning_message("'mi_heap_destroy' called but ignored as the heap was not created with 'allow_destroy' (heap at %p)\n", heap); - // don't free in case it may contain reclaimed pages + // don't free in case it may contain reclaimed pages, mi_heap_delete(heap); } else { @@ -395,7 +387,7 @@ void _mi_heap_unsafe_destroy_all(void) { mi_heap_t* curr = bheap->tld->heaps; while (curr != NULL) { mi_heap_t* next = curr->next; - if (curr->no_reclaim) { + if (!curr->allow_page_reclaim) { mi_heap_destroy(curr); } else { diff --git a/src/init.c b/src/init.c index 4fbd50ed..b66efc69 100644 --- a/src/init.c +++ b/src/init.c @@ -131,12 +131,14 @@ extern mi_heap_t _mi_heap_main; static mi_decl_cache_align mi_subproc_t mi_subproc_default; static mi_decl_cache_align mi_tld_t tld_main = { - 0, false, + 0, &_mi_heap_main, &_mi_heap_main, - &mi_subproc_default, // subproc - 0, // tseq - { 0, &tld_main.stats }, // os - { MI_STATS_NULL } // stats + &mi_subproc_default, // subproc + 0, // tseq + false, // recurse + false, // is_in_threadpool + { 0, &tld_main.stats }, // os + { MI_STATS_NULL } // stats }; mi_decl_cache_align mi_heap_t _mi_heap_main = { @@ -150,8 +152,8 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = { 0, // page count MI_BIN_FULL, 0, 
// page retired min/max NULL, // next heap - false, // can reclaim - true, // eager abandon + true, // allow page reclaim + true, // allow page abandon 0, // tag #if MI_GUARDED 0, 0, 0, 0, 0, @@ -402,6 +404,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) { tld->subproc = &mi_subproc_default; tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1); tld->os.stats = &tld->stats; + tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); } // Free the thread local default heap (called from `mi_thread_done`) diff --git a/src/prim/emscripten/prim.c b/src/prim/emscripten/prim.c index 82147de7..d3dcca93 100644 --- a/src/prim/emscripten/prim.c +++ b/src/prim/emscripten/prim.c @@ -239,6 +239,9 @@ void _mi_prim_thread_done_auto_done(void) { void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { MI_UNUSED(heap); - } #endif + +bool _mi_prim_thread_is_in_threadpool(void) { + return false; +} diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c index 5a4440c3..e1ca3964 100644 --- a/src/prim/unix/prim.c +++ b/src/prim/unix/prim.c @@ -886,3 +886,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { } #endif + +bool _mi_prim_thread_is_in_threadpool(void) { + return false; +} diff --git a/src/prim/wasi/prim.c b/src/prim/wasi/prim.c index e1e7de5e..def09985 100644 --- a/src/prim/wasi/prim.c +++ b/src/prim/wasi/prim.c @@ -277,3 +277,7 @@ void _mi_prim_thread_done_auto_done(void) { void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { MI_UNUSED(heap); } + +bool _mi_prim_thread_is_in_threadpool(void) { + return false; +} diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 276da85c..80522f47 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -17,6 +17,9 @@ terms of the MIT license. 
A copy of the license can be found in the file // Dynamically bind Windows API points for portability //--------------------------------------------- +static DWORD win_major_version = 6; +static DWORD win_minor_version = 0; + // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. // So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) // NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) @@ -115,6 +118,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) config->has_overcommit = false; config->has_partial_free = false; config->has_virtual_reserve = true; + // windows version + const DWORD win_version = GetVersion(); + win_major_version = (DWORD)(LOBYTE(LOWORD(win_version))); + win_minor_version = (DWORD)(HIBYTE(LOWORD(win_version))); // get the page size SYSTEM_INFO si; GetSystemInfo(&si); @@ -134,7 +141,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) if (memInKiB > 0 && memInKiB < (SIZE_MAX / MI_KiB)) { config->physical_memory = (size_t)(memInKiB * MI_KiB); } - } + } // get the VirtualAlloc2 function HINSTANCE hDll; hDll = LoadLibrary(TEXT("kernelbase.dll")); @@ -809,4 +816,18 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { void _mi_allocator_done(void) { mi_allocator_done(); } -#endif \ No newline at end of file +#endif + + +bool _mi_prim_thread_is_in_threadpool(void) { + #if (MI_ARCH_X64 || MI_ARCH_X86) + if (win_major_version >= 6) { + // check if this thread belongs to a windows threadpool + // see: <https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/pebteb/teb/index.htm> + _TEB* const teb = NtCurrentTeb(); + void* const pool_data = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 
0x0F90 : 0x1778))); + return (pool_data != NULL); + } + #endif + return false; +} diff --git a/test/test-stress.c b/test/test-stress.c index 19edf2b5..915c953f 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -347,8 +347,8 @@ int main(int argc, char** argv) { mi_collect(true); mi_debug_show_arenas(true,true,false); #endif - mi_collect(true); - mi_debug_show_arenas(true, true, false); + // mi_collect(true); + // mi_debug_show_arenas(true, true, false); // mi_stats_print(NULL); #else mi_stats_print(NULL); // so we see rss/commit/elapsed