check for running in a threadpool to disable page reclaim

daanx 2024-12-07 17:11:11 -08:00
parent d0c86f3f0e
commit c33de86da3
12 changed files with 80 additions and 47 deletions
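In short: a heap no longer reclaims abandoned pages when its thread runs inside an OS thread pool, since pool threads execute arbitrary tasks and should not permanently adopt pages into their heap (abandoning pages remains allowed, and is useful there). To support this, the inverted heap flag `no_reclaim` is renamed to `allow_page_reclaim`, a new primitive `_mi_prim_thread_is_in_threadpool()` is added to the OS abstraction layer (with a Windows TEB-based implementation; the other ports return `false`), and its result is cached per thread in `tld->is_in_threadpool`. A condensed sketch of the wiring, assembled from the hunks below (abbreviated, not the verbatim sources):

    // new OS primitive (prim interface); each port provides an implementation
    bool _mi_prim_thread_is_in_threadpool(void);

    void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
      // ...
      tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool();  // queried once per thread
    }

    void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, /*...*/ bool noreclaim, uint8_t tag) {
      heap->allow_page_reclaim = !noreclaim;   // renamed from the inverted `no_reclaim`
      // ...
      if (tld->is_in_threadpool) {
        heap->allow_page_reclaim = false;      // pool threads never adopt abandoned pages
      }
    }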

View file

@@ -117,7 +117,8 @@ void _mi_prim_thread_done_auto_done(void);
 // Called when the default heap for a thread changes
 void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
+// Is this thread part of a thread pool?
+bool _mi_prim_thread_is_in_threadpool(void);

 //-------------------------------------------------------------------
 // Thread id: `_mi_prim_thread_id()`

View file

@@ -399,7 +399,7 @@ struct mi_heap_s {
   size_t page_retired_min;    // smallest retired index (retired pages are fully free, but still in the page queues)
   size_t page_retired_max;    // largest retired index into the `pages` array.
   mi_heap_t* next;            // list of heaps per thread
-  bool no_reclaim;            // `true` if this heap should not reclaim abandoned pages
+  bool allow_page_reclaim;    // `true` if this heap can reclaim abandoned pages
   bool allow_page_abandon;    // `true` if this heap can abandon pages to reduce memory footprint
   uint8_t tag;                // custom tag, can be used for separating heaps based on the object types
   #if MI_GUARDED
@@ -568,14 +568,15 @@ typedef struct mi_os_tld_s {
 // Thread local data
 struct mi_tld_s {
-  unsigned long long heartbeat;   // monotonic heartbeat count
-  bool recurse;                   // true if deferred was called; used to prevent infinite recursion.
-  mi_heap_t* heap_backing;        // backing heap of this thread (cannot be deleted)
-  mi_heap_t* heaps;               // list of heaps in this thread (so we can abandon all when the thread terminates)
-  mi_subproc_t* subproc;          // sub-process this thread belongs to.
-  size_t tseq;                    // thread sequence id
-  mi_os_tld_t os;                 // os tld
-  mi_stats_t stats;               // statistics
+  unsigned long long heartbeat;   // monotonic heartbeat count
+  mi_heap_t* heap_backing;        // backing heap of this thread (cannot be deleted)
+  mi_heap_t* heaps;               // list of heaps in this thread (so we can abandon all when the thread terminates)
+  mi_subproc_t* subproc;          // sub-process this thread belongs to.
+  size_t tseq;                    // thread sequence id
+  bool recurse;                   // true if deferred was called; used to prevent infinite recursion.
+  bool is_in_threadpool;          // true if this thread is part of a threadpool (and can run arbitrary tasks)
+  mi_os_tld_t os;                 // os tld
+  mi_stats_t stats;               // statistics
 };
 #endif

View file

@@ -585,21 +585,25 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
   mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN));
   mi_assert_internal(!os_align || _mi_is_aligned((uint8_t*)page + page_alignment, block_alignment));
   // claimed free slices: initialize the page partly
   if (!memid.initially_zero) {
     mi_track_mem_undefined(page, slice_count * MI_ARENA_SLICE_SIZE);
     _mi_memzero_aligned(page, sizeof(*page));
   }
-  #if MI_DEBUG > 1
+  else {
+    mi_track_mem_defined(page, slice_count * MI_ARENA_SLICE_SIZE);
+  }
+  #if MI_DEBUG > 1
   if (memid.initially_zero) {
     if (!mi_mem_is_zero(page, mi_size_of_slices(slice_count))) {
-      _mi_error_message(EFAULT, "page memory was not zero initialized!\n");
+      _mi_error_message(EFAULT, "internal error: page memory was not zero initialized.\n");
       memid.initially_zero = false;
       _mi_memzero_aligned(page, sizeof(*page));
     }
   }
   #endif
   if (MI_PAGE_INFO_SIZE < _mi_align_up(sizeof(*page), MI_PAGE_MIN_BLOCK_ALIGN)) {
-    _mi_error_message(EFAULT, "fatal internal error: MI_PAGE_INFO_SIZE is too small\n");
+    _mi_error_message(EFAULT, "fatal internal error: MI_PAGE_INFO_SIZE is too small.\n");
   };
   const size_t block_start = (os_align ? MI_PAGE_ALIGN : MI_PAGE_INFO_SIZE);
   const size_t reserved = (os_align ? 1 : (mi_size_of_slices(slice_count) - block_start) / block_size);

View file

@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2019-2023 Microsoft Research, Daan Leijen
+Copyright (c) 2019-2024 Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.

View file

@@ -230,7 +230,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) {
   {
     mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag);
     if ((tagheap != NULL) &&                   // don't reclaim across heap object types
-        (!tagheap->no_reclaim) &&              // we are allowed to reclaim abandoned pages
+        (tagheap->allow_page_reclaim) &&       // we are allowed to reclaim abandoned pages
         (page->subproc == tagheap->tld->subproc) &&   // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? )
         (_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id))   // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
        )

View file

@@ -128,7 +128,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
   #else
   collect >= MI_FORCE
   #endif
-    && is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim)
+    && is_main_thread && mi_heap_is_backing(heap) && heap->allow_page_reclaim)
   {
     // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments.
     // if all memory is freed by now, all segments should be freed.
@@ -192,23 +192,14 @@ void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool
   heap->tld = tld;
   heap->thread_id = _mi_thread_id();
   heap->arena_id = arena_id;
-  heap->no_reclaim = noreclaim;
+  heap->allow_page_reclaim = !noreclaim;
   heap->allow_page_abandon = (!noreclaim && mi_option_get(mi_option_full_page_retain) >= 0);
   heap->tag = tag;
-  #if defined(WIN32) && (MI_ARCH_X64 || MI_ARCH_X86)
-  // disallow reclaim for threads running in the windows threadpool
-  const DWORD winVersion = GetVersion();
-  const DWORD winMajorVersion = (DWORD)(LOBYTE(LOWORD(winVersion)));
-  if (winMajorVersion >= 6) {
-    _TEB* const teb = NtCurrentTeb();
-    void* const poolData = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778)));
-    if (poolData != NULL) {
-      heap->no_reclaim = true;
-    }
-  }
-  #endif
+  if (tld->is_in_threadpool) {
+    // if we run as part of a thread pool it is better to not arbitrarily reclaim abandoned pages into our heap.
+    // (but abandoning is good in this case)
+    heap->allow_page_reclaim = false;
+  }
   if (heap == tld->heap_backing) {
     _mi_random_init(&heap->random);
   }
@@ -364,7 +355,8 @@ static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_he
 void mi_heap_destroy(mi_heap_t* heap) {
   mi_assert(heap != NULL);
   mi_assert(mi_heap_is_initialized(heap));
-  mi_assert(heap->no_reclaim);
+  mi_assert(!heap->allow_page_reclaim);
+  mi_assert(!heap->allow_page_abandon);
   mi_assert_expensive(mi_heap_is_valid(heap));
   if (heap==NULL || !mi_heap_is_initialized(heap)) return;
   #if MI_GUARDED
@@ -372,9 +364,9 @@ void mi_heap_destroy(mi_heap_t* heap) {
     mi_heap_delete(heap);
     return;
   #else
-  if (!heap->no_reclaim) {
+  if (heap->allow_page_reclaim) {
     _mi_warning_message("'mi_heap_destroy' called but ignored as the heap was not created with 'allow_destroy' (heap at %p)\n", heap);
-    // don't free in case it may contain reclaimed pages
+    // don't free in case it may contain reclaimed pages,
     mi_heap_delete(heap);
   }
   else {
@@ -395,7 +387,7 @@ void _mi_heap_unsafe_destroy_all(void) {
   mi_heap_t* curr = bheap->tld->heaps;
   while (curr != NULL) {
     mi_heap_t* next = curr->next;
-    if (curr->no_reclaim) {
+    if (!curr->allow_page_reclaim) {
       mi_heap_destroy(curr);
     }
     else {
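Note the user-visible consequence of the flag flip in `mi_heap_destroy` above: a destroyable heap must now assert that it neither reclaims nor abandons pages, and calling destroy on an ordinary heap (which allows reclaim) warns and degrades to `mi_heap_delete`, because the heap may hold reclaimed pages whose blocks are still owned elsewhere. A small usage sketch against the public API; this illustrates the behavior described by the warning and is not code from this commit:

    #include <mimalloc.h>

    int main(void) {
      mi_heap_t* heap = mi_heap_new();        // an ordinary heap: page reclaim is allowed
      void* p = mi_heap_malloc(heap, 64);
      // On such a heap, mi_heap_destroy() emits the warning above and falls back
      // to mi_heap_delete(), which migrates still-live blocks (like p) to the
      // default heap rather than freeing the backing pages outright.
      mi_heap_destroy(heap);
      return 0;
    }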

View file

@@ -131,12 +131,14 @@ extern mi_heap_t _mi_heap_main;
 static mi_decl_cache_align mi_subproc_t mi_subproc_default;
 static mi_decl_cache_align mi_tld_t tld_main = {
-  0, false,
+  0,
   &_mi_heap_main, &_mi_heap_main,
-  &mi_subproc_default,      // subproc
-  0,                        // tseq
-  { 0, &tld_main.stats },   // os
-  { MI_STATS_NULL }         // stats
+  &mi_subproc_default,      // subproc
+  0,                        // tseq
+  false,                    // recurse
+  false,                    // is_in_threadpool
+  { 0, &tld_main.stats },   // os
+  { MI_STATS_NULL }         // stats
 };

@@ -150,8 +152,8 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = {
   0,                        // page count
   MI_BIN_FULL, 0,           // page retired min/max
   NULL,                     // next heap
-  false,                    // can reclaim
-  true,                     // eager abandon
+  true,                     // allow page reclaim
+  true,                     // allow page abandon
   0,                        // tag
   #if MI_GUARDED
   0, 0, 0, 0, 0,
@@ -402,6 +404,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   tld->subproc = &mi_subproc_default;
   tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1);
   tld->os.stats = &tld->stats;
+  tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool();
 }

 // Free the thread local default heap (called from `mi_thread_done`)

View file

@@ -239,6 +239,9 @@ void _mi_prim_thread_done_auto_done(void) {
 void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
   MI_UNUSED(heap);
 }
 #endif
+
+bool _mi_prim_thread_is_in_threadpool(void) {
+  return false;
+}

View file

@@ -886,3 +886,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
 }
 #endif
+
+bool _mi_prim_thread_is_in_threadpool(void) {
+  return false;
+}

View file

@@ -277,3 +277,7 @@ void _mi_prim_thread_done_auto_done(void) {
 void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
   MI_UNUSED(heap);
 }
+
+bool _mi_prim_thread_is_in_threadpool(void) {
+  return false;
+}

View file

@@ -17,6 +17,9 @@ terms of the MIT license. A copy of the license can be found in the file
 // Dynamically bind Windows API points for portability
 //---------------------------------------------
+static DWORD win_major_version = 6;
+static DWORD win_minor_version = 0;
+
 // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
 // So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
 // NtAllocateVirtualMemoryEx is used for huge OS page allocation (1GiB)
@@ -115,6 +118,10 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
   config->has_overcommit = false;
   config->has_partial_free = false;
   config->has_virtual_reserve = true;
+  // windows version
+  const DWORD win_version = GetVersion();
+  win_major_version = (DWORD)(LOBYTE(LOWORD(win_version)));
+  win_minor_version = (DWORD)(HIBYTE(LOWORD(win_version)));
   // get the page size
   SYSTEM_INFO si;
   GetSystemInfo(&si);
@@ -134,7 +141,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
     if (memInKiB > 0 && memInKiB < (SIZE_MAX / MI_KiB)) {
       config->physical_memory = (size_t)(memInKiB * MI_KiB);
     }
-  }
+  }
   // get the VirtualAlloc2 function
   HINSTANCE hDll;
   hDll = LoadLibrary(TEXT("kernelbase.dll"));
@@ -809,4 +816,18 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) {
 void _mi_allocator_done(void) {
   mi_allocator_done();
 }
 #endif
+
+bool _mi_prim_thread_is_in_threadpool(void) {
+  #if (MI_ARCH_X64 || MI_ARCH_X86)
+  if (win_major_version >= 6) {
+    // check if this thread belongs to a windows threadpool
+    // see: <https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/pebteb/teb/index.htm>
+    _TEB* const teb = NtCurrentTeb();
+    void* const pool_data = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778)));
+    return (pool_data != NULL);
+  }
+  #endif
+  return false;
+}
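The two TEB offsets (0x0F90 on 32-bit, 0x1778 on 64-bit) hold the thread's pool data pointer, which the system only sets for threads owned by a Windows thread pool. A minimal standalone probe of the same check; this is a hypothetical test program (assuming Windows Vista or later and the undocumented offsets above), not part of the commit:

    #include <windows.h>
    #include <stdint.h>
    #include <stdio.h>

    static void* teb_pool_data(void) {
      uint8_t* const teb = (uint8_t*)NtCurrentTeb();
      // read the same TEB slot that _mi_prim_thread_is_in_threadpool() inspects
      return *(void**)(teb + (sizeof(void*) == 4 ? 0x0F90 : 0x1778));
    }

    static HANDLE done;

    static VOID CALLBACK pool_cb(PTP_CALLBACK_INSTANCE inst, PVOID ctx) {
      (void)inst; (void)ctx;
      printf("pool thread: pool data = %p\n", teb_pool_data());  // expect non-NULL
      SetEvent(done);
    }

    int main(void) {
      printf("main thread: pool data = %p\n", teb_pool_data()); // expect NULL
      done = CreateEvent(NULL, TRUE, FALSE, NULL);
      if (TrySubmitThreadpoolCallback(pool_cb, NULL, NULL)) {  // default process pool
        WaitForSingleObject(done, INFINITE);
      }
      CloseHandle(done);
      return 0;
    }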

View file

@@ -347,8 +347,8 @@ int main(int argc, char** argv) {
   mi_collect(true);
   mi_debug_show_arenas(true,true,false);
   #endif
-  mi_collect(true);
-  mi_debug_show_arenas(true, true, false);
+  // mi_collect(true);
+  // mi_debug_show_arenas(true, true, false);
   // mi_stats_print(NULL);
 #else
   mi_stats_print(NULL);  // so we see rss/commit/elapsed