From ffa8bce58165a044c7e29c025dfa6c53e4d4b00c Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 9 Oct 2024 11:27:57 -0700 Subject: [PATCH 01/15] prefer pages that do not expand --- src/page.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/page.c b/src/page.c index 49f9ed52..29f08b49 100644 --- a/src/page.c +++ b/src/page.c @@ -715,6 +715,14 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi Find pages with free blocks -------------------------------------------------------------*/ +#define MI_MAX_CANDIDATE_SEARCH (16) + +static inline bool mi_page_is_expandable(const mi_page_t* page) { + mi_assert_internal(page != NULL); + mi_assert_internal(page->capacity <= page->reserved); + return (page->capacity < page->reserved); +} + // Find a page with free blocks of `page->block_size`. static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) { @@ -722,6 +730,8 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p #if MI_STAT size_t count = 0; #endif + size_t candidate_count = 0; // we reset this on the first candidate to limit the search + mi_page_t* page_candidate = NULL; // a page with free space mi_page_t* page = pq->first; while (page != NULL) { @@ -729,10 +739,36 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p #if MI_STAT count++; #endif - + candidate_count++; +#if defined(MI_MAX_CANDIDATE_SEARCH) // 0. collect freed blocks by us and other threads - _mi_page_free_collect(page, false); + _mi_page_free_collect(page, false); // todo: should we free empty pages? + // is the local free list non-empty? + const bool immediate_available = mi_page_immediate_available(page); + + // 1. If the page is completely full, move it to the `mi_pages_full` + // queue so we don't visit long-lived pages too often. + if (!immediate_available && !mi_page_is_expandable(page)) { + mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); + mi_page_to_full(page, pq); + } + else { + // the page has free space, make it a candidate + // we prefer non-expandable pages with high usage as candidates (to reduce commit, and increase chances of free-ing up pages) + if (page_candidate == NULL) { + page_candidate = page; + candidate_count = 0; + } + else if (!mi_page_is_expandable(page) && page->used > page_candidate->used) { + page_candidate = page; + } + if (immediate_available || candidate_count > MI_MAX_CANDIDATE_SEARCH) { + mi_assert_internal(page_candidate!=NULL); + break; + } + } +#else // 1. if the page contains free blocks, we are done if (mi_page_immediate_available(page)) { break; // pick this one @@ -749,12 +785,22 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // queue so we don't visit long-lived pages too often. 
mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); mi_page_to_full(page, pq); +#endif page = next; } // for each page mi_heap_stat_counter_increase(heap, searches, count); + // set the page to the best candidate + if (page_candidate != NULL) { + page = page_candidate; + } + if (page != NULL && !mi_page_immediate_available(page)) { + mi_assert_internal(mi_page_is_expandable(page)); + mi_page_extend_free(heap, page, heap->tld); + } + if (page == NULL) { _mi_heap_collect_retired(heap, false); // perhaps make a page available page = mi_page_fresh(heap, pq); From a05b5ab0a1ab94372c3db1b4580a16aee7821e0d Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 9 Oct 2024 14:21:50 -0700 Subject: [PATCH 02/15] search N pages for a best fit --- src/page.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/page.c b/src/page.c index 29f08b49..e3c2d643 100644 --- a/src/page.c +++ b/src/page.c @@ -715,14 +715,17 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi Find pages with free blocks -------------------------------------------------------------*/ +// search for a best next page to use for at most N pages (often cut short if immediate blocks are available) #define MI_MAX_CANDIDATE_SEARCH (16) -static inline bool mi_page_is_expandable(const mi_page_t* page) { +// is the page not yet used up to its reserved space? +static bool mi_page_is_expandable(const mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_internal(page->capacity <= page->reserved); return (page->capacity < page->reserved); } + // Find a page with free blocks of `page->block_size`. static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) { @@ -740,14 +743,17 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p count++; #endif candidate_count++; -#if defined(MI_MAX_CANDIDATE_SEARCH) - // 0. collect freed blocks by us and other threads - _mi_page_free_collect(page, false); // todo: should we free empty pages? + // collect freed blocks by us and other threads + _mi_page_free_collect(page, false); + +#if defined(MI_MAX_CANDIDATE_SEARCH) + // search up to N pages for a best candidate + // is the local free list non-empty? const bool immediate_available = mi_page_immediate_available(page); - // 1. If the page is completely full, move it to the `mi_pages_full` + // if the page is completely full, move it to the `mi_pages_full` // queue so we don't visit long-lived pages too often. if (!immediate_available && !mi_page_is_expandable(page)) { mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); @@ -763,25 +769,20 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p else if (!mi_page_is_expandable(page) && page->used > page_candidate->used) { page_candidate = page; } + // if we find a non-expandable candidate, or searched for N pages, return with the best candidate if (immediate_available || candidate_count > MI_MAX_CANDIDATE_SEARCH) { mi_assert_internal(page_candidate!=NULL); break; } } #else - // 1. if the page contains free blocks, we are done - if (mi_page_immediate_available(page)) { + // first-fit algorithm + // If the page contains free blocks, we are done + if (mi_page_immediate_available(page) || mi_page_is_expandable(page)) { break; // pick this one } - // 2. 
Try to extend - if (page->capacity < page->reserved) { - mi_page_extend_free(heap, page, heap->tld); - mi_assert_internal(mi_page_immediate_available(page)); - break; - } - - // 3. If the page is completely full, move it to the `mi_pages_full` + // If the page is completely full, move it to the `mi_pages_full` // queue so we don't visit long-lived pages too often. mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); mi_page_to_full(page, pq); From 96877159c24d09a58fd7c7126261c177d15a5485 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 9 Oct 2024 14:35:33 -0700 Subject: [PATCH 03/15] insert full pages that became unfull, at the start of the page queue to increase potential reuse --- src/page-queue.c | 58 +++++++++++++++++++++++++++++++++++++++--------- src/page.c | 2 +- 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/src/page-queue.c b/src/page-queue.c index 02a8008d..71e439d6 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -260,7 +260,7 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ } -static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { +static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* from, bool enqueue_at_end, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); @@ -273,6 +273,8 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro (mi_page_is_huge(page) && mi_page_queue_is_full(to))); mi_heap_t* heap = mi_page_heap(page); + + // delete from `from` if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == from->last) from->last = page->prev; @@ -283,22 +285,58 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_heap_queue_first_update(heap, from); } - page->prev = to->last; - page->next = NULL; - if (to->last != NULL) { - mi_assert_internal(heap == mi_page_heap(to->last)); - to->last->next = page; - to->last = page; + // insert into `to` + if (enqueue_at_end) { + // enqueue at the end + page->prev = to->last; + page->next = NULL; + if (to->last != NULL) { + mi_assert_internal(heap == mi_page_heap(to->last)); + to->last->next = page; + to->last = page; + } + else { + to->first = page; + to->last = page; + mi_heap_queue_first_update(heap, to); + } } else { - to->first = page; - to->last = page; - mi_heap_queue_first_update(heap, to); + if (to->first != NULL) { + // enqueue at 2nd place + mi_assert_internal(heap == mi_page_heap(to->first)); + mi_page_t* next = to->first->next; + page->prev = to->first; + page->next = next; + to->first->next = page; + if (next != NULL) { + next->prev = page; + } + else { + to->last = page; + } + } + else { + // enqueue at the head (singleton list) + page->prev = NULL; + page->next = NULL; + to->first = page; + to->last = page; + mi_heap_queue_first_update(heap, to); + } } mi_page_set_in_full(page, mi_page_queue_is_full(to)); } +static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { + mi_page_queue_enqueue_from_ex(to, from, true, page); +} + +static void mi_page_queue_enqueue_from_at_start(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { + mi_page_queue_enqueue_from_ex(to, from, false, page); +} + // Only called from `mi_heap_absorb`. 
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) { mi_assert_internal(mi_heap_contains_queue(heap,pq)); diff --git a/src/page.c b/src/page.c index e3c2d643..fdc2c612 100644 --- a/src/page.c +++ b/src/page.c @@ -357,7 +357,7 @@ void _mi_page_unfull(mi_page_t* page) { mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_set_in_full(page, true); - mi_page_queue_enqueue_from(pq, pqfull, page); + mi_page_queue_enqueue_from_at_start(pq, pqfull, page); // insert at the start to increase the chance of reusing full pages (?) } static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { From cbc0e1980950949fbc0587a12eaad02f6c90317a Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 9 Oct 2024 14:41:12 -0700 Subject: [PATCH 04/15] revert back to unfull at the end of queues as it slows down some benchmarks (like alloc-test1) --- src/page.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index fdc2c612..ab9152be 100644 --- a/src/page.c +++ b/src/page.c @@ -357,7 +357,9 @@ void _mi_page_unfull(mi_page_t* page) { mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_set_in_full(page, true); - mi_page_queue_enqueue_from_at_start(pq, pqfull, page); // insert at the start to increase the chance of reusing full pages (?) + mi_page_queue_enqueue_from(pq, pqfull, page); + // we may instead insert at the front to increase reuse but it slows down some benchmarks like `alloc-test1` + // mi_page_queue_enqueue_from_at_start(pq, pqfull, page); } static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { From 0316cb1d882034fad7f1b6521ac7e81441a87513 Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 9 Oct 2024 15:05:35 -0700 Subject: [PATCH 05/15] reduce page search to 8 --- src/page-queue.c | 6 +++--- src/page.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/page-queue.c b/src/page-queue.c index 71e439d6..54496222 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -333,9 +333,9 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_page_queue_enqueue_from_ex(to, from, true, page); } -static void mi_page_queue_enqueue_from_at_start(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { - mi_page_queue_enqueue_from_ex(to, from, false, page); -} +// static void mi_page_queue_enqueue_from_at_start(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { +// mi_page_queue_enqueue_from_ex(to, from, false, page); +// } // Only called from `mi_heap_absorb`. 
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) { diff --git a/src/page.c b/src/page.c index ab9152be..a0224e41 100644 --- a/src/page.c +++ b/src/page.c @@ -358,7 +358,7 @@ void _mi_page_unfull(mi_page_t* page) { mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_set_in_full(page, true); mi_page_queue_enqueue_from(pq, pqfull, page); - // we may instead insert at the front to increase reuse but it slows down some benchmarks like `alloc-test1` + // we could insert at the front to increase reuse, but it slows down certain benchmarks (like alloc-test) // mi_page_queue_enqueue_from_at_start(pq, pqfull, page); } @@ -718,7 +718,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi -------------------------------------------------------------*/ // search for a best next page to use for at most N pages (often cut short if immediate blocks are available) -#define MI_MAX_CANDIDATE_SEARCH (16) +#define MI_MAX_CANDIDATE_SEARCH (8) // is the page not yet used up to its reserved space? static bool mi_page_is_expandable(const mi_page_t* page) { @@ -768,7 +768,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p page_candidate = page; candidate_count = 0; } - else if (!mi_page_is_expandable(page) && page->used > page_candidate->used) { + else if (!mi_page_is_expandable(page) && page->capacity < page_candidate->capacity) { page_candidate = page; } // if we find a non-expandable candidate, or searched for N pages, return with the best candidate From a7e7cbac89d4dbacee9e184b450ca5bfc31e0a19 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Wed, 9 Oct 2024 15:15:57 -0700 Subject: [PATCH 06/15] use enqueue_from_full, and keep inserting at the end --- src/page-queue.c | 9 +++++---- src/page.c | 4 +--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/page-queue.c b/src/page-queue.c index 54496222..3034e15d 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -330,12 +330,13 @@ static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* } static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { - mi_page_queue_enqueue_from_ex(to, from, true, page); + mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end */, page); } -// static void mi_page_queue_enqueue_from_at_start(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { -// mi_page_queue_enqueue_from_ex(to, from, false, page); -// } +static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { + // note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`) + mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page); +} // Only called from `mi_heap_absorb`. 
size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) { diff --git a/src/page.c b/src/page.c index a0224e41..acc72253 100644 --- a/src/page.c +++ b/src/page.c @@ -357,9 +357,7 @@ void _mi_page_unfull(mi_page_t* page) { mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_set_in_full(page, true); - mi_page_queue_enqueue_from(pq, pqfull, page); - // we could insert at the front to increase reuse, but it slows down certain benchmarks (like alloc-test) - // mi_page_queue_enqueue_from_at_start(pq, pqfull, page); + mi_page_queue_enqueue_from_full(pq, pqfull, page); } static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { From 723869014ff71b12c585bf9b9b51ee4128d1b71f Mon Sep 17 00:00:00 2001 From: daanx Date: Wed, 9 Oct 2024 21:24:20 -0700 Subject: [PATCH 07/15] add ability to abandon segments after a threshold --- include/mimalloc.h | 1 + include/mimalloc/internal.h | 2 + include/mimalloc/types.h | 2 +- src/arena-abandon.c | 2 +- src/options.c | 1 + src/page.c | 21 ++++++++++ src/segment.c | 83 ++++++++++++++++++++++++++++++++++++- 7 files changed, 108 insertions(+), 4 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index a5b3cc9d..df85a2c0 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -369,6 +369,7 @@ typedef enum mi_option_e { mi_option_visit_abandoned, // allow visiting heap blocks from abandoned threads (=0) mi_option_debug_guarded_min, // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects (=0) mi_option_debug_guarded_max, // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects (=0) + mi_option_target_segments_per_thread, // experimental (=0) _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index aff6a1bd..b4e74789 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -175,6 +175,8 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; / void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... +void _mi_page_force_abandon(mi_page_t* page); + void _mi_heap_delayed_free_all(mi_heap_t* heap); bool _mi_heap_delayed_free_partial(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 69f737b3..044d6eae 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -200,7 +200,7 @@ typedef int32_t mi_ssize_t; #define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 8KiB on 64-bit #define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit #define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 32MiB on 64-bit +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 16MiB on 64-bit #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) // Maximum number of size classes. 
(spaced exponentially in 12.5% increments) diff --git a/src/arena-abandon.c b/src/arena-abandon.c index eaa8c7c9..84b9f72c 100644 --- a/src/arena-abandon.c +++ b/src/arena-abandon.c @@ -192,7 +192,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool vi else { // otherwise visit all starting at a random location if (abandoned_count > abandoned_list_count && max_arena > 0) { - current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->start = 0; // (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); current->end = current->start + max_arena; } else { diff --git a/src/options.c b/src/options.c index 1cfb2f17..c97b9abe 100644 --- a/src/options.c +++ b/src/options.c @@ -100,6 +100,7 @@ static mi_option_desc_t options[_mi_option_last] = #endif { 0, UNINIT, MI_OPTION(debug_guarded_min) }, // only used when building with MI_DEBUG_GUARDED: minimal rounded object size for guarded objects { 0, UNINIT, MI_OPTION(debug_guarded_max) }, // only used when building with MI_DEBUG_GUARDED: maximal rounded object size for guarded objects + { 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable. }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/page.c b/src/page.c index 99ef3835..5671c7d4 100644 --- a/src/page.c +++ b/src/page.c @@ -405,6 +405,27 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { } +// force abandon a page; this is safe to call +void _mi_page_force_abandon(mi_page_t* page) { + mi_heap_t* heap = mi_page_heap(page); + // mark page as not using delayed free + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + + // ensure this page is no longer in the heap delayed free list + _mi_heap_delayed_free_all(heap); + if (page->block_size == 0) return; // it may have been freed now + + // and now unlink it from the page queue and abandon (or free) + mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); + if (mi_page_all_free(page)) { + _mi_page_free(page, pq, false); + } + else { + _mi_page_abandon(page, pq); + } +} + + // Free a page with no more free blocks void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { mi_assert_internal(page != NULL); diff --git a/src/segment.c b/src/segment.c index 1f1dc006..bb7483f1 100644 --- a/src/segment.c +++ b/src/segment.c @@ -693,6 +693,8 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // free previous slice -- remove it from free and merge mi_assert_internal(prev->slice_count > 0 && prev->slice_offset==0); slice_count += prev->slice_count; + slice->slice_count = 0; + slice->slice_offset = (uint32_t)((uint8_t*)slice - (uint8_t*)prev); // set the slice offset for `segment_force_abandon` (in case the previous free block is very large). if (!is_abandoned) { mi_segment_span_remove_from_queue(prev, tld); } slice = prev; } @@ -1329,7 +1331,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice result = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); break; } - else if (segment->abandoned_visits > 3 && is_suitable) { + else if (segment->abandoned_visits > 3 && is_suitable && !mi_option_is_enabled(mi_option_target_segments_per_thread)) { // always reclaim on 3rd visit to limit the abandoned queue length. 
mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1343,7 +1345,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice return result; } - +// collect abandoned segments void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) { mi_segment_t* segment; @@ -1367,6 +1369,80 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) _mi_arena_field_cursor_done(¤t); } +/* ----------------------------------------------------------- + Force abandon a segment that is in use by our thread +----------------------------------------------------------- */ + +// force abandon a segment +static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) +{ + mi_assert_internal(!mi_segment_is_abandoned(segment)); + + // for all slices + const mi_slice_t* end; + mi_slice_t* slice = mi_slices_start_iterate(segment, &end); + while (slice < end) { + mi_assert_internal(slice->slice_count > 0); + mi_assert_internal(slice->slice_offset == 0); + if (mi_slice_is_used(slice)) { + // ensure used count is up to date and collect potential concurrent frees + mi_page_t* const page = mi_slice_to_page(slice); + _mi_page_free_collect(page, false); + { + // abandon the page if it is still in-use (this will free it if possible as well) + mi_assert_internal(segment->used > 0); + if (segment->used == segment->abandoned+1) { + // the last page.. abandon and return as the segment will be abandoned after this + // and we should no longer access it. + _mi_page_force_abandon(page); + return; + } + else { + // abandon and continue + _mi_page_force_abandon(page); + // it might be freed, reset the slice (note: relies on coalesce setting the slice_offset) + slice = mi_slice_first(slice); + } + } + } + slice = slice + slice->slice_count; + } + mi_assert(segment->used == segment->abandoned); + mi_assert(segment->used == 0); + if (segment->used == 0) { + // all free now + mi_segment_free(segment, false, tld); + } + else { + // perform delayed purges + mi_segment_try_purge(segment, false /* force? */, tld->stats); + } +} + + +// try abandon segments. +// this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. +static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { + const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); + if (target == 0 || tld->count <= target) return; + + const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% + + // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages + for (int i = 0; i < 16 && tld->count >= min_target; i++) { + mi_page_t* page = heap->pages[MI_BIN_FULL].first; + while (page != NULL && mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX) { + page = page->next; + } + if (page==NULL) { + break; + } + mi_segment_t* segment = _mi_page_segment(page); + mi_segment_force_abandon(segment, tld); + mi_assert_internal(page != heap->pages[MI_BIN_FULL].first); // as it is just abandoned + } +} + /* ----------------------------------------------------------- Reclaim or allocate ----------------------------------------------------------- */ @@ -1375,6 +1451,9 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_ { mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); + // try to abandon some segments to increase reuse between threads + mi_segments_try_abandon(heap,tld); + // 1. 
try to reclaim an abandoned segment bool reclaimed; mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld); From 19ce2c6461ffa63583f57c2558e9c7f9979dadaa Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 11 Oct 2024 10:44:43 -0700 Subject: [PATCH 08/15] restore randomization when trying to reclaim abandoned segments --- src/arena-abandon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena-abandon.c b/src/arena-abandon.c index 84b9f72c..eaa8c7c9 100644 --- a/src/arena-abandon.c +++ b/src/arena-abandon.c @@ -192,7 +192,7 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool vi else { // otherwise visit all starting at a random location if (abandoned_count > abandoned_list_count && max_arena > 0) { - current->start = 0; // (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); current->end = current->start + max_arena; } else { From 81da26d7d30c87bc0f094c91fbbae39513d2d35a Mon Sep 17 00:00:00 2001 From: Daan Date: Fri, 11 Oct 2024 10:52:35 -0700 Subject: [PATCH 09/15] make target test for stealing one less since we are about to reclaim_or_alloc a fresh segment --- src/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/segment.c b/src/segment.c index bb7483f1..3d411f9c 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1424,9 +1424,10 @@ static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* t // this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); - if (target == 0 || tld->count <= target) return; + // we call this when we are about to add a fresh segment so we should be under our target segment count. + if (target == 0 || tld->count < target) return; - const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% + const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages for (int i = 0; i < 16 && tld->count >= min_target; i++) { From eda16d7c918b3f172de95bf0453edde6d249a321 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 29 Oct 2024 20:07:35 -0700 Subject: [PATCH 10/15] remove wrong assertion --- src/options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/options.c b/src/options.c index c55e63b1..ed1cf921 100644 --- a/src/options.c +++ b/src/options.c @@ -63,6 +63,7 @@ typedef struct mi_option_desc_s { #define MI_DEFAULT_ARENA_EAGER_COMMIT 2 #endif +// in KiB #ifndef MI_DEFAULT_ARENA_RESERVE #if (MI_INTPTR_SIZE>4) #define MI_DEFAULT_ARENA_RESERVE 1024L*1024L @@ -197,7 +198,6 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma } mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { - mi_assert_internal(mi_option_has_size_in_kib(option)); const long x = mi_option_get(option); size_t size = (x < 0 ? 
0 : (size_t)x); if (mi_option_has_size_in_kib(option)) { From 4f46cf7d5a0f7cbd30d0048babd3e67a4226ee53 Mon Sep 17 00:00:00 2001 From: Daan Date: Tue, 29 Oct 2024 22:40:58 -0700 Subject: [PATCH 11/15] ensure we dont reclaim a segment on a free if that would go above the target segment count --- src/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/segment.c b/src/segment.c index 3d411f9c..66ac4bf7 100644 --- a/src/segment.c +++ b/src/segment.c @@ -1261,6 +1261,8 @@ bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) { if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned if (segment->subproc != heap->tld->segments.subproc) return false; // only reclaim within the same subprocess if (!_mi_heap_memid_is_suitable(heap,segment->memid)) return false; // don't reclaim between exclusive and non-exclusive arena's + const long target = _mi_option_get_fast(mi_option_target_segments_per_thread); + if (target > 0 && (size_t)target <= heap->tld->segments.count) return false; // don't reclaim if going above the target count // don't reclaim more from a `free` call than half the current segments // this is to prevent a pure free-ing thread to start owning too many segments // (but not for out-of-arena segments as that is the main way to be reclaimed for those) From 3a7b6f0a8d83052adcb1b0ae27840b2f6d61ab06 Mon Sep 17 00:00:00 2001 From: Daan Date: Mon, 18 Nov 2024 10:28:00 -0800 Subject: [PATCH 12/15] allow build time setting of sample rate --- src/options.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/options.c b/src/options.c index 4f95e601..6635f661 100644 --- a/src/options.c +++ b/src/options.c @@ -89,6 +89,14 @@ typedef struct mi_option_desc_s { #define MI_DEFAULT_RESERVE_OS_MEMORY 0 #endif +#ifndef MI_DEFAULT_GUARDED_SAMPLE_RATE +#if MI_GUARDED +#define MI_DEFAULT_GUARDED_SAMPLE_RATE 4000 +#else +#define MI_DEFAULT_GUARDED_SAMPLE_RATE 0 +#endif +#endif + static mi_option_desc_t options[_mi_option_last] = { @@ -145,11 +153,8 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(guarded_min) }, // only used when building with MI_GUARDED: minimal rounded object size for guarded objects { MI_GiB, UNINIT, MI_OPTION(guarded_max) }, // only used when building with MI_GUARDED: maximal rounded object size for guarded objects { 0, UNINIT, MI_OPTION(guarded_precise) }, // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0) -#if MI_GUARDED - { 4000,UNINIT, MI_OPTION(guarded_sample_rate)}, // 1 out of N allocations in the min/max range will be guarded(= 1000) -#else - { 0, UNINIT, MI_OPTION(guarded_sample_rate)}, -#endif + { MI_DEFAULT_GUARDED_SAMPLE_RATE, + UNINIT, MI_OPTION(guarded_sample_rate)}, // 1 out of N allocations in the min/max range will be guarded (=4000) { 0, UNINIT, MI_OPTION(guarded_sample_seed)}, }; @@ -180,7 +185,7 @@ void _mi_options_init(void) { _mi_warning_message("option 'allow_large_os_pages' is disabled to allow for guarded objects\n"); } } - _mi_verbose_message("guarded build: %s\n", mi_option_get(mi_option_guarded_max) > 0 ? "enabled" : "disabled"); + _mi_verbose_message("guarded build: %s\n", mi_option_get(mi_option_guarded_sample_rate) != 0 ? 
"enabled" : "disabled"); #endif } From 71fec8caf5a6f3219f37ccfe38b71d73d6274c37 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 18 Nov 2024 15:05:22 -0800 Subject: [PATCH 13/15] add target_segments_per_thread option --- include/mimalloc.h | 1 + include/mimalloc/internal.h | 6 ++- src/options.c | 2 + src/page.c | 33 +++++++++++++--- src/segment.c | 79 ++++++++++++++++++++++++++++++++++++- 5 files changed, 112 insertions(+), 9 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index e3fecdf1..83cbda12 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -377,6 +377,7 @@ typedef enum mi_option_e { mi_option_guarded_precise, // disregard minimal alignment requirement to always place guarded blocks exactly in front of a guard page (=0) mi_option_guarded_sample_rate, // 1 out of N allocations in the min/max range will be guarded (=1000) mi_option_guarded_sample_seed, // can be set to allow for a (more) deterministic re-execution when a guard page is triggered (=0) + mi_option_target_segments_per_thread, // experimental (=0) _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index d58bd9ca..8de37edf 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -178,6 +178,8 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; / void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... +void _mi_page_force_abandon(mi_page_t* page); + void _mi_heap_delayed_free_all(mi_heap_t* heap); bool _mi_heap_delayed_free_partial(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); @@ -625,9 +627,9 @@ static inline bool mi_heap_malloc_use_guarded(mi_heap_t* heap, size_t size) { } else { // failed size criteria, rewind count (but don't write to an empty heap) - if (heap->guarded_sample_rate != 0) { heap->guarded_sample_count = 1; } + if (heap->guarded_sample_rate != 0) { heap->guarded_sample_count = 1; } return false; - } + } } mi_decl_restrict void* _mi_heap_malloc_guarded(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; diff --git a/src/options.c b/src/options.c index 4f95e601..d49d4aa8 100644 --- a/src/options.c +++ b/src/options.c @@ -65,6 +65,7 @@ typedef struct mi_option_desc_s { #define MI_DEFAULT_ARENA_EAGER_COMMIT 2 #endif +// in KiB #ifndef MI_DEFAULT_ARENA_RESERVE #if (MI_INTPTR_SIZE>4) #define MI_DEFAULT_ARENA_RESERVE 1024L*1024L @@ -151,6 +152,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(guarded_sample_rate)}, #endif { 0, UNINIT, MI_OPTION(guarded_sample_seed)}, + { 0, UNINIT, MI_OPTION(target_segments_per_thread) }, // abandon segments beyond this point, or 0 to disable. 
}; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/page.c b/src/page.c index 9d8a7b0d..3cf91ba8 100644 --- a/src/page.c +++ b/src/page.c @@ -357,7 +357,7 @@ void _mi_page_unfull(mi_page_t* page) { mi_page_set_in_full(page, false); // to get the right queue mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); mi_page_set_in_full(page, true); - mi_page_queue_enqueue_from_full(pq, pqfull, page); + mi_page_queue_enqueue_from_full(pq, pqfull, page); } static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) { @@ -403,6 +403,29 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { _mi_segment_page_abandon(page,segments_tld); } +// force abandon a page +void _mi_page_force_abandon(mi_page_t* page) { + mi_heap_t* heap = mi_page_heap(page); + // mark page as not using delayed free + _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); + + // ensure this page is no longer in the heap delayed free list + _mi_heap_delayed_free_all(heap); + // TODO: can we still access the page as it may have been + // freed and the memory decommitted? + // A way around this is to explicitly unlink this page from + // the heap delayed free list. + if (page->capacity == 0) return; // it may have been freed now + + // and now unlink it from the page queue and abandon (or free) + mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); + if (mi_page_all_free(page)) { + _mi_page_free(page, pq, false); + } + else { + _mi_page_abandon(page, pq); + } +} // Free a page with no more free blocks void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { @@ -743,7 +766,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p #if defined(MI_MAX_CANDIDATE_SEARCH) // search up to N pages for a best candidate - + // is the local free list non-empty? 
const bool immediate_available = mi_page_immediate_available(page); @@ -758,9 +781,9 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // we prefer non-expandable pages with high usage as candidates (to reduce commit, and increase chances of free-ing up pages) if (page_candidate == NULL) { page_candidate = page; - candidate_count = 0; + candidate_count = 0; } - else if (!mi_page_is_expandable(page) && page->capacity < page_candidate->capacity) { + else if (!mi_page_is_expandable(page) && page->used >= page_candidate->used) { page_candidate = page; } // if we find a non-expandable candidate, or searched for N pages, return with the best candidate @@ -805,7 +828,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p } } else { - mi_assert(pq->first == page); + // mi_assert(pq->first == page); page->retire_expire = 0; } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); diff --git a/src/segment.c b/src/segment.c index 18736818..d2604436 100644 --- a/src/segment.c +++ b/src/segment.c @@ -952,6 +952,9 @@ bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) { if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned if (segment->subproc != heap->tld->segments.subproc) return false; // only reclaim within the same subprocess if (!_mi_heap_memid_is_suitable(heap,segment->memid)) return false; // don't reclaim between exclusive and non-exclusive arena's + const long target = _mi_option_get_fast(mi_option_target_segments_per_thread); + if (target > 0 && (size_t)target <= heap->tld->segments.count) return false; // don't reclaim if going above the target count + // don't reclaim more from a `free` call than half the current segments // this is to prevent a pure free-ing thread to start owning too many segments // (but not for out-of-arena segments as that is the main way to be reclaimed for those) @@ -1023,8 +1026,8 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, result = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); break; } - else if (segment->abandoned_visits >= 3 && is_suitable) { - // always reclaim on 3rd visit to limit the list length. + else if (segment->abandoned_visits > 3 && is_suitable && !mi_option_is_enabled(mi_option_target_segments_per_thread)) { + // always reclaim on 3rd visit to limit the abandoned segment count. mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { @@ -1038,6 +1041,75 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, } +/* ----------------------------------------------------------- + Force abandon a segment that is in use by our thread +----------------------------------------------------------- */ + +// force abandon a segment +static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) +{ + mi_assert_internal(segment->abandoned < segment->used); + + // for all pages + for (size_t i = 0; i < segment->capacity; i++) { + mi_page_t* page = &segment->pages[i]; + if (page->segment_in_use) { + // ensure used count is up to date and collect potential concurrent frees + _mi_page_free_collect(page, false); + { + // abandon the page if it is still in-use (this will free it if possible as well) + mi_assert_internal(segment->used > 0); + if (segment->used == segment->abandoned+1) { + // the last page.. abandon and return as the segment will be abandoned after this + // and we should no longer access it. 
+ _mi_page_force_abandon(page); + return; + } + else { + // abandon and continue + _mi_page_force_abandon(page); + } + } + } + } + mi_assert(segment->used == segment->abandoned); + mi_assert(segment->used == 0); + if (segment->used == 0) { + // all free now + mi_segment_free(segment, false, tld); + } + else { + // perform delayed purges + mi_pages_try_purge(false /* force? */, tld); + } +} + + +// try abandon segments. +// this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. +static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { + const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); + // we call this when we are about to add a fresh segment so we should be under our target segment count. + if (target == 0 || tld->count < target) return; + + const size_t min_target = (target > 4 ? (target*3)/4 : target); // 75% + + // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages + for (int i = 0; i < 16 && tld->count >= min_target; i++) { + mi_page_t* page = heap->pages[MI_BIN_FULL].first; + while (page != NULL && mi_page_is_huge(page)) { + page = page->next; + } + if (page==NULL) { + break; + } + mi_segment_t* segment = _mi_page_segment(page); + mi_segment_force_abandon(segment, tld); + mi_assert_internal(page != heap->pages[MI_BIN_FULL].first); // as it is just abandoned + } +} + + /* ----------------------------------------------------------- Reclaim or allocate ----------------------------------------------------------- */ @@ -1047,6 +1119,9 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s mi_assert_internal(page_kind <= MI_PAGE_LARGE); mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); + // try to abandon some segments to increase reuse between threads + mi_segments_try_abandon(heap,tld); + // 1. 
try to reclaim an abandoned segment bool reclaimed; mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); From 9b7ac9a1a67308a26d09f0b87d2e876fbc61c86c Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 25 Nov 2024 16:58:02 -0800 Subject: [PATCH 14/15] clean up candidate search; add mi_collect_reduce --- ide/vs2022/mimalloc.vcxproj | 2 +- include/mimalloc.h | 1 + src/page-queue.c | 10 +++++++++- src/page.c | 23 ++++++++++++++------- src/segment.c | 40 ++++++++++++++++++++++++++++--------- 5 files changed, 58 insertions(+), 18 deletions(-) diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index 160f1436..dddab777 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -116,7 +116,7 @@ true Default ../../include - MI_DEBUG=4;MI_GUARDED=1;%(PreprocessorDefinitions); + MI_DEBUG=3;MI_GUARDED=0;%(PreprocessorDefinitions); CompileAsCpp false stdcpp20 diff --git a/include/mimalloc.h b/include/mimalloc.h index 83cbda12..5916228b 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -148,6 +148,7 @@ typedef void (mi_cdecl mi_error_fun)(int err, void* arg); mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); mi_decl_export void mi_collect(bool force) mi_attr_noexcept; +mi_decl_export void mi_collect_reduce(size_t target_thread_owned) mi_attr_noexcept; mi_decl_export int mi_version(void) mi_attr_noexcept; mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; diff --git a/src/page-queue.c b/src/page-queue.c index 3034e15d..9796f3dc 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -259,6 +259,14 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_ heap->page_count++; } +static void mi_page_queue_move_to_front(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { + mi_assert_internal(mi_page_heap(page) == heap); + mi_assert_internal(mi_page_queue_contains(queue, page)); + if (queue->first == page) return; + mi_page_queue_remove(queue, page); + mi_page_queue_push(heap, queue, page); + mi_assert_internal(queue->first == page); +} static void mi_page_queue_enqueue_from_ex(mi_page_queue_t* to, mi_page_queue_t* from, bool enqueue_at_end, mi_page_t* page) { mi_assert_internal(page != NULL); @@ -335,7 +343,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { // note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`) - mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page); + mi_page_queue_enqueue_from_ex(to, from, false /* enqueue at the end of the `to` queue? */, page); } // Only called from `mi_heap_absorb`. diff --git a/src/page.c b/src/page.c index 3cf91ba8..43ac7c4e 100644 --- a/src/page.c +++ b/src/page.c @@ -471,6 +471,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); + #if MI_RETIRE_CYCLES > 0 const size_t bsize = mi_page_block_size(page); if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? 
@@ -486,7 +487,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { return; // don't free after all } } - + #endif _mi_page_free(page, pq, false); } @@ -753,6 +754,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p size_t candidate_count = 0; // we reset this on the first candidate to limit the search mi_page_t* page_candidate = NULL; // a page with free space mi_page_t* page = pq->first; + while (page != NULL) { mi_page_t* next = page->next; // remember next @@ -764,7 +766,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // collect freed blocks by us and other threads _mi_page_free_collect(page, false); -#if defined(MI_MAX_CANDIDATE_SEARCH) + #if MI_MAX_CANDIDATE_SEARCH > 1 // search up to N pages for a best candidate // is the local free list non-empty? @@ -783,7 +785,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p page_candidate = page; candidate_count = 0; } - else if (!mi_page_is_expandable(page) && page->used >= page_candidate->used) { + else if (/* !mi_page_is_expandable(page) && */ page->used >= page_candidate->used) { page_candidate = page; } // if we find a non-expandable candidate, or searched for N pages, return with the best candidate @@ -792,7 +794,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p break; } } -#else + #else // first-fit algorithm // If the page contains free blocks, we are done if (mi_page_immediate_available(page) || mi_page_is_expandable(page)) { @@ -803,7 +805,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // queue so we don't visit long-lived pages too often. mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page)); mi_page_to_full(page, pq); -#endif + #endif page = next; } // for each page @@ -828,10 +830,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p } } else { - // mi_assert(pq->first == page); + // move the page to the front of the queue + mi_page_queue_move_to_front(heap, pq, page); page->retire_expire = 0; + // _mi_heap_collect_retired(heap, false); // update retire counts; note: increases rss on MemoryLoad bench so don't do this } mi_assert_internal(page == NULL || mi_page_immediate_available(page)); + + return page; } @@ -839,7 +845,9 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p // Find a page with free blocks of `size`. 
static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { - mi_page_queue_t* pq = mi_page_queue(heap,size); + mi_page_queue_t* pq = mi_page_queue(heap, size); + + // check the first page: we even do this with candidate search or otherwise we re-search every time mi_page_t* page = pq->first; if (page != NULL) { #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness @@ -858,6 +866,7 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { return page; // fast path } } + return mi_page_queue_find_free_ex(heap, pq, true); } diff --git a/src/segment.c b/src/segment.c index d2604436..16764da8 100644 --- a/src/segment.c +++ b/src/segment.c @@ -979,6 +979,13 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { _mi_arena_field_cursor_done(¤t); } + +static bool segment_count_is_within_target(mi_segments_tld_t* tld, size_t* ptarget) { + const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread, 0, 1024); + if (ptarget != NULL) { *ptarget = target; } + return (target == 0 || tld->count < target); +} + static long mi_segment_get_reclaim_tries(mi_segments_tld_t* tld) { // limit the tries to 10% (default) of the abandoned segments with at least 8 and at most 1024 tries. const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100); @@ -1001,7 +1008,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_segment_t* segment = NULL; mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, tld->subproc, false /* non-blocking */, ¤t); - while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) + while (segment_count_is_within_target(tld,NULL) && (max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) { mi_assert(segment->subproc == heap->tld->segments.subproc); // cursor only visits segments in our sub-process segment->abandoned_visits++; @@ -1026,7 +1033,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, result = mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); break; } - else if (segment->abandoned_visits > 3 && is_suitable && !mi_option_is_enabled(mi_option_target_segments_per_thread)) { + else if (segment->abandoned_visits > 3 && is_suitable) { // always reclaim on 3rd visit to limit the abandoned segment count. mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1087,15 +1094,11 @@ static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* t // try abandon segments. // this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. -static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { - const size_t target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread,0,1024); - // we call this when we are about to add a fresh segment so we should be under our target segment count. - if (target == 0 || tld->count < target) return; - +static void mi_segments_try_abandon_to_target(mi_heap_t* heap, size_t target, mi_segments_tld_t* tld) { + if (target <= 1) return; const size_t min_target = (target > 4 ? 
(target*3)/4 : target); // 75% - // todo: we should maintain a list of segments per thread; for now, only consider segments from the heap full pages - for (int i = 0; i < 16 && tld->count >= min_target; i++) { + for (int i = 0; i < 64 && tld->count >= min_target; i++) { mi_page_t* page = heap->pages[MI_BIN_FULL].first; while (page != NULL && mi_page_is_huge(page)) { page = page->next; @@ -1109,6 +1112,25 @@ static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { } } +// try abandon segments. +// this should be called from `reclaim_or_alloc` so we know all segments are (about) fully in use. +static void mi_segments_try_abandon(mi_heap_t* heap, mi_segments_tld_t* tld) { + // we call this when we are about to add a fresh segment so we should be under our target segment count. + size_t target = 0; + if (segment_count_is_within_target(tld, &target)) return; + mi_segments_try_abandon_to_target(heap, target, tld); +} + +void mi_collect_reduce(size_t target_size) mi_attr_noexcept { + mi_collect(true); + mi_heap_t* heap = mi_heap_get_default(); + mi_segments_tld_t* tld = &heap->tld->segments; + size_t target = target_size / MI_SEGMENT_SIZE; + if (target == 0) { + target = (size_t)mi_option_get_clamp(mi_option_target_segments_per_thread, 1, 1024); + } + mi_segments_try_abandon_to_target(heap, target, tld); +} /* ----------------------------------------------------------- Reclaim or allocate From 7673aa2517fa44080c51df8833aa7e79dad12ea8 Mon Sep 17 00:00:00 2001 From: daanx Date: Mon, 25 Nov 2024 18:41:57 -0800 Subject: [PATCH 15/15] ensure forced abandoned pages can be accessed after free --- include/mimalloc/types.h | 3 ++- src/page.c | 6 ++---- src/segment.c | 38 ++++++++++++++++++++++---------------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index f7bca137..44074450 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -416,7 +416,8 @@ typedef struct mi_segment_s { // segment fields struct mi_segment_s* next; // must be the first (non-constant) segment field -- see `segment.c:segment_init` struct mi_segment_s* prev; - bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation) + bool was_reclaimed; // true if it was reclaimed (used to limit reclaim-on-free reclamation) + bool dont_free; // can be temporarily true to ensure the segment is not freed size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t abandoned_visits; // count how often this segment is visited for reclaiming (to force reclaim if it is too long) diff --git a/src/page.c b/src/page.c index 43ac7c4e..c681d6d0 100644 --- a/src/page.c +++ b/src/page.c @@ -411,10 +411,8 @@ void _mi_page_force_abandon(mi_page_t* page) { // ensure this page is no longer in the heap delayed free list _mi_heap_delayed_free_all(heap); - // TODO: can we still access the page as it may have been - // freed and the memory decommitted? - // A way around this is to explicitly unlink this page from - // the heap delayed free list. 
+ // We can still access the page meta-info even if it is freed as we ensure + // in `mi_segment_force_abandon` that the segment is not freed (yet) if (page->capacity == 0) return; // it may have been freed now // and now unlink it from the page queue and abandon (or free) diff --git a/src/segment.c b/src/segment.c index 16764da8..74abcdbc 100644 --- a/src/segment.c +++ b/src/segment.c @@ -652,6 +652,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { MI_UNUSED(force); mi_assert(segment != NULL); + + // in `mi_segment_force_abandon` we set this to true to ensure the segment's memory stays valid + if (segment->dont_free) return; + // don't purge as we are freeing now mi_segment_remove_all_purges(segment, false /* don't force as we are about to free */, tld); mi_segment_remove_from_free_queue(segment, tld); @@ -1056,32 +1060,34 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, static void mi_segment_force_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->abandoned < segment->used); + mi_assert_internal(!segment->dont_free); + + // ensure the segment does not get free'd underneath us (so we can check if a page has been freed in `mi_page_force_abandon`) + segment->dont_free = true; // for all pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { - // ensure used count is up to date and collect potential concurrent frees - _mi_page_free_collect(page, false); - { - // abandon the page if it is still in-use (this will free it if possible as well) - mi_assert_internal(segment->used > 0); - if (segment->used == segment->abandoned+1) { - // the last page.. abandon and return as the segment will be abandoned after this - // and we should no longer access it. - _mi_page_force_abandon(page); - return; - } - else { - // abandon and continue - _mi_page_force_abandon(page); - } + // abandon the page if it is still in-use (this will free the page if possible as well (but not our segment)) + mi_assert_internal(segment->used > 0); + if (segment->used == segment->abandoned+1) { + // the last page.. abandon and return as the segment will be abandoned after this + // and we should no longer access it. + segment->dont_free = false; + _mi_page_force_abandon(page); + return; + } + else { + // abandon and continue + _mi_page_force_abandon(page); } } } + segment->dont_free = false; mi_assert(segment->used == segment->abandoned); mi_assert(segment->used == 0); - if (segment->used == 0) { + if (segment->used == 0) { // paranoia // all free now mi_segment_free(segment, false, tld); }
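
A minimal usage sketch of the per-thread segment cap introduced by this series, assuming the patches above are applied. The option `mi_option_target_segments_per_thread` and the `mi_collect_reduce(size_t)` call are taken from the diffs above; the concrete values (4 segments, 64 MiB) are example numbers only. Per the implementation in PATCH 14, `mi_collect_reduce` converts its byte-size argument to a segment count by dividing by MI_SEGMENT_SIZE, and falls back to the target_segments_per_thread option when passed 0.

    #include <mimalloc.h>

    int main(void) {
      // abandon segments once this thread owns more than 4 of them (0 = disabled, the default)
      mi_option_set(mi_option_target_segments_per_thread, 4);

      // ... allocation-heavy phase ...
      void* p = mi_malloc(16 * 1024);
      mi_free(p);

      // force a collection and abandon thread-owned segments down to roughly 64 MiB;
      // passing 0 would fall back to the target_segments_per_thread option instead.
      mi_collect_reduce(64 * 1024 * 1024);
      return 0;
    }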