From b604099334645d96003c7f662c64466d85c0583c Mon Sep 17 00:00:00 2001
From: Sergiy Kuryata
Date: Tue, 8 Oct 2024 17:08:09 -0700
Subject: [PATCH] Experimental improvements to reduce memory usage

1. Prefer more heavily used pages when allocating
2. Add an option to restrict unbounded heap growth and allow excess
   memory to be reused by other threads
---
 include/mimalloc.h |  1 +
 src/heap.c         | 92 ++++++++++++++++++++++++++++++++++++++++++++++
 src/options.c      |  3 +-
 src/page-queue.c   | 38 +++++++++++++++++--
 src/page.c         | 74 ++++++++++++++++++++++++++++++++++++-
 src/segment.c      |  8 +++-
 6 files changed, 208 insertions(+), 8 deletions(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index c41bcc80..5bca7956 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -348,6 +348,7 @@ typedef enum mi_option_e {
   mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1)
   mi_option_disallow_arena_alloc,      // 1 = do not use arena's for allocation (except if using specific arena id's)
   mi_option_retry_on_oom,              // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows)
+  mi_option_max_segments_per_heap,     // max number of segments that a heap can own
   _mi_option_last,
   // legacy option names
   mi_option_large_os_pages = mi_option_allow_large_os_pages,
diff --git a/src/heap.c b/src/heap.c
index e498fdb2..dfdb5620 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -651,3 +651,95 @@ bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_vis
   mi_visit_blocks_args_t args = { visit_blocks, visitor, arg };
   return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args);
 }
+
+// Pick the segment to drop: the one that owns the first page in the
+// full-page queue (a segment with full pages is a good abandon candidate).
+static mi_segment_t* mi_heap_get_segment_to_drop(mi_heap_t* heap) {
+  mi_page_queue_t* fullPageQueue = &heap->pages[MI_BIN_FULL];
+  mi_segment_t* segment = NULL;
+
+  if (fullPageQueue->first != NULL) {
+    segment = _mi_ptr_segment(fullPageQueue->first);
+  }
+
+  return segment;
+}
+
+const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment);
+mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page);
+
+// Visit all pages in a segment
+static mi_decl_noinline void mi_segment_visit_pages(mi_heap_t* heap, mi_segment_t* segment, heap_page_visitor_fun* fn, void* arg1)
+{
+  // Note: start from the 2nd slice because the first slice/page
+  // is allocated by the segment itself.
+  mi_slice_t* slice = &segment->slices[1];
+  const mi_slice_t* end = mi_segment_slices_end(segment);
+  bool isLastPage;
+  uint32_t slice_count;
+
+  while (slice < end) {
+
+    // Record the segment's state before operating on its pages, since the
+    // visitor can change that state and mark the segment as abandoned.
+    isLastPage = (segment->used == 1) || ((segment->used - segment->abandoned) == 1);
+    slice_count = slice->slice_count;
+
+    if (slice_count > 0) {
+      if (slice->block_size > 1) {
+        mi_page_t* page = (mi_page_t*)slice;
+        fn(heap, mi_heap_page_queue_of(heap, page), page, arg1, NULL);
+
+        if (isLastPage) {
+          break;
+        }
+      }
+
+      slice = slice + slice_count;
+    }
+    else {
+      slice = slice + 1;
+    }
+  }
+}
+
+void mi_heap_drop_segment(mi_heap_t* heap, size_t targetSegmentCount) {
+  bool segmentsDropped = false;
+
+  while (heap->tld->segments.count >= targetSegmentCount) {
+
+    // 1. Find a segment to drop (abandon) using the full-page queue
+    mi_segment_t* segmentToAbandon = mi_heap_get_segment_to_drop(heap);
+    if (segmentToAbandon == NULL) {
+      break;
+    }
+
+    segmentsDropped = true;
+    // 2. When abandoning, mark all pages to no longer add to delayed_free
+    mi_segment_visit_pages(heap, segmentToAbandon, &mi_heap_page_never_delayed_free, NULL);
+
+    // 3. Free all current thread delayed blocks.
+    //    (when abandoning, after this there are no more thread-delayed references into the pages)
+    _mi_heap_delayed_free_all(heap);
+
+    // 4. Collect all pages in the selected segment that are owned by this thread.
+    //    This effectively abandons the segment.
+    mi_collect_t collect = MI_ABANDON;
+    mi_segment_visit_pages(heap, segmentToAbandon, &mi_heap_page_collect, &collect);
+  }
+
+  if (segmentsDropped) {
+    // collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list)
+    // note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment
+    _mi_abandoned_collect(heap, false /* force? */, &heap->tld->segments);
+  }
+}
+
+void mi_heap_drop_segment_if_required(mi_heap_t* heap, size_t alloc_block_size)
+{
+  size_t targetSegmentCount = mi_option_get_size(mi_option_max_segments_per_heap);
+  if ((targetSegmentCount > 0) &&
+      (alloc_block_size <= MI_MEDIUM_OBJ_SIZE_MAX) &&
+      (heap->tld->segments.count >= targetSegmentCount)) {
+
+    mi_heap_drop_segment(heap, targetSegmentCount);
+  }
+}
\ No newline at end of file
diff --git a/src/options.c b/src/options.c
index a62727dd..099766a6 100644
--- a/src/options.c
+++ b/src/options.c
@@ -90,9 +90,10 @@ static mi_option_desc_t options[_mi_option_last] =
 #endif
   { 10,  UNINIT, MI_OPTION(arena_purge_mult) },            // purge delay multiplier for arena's
   { 1,   UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
-  { 1,   UNINIT, MI_OPTION(abandoned_reclaim_on_free) },   // reclaim an abandoned segment on a free
+  { 0,   UNINIT, MI_OPTION(abandoned_reclaim_on_free) },   // reclaim an abandoned segment on a free
   { 0,   UNINIT, MI_OPTION(disallow_arena_alloc) },        // 1 = do not use arena's for allocation (except if using specific arena id's)
   { 400, UNINIT, MI_OPTION(retry_on_oom) },                // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
+  { 8,   UNINIT, MI_OPTION(max_segments_per_heap) },       // max number of segments that a heap can own
 };

 static void mi_option_init(mi_option_desc_t* desc);
diff --git a/src/page-queue.c b/src/page-queue.c
index ceea91ee..0a2a2471 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -145,7 +145,7 @@ static inline bool mi_page_is_large_or_huge(const mi_page_t* page) {
   return (mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_huge(page));
 }

-static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
+mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
   mi_assert_internal(heap!=NULL);
   uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
   mi_assert_internal(bin <= MI_BIN_FULL);
@@ -264,8 +264,15 @@ static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_
   heap->page_count++;
 }

+// Return how full a page is as a percentage (0-100) of its reserved blocks.
+int32_t mi_get_page_usage(mi_page_t* page)
+{
+  _mi_page_free_collect(page, false);

-static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
+  int32_t usage = (100 * page->used) / page->reserved;
+  return usage;
+}
+
+static void mi_page_queue_enqueue_from2(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page, bool addToHead) {
   mi_assert_internal(page != NULL);
   mi_assert_expensive(mi_page_queue_contains(from, page));
   mi_assert_expensive(!mi_page_queue_contains(to, page));
@@ -292,8 +299,27 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
   page->next = NULL;
   if (to->last != NULL) {
     mi_assert_internal(heap == mi_page_heap(to->last));
-    to->last->next = page;
-    to->last = page;
+    // only add well-used pages (more than 50% full) to the head
+    addToHead = addToHead && (mi_get_page_usage(page) > 50);
+    if (addToHead) {
+      if (to->first == to->last) {
+        to->last->next = page;
+        to->last = page;
+      }
+      else {
+        // insert the page right after the first one
+        page->prev = to->first;
+        page->next = to->first->next;
+
+        if (to->first->next != NULL) {
+          to->first->next->prev = page;
+        }
+
+        to->first->next = page;
+      }
+    }
+    else {
+      to->last->next = page;
+      to->last = page;
+    }
   }
   else {
     to->first = page;
@@ -304,6 +330,10 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro
   mi_page_set_in_full(page, mi_page_queue_is_full(to));
 }

+static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) {
+  mi_page_queue_enqueue_from2(to, from, page, false);
+}
+
 // Only called from `mi_heap_absorb`.
 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append) {
   mi_assert_internal(mi_heap_contains_queue(heap,pq));
diff --git a/src/page.c b/src/page.c
index 871ed215..f3c4514a 100644
--- a/src/page.c
+++ b/src/page.c
@@ -358,7 +358,7 @@ void _mi_page_unfull(mi_page_t* page) {
   mi_page_set_in_full(page, false); // to get the right queue
   mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
   mi_page_set_in_full(page, true);
-  mi_page_queue_enqueue_from(pq, pqfull, page);
+  mi_page_queue_enqueue_from2(pq, pqfull, page, true /*addToHead*/);
 }

 static void mi_page_to_full(mi_page_t* page, mi_page_queue_t* pq) {
@@ -712,6 +712,52 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   Find pages with free blocks
 -------------------------------------------------------------*/

+int32_t mi_get_page_usage(mi_page_t* page);
+
+// Scan up to 5 pages from the head of the queue and return the most used one
+// that still has free blocks; completely full pages seen along the way are
+// moved to the full queue.
+mi_page_t* mi_page_queue_find_most_used_page(mi_page_queue_t* pq)
+{
+  mi_page_t* mostUsedPage = NULL;
+  int32_t mostUsedPageUsage = 0;
+  int32_t pagesChecked = 0;
+
+  mi_page_t* page = pq->first;
+  while ((page != NULL) && (pagesChecked < 5))
+  {
+    mi_page_t* next = page->next; // remember next
+
+    int32_t pageUsage = mi_get_page_usage(page);
+    if (pageUsage < 100) {
+      if ((mostUsedPage == NULL) || (pageUsage > mostUsedPageUsage)) {
+        mostUsedPage = page;
+        mostUsedPageUsage = pageUsage;
+      }
+
+      // a page that is more than half full is good enough
+      if (mostUsedPageUsage > 50) {
+        break;
+      }
+    }
+    else {
+      // If the page is completely full, move it to the `mi_pages_full`
+      // queue so we don't visit long-lived pages too often.
+      mi_assert_internal(!mi_page_is_in_full(page) && !mi_page_immediate_available(page));
+      mi_page_to_full(page, pq);
+    }
+
+    pagesChecked++;
+    page = next;
+  } // for each page
+
+  if (pq->first == pq->last) {
+    mostUsedPage = pq->first; // the page queue had only full pages after the first one
+  }
+
+  return mostUsedPage;
+}
+
 // Find a page with free blocks of `page->block_size`.
 static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try)
 {
@@ -719,7 +765,33 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
   #if MI_STAT
   size_t count = 0;
   #endif
   mi_page_t* page = pq->first;
+  if (page != NULL) {
+    page = mi_page_queue_find_most_used_page(pq);
+    if ((page != NULL) && (page != pq->first)) {
+
+      // remove the page from the list
+      mi_page_t* prev = page->prev;
+      mi_page_t* next = page->next;
+      prev->next = next;
+      if (next != NULL) {
+        next->prev = prev;
+      }
+      if (page == pq->last) {
+        pq->last = prev;
+      }
+
+      // add it to the head
+      page->prev = NULL;
+      page->next = pq->first;
+      pq->first->prev = page;
+      pq->first = page;
+      mi_heap_queue_first_update(heap, pq);
+    }
+  }
+
+  page = pq->first;
   while (page != NULL) {
     mi_page_t* next = page->next; // remember next
diff --git a/src/segment.c b/src/segment.c
index 4e4dcb80..4353cf49 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -158,7 +158,7 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx) {
 ----------------------------------------------------------- */

-static const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) {
+const mi_slice_t* mi_segment_slices_end(const mi_segment_t* segment) {
   return &segment->slices[segment->slice_entries];
 }

@@ -1318,7 +1318,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
       // the segment due to concurrent frees (in which case `NULL` is returned).
       return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld);
     }
-    else if (segment->abandoned_visits > 3 && is_suitable) {
+    else if (segment->abandoned_visits > 3 && is_suitable && mi_option_get_size(mi_option_max_segments_per_heap) == 0) {
       // always reclaim on 3rd visit to limit the abandoned queue length
       // (skipped when a per-heap segment limit is set, so that dropped
       // segments remain available to other threads)
       mi_segment_reclaim(segment, heap, 0, NULL, tld);
     }
@@ -1358,10 +1358,14 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
   Reclaim or allocate
 ----------------------------------------------------------- */

+void mi_heap_drop_segment_if_required(mi_heap_t* heap, size_t alloc_block_size);
+
 static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
 {
   mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX);

+  // drop (abandon) a segment first if this heap already owns too many segments
+  mi_heap_drop_segment_if_required(heap, block_size);
+
   // 1. try to reclaim an abandoned segment
   bool reclaimed;
   mi_segment_t* segment = mi_segment_try_reclaim(heap, needed_slices, block_size, &reclaimed, tld);
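
A note on usage (editorial addition, not part of the patch): the sketch below shows
how the new cap could be exercised from application code. mi_option_set and
mi_option_get_size are existing mimalloc APIs; mi_option_max_segments_per_heap is
the option introduced above (default 8; setting it to 0 disables the cap and
restores the old reclaim-on-3rd-visit behavior in mi_segment_try_reclaim). The
allocation size and count are illustrative only.

  // demo.c - exercising the per-heap segment cap (assumes this patch is applied)
  #include <mimalloc.h>
  #include <stdio.h>

  int main(void) {
    // Cap each heap at 4 segments. Once a heap reaches the cap, allocating
    // a fresh segment first abandons a segment found via the full-page
    // queue so other threads can reclaim its memory.
    mi_option_set(mi_option_max_segments_per_heap, 4);

    // When a new segment is needed, medium-sized allocations
    // (<= MI_MEDIUM_OBJ_SIZE_MAX) reach mi_segment_reclaim_or_alloc,
    // which now calls mi_heap_drop_segment_if_required first.
    void* blocks[1024];
    for (int i = 0; i < 1024; i++) {
      blocks[i] = mi_malloc(16 * 1024);
    }
    for (int i = 0; i < 1024; i++) {
      mi_free(blocks[i]);
    }

    printf("max segments per heap: %zu\n",
           mi_option_get_size(mi_option_max_segments_per_heap));
    return 0;
  }

Since mimalloc also reads options from the environment using the MIMALLOC_ prefix,
the cap should likewise be settable with MIMALLOC_MAX_SEGMENTS_PER_HEAP=4 (an
untested assumption based on mimalloc's usual option-name mapping).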