From ad18e029945282c90d7943df46f70ac44184fa72 Mon Sep 17 00:00:00 2001
From: Sergiy Kuryata
Date: Sat, 4 Jan 2025 16:49:14 -0800
Subject: [PATCH] Various performance tuning

Cache each page's bin index and free-space bit in mi_page_t so the hot
paths (page-queue lookup, cross-thread free) do not recompute them,
randomize the starting bitmap index of the abandoned-segment cursor,
and make the abandoned-segment collection limits configurable through
two new options (heap_collect_abandoned_count and
max_candidate_segments_to_check). Also retune max_segments_per_heap
and heap_collect_abandoned_interval, and drop the per-queue
allocationCount counter.
---
 include/mimalloc.h          |  6 ++++--
 include/mimalloc/internal.h |  5 +++++
 include/mimalloc/types.h    |  3 ++-
 src/alloc.c                 |  1 -
 src/arena.c                 | 26 ++++++++++++--------------
 src/free.c                  |  2 +-
 src/heap.c                  | 19 ++++++++++++-------
 src/init.c                  |  2 ++
 src/options.c               |  6 ++++--
 src/page-queue.c            |  2 +-
 src/page.c                  |  7 +++++--
 src/segment.c               | 18 +++++++++++-------
 12 files changed, 59 insertions(+), 38 deletions(-)

diff --git a/include/mimalloc.h b/include/mimalloc.h
index 7871a4ed..dfbe7ac2 100644
--- a/include/mimalloc.h
+++ b/include/mimalloc.h
@@ -348,8 +348,10 @@ typedef enum mi_option_e {
   mi_option_abandoned_reclaim_on_free,        // allow to reclaim an abandoned segment on a free (=1)
   mi_option_disallow_arena_alloc,             // 1 = do not use arena's for allocation (except if using specific arena id's)
   mi_option_retry_on_oom,                     // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows)
-  mi_option_max_segments_per_heap,            // max number of segments that heap can own
-  mi_option_heap_collect_abandoned_interval,  // interval (in ms) between collecting abandoned segments when a heap drops exessive segments
+  mi_option_max_segments_per_heap,            // max number of segments that a heap can own
+  mi_option_heap_collect_abandoned_interval,  // interval (in ms) between collecting abandoned segments when a heap drops excessive segments
+  mi_option_heap_collect_abandoned_count,     // number of abandoned segments to collect when a heap drops excessive segments
+  mi_option_max_candidate_segments_to_check,  // max number of abandoned segments to check before selecting one to reclaim
   _mi_option_last,
   // legacy option names
   mi_option_large_os_pages = mi_option_allow_large_os_pages,
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index f43fc151..12164bb1 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -164,6 +164,7 @@ uint8_t*   _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t*
 void       _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
 void       _mi_abandoned_await_readers(void);
 void       _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
+void       _mi_abandoned_collect_clamp(mi_heap_t* heap, bool force, long max_segment_count, mi_segments_tld_t* tld);
 bool       _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment);

 // "page.c"
@@ -602,6 +603,10 @@ static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size)
   return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
 }

+static inline mi_page_queue_t* mi_queue_of_page(const mi_heap_t* heap, const mi_page_t* page) {
+  return &((mi_heap_t*)heap)->pages[page->bin_index];
+}
+

 //-----------------------------------------------------------
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index fea971b0..ce6cf8c8 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -327,6 +327,8 @@ typedef struct mi_page_s {
   uint8_t  block_size_shift;  // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
   uint8_t  heap_tag;          // tag of the owning heap, used for separated heaps by object type
   // padding
+  uint8_t  bin_index;         // bin index of the page queue this page belongs to
+  size_t   free_space_bit;    // bit mask for quickly updating the free_space_mask of the owning segment
   size_t   block_size;        // size available in each block (always `>0`)
   uint8_t* page_start;        // start of the page area containing the blocks
@@ -504,7 +506,6 @@ typedef struct mi_page_queue_s {
   mi_page_t* first;
   mi_page_t* last;
   size_t     block_size;
-  size_t     allocationCount;
 } mi_page_queue_t;

 #define MI_BIN_FULL (MI_BIN_HUGE+1)
diff --git a/src/alloc.c b/src/alloc.c
index 9b9a6c01..8486b533 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -41,7 +41,6 @@ extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_
   page->used++;

   mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page);
-  pq->allocationCount++;
   mi_allocation_stats_increment(page->block_size);

   mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
diff --git a/src/arena.c b/src/arena.c
index c51ec843..254f00a5 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -824,25 +824,23 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment)

 // start a cursor at a randomized arena
 void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current) {
-  const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
-
-  if (heap != NULL) {
-    current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
-  }
-  else {
-    current->start = 0;
-  }
-
-  current->count = 0;
-  current->bitmap_idx = 0;
-  current->free_space_mask = MI_FREE_SPACE_MASK_ANY;
+  _mi_arena_field_cursor_init2(heap, current, MI_FREE_SPACE_MASK_ANY);
 }

 void _mi_arena_field_cursor_init2(mi_heap_t* heap, mi_arena_field_cursor_t* current, size_t free_space_mask) {
   const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
-  current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena));
-  current->count = 0;
+
+  current->start = 0;
   current->bitmap_idx = 0;
+  if (heap != NULL) {
+    current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
+    mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[current->start]);
+    if (arena != NULL) {
+      current->bitmap_idx = _mi_heap_random_next(heap) % (arena->field_count * MI_BITMAP_FIELD_BITS);
+    }
+  }
+
+  current->count = 0;
   current->free_space_mask = free_space_mask;
 }
diff --git a/src/free.c b/src/free.c
index 4b2eeac6..56bbb4d8 100644
--- a/src/free.c
+++ b/src/free.c
@@ -287,7 +287,7 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
 #endif
   }

-  size_t size_mask = mi_free_space_mask_from_blocksize(page->block_size);
+  size_t size_mask = page->free_space_bit;
   size_t new_free_space_mask;
   size_t old_free_space_mask = mi_atomic_load_relaxed(&segment->free_space_mask);
   do {
diff --git a/src/heap.c b/src/heap.c
index 0a5f84c4..b9fd58d5 100644
--- a/src/heap.c
+++ b/src/heap.c
@@ -90,11 +90,14 @@ typedef enum mi_collect_e {

 static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) {
-  MI_UNUSED(arg2);
   MI_UNUSED(heap);
   mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL));
   mi_collect_t collect = *((mi_collect_t*)arg_collect);
-  _mi_page_free_collect(page, collect >= MI_FORCE);
+  bool collect_free_pages = (arg2 == NULL) || *((bool*)arg2);
+
+  if (collect_free_pages) {
+    _mi_page_free_collect(page, collect >= MI_FORCE);
+  }
   if (collect == MI_FORCE) {
     // note: call before a potential `_mi_page_free` as the segment may be freed if this was the last used page in that segment.
     mi_segment_t* segment = _mi_page_segment(page);
@@ -695,7 +698,7 @@ static mi_segment_t* mi_heap_get_segment_to_drop(mi_heap_t* heap, size_t targetS
 }

 // Visit all pages in a segment
-static mi_decl_noinline void mi_segment_visit_pages(mi_heap_t* heap, mi_segment_t* segment, heap_page_visitor_fun* fn, void* arg1)
+static mi_decl_noinline void mi_segment_visit_pages(mi_heap_t* heap, mi_segment_t* segment, heap_page_visitor_fun* fn, void* arg1, void* arg2)
 {
   // Visit all pages in the segments
   // Note: starting from the 2nd slice because
@@ -714,7 +717,7 @@ static mi_decl_noinline void mi_segment_visit_pages(mi_heap_t* heap, mi_segment_
   if (slice_count > 0) {
     if (slice->block_size > 1) {
       mi_page_t* page = (mi_page_t*)slice;
-      fn(heap, mi_heap_page_queue_of(heap, page), page, arg1, NULL);
+      fn(heap, mi_heap_page_queue_of(heap, page), page, arg1, arg2);

       if (isLastPage) {
         break;
@@ -741,7 +744,7 @@ void mi_heap_drop_segment(mi_heap_t* heap, size_t targetSegmentCount, size_t all
     segmentsDropped = true;

     // 2. when abandoning, mark all pages to no longer add to delayed_free
-    mi_segment_visit_pages(heap, segmentToAbandon, &mi_heap_page_never_delayed_free, NULL);
+    mi_segment_visit_pages(heap, segmentToAbandon, &mi_heap_page_never_delayed_free, NULL, NULL);

     // 3. free all current thread delayed blocks.
     // (when abandoning, after this there are no more thread-delayed references into the pages.)
@@ -750,7 +753,8 @@ void mi_heap_drop_segment(mi_heap_t* heap, size_t targetSegmentCount, size_t all
     // 4. collect all pages in the selected segment owned by this thread
     // This will effectively abandon the segment.
     mi_collect_t collect = MI_ABANDON;
-    mi_segment_visit_pages(heap, segmentToAbandon, &mi_heap_page_collect, &collect);
+    bool collect_free_pages = false;
+    mi_segment_visit_pages(heap, segmentToAbandon, &mi_heap_page_collect, &collect, &collect_free_pages);
   } while (heap->tld->segments.count >= targetSegmentCount);

   if (segmentsDropped) {
@@ -761,7 +765,8 @@ void mi_heap_drop_segment(mi_heap_t* heap, size_t targetSegmentCount, size_t all

     // collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list)
     // note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment
-    _mi_abandoned_collect(heap, false /* force? */, &heap->tld->segments);
+    long max_segment_count = mi_option_get(mi_option_heap_collect_abandoned_count);
+    _mi_abandoned_collect_clamp(heap, false /* force? */, max_segment_count, &heap->tld->segments);
   }
  }
 }
diff --git a/src/init.c b/src/init.c
index e9c45442..e9ffc16e 100644
--- a/src/init.c
+++ b/src/init.c
@@ -26,6 +26,8 @@ const mi_page_t _mi_page_empty = {
   0,       // used
   0,       // block size shift
   0,       // heap tag
+  0,       // bin index
+  0,       // free space bit
   0,       // block_size
   NULL,    // page_start
 #if (MI_PADDING || MI_ENCODE_FREELIST)
diff --git a/src/options.c b/src/options.c
index fc3348ba..bd87acaf 100644
--- a/src/options.c
+++ b/src/options.c
@@ -93,8 +93,10 @@ static mi_option_desc_t options[_mi_option_last] =
   { 0,    UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free
   { 0,    UNINIT, MI_OPTION(disallow_arena_alloc) },     // 1 = do not use arena's for allocation (except if using specific arena id's)
   { 400,  UNINIT, MI_OPTION(retry_on_oom) },             // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
-  { 8,    UNINIT, MI_OPTION(max_segments_per_heap) },    // max number of segments that heap can own.
-  { 2000, UNINIT, MI_OPTION(heap_collect_abandoned_interval) }, // delay (ms) in between collecting abandoned segments when a heap drops exessive segments.
+  { 5,    UNINIT, MI_OPTION(max_segments_per_heap) },    // max number of segments that a heap can own.
+  { 16,   UNINIT, MI_OPTION(heap_collect_abandoned_interval) }, // delay (ms) between collecting abandoned segments when a heap drops excessive segments.
+  { 16,   UNINIT, MI_OPTION(heap_collect_abandoned_count) },    // number of abandoned segments to collect when a heap drops excessive segments
+  { 3,    UNINIT, MI_OPTION(max_candidate_segments_to_check) }, // max number of abandoned segments to check before selecting one to reclaim.
 };

 static void mi_option_init(mi_option_desc_t* desc);
diff --git a/src/page-queue.c b/src/page-queue.c
index 0a2a2471..40f343b3 100644
--- a/src/page-queue.c
+++ b/src/page-queue.c
@@ -147,7 +147,7 @@ static inline bool mi_page_is_large_or_huge(const mi_page_t* page) {

 mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) {
   mi_assert_internal(heap!=NULL);
-  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page))));
+  uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : page->bin_index));
   mi_assert_internal(bin <= MI_BIN_FULL);
   mi_page_queue_t* pq = &heap->pages[bin];
   mi_assert_internal((mi_page_block_size(page) == pq->block_size) ||
diff --git a/src/page.c b/src/page.c
index 668ec987..839dbccf 100644
--- a/src/page.c
+++ b/src/page.c
@@ -264,7 +264,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
   #endif

   // TODO: push on full queue immediately if it is full?
-  mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
+  mi_page_queue_t* pq = mi_queue_of_page(heap, page);
   mi_page_queue_push(heap, pq, page);
   mi_assert_expensive(_mi_page_is_valid(page));
 }
@@ -302,7 +302,7 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
   mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0);
   if (page==NULL) return NULL;
   mi_assert_internal(pq->block_size==mi_page_block_size(page));
-  mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
+  mi_assert_internal(pq==mi_queue_of_page(heap, page));
   return page;
 }
@@ -687,6 +687,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
     page->block_size_shift = 0;
   }

+  page->bin_index = mi_bin(page->block_size);
+  page->free_space_bit = mi_free_space_mask_from_blocksize(page->block_size);
+
   mi_assert_internal(page->capacity == 0);
   mi_assert_internal(page->free == NULL);
   mi_assert_internal(page->used == 0);
diff --git a/src/segment.c b/src/segment.c
index e2930638..069858f8 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -1298,7 +1298,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
         hasUsedSmallPage = true;
       }
       if (mi_page_has_any_available(page)) {
-        free_space_mask |= mi_free_space_mask_from_blocksize(slice->block_size);
+        free_space_mask |= page->free_space_bit;
       }
     }
     slice = slice + slice->slice_count;
@@ -1408,10 +1408,10 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
         // a page has available free blocks of the right size
         has_page = true;
         *hasExactPage = true;
-        free_space_mask |= mi_free_space_mask_from_blocksize(page->block_size);
+        free_space_mask |= page->free_space_bit;
       }
       else if (mi_page_has_any_available(page)) {
-        free_space_mask |= mi_free_space_mask_from_blocksize(page->block_size);
+        free_space_mask |= page->free_space_bit;
       }
     }
   }
@@ -1560,7 +1560,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
   size_t free_space_mask = mi_free_space_mask_from_blocksize(block_size);
   mi_page_kind_t target_page_kind = mi_page_kind_from_size(block_size);
   mi_segment_t* best_candidate_segment = NULL;
-  int candidates_to_check = 8;
+  long candidates_to_check = mi_option_get(mi_option_max_candidate_segments_to_check);
   mi_arena_field_cursor_t current;
   _mi_arena_field_cursor_init2(heap, &current, free_space_mask);
   while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
@@ -1642,13 +1642,12 @@
   return NULL;
 }
-
-void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
+void _mi_abandoned_collect_clamp(mi_heap_t* heap, bool force, long max_segment_count, mi_segments_tld_t* tld)
 {
   mi_segment_t* segment;
   bool hasExactPage = false;
   mi_arena_field_cursor_t current;
   _mi_arena_field_cursor_init(heap, &current);
-  long max_tries = (force ? (long)_mi_arena_segment_abandoned_count() : 1024); // limit latency
+  long max_tries = (force ? (long)_mi_arena_segment_abandoned_count() : max_segment_count); // limit latency
   while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL)) {
     mi_segment_check_free(segment,0,0,&hasExactPage,tld); // try to free up pages (due to concurrent frees)
     if (segment->used == 0) {
@@ -1666,6 +1665,11 @@
   }
 }

+void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
+{
+  _mi_abandoned_collect_clamp(heap, force, 1024, tld);
+}
+
 /* -----------------------------------------------------------
    Reclaim or allocate
 ----------------------------------------------------------- */
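
A note on the free_space_bit caching above: before this change, the cross-thread free path recomputed mi_free_space_mask_from_blocksize(page->block_size) on every free, while the patch computes the bit once in mi_page_init() and the hot paths only read the cached field. The following is a self-contained sketch of that pattern using simplified, hypothetical stand-in types; mimalloc's real structures, size-class mapping, and the compare-exchange loop in mi_free_block_mt differ.

#include <stdatomic.h>
#include <stddef.h>

// Hypothetical, simplified stand-ins for mi_page_t and mi_segment_t.
typedef struct page_s {
  size_t block_size;
  size_t free_space_bit;           // cached once at page init
} page_t;

typedef struct segment_s {
  _Atomic size_t free_space_mask;  // one bit per size class with free space
} segment_t;

// Illustrative size-class bucketing; mimalloc's real mapping differs.
static size_t free_space_bit_from_blocksize(size_t block_size) {
  size_t bit = 0;
  while ((((size_t)16) << bit) < block_size && bit < 63) bit++;
  return (size_t)1 << bit;
}

static void page_init(page_t* page, size_t block_size) {
  page->block_size = block_size;
  // Computed once here, as the patch does in mi_page_init() ...
  page->free_space_bit = free_space_bit_from_blocksize(block_size);
}

// ... so the free path only loads a field and ORs it into the segment mask,
// instead of recomputing the mask on every cross-thread free.
static void note_free_space(segment_t* segment, const page_t* page) {
  atomic_fetch_or_explicit(&segment->free_space_mask,
                           page->free_space_bit, memory_order_relaxed);
}

int main(void) {
  segment_t seg;
  atomic_init(&seg.free_space_mask, 0);
  page_t page;
  page_init(&page, 64);
  note_free_space(&seg, &page);
  return atomic_load(&seg.free_space_mask) == page.free_space_bit ? 0 : 1;
}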
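
The new collection limits are ordinary mimalloc options, so they can also be tuned per process through the public options API (or through the matching MIMALLOC_-prefixed environment variables that mimalloc reads at startup). A minimal usage sketch against the patched headers; the values below are illustrative, not recommendations.

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Let each heap own a few more segments before it starts dropping them
  // (the patch lowers the default from 8 to 5).
  mi_option_set(mi_option_max_segments_per_heap, 8);

  // When a heap does drop segments, collect at most 32 abandoned segments,
  // and no more often than every 50 ms.
  mi_option_set(mi_option_heap_collect_abandoned_count, 32);
  mi_option_set(mi_option_heap_collect_abandoned_interval, 50);

  // Examine up to 4 candidate abandoned segments before reclaiming one.
  mi_option_set(mi_option_max_candidate_segments_to_check, 4);

  void* p = mi_malloc(64);
  printf("allocated %p with tuned segment options\n", p);
  mi_free(p);
  return 0;
}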