From 911de68d3e6de9f26dcc206ead1db8f4694dde7b Mon Sep 17 00:00:00 2001 From: Sergiy Kuryata Date: Thu, 31 Oct 2024 16:53:14 -0700 Subject: [PATCH] =?UTF-8?q?Experimental=20optimization=20for=20the=20segme?= =?UTF-8?q?nt=20reclamation=20logic.=20The=20goal=20is=20to=20improve=20ef?= =?UTF-8?q?ficiency=20of=20finding=20an=20abandoned=20segment=20that=20con?= =?UTF-8?q?tains=20free=20space=20of=20the=20required=20size.=20This=20cha?= =?UTF-8?q?nge=20adds=20a=20free=20space=20bitmask=20to=20the=20segment=20?= =?UTF-8?q?object.=20More=20optimal=20place=20for=20the=20bitmask=20could?= =?UTF-8?q?=20be=20in=20the=20arena=20(i.e.=20extend=20the=20arena?= =?UTF-8?q?=E2=80=99s=20bitmask=20for=20abandoned=20segments=20to=20includ?= =?UTF-8?q?e=20size=20information=20too).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/mimalloc/internal.h | 3 +++ include/mimalloc/types.h | 6 +++++ src/arena.c | 29 +++++++++++++++++++----- src/free.c | 10 +++++++++ src/segment.c | 45 ++++++++++++++++++++++++++++++++++++- 5 files changed, 86 insertions(+), 7 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 6c6e5ed0..aad8a92b 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -137,8 +137,10 @@ typedef struct mi_arena_field_cursor_s { // abstract mi_arena_id_t start; int count; size_t bitmap_idx; + size_t free_space_mask; } mi_arena_field_cursor_t; void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current); +void _mi_arena_field_cursor_init2(mi_heap_t* heap, mi_arena_field_cursor_t* current, size_t free_space_mask); mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous); // "segment-map.c" @@ -945,6 +947,7 @@ static inline size_t mi_bsr(uintptr_t x) { return (x==0 ? 
MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); } +size_t mi_free_space_mask_from_blocksize(size_t size); // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 2fdde904..c52cbcb7 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -220,6 +220,11 @@ typedef int32_t mi_ssize_t; #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX #endif +#define MI_FREE_SPACE_MASK_BIT_COUNT 31 +#define MI_FREE_SPACE_BINS_PER_BIT (MI_BIN_HUGE/MI_FREE_SPACE_MASK_BIT_COUNT) +#define MI_FREE_SPACE_MASK_ALL ((size_t)0xFFFFFFFF) +#define MI_FREE_SPACE_MASK_ABANDONED ((size_t)0x80000000) +#define MI_FREE_SPACE_MASK_ANY (MI_FREE_SPACE_MASK_ALL & (~MI_FREE_SPACE_MASK_ABANDONED)) // ------------------------------------------------------ // Mimalloc pages contain allocated blocks @@ -470,6 +475,7 @@ typedef struct mi_segment_s { mi_segment_kind_t kind; size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + _Atomic(size_t) free_space_mask; // bitmask that indicates which allocation sizes are available in this segment mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one extra final entry for huge blocks with large alignment } mi_segment_t; diff --git a/src/arena.c b/src/arena.c index 648ee844..9de7cc03 100644 --- a/src/arena.c +++ b/src/arena.c @@ -819,6 +819,15 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* curre current->start = (max_arena == 0 ?
0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena)); current->count = 0; current->bitmap_idx = 0; + current->free_space_mask = MI_FREE_SPACE_MASK_ANY; +} + +void _mi_arena_field_cursor_init2(mi_heap_t* heap, mi_arena_field_cursor_t* current, size_t free_space_mask) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->count = 0; + current->bitmap_idx = 0; + current->free_space_mask = free_space_mask; } // reclaim abandoned segments @@ -829,6 +838,7 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr if (max_arena <= 0 || mi_atomic_load_relaxed(&abandoned_count) == 0) return NULL; int count = previous->count; + size_t space_mask = previous->free_space_mask; size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx); size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1; // visit arena's (from previous) @@ -849,14 +859,21 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); // try to reclaim it atomically if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) { - mi_atomic_decrement_relaxed(&abandoned_count); - previous->bitmap_idx = bitmap_idx; - previous->count = count; + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx); - mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); - //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); - return segment; + if ((segment->free_space_mask & space_mask) != 0) { + mi_atomic_decrement_relaxed(&abandoned_count); + previous->bitmap_idx = bitmap_idx; + previous->count = count; + 
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + return segment; + } + else { + // mark abandoned again + _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); + } } } } diff --git a/src/free.c b/src/free.c index b9cb6346..ed3c7d08 100644 --- a/src/free.c +++ b/src/free.c @@ -283,6 +283,16 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg #endif } + size_t size_mask = mi_free_space_mask_from_blocksize(page->block_size); + size_t new_free_space_mask; + size_t old_free_space_mask = mi_atomic_load_relaxed(&segment->free_space_mask); + do { + if (((old_free_space_mask & MI_FREE_SPACE_MASK_ABANDONED) == 0) || ((old_free_space_mask & size_mask) != 0)) { + break; + } + new_free_space_mask = old_free_space_mask | size_mask; + } while (!mi_atomic_cas_weak_release(&segment->free_space_mask, &old_free_space_mask, new_free_space_mask)); + // and finally free the actual block by pushing it on the owning heap // thread_delayed free list (or heap delayed free list) mi_free_block_delayed_mt(page,block); diff --git a/src/segment.c b/src/segment.c index 4353cf49..231d99ac 100644 --- a/src/segment.c +++ b/src/segment.c @@ -386,6 +386,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { segment->thread_id = 0; + segment->free_space_mask = 0; _mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)mi_segment_size(segment)),tld); if (segment->was_reclaimed) { @@ -903,6 +904,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; segment->thread_id = _mi_thread_id(); + segment->free_space_mask = 0; segment->cookie = 
_mi_ptr_cookie(segment); segment->slice_entries = slice_entries; segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); @@ -1075,12 +1077,45 @@ void _mi_abandoned_await_readers(void) { Abandon segment/page ----------------------------------------------------------- */ +size_t mi_free_space_mask_from_blocksize(size_t size) +{ + size_t free_space_mask = 0; + uint8_t page_queue_index = _mi_bin(size); + uint8_t byteIndex = page_queue_index / MI_FREE_SPACE_BINS_PER_BIT; + + // index 40 is for size 16384 (MI_SMALL_OBJ_SIZE_MAX) + if (byteIndex >= MI_FREE_SPACE_MASK_BIT_COUNT) { + byteIndex = MI_FREE_SPACE_MASK_BIT_COUNT - 1; + } + + free_space_mask = 1ULL << byteIndex; + return free_space_mask; +} + +size_t mi_free_space_mask_from_slicecount(uint32_t slice_count) +{ + size_t free_space_mask = 0; + size_t max_size = MI_SMALL_OBJ_SIZE_MAX; + + if (slice_count >= MI_MEDIUM_PAGE_SIZE) { + max_size = slice_count * MI_SEGMENT_SLICE_SIZE; + } + + free_space_mask = mi_free_space_mask_from_blocksize(max_size - 1); + free_space_mask = free_space_mask | (free_space_mask - 1); // mark all allocations with size < max_size as available + + return free_space_mask; +} + static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); mi_assert_internal(segment->abandoned_visits == 0); mi_assert_expensive(mi_segment_is_valid(segment,tld)); + size_t free_space_mask = MI_FREE_SPACE_MASK_ABANDONED; + mi_atomic_exchange_acq_rel(&segment->free_space_mask, free_space_mask); + // remove the free pages from the free page queues mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -1090,6 +1125,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { if (slice->block_size == 0) { // a free page mi_segment_span_remove_from_queue(slice,tld); slice->block_size = 0; // but keep it free + 
free_space_mask |= mi_free_space_mask_from_slicecount(slice->slice_count); + } + else if (slice->used < slice->reserved) { + free_space_mask |= mi_free_space_mask_from_blocksize(slice->block_size); } slice = slice + slice->slice_count; } @@ -1109,6 +1148,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { tld->reclaim_count--; segment->was_reclaimed = false; } + + mi_atomic_or_acq_rel(&segment->free_space_mask, free_space_mask); _mi_arena_segment_mark_abandoned(segment); } @@ -1191,6 +1232,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, // can be 0 still with abandoned_next, or already a thread id for segments outside an arena that are reclaimed on a free. mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id()); mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); + segment->free_space_mask = 0; segment->abandoned_visits = 0; segment->was_reclaimed = true; tld->reclaim_count++; @@ -1295,7 +1337,8 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice if (max_tries <= 0) return NULL; mi_segment_t* segment; - mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, ¤t); + size_t free_space_mask = mi_free_space_mask_from_blocksize(block_size); + mi_arena_field_cursor_t current; _mi_arena_field_cursor_init2(heap, ¤t, free_space_mask); while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) { segment->abandoned_visits++;