Experimental optimization for the segment reclamation logic.

The goal is to improve the efficiency of finding an abandoned segment that contains free space of the required size.
This change adds a free space bitmask to the segment object. A better place for the bitmask might be the arena (i.e. extend the arena's bitmask for abandoned segments to also include size information).
Sergiy Kuryata 2024-10-31 16:53:14 -07:00
parent b604099334
commit 911de68d3e
5 changed files with 86 additions and 7 deletions
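For illustration, the lookup this mask enables can be sketched roughly as follows. This sketch is not part of the diff; mi_segment_has_free_space_for is a hypothetical helper, while mi_free_space_mask_from_blocksize and the free_space_mask field are the ones introduced by this change:

// Each bit of the mask covers a group of size bins; a set bit means the
// abandoned segment has free space that can serve allocations of that group.
static bool mi_segment_has_free_space_for(mi_segment_t* segment, size_t block_size) {
  const size_t needed    = mi_free_space_mask_from_blocksize(block_size);
  const size_t available = mi_atomic_load_relaxed(&segment->free_space_mask);
  return (needed & available) != 0;   // no overlap: skip this segment while reclaiming
}

A reclaiming thread initializes its arena cursor with the mask for the requested block size and only claims segments whose stored mask overlaps it; segments without suitable free space are re-marked as abandoned and skipped.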

View file

@@ -137,8 +137,10 @@ typedef struct mi_arena_field_cursor_s { // abstract
mi_arena_id_t start;
int count;
size_t bitmap_idx;
size_t free_space_mask;
} mi_arena_field_cursor_t;
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current);
void _mi_arena_field_cursor_init2(mi_heap_t* heap, mi_arena_field_cursor_t* current, size_t free_space_mask);
mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous);
// "segment-map.c"
@@ -945,6 +947,7 @@ static inline size_t mi_bsr(uintptr_t x) {
return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x));
}
size_t mi_free_space_mask_from_blocksize(size_t size);
// ---------------------------------------------------------------------------------
// Provide our own `_mi_memcpy` for potential performance optimizations.

View file

@@ -220,6 +220,11 @@ typedef int32_t mi_ssize_t;
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
#endif
#define MI_FREE_SPACE_MASK_BIT_COUNT 31
#define MI_FREE_SPACE_BINS_PER_BIT (MI_BIN_HUGE/MI_FREE_SPACE_MASK_BIT_COUNT)
#define MI_FREE_SPACE_MASK_ALL ((size_t)0xFFFFFFFF)
#define MI_FREE_SPACE_MASK_ABANDONED ((size_t)0x80000000)
#define MI_FREE_SPACE_MASK_ANY (MI_FREE_SPACE_MASK_ALL & (~MI_FREE_SPACE_MASK_ABANDONED))
// ------------------------------------------------------
// Mimalloc pages contain allocated blocks
@@ -470,6 +475,7 @@ typedef struct mi_segment_s {
mi_segment_kind_t kind;
size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
_Atomic(size_t) free_space_mask; // bitmask that indicates which allocation sizes are available in this segment
mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one extra final entry for huge blocks with large alignment
} mi_segment_t;

View file

@@ -819,6 +819,15 @@ void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* curre
current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena));
current->count = 0;
current->bitmap_idx = 0;
current->free_space_mask = MI_FREE_SPACE_MASK_ANY;
}
void _mi_arena_field_cursor_init2(mi_heap_t* heap, mi_arena_field_cursor_t* current, size_t free_space_mask) {
const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena));
current->count = 0;
current->bitmap_idx = 0;
current->free_space_mask = free_space_mask;
}
// reclaim abandoned segments
@@ -829,6 +838,7 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
if (max_arena <= 0 || mi_atomic_load_relaxed(&abandoned_count) == 0) return NULL;
int count = previous->count;
size_t space_mask = previous->free_space_mask;
size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx);
size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1;
// visit arena's (from previous)
@@ -849,14 +859,21 @@ mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* pr
mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx);
// try to reclaim it atomically
if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) {
mi_atomic_decrement_relaxed(&abandoned_count);
previous->bitmap_idx = bitmap_idx;
previous->count = count;
mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx));
mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx);
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return segment;
if ((segment->free_space_mask & space_mask) != 0) {
mi_atomic_decrement_relaxed(&abandoned_count);
previous->bitmap_idx = bitmap_idx;
previous->count = count;
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0);
//mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx));
return segment;
}
else {
// mark abandoned again
_mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL);
}
}
}
}

View file

@@ -283,6 +283,16 @@ static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* seg
#endif
}
size_t size_mask = mi_free_space_mask_from_blocksize(page->block_size);
size_t new_free_space_mask;
size_t old_free_space_mask = mi_atomic_load_relaxed(&segment->free_space_mask);
do {
if (((old_free_space_mask & MI_FREE_SPACE_MASK_ABANDONED) == 0) || ((old_free_space_mask & size_mask) != 0)) {
break;
}
new_free_space_mask = old_free_space_mask | size_mask;
} while (!mi_atomic_cas_weak_release(&segment->free_space_mask, &old_free_space_mask, new_free_space_mask));
// and finally free the actual block by pushing it on the owning heap
// thread_delayed free list (or heap delayed free list)
mi_free_block_delayed_mt(page,block);

View file

@@ -386,6 +386,7 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) {
static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
segment->thread_id = 0;
segment->free_space_mask = 0;
_mi_segment_map_freed_at(segment);
mi_segments_track_size(-((long)mi_segment_size(segment)),tld);
if (segment->was_reclaimed) {
@@ -903,6 +904,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi
segment->segment_slices = segment_slices;
segment->segment_info_slices = info_slices;
segment->thread_id = _mi_thread_id();
segment->free_space_mask = 0;
segment->cookie = _mi_ptr_cookie(segment);
segment->slice_entries = slice_entries;
segment->kind = (required == 0 ? MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE);
@@ -1075,12 +1077,45 @@ void _mi_abandoned_await_readers(void) {
Abandon segment/page
----------------------------------------------------------- */
size_t mi_free_space_mask_from_blocksize(size_t size)
{
size_t free_space_mask = 0;
uint8_t page_queue_index = _mi_bin(size);
uint8_t byteIndex = page_queue_index / MI_FREE_SPACE_BINS_PER_BIT;
// index 40 is for size 16384 (MI_SMALL_OBJ_SIZE_MAX)
if (byteIndex >= MI_FREE_SPACE_MASK_BIT_COUNT) {
byteIndex = MI_FREE_SPACE_MASK_BIT_COUNT - 1;
}
free_space_mask = 1ULL << byteIndex;
return free_space_mask;
}
size_t mi_free_space_mask_from_slicecount(uint32_t slice_count)
{
size_t free_space_mask = 0;
size_t max_size = MI_SMALL_OBJ_SIZE_MAX;
if (slice_count >= MI_MEDIUM_PAGE_SIZE) {
max_size = slice_count * MI_SEGMENT_SLICE_SIZE;
}
free_space_mask = mi_free_space_mask_from_blocksize(max_size - 1);
free_space_mask = free_space_mask | (free_space_mask - 1); // mark all allocations with size < max_size as available
return free_space_mask;
}
static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
mi_assert_internal(segment->used == segment->abandoned);
mi_assert_internal(segment->used > 0);
mi_assert_internal(segment->abandoned_visits == 0);
mi_assert_expensive(mi_segment_is_valid(segment,tld));
size_t free_space_mask = MI_FREE_SPACE_MASK_ABANDONED;
mi_atomic_exchange_acq_rel(&segment->free_space_mask, free_space_mask);
// remove the free pages from the free page queues
mi_slice_t* slice = &segment->slices[0];
const mi_slice_t* end = mi_segment_slices_end(segment);
@@ -1090,6 +1125,10 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
if (slice->block_size == 0) { // a free page
mi_segment_span_remove_from_queue(slice,tld);
slice->block_size = 0; // but keep it free
free_space_mask |= mi_free_space_mask_from_slicecount(slice->slice_count);
}
else if (slice->used < slice->reserved) {
free_space_mask |= mi_free_space_mask_from_blocksize(slice->block_size);
}
slice = slice + slice->slice_count;
}
@@ -1109,6 +1148,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) {
tld->reclaim_count--;
segment->was_reclaimed = false;
}
mi_atomic_or_acq_rel(&segment->free_space_mask, free_space_mask);
_mi_arena_segment_mark_abandoned(segment);
}
@@ -1191,6 +1232,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
// can be 0 still with abandoned_next, or already a thread id for segments outside an arena that are reclaimed on a free.
mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id());
mi_atomic_store_release(&segment->thread_id, _mi_thread_id());
segment->free_space_mask = 0;
segment->abandoned_visits = 0;
segment->was_reclaimed = true;
tld->reclaim_count++;
@@ -1295,7 +1337,8 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
if (max_tries <= 0) return NULL;
mi_segment_t* segment;
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
size_t free_space_mask = mi_free_space_mask_from_blocksize(block_size);
mi_arena_field_cursor_t current; _mi_arena_field_cursor_init2(heap, &current, free_space_mask);
while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
{
segment->abandoned_visits++;