From 7020ed5e5230b8cdab4dea033dd472841c6e65f7 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 29 Feb 2024 11:26:03 -0800 Subject: [PATCH 1/4] do not purge if purge delay is negative --- src/arena.c | 96 +++++++++++++++++++++++++-------------------------- src/segment.c | 32 ++++++++--------- 2 files changed, 64 insertions(+), 64 deletions(-) diff --git a/src/arena.c b/src/arena.c index 0f71e978..42aac8fc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -13,7 +13,7 @@ threads and need to be accessed using atomic operations. Arenas are used to for huge OS page (1GiB) reservations or for reserving OS memory upfront which can be improve performance or is sometimes needed -on embedded devices. We can also employ this with WASI or `sbrk` systems +on embedded devices. We can also employ this with WASI or `sbrk` systems to reserve large arenas upfront and be able to reuse the memory more effectively. The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. @@ -48,13 +48,13 @@ typedef struct mi_arena_s { size_t meta_size; // size of the arena structure itself (including its bitmaps) mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node - bool exclusive; // only allow allocations if specifically for this arena + bool exclusive; // only allow allocations if specifically for this arena bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(size_t) search_idx; // optimization to start the search for free blocks - _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) - mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -94,13 +94,13 @@ bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_i return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); } else { - return mi_arena_id_is_suitable(0, false, request_arena_id); + return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id); } } /* ----------------------------------------------------------- - Arena allocations get a (currently) 16-bit memory id where the + Arena allocations get a (currently) 16-bit memory id where the lower 8 bits are the arena id, and the upper bits the block index. 
----------------------------------------------------------- */ @@ -208,7 +208,7 @@ static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index { size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; @@ -228,7 +228,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar mi_bitmap_index_t bitmap_index; if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL; - // claimed it! + // claimed it! void* p = mi_arena_block_start(arena, bitmap_index); *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); memid->is_pinned = arena->memid.is_pinned; @@ -268,21 +268,21 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar // no need to commit, but check if already fully committed memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } - + return p; } // allocate in a speficic arena -static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, - bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) +static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); - const size_t bcount = mi_block_count_of_size(size); + const size_t bcount = mi_block_count_of_size(size); const size_t arena_index = mi_arena_id_index(arena_id); mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); mi_assert_internal(size <= mi_arena_block_size(bcount)); - + // Check arena suitability mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; @@ -302,7 +302,7 @@ static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_no // allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, +static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { @@ -310,9 +310,9 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); if mi_likely(max_arena == 0) return NULL; - + if (req_arena_id != _mi_arena_id_none()) { - // try a specific arena if requested + // try a specific arena if requested if (mi_arena_id_index(req_arena_id) < max_arena) { void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; @@ -320,7 +320,7 @@ static mi_decl_noinline void* 
mi_arena_try_alloc(int numa_node, size_t size, siz } else { // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { + for (size_t i = 0; i < max_arena; i++) { void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } @@ -348,22 +348,22 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); if (arena_reserve == 0) return false; - if (!_mi_os_has_virtual_reserve()) { + if (!_mi_os_has_virtual_reserve()) { arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example) } arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); if (arena_count >= 8 && arena_count <= 128) { arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially - } + } if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size - + // commit eagerly? bool arena_commit = false; if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0); -} +} void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, @@ -378,9 +378,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); - if (p != NULL) return p; + if (p != NULL) return p; - // otherwise, try to first eagerly reserve a new arena + // otherwise, try to first eagerly reserve a new arena if (req_arena_id == _mi_arena_id_none()) { mi_arena_id_t arena_id = 0; if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { @@ -397,14 +397,14 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset errno = ENOMEM; return NULL; } - + // finally, fall back to the OS if (align_offset > 0) { return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); } else { return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); - } + } } void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) @@ -440,22 +440,22 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_assert_internal(arena->blocks_purge != NULL); mi_assert_internal(!arena->memid.is_pinned); const size_t size = mi_arena_block_size(blocks); - void* const p = mi_arena_block_start(arena, bitmap_idx); + void* const p = mi_arena_block_start(arena, bitmap_idx); bool needs_recommit; if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { // all blocks are committed, we can purge freely needs_recommit = _mi_os_purge(p, size, stats); } else { - // some blocks are not committed -- this can happen when a partially committed block is freed + // some blocks are not committed -- this can 
happen when a partially committed block is freed // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge - // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), // and also undo the decommit stats (as it was already adjusted) mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); _mi_stat_increase(&stats->committed, size); } - + // clear the purged blocks _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); // update committed bitmap @@ -473,7 +473,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t if (_mi_preloading() || delay == 0) { // decommit directly - mi_arena_purge(arena, bitmap_idx, blocks, stats); + mi_arena_purge(arena, bitmap_idx, blocks, stats); } else { // schedule decommit @@ -515,7 +515,7 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, } // returns true if anything was purged -static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) +static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) { if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false; mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); @@ -524,10 +524,10 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi // reset expire (if not already set concurrently) mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0); - + // potential purges scheduled, walk through the bitmap bool any_purged = false; - bool full_purge = true; + bool full_purge = true; for (size_t i = 0; i < arena->field_count; i++) { size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]); if (purge != 0) { @@ -578,7 +578,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) // allow only one thread to purge at a time static mi_atomic_guard_t purge_guard; - mi_atomic_guard(&purge_guard) + mi_atomic_guard(&purge_guard) { mi_msecs_t now = _mi_clock_now(); size_t max_purge_count = (visit_all ? 
max_arena : 1); @@ -591,7 +591,7 @@ static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) } } } - } + } } @@ -605,7 +605,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); - + if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through if (!all_committed && committed_size > 0) { @@ -623,7 +623,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); const size_t blocks = mi_block_count_of_size(size); - + // checks if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); @@ -645,7 +645,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi else { mi_assert_internal(arena->blocks_committed != NULL); mi_assert_internal(arena->blocks_purge != NULL); - + if (!all_committed) { // mark the entire range as no longer committed (so we recommit the full range when re-using) _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); @@ -660,9 +660,9 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi // works (as we should never reset decommitted parts). } // (delay) purge the entire range - mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); + mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); } - + // and make it available to others again bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!all_inuse) { @@ -687,9 +687,9 @@ static void mi_arenas_unsafe_destroy(void) { for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL) { - if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) { + if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) { mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); - _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main); + _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main); } else { new_max_arena = i; @@ -712,7 +712,7 @@ void _mi_arena_collect(bool force_purge, mi_stats_t* stats) { // for dynamic libraries that are unloaded and need to release all their allocated memory. void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { mi_arenas_unsafe_destroy(); - _mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas + _mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas } // Is a pointer inside any of our arenas? 
@@ -720,8 +720,8 @@ bool _mi_arena_contains(const void* p) { const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); - if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { - return true; + if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { + return true; } } return false; @@ -765,7 +765,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int mi_memid_t meta_memid; mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; - + // already zero'd due to os_alloc // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); @@ -782,12 +782,12 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap + arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap // initialize committed bitmap? if (arena->blocks_committed != NULL && arena->memid.initially_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning } - + // and claim leftover blocks if needed (so we never allocate there) ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; mi_assert_internal(post >= 0); diff --git a/src/segment.c b/src/segment.c index 6798bb66..26862899 100644 --- a/src/segment.c +++ b/src/segment.c @@ -239,7 +239,7 @@ static void mi_page_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tl mi_assert_internal(page->used == 0); mi_assert_expensive(!mi_pages_purge_contains(page, tld)); size_t psize; - void* start = mi_segment_raw_page_start(segment, page, &psize); + void* start = mi_segment_raw_page_start(segment, page, &psize); const bool needs_recommit = _mi_os_purge(start, psize, tld->stats); if (needs_recommit) { page->is_committed = false; } page->used = 0; @@ -249,7 +249,7 @@ static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_ if (page->is_committed) return true; mi_assert_internal(segment->allow_decommit); mi_assert_expensive(!mi_pages_purge_contains(page, tld)); - + size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); bool is_zero = false; @@ -259,8 +259,8 @@ static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_ page->is_committed = true; page->used = 0; page->is_zero_init = is_zero; - if (gsize > 0) { - mi_segment_protect_range(start + psize, gsize, true); + if (gsize > 0) { + mi_segment_protect_range(start + psize, gsize, true); } return true; } @@ -296,7 +296,7 @@ static void mi_segment_schedule_purge(mi_segment_t* segment, mi_page_t* page, mi // purge immediately? 
mi_page_purge(segment, page, tld); } - else { + else if (mi_option_get(mi_option_purge_delay) > 0) { // no purging if the delay is negative // otherwise push on the delayed page reset queue mi_page_queue_t* pq = &tld->pages_purge; // push on top @@ -484,11 +484,11 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->is_committed) { committed_size += page_size; } - if (!page->is_committed) { fully_committed = false; } + if (!page->is_committed) { fully_committed = false; } } MI_UNUSED(fully_committed); mi_assert_internal((fully_committed && committed_size == segment_size) || (!fully_committed && committed_size < segment_size)); - + _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged) _mi_arena_free(segment, segment_size, committed_size, segment->memid, tld->stats); } @@ -536,9 +536,9 @@ static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignme // commit failed; we cannot touch the memory: free the segment directly and return `NULL` _mi_arena_free(segment, segment_size, 0, memid, tld_os->stats); return NULL; - } + } } - + MI_UNUSED(info_size); segment->memid = memid; segment->allow_decommit = !memid.is_pinned; @@ -581,7 +581,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); const bool init_commit = eager; // || (page_kind >= MI_PAGE_LARGE); - + // Allocate the segment from the OS (segment_size can change due to alignment) mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, init_commit, init_segment_size, tld, os_tld); if (segment == NULL) return NULL; @@ -609,7 +609,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - + // set protection mi_segment_protect(segment, true, tld->os); @@ -628,7 +628,7 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // don't purge as we are freeing now mi_segment_remove_all_purges(segment, false /* don't force as we are about to free */, tld); mi_segment_remove_from_free_queue(segment, tld); - + mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); mi_assert(segment->next == NULL); @@ -655,10 +655,10 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg // check commit if (!mi_page_ensure_committed(segment, page, tld)) return false; - + // set in-use before doing unreset to prevent delayed reset page->segment_in_use = true; - segment->used++; + segment->used++; mi_assert_internal(page->segment_in_use && page->is_committed && page->used==0 && !mi_pages_purge_contains(page,tld)); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) { @@ -1134,7 +1134,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s static mi_page_t* mi_segment_find_free(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(mi_segment_has_free(segment)); - 
mi_assert_expensive(mi_segment_is_valid(segment, tld)); + mi_assert_expensive(mi_segment_is_valid(segment, tld)); for (size_t i = 0; i < segment->capacity; i++) { // TODO: use a bitmap instead of search? mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { @@ -1274,7 +1274,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(page->free == NULL); if (segment->allow_decommit && page->is_committed) { size_t usize = mi_usable_size(block); - if (usize > sizeof(mi_block_t)) { + if (usize > sizeof(mi_block_t)) { usize = usize - sizeof(mi_block_t); uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); _mi_os_reset(p, usize, &_mi_stats_main); From bdda13b8800859558b95b937a747eda8c31a3b4e Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 29 Feb 2024 14:27:28 -0800 Subject: [PATCH 2/4] track abandoned segments in an arena bitmap instead of with a list --- include/mimalloc/internal.h | 4 + include/mimalloc/types.h | 1 - src/arena.c | 96 +++++++++++++++++- src/segment.c | 197 +++++------------------------------- 4 files changed, 122 insertions(+), 176 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 40401736..94ac04f5 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -124,6 +124,10 @@ bool _mi_arena_contains(const void* p); void _mi_arena_collect(bool force_purge, mi_stats_t* stats); void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); +bool _mi_arena_segment_clear_abandoned(mi_memid_t memid); +void _mi_arena_segment_mark_abandoned(mi_memid_t memid); +mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_id_t* current_id, size_t* current_idx); + // "segment-map.c" void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 06b96587..0a72af52 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -373,7 +373,6 @@ typedef struct mi_segment_s { size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` // segment fields - _Atomic(struct mi_segment_s*) abandoned_next; struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init` struct mi_segment_s* prev; diff --git a/src/arena.c b/src/arena.c index 42aac8fc..72898175 100644 --- a/src/arena.c +++ b/src/arena.c @@ -55,6 +55,7 @@ typedef struct mi_arena_s { mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) + mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -727,6 +728,89 @@ bool _mi_arena_contains(const void* p) { return false; } +/* ----------------------------------------------------------- + Abandoned blocks/segments. + This is used to atomically abandon/reclaim segments + (and crosses the arena API but it is convenient to have here). 
+ Abandoned segments still have live blocks; they get reclaimed + when a thread frees in it, or when a thread needs a fresh + segment; these threads scan the abandoned segments through + the arena bitmaps. +----------------------------------------------------------- */ + +// reclaim a specific abandoned segment; `true` on success. +bool _mi_arena_segment_clear_abandoned(mi_memid_t memid ) +{ + if (memid.memkind != MI_MEM_ARENA) return true; // not in an arena, consider it un-abandoned + size_t arena_idx; + size_t bitmap_idx; + mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); + mi_assert_internal(arena != NULL); + bool was_abandoned = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx); + mi_assert_internal(was_abandoned); + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + return was_abandoned; +} + +// mark a specific segment as abandoned +void _mi_arena_segment_mark_abandoned(mi_memid_t memid) +{ + if (memid.memkind != MI_MEM_ARENA) return; // not in an arena + size_t arena_idx; + size_t bitmap_idx; + mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); + mi_assert_internal(arena != NULL); + const bool was_unset = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); + mi_assert_internal(was_unset); + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); +} + +// reclaim abandoned segments +mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_id_t* previous_id, size_t* previous_idx ) +{ + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + int arena_idx = *previous_id; + size_t field_idx = mi_bitmap_index_field(*previous_idx); + size_t bit_idx = mi_bitmap_index_bit_in_field(*previous_idx) + 1; + // visit arena's (from previous) + for( ; arena_idx < max_arena; arena_idx++, field_idx = 0, bit_idx = 0) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); + if (arena != NULL) { + // visit the abandoned fields (starting at previous_idx) + for ( ; field_idx < arena->field_count; field_idx++, bit_idx = 0) { + mi_bitmap_field_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]); + if mi_unlikely(field != 0) { // skip zero fields quickly + // visit each set bit in the field (todo: maybe use `ctz` here?) 
+ for ( ; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) { + // pre-check if the bit is set + mi_bitmap_field_t mask = ((mi_bitmap_field_t)1 << bit_idx); + if mi_unlikely((field & mask) == mask) { + mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); + // try to reclaim it atomically + if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) { + *previous_idx = bitmap_idx; + *previous_id = arena_idx; + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + return (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx); + } + } + } + } + } + } + } + // no more found + *previous_idx = 0; + *previous_id = 0; + return NULL; +} + /* ----------------------------------------------------------- Add an arena. @@ -760,13 +844,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); - const size_t bitmaps = (memid.is_pinned ? 2 : 4); + const size_t bitmaps = (memid.is_pinned ? 3 : 5); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); mi_memid_t meta_memid; mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; - // already zero'd due to os_alloc + // already zero'd due to zalloc // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); arena->memid = memid; @@ -780,9 +864,11 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int arena->is_large = is_large; arena->purge_expire = 0; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap - arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap + // consequetive bitmaps + arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap + arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap + arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after abandonde bitmap + arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[4*fields]); // just after committed bitmap // initialize committed bitmap? if (arena->blocks_committed != NULL && arena->memid.initially_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning diff --git a/src/segment.c b/src/segment.c index 26862899..25ab9193 100644 --- a/src/segment.c +++ b/src/segment.c @@ -587,9 +587,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); mi_assert_internal(segment->memid.is_pinned ? 
segment->memid.initially_committed : true); - - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - + // zero the segment info (but not the `mem` fields) ptrdiff_t ofs = offsetof(mi_segment_t, next); _mi_memzero((uint8_t*)segment + ofs, info_size - ofs); @@ -743,171 +741,25 @@ Abandonment When threads terminate, they can leave segments with live blocks (reached through other threads). Such segments are "abandoned" and will be reclaimed by other threads to -reuse their pages and/or free them eventually +reuse their pages and/or free them eventually. The +`thread_id` of such segments is 0. -We maintain a global list of abandoned segments that are -reclaimed on demand. Since this is shared among threads -the implementation needs to avoid the A-B-A problem on -popping abandoned segments: -We use tagged pointers to avoid accidentally identifying -reused segments, much like stamped references in Java. -Secondly, we maintain a reader counter to avoid resetting -or decommitting segments that have a pending read operation. +When a block is freed in an abandoned segment, the segment +is reclaimed into that thread. -Note: the current implementation is one possible design; -another way might be to keep track of abandoned segments -in the regions. This would have the advantage of keeping -all concurrent code in one place and not needing to deal -with ABA issues. The drawback is that it is unclear how to -scan abandoned segments efficiently in that case as they -would be spread among all other segments in the regions. +Moreover, if threads are looking for a fresh segment, they +will first consider abondoned segments -- these can be found +by scanning the arena memory +(segments outside arena memoryare only reclaimed by a free). ----------------------------------------------------------- */ -// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers -// to put in a tag that increments on update to avoid the A-B-A problem. -#define MI_TAGGED_MASK MI_SEGMENT_MASK -typedef uintptr_t mi_tagged_segment_t; +// Maintain these for debug purposes +static mi_decl_cache_align _Atomic(size_t)abandoned_count; -static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { - return (mi_segment_t*)(ts & ~MI_TAGGED_MASK); -} -static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) { - mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0); - uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK; - return ((uintptr_t)segment | tag); -} - -// This is a list of visited abandoned pages that were full at the time. -// this list migrates to `abandoned` when that becomes NULL. The use of -// this list reduces contention and the rate at which segments are visited. -static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL - -// The abandoned page list (tagged as it supports pop) -static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL - -// Maintain these for debug purposes (these counts may be a bit off) -static mi_decl_cache_align _Atomic(size_t) abandoned_count; -static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count; - -// We also maintain a count of current readers of the abandoned list -// in order to prevent resetting/decommitting segment memory if it might -// still be read. 
-static mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0 - -// Push on the visited list -static void mi_abandoned_visited_push(mi_segment_t* segment) { - mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_internal(segment->used > 0); - mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited); - do { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext); - } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment)); - mi_atomic_increment_relaxed(&abandoned_visited_count); -} - -// Move the visited list to the abandoned list. -static bool mi_abandoned_visited_revisit(void) -{ - // quick check if the visited list is empty - if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; - - // grab the whole visited list - mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL); - if (first == NULL) return false; - - // first try to swap directly if the abandoned list happens to be NULL - mi_tagged_segment_t afirst; - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - if (mi_tagged_segment_ptr(ts)==NULL) { - size_t count = mi_atomic_load_relaxed(&abandoned_visited_count); - afirst = mi_tagged_segment(first, ts); - if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) { - mi_atomic_add_relaxed(&abandoned_count, count); - mi_atomic_sub_relaxed(&abandoned_visited_count, count); - return true; - } - } - - // find the last element of the visited list: O(n) - mi_segment_t* last = first; - mi_segment_t* next; - while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) { - last = next; - } - - // and atomically prepend to the abandoned list - // (no need to increase the readers as we don't access the abandoned segments) - mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned); - size_t count; - do { - count = mi_atomic_load_relaxed(&abandoned_visited_count); - mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext)); - afirst = mi_tagged_segment(first, anext); - } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst)); - mi_atomic_add_relaxed(&abandoned_count, count); - mi_atomic_sub_relaxed(&abandoned_visited_count, count); - return true; -} - -// Push on the abandoned list. 
-static void mi_abandoned_push(mi_segment_t* segment) { - mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_internal(segment->used > 0); - mi_tagged_segment_t next; - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - do { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); - next = mi_tagged_segment(segment, ts); - } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next)); - mi_atomic_increment_relaxed(&abandoned_count); -} - -// Wait until there are no more pending reads on segments that used to be in the abandoned list +// legacy: Wait until there are no more pending reads on segments that used to be in the abandoned list void _mi_abandoned_await_readers(void) { - size_t n; - do { - n = mi_atomic_load_acquire(&abandoned_readers); - if (n != 0) mi_atomic_yield(); - } while (n != 0); -} - -// Pop from the abandoned list -static mi_segment_t* mi_abandoned_pop(void) { - mi_segment_t* segment; - // Check efficiently if it is empty (or if the visited list needs to be moved) - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - segment = mi_tagged_segment_ptr(ts); - if mi_likely(segment == NULL) { - if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL - return NULL; - } - } - - // Do a pop. We use a reader count to prevent - // a segment to be decommitted while a read is still pending, - // and a tagged pointer to prevent A-B-A link corruption. - // (this is called from `region.c:_mi_mem_free` for example) - mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted - mi_tagged_segment_t next = 0; - ts = mi_atomic_load_acquire(&abandoned); - do { - segment = mi_tagged_segment_ptr(ts); - if (segment != NULL) { - mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next); - next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted - } - } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); - mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock - if (segment != NULL) { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); - mi_atomic_decrement_relaxed(&abandoned_count); - } - return segment; + // nothing needed } /* ----------------------------------------------------------- @@ -917,7 +769,6 @@ static mi_segment_t* mi_abandoned_pop(void) { static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_expensive(mi_segment_is_valid(segment, tld)); // remove the segment from the free page queue if needed @@ -931,8 +782,7 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_segments_track_size(-((long)segment->segment_size), tld); segment->thread_id = 0; segment->abandoned_visits = 0; - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); - mi_abandoned_push(segment); + _mi_arena_segment_mark_abandoned(segment->memid); mi_atomic_increment_relaxed(&abandoned_count); } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -995,7 +845,6 @@ static 
bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool // Reclaim a segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } segment->thread_id = _mi_thread_id(); @@ -1056,7 +905,10 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { mi_segment_t* segment; - while ((segment = mi_abandoned_pop()) != NULL) { + mi_arena_id_t current_id = 0; + size_t current_idx = 0; + while ((segment = _mi_arena_segment_clear_abandoned_next(¤t_id, ¤t_idx)) != NULL) { + mi_atomic_decrement_relaxed(&abandoned_count); mi_segment_reclaim(segment, heap, 0, NULL, tld); } } @@ -1065,8 +917,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, { *reclaimed = false; mi_segment_t* segment; - long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times - while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + mi_arena_id_t current_id = 0; + size_t current_idx = 0; + long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 1024); // limit the work to bound allocation times + while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t_id, ¤t_idx)) != NULL)) + { + mi_atomic_decrement_relaxed(&abandoned_count); segment->abandoned_visits++; // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? @@ -1092,9 +948,10 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // otherwise, push on the visited list so it gets not looked at too quickly again + // otherwise, mark it back as abandoned // todo: reset delayed pages in the segment? 
- mi_abandoned_visited_push(segment); + mi_atomic_increment_relaxed(&abandoned_count); + _mi_arena_segment_mark_abandoned(segment->memid); } } return NULL; From 64edbc92dd081a81f9166d688c0ae81c4231a697 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 29 Feb 2024 14:58:59 -0800 Subject: [PATCH 3/4] allow abandoned segment reclaim on a free --- include/mimalloc.h | 1 + include/mimalloc/internal.h | 1 + src/alloc.c | 19 +++++++++++++++---- src/arena.c | 7 ++++--- src/options.c | 5 +++-- src/segment.c | 12 +++++++++++- 6 files changed, 35 insertions(+), 10 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 368c22cc..7b1b7d5a 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -342,6 +342,7 @@ typedef enum mi_option_e { mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit) mi_option_arena_purge_mult, mi_option_purge_extend_delay, + mi_option_abandoned_reclaim_on_free, // reclaim abandoned segments on a free _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 94ac04f5..1143a184 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -147,6 +147,7 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, m void _mi_segment_thread_collect(mi_segments_tld_t* tld); void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); void _mi_abandoned_await_readers(void); +bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment); // "page.c" void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; diff --git a/src/alloc.c b/src/alloc.c index b17fdbdc..738985aa 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -406,12 +406,24 @@ static void mi_stat_huge_free(const mi_page_t* page) { // multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { + // first see if the segment was abandoned and we can reclaim it + mi_segment_t* const segment = _mi_page_segment(page); + if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && + mi_atomic_load_relaxed(&segment->thread_id) == 0) + { + // the segment is abandoned, try to reclaim it into our heap + if (_mi_segment_attempt_reclaim(mi_prim_get_default_heap(), segment)) { + mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + mi_free(block); // recursively free as now it will be a local free in our heap + return; + } + } + // The padding check may access the non-thread-owned page for the key values. // that is safe as these are constant and the page won't be freed (as the block is not freed yet). mi_check_padding(page, block); _mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - mi_segment_t* const segment = _mi_page_segment(page); if (segment->page_kind == MI_PAGE_HUGE) { #if MI_HUGE_PAGE_ABANDON // huge page segments are always abandoned and can be freed immediately @@ -424,9 +436,8 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // (as the owning thread needs to actually free the memory later). 
_mi_segment_huge_page_reset(segment, page, block); #endif - } - - + } + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading memset(block, MI_DEBUG_FREED, mi_usable_size(block)); #endif diff --git a/src/arena.c b/src/arena.c index 72898175..9ce8c3f7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -749,9 +749,9 @@ bool _mi_arena_segment_clear_abandoned(mi_memid_t memid ) mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); bool was_abandoned = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx); - mi_assert_internal(was_abandoned); - mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); - mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + // mi_assert_internal(was_abandoned); + mi_assert_internal(!was_abandoned || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); return was_abandoned; } @@ -766,6 +766,7 @@ void _mi_arena_segment_mark_abandoned(mi_memid_t memid) mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); const bool was_unset = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); + MI_UNUSED_RELEASE(was_unset); mi_assert_internal(was_unset); mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); } diff --git a/src/options.c b/src/options.c index 2a83fcc7..61ed5be7 100644 --- a/src/options.c +++ b/src/options.c @@ -81,7 +81,7 @@ static mi_option_desc_t options[_mi_option_last] = { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output - { 8, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. number of segment reclaims from the abandoned segments per try. + { 16, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. number of segment reclaims from the abandoned segments per try. { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! 
#if (MI_INTPTR_SIZE>4) { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time @@ -89,8 +89,9 @@ static mi_option_desc_t options[_mi_option_last] = { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's + { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, + { 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) }, // reclaim an abandoned segment on a free }; static void mi_option_init(mi_option_desc_t* desc); diff --git a/src/segment.c b/src/segment.c index 25ab9193..3f325dcf 100644 --- a/src/segment.c +++ b/src/segment.c @@ -336,7 +336,7 @@ static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge mi_page_t* page = &segment->pages[i]; if (!page->segment_in_use) { mi_page_purge_remove(page, tld); - if (force_purge) { + if (force_purge && page->is_committed) { mi_page_purge(segment, page, tld); } } @@ -902,6 +902,16 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } } +// attempt to reclaim a particular segment (called from multi threaded free `alloc.c:mi_free_block_mt`) +bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) { + if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned + if (_mi_arena_segment_clear_abandoned(segment->memid)) { // atomically unabandon + mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, NULL, &heap->tld->segments); + mi_assert_internal(res != NULL); + return (res != NULL); + } + return false; +} void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { mi_segment_t* segment; From d34d8f0f657bc6bdda5e8bc5ba530719ec410d43 Mon Sep 17 00:00:00 2001 From: daanx Date: Thu, 29 Feb 2024 15:39:47 -0800 Subject: [PATCH 4/4] fix signed comparison --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 9ce8c3f7..cd3a7ef2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -774,7 +774,7 @@ void _mi_arena_segment_mark_abandoned(mi_memid_t memid) // reclaim abandoned segments mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_id_t* previous_id, size_t* previous_idx ) { - const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count); int arena_idx = *previous_id; size_t field_idx = mi_bitmap_index_field(*previous_idx); size_t bit_idx = mi_bitmap_index_bit_in_field(*previous_idx) + 1;
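
Note on the mechanism these patches introduce (patches 2 and 3): abandonment is recorded as a per-block bit in each arena's `blocks_abandoned` bitmap, and reclaiming a segment amounts to winning an atomic clear of that bit. The following standalone sketch is only an illustration of that ownership protocol, not mimalloc code; it uses hypothetical names and a single 64-bit field in place of mimalloc's per-arena bitmap arrays.

```c
// abandoned_bit_sketch.c -- hypothetical, simplified illustration (not mimalloc code).
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

// One bitmap word; mimalloc keeps an array of these per arena (`blocks_abandoned`),
// one bit per arena block that starts an abandoned segment.
static _Atomic uint64_t abandoned_field = 0;

// Mark a segment's block as abandoned (cf. `_mi_arena_segment_mark_abandoned`).
static void mark_abandoned(unsigned bit_idx) {
  atomic_fetch_or_explicit(&abandoned_field, (uint64_t)1 << bit_idx, memory_order_release);
}

// Try to claim an abandoned segment by clearing its bit
// (cf. `_mi_arena_segment_clear_abandoned`). Exactly one of the racing threads
// observes the 1 -> 0 transition of the bit and wins the right to reclaim.
static bool try_claim_abandoned(unsigned bit_idx) {
  const uint64_t mask = (uint64_t)1 << bit_idx;
  const uint64_t prev = atomic_fetch_and_explicit(&abandoned_field, ~mask, memory_order_acq_rel);
  return (prev & mask) != 0;
}

int main(void) {
  mark_abandoned(5);
  printf("first claim:  %d\n", try_claim_abandoned(5));  // 1: this caller reclaims the segment
  printf("second claim: %d\n", try_claim_abandoned(5));  // 0: already reclaimed by someone else
  return 0;
}
```

In the patches themselves, `_mi_arena_segment_mark_abandoned` and `_mi_arena_segment_clear_abandoned` perform the analogous set/clear on `arena->blocks_abandoned` via `_mi_bitmap_claim` / `_mi_bitmap_unclaim`, `_mi_arena_segment_clear_abandoned_next` scans those fields to hand out the next reclaimable segment, and patch 3 uses the same atomic clear from `_mi_free_block_mt` so a free into an abandoned segment can reclaim it directly.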