diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index fcd9efba..9f01ff34 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023 Microsoft Research, Daan Leijen +Copyright (c) 2018-2024 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -407,8 +407,8 @@ static inline void mi_atomic_yield(void) { // ---------------------------------------------------------------------- -// Locks -// These should be light-weight in-process only locks. +// Locks +// These should be light-weight in-process only locks. // Only used for reserving arena's and to maintain the abandoned list. // ---------------------------------------------------------------------- #if _MSC_VER @@ -419,28 +419,7 @@ static inline void mi_atomic_yield(void) { #if defined(_WIN32) -#if 0 - -#define mi_lock_t CRITICAL_SECTION - -static inline bool mi_lock_try_acquire(mi_lock_t* lock) { - return TryEnterCriticalSection(lock); -} -static inline void mi_lock_acquire(mi_lock_t* lock) { - EnterCriticalSection(lock); -} -static inline void mi_lock_release(mi_lock_t* lock) { - LeaveCriticalSection(lock); -} -static inline void mi_lock_init(mi_lock_t* lock) { - InitializeCriticalSection(lock); -} -static inline void mi_lock_done(mi_lock_t* lock) { - DeleteCriticalSection(lock); -} - -#else - +#if 1 #define mi_lock_t SRWLOCK // slim reader-writer lock static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -459,10 +438,31 @@ static inline void mi_lock_done(mi_lock_t* lock) { (void)(lock); } +#else +#define mi_lock_t CRITICAL_SECTION + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + return TryEnterCriticalSection(lock); +} +static inline void mi_lock_acquire(mi_lock_t* lock) { + EnterCriticalSection(lock); +} +static inline void mi_lock_release(mi_lock_t* lock) { + LeaveCriticalSection(lock); +} +static inline void mi_lock_init(mi_lock_t* lock) { + InitializeCriticalSection(lock); +} +static inline void mi_lock_done(mi_lock_t* lock) { + DeleteCriticalSection(lock); +} + #endif #elif defined(MI_USE_PTHREADS) +void _mi_error_message(int err, const char* fmt, ...); + #define mi_lock_t pthread_mutex_t static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -471,7 +471,7 @@ static inline bool mi_lock_try_acquire(mi_lock_t* lock) { static inline void mi_lock_acquire(mi_lock_t* lock) { const int err = pthread_mutex_lock(lock); if (err != 0) { - mi_error_message(EFAULT, "internal error: lock cannot be acquired\n"); + _mi_error_message(err, "internal error: lock cannot be acquired\n"); } } static inline void mi_lock_release(mi_lock_t* lock) { @@ -520,7 +520,7 @@ static inline void mi_lock_acquire(mi_lock_t* lock) { for (int i = 0; i < 1000; i++) { // for at most 1000 tries? 
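// Note: in this generic fallback the lock is just an atomic word (the release below stores 0
// into it); each iteration does an atomic try-acquire and yields on failure. As written, the
// loop falls through after the bounded number of tries even if the lock was never obtained,
// which seems tolerable only because these locks guard rare, short paths (reserving arenas
// and maintaining the abandoned list, per the section comment above).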
if (mi_lock_try_acquire(lock)) return; mi_atomic_yield(); - } + } } static inline void mi_lock_release(mi_lock_t* lock) { mi_atomic_store_release(lock, (uintptr_t)0); @@ -535,6 +535,4 @@ static inline void mi_lock_done(mi_lock_t* lock) { #endif - - #endif // MI_ATOMIC_H diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 461b5393..0b084558 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -149,7 +149,7 @@ typedef struct mi_arena_s mi_arena_t; // defined in `arena.c` // a memory id tracks the provenance of arena/OS allocated memory // --------------------------------------------------------------- -// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated. +// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated. // The memid keeps track of this. typedef enum mi_memkind_e { MI_MEM_NONE, // not allocated @@ -261,7 +261,7 @@ typedef uint8_t mi_heaptag_t; // // We don't count `freed` (as |free|) but use `used` to reduce // the number of memory accesses in the `mi_page_all_free` function(s). -// +// // Notes: // - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`). // - If a page is not part of a heap it is called "abandoned" -- in @@ -306,7 +306,7 @@ typedef struct mi_page_s { #define MI_PAGE_ALIGN MI_ARENA_SLICE_ALIGN // pages must be aligned on this for the page map. #define MI_PAGE_MIN_START_BLOCK_ALIGN MI_MAX_ALIGN_SIZE // minimal block alignment for the first block in a page (16b) -#define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks +#define MI_PAGE_MAX_START_BLOCK_ALIGN2 MI_KiB // maximal block alignment for "power of 2"-sized blocks #define MI_PAGE_MAX_OVERALLOC_ALIGN MI_ARENA_SLICE_SIZE // (64 KiB) limit for which we overallocate in arena pages, beyond this use OS allocation #if (MI_ENCODE_FREELIST || MI_PADDING) && MI_SIZE_SIZE == 8 @@ -344,12 +344,12 @@ typedef enum mi_page_kind_e { // ------------------------------------------------------ // Heaps -// +// // Provide first-class heaps to allocate from. // A heap just owns a set of pages for allocation and // can only be allocate/reallocate from the thread that created it. // Freeing blocks can be done from any thread though. -// +// // Per thread, there is always a default heap that is // used for allocation; it is initialized to statically // point to an empty heap to avoid initialization checks @@ -532,9 +532,9 @@ void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ // Sub processes use separate arena's and no heaps/pages/blocks -// are shared between sub processes. +// are shared between sub processes. 
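// A minimal sketch of how the locks in the sub-process structure below are driven, assuming
// only the mi_lock_* primitives from atomic.h and the mi_lock() scoped-acquire macro that
// arena.c uses (the init/done call sites are an assumption of this sketch):
//
//   mi_lock_init(&subproc->arena_reserve_lock);    // once, when the sub-process is created
//   mi_lock(&subproc->arena_reserve_lock) {        // acquire .. release around the body
//     // e.g. reserve a fresh arena, one thread at a time
//   }
//   mi_lock_done(&subproc->arena_reserve_lock);    // once, when the sub-process is deleted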
// The subprocess structure contains essentially all static variables (except per subprocess :-)) -// +// // Each thread should belong to one sub-process only // ------------------------------------------------------ @@ -545,11 +545,12 @@ typedef struct mi_subproc_s { _Atomic(size_t) arena_count; // current count of arena's _Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time + _Atomic(int64_t) purge_expire; // expiration is set if any arenas can be purged - _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process + _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process mi_page_t* os_abandoned_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on) mi_lock_t os_abandoned_pages_lock; // lock for the os abandoned pages list (this lock protects list operations) - + mi_memid_t memid; // provenance of this memory block (meta or OS) mi_stats_t stats; // sub-process statistics (tld stats are merged in on thread termination) } mi_subproc_t; diff --git a/src/arena-abandon.c b/src/arena-abandon.c new file mode 100644 index 00000000..460c80fc --- /dev/null +++ b/src/arena-abandon.c @@ -0,0 +1,346 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#if !defined(MI_IN_ARENA_C) +#error "this file should be included from 'arena.c' (so mi_arena_t is visible)" +// add includes help an IDE +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "bitmap.h" +#endif + +// Minimal exports for arena-abandoned. +size_t mi_arena_id_index(mi_arena_id_t id); +mi_arena_t* mi_arena_from_index(size_t idx); +size_t mi_arena_get_count(void); +void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex); +bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index); + +/* ----------------------------------------------------------- + Abandoned blocks/segments: + + _mi_arena_segment_clear_abandoned + _mi_arena_segment_mark_abandoned + + This is used to atomically abandon/reclaim segments + (and crosses the arena API but it is convenient to have here). + + Abandoned segments still have live blocks; they get reclaimed + when a thread frees a block in it, or when a thread needs a fresh + segment. + + Abandoned segments are atomically marked in the `block_abandoned` + bitmap of arenas. Any segments allocated outside arenas are put + in the sub-process `abandoned_os_list`. This list is accessed + using locks but this should be uncommon and generally uncontended. + Reclaim and visiting either scan through the `block_abandoned` + bitmaps of the arena's, or visit the `abandoned_os_list` + + A potentially nicer design is to use arena's for everything + and perhaps have virtual arena's to map OS allocated memory + but this would lack the "density" of our current arena's. TBC. +----------------------------------------------------------- */ + + +// reclaim a specific OS abandoned segment; `true` on success. +// sets the thread_id. 
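// Note on `take_lock`: when true, the sub-process `abandoned_os_lock` is try-acquired and the
// function simply gives up (returns false) on contention; when false, the caller (the OS-list
// visitor further below) already holds the lock, and the thread_id is deliberately left at 0
// so the segment still appears abandoned while it is being visited.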
+static bool mi_arena_segment_os_clear_abandoned(mi_segment_t* segment, bool take_lock) { + mi_assert(segment->memid.memkind != MI_MEM_ARENA); + // not in an arena, remove from list of abandoned os segments + mi_subproc_t* const subproc = segment->subproc; + if (take_lock && !mi_lock_try_acquire(&subproc->abandoned_os_lock)) { + return false; // failed to acquire the lock, we just give up + } + // remove atomically from the abandoned os list (if possible!) + bool reclaimed = false; + mi_segment_t* const next = segment->abandoned_os_next; + mi_segment_t* const prev = segment->abandoned_os_prev; + if (next != NULL || prev != NULL || subproc->abandoned_os_list == segment) { + #if MI_DEBUG>3 + // find ourselves in the abandoned list (and check the count) + bool found = false; + size_t count = 0; + for (mi_segment_t* current = subproc->abandoned_os_list; current != NULL; current = current->abandoned_os_next) { + if (current == segment) { found = true; } + count++; + } + mi_assert_internal(found); + mi_assert_internal(count == mi_atomic_load_relaxed(&subproc->abandoned_os_list_count)); + #endif + // remove (atomically) from the list and reclaim + if (prev != NULL) { prev->abandoned_os_next = next; } + else { subproc->abandoned_os_list = next; } + if (next != NULL) { next->abandoned_os_prev = prev; } + else { subproc->abandoned_os_list_tail = prev; } + segment->abandoned_os_next = NULL; + segment->abandoned_os_prev = NULL; + mi_atomic_decrement_relaxed(&subproc->abandoned_count); + mi_atomic_decrement_relaxed(&subproc->abandoned_os_list_count); + if (take_lock) { // don't reset the thread_id when iterating + mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); + } + reclaimed = true; + } + if (take_lock) { mi_lock_release(&segment->subproc->abandoned_os_lock); } + return reclaimed; +} + +// reclaim a specific abandoned segment; `true` on success. +// sets the thread_id. +bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment) { + if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { + return mi_arena_segment_os_clear_abandoned(segment, true /* take lock */); + } + // arena segment: use the blocks_abandoned bitmap. 
+ size_t arena_idx; + size_t bitmap_idx; + mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); + mi_arena_t* arena = mi_arena_from_index(arena_idx); + mi_assert_internal(arena != NULL); + // reclaim atomically + bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx); + if (was_marked) { + mi_assert_internal(mi_atomic_load_acquire(&segment->thread_id) == 0); + mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count); + mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); + } + // mi_assert_internal(was_marked); + mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + return was_marked; +} + + +// mark a specific OS segment as abandoned +static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) { + mi_assert(segment->memid.memkind != MI_MEM_ARENA); + // not in an arena; we use a list of abandoned segments + mi_subproc_t* const subproc = segment->subproc; + mi_lock(&subproc->abandoned_os_lock) { + // push on the tail of the list (important for the visitor) + mi_segment_t* prev = subproc->abandoned_os_list_tail; + mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL); + mi_assert_internal(segment->abandoned_os_prev == NULL); + mi_assert_internal(segment->abandoned_os_next == NULL); + if (prev != NULL) { prev->abandoned_os_next = segment; } + else { subproc->abandoned_os_list = segment; } + subproc->abandoned_os_list_tail = segment; + segment->abandoned_os_prev = prev; + segment->abandoned_os_next = NULL; + mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count); + mi_atomic_increment_relaxed(&subproc->abandoned_count); + // and release the lock + } + return; +} + +// mark a specific segment as abandoned +// clears the thread_id. +void _mi_arena_segment_mark_abandoned(mi_segment_t* segment) +{ + mi_assert_internal(segment->used == segment->abandoned); + mi_atomic_store_release(&segment->thread_id, (uintptr_t)0); // mark as abandoned for multi-thread free's + if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { + mi_arena_segment_os_mark_abandoned(segment); + return; + } + // segment is in an arena, mark it in the arena `blocks_abandoned` bitmap + size_t arena_idx; + size_t bitmap_idx; + mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); + mi_arena_t* arena = mi_arena_from_index(arena_idx); + mi_assert_internal(arena != NULL); + // set abandonment atomically + mi_subproc_t* const subproc = segment->subproc; // don't access the segment after setting it abandoned + const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); + if (was_unmarked) { mi_atomic_increment_relaxed(&subproc->abandoned_count); } + mi_assert_internal(was_unmarked); + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); +} + + +/* ----------------------------------------------------------- + Iterate through the abandoned blocks/segments using a cursor. + This is used for reclaiming and abandoned block visiting. 
+----------------------------------------------------------- */ + +// start a cursor at a randomized arena +void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current) { + mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc); + current->bitmap_idx = 0; + current->subproc = subproc; + current->visit_all = visit_all; + current->hold_visit_lock = false; + const size_t abandoned_count = mi_atomic_load_relaxed(&subproc->abandoned_count); + const size_t abandoned_list_count = mi_atomic_load_relaxed(&subproc->abandoned_os_list_count); + const size_t max_arena = mi_arena_get_count(); + if (heap != NULL && heap->arena_id != _mi_arena_id_none()) { + // for a heap that is bound to one arena, only visit that arena + current->start = mi_arena_id_index(heap->arena_id); + current->end = current->start + 1; + current->os_list_count = 0; + } + else { + // otherwise visit all starting at a random location + if (abandoned_count > abandoned_list_count && max_arena > 0) { + current->start = (heap == NULL || max_arena == 0 ? 0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); + current->end = current->start + max_arena; + } + else { + current->start = 0; + current->end = 0; + } + current->os_list_count = abandoned_list_count; // max entries to visit in the os abandoned list + } + mi_assert_internal(current->start <= max_arena); +} + +void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current) { + if (current->hold_visit_lock) { + mi_lock_release(¤t->subproc->abandoned_os_visit_lock); + current->hold_visit_lock = false; + } +} + +static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_subproc_t* subproc, mi_bitmap_index_t bitmap_idx) { + // try to reclaim an abandoned segment in the arena atomically + if (!_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) return NULL; + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx); + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); + // check that the segment belongs to our sub-process + // note: this is the reason we need the `abandoned_visit` lock in the case abandoned visiting is enabled. + // without the lock an abandoned visit may otherwise fail to visit all abandoned segments in the sub-process. + // for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the `abandoned_visit` lock. + if (segment->subproc != subproc) { + // it is from another sub-process, re-mark it and continue searching + const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); + mi_assert_internal(was_zero); MI_UNUSED(was_zero); + return NULL; + } + else { + // success, we unabandoned a segment in our sub-process + mi_atomic_decrement_relaxed(&subproc->abandoned_count); + return segment; + } +} + +static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_cursor_t* previous) { + const size_t max_arena = mi_arena_get_count(); + size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx); + size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx); + // visit arena's (from the previous cursor) + for (; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) { + // index wraps around + size_t arena_idx = (previous->start >= max_arena ? 
previous->start % max_arena : previous->start); + mi_arena_t* arena = mi_arena_from_index(arena_idx); + if (arena != NULL) { + bool has_lock = false; + // visit the abandoned fields (starting at previous_idx) + for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) { + size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]); + if mi_unlikely(field != 0) { // skip zero fields quickly + // we only take the arena lock if there are actually abandoned segments present + if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) { + has_lock = (previous->visit_all ? (mi_lock_acquire(&arena->abandoned_visit_lock),true) : mi_lock_try_acquire(&arena->abandoned_visit_lock)); + if (!has_lock) { + if (previous->visit_all) { + _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock"); + } + // skip to next arena + break; + } + } + mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned)); + // visit each set bit in the field (todo: maybe use `ctz` here?) + for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) { + // pre-check if the bit is set + size_t mask = ((size_t)1 << bit_idx); + if mi_unlikely((field & mask) == mask) { + mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); + mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, bitmap_idx); + if (segment != NULL) { + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } + previous->bitmap_idx = mi_bitmap_index_create_ex(field_idx, bit_idx + 1); // start at next one for the next iteration + return segment; + } + } + } + } + } + if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } + } + } + return NULL; +} + +static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_cursor_t* previous) { + // go through the abandoned_os_list + // we only allow one thread per sub-process to do to visit guarded by the `abandoned_os_visit_lock`. + // The lock is released when the cursor is released. + if (!previous->hold_visit_lock) { + previous->hold_visit_lock = (previous->visit_all ? 
(mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock),true) + : mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock)); + if (!previous->hold_visit_lock) { + if (previous->visit_all) { + _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock"); + } + return NULL; // we cannot get the lock, give up + } + } + // One list entry at a time + while (previous->os_list_count > 0) { + previous->os_list_count--; + mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free` + mi_segment_t* segment = previous->subproc->abandoned_os_list; + // pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries) + if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) { + mi_lock_release(&previous->subproc->abandoned_os_lock); + return segment; + } + // already abandoned, try again + mi_lock_release(&previous->subproc->abandoned_os_lock); + } + // done + mi_assert_internal(previous->os_list_count == 0); + return NULL; +} + + +// reclaim abandoned segments +// this does not set the thread id (so it appears as still abandoned) +mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous) { + if (previous->start < previous->end) { + // walk the arena + mi_segment_t* segment = mi_arena_segment_clear_abandoned_next_field(previous); + if (segment != NULL) { return segment; } + } + // no entries in the arena's anymore, walk the abandoned OS list + mi_assert_internal(previous->start == previous->end); + return mi_arena_segment_clear_abandoned_next_list(previous); +} + + +bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + // (unfortunately) the visit_abandoned option must be enabled from the start. + // This is to avoid taking locks if abandoned list visiting is not required (as for most programs) + if (!mi_option_is_enabled(mi_option_visit_abandoned)) { + _mi_error_message(EFAULT, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON"); + return false; + } + mi_arena_field_cursor_t current; + _mi_arena_field_cursor_init(NULL, _mi_subproc_from_id(subproc_id), true /* visit all (blocking) */, ¤t); + mi_segment_t* segment; + bool ok = true; + while (ok && (segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL) { + ok = _mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg); + _mi_arena_segment_mark_abandoned(segment); + } + _mi_arena_field_cursor_done(¤t); + return ok; +} diff --git a/src/arena.c b/src/arena.c index af1f737e..0cea5776 100644 --- a/src/arena.c +++ b/src/arena.c @@ -43,7 +43,6 @@ typedef struct mi_arena_s { bool is_exclusive; // only allow allocations if specifically for this arena bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(mi_msecs_t) purge_expire; // expiration time when slices can be purged from `slices_purge`. - _Atomic(mi_msecs_t) purge_expire_extend; // the purge expiration may be extended by a bit mi_bitmap_t* slices_free; // is the slice free? mi_bitmap_t* slices_committed; // is the slice committed? (i.e. 
accessible) @@ -56,14 +55,6 @@ typedef struct mi_arena_s { // note: when adding bitmaps revise `mi_arena_info_slices_needed` } mi_arena_t; -// Every "page" in `pages_purge` points to purge info -// (since we use it for any free'd range and not just for pages) -typedef struct mi_purge_info_s { - _Atomic(mi_msecs_t) expire; - _Atomic(size_t) slice_count; -} mi_purge_info_t; - - /* ----------------------------------------------------------- Arena id's @@ -79,7 +70,7 @@ mi_arena_t* _mi_arena_from_id(mi_arena_id_t id) { static bool mi_arena_id_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena) { - return ((arena == req_arena) || // they match, + return ((arena == req_arena) || // they match, (req_arena == NULL && !arena->is_exclusive)); // or the arena is not exclusive, and we didn't request a specific one } @@ -239,12 +230,12 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( memid->initially_zero = false; } } - #endif + #endif } } else { // already fully commited. - // if the OS has overcommit, and this is the first time we access these pages, then + // if the OS has overcommit, and this is the first time we access these pages, then // count the commit now (as at arena reserve we didn't count those commits as these are on-demand) if (_mi_os_has_overcommit() && touched_slices > 0) { mi_subproc_stat_increase( arena->subproc, committed, mi_size_of_slices(touched_slices)); @@ -266,7 +257,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count)); if (commit) { mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); } mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count)); - + return p; } @@ -329,7 +320,7 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_ if (arena_reserve > small_arena_reserve) { // try again err = mi_reserve_os_memory_ex(small_arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); - if (err != 0 && adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve, true); } // roll back + if (err != 0 && adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve, true); } // roll back } } return (err==0); @@ -436,7 +427,7 @@ static mi_decl_noinline void* mi_arenas_try_alloc( // otherwise, try to reserve a new arena -- but one thread at a time.. (todo: allow 2 or 4 to reduce contention?) 
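// The code below is a double-check under the lock: the arena count is sampled before taking
// `arena_reserve_lock`, and a fresh arena is reserved only if the count is unchanged inside
// the lock; a thread that lost the race skips the reserve and instead retries the allocation
// in the arena that the winning thread just added.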
const size_t arena_count = mi_arenas_get_count(subproc); - mi_lock(&subproc->arena_reserve_lock) { + mi_lock(&subproc->arena_reserve_lock) { if (arena_count == mi_arenas_get_count(subproc)) { // we are the first to enter the lock, reserve a fresh arena mi_arena_id_t arena_id = 0; @@ -445,11 +436,11 @@ static mi_decl_noinline void* mi_arenas_try_alloc( else { // another thread already reserved a new arena } - } + } // try once more to allocate in the new arena mi_assert_internal(req_arena == NULL); p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); - if (p != NULL) return p; + if (p != NULL) return p; return NULL; } @@ -669,7 +660,7 @@ static mi_page_t* mi_arena_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_ page->reserved = (uint16_t)reserved; page->page_start = (uint8_t*)page + block_start; page->block_size = block_size; - page->memid = memid; + page->memid = memid; page->free_is_zero = memid.initially_zero; if (block_size > 0 && _mi_is_power_of_two(block_size)) { page->block_size_shift = (uint8_t)mi_ctz(block_size); @@ -835,7 +826,7 @@ void _mi_arena_page_abandon(mi_page_t* page) { } } mi_subproc_stat_increase(_mi_subproc(), pages_abandoned, 1); - } + } _mi_page_unown(page); } @@ -877,7 +868,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) { mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count)); mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); - + // this busy waits until a concurrent reader (from alloc_abandoned) is done mi_bitmap_clear_once_set(arena->pages_abandoned[bin], slice_index); mi_page_clear_abandoned_mapped(page); @@ -898,7 +889,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) { page->prev = NULL; } } - } + } } void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) { @@ -1052,7 +1043,7 @@ static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t // success if (arena_id != NULL) { *arena_id = arena; } return true; - } + } } } @@ -1140,7 +1131,6 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = is_large; arena->purge_expire = 0; - arena->purge_expire_extend = 0; // mi_lock_init(&arena->abandoned_visit_lock); // init bitmaps @@ -1247,7 +1237,7 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, mi_arena_t else if (mi_page_is_abandoned(page)) { c = (mi_page_is_singleton(page) ? 
's' : 'f'); } bit_of_page = (long)page->memid.mem.arena.slice_count; buf[bit] = c; - } + } else { char c = '?'; if (bit_of_page > 0) { c = '-'; } @@ -1261,7 +1251,7 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, mi_arena_t } if (bit==MI_BFIELD_BITS-1 && bit_of_page > 1) { c = '>'; } buf[bit] = c; - } + } } return bit_set_count; } @@ -1306,7 +1296,7 @@ static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bi } void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept { - mi_subproc_t* subproc = _mi_subproc(); + mi_subproc_t* subproc = _mi_subproc(); size_t max_arenas = mi_arenas_get_count(subproc); size_t free_total = 0; size_t slice_total = 0; @@ -1455,14 +1445,15 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_ } else { // schedule purge + const mi_msecs_t expire = _mi_clock_now() + delay; mi_msecs_t expire0 = 0; - if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, _mi_clock_now() + delay)) { + if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, expire)) { // expiration was not yet set - mi_atomic_storei64_release(&arena->purge_expire_extend, 0); + // maybe set the global arenas expire as well (if it wasn't set already) + mi_atomic_casi64_strong_acq_rel(&arena->subproc->purge_expire, &expire0, expire); } - else if (mi_atomic_loadi64_acquire(&arena->purge_expire_extend) < 10*delay) { // limit max extension time + else { // already an expiration was set - mi_atomic_addi64_acq_rel(&arena->purge_expire_extend, (mi_msecs_t)(delay/10)); // add smallish extra delay } mi_bitmap_setN(arena->slices_purge, slice_index, slice_count, NULL); } @@ -1491,7 +1482,7 @@ static bool mi_arena_try_purge_range(mi_arena_t* arena, size_t slice_index, size } static bool mi_arena_try_purge_visitor(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) { - mi_purge_visit_info_t* vinfo = (mi_purge_visit_info_t*)arg; + mi_purge_visit_info_t* vinfo = (mi_purge_visit_info_t*)arg; // try to purge: first claim the free blocks if (mi_arena_try_purge_range(arena, slice_index, slice_count)) { vinfo->any_purged = true; @@ -1515,18 +1506,13 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) { // check pre-conditions if (arena->memid.is_pinned) return false; - mi_msecs_t expire_base = mi_atomic_loadi64_relaxed(&arena->purge_expire); - mi_msecs_t expire_extend = mi_atomic_loadi64_relaxed(&arena->purge_expire_extend); - const mi_msecs_t expire = expire_base + expire_extend; - if (expire == 0) return false; // expired yet? 
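// (In the rewritten expiration check below, the separate `purge_expire_extend` field is gone:
// each arena keeps a single `purge_expire` timestamp, and the new sub-process level
// `purge_expire` lets mi_arenas_try_purge return early, without inspecting any arena, when no
// purge is due yet.)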
- if (!force && expire > now) return false; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (!force && (expire == 0 || expire > now)) return false; // reset expire (if not already set concurrently) - if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire_base, (mi_msecs_t)0)) { - mi_atomic_storei64_release(&arena->purge_expire_extend, (mi_msecs_t)0); // and also reset the extend - } + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0); mi_subproc_stat_counter_increase(arena->subproc, arena_purges, 1); // go through all purge info's (with max MI_BFIELD_BITS ranges at a time) @@ -1539,12 +1525,17 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) } -static void mi_arenas_try_purge(bool force, bool visit_all) +static void mi_arenas_try_purge(bool force, bool visit_all) { if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled + // check if any arena needs purging? mi_tld_t* tld = _mi_tld(); mi_subproc_t* subproc = tld->subproc; + const mi_msecs_t now = _mi_clock_now(); + mi_msecs_t arenas_expire = mi_atomic_load_acquire(&subproc->purge_expire); + if (!force && (arenas_expire == 0 || arenas_expire < now)) return; + const size_t max_arena = mi_arenas_get_count(subproc); if (max_arena == 0) return; @@ -1552,20 +1543,28 @@ static void mi_arenas_try_purge(bool force, bool visit_all) static mi_atomic_guard_t purge_guard; mi_atomic_guard(&purge_guard) { - const mi_msecs_t now = _mi_clock_now(); + // increase global expire: at most one purge per delay cycle + mi_atomic_store_release(&subproc->purge_expire, now + mi_arena_purge_delay()); const size_t arena_start = tld->thread_seq % max_arena; - size_t max_purge_count = (visit_all ? max_arena : 1); + size_t max_purge_count = (visit_all ? 
max_arena : 2); + bool all_visited = true; for (size_t _i = 0; _i < max_arena; _i++) { size_t i = _i + arena_start; if (i >= max_arena) { i -= max_arena; } mi_arena_t* arena = mi_arena_from_index(subproc,i); if (arena != NULL) { if (mi_arena_try_purge(arena, now, force)) { - if (max_purge_count <= 1) break; + if (max_purge_count <= 1) { + all_visited = false; + break; + } max_purge_count--; } } } + if (all_visited) { + mi_atomic_store_release(&subproc->purge_expire, (mi_msecs_t)0); + } } } @@ -1662,7 +1661,7 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* are _mi_warning_message("the reloaded arena is not exclusive\n"); return false; } - + arena->is_exclusive = true; arena->subproc = _mi_subproc(); if (!mi_arena_add(arena->subproc, arena, arena_id)) { diff --git a/src/init.c b/src/init.c index 2f147e55..6bbea58e 100644 --- a/src/init.c +++ b/src/init.c @@ -308,7 +308,7 @@ static mi_tld_t* mi_tld_alloc(void) { #define MI_TLD_INVALID ((mi_tld_t*)1) mi_decl_noinline static void mi_tld_free(void) { - mi_tld_t* tld = _mi_tld(); + mi_tld_t* tld = _mi_tld(); if (tld != NULL && tld != MI_TLD_INVALID) { _mi_stats_done(&tld->stats); _mi_meta_free(tld, sizeof(mi_tld_t), tld->memid); @@ -325,7 +325,7 @@ mi_decl_noinline mi_tld_t* _mi_tld(void) { } if (tld==&tld_empty) { thread_tld = tld = mi_tld_alloc(); - } + } return tld; } @@ -523,7 +523,7 @@ void mi_thread_init(void) mi_attr_noexcept // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) if (_mi_thread_heap_init()) return; // returns true if already initialized - mi_subproc_stat_increase(_mi_subproc_main(), threads, 1); + mi_subproc_stat_increase(_mi_subproc_main(), threads, 1); //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); } @@ -552,7 +552,7 @@ void _mi_thread_done(mi_heap_t* heap) // abandon the thread local heap _mi_thread_heap_done(heap); // returns true if already ran - + // free thread local data mi_tld_free(); } @@ -664,7 +664,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_prim_thread_associate_default_heap(NULL); #endif - mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) mi_track_init(); if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
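A usage sketch (not part of the patch) of the new public entry point mi_abandoned_visit_blocks
shown above. It assumes mi_subproc_main() and the standard mi_block_visit_fun callback from
mimalloc.h, that a heap tag of -1 matches any heap, and that the process runs with
MIMALLOC_VISIT_ABANDONED=ON so the visit locks are actually taken:

#include <mimalloc.h>
#include <stdio.h>

// Count abandoned blocks and their total size. With block visiting enabled the callback is
// invoked per area (block == NULL) and per live block (block != NULL); return true to continue.
static bool count_abandoned(const mi_heap_t* heap, const mi_heap_area_t* area,
                            void* block, size_t block_size, void* arg) {
  (void)heap; (void)area;
  size_t* totals = (size_t*)arg;              // totals[0] = block count, totals[1] = bytes
  if (block != NULL) { totals[0]++; totals[1] += block_size; }
  return true;
}

void show_abandoned(void) {
  size_t totals[2] = { 0, 0 };
  // Visit all abandoned segments of the main sub-process, including their live blocks.
  if (mi_abandoned_visit_blocks(mi_subproc_main(), -1 /* any heap tag (assumed) */,
                                true /* visit blocks */, &count_abandoned, totals)) {
    printf("abandoned: %zu blocks, %zu bytes\n", totals[0], totals[1]);
  }
}

Since mi_abandoned_visit_blocks re-marks each segment as abandoned after it is visited, the
walk leaves the abandoned state unchanged.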