diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h
index 3a0d4892..c4fac766 100644
--- a/include/mimalloc/atomic.h
+++ b/include/mimalloc/atomic.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2023 Microsoft Research, Daan Leijen
+Copyright (c) 2018-2024 Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -402,19 +402,46 @@ static inline void mi_atomic_yield(void) {
 
 
 // ----------------------------------------------------------------------
-// Locks are only used for abandoned segment visiting in `arena.c`
+// Locks
+// These do not have to be recursive and should be light-weight
+// in-process only locks. Only used for reserving arena's and to
+// maintain the abandoned list.
 // ----------------------------------------------------------------------
+#if _MSC_VER
+#pragma warning(disable:26110)  // unlock with holding lock
+#endif
+
+#define mi_lock(lock)  for(bool _go = (mi_lock_acquire(lock),true); _go; (mi_lock_release(lock), _go=false) )
 
 #if defined(_WIN32)
 
+#if 1
+#define mi_lock_t  SRWLOCK   // slim reader-writer lock
+
+static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
+  return TryAcquireSRWLockExclusive(lock);
+}
+static inline void mi_lock_acquire(mi_lock_t* lock) {
+  AcquireSRWLockExclusive(lock);
+}
+static inline void mi_lock_release(mi_lock_t* lock) {
+  ReleaseSRWLockExclusive(lock);
+}
+static inline void mi_lock_init(mi_lock_t* lock) {
+  InitializeSRWLock(lock);
+}
+static inline void mi_lock_done(mi_lock_t* lock) {
+  (void)(lock);
+}
+
+#else
 #define mi_lock_t  CRITICAL_SECTION
 
 static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
   return TryEnterCriticalSection(lock);
 }
-static inline bool mi_lock_acquire(mi_lock_t* lock) {
+static inline void mi_lock_acquire(mi_lock_t* lock) {
   EnterCriticalSection(lock);
-  return true;
 }
 static inline void mi_lock_release(mi_lock_t* lock) {
   LeaveCriticalSection(lock);
@@ -426,16 +453,22 @@ static inline void mi_lock_done(mi_lock_t* lock) {
   DeleteCriticalSection(lock);
 }
 
+#endif
 
 #elif defined(MI_USE_PTHREADS)
 
+void _mi_error_message(int err, const char* fmt, ...);
+
 #define mi_lock_t  pthread_mutex_t
 
 static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
   return (pthread_mutex_trylock(lock) == 0);
 }
-static inline bool mi_lock_acquire(mi_lock_t* lock) {
-  return (pthread_mutex_lock(lock) == 0);
+static inline void mi_lock_acquire(mi_lock_t* lock) {
+  const int err = pthread_mutex_lock(lock);
+  if (err != 0) {
+    _mi_error_message(err, "internal error: lock cannot be acquired\n");
+  }
 }
 static inline void mi_lock_release(mi_lock_t* lock) {
   pthread_mutex_unlock(lock);
@@ -447,18 +480,16 @@ static inline void mi_lock_done(mi_lock_t* lock) {
   pthread_mutex_destroy(lock);
 }
 
-/*
 #elif defined(__cplusplus)
 
 #include <mutex>
 #define mi_lock_t  std::mutex
 
 static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
-  return lock->lock_try_acquire();
+  return lock->try_lock();
 }
-static inline bool mi_lock_acquire(mi_lock_t* lock) {
+static inline void mi_lock_acquire(mi_lock_t* lock) {
   lock->lock();
-  return true;
 }
 static inline void mi_lock_release(mi_lock_t* lock) {
   lock->unlock();
@@ -469,7 +500,6 @@ static inline void mi_lock_init(mi_lock_t* lock) {
 static inline void mi_lock_done(mi_lock_t* lock) {
   (void)(lock);
 }
-*/
 
 #else
 
@@ -482,12 +512,11 @@ static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
   uintptr_t expected = 0;
   return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1);
 }
-static inline bool mi_lock_acquire(mi_lock_t* lock) {
+static inline void mi_lock_acquire(mi_lock_t* lock) {
   for (int i = 0; i < 1000; i++) {  // for at most 1000 tries?
-    if (mi_lock_try_acquire(lock)) return true;
+    if (mi_lock_try_acquire(lock)) return;
     mi_atomic_yield();
   }
-  return true;
 }
 static inline void mi_lock_release(mi_lock_t* lock) {
   mi_atomic_store_release(lock, (uintptr_t)0);
@@ -502,6 +531,4 @@ static inline void mi_lock_done(mi_lock_t* lock) {
 
 #endif
 
-
-
 #endif // __MIMALLOC_ATOMIC_H
diff --git a/src/arena-abandon.c b/src/arena-abandon.c
index 48e37794..460c80fc 100644
--- a/src/arena-abandon.c
+++ b/src/arena-abandon.c
@@ -120,11 +120,7 @@ static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) {
   mi_assert(segment->memid.memkind != MI_MEM_ARENA); // not in an arena; we use a list of abandoned segments
   mi_subproc_t* const subproc = segment->subproc;
-  if (!mi_lock_acquire(&subproc->abandoned_os_lock)) {
-    _mi_error_message(EFAULT, "internal error: failed to acquire the abandoned (os) segment lock to mark abandonment");
-    // we can continue but cannot visit/reclaim such blocks..
-  }
-  else {
+  mi_lock(&subproc->abandoned_os_lock) {
     // push on the tail of the list (important for the visitor)
     mi_segment_t* prev = subproc->abandoned_os_list_tail;
     mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL);
@@ -138,7 +134,6 @@ static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) {
     mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count);
     mi_atomic_increment_relaxed(&subproc->abandoned_count);
     // and release the lock
-    mi_lock_release(&subproc->abandoned_os_lock);
   }
   return;
 }
@@ -251,7 +246,7 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_
     if mi_unlikely(field != 0) { // skip zero fields quickly
       // we only take the arena lock if there are actually abandoned segments present
       if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) {
-        has_lock = (previous->visit_all ? mi_lock_acquire(&arena->abandoned_visit_lock) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
+        has_lock = (previous->visit_all ? (mi_lock_acquire(&arena->abandoned_visit_lock),true) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
         if (!has_lock) {
           if (previous->visit_all) {
             _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock");
@@ -289,8 +284,8 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_c
   // we only allow one thread per sub-process to do to visit guarded by the `abandoned_os_visit_lock`.
   // The lock is released when the cursor is released.
   if (!previous->hold_visit_lock) {
-    previous->hold_visit_lock = (previous->visit_all ? mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock)
-                                                     : mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock));
+    previous->hold_visit_lock = (previous->visit_all ? (mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock),true)
+                                                     : mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock));
     if (!previous->hold_visit_lock) {
       if (previous->visit_all) {
         _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock");
@@ -301,21 +296,15 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_c
   // One list entry at a time
   while (previous->os_list_count > 0) {
     previous->os_list_count--;
-    const bool has_lock = mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free`
-    if (has_lock) {
-      mi_segment_t* segment = previous->subproc->abandoned_os_list;
-      // pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries)
-      if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) {
-        mi_lock_release(&previous->subproc->abandoned_os_lock);
-        return segment;
-      }
-      // already abandoned, try again
+    mi_lock_acquire(&previous->subproc->abandoned_os_lock);  // this could contend with concurrent OS block abandonment and reclaim from `free`
+    mi_segment_t* segment = previous->subproc->abandoned_os_list;
+    // pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries)
+    if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) {
       mi_lock_release(&previous->subproc->abandoned_os_lock);
+      return segment;
     }
-    else {
-      _mi_error_message(EFAULT, "failed to acquire abandoned OS list lock during abandoned block visit\n");
-      return NULL;
-    }
+    // already abandoned, try again
+    mi_lock_release(&previous->subproc->abandoned_os_lock);
   }
   // done
   mi_assert_internal(previous->os_list_count == 0);
diff --git a/src/arena.c b/src/arena.c
index c575cef2..789bb283 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -33,7 +33,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
 typedef struct mi_arena_s {
   mi_arena_id_t id;                       // arena id; 0 for non-specific
   mi_memid_t memid;                       // memid of the memory area
-  _Atomic(uint8_t*)start;                 // the start of the memory area
+  _Atomic(uint8_t*) start;                // the start of the memory area
   size_t   block_count;                   // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
   size_t   field_count;                   // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
   size_t   meta_size;                     // size of the arena structure itself (including its bitmaps)
@@ -42,12 +42,13 @@ typedef struct mi_arena_s {
   bool     exclusive;                     // only allow allocations if specifically for this arena
   bool     is_large;                      // memory area consists of large- or huge OS pages (always committed)
   mi_lock_t abandoned_visit_lock;         // lock is only used when abandoned segments are being visited
-  _Atomic(size_t)search_idx;              // optimization to start the search for free blocks
-  _Atomic(mi_msecs_t)purge_expire;        // expiration time when blocks should be decommitted from `blocks_decommit`.
-  mi_bitmap_field_t* blocks_dirty;        // are the blocks potentially non-zero?
-  mi_bitmap_field_t* blocks_committed;    // are the blocks committed? (can be NULL for memory that cannot be decommitted)
-  mi_bitmap_field_t* blocks_purge;        // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
-  mi_bitmap_field_t* blocks_abandoned;    // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
+  _Atomic(size_t) search_idx;             // optimization to start the search for free blocks
+  _Atomic(mi_msecs_t) purge_expire;       // expiration time when blocks should be purged from `blocks_purge`.
+
+  mi_bitmap_field_t* blocks_dirty;        // are the blocks potentially non-zero?
+  mi_bitmap_field_t* blocks_committed;    // are the blocks committed? (can be NULL for memory that cannot be decommitted)
+  mi_bitmap_field_t* blocks_purge;        // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
+  mi_bitmap_field_t* blocks_abandoned;    // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here)
   mi_bitmap_field_t  blocks_inuse[1];     // in-place bitmap of in-use blocks (of size `field_count`)
   // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields.
 } mi_arena_t;
@@ -60,6 +61,7 @@ typedef struct mi_arena_s {
 // The available arenas
 static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
 static mi_decl_cache_align _Atomic(size_t)      mi_arena_count; // = 0
+static mi_decl_cache_align _Atomic(int64_t)     mi_arenas_purge_expire; // set if there exist purgeable arenas
 
 #define MI_IN_ARENA_C
 #include "arena-abandon.c"
@@ -353,11 +355,10 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz
 }
 
 // try to reserve a fresh arena space
-static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id)
+static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t *arena_id)
 {
   if (_mi_preloading()) return false; // use OS only while pre loading
-  if (req_arena_id != _mi_arena_id_none()) return false;
-
+  
   const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count);
   if (arena_count > (MI_MAX_ARENAS - 4)) return false;
 
@@ -398,15 +399,16 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
   const int numa_node = _mi_os_numa_node(); // current numa node
 
   // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
-  if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed?
-    if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
+  if (!mi_option_is_enabled(mi_option_disallow_arena_alloc)) { // is arena allocation allowed?
+    if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0)
+    {
       void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid);
       if (p != NULL) return p;
 
       // otherwise, try to first eagerly reserve a new arena
       if (req_arena_id == _mi_arena_id_none()) {
         mi_arena_id_t arena_id = 0;
-        if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
+        if (mi_arena_reserve(size, allow_large, &arena_id)) {
           // and try allocate in there
           mi_assert_internal(req_arena_id == _mi_arena_id_none());
           p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid);
@@ -500,13 +502,16 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
     mi_arena_purge(arena, bitmap_idx, blocks);
   }
   else {
-    // schedule decommit
-    mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
-    if (expire != 0) {
-      mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10));  // add smallish extra delay
+    // schedule purge
+    const mi_msecs_t expire = _mi_clock_now() + delay;
+    mi_msecs_t expire0 = 0;
+    if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, expire)) {
+      // expiration was not yet set
+      // maybe set the global arenas expire as well (if it wasn't set already)
+      mi_atomic_casi64_strong_acq_rel(&mi_arenas_purge_expire, &expire0, expire);
     }
     else {
-      mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay);
+      // already an expiration was set
     }
     _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
   }
@@ -541,14 +546,16 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx,
 // returns true if anything was purged
 static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
 {
-  if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
+  // check pre-conditions
+  if (arena->memid.is_pinned) return false;
+
+  // expired yet?
   mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
-  if (expire == 0) return false;
-  if (!force && expire > now) return false;
+  if (!force && (expire == 0 || expire > now)) return false;
 
   // reset expire (if not already set concurrently)
   mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0);
-  
+
   // potential purges scheduled, walk through the bitmap
   bool any_purged = false;
   bool full_purge = true;
@@ -595,9 +602,15 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
   return any_purged;
 }
 
-static void mi_arenas_try_purge( bool force, bool visit_all ) {
+static void mi_arenas_try_purge( bool force, bool visit_all )
+{
   if (_mi_preloading() || mi_arena_purge_delay() <= 0) return;  // nothing will be scheduled
 
+  // check if any arena needs purging?
+  const mi_msecs_t now = _mi_clock_now();
+  mi_msecs_t arenas_expire = mi_atomic_load_acquire(&mi_arenas_purge_expire);
+  if (!force && (arenas_expire == 0 || arenas_expire < now)) return;
+
   const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count);
   if (max_arena == 0) return;
 
@@ -605,17 +618,26 @@ static void mi_arenas_try_purge( bool force, bool visit_all ) {
 
   static mi_atomic_guard_t purge_guard;
   mi_atomic_guard(&purge_guard)
   {
-    mi_msecs_t now = _mi_clock_now();
-    size_t max_purge_count = (visit_all ? max_arena : 1);
+    // increase global expire: at most one purge per delay cycle
+    mi_atomic_store_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay());
+    size_t max_purge_count = (visit_all ? max_arena : 2);
+    bool all_visited = true;
     for (size_t i = 0; i < max_arena; i++) {
       mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
       if (arena != NULL) {
         if (mi_arena_try_purge(arena, now, force)) {
-          if (max_purge_count <= 1) break;
+          if (max_purge_count <= 1) {
+            all_visited = false;
+            break;
+          }
           max_purge_count--;
         }
       }
     }
+    if (all_visited) {
+      // all arena's were visited and purged: reset global expire
+      mi_atomic_store_release(&mi_arenas_purge_expire, 0);
+    }
   }
 }
diff --git a/src/init.c b/src/init.c
index 4f572023..3ce333a6 100644
--- a/src/init.c
+++ b/src/init.c
@@ -187,8 +187,8 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL };
 #if MI_GUARDED
 mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
   heap->guarded_sample_seed = seed;
-  if (heap->guarded_sample_seed == 0) { 
-    heap->guarded_sample_seed = _mi_heap_random_next(heap); 
+  if (heap->guarded_sample_seed == 0) {
+    heap->guarded_sample_seed = _mi_heap_random_next(heap);
   }
   heap->guarded_sample_rate = sample_rate;
   if (heap->guarded_sample_rate >= 1) {
@@ -206,9 +206,9 @@ void _mi_heap_guarded_init(mi_heap_t* heap) {
   mi_heap_guarded_set_sample_rate(heap,
     (size_t)mi_option_get_clamp(mi_option_guarded_sample_rate, 0, LONG_MAX),
     (size_t)mi_option_get(mi_option_guarded_sample_seed));
-  mi_heap_guarded_set_size_bound(heap, 
+  mi_heap_guarded_set_size_bound(heap,
     (size_t)mi_option_get_clamp(mi_option_guarded_min, 0, LONG_MAX),
-    (size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) ); 
+    (size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) );
 }
 #else
 mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) {
@@ -276,11 +276,10 @@ void mi_subproc_delete(mi_subproc_id_t subproc_id) {
   mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
   // check if there are no abandoned segments still..
   bool safe_to_delete = false;
-  if (mi_lock_acquire(&subproc->abandoned_os_lock)) {
+  mi_lock(&subproc->abandoned_os_lock) {
     if (subproc->abandoned_os_list == NULL) {
       safe_to_delete = true;
    }
-    mi_lock_release(&subproc->abandoned_os_lock);
   }
   if (!safe_to_delete) return;
   // safe to release
@@ -417,7 +416,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) {
   tld->heap_backing = bheap;
   tld->heaps = NULL;
   tld->segments.subproc = &mi_subproc_default;
-  tld->segments.stats = &tld->stats; 
+  tld->segments.stats = &tld->stats;
 }
 
 // Free the thread local default heap (called from `mi_thread_done`)
@@ -621,7 +620,7 @@ static void mi_detect_cpu_features(void) {
 }
 #else
 static void mi_detect_cpu_features(void) {
-  // nothing 
+  // nothing
 }
 #endif
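Note: a minimal usage sketch of the new scoped `mi_lock(...) { ... }` macro introduced in `include/mimalloc/atomic.h`; the names `my_lock`, `my_count`, and `my_increment` are illustrative only and do not appear in the patch.

static mi_lock_t my_lock;      // initialized once with mi_lock_init(&my_lock)
static size_t    my_count = 0;

static void my_increment(void) {
  mi_lock(&my_lock) {          // expands to a for-loop that acquires the lock,
                               // runs the block once, and releases it afterwards
    my_count++;
  }
  // The release happens in the for-loop's increment expression, so an early
  // `return` or `break` inside the block would skip mi_lock_release; the call
  // sites changed in this patch (e.g. mi_arena_segment_os_mark_abandoned and
  // mi_subproc_delete) all fall through to the end of the block.
}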