Merge branch 'dev' into dev-slice

daanx 2024-12-21 15:33:47 -08:00
commit 2d01c22cd8
4 changed files with 109 additions and 72 deletions

View file

@@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023 Microsoft Research, Daan Leijen
Copyright (c) 2018-2024 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@@ -402,19 +402,46 @@ static inline void mi_atomic_yield(void) {
// ----------------------------------------------------------------------
// Locks are only used for abandoned segment visiting in `arena.c`
// Locks
// These do not have to be recursive and should be light-weight
// in-process only locks. Only used for reserving arenas and to
// maintain the abandoned list.
// ----------------------------------------------------------------------
#if _MSC_VER
#pragma warning(disable:26110) // unlock without holding the lock
#endif
#define mi_lock(lock) for(bool _go = (mi_lock_acquire(lock),true); _go; (mi_lock_release(lock), _go=false) )
#if defined(_WIN32)
#if 1
#define mi_lock_t SRWLOCK // slim reader-writer lock
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return TryAcquireSRWLockExclusive(lock);
}
static inline void mi_lock_acquire(mi_lock_t* lock) {
AcquireSRWLockExclusive(lock);
}
static inline void mi_lock_release(mi_lock_t* lock) {
ReleaseSRWLockExclusive(lock);
}
static inline void mi_lock_init(mi_lock_t* lock) {
InitializeSRWLock(lock);
}
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
#else
#define mi_lock_t CRITICAL_SECTION
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return TryEnterCriticalSection(lock);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
static inline void mi_lock_acquire(mi_lock_t* lock) {
EnterCriticalSection(lock);
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
LeaveCriticalSection(lock);
@@ -426,16 +453,22 @@ static inline void mi_lock_done(mi_lock_t* lock) {
DeleteCriticalSection(lock);
}
#endif
#elif defined(MI_USE_PTHREADS)
void _mi_error_message(int err, const char* fmt, ...);
#define mi_lock_t pthread_mutex_t
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return (pthread_mutex_trylock(lock) == 0);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
return (pthread_mutex_lock(lock) == 0);
static inline void mi_lock_acquire(mi_lock_t* lock) {
const int err = pthread_mutex_lock(lock);
if (err != 0) {
_mi_error_message(err, "internal error: lock cannot be acquired\n");
}
}
static inline void mi_lock_release(mi_lock_t* lock) {
pthread_mutex_unlock(lock);
@@ -447,18 +480,16 @@ static inline void mi_lock_done(mi_lock_t* lock) {
pthread_mutex_destroy(lock);
}
/*
#elif defined(__cplusplus)
#include <mutex>
#define mi_lock_t std::mutex
static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
return lock->lock_try_acquire();
return lock->try_lock();
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
static inline void mi_lock_acquire(mi_lock_t* lock) {
lock->lock();
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
lock->unlock();
@@ -469,7 +500,6 @@ static inline void mi_lock_init(mi_lock_t* lock) {
static inline void mi_lock_done(mi_lock_t* lock) {
(void)(lock);
}
*/
#else
@@ -482,12 +512,11 @@ static inline bool mi_lock_try_acquire(mi_lock_t* lock) {
uintptr_t expected = 0;
return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1);
}
static inline bool mi_lock_acquire(mi_lock_t* lock) {
static inline void mi_lock_acquire(mi_lock_t* lock) {
for (int i = 0; i < 1000; i++) { // for at most 1000 tries?
if (mi_lock_try_acquire(lock)) return true;
if (mi_lock_try_acquire(lock)) return;
mi_atomic_yield();
}
return true;
}
static inline void mi_lock_release(mi_lock_t* lock) {
mi_atomic_store_release(lock, (uintptr_t)0);
@@ -502,6 +531,4 @@ static inline void mi_lock_done(mi_lock_t* lock) {
#endif
#endif // __MIMALLOC_ATOMIC_H
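
Note on the new lock helpers above: the `mi_lock(lock) { ... }` statement macro expands to a one-iteration `for` loop that acquires the lock in its initializer and releases it in its update expression, so the block runs exactly once while holding the lock. A minimal usage sketch under those assumptions (the lock variable here is illustrative, not from the diff):

    mi_lock_t lock;
    mi_lock_init(&lock);

    mi_lock(&lock) {
      // critical section: the lock is held here and released when the block falls through
    }

    mi_lock_done(&lock);

Because the release sits in the `for` update expression, leaving the block early with `return`, `break`, or `goto` skips it; call sites that need an early return (such as the abandoned-list visiting loop in the next file) keep explicit mi_lock_acquire/mi_lock_release pairs instead.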

View file

@@ -120,11 +120,7 @@ static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) {
mi_assert(segment->memid.memkind != MI_MEM_ARENA);
// not in an arena; we use a list of abandoned segments
mi_subproc_t* const subproc = segment->subproc;
if (!mi_lock_acquire(&subproc->abandoned_os_lock)) {
_mi_error_message(EFAULT, "internal error: failed to acquire the abandoned (os) segment lock to mark abandonment");
// we can continue but cannot visit/reclaim such blocks..
}
else {
mi_lock(&subproc->abandoned_os_lock) {
// push on the tail of the list (important for the visitor)
mi_segment_t* prev = subproc->abandoned_os_list_tail;
mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL);
@@ -138,7 +134,6 @@ static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) {
mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count);
mi_atomic_increment_relaxed(&subproc->abandoned_count);
// and release the lock
mi_lock_release(&subproc->abandoned_os_lock);
}
return;
}
@@ -251,7 +246,7 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_
if mi_unlikely(field != 0) { // skip zero fields quickly
// we only take the arena lock if there are actually abandoned segments present
if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) {
has_lock = (previous->visit_all ? mi_lock_acquire(&arena->abandoned_visit_lock) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
has_lock = (previous->visit_all ? (mi_lock_acquire(&arena->abandoned_visit_lock),true) : mi_lock_try_acquire(&arena->abandoned_visit_lock));
if (!has_lock) {
if (previous->visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock");
@@ -289,8 +284,8 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_c
// we only allow one thread per sub-process to visit, guarded by the `abandoned_os_visit_lock`.
// The lock is released when the cursor is released.
if (!previous->hold_visit_lock) {
previous->hold_visit_lock = (previous->visit_all ? mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock)
: mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock));
previous->hold_visit_lock = (previous->visit_all ? (mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock),true)
: mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock));
if (!previous->hold_visit_lock) {
if (previous->visit_all) {
_mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock");
@@ -301,21 +296,15 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_c
// One list entry at a time
while (previous->os_list_count > 0) {
previous->os_list_count--;
const bool has_lock = mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free`
if (has_lock) {
mi_segment_t* segment = previous->subproc->abandoned_os_list;
// pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries)
if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) {
mi_lock_release(&previous->subproc->abandoned_os_lock);
return segment;
}
// already abandoned, try again
mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free`
mi_segment_t* segment = previous->subproc->abandoned_os_list;
// pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries)
if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) {
mi_lock_release(&previous->subproc->abandoned_os_lock);
return segment;
}
else {
_mi_error_message(EFAULT, "failed to acquire abandoned OS list lock during abandoned block visit\n");
return NULL;
}
// already abandoned, try again
mi_lock_release(&previous->subproc->abandoned_os_lock);
}
// done
mi_assert_internal(previous->os_list_count == 0);
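
Since mi_lock_acquire no longer returns a bool, the visitor code above uses a comma expression, `(mi_lock_acquire(lock), true)`, where it previously tested the acquire's result. A reduced, equivalent sketch of that pattern (variable and field names as in the diff):

    bool has_lock;
    if (previous->visit_all) {
      mi_lock_acquire(&arena->abandoned_visit_lock);                // blocks until acquired, always "succeeds"
      has_lock = true;
    }
    else {
      has_lock = mi_lock_try_acquire(&arena->abandoned_visit_lock); // may fail under contention
    }
    if (!has_lock) { /* report or skip visiting, as above */ }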

View file

@@ -33,7 +33,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
typedef struct mi_arena_s {
mi_arena_id_t id; // arena id; 0 for non-specific
mi_memid_t memid; // memid of the memory area
_Atomic(uint8_t*)start; // the start of the memory area
_Atomic(uint8_t*) start; // the start of the memory area
size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
size_t meta_size; // size of the arena structure itself (including its bitmaps)
@@ -42,12 +42,13 @@ typedef struct mi_arena_s {
bool exclusive; // only allow allocations if specifically for this arena
bool is_large; // memory area consists of large- or huge OS pages (always committed)
mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited
_Atomic(size_t)search_idx; // optimization to start the search for free blocks
_Atomic(mi_msecs_t)purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`.
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses APIs but it is convenient to have here)
_Atomic(size_t) search_idx; // optimization to start the search for free blocks
_Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be purged from `blocks_purge`.
mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero?
mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted)
mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted)
mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses APIs but it is convenient to have here)
mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`)
// do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields.
} mi_arena_t;
@@ -60,6 +61,7 @@ typedef struct mi_arena_s {
// The available arenas
static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0
static mi_decl_cache_align _Atomic(int64_t) mi_arenas_purge_expire; // set if there exist purgeable arenas
#define MI_IN_ARENA_C
#include "arena-abandon.c"
@@ -353,10 +355,9 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz
}
// try to reserve a fresh arena space
static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id)
static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t *arena_id)
{
if (_mi_preloading()) return false; // use OS only while preloading
if (req_arena_id != _mi_arena_id_none()) return false;
const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count);
if (arena_count > (MI_MAX_ARENAS - 4)) return false;
@@ -398,15 +399,16 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset
const int numa_node = _mi_os_numa_node(); // current numa node
// try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed?
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
if (!mi_option_is_enabled(mi_option_disallow_arena_alloc)) { // is arena allocation allowed?
if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0)
{
void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid);
if (p != NULL) return p;
// otherwise, try to first eagerly reserve a new arena
if (req_arena_id == _mi_arena_id_none()) {
mi_arena_id_t arena_id = 0;
if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) {
if (mi_arena_reserve(size, allow_large, &arena_id)) {
// and try allocate in there
mi_assert_internal(req_arena_id == _mi_arena_id_none());
p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid);
@@ -500,13 +502,16 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
mi_arena_purge(arena, bitmap_idx, blocks);
}
else {
// schedule decommit
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
if (expire != 0) {
mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10)); // add smallish extra delay
// schedule purge
const mi_msecs_t expire = _mi_clock_now() + delay;
mi_msecs_t expire0 = 0;
if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, expire)) {
// expiration was not yet set
// maybe set the global arenas expire as well (if it wasn't set already)
mi_atomic_casi64_strong_acq_rel(&mi_arenas_purge_expire, &expire0, expire);
}
else {
mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay);
// already an expiration was set
}
_mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
}
@@ -541,10 +546,12 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx,
// returns true if anything was purged
static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
{
if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false;
// check pre-conditions
if (arena->memid.is_pinned) return false;
// expired yet?
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
if (expire == 0) return false;
if (!force && expire > now) return false;
if (!force && (expire == 0 || expire > now)) return false;
// reset expire (if not already set concurrently)
mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0);
@@ -595,9 +602,15 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
return any_purged;
}
static void mi_arenas_try_purge( bool force, bool visit_all ) {
static void mi_arenas_try_purge( bool force, bool visit_all )
{
if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled
// check if any arena needs purging?
const mi_msecs_t now = _mi_clock_now();
mi_msecs_t arenas_expire = mi_atomic_load_acquire(&mi_arenas_purge_expire);
if (!force && (arenas_expire == 0 || arenas_expire < now)) return;
const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count);
if (max_arena == 0) return;
@@ -605,17 +618,26 @@ static void mi_arenas_try_purge( bool force, bool visit_all ) {
static mi_atomic_guard_t purge_guard;
mi_atomic_guard(&purge_guard)
{
mi_msecs_t now = _mi_clock_now();
size_t max_purge_count = (visit_all ? max_arena : 1);
// increase global expire: at most one purge per delay cycle
mi_atomic_store_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay());
size_t max_purge_count = (visit_all ? max_arena : 2);
bool all_visited = true;
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
if (arena != NULL) {
if (mi_arena_try_purge(arena, now, force)) {
if (max_purge_count <= 1) break;
if (max_purge_count <= 1) {
all_visited = false;
break;
}
max_purge_count--;
}
}
}
if (all_visited) {
// all arenas were visited and purged: reset global expire
mi_atomic_store_release(&mi_arenas_purge_expire, 0);
}
}
}
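
A short note on the purge scheduling above: mi_atomic_casi64_strong_acq_rel follows C11 compare-exchange semantics here (the expected value is updated on failure), so the CAS installs the new expiration only when the current value is still 0, i.e. when no purge is pending yet; on failure an earlier deadline already exists and is kept. A reduced sketch of that "set only if unset" idiom, using the names from the diff:

    const mi_msecs_t expire = _mi_clock_now() + delay;    // new deadline for this arena
    mi_msecs_t expire0 = 0;                               // expected: no deadline pending
    if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, expire)) {
      // this thread set the per-arena deadline; also publish a process-wide one if none was set,
      // so mi_arenas_try_purge can return early when nothing is due anywhere
      mi_atomic_casi64_strong_acq_rel(&mi_arenas_purge_expire, &expire0, expire);
    }
    // otherwise a purge was already scheduled for this arena and the pending deadline stands

mi_arenas_try_purge then consults mi_arenas_purge_expire first, bumps it by one delay cycle while it works, and resets it to 0 once every arena has been visited, as in the hunk above.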

View file

@@ -276,11 +276,10 @@ void mi_subproc_delete(mi_subproc_id_t subproc_id) {
mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id);
// check if there are no abandoned segments still..
bool safe_to_delete = false;
if (mi_lock_acquire(&subproc->abandoned_os_lock)) {
mi_lock(&subproc->abandoned_os_lock) {
if (subproc->abandoned_os_list == NULL) {
safe_to_delete = true;
}
mi_lock_release(&subproc->abandoned_os_lock);
}
if (!safe_to_delete) return;
// safe to release
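
The rewritten mi_subproc_delete above keeps the early `return` outside the mi_lock block: the decision is recorded in a flag inside the block and acted on afterwards, which avoids returning while the lock is held (an early return inside the block would skip the release in the macro's update expression). A minimal sketch of that shape, with names from the diff:

    bool safe_to_delete = false;
    mi_lock(&subproc->abandoned_os_lock) {
      if (subproc->abandoned_os_list == NULL) {
        safe_to_delete = true;            // decide under the lock
      }
    }                                     // lock released here
    if (!safe_to_delete) return;          // return only after the release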