From f3d83e5fa62f9d5ec653d13db8eec2d814e72046 Mon Sep 17 00:00:00 2001 From: Daan Leijen Date: Fri, 20 Dec 2024 13:55:31 -0800 Subject: [PATCH 01/16] insert full pages at the end of the queue; only override page candidate if the page is not too full --- ide/vs2022/mimalloc-test.vcxproj | 6 +++--- include/mimalloc/internal.h | 2 +- src/page-queue.c | 2 +- src/page.c | 3 ++- test/test-stress.c | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj index a8b36d5e..6e4576fd 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -272,14 +272,14 @@ Console + + + {abb5eae7-b3e6-432e-b636-333449892ea6} - - - diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 012ce4f0..8b22e1c6 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -571,7 +571,7 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) { } // is more than 7/8th of a page in use? -static inline bool mi_page_mostly_used(const mi_page_t* page) { +static inline bool mi_page_is_mostly_used(const mi_page_t* page) { if (page==NULL) return true; uint16_t frac = page->reserved / 8U; return (page->reserved - page->used <= frac); diff --git a/src/page-queue.c b/src/page-queue.c index 9796f3dc..67b54650 100644 --- a/src/page-queue.c +++ b/src/page-queue.c @@ -343,7 +343,7 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro static void mi_page_queue_enqueue_from_full(mi_page_queue_t* to, mi_page_queue_t* from, mi_page_t* page) { // note: we could insert at the front to increase reuse, but it slows down certain benchmarks (like `alloc-test`) - mi_page_queue_enqueue_from_ex(to, from, false /* enqueue at the end of the `to` queue? */, page); + mi_page_queue_enqueue_from_ex(to, from, true /* enqueue at the end of the `to` queue? */, page); } // Only called from `mi_heap_absorb`. 
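// --- Illustrative sketch (not part of the patch): a self-contained version of the
// --- 7/8 "mostly used" test that this commit renames to `mi_page_is_mostly_used`
// --- and uses to decide whether a page may still replace the current allocation
// --- candidate. The `demo_` names and trimmed struct are hypothetical stand-ins
// --- for the real `mi_page_t` fields.
#include <stdbool.h>
#include <stdint.h>

typedef struct demo_page_s {
  uint16_t reserved;   // total blocks reserved in the page
  uint16_t used;       // blocks currently allocated from the page
} demo_page_t;

// true when more than 7/8 of the page's blocks are in use (or the page is NULL)
static inline bool demo_page_is_mostly_used(const demo_page_t* page) {
  if (page == NULL) return true;
  const uint16_t frac = page->reserved / 8U;      // one eighth of the capacity
  return (page->reserved - page->used <= frac);   // at most 1/8 of the blocks are still free
}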
diff --git a/src/page.c b/src/page.c index 4b25ed5d..8808c358 100644 --- a/src/page.c +++ b/src/page.c @@ -783,7 +783,8 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p page_candidate = page; candidate_count = 0; } - else if (!mi_page_mostly_used(page) && page->used >= page_candidate->used) { + // prefer to reuse fuller pages (in the hope the less used page gets freed) + else if (page->used >= page_candidate->used && !mi_page_is_mostly_used(page) && !mi_page_is_expandable(page)) { page_candidate = page; } // if we find a non-expandable candidate, or searched for N pages, return with the best candidate diff --git a/test/test-stress.c b/test/test-stress.c index 574d241b..6284ad39 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -319,7 +319,7 @@ int main(int argc, char** argv) { mi_collect(true); #endif #endif - //mi_stats_print(NULL); + mi_stats_print(NULL); //bench_end_program(); return 0; } From 7141d9f1642ff24f5d94e5ae3767f3212153f25f Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 20 Dec 2024 17:31:48 -0800 Subject: [PATCH 02/16] remove busy wait for arena reservation --- src/arena.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/arena.c b/src/arena.c index 44c909c1..74cd4977 100644 --- a/src/arena.c +++ b/src/arena.c @@ -453,7 +453,7 @@ static mi_decl_noinline void* mi_arena_try_alloc( mi_assert(slice_count <= MI_ARENA_MAX_OBJ_SLICES); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); void* p; -again: + // try to find free slices in the arena's p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); if (p != NULL) return p; @@ -465,22 +465,25 @@ again: if (_mi_preloading()) return NULL; // otherwise, try to reserve a new arena -- but one thread at a time.. (todo: allow 2 or 4 to reduce contention?) - if (mi_lock_try_acquire(&mi_arena_reserve_lock)) { - mi_arena_id_t arena_id = 0; - bool ok = mi_arena_reserve(mi_size_of_slices(slice_count), allow_large, req_arena_id, &arena_id); + const size_t arena_count = mi_arena_get_count(); + if (mi_lock_acquire(&mi_arena_reserve_lock)) { + bool ok = true; + if (arena_count == mi_arena_get_count()) { + // we are the first to enter the lock, reserve a fresh arena + mi_arena_id_t arena_id = 0; + ok = mi_arena_reserve(mi_size_of_slices(slice_count), allow_large, req_arena_id, &arena_id); + } + else { + // another thread already reserved a new arena + } mi_lock_release(&mi_arena_reserve_lock); if (ok) { - // and try allocate in there + // try once more to allocate in the new arena mi_assert_internal(req_arena_id == _mi_arena_id_none()); p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); if (p != NULL) return p; } } - else { - // if we are racing with another thread wait until the new arena is reserved (todo: a better yield?) 
- mi_atomic_yield(); - goto again; - } return NULL; } From 93e14344c7be10f186a39f7bee998db8adcead9b Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 20 Dec 2024 17:32:26 -0800 Subject: [PATCH 03/16] use srw lock on windows --- include/mimalloc/atomic.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 3a0d4892..0c967896 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -402,28 +402,34 @@ static inline void mi_atomic_yield(void) { // ---------------------------------------------------------------------- -// Locks are only used for abandoned segment visiting in `arena.c` +// Locks +// These do not have to be recursive and should be light-weight +// in-process only locks. Only used for reserving arena's and to +// maintain the abandoned list. // ---------------------------------------------------------------------- +#if _MSC_VER +#pragma warning(disable:26110) // unlock with holding lock +#endif #if defined(_WIN32) -#define mi_lock_t CRITICAL_SECTION +#define mi_lock_t SRWLOCK // slim reader-writer lock static inline bool mi_lock_try_acquire(mi_lock_t* lock) { - return TryEnterCriticalSection(lock); + return TryAcquireSRWLockExclusive(lock); } static inline bool mi_lock_acquire(mi_lock_t* lock) { - EnterCriticalSection(lock); + AcquireSRWLockExclusive(lock); return true; } static inline void mi_lock_release(mi_lock_t* lock) { - LeaveCriticalSection(lock); + ReleaseSRWLockExclusive(lock); } static inline void mi_lock_init(mi_lock_t* lock) { - InitializeCriticalSection(lock); + InitializeSRWLock(lock); } static inline void mi_lock_done(mi_lock_t* lock) { - DeleteCriticalSection(lock); + // nothing } @@ -447,14 +453,13 @@ static inline void mi_lock_done(mi_lock_t* lock) { pthread_mutex_destroy(lock); } -/* #elif defined(__cplusplus) #include #define mi_lock_t std::mutex static inline bool mi_lock_try_acquire(mi_lock_t* lock) { - return lock->lock_try_acquire(); + return lock->try_lock(); } static inline bool mi_lock_acquire(mi_lock_t* lock) { lock->lock(); @@ -469,7 +474,6 @@ static inline void mi_lock_init(mi_lock_t* lock) { static inline void mi_lock_done(mi_lock_t* lock) { (void)(lock); } -*/ #else From a5b7d7f26461d0d241b6de41f215d63dbfa642cb Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 20 Dec 2024 21:38:31 -0800 Subject: [PATCH 04/16] subprocesses own arena's --- include/mimalloc.h | 2 +- include/mimalloc/atomic.h | 2 +- include/mimalloc/internal.h | 15 +- include/mimalloc/types.h | 56 +++---- src/alloc.c | 4 +- src/arena-meta.c | 6 +- src/arena.c | 315 +++++++++++++++++------------------- src/bitmap.c | 7 +- src/bitmap.h | 4 +- src/free.c | 6 +- src/heap.c | 7 +- src/init.c | 259 ++++++++++++++++------------- src/page.c | 2 +- 13 files changed, 351 insertions(+), 334 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 24217fae..7a58e54c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -279,7 +279,7 @@ mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_commit mi_decl_export void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept; // Experimental: heaps associated with specific memory arena's -typedef int mi_arena_id_t; +typedef void* mi_arena_id_t; mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) 
mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 7dc492f6..ddb5a9a3 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -434,7 +434,7 @@ static inline void mi_lock_init(mi_lock_t* lock) { InitializeSRWLock(lock); } static inline void mi_lock_done(mi_lock_t* lock) { - // nothing + (void)(lock); } diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index a5ca3e27..24792f8c 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -101,8 +101,10 @@ bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); bool _mi_preloading(void); // true while the C runtime is not initialized yet void _mi_thread_done(mi_heap_t* heap); -mi_tld_t* _mi_tld(void); // current tld: `_mi_tld() == _mi_heap_get_default()->tld` +mi_tld_t* _mi_tld(void); // current tld: `_mi_tld() == _mi_heap_get_default()->tld` +mi_subproc_t* _mi_subproc(void); +mi_subproc_t* _mi_subproc_main(void); mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; size_t _mi_thread_seq_id(void) mi_attr_noexcept; @@ -142,10 +144,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t m // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_init(void); -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid); -bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +mi_arena_t* _mi_arena_from_id(mi_arena_id_t id); + +void* _mi_arena_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +void* _mi_arena_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena); bool _mi_arena_contains(const void* p); void _mi_arenas_collect(bool force_purge); void _mi_arena_unsafe_destroy_all(void); @@ -524,7 +527,7 @@ static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { if (heap != NULL) { page->heap = heap; page->heap_tag = heap->tag; - mi_atomic_store_release(&page->xthread_id, heap->thread_id); + mi_atomic_store_release(&page->xthread_id, heap->tld->thread_id); } else { page->heap = NULL; diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 0cf909d0..4d43e887 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -243,9 +243,6 @@ typedef size_t mi_page_flags_t; // atomically in `free.c:mi_free_block_mt`. typedef uintptr_t mi_thread_free_t; -// Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython) -typedef struct mi_subproc_s mi_subproc_t; - // A heap can serve only specific objects signified by its heap tag (e.g. various object types in CPython) typedef uint8_t mi_heaptag_t; @@ -299,7 +296,6 @@ typedef struct mi_page_s { mi_heap_t* heap; // heap this threads belong to. 
struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` - mi_subproc_t* subproc; // sub-process of this heap mi_memid_t memid; // provenance of the page memory } mi_page_t; @@ -380,7 +376,7 @@ typedef struct mi_random_cxt_s { // In debug mode there is a padding structure at the end of the blocks to check for buffer overflows -#if (MI_PADDING) +#if MI_PADDING typedef struct mi_padding_s { uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes) @@ -397,10 +393,8 @@ typedef struct mi_padding_s { // A heap owns a set of pages. struct mi_heap_s { - mi_tld_t* tld; - // _Atomic(mi_block_t*) thread_delayed_free; - mi_threadid_t thread_id; // thread this heap belongs too - mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) + mi_tld_t* tld; // thread-local data + mi_arena_t* exclusive_arena; // if the heap belongs to a specific arena (or NULL) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation @@ -408,7 +402,6 @@ struct mi_heap_s { size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) size_t page_retired_max; // largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread - mi_memid_t memid; // provenance of the heap struct itseft (meta or os) long full_page_retain; // how many full pages can be retained per queue (before abondoning them) bool allow_page_reclaim; // `true` if this heap should not reclaim abandoned pages bool allow_page_abandon; // `true` if this heap can abandon pages to reduce memory footprint @@ -421,7 +414,8 @@ struct mi_heap_s { size_t guarded_sample_count; // current sample count (counting down to 0) #endif mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. - mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") + mi_page_queue_t pages[MI_BIN_COUNT]; // queue of pages for each size class (or "bin") + mi_memid_t memid; // provenance of the heap struct itself (meta or os) }; @@ -479,7 +473,7 @@ typedef struct mi_stats_s { mi_stat_counter_t arena_count; mi_stat_counter_t guarded_alloc_count; #if MI_STAT>1 - mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; + mi_stat_count_t normal_bins[MI_BIN_COUNT]; #endif } mi_stats_t; @@ -513,19 +507,24 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ -// Sub processes do not reclaim or visit segments -// from other sub processes +// Sub processes use separate arena's and no heaps/pages/blocks +// are shared between sub processes. 
+// Each thread should also belong to one sub-process only // ------------------------------------------------------ -struct mi_subproc_s { - _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // count of abandoned pages for this sub-process - _Atomic(size_t) abandoned_os_list_count; // count of abandoned pages in the os-list - mi_lock_t abandoned_os_lock; // lock for the abandoned os pages list (outside of arena's) (this lock protect list operations) - mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list - mi_page_t* abandoned_os_list; // doubly-linked list of abandoned pages outside of arena's (in OS allocated memory) - mi_page_t* abandoned_os_list_tail; // the tail-end of the list - mi_memid_t memid; // provenance of this memory block -}; +#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`) + // 160 arenas is enough for ~2 TiB memory + +typedef struct mi_subproc_s { + _Atomic(size_t) arena_count; // current count of arena's + _Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process + mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time + _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process + mi_page_queue_t os_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on) + mi_lock_t os_pages_lock; // lock for the os pages list (this lock protects list operations) + mi_memid_t memid; // provenance of this memory block (meta or OS) +} mi_subproc_t; + // ------------------------------------------------------ // Thread Local data @@ -534,20 +533,21 @@ struct mi_subproc_s { // Milliseconds as in `int64_t` to avoid overflows typedef int64_t mi_msecs_t; - // Thread local data struct mi_tld_s { - unsigned long long heartbeat; // monotonic heartbeat count + mi_threadid_t thread_id; // thread id of this thread + size_t thread_seq; // thread sequence id (linear count of created threads) + mi_subproc_t* subproc; // sub-process this thread belongs to. mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) - mi_subproc_t* subproc; // sub-process this thread belongs to. - size_t tseq; // thread sequence id - mi_memid_t memid; // provenance of the tld memory itself (meta or OS) + unsigned long long heartbeat; // monotonic heartbeat count bool recurse; // true if deferred was called; used to prevent infinite recursion. bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) mi_stats_t stats; // statistics + mi_memid_t memid; // provenance of the tld memory itself (meta or OS) }; + /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. 
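// --- Illustrative sketch (not part of the patch): the double-checked reservation
// --- pattern that the new per-subprocess `arena_reserve_lock` above supports. A
// --- thread snapshots the arena count, takes the lock, and only reserves a fresh
// --- arena if no other thread added one in the meantime; either way it retries the
// --- free-slice search afterwards. All `demo_` names are hypothetical stand-ins
// --- (a plain pthread mutex and stub helpers), not mimalloc API.
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

typedef struct demo_subproc_s {
  size_t          arena_count;          // atomic in the real code
  pthread_mutex_t arena_reserve_lock;   // ensures arenas are reserved one at a time
} demo_subproc_t;

// stand-in for mi_arena_reserve: pretend a fresh arena was added
static bool demo_reserve_fresh_arena(demo_subproc_t* subproc) {
  subproc->arena_count++;
  return true;
}

// stand-in for the free-slice search over the subprocess' arenas
static void* demo_try_find_free(demo_subproc_t* subproc, size_t size) {
  (void)subproc; (void)size;
  return NULL;   // pretend no free slices were found
}

void* demo_alloc_with_reserve(demo_subproc_t* subproc, size_t size) {
  void* p = demo_try_find_free(subproc, size);
  if (p != NULL) return p;
  const size_t count = subproc->arena_count;          // snapshot before taking the lock
  pthread_mutex_lock(&subproc->arena_reserve_lock);
  bool ok = true;
  if (count == subproc->arena_count) {
    ok = demo_reserve_fresh_arena(subproc);           // we are first: reserve a fresh arena
  }                                                   // else: another thread already reserved one
  pthread_mutex_unlock(&subproc->arena_reserve_lock);
  if (ok) {
    p = demo_try_find_free(subproc, size);            // try once more in the (possibly new) arena
  }
  return p;
}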
diff --git a/src/alloc.c b/src/alloc.c index 25d6f62e..e5f2b8ae 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -134,7 +134,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, mi_assert(size <= MI_SMALL_SIZE_MAX); #if MI_DEBUG const uintptr_t tid = _mi_thread_id(); - mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local + mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == tid); // heaps are thread local #endif #if (MI_PADDING || MI_GUARDED) if (size == 0) { size = sizeof(void*); } @@ -188,7 +188,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z else { // regular allocation mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == _mi_thread_id()); // heaps are thread local void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_track_malloc(p,size,zero); diff --git a/src/arena-meta.c b/src/arena-meta.c index ceda06ba..f28c50e9 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -64,10 +64,12 @@ static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) { // allocate a fresh meta page and add it to the global list. static mi_meta_page_t* mi_meta_page_zalloc(void) { // allocate a fresh arena slice + // note: we always use subproc_main directly for the meta-data since at thread start the metadata for the + // tld and heap need to be (meta) allocated and at that time we cannot read the tld pointer (yet). mi_memid_t memid; - mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, + mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(_mi_subproc_main(), MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, true /* commit*/, true /* allow large */, - _mi_arena_id_none(), 0 /* tseq */, &memid ); + NULL, 0 /* tseq */, &memid ); if (mpage == NULL) return NULL; mi_assert_internal(_mi_is_aligned(mpage,MI_META_PAGE_ALIGN)); if (!memid.initially_zero) { diff --git a/src/arena.c b/src/arena.c index 74cd4977..bb846da9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -35,7 +35,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo // A memory arena descriptor typedef struct mi_arena_s { mi_memid_t memid; // memid of the memory area - mi_arena_id_t id; // arena id (> 0 where `arena == arenas[arena->id - 1]`) + mi_subproc_t* subproc; // subprocess this arena belongs to (`this 'in' this->subproc->arenas`) size_t slice_count; // total size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`) size_t info_slices; // initial slices reserved for the arena bitmaps @@ -64,64 +64,45 @@ typedef struct mi_purge_info_s { } mi_purge_info_t; -#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. 
but arena's scale up exponentially (see `mi_arena_reserve`) - // 160 arenas is enough for ~2 TiB memory - -// The available arenas -static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; -static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 - - -static mi_lock_t mi_arena_reserve_lock; - -void _mi_arena_init(void) { - mi_lock_init(&mi_arena_reserve_lock); -} /* ----------------------------------------------------------- Arena id's - id = arena_index + 1 ----------------------------------------------------------- */ -size_t mi_arena_id_index(mi_arena_id_t id) { - return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); -} - -static mi_arena_id_t mi_arena_id_create(size_t arena_index) { - mi_assert_internal(arena_index < MI_MAX_ARENAS); - return (int)arena_index + 1; +static mi_arena_id_t mi_arena_id_create(mi_arena_t* arena) { + return arena; } mi_arena_id_t _mi_arena_id_none(void) { - return 0; + return NULL; } -static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { - return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || - (arena_id == req_arena_id)); +mi_arena_t* _mi_arena_from_id(mi_arena_id_t id) { + return (mi_arena_t*)id; } -bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { + +static bool mi_arena_id_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena) { + return ((arena == req_arena) || // they match, + (req_arena == NULL && !arena->is_exclusive)); // or the arena is not exclusive, and we didn't request a specific one +} + +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena) { if (memid.memkind == MI_MEM_ARENA) { - const mi_arena_t* arena = memid.mem.arena.arena; - return mi_arena_id_is_suitable(arena->id, arena->is_exclusive, request_arena_id); + return mi_arena_id_is_suitable(memid.mem.arena.arena, request_arena); } else { - return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id); + return mi_arena_id_is_suitable(NULL, request_arena); } } -size_t mi_arena_get_count(void) { - return mi_atomic_load_relaxed(&mi_arena_count); +size_t mi_arenas_get_count(mi_subproc_t* subproc) { + return mi_atomic_load_relaxed(&subproc->arena_count); } -mi_arena_t* mi_arena_from_index(size_t idx) { - mi_assert_internal(idx < mi_arena_get_count()); - return mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[idx]); -} - -mi_arena_t* mi_arena_from_id(mi_arena_id_t id) { - return mi_arena_from_index(mi_arena_id_index(id)); +mi_arena_t* mi_arena_from_index(mi_subproc_t* subproc, size_t idx) { + mi_assert_internal(idx < mi_arenas_get_count(subproc)); + return mi_atomic_load_ptr_relaxed(mi_arena_t, &subproc->arenas[idx]); } static size_t mi_arena_info_slices(mi_arena_t* arena) { @@ -159,9 +140,7 @@ uint8_t* mi_arena_slice_start(mi_arena_t* arena, size_t slice_index) { // Arena area void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (size != NULL) *size = 0; - const size_t arena_index = mi_arena_id_index(arena_id); - if (arena_index >= MI_MAX_ARENAS) return NULL; - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + mi_arena_t* arena = _mi_arena_from_id(arena_id); if (arena == NULL) return NULL; if (size != NULL) { *size = mi_size_of_slices(arena->slice_count); } return mi_arena_start(arena); @@ -297,12 +276,12 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( // try to reserve a fresh arena space -static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t 
req_arena_id, mi_arena_id_t* arena_id) +static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id) { // if (_mi_preloading()) return false; // use OS only while pre loading if (req_arena_id != _mi_arena_id_none()) return false; - const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); + const size_t arena_count = mi_arenas_get_count(subproc); if (arena_count > (MI_MAX_ARENAS - 4)) return false; // calc reserve @@ -368,32 +347,27 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re Arena iteration ----------------------------------------------------------- */ -static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, int numa_node, bool allow_large) { +static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_large) { if (!allow_large && arena->is_large) return false; - if (!mi_arena_id_is_suitable(arena->id, arena->is_exclusive, req_arena_id)) return false; - if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity + if (!mi_arena_id_is_suitable(arena, req_arena)) return false; + if (req_arena == NULL) { // if not specific, check numa affinity const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); if (!numa_suitable) return false; } return true; } - -#define mi_forall_arenas(req_arena_id, tseq, name_arena) \ - { \ - const size_t _arena_count = mi_arena_get_count(); \ - if (_arena_count > 0) { \ - const size_t _arena_cycle = _arena_count - 1; /* first search the arenas below the last one */ \ - size_t _start; \ - if (req_arena_id == _mi_arena_id_none()) { \ - /* always start searching in the arena's below the max */ \ - _start = (_arena_cycle <= 1 ? 0 : (tseq % _arena_cycle)); \ +#define mi_forall_arenas(subproc, req_arena, tseq, name_arena) { \ + const size_t _arena_count = mi_arenas_get_count(subproc); \ + const size_t _arena_cycle = (_arena_count == 0 ? 0 : _arena_count - 1); /* first search the arenas below the last one */ \ + /* always start searching in the arena's below the max */ \ + size_t _start = (_arena_cycle <= 1 ? 
0 : (tseq % _arena_cycle)); \ + for (size_t _i = 0; _i < _arena_count; _i++) { \ + mi_arena_t* name_arena; \ + if (req_arena != NULL) { \ + name_arena = req_arena; /* if there is a specific req_arena, only search that one */\ } \ else { \ - _start = mi_arena_id_index(req_arena_id); \ - mi_assert_internal(_start < _arena_count); \ - } \ - for (size_t _i = 0; _i < _arena_count; _i++) { \ size_t _idx; \ if (_i < _arena_cycle) { \ _idx = _i + _start; \ @@ -402,19 +376,20 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are else { \ _idx = _i; /* remaining arena's */ \ } \ - mi_arena_t* const name_arena = mi_arena_from_index(_idx); \ - if (name_arena != NULL) \ - { + name_arena = mi_arena_from_index(subproc,_idx); \ + } \ + if (name_arena != NULL) \ + { #define mi_forall_arenas_end() \ - } \ - if (req_arena_id != _mi_arena_id_none()) break; \ } \ - }} + if (req_arena != NULL) break; \ + } \ + } -#define mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, name_arena) \ - mi_forall_arenas(req_arena_id,tseq,name_arena) { \ - if (mi_arena_is_suitable(name_arena, req_arena_id, -1 /* todo: numa node */, allow_large)) { \ +#define mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, name_arena) \ + mi_forall_arenas(subproc, req_arena,tseq,name_arena) { \ + if (mi_arena_is_suitable(name_arena, req_arena, -1 /* todo: numa node */, allow_large)) { \ #define mi_forall_suitable_arenas_end() \ }} \ @@ -425,17 +400,16 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are ----------------------------------------------------------- */ // allocate slices from the arenas -static mi_decl_noinline void* mi_arena_try_find_free( - size_t slice_count, size_t alignment, - bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) +static mi_decl_noinline void* mi_arenas_try_find_free( + mi_subproc_t* subproc, size_t slice_count, size_t alignment, + bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert_internal(slice_count <= mi_slice_count_of_size(MI_ARENA_MAX_OBJ_SIZE)); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); if (alignment > MI_ARENA_SLICE_ALIGN) return NULL; // search arena's - mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, arena) + mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena) { void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid); if (p != NULL) return p; @@ -445,42 +419,43 @@ static mi_decl_noinline void* mi_arena_try_find_free( } // Allocate slices from the arena's -- potentially allocating a fresh arena -static mi_decl_noinline void* mi_arena_try_alloc( +static mi_decl_noinline void* mi_arenas_try_alloc( + mi_subproc_t* subproc, size_t slice_count, size_t alignment, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) + mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert(slice_count <= MI_ARENA_MAX_OBJ_SLICES); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); void* p; // try to find free slices in the arena's - p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); + p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); if (p != NULL) return p; // did we need a specific arena? - if (req_arena_id != _mi_arena_id_none()) return NULL; + if (req_arena != NULL) return NULL; // don't create arena's while preloading (todo: or should we?) 
if (_mi_preloading()) return NULL; // otherwise, try to reserve a new arena -- but one thread at a time.. (todo: allow 2 or 4 to reduce contention?) - const size_t arena_count = mi_arena_get_count(); - if (mi_lock_acquire(&mi_arena_reserve_lock)) { + const size_t arena_count = mi_arenas_get_count(subproc); + if (mi_lock_acquire(&subproc->arena_reserve_lock)) { bool ok = true; - if (arena_count == mi_arena_get_count()) { + if (arena_count == mi_arenas_get_count(subproc)) { // we are the first to enter the lock, reserve a fresh arena mi_arena_id_t arena_id = 0; - ok = mi_arena_reserve(mi_size_of_slices(slice_count), allow_large, req_arena_id, &arena_id); + ok = mi_arena_reserve(subproc, mi_size_of_slices(slice_count), allow_large, req_arena, &arena_id); } else { // another thread already reserved a new arena } - mi_lock_release(&mi_arena_reserve_lock); + mi_lock_release(&subproc->arena_reserve_lock); if (ok) { // try once more to allocate in the new arena - mi_assert_internal(req_arena_id == _mi_arena_id_none()); - p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); + mi_assert_internal(req_arena == NULL); + p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); if (p != NULL) return p; } } @@ -510,10 +485,10 @@ static void* mi_arena_os_alloc_aligned( // Allocate large sized memory -void* _mi_arena_alloc_aligned( +void* _mi_arena_alloc_aligned( mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) + mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert_internal(memid != NULL); mi_assert_internal(size > 0); @@ -522,24 +497,24 @@ void* _mi_arena_alloc_aligned( // const int numa_node = _mi_os_numa_node(&tld->os); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? - req_arena_id == _mi_arena_id_none() && // not a specific arena? + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? + req_arena == NULL && // not a specific arena? 
size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && // and not too small/large alignment <= MI_ARENA_SLICE_ALIGN && align_offset == 0) // and good alignment { const size_t slice_count = mi_slice_count_of_size(size); - void* p = mi_arena_try_alloc(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); + void* p = mi_arenas_try_alloc(subproc,slice_count, alignment, commit, allow_large, req_arena, tseq, memid); if (p != NULL) return p; } // fall back to the OS - void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena_id, memid); + void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena, memid); return p; } -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) +void* _mi_arena_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { - return _mi_arena_alloc_aligned(size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena_id, tseq, memid); + return _mi_arena_alloc_aligned(subproc, size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena, tseq, memid); } @@ -548,7 +523,7 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t Arena page allocation ----------------------------------------------------------- */ -static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag, bool* keep_abandoned) { +static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, mi_heaptag_t heap_tag, bool* keep_abandoned) { // found an abandoned page of the right size mi_page_t* const page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); // can we claim ownership? @@ -560,9 +535,9 @@ static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, *keep_abandoned = true; return false; } - if (subproc != page->subproc || heap_tag != page->heap_tag) { - // wrong sub-process or heap_tag.. we need to unown again - // note: this normally never happens unless subprocesses/heaptags are actually used. + if (heap_tag != page->heap_tag) { + // wrong heap_tag.. we need to unown again + // note: this normally never happens unless heaptags are actually used. // (an unown might free the page, and depending on that we can keep it in the abandoned map or not) // note: a minor wrinkle: the page will still be mapped but the abandoned map entry is (temporarily) clear at this point. // so we cannot check in `mi_arena_free` for this invariant to hold. @@ -570,31 +545,31 @@ static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, *keep_abandoned = !freed; return false; } - // yes, we can reclaim it, keep the abandaned map entry clear + // yes, we can reclaim it, keep the abandoned map entry clear *keep_abandoned = false; return true; } -static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t block_size, mi_arena_id_t req_arena_id, mi_heaptag_t heaptag, mi_tld_t* tld) +static mi_page_t* mi_arena_page_try_find_abandoned(mi_subproc_t* subproc, size_t slice_count, size_t block_size, mi_arena_t* req_arena, mi_heaptag_t heaptag, size_t tseq) { MI_UNUSED(slice_count); const size_t bin = _mi_bin(block_size); mi_assert_internal(bin < MI_BIN_COUNT); // any abandoned in our size class? 
- mi_subproc_t* const subproc = tld->subproc; mi_assert_internal(subproc != NULL); - if (mi_atomic_load_relaxed(&subproc->abandoned_count[bin]) == 0) return NULL; + if (mi_atomic_load_relaxed(&subproc->abandoned_count[bin]) == 0) { + return NULL; + } // search arena's const bool allow_large = true; - size_t tseq = tld->tseq; - mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, arena) + mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena) { size_t slice_index; mi_bitmap_t* const bitmap = arena->pages_abandoned[bin]; - if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, subproc, heaptag)) { + if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, heaptag)) { // found an abandoned page of the right size // and claimed ownership. mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); @@ -621,8 +596,8 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl return NULL; } -static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_size, size_t block_alignment, - mi_arena_id_t req_arena_id, mi_tld_t* tld) +static mi_page_t* mi_arena_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_count, size_t block_size, size_t block_alignment, + mi_arena_t* req_arena, size_t tseq) { const bool allow_large = true; const bool commit = true; @@ -636,7 +611,7 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz !os_align && // not large alignment slice_count <= MI_ARENA_MAX_OBJ_SLICES) // and not too large { - page = (mi_page_t*)mi_arena_try_alloc(slice_count, page_alignment, commit, allow_large, req_arena_id, tld->tseq, &memid); + page = (mi_page_t*)mi_arenas_try_alloc(subproc, slice_count, page_alignment, commit, allow_large, req_arena, tseq, &memid); if (page != NULL) { mi_assert_internal(mi_bitmap_is_clearN(memid.mem.arena.arena->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count)); mi_bitmap_set(memid.mem.arena.arena->pages, memid.mem.arena.slice_index); @@ -648,10 +623,10 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz if (os_align) { // note: slice_count already includes the page mi_assert_internal(slice_count >= mi_slice_count_of_size(block_size) + mi_slice_count_of_size(page_alignment)); - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena_id, &memid); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena, &memid); } else { - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, req_arena_id, &memid); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, req_arena, &memid); } } @@ -724,17 +699,17 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz } static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t slice_count, size_t block_size) { - const mi_arena_id_t req_arena_id = heap->arena_id; + mi_arena_t* req_arena = heap->exclusive_arena; mi_tld_t* const tld = heap->tld; // 1. 
look for an abandoned page - mi_page_t* page = mi_arena_page_try_find_abandoned(slice_count, block_size, req_arena_id, heap->tag, tld); + mi_page_t* page = mi_arena_page_try_find_abandoned(tld->subproc, slice_count, block_size, req_arena, heap->tag, tld->thread_seq); if (page != NULL) { return page; // return as abandoned } // 2. find a free block, potentially allocating a new arena - page = mi_arena_page_alloc_fresh(slice_count, block_size, 1, req_arena_id, tld); + page = mi_arena_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq); if (page != NULL) { mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count); _mi_page_init(heap, page); @@ -746,13 +721,13 @@ static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t slice_count, size static mi_page_t* mi_singleton_page_alloc(mi_heap_t* heap, size_t block_size, size_t block_alignment) { - const mi_arena_id_t req_arena_id = heap->arena_id; + mi_arena_t* req_arena = heap->exclusive_arena; mi_tld_t* const tld = heap->tld; const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); const size_t info_size = (os_align ? MI_PAGE_ALIGN : mi_page_info_size()); const size_t slice_count = mi_slice_count_of_size(info_size + block_size); - mi_page_t* page = mi_arena_page_alloc_fresh(slice_count, block_size, block_alignment, req_arena_id, tld); + mi_page_t* page = mi_arena_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq); if (page == NULL) return NULL; mi_assert(page != NULL); @@ -836,7 +811,6 @@ void _mi_arena_page_abandon(mi_page_t* page) { mi_assert_internal(!mi_page_all_free(page)); mi_assert_internal(page->next==NULL); - mi_subproc_t* subproc = page->subproc; if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { // make available for allocations size_t bin = _mi_bin(mi_page_block_size(page)); @@ -851,7 +825,7 @@ void _mi_arena_page_abandon(mi_page_t* page) { mi_page_set_abandoned_mapped(page); const bool wasclear = mi_bitmap_set(arena->pages_abandoned[bin], slice_index); MI_UNUSED(wasclear); mi_assert_internal(wasclear); - mi_atomic_increment_relaxed(&subproc->abandoned_count[bin]); + mi_atomic_increment_relaxed(&arena->subproc->abandoned_count[bin]); } else { // page is full (or a singleton), page is OS/externally allocated @@ -902,7 +876,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) { // this busy waits until a concurrent reader (from alloc_abandoned) is done mi_bitmap_clear_once_set(arena->pages_abandoned[bin], slice_index); mi_page_clear_abandoned_mapped(page); - mi_atomic_decrement_relaxed(&page->subproc->abandoned_count[bin]); + mi_atomic_decrement_relaxed(&arena->subproc->abandoned_count[bin]); } else { // page is full (or a singleton), page is OS/nly allocated @@ -989,9 +963,10 @@ void _mi_arenas_collect(bool force_purge) { // Is a pointer inside any of our arenas? 
bool _mi_arena_contains(const void* p) { - const size_t max_arena = mi_arena_get_count(); + mi_subproc_t* subproc = _mi_subproc(); + const size_t max_arena = mi_arenas_get_count(subproc); for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) >(const uint8_t*)p) { return true; } @@ -1007,14 +982,14 @@ bool _mi_arena_contains(const void* p) { // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. -static void mi_arenas_unsafe_destroy(void) { - const size_t max_arena = mi_arena_get_count(); +static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) { + const size_t max_arena = mi_arenas_get_count(subproc); size_t new_max_arena = 0; for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); if (arena != NULL) { // mi_lock_done(&arena->abandoned_visit_lock); - mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); + mi_atomic_store_ptr_release(mi_arena_t, &subproc->arenas[i], NULL); if (mi_memkind_is_os(arena->memid.memkind)) { _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid); } @@ -1023,14 +998,14 @@ static void mi_arenas_unsafe_destroy(void) { // try to lower the max arena. size_t expected = max_arena; - mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); + mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, new_max_arena); } // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. void _mi_arena_unsafe_destroy_all(void) { - mi_arenas_unsafe_destroy(); + mi_arenas_unsafe_destroy(_mi_subproc()); _mi_arenas_collect(true /* force purge */); // purge non-owned arenas } @@ -1039,40 +1014,36 @@ void _mi_arena_unsafe_destroy_all(void) { Add an arena. 
----------------------------------------------------------- */ -static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { +static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { mi_assert_internal(arena != NULL); mi_assert_internal(arena->slice_count > 0); - if (arena_id != NULL) { *arena_id = -1; } + if (arena_id != NULL) { *arena_id = NULL; } // first try to find a NULL entry - const size_t count = mi_arena_get_count(); + const size_t count = mi_arenas_get_count(subproc); size_t i; for (i = 0; i < count; i++) { - if (mi_arena_from_index(i) == NULL) { - arena->id = mi_arena_id_create(i); + if (mi_arena_from_index(subproc,i) == NULL) { mi_arena_t* expected = NULL; - if (mi_atomic_cas_ptr_strong_release(mi_arena_t, &mi_arenas[i], &expected, arena)) { + if (mi_atomic_cas_ptr_strong_release(mi_arena_t, &subproc->arenas[i], &expected, arena)) { // success - if (arena_id != NULL) { *arena_id = arena->id; } + if (arena_id != NULL) { *arena_id = arena; } return true; - } - else { - arena->id = _mi_arena_id_none(); - } + } } } // otherwise increase the max - i = mi_atomic_increment_acq_rel(&mi_arena_count); + i = mi_atomic_increment_acq_rel(&subproc->arena_count); if (i >= MI_MAX_ARENAS) { - mi_atomic_decrement_acq_rel(&mi_arena_count); + mi_atomic_decrement_acq_rel(&subproc->arena_count); + arena->subproc = NULL; return false; } _mi_stat_counter_increase(&stats->arena_count,1); - arena->id = mi_arena_id_create(i); - mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); - if (arena_id != NULL) { *arena_id = arena->id; } + mi_atomic_store_ptr_release(mi_arena_t,&subproc->arenas[i], arena); + if (arena_id != NULL) { *arena_id = arena; } return true; } @@ -1099,7 +1070,7 @@ static mi_bitmap_t* mi_arena_bitmap_init(size_t slice_count, uint8_t** base) { } -static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_assert(!is_large || (memid.initially_committed && memid.is_pinned)); mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); @@ -1138,7 +1109,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int } // init - arena->id = _mi_arena_id_none(); + arena->subproc = subproc; arena->memid = memid; arena->is_exclusive = exclusive; arena->slice_count = slice_count; @@ -1176,7 +1147,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int mi_bitmap_setN(arena->slices_dirty, 0, info_slices, NULL); } - return mi_arena_add(arena, arena_id, &_mi_stats_main); + return mi_arena_add(subproc, arena, arena_id, &_mi_stats_main); } @@ -1187,7 +1158,7 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is memid.initially_committed = is_committed; memid.initially_zero = is_zero; memid.is_pinned = is_large; - return mi_manage_os_memory_ex2(start, size, is_large, numa_node, exclusive, memid, arena_id); + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, is_large, numa_node, exclusive, memid, arena_id); } // Reserve a range of regular OS memory @@ -1198,7 +1169,7 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, 
&memid); if (start == NULL) return ENOMEM; const bool is_large = memid.is_pinned; // todo: use separate is_large field? - if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(_mi_subproc(), start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { _mi_os_free_ex(start, size, commit, memid); _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; @@ -1307,16 +1278,18 @@ static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bi } void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept { - size_t max_arenas = mi_arena_get_count(); + mi_subproc_t* subproc = _mi_subproc(); + size_t max_arenas = mi_arenas_get_count(subproc); size_t free_total = 0; size_t slice_total = 0; //size_t abandoned_total = 0; size_t page_total = 0; for (size_t i = 0; i < max_arenas; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); if (arena == NULL) break; + mi_assert(arena->subproc == subproc); slice_total += arena->slice_count; - _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : "")); + _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : "", arena->subproc)); if (show_inuse) { free_total += mi_debug_show_bitmap("in-use slices", arena->slice_count, arena->slices_free, true, NULL); } @@ -1342,7 +1315,7 @@ void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) ----------------------------------------------------------- */ // reserve at a specific numa node int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - if (arena_id != NULL) *arena_id = -1; + if (arena_id != NULL) *arena_id = NULL; if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); @@ -1356,7 +1329,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, true, numa_node, exclusive, memid, arena_id)) { _mi_os_free(p, hsize, memid); return ENOMEM; } @@ -1538,10 +1511,13 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) } -static void mi_arenas_try_purge(bool force, bool visit_all) { +static void mi_arenas_try_purge(bool force, bool visit_all) +{ if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled - const size_t max_arena = mi_arena_get_count(); + mi_tld_t* tld = _mi_tld(); + mi_subproc_t* subproc = tld->subproc; + const size_t max_arena = mi_arenas_get_count(subproc); if (max_arena == 0) return; // allow only one thread to purge at a time @@ -1549,12 +1525,12 @@ static void mi_arenas_try_purge(bool force, bool visit_all) { mi_atomic_guard(&purge_guard) { const mi_msecs_t now = _mi_clock_now(); - const size_t arena_start = 
_mi_tld()->tseq % max_arena; + const size_t arena_start = tld->thread_seq % max_arena; size_t max_purge_count = (visit_all ? max_arena : 1); for (size_t _i = 0; _i < max_arena; _i++) { size_t i = _i + arena_start; if (i >= max_arena) { i -= max_arena; } - mi_arena_t* arena = mi_arena_from_index(i); + mi_arena_t* arena = mi_arena_from_index(subproc,i); if (arena != NULL) { if (mi_arena_try_purge(arena, now, force)) { if (max_purge_count <= 1) break; @@ -1590,13 +1566,7 @@ static bool mi_arena_pages_reregister(mi_arena_t* arena) { } mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* full_size) { - const size_t count = mi_arena_get_count(); - const size_t arena_idx = mi_arena_id_index(arena_id); - if (count <= arena_idx) { - _mi_warning_message("arena id is invalid (%zu)\n", arena_id); - return false; - } - mi_arena_t* arena = mi_arena_from_id(arena_id); + mi_arena_t* arena = _mi_arena_from_id(arena_id); if (arena==NULL) { return false; } @@ -1627,10 +1597,17 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* _mi_page_map_unregister_range(arena, asize); // set the entry to NULL - mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[arena_idx], NULL); - if (arena_idx + 1 == count) { // try adjust the count? - size_t expected = count; - mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, count-1); + mi_subproc_t* subproc = arena->subproc; + const size_t count = mi_arenas_get_count(subproc); + for(size_t i = 0; i < count; i++) { + if (mi_arena_from_index(subproc, i) == arena) { + mi_atomic_store_ptr_release(mi_arena_t, &subproc->arenas[i], NULL); + if (i + 1 == count) { // try adjust the count? + size_t expected = count; + mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, count-1); + } + break; + } } return true; } @@ -1662,8 +1639,8 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, arena->memid.initially_zero = is_zero; arena->is_exclusive = true; arena->is_large = is_large; - arena->id = _mi_arena_id_none(); - if (!mi_arena_add(arena, arena_id, &_mi_stats_main)) { + arena->subproc = NULL; + if (!mi_arena_add(_mi_subproc(), arena, arena_id, &_mi_stats_main)) { return false; } mi_arena_pages_reregister(arena); diff --git a/src/bitmap.c b/src/bitmap.c index 6fae1ed6..6352e4ea 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1228,7 +1228,6 @@ bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_t tseq, size_t n, typedef struct mi_claim_fun_data_s { mi_arena_t* arena; - mi_subproc_t* subproc; mi_heaptag_t heap_tag; } mi_claim_fun_data_t; @@ -1242,7 +1241,7 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk const size_t slice_index = (chunk_idx * MI_BCHUNK_BITS) + cidx; mi_assert_internal(slice_index < mi_bitmap_max_bits(bitmap)); bool keep_set = true; - if ((*claim_fun)(slice_index, claim_data->arena, claim_data->subproc, claim_data->heap_tag, &keep_set)) { + if ((*claim_fun)(slice_index, claim_data->arena, claim_data->heap_tag, &keep_set)) { // success! mi_assert_internal(!keep_set); *pidx = slice_index; @@ -1267,9 +1266,9 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk // Find a set bit in the bitmap and try to atomically clear it and claim it. // (Used to find pages in the pages_abandoned bitmaps.) 
mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx, - mi_claim_fun_t* claim, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag) + mi_claim_fun_t* claim, mi_arena_t* arena, mi_heaptag_t heap_tag) { - mi_claim_fun_data_t claim_data = { arena, subproc, heap_tag }; + mi_claim_fun_data_t claim_data = { arena, heap_tag }; return mi_bitmap_find(bitmap, tseq, 1, pidx, &mi_bitmap_try_find_and_claim_visit, (void*)claim, &claim_data); } diff --git a/src/bitmap.h b/src/bitmap.h index 47c22025..16ecea07 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -208,13 +208,13 @@ mi_decl_nodiscard static inline bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* // Called once a bit is cleared to see if the memory slice can be claimed. -typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag, bool* keep_set); +typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, mi_heaptag_t heap_tag, bool* keep_set); // Find a set bits in the bitmap, atomically clear it, and check if `claim` returns true. // If not claimed, continue on (potentially setting the bit again depending on `keep_set`). // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx, - mi_claim_fun_t* claim, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag ); + mi_claim_fun_t* claim, mi_arena_t* arena, mi_heaptag_t heap_tag ); // Atomically clear a bit but only if it is set. Will block otherwise until the bit is set. diff --git a/src/free.c b/src/free.c index 14034593..770856da 100644 --- a/src/free.c +++ b/src/free.c @@ -210,7 +210,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { if (mi_page_all_free(page)) { // first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish) - _mi_arena_page_unabandon(page); + _mi_arena_page_unabandon(page); // we can free the page directly _mi_arena_page_free(page); return; @@ -234,8 +234,8 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag); if ((tagheap != NULL) && // don't reclaim across heap object types (tagheap->allow_page_reclaim) && // we are allowed to reclaim abandoned pages - (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? ) - (_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?) + // (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? ) + (_mi_arena_memid_is_suitable(page->memid, tagheap->exclusive_arena)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?) 
) { if (mi_page_queue(tagheap, page->block_size)->first != NULL) { // don't reclaim for an block_size we don't use diff --git a/src/heap.c b/src/heap.c index dee404d2..e8743691 100644 --- a/src/heap.c +++ b/src/heap.c @@ -178,7 +178,7 @@ mi_heap_t* mi_heap_get_backing(void) { mi_assert_internal(heap!=NULL); mi_heap_t* bheap = heap->tld->heap_backing; mi_assert_internal(bheap!=NULL); - mi_assert_internal(bheap->thread_id == _mi_thread_id()); + mi_assert_internal(bheap->tld->thread_id == _mi_thread_id()); return bheap; } @@ -190,8 +190,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->memid = memid; heap->tld = tld; // avoid reading the thread-local tld during initialization - heap->thread_id = _mi_thread_id(); - heap->arena_id = arena_id; + heap->exclusive_arena = _mi_arena_from_id(arena_id); heap->allow_page_reclaim = !noreclaim; heap->allow_page_abandon = (!noreclaim && mi_option_get(mi_option_full_page_retain) >= 0); heap->full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); @@ -254,7 +253,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { } bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) { - return _mi_arena_memid_is_suitable(memid, heap->arena_id); + return _mi_arena_memid_is_suitable(memid, heap->exclusive_arena); } uintptr_t _mi_heap_random_next(mi_heap_t* heap) { diff --git a/src/init.c b/src/init.c index 9a26d56f..a15a9c6c 100644 --- a/src/init.c +++ b/src/init.c @@ -33,8 +33,7 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, #endif NULL, // xheap - NULL, NULL, // next, prev - NULL, // subproc + NULL, NULL, // next, prev MI_MEMID_STATIC // memid }; @@ -96,27 +95,76 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- +static mi_decl_cache_align mi_subproc_t subproc_main; + +static mi_decl_cache_align mi_tld_t tld_empty = { + 0, // thread_id + 0, // thread_seq + &subproc_main, // subproc + NULL, // heap_backing + NULL, // heaps list + 0, // heartbeat + false, // recurse + false, // is_in_threadpool + { MI_STATS_NULL }, // stats + MI_MEMID_STATIC // memid +}; + mi_decl_cache_align const mi_heap_t _mi_heap_empty = { - NULL, - // MI_ATOMIC_VAR_INIT(NULL), // thread delayed free - 0, // thread_id - 0, // arena_id - 0, // cookie - { 0, 0 }, // keys - { {0}, {0}, 0, true }, // random - 0, // page count - MI_BIN_FULL, 0, // page retired min/max - NULL, // next - MI_MEMID_STATIC, // memid - 0, // full page retain - false, // can reclaim - true, // can eager abandon - 0, // tag + &tld_empty, // tld + NULL, // exclusive_arena + 0, // cookie + { 0, 0 }, // keys + { {0}, {0}, 0, true }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + NULL, // next + 0, // full page retain + false, // can reclaim + true, // can eager abandon + 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) + 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) #endif MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY + MI_PAGE_QUEUES_EMPTY, + MI_MEMID_STATIC +}; + +extern mi_heap_t heap_main; + +static mi_decl_cache_align mi_tld_t tld_main = { + 0, // thread_id + 0, // thread_seq + &subproc_main, // subproc + &heap_main, // heap_backing + &heap_main, // heaps list + 0, // heartbeat + false, // recurse + false, // is_in_threadpool + { MI_STATS_NULL 
}, // stats + MI_MEMID_STATIC // memid +}; + +mi_decl_cache_align mi_heap_t heap_main = { + &tld_main, // thread local data + 0, // initial cookie + 0, // arena id + { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0x846ca68b}, {0}, 0, true }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + NULL, // next heap + 2, // full page retain + true, // allow page reclaim + true, // allow page abandon + 0, // tag + #if MI_GUARDED + 0, 0, 0, 0, 0, + #endif + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + MI_MEMID_STATIC }; @@ -124,49 +172,9 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { return _mi_prim_thread_id(); } - // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; -extern mi_heap_t _mi_heap_main; - -static mi_decl_cache_align mi_subproc_t mi_subproc_default; - -static mi_decl_cache_align mi_tld_t tld_main = { - 0, - &_mi_heap_main, // heap_backing - &_mi_heap_main, // heaps list - &mi_subproc_default, // subproc - 0, // tseq - MI_MEMID_STATIC, // memid - false, // recurse - false, // is_in_threadpool - { MI_STATS_NULL } // stats -}; - -mi_decl_cache_align mi_heap_t _mi_heap_main = { - &tld_main, - // MI_ATOMIC_VAR_INIT(NULL), // thread delayed free list - 0, // thread id - 0, // initial cookie - 0, // arena id - { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) - { {0x846ca68b}, {0}, 0, true }, // random - 0, // page count - MI_BIN_FULL, 0, // page retired min/max - NULL, // next heap - MI_MEMID_STATIC, // memid - 2, // full page retain - true, // allow page reclaim - true, // allow page abandon - 0, // tag - #if MI_GUARDED - 0, 0, 0, 0, 0, - #endif - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY -}; - bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. mi_stats_t _mi_stats_main = { MI_STATS_NULL }; @@ -210,30 +218,46 @@ void _mi_heap_guarded_init(mi_heap_t* heap) { } #endif - -static void mi_heap_main_init(void) { - if (_mi_heap_main.cookie == 0) { - _mi_heap_main.thread_id = _mi_thread_id(); - _mi_heap_main.cookie = 1; - #if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB) - _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking - #else - _mi_random_init(&_mi_heap_main.random); - #endif - _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); - mi_lock_init(&mi_subproc_default.abandoned_os_lock); - mi_lock_init(&mi_subproc_default.abandoned_os_visit_lock); - _mi_heap_guarded_init(&_mi_heap_main); - _mi_heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); - _mi_heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); +// Initialize main subproc +static void mi_subproc_main_init(void) { + if (subproc_main.memid.memkind != MI_MEM_STATIC) { + subproc_main.memid = _mi_memid_create(MI_MEM_STATIC); + mi_lock_init(&subproc_main.os_pages_lock); + mi_lock_init(&subproc_main.arena_reserve_lock); } } -mi_heap_t* _mi_heap_main_get(void) { +// Initialize main tld +static void mi_tld_main_init(void) { + if (tld_main.thread_id == 0) { + tld_main.thread_id = _mi_prim_thread_id(); + } +} + +// Initialization of the (statically allocated) main heap, and the main tld and subproc. 
+static void mi_heap_main_init(void) { + if (heap_main.cookie == 0) { + mi_subproc_main_init(); + mi_tld_main_init(); + // heap + heap_main.cookie = 1; + #if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB) + _mi_random_init_weak(&heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking + #else + _mi_random_init(&heap_main.random); + #endif + heap_main.cookie = _mi_heap_random_next(&heap_main); + heap_main.keys[0] = _mi_heap_random_next(&heap_main); + heap_main.keys[1] = _mi_heap_random_next(&heap_main); + _mi_heap_guarded_init(&heap_main); + heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); + heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); + } +} + +mi_heap_t* heap_main_get(void) { mi_heap_main_init(); - return &_mi_heap_main; + return &heap_main; } @@ -265,8 +289,9 @@ static mi_tld_t* mi_tld_alloc(void) { tld->memid = memid; tld->heap_backing = NULL; tld->heaps = NULL; - tld->subproc = &mi_subproc_default; - tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1); + tld->subproc = &subproc_main; + tld->thread_id = _mi_prim_thread_id(); + tld->thread_seq = mi_atomic_add_acq_rel(&mi_tcount, 1); tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); return tld; } @@ -291,12 +316,24 @@ mi_decl_noinline mi_tld_t* _mi_tld(void) { return mi_tld; } +mi_subproc_t* _mi_subproc(void) { + if (_mi_is_main_thread()) { // during initialization we should not recurse over reading the _mi_tld + return &subproc_main; + } + else { + return _mi_tld()->subproc; + } +} /* ----------------------------------------------------------- Sub process ----------------------------------------------------------- */ +mi_subproc_t* _mi_subproc_main(void) { + return &subproc_main; +} + mi_subproc_id_t mi_subproc_main(void) { return NULL; } @@ -305,42 +342,41 @@ mi_subproc_id_t mi_subproc_new(void) { mi_memid_t memid; mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid); if (subproc == NULL) return NULL; - subproc->abandoned_os_list = NULL; subproc->memid = memid; - mi_lock_init(&subproc->abandoned_os_lock); - mi_lock_init(&subproc->abandoned_os_visit_lock); + mi_lock_init(&subproc->os_pages_lock); + mi_lock_init(&subproc->arena_reserve_lock); return subproc; } mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) { - return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id); + return (subproc_id == NULL ? &subproc_main : (mi_subproc_t*)subproc_id); } void mi_subproc_delete(mi_subproc_id_t subproc_id) { if (subproc_id == NULL) return; mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); - // check if there are no abandoned segments still.. + // check if there are os pages still.. bool safe_to_delete = false; - if (mi_lock_acquire(&subproc->abandoned_os_lock)) { - if (subproc->abandoned_os_list == NULL) { + if (mi_lock_acquire(&subproc->os_pages_lock)) { + if (subproc->os_pages.first == NULL) { safe_to_delete = true; } - mi_lock_release(&subproc->abandoned_os_lock); + mi_lock_release(&subproc->os_pages_lock); } if (!safe_to_delete) return; // safe to release // todo: should we refcount subprocesses? 
- mi_lock_done(&subproc->abandoned_os_lock); - mi_lock_done(&subproc->abandoned_os_visit_lock); + mi_lock_done(&subproc->os_pages_lock); + mi_lock_done(&subproc->arena_reserve_lock); _mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid); } void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { - mi_heap_t* heap = mi_heap_get_default(); - if (heap == NULL) return; - mi_assert(heap->tld->subproc == &mi_subproc_default); - if (heap->tld->subproc != &mi_subproc_default) return; - heap->tld->subproc = _mi_subproc_from_id(subproc_id); + mi_tld_t* tld = _mi_tld(); + if (tld == NULL) return; + mi_assert(tld->subproc == &subproc_main); + if (tld->subproc != &subproc_main) return; + tld->subproc = _mi_subproc_from_id(subproc_id); } @@ -352,10 +388,10 @@ void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { static bool _mi_thread_heap_init(void) { if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; if (_mi_is_main_thread()) { - // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization + // mi_assert_internal(heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization // the main heap is statically allocated mi_heap_main_init(); - _mi_heap_set_default_direct(&_mi_heap_main); + _mi_heap_set_default_direct(&heap_main); //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { @@ -383,7 +419,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { if (!mi_heap_is_initialized(heap)) return true; // reset default heap - _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct(_mi_is_main_thread() ? &heap_main : (mi_heap_t*)&_mi_heap_empty); // switch to backing heap heap = heap->tld->heap_backing; @@ -403,7 +439,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_backing(heap)); // collect if not the main thread - if (heap != &_mi_heap_main) { + if (heap != &heap_main) { _mi_heap_collect_abandon(heap); } @@ -413,12 +449,12 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { // free heap meta data _mi_meta_free(heap, sizeof(mi_heap_t), heap->memid); - if (heap == &_mi_heap_main) { + if (heap == &heap_main) { #if 0 // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, // there may still be delete/free calls after the mi_fls_done is called. Issue #207 _mi_heap_destroy_pages(heap); - mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); + mi_assert_internal(heap->tld->heap_backing == &heap_main); #endif } @@ -449,12 +485,12 @@ static void mi_process_setup_auto_thread_done(void) { if (tls_initialized) return; tls_initialized = true; _mi_prim_thread_init_auto_done(); - _mi_heap_set_default_direct(&_mi_heap_main); + _mi_heap_set_default_direct(&heap_main); } bool _mi_is_main_thread(void) { - return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); + return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id()); } static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); @@ -501,7 +537,7 @@ void _mi_thread_done(mi_heap_t* heap) _mi_stat_decrease(&_mi_stats_main.threads, 1); // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... 
- if (heap->thread_id != _mi_thread_id()) return; + if (heap->tld->thread_id != _mi_prim_thread_id()) return; // abandon the thread local heap _mi_thread_heap_done(heap); // returns true if already ran @@ -560,7 +596,7 @@ void _mi_process_load(void) { } // reseed random - _mi_random_reinit_if_weak(&_mi_heap_main.random); + _mi_random_reinit_if_weak(&heap_main.random); } #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) @@ -587,7 +623,7 @@ void mi_process_init(void) mi_attr_noexcept { // ensure we are called once static mi_atomic_once_t process_init; #if _MSC_VER < 1920 - mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main + mi_heap_main_init(); // vs2017 can dynamically re-initialize heap_main #endif if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; @@ -595,10 +631,11 @@ void mi_process_init(void) mi_attr_noexcept { mi_process_setup_auto_thread_done(); mi_detect_cpu_features(); + mi_subproc_main_init(); + mi_tld_main_init(); + mi_heap_main_init(); _mi_os_init(); _mi_page_map_init(); - _mi_arena_init(); - mi_heap_main_init(); #if MI_DEBUG _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif @@ -609,7 +646,7 @@ void mi_process_init(void) mi_attr_noexcept { #endif mi_thread_init(); - #if defined(_WIN32) + #if defined(_WIN32) && defined(MI_WIN_USE_FLS) // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup // will not call _mi_thread_done on the (still executing) main thread. See issue #508. @@ -670,7 +707,7 @@ void mi_cdecl _mi_process_done(void) { mi_stats_print(NULL); } _mi_allocator_done(); - _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); + _mi_verbose_message("process done: 0x%zx\n", tld_main.thread_id); os_preloading = true; // don't call the C runtime anymore } diff --git a/src/page.c b/src/page.c index d97537d1..0444b47e 100644 --- a/src/page.c +++ b/src/page.c @@ -591,7 +591,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) { void _mi_page_init(mi_heap_t* heap, mi_page_t* page) { mi_assert(page != NULL); mi_page_set_heap(page, heap); - page->subproc = heap->tld->subproc; + size_t page_size; uint8_t* page_start = mi_page_area(page, &page_size); MI_UNUSED(page_start); mi_track_mem_noaccess(page_start,page_size); From daac75af3611710b9631434a25fbe9f30fd11414 Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 20 Dec 2024 22:13:58 -0800 Subject: [PATCH 05/16] fix lock recursion --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 +- include/mimalloc/atomic.h | 27 +++++++++++-- src/arena.c | 15 ++++++-- src/init.c | 51 +++++++++++++------------ 4 files changed, 62 insertions(+), 35 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index fd88cd8e..672cbb87 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -279,8 +279,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea6} + + {abb5eae7-b3e6-432e-b636-333449892ea7} diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index ddb5a9a3..ab1e161d 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -408,9 +408,8 @@ static inline void mi_atomic_yield(void) { // ---------------------------------------------------------------------- // Locks -// These do not have to be recursive and should be light-weight -// in-process only locks. 
Only used for reserving arena's and to -// maintain the abandoned list. +// These should be light-weight in-process only locks. +// Only used for reserving arena's and to maintain the abandoned list. // ---------------------------------------------------------------------- #if _MSC_VER #pragma warning(disable:26110) // unlock with holding lock @@ -418,6 +417,26 @@ static inline void mi_atomic_yield(void) { #if defined(_WIN32) +#define mi_lock_t CRITICAL_SECTION + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + return TryEnterCriticalSection(lock); +} +static inline bool mi_lock_acquire(mi_lock_t* lock) { + EnterCriticalSection(lock); + return true; +} +static inline void mi_lock_release(mi_lock_t* lock) { + LeaveCriticalSection(lock); +} +static inline void mi_lock_init(mi_lock_t* lock) { + InitializeCriticalSection(lock); +} +static inline void mi_lock_done(mi_lock_t* lock) { + DeleteCriticalSection(lock); +} + +#if 0 #define mi_lock_t SRWLOCK // slim reader-writer lock static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -436,7 +455,7 @@ static inline void mi_lock_init(mi_lock_t* lock) { static inline void mi_lock_done(mi_lock_t* lock) { (void)(lock); } - +#endif #elif defined(MI_USE_PTHREADS) diff --git a/src/arena.c b/src/arena.c index bb846da9..fd914f43 100644 --- a/src/arena.c +++ b/src/arena.c @@ -275,6 +275,8 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( } +static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id); + // try to reserve a fresh arena space static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id) { @@ -325,7 +327,7 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_ const bool adjust = (overcommit && arena_commit); if (adjust) { _mi_stat_adjust_decrease(&_mi_stats_main.committed, arena_reserve, true /* on alloc */); } // and try to reserve the arena - int err = mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); + int err = mi_reserve_os_memory_ex2(subproc, arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); if (err != 0) { if (adjust) { _mi_stat_adjust_increase(&_mi_stats_main.committed, arena_reserve, true); } // roll back // failed, try a smaller size? @@ -1162,14 +1164,14 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is } // Reserve a range of regular OS memory -int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { +static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_SLICE_SIZE); // at least one slice mi_memid_t memid; void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, &memid); if (start == NULL) return ENOMEM; const bool is_large = memid.is_pinned; // todo: use separate is_large field? 
- if (!mi_manage_os_memory_ex2(_mi_subproc(), start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(subproc, start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { _mi_os_free_ex(start, size, commit, memid); _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; @@ -1180,6 +1182,11 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc return 0; } +// Reserve a range of regular OS memory +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + return mi_reserve_os_memory_ex2(_mi_subproc(), size, commit, allow_large, exclusive, arena_id); +} + // Manage a range of regular OS memory bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); @@ -1289,7 +1296,7 @@ void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) if (arena == NULL) break; mi_assert(arena->subproc == subproc); slice_total += arena->slice_count; - _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : "", arena->subproc)); + _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""), arena->subproc); if (show_inuse) { free_total += mi_debug_show_bitmap("in-use slices", arena->slice_count, arena->slices_free, true, NULL); } diff --git a/src/init.c b/src/init.c index a15a9c6c..177ca2bd 100644 --- a/src/init.c +++ b/src/init.c @@ -11,30 +11,31 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // memcpy, memset #include // atexit -#define MI_MEMID_STATIC {{{NULL,0}}, MI_MEM_STATIC, true /* pinned */, true /* committed */, false /* zero */ } +#define MI_MEMID_INIT(kind) {{{NULL,0}}, kind, true /* pinned */, true /* committed */, false /* zero */ } +#define MI_MEMID_STATIC MI_MEMID_INIT(MI_MEM_STATIC) // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - MI_ATOMIC_VAR_INIT(0), // xthread_id - NULL, // free - 0, // used - 0, // capacity - 0, // reserved capacity - 0, // block size shift - 0, // retire_expire - NULL, // local_free - MI_ATOMIC_VAR_INIT(0), // xthread_free - MI_ATOMIC_VAR_INIT(0), // xflags - 0, // block_size - NULL, // page_start - 0, // heap tag - false, // is_zero + MI_ATOMIC_VAR_INIT(0), // xthread_id + NULL, // free + 0, // used + 0, // capacity + 0, // reserved capacity + 0, // block size shift + 0, // retire_expire + NULL, // local_free + MI_ATOMIC_VAR_INIT(0), // xthread_free + MI_ATOMIC_VAR_INIT(0), // xflags + 0, // block_size + NULL, // page_start + 0, // heap tag + false, // is_zero #if (MI_PADDING || MI_ENCODE_FREELIST) - { 0, 0 }, + { 0, 0 }, // keys #endif - NULL, // xheap - NULL, NULL, // next, prev - MI_MEMID_STATIC // memid + NULL, // xheap + NULL, NULL, // next, prev + MI_MEMID_STATIC // memid }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) @@ -100,7 +101,7 @@ static mi_decl_cache_align mi_subproc_t subproc_main; static mi_decl_cache_align mi_tld_t tld_empty = { 0, // thread_id 0, // thread_seq - &subproc_main, // subproc + &subproc_main, // subproc NULL, // heap_backing NULL, // heaps list 0, // heartbeat @@ -111,7 +112,7 @@ static mi_decl_cache_align mi_tld_t tld_empty = { }; mi_decl_cache_align const mi_heap_t _mi_heap_empty = { - &tld_empty, // tld + &tld_empty, // tld NULL, // exclusive_arena 0, // cookie { 0, 0 }, // keys @@ -136,9 +137,9 @@ extern mi_heap_t heap_main; static mi_decl_cache_align mi_tld_t tld_main = { 0, // thread_id 0, // thread_seq - &subproc_main, // subproc - &heap_main, // heap_backing - &heap_main, // heaps list + &subproc_main, // subproc + &heap_main, // heap_backing + &heap_main, // heaps list 0, // heartbeat false, // recurse false, // is_in_threadpool @@ -147,7 +148,7 @@ static mi_decl_cache_align mi_tld_t tld_main = { }; mi_decl_cache_align mi_heap_t heap_main = { - &tld_main, // thread local data + &tld_main, // thread local data 0, // initial cookie 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) 
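The patch above switches the Windows `mi_lock_t` from a slim reader-writer lock (SRWLOCK) to a CRITICAL_SECTION, which tolerates re-acquisition by the same thread. A minimal sketch of the difference, assuming the internal `mi_lock_*` wrappers from `include/mimalloc/atomic.h` as defined in this patch; `demo_lock`, `demo_inner`, and `demo_outer` are hypothetical names used only for illustration:

#include "mimalloc.h"
#include "mimalloc/internal.h"
#include "mimalloc/atomic.h"

/* hypothetical lock, used only for this illustration */
static mi_lock_t demo_lock;

static void demo_inner(void) {
  mi_lock_acquire(&demo_lock);   /* second acquire by the same thread */
  mi_lock_release(&demo_lock);
}

static void demo_outer(void) {
  mi_lock_init(&demo_lock);
  mi_lock_acquire(&demo_lock);
  demo_inner();                  /* would self-deadlock with a non-recursive SRWLOCK;
                                    completes with a (recursive) CRITICAL_SECTION */
  mi_lock_release(&demo_lock);
  mi_lock_done(&demo_lock);
}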
From dece8a587b5cb8642c28e0aa40c850da9c30ceb4 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 10:43:08 -0800 Subject: [PATCH 06/16] make stats part of a subproc --- ide/vs2022/mimalloc-test-stress.vcxproj | 4 +- include/mimalloc/atomic.h | 6 +- include/mimalloc/internal.h | 1 - include/mimalloc/types.h | 126 ++++++++++++++-------- src/alloc-aligned.c | 4 +- src/arena.c | 51 +++++---- src/bitmap.c | 4 +- src/free.c | 2 +- src/heap.c | 20 ++-- src/init.c | 89 +++++++++------- src/os.c | 30 +++--- src/page.c | 12 +-- src/stats.c | 136 +++++++++++++----------- test/test-stress.c | 8 +- 14 files changed, 274 insertions(+), 219 deletions(-) diff --git a/ide/vs2022/mimalloc-test-stress.vcxproj b/ide/vs2022/mimalloc-test-stress.vcxproj index 672cbb87..fd88cd8e 100644 --- a/ide/vs2022/mimalloc-test-stress.vcxproj +++ b/ide/vs2022/mimalloc-test-stress.vcxproj @@ -279,8 +279,8 @@ - - {abb5eae7-b3e6-432e-b636-333449892ea7} + + {abb5eae7-b3e6-432e-b636-333449892ea6} diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index ab1e161d..0c7fafe3 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -417,6 +417,8 @@ static inline void mi_atomic_yield(void) { #if defined(_WIN32) +#if 0 + #define mi_lock_t CRITICAL_SECTION static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -436,7 +438,8 @@ static inline void mi_lock_done(mi_lock_t* lock) { DeleteCriticalSection(lock); } -#if 0 +#else + #define mi_lock_t SRWLOCK // slim reader-writer lock static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -455,6 +458,7 @@ static inline void mi_lock_init(mi_lock_t* lock) { static inline void mi_lock_done(mi_lock_t* lock) { (void)(lock); } + #endif #elif defined(MI_USE_PTHREADS) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 24792f8c..7774b378 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -90,7 +90,6 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 4d43e887..ca3913ad 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -293,7 +293,7 @@ typedef struct mi_page_s { uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary #endif - mi_heap_t* heap; // heap this threads belong to. + mi_heap_t* heap; // the heap owning this page (or NULL for abandoned pages) struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` mi_memid_t memid; // provenance of the page memory @@ -394,7 +394,7 @@ typedef struct mi_padding_s { // A heap owns a set of pages. 
struct mi_heap_s { mi_tld_t* tld; // thread-local data - mi_arena_t* exclusive_arena; // if the heap belongs to a specific arena (or NULL) + mi_arena_t* exclusive_arena; // if the heap should only allocate from a specific arena (or NULL) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation @@ -444,18 +444,18 @@ typedef struct mi_stat_counter_s { } mi_stat_counter_t; typedef struct mi_stats_s { - mi_stat_count_t pages; - mi_stat_count_t reserved; - mi_stat_count_t committed; - mi_stat_count_t reset; - mi_stat_count_t purged; - mi_stat_count_t page_committed; - mi_stat_count_t pages_abandoned; - mi_stat_count_t threads; - mi_stat_count_t normal; - mi_stat_count_t huge; - mi_stat_count_t giant; - mi_stat_count_t malloc; + mi_stat_count_t pages; + mi_stat_count_t reserved; + mi_stat_count_t committed; + mi_stat_count_t reset; + mi_stat_count_t purged; + mi_stat_count_t page_committed; + mi_stat_count_t pages_abandoned; + mi_stat_count_t threads; + mi_stat_count_t normal; + mi_stat_count_t huge; + mi_stat_count_t giant; + mi_stat_count_t malloc; mi_stat_counter_t pages_extended; mi_stat_counter_t pages_reclaim_on_alloc; mi_stat_counter_t pages_reclaim_on_free; @@ -479,37 +479,72 @@ typedef struct mi_stats_s { // add to stat keeping track of the peak -void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); -void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_increase(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount); // adjust stat in special cases to compensate for double counting -void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc); -void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_free); +void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc); +void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_free); +void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc); +void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount, bool on_free); // counters can just be increased -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); +void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); +void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount); #if (MI_STAT) -#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) -#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) -#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) -#define mi_stat_adjust_increase(stat,amnt,b) _mi_stat_adjust_increase( &(stat), amnt, b) -#define mi_stat_adjust_decrease(stat,amnt,b) _mi_stat_adjust_decrease( &(stat), amnt, b) +#define mi_debug_stat_increase(stat,amount) __mi_stat_increase( &(stat), amount) +#define mi_debug_stat_decrease(stat,amount) __mi_stat_decrease( &(stat), amount) +#define mi_debug_stat_counter_increase(stat,amount) __mi_stat_counter_increase( &(stat), amount) +#define mi_debug_stat_increase_mt(stat,amount) __mi_stat_increase_mt( &(stat), amount) +#define mi_debug_stat_decrease_mt(stat,amount) __mi_stat_decrease_mt( &(stat), amount) 
+#define mi_debug_stat_counter_increase_mt(stat,amount) __mi_stat_counter_increase_mt( &(stat), amount) +#define mi_debug_stat_adjust_increase_mt(stat,amnt,b) __mi_stat_adjust_increase_mt( &(stat), amnt, b) +#define mi_debug_stat_adjust_decrease_mt(stat,amnt,b) __mi_stat_adjust_decrease_mt( &(stat), amnt, b) #else -#define mi_stat_increase(stat,amount) ((void)0) -#define mi_stat_decrease(stat,amount) ((void)0) -#define mi_stat_counter_increase(stat,amount) ((void)0) -#define mi_stat_adjuct_increase(stat,amnt,b) ((void)0) -#define mi_stat_adjust_decrease(stat,amnt,b) ((void)0) +#define mi_debug_stat_increase(stat,amount) ((void)0) +#define mi_debug_stat_decrease(stat,amount) ((void)0) +#define mi_debug_stat_counter_increase(stat,amount) ((void)0) +#define mi_debug_stat_increase_mt(stat,amount) ((void)0) +#define mi_debug_stat_decrease_mt(stat,amount) ((void)0) +#define mi_debug_stat_counter_increase_mt(stat,amount) ((void)0) +#define mi_debug_stat_adjust_increase(stat,amnt,b) ((void)0) +#define mi_debug_stat_adjust_decrease(stat,amnt,b) ((void)0) #endif -#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) -#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) -#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) +#define mi_subproc_stat_counter_increase(subproc,stat,amount) __mi_stat_counter_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_increase(subproc,stat,amount) __mi_stat_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_decrease(subproc,stat,amount) __mi_stat_decrease_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_adjust_increase(subproc,stat,amnt,b) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amnt, b) +#define mi_subproc_stat_adjust_decrease(subproc,stat,amnt,b) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amnt, b) + +#define mi_os_stat_counter_increase(stat,amount) mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount) + +#define mi_tld_stat_counter_increase(tld,stat,amount) __mi_stat_counter_increase( &(tld)->stats.stat, amount) +#define mi_tld_stat_increase(tld,stat,amount) __mi_stat_increase( &(tld)->stats.stat, amount) +#define mi_tld_stat_decrease(tld,stat,amount) __mi_stat_decrease( &(tld)->stats.stat, amount) + +#define mi_debug_tld_stat_counter_increase(tld,stat,amount) mi_debug_stat_counter_increase( (tld)->stats.stat, amount) +#define mi_debug_tld_stat_increase(tld,stat,amount) mi_debug_stat_increase( (tld)->stats.stat, amount) +#define mi_debug_tld_stat_decrease(tld,stat,amount) mi_debug_stat_decrease( (tld)->stats.stat, amount) + +#define mi_heap_stat_counter_increase(heap,stat,amount) mi_tld_stat_counter_increase((heap)->tld, stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) mi_tld_stat_increase( (heap)->tld, stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) mi_tld_stat_decrease( (heap)->tld, stat, amount) + +#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_tld_stat_counter_increase((heap)->tld, stat, amount) +#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_tld_stat_increase( (heap)->tld, stat, amount) +#define mi_debug_heap_stat_decrease(heap,stat,amount) mi_debug_tld_stat_decrease( 
(heap)->tld, stat, amount) // ------------------------------------------------------ // Sub processes use separate arena's and no heaps/pages/blocks // are shared between sub processes. -// Each thread should also belong to one sub-process only +// The subprocess structure contains essentially all static variables (except per subprocess :-)) +// +// Each thread should belong to one sub-process only // ------------------------------------------------------ #define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`) @@ -519,10 +554,13 @@ typedef struct mi_subproc_s { _Atomic(size_t) arena_count; // current count of arena's _Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time - _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process + + _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process mi_page_queue_t os_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on) mi_lock_t os_pages_lock; // lock for the os pages list (this lock protects list operations) + mi_memid_t memid; // provenance of this memory block (meta or OS) + mi_stats_t stats; // sub-process statistics (tld stats are merged in on thread termination) } mi_subproc_t; @@ -535,16 +573,16 @@ typedef int64_t mi_msecs_t; // Thread local data struct mi_tld_s { - mi_threadid_t thread_id; // thread id of this thread - size_t thread_seq; // thread sequence id (linear count of created threads) - mi_subproc_t* subproc; // sub-process this thread belongs to. - mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) - mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) - unsigned long long heartbeat; // monotonic heartbeat count - bool recurse; // true if deferred was called; used to prevent infinite recursion. - bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) - mi_stats_t stats; // statistics - mi_memid_t memid; // provenance of the tld memory itself (meta or OS) + mi_threadid_t thread_id; // thread id of this thread + size_t thread_seq; // thread sequence id (linear count of created threads) + mi_subproc_t* subproc; // sub-process this thread belongs to. + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) + unsigned long long heartbeat; // monotonic heartbeat count + bool recurse; // true if deferred was called; used to prevent infinite recursion. + bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) + mi_stats_t stats; // statistics + mi_memid_t memid; // provenance of the tld memory itself (meta or OS) }; diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 14cbee45..5da9fc0c 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -193,9 +193,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; if mi_likely(is_aligned) { - #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc, size); - #endif + mi_debug_heap_stat_increase(heap, malloc, size); void* p = (zero ? 
_mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); diff --git a/src/arena.c b/src/arena.c index fd914f43..dcff8920 100644 --- a/src/arena.c +++ b/src/arena.c @@ -69,10 +69,6 @@ typedef struct mi_purge_info_s { Arena id's ----------------------------------------------------------- */ -static mi_arena_id_t mi_arena_id_create(mi_arena_t* arena) { - return arena; -} - mi_arena_id_t _mi_arena_id_none(void) { return NULL; } @@ -222,14 +218,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count); // adjust the stats so we don't double count the commits if (already_committed_count > 0) { - _mi_stat_adjust_decrease(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count), true /* on alloc */); + mi_subproc_stat_adjust_decrease(arena->subproc, committed, mi_size_of_slices(already_committed_count), true /* on alloc */); } // now actually commit bool commit_zero = false; if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero)) { // failed to commit (todo: give warning?) if (already_committed_count > 0) { - _mi_stat_increase(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); + mi_subproc_stat_increase(arena->subproc, committed, mi_size_of_slices(already_committed_count)); } memid->initially_committed = false; } @@ -251,7 +247,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( // if the OS has overcommit, and this is the first time we access these pages, then // count the commit now (as at arena reserve we didn't count those commits as these are on-demand) if (_mi_os_has_overcommit() && touched_slices > 0) { - _mi_stat_increase(&_mi_stats_main.committed, mi_size_of_slices(touched_slices)); + mi_subproc_stat_increase( arena->subproc, committed, mi_size_of_slices(touched_slices)); } } // tool support @@ -325,18 +321,18 @@ static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_ // on an OS with overcommit (Linux) we don't count the commit yet as it is on-demand. Once a slice // is actually allocated for the first time it will be counted. const bool adjust = (overcommit && arena_commit); - if (adjust) { _mi_stat_adjust_decrease(&_mi_stats_main.committed, arena_reserve, true /* on alloc */); } + if (adjust) { mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve, true /* on alloc */); } // and try to reserve the arena int err = mi_reserve_os_memory_ex2(subproc, arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); if (err != 0) { - if (adjust) { _mi_stat_adjust_increase(&_mi_stats_main.committed, arena_reserve, true); } // roll back + if (adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve, true); } // roll back // failed, try a smaller size? const size_t small_arena_reserve = (MI_SIZE_BITS == 32 ? 128*MI_MiB : 1*MI_GiB); - if (adjust) { _mi_stat_adjust_decrease(&_mi_stats_main.committed, arena_reserve, true); } + if (adjust) { mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve, true); } if (arena_reserve > small_arena_reserve) { // try again err = mi_reserve_os_memory_ex(small_arena_reserve, arena_commit, allow_large, false /* exclusive? 
*/, arena_id); - if (err != 0 && adjust) { _mi_stat_adjust_increase(&_mi_stats_main.committed, arena_reserve, true); } // roll back + if (err != 0 && adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve, true); } // roll back } } return (err==0); @@ -579,8 +575,8 @@ static mi_page_t* mi_arena_page_try_find_abandoned(mi_subproc_t* subproc, size_t mi_assert_internal(mi_page_is_abandoned(page)); mi_assert_internal(mi_arena_has_page(arena,page)); mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]); - _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1); - _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1); + mi_subproc_stat_decrease( arena->subproc, pages_abandoned, 1); + mi_subproc_stat_counter_increase(arena->subproc, pages_reclaim_on_alloc, 1); _mi_page_free_collect(page, false); // update `used` count mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count)); @@ -828,12 +824,13 @@ void _mi_arena_page_abandon(mi_page_t* page) { const bool wasclear = mi_bitmap_set(arena->pages_abandoned[bin], slice_index); MI_UNUSED(wasclear); mi_assert_internal(wasclear); mi_atomic_increment_relaxed(&arena->subproc->abandoned_count[bin]); + mi_subproc_stat_increase(arena->subproc, pages_abandoned, 1); } else { // page is full (or a singleton), page is OS/externally allocated // leave as is; it will be reclaimed when an object is free'd in the page - } - _mi_stat_increase(&_mi_stats_main.pages_abandoned, 1); + mi_subproc_stat_increase(_mi_subproc(), pages_abandoned, 1); + } _mi_page_unown(page); } @@ -850,8 +847,9 @@ bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page) { return false; } else { - _mi_stat_counter_increase(&_mi_stats_main.pages_reabandon_full, 1); - _mi_stat_adjust_decrease(&_mi_stats_main.pages_abandoned, 1, true /* on alloc */); // adjust as we are not abandoning fresh + mi_subproc_t* subproc = _mi_subproc(); + mi_subproc_stat_counter_increase( subproc, pages_reabandon_full, 1); + mi_subproc_stat_adjust_decrease( subproc, pages_abandoned, 1, true /* on alloc */); // adjust as we are not abandoning fresh _mi_arena_page_abandon(page); return true; } @@ -879,13 +877,14 @@ void _mi_arena_page_unabandon(mi_page_t* page) { mi_bitmap_clear_once_set(arena->pages_abandoned[bin], slice_index); mi_page_clear_abandoned_mapped(page); mi_atomic_decrement_relaxed(&arena->subproc->abandoned_count[bin]); + mi_subproc_stat_decrease(arena->subproc, pages_abandoned, 1); } else { - // page is full (or a singleton), page is OS/nly allocated + // page is full (or a singleton), page is OS allocated // nothing to do // TODO: maintain count of these as well? - } - _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1); + mi_subproc_stat_decrease(_mi_subproc(), pages_abandoned, 1); + } } void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) { @@ -1016,7 +1015,7 @@ void _mi_arena_unsafe_destroy_all(void) { Add an arena. 
----------------------------------------------------------- */ -static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { +static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal(arena->slice_count > 0); if (arena_id != NULL) { *arena_id = NULL; } @@ -1043,7 +1042,7 @@ static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t return false; } - _mi_stat_counter_increase(&stats->arena_count,1); + mi_subproc_stat_counter_increase(arena->subproc, arena_count, 1); mi_atomic_store_ptr_release(mi_arena_t,&subproc->arenas[i], arena); if (arena_id != NULL) { *arena_id = arena; } return true; @@ -1149,7 +1148,7 @@ static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t s mi_bitmap_setN(arena->slices_dirty, 0, info_slices, NULL); } - return mi_arena_add(subproc, arena, arena_id, &_mi_stats_main); + return mi_arena_add(subproc, arena, arena_id); } @@ -1414,7 +1413,7 @@ static bool mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_c // update committed bitmap if (needs_recommit) { - _mi_stat_adjust_decrease(&_mi_stats_main.committed, mi_size_of_slices(slice_count - already_committed), false /* on freed */); + mi_subproc_stat_adjust_decrease( arena->subproc, committed, mi_size_of_slices(slice_count - already_committed), false /* on freed */); mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count); } return needs_recommit; @@ -1506,7 +1505,7 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire_base, (mi_msecs_t)0)) { mi_atomic_storei64_release(&arena->purge_expire_extend, (mi_msecs_t)0); // and also reset the extend } - _mi_stat_counter_increase(&_mi_stats_main.arena_purges, 1); + mi_subproc_stat_counter_increase(arena->subproc, arena_purges, 1); // go through all purge info's (with max MI_BFIELD_BITS ranges at a time) // this also clears those ranges atomically (so any newly freed blocks will get purged next @@ -1647,7 +1646,7 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, arena->is_exclusive = true; arena->is_large = is_large; arena->subproc = NULL; - if (!mi_arena_add(_mi_subproc(), arena, arena_id, &_mi_stats_main)) { + if (!mi_arena_add(_mi_subproc(), arena, arena_id)) { return false; } mi_arena_pages_reregister(arena); diff --git a/src/bitmap.c b/src/bitmap.c index 6352e4ea..e4a4cc2d 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -106,7 +106,9 @@ static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_ do { if mi_unlikely((old&mask) == 0) { old = mi_atomic_load_acquire(b); - if ((old&mask)==0) { _mi_stat_counter_increase(&_mi_stats_main.pages_unabandon_busy_wait, 1); } + if ((old&mask)==0) { + mi_subproc_stat_counter_increase(_mi_subproc(), pages_unabandon_busy_wait, 1); + } while ((old&mask)==0) { // busy wait mi_atomic_yield(); old = mi_atomic_load_acquire(b); diff --git a/src/free.c b/src/free.c index 770856da..88f784c7 100644 --- a/src/free.c +++ b/src/free.c @@ -242,7 +242,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { // first remove it from the abandoned pages in the arena -- this waits for any readers to finish _mi_arena_page_unabandon(page); _mi_heap_page_reclaim(tagheap, page); - _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_free, 1); + mi_heap_stat_counter_increase(tagheap, 
pages_reclaim_on_free, 1); return; } } diff --git a/src/heap.c b/src/heap.c index e8743691..d82b383f 100644 --- a/src/heap.c +++ b/src/heap.c @@ -141,7 +141,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - + // collect arenas (this is program wide so don't force purges on abandonment of threads) _mi_arenas_collect(collect == MI_FORCE /* force purge? */); } @@ -183,9 +183,9 @@ mi_heap_t* mi_heap_get_backing(void) { } // todo: make order of parameters consistent (but would that break compat with CPython?) -void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t heap_tag, mi_tld_t* tld) +void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t heap_tag, mi_tld_t* tld) { - mi_assert_internal(heap!=NULL); + mi_assert_internal(heap!=NULL); mi_memid_t memid = heap->memid; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->memid = memid; @@ -204,7 +204,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint heap->full_page_retain = heap->full_page_retain / 4; } } - + if (heap->tld->heap_backing == NULL) { heap->tld->heap_backing = heap; // first heap becomes the backing heap _mi_random_init(&heap->random); @@ -240,7 +240,7 @@ mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_assert_internal(bheap != NULL); - return _mi_heap_create(heap_tag, allow_destroy, arena_id, bheap->tld); + return _mi_heap_create(heap_tag, allow_destroy, arena_id, bheap->tld); } mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { @@ -333,17 +333,17 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ if (bsize > MI_LARGE_MAX_OBJ_SIZE) { mi_heap_stat_decrease(heap, huge, bsize); } -#if (MI_STAT) + #if (MI_STAT) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_MAX_OBJ_SIZE) { mi_heap_stat_decrease(heap, normal, bsize * inuse); -#if (MI_STAT>1) + #if (MI_STAT>1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], inuse); -#endif + #endif } mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... 
-#endif + #endif /// pretend it is all free now mi_assert_internal(mi_page_thread_free(page) == NULL); @@ -460,7 +460,7 @@ void mi_heap_delete(mi_heap_t* heap) // transfer still used pages to the backing heap mi_heap_absorb(bheap, heap); } - else + else */ { // abandon all pages diff --git a/src/init.c b/src/init.c index 177ca2bd..5159941a 100644 --- a/src/init.c +++ b/src/init.c @@ -34,7 +34,7 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, // keys #endif NULL, // xheap - NULL, NULL, // next, prev + NULL, NULL, // next, prev MI_MEMID_STATIC // memid }; @@ -103,7 +103,7 @@ static mi_decl_cache_align mi_tld_t tld_empty = { 0, // thread_seq &subproc_main, // subproc NULL, // heap_backing - NULL, // heaps list + NULL, // heaps list 0, // heartbeat false, // recurse false, // is_in_threadpool @@ -139,7 +139,7 @@ static mi_decl_cache_align mi_tld_t tld_main = { 0, // thread_seq &subproc_main, // subproc &heap_main, // heap_backing - &heap_main, // heaps list + &heap_main, // heaps list 0, // heartbeat false, // recurse false, // is_in_threadpool @@ -165,7 +165,7 @@ mi_decl_cache_align mi_heap_t heap_main = { #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, - MI_MEMID_STATIC + MI_MEMID_STATIC }; @@ -237,7 +237,7 @@ static void mi_tld_main_init(void) { // Initialization of the (statically allocated) main heap, and the main tld and subproc. static void mi_heap_main_init(void) { - if (heap_main.cookie == 0) { + if (heap_main.cookie == 0) { mi_subproc_main_init(); mi_tld_main_init(); // heap @@ -249,7 +249,7 @@ static void mi_heap_main_init(void) { #endif heap_main.cookie = _mi_heap_random_next(&heap_main); heap_main.keys[0] = _mi_heap_random_next(&heap_main); - heap_main.keys[1] = _mi_heap_random_next(&heap_main); + heap_main.keys[1] = _mi_heap_random_next(&heap_main); _mi_heap_guarded_init(&heap_main); heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); @@ -266,14 +266,21 @@ mi_heap_t* heap_main_get(void) { Thread local data ----------------------------------------------------------- */ -// Thread sequence number -static _Atomic(size_t) mi_tcount; +// Count current and total created threads +static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); +static _Atomic(size_t) thread_total_count; + +size_t _mi_current_thread_count(void) { + return mi_atomic_load_relaxed(&thread_count); +} + // The mimalloc thread local data -mi_decl_thread mi_tld_t* mi_tld; +mi_decl_thread mi_tld_t* thread_tld = &tld_empty; // Allocate fresh tld static mi_tld_t* mi_tld_alloc(void) { + mi_atomic_increment_relaxed(&thread_count); if (_mi_is_main_thread()) { return &tld_main; } @@ -292,7 +299,7 @@ static mi_tld_t* mi_tld_alloc(void) { tld->heaps = NULL; tld->subproc = &subproc_main; tld->thread_id = _mi_prim_thread_id(); - tld->thread_seq = mi_atomic_add_acq_rel(&mi_tcount, 1); + tld->thread_seq = mi_atomic_add_acq_rel(&thread_total_count, 1); tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); return tld; } @@ -301,28 +308,38 @@ static mi_tld_t* mi_tld_alloc(void) { #define MI_TLD_INVALID ((mi_tld_t*)1) mi_decl_noinline static void mi_tld_free(void) { - mi_tld_t* tld = _mi_tld(); - mi_tld = MI_TLD_INVALID; - _mi_meta_free(tld, sizeof(mi_tld_t), tld->memid); + mi_tld_t* tld = _mi_tld(); + if (tld != NULL && tld != MI_TLD_INVALID) { + _mi_stats_done(&tld->stats); + _mi_meta_free(tld, sizeof(mi_tld_t), tld->memid); + } + tld = MI_TLD_INVALID; + mi_atomic_decrement_relaxed(&thread_count); } 
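/* For context: a minimal usage sketch (illustration only, not part of this patch)
   of the public sub-process API that this per-thread tld/subproc plumbing supports.
   It assumes linking against mimalloc; the driver code itself is hypothetical. */
#include <mimalloc.h>

int main(void) {
  mi_subproc_id_t sub = mi_subproc_new();   /* create a fresh sub-process */
  mi_subproc_add_current_thread(sub);       /* bind this thread (still in the main sub-process) to it */
  void* p = mi_malloc(64);                  /* pages for this thread now stay within `sub` */
  mi_free(p);
  mi_subproc_delete(sub);                   /* deletion only proceeds once the sub-process holds no OS pages */
  return 0;
}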
mi_decl_noinline mi_tld_t* _mi_tld(void) { - if (mi_tld == MI_TLD_INVALID) { - _mi_error_message(EFAULT, "internal error: tld accessed after the thread terminated\n"); - mi_tld = NULL; + mi_tld_t* tld = thread_tld; + if (tld == MI_TLD_INVALID) { + _mi_error_message(EFAULT, "internal error: tld is accessed after the thread terminated\n"); + thread_tld = &tld_empty; } - if (mi_tld==NULL) { - mi_tld = mi_tld_alloc(); + if (tld==&tld_empty) { + thread_tld = tld = mi_tld_alloc(); } - return mi_tld; + return tld; } mi_subproc_t* _mi_subproc(void) { - if (_mi_is_main_thread()) { // during initialization we should not recurse over reading the _mi_tld - return &subproc_main; + // should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()` + // todo: this will still fail on OS systems where the first access to a thread-local causes allocation. + // on such systems we can check for this with the _mi_prim_get_default_heap as those are protected (by being + // stored in a TLS slot for example) + mi_heap_t* heap = mi_prim_get_default_heap(); + if (heap == NULL || heap == &_mi_heap_empty) { + return _mi_subproc_main(); } else { - return _mi_tld()->subproc; + return thread_tld->subproc; // don't call `_mi_tld()` } } @@ -396,11 +413,11 @@ static bool _mi_thread_heap_init(void) { //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { - // allocates tld data - // note: we cannot access thread-locals yet as that can cause (recursive) allocation + // allocates tld data + // note: we cannot access thread-locals yet as that can cause (recursive) allocation // (on macOS <= 14 for example where the loader allocates thread-local data on demand). - mi_tld_t* tld = mi_tld_alloc(); - + mi_tld_t* tld = mi_tld_alloc(); + // allocate and initialize the heap mi_heap_t* heap = _mi_heap_create(0 /* default tag */, false /* allow destroy? */, _mi_arena_id_none(), tld); @@ -409,7 +426,7 @@ static bool _mi_thread_heap_init(void) { _mi_heap_set_default_direct(heap); // now that the heap is set for this thread, we can set the thread-local tld. 
- mi_tld = tld; + thread_tld = tld; } return false; } @@ -444,9 +461,6 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { _mi_heap_collect_abandon(heap); } - // merge stats - _mi_stats_done(&heap->tld->stats); - // free heap meta data _mi_meta_free(heap, sizeof(mi_heap_t), heap->memid); @@ -494,11 +508,6 @@ bool _mi_is_main_thread(void) { return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id()); } -static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); - -size_t _mi_current_thread_count(void) { - return mi_atomic_load_relaxed(&thread_count); -} // This is called from the `mi_malloc_generic` void mi_thread_init(void) mi_attr_noexcept @@ -511,8 +520,7 @@ void mi_thread_init(void) mi_attr_noexcept // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) if (_mi_thread_heap_init()) return; // returns true if already initialized - _mi_stat_increase(&_mi_stats_main.threads, 1); - mi_atomic_increment_relaxed(&thread_count); + mi_subproc_stat_increase(_mi_subproc_main(), threads, 1); //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); } @@ -534,15 +542,14 @@ void _mi_thread_done(mi_heap_t* heap) } // adjust stats - mi_atomic_decrement_relaxed(&thread_count); - _mi_stat_decrease(&_mi_stats_main.threads, 1); + mi_subproc_stat_decrease(_mi_subproc_main(), threads, 1); // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... if (heap->tld->thread_id != _mi_prim_thread_id()) return; // abandon the thread local heap _mi_thread_heap_done(heap); // returns true if already ran - + // free thread local data mi_tld_free(); } @@ -654,7 +661,7 @@ void mi_process_init(void) mi_attr_noexcept { _mi_prim_thread_associate_default_heap(NULL); #endif - mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) mi_track_init(); if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { diff --git a/src/os.c b/src/os.c index 86ecb16b..53e8f571 100644 --- a/src/os.c +++ b/src/os.c @@ -114,9 +114,9 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } if (still_committed) { - _mi_stat_decrease(&os_stats->committed, size); + mi_os_stat_decrease(committed, size); } - _mi_stat_decrease(&os_stats->reserved, size); + mi_os_stat_decrease(reserved, size); } void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { @@ -171,11 +171,11 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large); } - _mi_stat_counter_increase(&os_stats->mmap_calls, 1); + mi_os_stat_counter_increase(mmap_calls, 1); if (p != NULL) { - _mi_stat_increase(&os_stats->reserved, size); + mi_os_stat_increase(reserved, size); if (commit) { - _mi_stat_increase(&os_stats->committed, size); + mi_os_stat_increase(committed, size); // seems needed for asan (or `mimalloc-test-api` fails) #ifdef MI_TRACK_ASAN if (*is_zero) { mi_track_mem_defined(p,size); } @@ -290,7 +290,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); 
alignment = _mi_align_up(alignment, _mi_os_page_size()); - + bool os_is_large = false; bool os_is_zero = false; void* os_base = NULL; @@ -379,8 +379,8 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* bool _mi_os_commit(void* addr, size_t size, bool* is_zero) { if (is_zero != NULL) { *is_zero = false; } - _mi_stat_increase(&os_stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&os_stats->commit_calls, 1); + mi_os_stat_increase(committed, size); // use size for precise commit vs. decommit + mi_os_stat_counter_increase(commit_calls, 1); // page align range size_t csize; @@ -408,7 +408,7 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero) { static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit) { mi_assert_internal(needs_recommit!=NULL); - _mi_stat_decrease(&os_stats->committed, size); + mi_os_stat_decrease(committed, size); // page align size_t csize; @@ -440,8 +440,8 @@ bool _mi_os_reset(void* addr, size_t size) { size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - _mi_stat_increase(&os_stats->reset, csize); - _mi_stat_counter_increase(&os_stats->reset_calls, 1); + mi_os_stat_increase(reset, csize); + mi_os_stat_counter_increase(reset_calls, 1); #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN memset(start, 0, csize); // pretend it is eagerly reset @@ -460,8 +460,8 @@ bool _mi_os_reset(void* addr, size_t size) { bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset) { if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? - _mi_stat_counter_increase(&os_stats->purge_calls, 1); - _mi_stat_increase(&os_stats->purged, size); + mi_os_stat_counter_increase(purge_calls, 1); + mi_os_stat_increase(purged, size); if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? !_mi_preloading()) // don't decommit during preloading (unsafe) @@ -595,8 +595,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // success, record it page++; // increase before timeout check (see issue #711) - _mi_stat_increase(&os_stats->committed, MI_HUGE_OS_PAGE_SIZE); - _mi_stat_increase(&os_stats->reserved, MI_HUGE_OS_PAGE_SIZE); + mi_os_stat_increase(committed, MI_HUGE_OS_PAGE_SIZE); + mi_os_stat_increase(reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout if (max_msecs > 0) { diff --git a/src/page.c b/src/page.c index 0444b47e..31dbcc7d 100644 --- a/src/page.c +++ b/src/page.c @@ -387,9 +387,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { const size_t bsize = mi_page_block_size(page); if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? - mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); + mi_debug_heap_stat_counter_increase(heap, page_no_retire, 1); + page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? 
MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE); @@ -554,7 +554,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) { size_t page_size; //uint8_t* page_start = mi_page_area(page, &page_size); - mi_heap_stat_counter_increase(heap, pages_extended, 1); + mi_debug_heap_stat_counter_increase(heap, pages_extended, 1); // calculate the extend count const size_t bsize = mi_page_block_size(page); @@ -583,7 +583,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) { } // enable the new free list page->capacity += (uint16_t)extend; - mi_heap_stat_increase(heap, page_committed, extend * bsize); + mi_debug_heap_stat_increase(heap, page_committed, extend * bsize); mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -709,8 +709,8 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m page = next; } // for each page - mi_heap_stat_counter_increase(heap, searches, count); - + mi_debug_heap_stat_counter_increase(heap, searches, count); + // set the page to the best candidate if (page_candidate != NULL) { page = page_candidate; diff --git a/src/stats.c b/src/stats.c index bb17b936..2a395ed5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -19,88 +19,93 @@ terms of the MIT license. A copy of the license can be found in the file Statistics operations ----------------------------------------------------------- */ -static bool mi_is_in_main(void* stat) { - return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main - && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); +static void mi_stat_update_mt(mi_stat_count_t* stat, int64_t amount) { + if (amount == 0) return; + // add atomically + int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_maxi64_relaxed(&stat->peak, current + amount); + if (amount > 0) { + mi_atomic_addi64_relaxed(&stat->allocated, amount); + } + else { + mi_atomic_addi64_relaxed(&stat->freed, -amount); + } } static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { if (amount == 0) return; - if mi_unlikely(mi_is_in_main(stat)) - { - // add atomically (for abandoned pages) - int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount); - mi_atomic_maxi64_relaxed(&stat->peak, current + amount); - if (amount > 0) { - mi_atomic_addi64_relaxed(&stat->allocated,amount); - } - else { - mi_atomic_addi64_relaxed(&stat->freed, -amount); - } + // add thread local + stat->current += amount; + if (stat->current > stat->peak) stat->peak = stat->current; + if (amount > 0) { + stat->allocated += amount; } else { - // add thread local - stat->current += amount; - if (stat->current > stat->peak) stat->peak = stat->current; - if (amount > 0) { - stat->allocated += amount; - } - else { - stat->freed += -amount; - } + stat->freed += -amount; } } + // Adjust stats to compensate; for example before committing a range, // first adjust downwards with parts that were already committed so // we avoid double counting. +static void mi_stat_adjust_mt(mi_stat_count_t* stat, int64_t amount, bool on_alloc) { + if (amount == 0) return; + // adjust atomically + mi_atomic_addi64_relaxed(&stat->current, amount); + mi_atomic_addi64_relaxed((on_alloc ? 
&stat->allocated : &stat->freed), amount); +} + static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount, bool on_alloc) { if (amount == 0) return; - if mi_unlikely(mi_is_in_main(stat)) - { - // adjust atomically - mi_atomic_addi64_relaxed(&stat->current, amount); - mi_atomic_addi64_relaxed((on_alloc ? &stat->allocated : &stat->freed), amount); + stat->current += amount; + if (on_alloc) { + stat->allocated += amount; } else { - // don't affect the peak - stat->current += amount; - if (on_alloc) { - stat->allocated += amount; - } - else { - stat->freed += amount; - } + stat->freed += amount; } } -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { - if (mi_is_in_main(stat)) { - mi_atomic_addi64_relaxed( &stat->count, 1 ); - mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); - } - else { - stat->count++; - stat->total += amount; - } +void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount) { + mi_atomic_addi64_relaxed(&stat->count, 1); + mi_atomic_addi64_relaxed(&stat->total, (int64_t)amount); } -void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) { +void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { + stat->count++; + stat->total += amount; +} + +void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount) { + mi_stat_update_mt(stat, (int64_t)amount); +} +void __mi_stat_increase(mi_stat_count_t* stat, size_t amount) { mi_stat_update(stat, (int64_t)amount); } -void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { +void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount) { + mi_stat_update_mt(stat, -((int64_t)amount)); +} +void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount) { mi_stat_update(stat, -((int64_t)amount)); } -void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc) { +void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc) { + mi_stat_adjust_mt(stat, (int64_t)amount, on_alloc); +} +void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc) { mi_stat_adjust(stat, (int64_t)amount, on_alloc); } -void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_alloc) { +void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc) { + mi_stat_adjust_mt(stat, -((int64_t)amount), on_alloc); +} +void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_alloc) { mi_stat_adjust(stat, -((int64_t)amount), on_alloc); } + // must be thread safe as it is called from stats_merge static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) { if (stat==src) return; @@ -401,27 +406,29 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) static mi_msecs_t mi_process_start; // = 0 -static mi_stats_t* mi_stats_get_default(void) { - mi_heap_t* heap = mi_heap_get_default(); - return &heap->tld->stats; +// return thread local stats +static mi_stats_t* mi_get_tld_stats(void) { + return &_mi_tld()->stats; } static void mi_stats_merge_from(mi_stats_t* stats) { - if (stats != &_mi_stats_main) { - mi_stats_add(&_mi_stats_main, stats); - memset(stats, 0, sizeof(mi_stats_t)); + mi_subproc_t* subproc = _mi_subproc(); + if (stats != &subproc->stats) { + mi_stats_add(&subproc->stats, stats); + _mi_memzero(stats, sizeof(mi_stats_t)); } } void mi_stats_reset(void) mi_attr_noexcept { - mi_stats_t* stats = mi_stats_get_default(); - if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); } - 
memset(&_mi_stats_main, 0, sizeof(mi_stats_t)); + mi_stats_t* stats = mi_get_tld_stats(); + mi_subproc_t* subproc = _mi_subproc(); + if (stats != &subproc->stats) { _mi_memzero(stats, sizeof(mi_stats_t)); } + _mi_memzero(&subproc->stats, sizeof(mi_stats_t)); if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); }; } void mi_stats_merge(void) mi_attr_noexcept { - mi_stats_merge_from( mi_stats_get_default() ); + mi_stats_merge_from( mi_get_tld_stats() ); } void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` @@ -429,8 +436,8 @@ void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` } void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { - mi_stats_merge_from(mi_stats_get_default()); - _mi_stats_print(&_mi_stats_main, out, arg); + mi_stats_merge_from(mi_get_tld_stats()); + _mi_stats_print(&_mi_subproc()->stats, out, arg); } void mi_stats_print(void* out) mi_attr_noexcept { @@ -439,7 +446,7 @@ void mi_stats_print(void* out) mi_attr_noexcept { } void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { - _mi_stats_print(mi_stats_get_default(), out, arg); + _mi_stats_print(mi_get_tld_stats(), out, arg); } @@ -473,11 +480,12 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { + mi_subproc_t* subproc = _mi_subproc(); mi_process_info_t pinfo; _mi_memzero_var(pinfo); pinfo.elapsed = _mi_clock_end(mi_process_start); - pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.current))); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.peak))); pinfo.current_rss = pinfo.current_commit; pinfo.peak_rss = pinfo.peak_commit; pinfo.utime = 0; diff --git a/test/test-stress.c b/test/test-stress.c index b35743df..0920a02e 100644 --- a/test/test-stress.c +++ b/test/test-stress.c @@ -48,10 +48,10 @@ static int ITER = 20; static int THREADS = 32; static int SCALE = 50; static int ITER = 50; -#elif 0 -static int THREADS = 64; -static int SCALE = 400; -static int ITER = 10; +#elif 1 +static int THREADS = 32; +static int SCALE = 25; +static int ITER = 50; #define ALLOW_LARGE true #else static int THREADS = 32; // more repeatable if THREADS <= #processors From 95aeda4cdda2431c20ed9fa3facb241b142ae773 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 10:53:34 -0800 Subject: [PATCH 07/16] merge subproc stats on delete --- include/mimalloc/internal.h | 1 + src/init.c | 4 ++++ src/stats.c | 23 +++++++++++------------ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 7774b378..e316de94 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -203,6 +203,7 @@ void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page); // "stats.c" void _mi_stats_done(mi_stats_t* stats); +void _mi_stats_merge_from(mi_stats_t* to, mi_stats_t* from); mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); diff --git a/src/init.c b/src/init.c index 
5159941a..3af4f4ef 100644 --- a/src/init.c +++ b/src/init.c @@ -382,6 +382,10 @@ void mi_subproc_delete(mi_subproc_id_t subproc_id) { mi_lock_release(&subproc->os_pages_lock); } if (!safe_to_delete) return; + + // merge stats back into the main subproc? + _mi_stats_merge_from(&_mi_subproc_main()->stats, &subproc->stats); + // safe to release // todo: should we refcount subprocesses? mi_lock_done(&subproc->os_pages_lock); diff --git a/src/stats.c b/src/stats.c index 2a395ed5..102373ec 100644 --- a/src/stats.c +++ b/src/stats.c @@ -411,14 +411,6 @@ static mi_stats_t* mi_get_tld_stats(void) { return &_mi_tld()->stats; } -static void mi_stats_merge_from(mi_stats_t* stats) { - mi_subproc_t* subproc = _mi_subproc(); - if (stats != &subproc->stats) { - mi_stats_add(&subproc->stats, stats); - _mi_memzero(stats, sizeof(mi_stats_t)); - } -} - void mi_stats_reset(void) mi_attr_noexcept { mi_stats_t* stats = mi_get_tld_stats(); mi_subproc_t* subproc = _mi_subproc(); @@ -427,16 +419,23 @@ void mi_stats_reset(void) mi_attr_noexcept { if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); }; } -void mi_stats_merge(void) mi_attr_noexcept { - mi_stats_merge_from( mi_get_tld_stats() ); +void _mi_stats_merge_from(mi_stats_t* to, mi_stats_t* from) { + if (to != from) { + mi_stats_add(to, from); + _mi_memzero(from, sizeof(mi_stats_t)); + } } void _mi_stats_done(mi_stats_t* stats) { // called from `mi_thread_done` - mi_stats_merge_from(stats); + _mi_stats_merge_from(&_mi_subproc()->stats, stats); +} + +void mi_stats_merge(void) mi_attr_noexcept { + _mi_stats_done( mi_get_tld_stats() ); } void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { - mi_stats_merge_from(mi_get_tld_stats()); + mi_stats_merge(); _mi_stats_print(&_mi_subproc()->stats, out, arg); } From 4ad7fedd25e0869aa6fbca2aa24fe08dd4eebc39 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 11:35:30 -0800 Subject: [PATCH 08/16] track os abandoned pages in a list --- include/mimalloc/atomic.h | 25 ++++++++--------- include/mimalloc/types.h | 4 +-- src/arena-meta.c | 7 +++-- src/arena.c | 56 ++++++++++++++++++++++++++------------- src/init.c | 11 ++++---- 5 files changed, 61 insertions(+), 42 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 0c7fafe3..fcd9efba 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -415,6 +415,8 @@ static inline void mi_atomic_yield(void) { #pragma warning(disable:26110) // unlock with holding lock #endif +#define mi_lock(lock) for(bool _go = (mi_lock_acquire(lock),true); _go; (mi_lock_release(lock), _go=false) ) + #if defined(_WIN32) #if 0 @@ -424,9 +426,8 @@ static inline void mi_atomic_yield(void) { static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return TryEnterCriticalSection(lock); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { EnterCriticalSection(lock); - return true; } static inline void mi_lock_release(mi_lock_t* lock) { LeaveCriticalSection(lock); @@ -445,9 +446,8 @@ static inline void mi_lock_done(mi_lock_t* lock) { static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return TryAcquireSRWLockExclusive(lock); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { AcquireSRWLockExclusive(lock); - return true; } static inline void mi_lock_release(mi_lock_t* lock) { ReleaseSRWLockExclusive(lock); @@ -468,8 +468,11 @@ static inline void mi_lock_done(mi_lock_t* lock) { static inline bool 
mi_lock_try_acquire(mi_lock_t* lock) { return (pthread_mutex_trylock(lock) == 0); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { - return (pthread_mutex_lock(lock) == 0); +static inline void mi_lock_acquire(mi_lock_t* lock) { + const int err = pthread_mutex_lock(lock); + if (err != 0) { + mi_error_message(EFAULT, "internal error: lock cannot be acquired\n"); + } } static inline void mi_lock_release(mi_lock_t* lock) { pthread_mutex_unlock(lock); @@ -489,9 +492,8 @@ static inline void mi_lock_done(mi_lock_t* lock) { static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return lock->try_lock(); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { lock->lock(); - return true; } static inline void mi_lock_release(mi_lock_t* lock) { lock->unlock(); @@ -514,12 +516,11 @@ static inline bool mi_lock_try_acquire(mi_lock_t* lock) { uintptr_t expected = 0; return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { for (int i = 0; i < 1000; i++) { // for at most 1000 tries? - if (mi_lock_try_acquire(lock)) return true; + if (mi_lock_try_acquire(lock)) return; mi_atomic_yield(); - } - return true; + } } static inline void mi_lock_release(mi_lock_t* lock) { mi_atomic_store_release(lock, (uintptr_t)0); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index ca3913ad..59393848 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -556,8 +556,8 @@ typedef struct mi_subproc_s { mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process - mi_page_queue_t os_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on) - mi_lock_t os_pages_lock; // lock for the os pages list (this lock protects list operations) + mi_page_t* os_abandoned_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on) + mi_lock_t os_abandoned_pages_lock; // lock for the os abandoned pages list (this lock protects list operations) mi_memid_t memid; // provenance of this memory block (meta or OS) mi_stats_t stats; // sub-process statistics (tld stats are merged in on thread termination) diff --git a/src/arena-meta.c b/src/arena-meta.c index f28c50e9..a5dc8e75 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -64,12 +64,11 @@ static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) { // allocate a fresh meta page and add it to the global list. static mi_meta_page_t* mi_meta_page_zalloc(void) { // allocate a fresh arena slice - // note: we always use subproc_main directly for the meta-data since at thread start the metadata for the - // tld and heap need to be (meta) allocated and at that time we cannot read the tld pointer (yet). + // note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again.. 
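/* Illustrative sketch of the `mi_lock(lock) { ... }` statement macro added in
   include/mimalloc/atomic.h above: a for-loop that runs its body exactly once between
   acquire and release, so a critical section reads like a block. Standalone demo with
   hypothetical demo_* names, calling pthread directly instead of mi_lock_acquire/release: */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t demo_mutex = PTHREAD_MUTEX_INITIALIZER;
static long demo_counter = 0;

// same shape as the mimalloc macro: init locks, condition runs the body once, increment unlocks
#define demo_lock(lock) \
  for (bool _go = (pthread_mutex_lock(lock), true); _go; (pthread_mutex_unlock(lock), _go = false))

int main(void) {
  demo_lock(&demo_mutex) {
    demo_counter++;            // runs exactly once while holding the lock
  }
  printf("counter: %ld\n", demo_counter);
  return 0;
}
/* Design note: a `break` or `return` inside the body would skip the release step,
   so the macro is only suitable for straight-line critical sections. */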
mi_memid_t memid; - mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(_mi_subproc_main(), MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, + mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(_mi_subproc(), MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, true /* commit*/, true /* allow large */, - NULL, 0 /* tseq */, &memid ); + NULL /* req arena */, 0 /* thread_seq */, &memid); if (mpage == NULL) return NULL; mi_assert_internal(_mi_is_aligned(mpage,MI_META_PAGE_ALIGN)); if (!memid.initially_zero) { diff --git a/src/arena.c b/src/arena.c index dcff8920..c4b02cf6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -439,24 +439,20 @@ static mi_decl_noinline void* mi_arenas_try_alloc( // otherwise, try to reserve a new arena -- but one thread at a time.. (todo: allow 2 or 4 to reduce contention?) const size_t arena_count = mi_arenas_get_count(subproc); - if (mi_lock_acquire(&subproc->arena_reserve_lock)) { - bool ok = true; + mi_lock(&subproc->arena_reserve_lock) { if (arena_count == mi_arenas_get_count(subproc)) { // we are the first to enter the lock, reserve a fresh arena mi_arena_id_t arena_id = 0; - ok = mi_arena_reserve(subproc, mi_size_of_slices(slice_count), allow_large, req_arena, &arena_id); + mi_arena_reserve(subproc, mi_size_of_slices(slice_count), allow_large, req_arena, &arena_id); } else { // another thread already reserved a new arena } - mi_lock_release(&subproc->arena_reserve_lock); - if (ok) { - // try once more to allocate in the new arena - mi_assert_internal(req_arena == NULL); - p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); - if (p != NULL) return p; - } - } + } + // try once more to allocate in the new arena + mi_assert_internal(req_arena == NULL); + p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); + if (p != NULL) return p; return NULL; } @@ -685,11 +681,13 @@ static mi_page_t* mi_arena_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_ else { page->block_size_shift = 0; } + // and own it + mi_page_try_claim_ownership(page); + + // register in the page map _mi_page_map_register(page); mi_assert_internal(_mi_ptr_page(page)==page); mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page); - - mi_page_try_claim_ownership(page); mi_assert_internal(mi_page_block_size(page) == block_size); mi_assert_internal(mi_page_is_abandoned(page)); mi_assert_internal(mi_page_is_owned(page)); @@ -771,7 +769,8 @@ void _mi_arena_page_free(mi_page_t* page) { mi_assert_internal(_mi_ptr_page(page)==page); mi_assert_internal(mi_page_is_owned(page)); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(page->next==NULL); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(page->next==NULL && page->prev==NULL); #if MI_DEBUG>1 if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { @@ -790,6 +789,7 @@ void _mi_arena_page_free(mi_page_t* page) { } #endif + // unregister page _mi_page_map_unregister(page); if (page->memid.memkind == MI_MEM_ARENA) { mi_bitmap_clear(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index); @@ -807,7 +807,7 @@ void _mi_arena_page_abandon(mi_page_t* page) { mi_assert_internal(mi_page_is_owned(page)); mi_assert_internal(mi_page_is_abandoned(page)); mi_assert_internal(!mi_page_all_free(page)); - mi_assert_internal(page->next==NULL); + mi_assert_internal(page->next==NULL && page->prev == NULL); if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { // make 
available for allocations @@ -827,8 +827,19 @@ void _mi_arena_page_abandon(mi_page_t* page) { mi_subproc_stat_increase(arena->subproc, pages_abandoned, 1); } else { - // page is full (or a singleton), page is OS/externally allocated + // page is full (or a singleton), or the page is OS/externally allocated // leave as is; it will be reclaimed when an object is free'd in the page + mi_subproc_t* subproc = _mi_subproc(); + // but for non-arena pages, add to the subproc list so these can be visited + if (page->memid.memkind != MI_MEM_ARENA && mi_option_is_enabled(mi_option_visit_abandoned)) { + mi_lock(&subproc->os_abandoned_pages_lock) { + // push in front + page->prev = NULL; + page->next = subproc->os_abandoned_pages; + if (page->next != NULL) { page->next->prev = page; } + subproc->os_abandoned_pages = page; + } + } mi_subproc_stat_increase(_mi_subproc(), pages_abandoned, 1); } _mi_page_unown(page); @@ -881,9 +892,18 @@ void _mi_arena_page_unabandon(mi_page_t* page) { } else { // page is full (or a singleton), page is OS allocated - // nothing to do - // TODO: maintain count of these as well? + mi_subproc_t* subproc = _mi_subproc(); mi_subproc_stat_decrease(_mi_subproc(), pages_abandoned, 1); + // if not an arena page, remove from the subproc os pages list + if (page->memid.memkind != MI_MEM_ARENA && mi_option_is_enabled(mi_option_visit_abandoned)) { + mi_lock(&subproc->os_abandoned_pages_lock) { + if (page->prev != NULL) { page->prev->next = page->next; } + if (page->next != NULL) { page->next->prev = page->prev; } + if (subproc->os_abandoned_pages == page) { subproc->os_abandoned_pages = page->next; } + page->next = NULL; + page->prev = NULL; + } + } } } diff --git a/src/init.c b/src/init.c index 3af4f4ef..1968ef68 100644 --- a/src/init.c +++ b/src/init.c @@ -223,7 +223,7 @@ void _mi_heap_guarded_init(mi_heap_t* heap) { static void mi_subproc_main_init(void) { if (subproc_main.memid.memkind != MI_MEM_STATIC) { subproc_main.memid = _mi_memid_create(MI_MEM_STATIC); - mi_lock_init(&subproc_main.os_pages_lock); + mi_lock_init(&subproc_main.os_abandoned_pages_lock); mi_lock_init(&subproc_main.arena_reserve_lock); } } @@ -361,7 +361,7 @@ mi_subproc_id_t mi_subproc_new(void) { mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid); if (subproc == NULL) return NULL; subproc->memid = memid; - mi_lock_init(&subproc->os_pages_lock); + mi_lock_init(&subproc->os_abandoned_pages_lock); mi_lock_init(&subproc->arena_reserve_lock); return subproc; } @@ -375,11 +375,10 @@ void mi_subproc_delete(mi_subproc_id_t subproc_id) { mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); // check if there are os pages still.. bool safe_to_delete = false; - if (mi_lock_acquire(&subproc->os_pages_lock)) { - if (subproc->os_pages.first == NULL) { + mi_lock(&subproc->os_abandoned_pages_lock) { + if (subproc->os_abandoned_pages == NULL) { safe_to_delete = true; } - mi_lock_release(&subproc->os_pages_lock); } if (!safe_to_delete) return; @@ -388,7 +387,7 @@ void mi_subproc_delete(mi_subproc_id_t subproc_id) { // safe to release // todo: should we refcount subprocesses? 
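/* Illustrative sketch of the doubly-linked `os_abandoned_pages` list maintained in the
   hunks above: abandoning a non-arena page pushes it at the front (under the list lock),
   unabandoning unlinks it again. Minimal standalone version with hypothetical demo_*
   names; the locking that the real code wraps around these operations is omitted: */
#include <assert.h>
#include <stddef.h>

typedef struct demo_node_s {
  struct demo_node_s* prev;
  struct demo_node_s* next;
} demo_node_t;

static demo_node_t* demo_list = NULL;   // stand-in for subproc->os_abandoned_pages

// push in front (mirrors _mi_arena_page_abandon for non-arena pages)
static void demo_push_front(demo_node_t* n) {
  n->prev = NULL;
  n->next = demo_list;
  if (n->next != NULL) { n->next->prev = n; }
  demo_list = n;
}

// unlink from anywhere in the list (mirrors _mi_arena_page_unabandon)
static void demo_remove(demo_node_t* n) {
  if (n->prev != NULL) { n->prev->next = n->next; }
  if (n->next != NULL) { n->next->prev = n->prev; }
  if (demo_list == n)  { demo_list = n->next; }
  n->next = NULL;
  n->prev = NULL;
}

int main(void) {
  demo_node_t a = {0}, b = {0};
  demo_push_front(&a);
  demo_push_front(&b);     // list: b -> a
  demo_remove(&a);         // list: b
  assert(demo_list == &b && b.next == NULL && b.prev == NULL);
  return 0;
}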
- mi_lock_done(&subproc->os_pages_lock); + mi_lock_done(&subproc->os_abandoned_pages_lock); mi_lock_done(&subproc->arena_reserve_lock); _mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid); } From 89b0d5a357af02809509544f83c92e7f5be11a3f Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 11:53:29 -0800 Subject: [PATCH 09/16] allocate heaps associated with an arena in that arena --- include/mimalloc/internal.h | 11 ++++++----- include/mimalloc/types.h | 21 ++++++--------------- src/arena-meta.c | 5 +---- src/arena.c | 6 ++---- src/heap.c | 14 +++++++++++--- src/init.c | 10 +++++----- 6 files changed, 31 insertions(+), 36 deletions(-) diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index e316de94..208989e3 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -147,6 +147,7 @@ mi_arena_t* _mi_arena_from_id(mi_arena_id_t id); void* _mi_arena_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); void* _mi_arena_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +void _mi_arena_free(void* p, size_t size, mi_memid_t memid); bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena); bool _mi_arena_contains(const void* p); void _mi_arenas_collect(bool force_purge); @@ -421,11 +422,11 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) { return (heap != &_mi_heap_empty); } -static inline uintptr_t _mi_ptr_cookie(const void* p) { - extern mi_heap_t _mi_heap_main; - mi_assert_internal(_mi_heap_main.cookie != 0); - return ((uintptr_t)p ^ _mi_heap_main.cookie); -} +//static inline uintptr_t _mi_ptr_cookie(const void* p) { +// extern mi_heap_t _mi_heap_main; +// mi_assert_internal(_mi_heap_main.cookie != 0); +// return ((uintptr_t)p ^ _mi_heap_main.cookie); +//} /* ----------------------------------------------------------- diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 59393848..461b5393 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -396,7 +396,6 @@ struct mi_heap_s { mi_tld_t* tld; // thread-local data mi_arena_t* exclusive_arena; // if the heap should only allocate from a specific arena (or NULL) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) - uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. 
size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) @@ -522,21 +521,13 @@ void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount); #define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount) #define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount) -#define mi_tld_stat_counter_increase(tld,stat,amount) __mi_stat_counter_increase( &(tld)->stats.stat, amount) -#define mi_tld_stat_increase(tld,stat,amount) __mi_stat_increase( &(tld)->stats.stat, amount) -#define mi_tld_stat_decrease(tld,stat,amount) __mi_stat_decrease( &(tld)->stats.stat, amount) +#define mi_heap_stat_counter_increase(heap,stat,amount) __mi_stat_counter_increase( &(heap)->tld->stats.stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) __mi_stat_increase( &(heap)->tld->stats.stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) __mi_stat_decrease( &(heap)->tld->stats.stat, amount) -#define mi_debug_tld_stat_counter_increase(tld,stat,amount) mi_debug_stat_counter_increase( (tld)->stats.stat, amount) -#define mi_debug_tld_stat_increase(tld,stat,amount) mi_debug_stat_increase( (tld)->stats.stat, amount) -#define mi_debug_tld_stat_decrease(tld,stat,amount) mi_debug_stat_decrease( (tld)->stats.stat, amount) - -#define mi_heap_stat_counter_increase(heap,stat,amount) mi_tld_stat_counter_increase((heap)->tld, stat, amount) -#define mi_heap_stat_increase(heap,stat,amount) mi_tld_stat_increase( (heap)->tld, stat, amount) -#define mi_heap_stat_decrease(heap,stat,amount) mi_tld_stat_decrease( (heap)->tld, stat, amount) - -#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_tld_stat_counter_increase((heap)->tld, stat, amount) -#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_tld_stat_increase( (heap)->tld, stat, amount) -#define mi_debug_heap_stat_decrease(heap,stat,amount) mi_debug_tld_stat_decrease( (heap)->tld, stat, amount) +#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_stat_counter_increase( (heap)->tld->stats.stat, amount) +#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_stat_increase( (heap)->tld->stats.stat, amount) +#define mi_debug_heap_stat_decrease(heap,stat,amount) mi_debug_stat_decrease( (heap)->tld->stats.stat, amount) // ------------------------------------------------------ diff --git a/src/arena-meta.c b/src/arena-meta.c index a5dc8e75..065a1331 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -148,11 +148,8 @@ mi_decl_noinline void _mi_meta_free(void* p, size_t size, mi_memid_t memid) { _mi_memzero_aligned(mi_meta_block_start(mpage, block_idx), block_count*MI_META_BLOCK_SIZE); mi_bitmap_setN(&mpage->blocks_free, block_idx, block_count,NULL); } - else if (mi_memid_is_os(memid)) { - _mi_os_free(p, size, memid); - } else { - mi_assert_internal(mi_memid_needs_no_free(memid)); + _mi_arena_free(p,size,memid); } } diff --git a/src/arena.c b/src/arena.c index c4b02cf6..869cba49 100644 --- a/src/arena.c +++ b/src/arena.c @@ -762,8 +762,6 @@ mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t block return page; } -static void mi_arena_free(void* p, size_t size, mi_memid_t memid); - void _mi_arena_page_free(mi_page_t* page) { mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(_mi_ptr_page(page)==page); @@ -794,7 +792,7 @@ void _mi_arena_page_free(mi_page_t* page) { if (page->memid.memkind == MI_MEM_ARENA) { 
mi_bitmap_clear(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index); } - mi_arena_free(page, mi_memid_size(page->memid), page->memid); + _mi_arena_free(page, mi_memid_size(page->memid), page->memid); } /* ----------------------------------------------------------- @@ -920,7 +918,7 @@ void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) { static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices); static void mi_arenas_try_purge(bool force, bool visit_all); -static void mi_arena_free(void* p, size_t size, mi_memid_t memid) { +void _mi_arena_free(void* p, size_t size, mi_memid_t memid) { if (p==NULL) return; if (size==0) return; diff --git a/src/heap.c b/src/heap.c index d82b383f..f47aaad9 100644 --- a/src/heap.c +++ b/src/heap.c @@ -213,8 +213,8 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint _mi_random_split(&heap->tld->heap_backing->random, &heap->random); } heap->cookie = _mi_heap_random_next(heap) | 1; - heap->keys[0] = _mi_heap_random_next(heap); - heap->keys[1] = _mi_heap_random_next(heap); + //heap->keys[0] = _mi_heap_random_next(heap); + //heap->keys[1] = _mi_heap_random_next(heap);*/ _mi_heap_guarded_init(heap); // push on the thread local heaps list @@ -227,7 +227,15 @@ mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena mi_assert(heap_tag >= 0 && heap_tag < 256); // allocate and initialize a heap mi_memid_t memid; - mi_heap_t* heap = (mi_heap_t*)_mi_meta_zalloc(sizeof(mi_heap_t), &memid); + mi_heap_t* heap; + if (arena_id == _mi_arena_id_none()) { + heap = (mi_heap_t*)_mi_meta_zalloc(sizeof(mi_heap_t), &memid); + } + else { + // heaps associated wita a specific arena are allocated in that arena + // note: takes up at least one slice which is quite wasteful... + heap = (mi_heap_t*)_mi_arena_alloc(_mi_subproc(), sizeof(mi_heap_t), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, &memid); + } if (heap==NULL) { _mi_error_message(ENOMEM, "unable to allocate heap meta-data\n"); return NULL; diff --git a/src/init.c b/src/init.c index 1968ef68..2f147e55 100644 --- a/src/init.c +++ b/src/init.c @@ -115,7 +115,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { &tld_empty, // tld NULL, // exclusive_arena 0, // cookie - { 0, 0 }, // keys + //{ 0, 0 }, // keys { {0}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max @@ -149,9 +149,9 @@ static mi_decl_cache_align mi_tld_t tld_main = { mi_decl_cache_align mi_heap_t heap_main = { &tld_main, // thread local data + NULL, // exclusive arena 0, // initial cookie - 0, // arena id - { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + //{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) 
{ {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max @@ -248,8 +248,8 @@ static void mi_heap_main_init(void) { _mi_random_init(&heap_main.random); #endif heap_main.cookie = _mi_heap_random_next(&heap_main); - heap_main.keys[0] = _mi_heap_random_next(&heap_main); - heap_main.keys[1] = _mi_heap_random_next(&heap_main); + //heap_main.keys[0] = _mi_heap_random_next(&heap_main); + //heap_main.keys[1] = _mi_heap_random_next(&heap_main); _mi_heap_guarded_init(&heap_main); heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); From 7d46478a5f7c16b078b7955df95d3801eb1d585d Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 13:19:06 -0800 Subject: [PATCH 10/16] add initial load/unload for heaps --- include/mimalloc.h | 8 ++++- src/arena.c | 22 +++++++----- src/heap.c | 83 ++++++++++++++++++++++++++++++++++++---------- 3 files changed, 86 insertions(+), 27 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 7a58e54c..b0a20e9e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -326,7 +326,13 @@ mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, //mi_decl_export void mi_os_decommit(void* p, size_t size); mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size); -mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, mi_arena_id_t* arena_id); +mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id); +mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena); +mi_decl_export void mi_heap_unload(mi_heap_t* heap); + +// Is a pointer contained in the given arena area? +mi_decl_export bool mi_arena_contains(mi_arena_id_t arena_id, const void* p); + // ------------------------------------------------------ // Convenience diff --git a/src/arena.c b/src/arena.c index 869cba49..aa3c9175 100644 --- a/src/arena.c +++ b/src/arena.c @@ -492,7 +492,6 @@ void* _mi_arena_alloc_aligned( mi_subproc_t* subproc, // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? - req_arena == NULL && // not a specific arena? size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && // and not too small/large alignment <= MI_ARENA_SLICE_ALIGN && align_offset == 0) // and good alignment { @@ -980,13 +979,21 @@ void _mi_arenas_collect(bool force_purge) { mi_arenas_try_purge(force_purge, force_purge /* visit all? */); } + +// Is a pointer contained in the given arena area? +bool mi_arena_contains(mi_arena_id_t arena_id, const void* p) { + mi_arena_t* arena = _mi_arena_from_id(arena_id); + return (mi_arena_start(arena) <= (const uint8_t*)p && + mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) >(const uint8_t*)p); +} + // Is a pointer inside any of our arenas? 
bool _mi_arena_contains(const void* p) { mi_subproc_t* subproc = _mi_subproc(); const size_t max_arena = mi_arenas_get_count(subproc); for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); - if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) >(const uint8_t*)p) { + if (arena != NULL && mi_arena_contains(arena,p)) { return true; } } @@ -1636,7 +1643,7 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* return true; } -mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, mi_arena_id_t* arena_id) { +mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id) { // assume the memory area is already containing the arena if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); } if (start == NULL || size == 0) return false; @@ -1658,13 +1665,10 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, _mi_warning_message("the reloaded arena is not exclusive\n"); return false; } - arena->memid.is_pinned = is_large; - arena->memid.initially_committed = is_committed; - arena->memid.initially_zero = is_zero; + arena->is_exclusive = true; - arena->is_large = is_large; - arena->subproc = NULL; - if (!mi_arena_add(_mi_subproc(), arena, arena_id)) { + arena->subproc = _mi_subproc(); + if (!mi_arena_add(arena->subproc, arena, arena_id)) { return false; } mi_arena_pages_reregister(arena); diff --git a/src/heap.c b/src/heap.c index f47aaad9..03030b47 100644 --- a/src/heap.c +++ b/src/heap.c @@ -234,7 +234,7 @@ mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena else { // heaps associated wita a specific arena are allocated in that arena // note: takes up at least one slice which is quite wasteful... - heap = (mi_heap_t*)_mi_arena_alloc(_mi_subproc(), sizeof(mi_heap_t), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, &memid); + heap = (mi_heap_t*)_mi_arena_alloc(_mi_subproc(), _mi_align_up(sizeof(mi_heap_t),MI_ARENA_MIN_OBJ_SIZE), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, &memid); } if (heap==NULL) { _mi_error_message(ENOMEM, "unable to allocate heap meta-data\n"); @@ -280,7 +280,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { } // called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources. 
-static void mi_heap_free(mi_heap_t* heap) { +static void mi_heap_free(mi_heap_t* heap, bool do_free_mem) { mi_assert(heap != NULL); mi_assert_internal(mi_heap_is_initialized(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return; @@ -307,7 +307,9 @@ static void mi_heap_free(mi_heap_t* heap) { mi_assert_internal(heap->tld->heaps != NULL); // and free the used memory - _mi_meta_free(heap, sizeof(*heap), heap->memid); + if (do_free_mem) { + _mi_meta_free(heap, sizeof(*heap), heap->memid); + } } // return a heap on the same thread as `heap` specialized for the specified tag (if it exists) @@ -403,7 +405,7 @@ void mi_heap_destroy(mi_heap_t* heap) { #endif // free all pages _mi_heap_destroy_pages(heap); - mi_heap_free(heap); + mi_heap_free(heap,true); } #endif } @@ -462,20 +464,11 @@ void mi_heap_delete(mi_heap_t* heap) mi_assert_expensive(mi_heap_is_valid(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return; - /* - mi_heap_t* bheap = heap->tld->heap_backing; - if (bheap != heap && mi_heaps_are_compatible(bheap,heap)) { - // transfer still used pages to the backing heap - mi_heap_absorb(bheap, heap); - } - else - */ - { - // abandon all pages - _mi_heap_collect_abandon(heap); - } + // abandon all pages + _mi_heap_collect_abandon(heap); + mi_assert_internal(heap->page_count==0); - mi_heap_free(heap); + mi_heap_free(heap,true); } mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { @@ -489,7 +482,63 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { } +/* ----------------------------------------------------------- + Load/unload heaps +----------------------------------------------------------- */ +void mi_heap_unload(mi_heap_t* heap) { + mi_assert(mi_heap_is_initialized(heap)); + mi_assert_expensive(mi_heap_is_valid(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return; + if (heap->exclusive_arena == NULL) { + _mi_warning_message("cannot unload heaps that are not associated with an exclusive arena\n"); + return; + } + + // abandon all pages so all thread'id in the pages are cleared + _mi_heap_collect_abandon(heap); + mi_assert_internal(heap->page_count==0); + // remove from heap list + mi_heap_free(heap, false /* but don't actually free the memory */); + + // disassociate from the current thread-local and static state + heap->tld = NULL; + return; +} + +bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena_id) { + mi_assert(mi_heap_is_initialized(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return false; + if (heap->exclusive_arena == NULL) { + _mi_warning_message("cannot reload heaps that were not associated with an exclusive arena\n"); + return false; + } + if (heap->tld != NULL) { + _mi_warning_message("cannot reload heaps that were not unloaded first\n"); + return false; + } + mi_arena_t* arena = _mi_arena_from_id(arena_id); + if (heap->exclusive_arena != arena) { + _mi_warning_message("trying to reload a heap at a different arena address: %p vs %p\n", heap->exclusive_arena, arena); + return false; + } + + mi_assert_internal(heap->page_count==0); + + // re-associate from the current thread-local and static state + heap->tld = _mi_tld(); + + // reinit direct pages (as we may be in a different process) + mi_assert_internal(heap->page_count == 0); + for (int i = 0; i < MI_PAGES_DIRECT; i++) { + heap->pages_free_direct[i] = (mi_page_t*)&_mi_page_empty; + } + + // push on the thread local heaps list + heap->next = heap->tld->heaps; + heap->tld->heaps = heap; + return true; +} /* ----------------------------------------------------------- Analysis 
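Taken together, the new mi_arena_unload/mi_arena_reload and mi_heap_unload/mi_heap_reload entry points let an exclusive arena, and the heaps allocated inside it, be detached from one thread or process and re-attached later. The sketch below is one possible usage flow pieced together from the declarations added in this patch; it omits error handling, leaves the persistence/remap step abstract, and assumes the unloaded area is mapped back at the same base address (mi_heap_reload warns otherwise):

#include <mimalloc.h>

void demo_unload_reload(void) {
  // reserve an exclusive arena and a heap that allocates only from it
  mi_arena_id_t arena_id;
  mi_reserve_os_memory_ex(64 * 1024 * 1024, true /*commit*/, false /*allow large*/, true /*exclusive*/, &arena_id);
  mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
  void* p = mi_heap_malloc(heap, 100);

  // detach: abandon the heap's pages, then unload the arena memory
  mi_heap_unload(heap);
  void* base; size_t accessed_size; size_t size;
  mi_arena_unload(arena_id, &base, &accessed_size, &size);

  // ... later, possibly in another process that mapped [base, base+size) at the same address ...
  mi_arena_id_t arena_id2;
  mi_arena_reload(base, size, &arena_id2);
  mi_heap_reload(heap, arena_id2);   // heap metadata lives inside the arena, so the pointer is still valid
  mi_free(p);
}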
From e3ebebb9902c56b6899f70f046cbcc8089674569 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 14:39:17 -0800 Subject: [PATCH 11/16] update lock primitive; fix arena exclusive allocation --- include/mimalloc/atomic.h | 31 ++++++++++++++++++++++++++++--- src/arena-abandon.c | 33 +++++++++++---------------------- src/arena.c | 5 +++-- src/init.c | 15 +++++++-------- 4 files changed, 49 insertions(+), 35 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 0c967896..733dbf42 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023 Microsoft Research, Daan Leijen +Copyright (c) 2018-2024 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -411,8 +411,11 @@ static inline void mi_atomic_yield(void) { #pragma warning(disable:26110) // unlock with holding lock #endif +#define mi_lock(lock) for(bool _go = (mi_lock_acquire(lock),true); _go; (mi_lock_release(lock), _go=false) ) + #if defined(_WIN32) +#if 1 #define mi_lock_t SRWLOCK // slim reader-writer lock static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -432,6 +435,30 @@ static inline void mi_lock_done(mi_lock_t* lock) { // nothing } +#else +#define mi_lock_t CRITICAL_SECTION + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + return TryEnterCriticalSection(lock); + +} +static inline void mi_lock_acquire(mi_lock_t* lock) { + EnterCriticalSection(lock); + +} +static inline void mi_lock_release(mi_lock_t* lock) { + LeaveCriticalSection(lock); + +} +static inline void mi_lock_init(mi_lock_t* lock) { + InitializeCriticalSection(lock); + +} +static inline void mi_lock_done(mi_lock_t* lock) { + DeleteCriticalSection(lock); + +} +#endif #elif defined(MI_USE_PTHREADS) @@ -506,6 +533,4 @@ static inline void mi_lock_done(mi_lock_t* lock) { #endif - - #endif // __MIMALLOC_ATOMIC_H diff --git a/src/arena-abandon.c b/src/arena-abandon.c index 48e37794..460c80fc 100644 --- a/src/arena-abandon.c +++ b/src/arena-abandon.c @@ -120,11 +120,7 @@ static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) { mi_assert(segment->memid.memkind != MI_MEM_ARENA); // not in an arena; we use a list of abandoned segments mi_subproc_t* const subproc = segment->subproc; - if (!mi_lock_acquire(&subproc->abandoned_os_lock)) { - _mi_error_message(EFAULT, "internal error: failed to acquire the abandoned (os) segment lock to mark abandonment"); - // we can continue but cannot visit/reclaim such blocks.. 
- } - else { + mi_lock(&subproc->abandoned_os_lock) { // push on the tail of the list (important for the visitor) mi_segment_t* prev = subproc->abandoned_os_list_tail; mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL); @@ -138,7 +134,6 @@ static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) { mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count); mi_atomic_increment_relaxed(&subproc->abandoned_count); // and release the lock - mi_lock_release(&subproc->abandoned_os_lock); } return; } @@ -251,7 +246,7 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_ if mi_unlikely(field != 0) { // skip zero fields quickly // we only take the arena lock if there are actually abandoned segments present if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) { - has_lock = (previous->visit_all ? mi_lock_acquire(&arena->abandoned_visit_lock) : mi_lock_try_acquire(&arena->abandoned_visit_lock)); + has_lock = (previous->visit_all ? (mi_lock_acquire(&arena->abandoned_visit_lock),true) : mi_lock_try_acquire(&arena->abandoned_visit_lock)); if (!has_lock) { if (previous->visit_all) { _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock"); @@ -289,8 +284,8 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_c // we only allow one thread per sub-process to do to visit guarded by the `abandoned_os_visit_lock`. // The lock is released when the cursor is released. if (!previous->hold_visit_lock) { - previous->hold_visit_lock = (previous->visit_all ? mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock) - : mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock)); + previous->hold_visit_lock = (previous->visit_all ? 
(mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock),true) + : mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock)); if (!previous->hold_visit_lock) { if (previous->visit_all) { _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock"); @@ -301,21 +296,15 @@ static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_c // One list entry at a time while (previous->os_list_count > 0) { previous->os_list_count--; - const bool has_lock = mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free` - if (has_lock) { - mi_segment_t* segment = previous->subproc->abandoned_os_list; - // pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries) - if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) { - mi_lock_release(&previous->subproc->abandoned_os_lock); - return segment; - } - // already abandoned, try again + mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free` + mi_segment_t* segment = previous->subproc->abandoned_os_list; + // pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries) + if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) { mi_lock_release(&previous->subproc->abandoned_os_lock); + return segment; } - else { - _mi_error_message(EFAULT, "failed to acquire abandoned OS list lock during abandoned block visit\n"); - return NULL; - } + // already abandoned, try again + mi_lock_release(&previous->subproc->abandoned_os_lock); } // done mi_assert_internal(previous->os_list_count == 0); diff --git a/src/arena.c b/src/arena.c index 164f3116..86ac5955 100644 --- a/src/arena.c +++ b/src/arena.c @@ -394,8 +394,9 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset const int numa_node = _mi_os_numa_node(); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed? - if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc)) { // is arena allocation allowed? 
+ if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) + { void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid); if (p != NULL) return p; diff --git a/src/init.c b/src/init.c index 3e4da831..68a1d7e2 100644 --- a/src/init.c +++ b/src/init.c @@ -168,8 +168,8 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; #if MI_GUARDED mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) { heap->guarded_sample_seed = seed; - if (heap->guarded_sample_seed == 0) { - heap->guarded_sample_seed = _mi_heap_random_next(heap); + if (heap->guarded_sample_seed == 0) { + heap->guarded_sample_seed = _mi_heap_random_next(heap); } heap->guarded_sample_rate = sample_rate; if (heap->guarded_sample_rate >= 1) { @@ -187,9 +187,9 @@ void _mi_heap_guarded_init(mi_heap_t* heap) { mi_heap_guarded_set_sample_rate(heap, (size_t)mi_option_get_clamp(mi_option_guarded_sample_rate, 0, LONG_MAX), (size_t)mi_option_get(mi_option_guarded_sample_seed)); - mi_heap_guarded_set_size_bound(heap, + mi_heap_guarded_set_size_bound(heap, (size_t)mi_option_get_clamp(mi_option_guarded_min, 0, LONG_MAX), - (size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) ); + (size_t)mi_option_get_clamp(mi_option_guarded_max, 0, LONG_MAX) ); } #else mi_decl_export void mi_heap_guarded_set_sample_rate(mi_heap_t* heap, size_t sample_rate, size_t seed) { @@ -257,11 +257,10 @@ void mi_subproc_delete(mi_subproc_id_t subproc_id) { mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); // check if there are no abandoned segments still.. bool safe_to_delete = false; - if (mi_lock_acquire(&subproc->abandoned_os_lock)) { + mi_lock(&subproc->abandoned_os_lock) { if (subproc->abandoned_os_list == NULL) { safe_to_delete = true; } - mi_lock_release(&subproc->abandoned_os_lock); } if (!safe_to_delete) return; // safe to release @@ -398,7 +397,7 @@ void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) { tld->heap_backing = bheap; tld->heaps = NULL; tld->segments.subproc = &mi_subproc_default; - tld->segments.stats = &tld->stats; + tld->segments.stats = &tld->stats; } // Free the thread local default heap (called from `mi_thread_done`) @@ -599,7 +598,7 @@ static void mi_detect_cpu_features(void) { } #else static void mi_detect_cpu_features(void) { - // nothing + // nothing } #endif From 108c84e858b7ee2aa2fd3f00de03afb879e89718 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 14:45:14 -0800 Subject: [PATCH 12/16] remove req_arena parameter to arena_reserve --- src/arena.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index aa3c9175..af1f737e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -274,11 +274,8 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id); // try to reserve a fresh arena space -static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id) +static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_large, mi_arena_id_t* arena_id) { - // if (_mi_preloading()) return false; // use OS only while pre loading - if (req_arena_id != _mi_arena_id_none()) return false; - const size_t arena_count = mi_arenas_get_count(subproc); if (arena_count > (MI_MAX_ARENAS - 4)) return false; @@ -443,7 +440,7 @@ static mi_decl_noinline void* 
mi_arenas_try_alloc( if (arena_count == mi_arenas_get_count(subproc)) { // we are the first to enter the lock, reserve a fresh arena mi_arena_id_t arena_id = 0; - mi_arena_reserve(subproc, mi_size_of_slices(slice_count), allow_large, req_arena, &arena_id); + mi_arena_reserve(subproc, mi_size_of_slices(slice_count), allow_large, &arena_id); } else { // another thread already reserved a new arena From 476d4699ff93380009ae35780c2261ae674e4200 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 15:24:46 -0800 Subject: [PATCH 13/16] limit purgeing to one purge cycle per purge delay --- include/mimalloc/atomic.h | 26 +++++++-------- src/arena.c | 69 +++++++++++++++++++++++++-------------- 2 files changed, 56 insertions(+), 39 deletions(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 733dbf42..c6083102 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -421,9 +421,8 @@ static inline void mi_atomic_yield(void) { static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return TryAcquireSRWLockExclusive(lock); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { AcquireSRWLockExclusive(lock); - return true; } static inline void mi_lock_release(mi_lock_t* lock) { ReleaseSRWLockExclusive(lock); @@ -432,7 +431,7 @@ static inline void mi_lock_init(mi_lock_t* lock) { InitializeSRWLock(lock); } static inline void mi_lock_done(mi_lock_t* lock) { - // nothing + (void)(lock); } #else @@ -440,24 +439,20 @@ static inline void mi_lock_done(mi_lock_t* lock) { static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return TryEnterCriticalSection(lock); - } static inline void mi_lock_acquire(mi_lock_t* lock) { EnterCriticalSection(lock); - } static inline void mi_lock_release(mi_lock_t* lock) { LeaveCriticalSection(lock); - } static inline void mi_lock_init(mi_lock_t* lock) { InitializeCriticalSection(lock); - } static inline void mi_lock_done(mi_lock_t* lock) { DeleteCriticalSection(lock); - } + #endif #elif defined(MI_USE_PTHREADS) @@ -467,8 +462,11 @@ static inline void mi_lock_done(mi_lock_t* lock) { static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return (pthread_mutex_trylock(lock) == 0); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { - return (pthread_mutex_lock(lock) == 0); +static inline void mi_lock_acquire(mi_lock_t* lock) { + const int err = pthread_mutex_lock(lock); + if (err != 0) { + mi_error_message(EFAULT, "internal error: lock cannot be acquired\n"); + } } static inline void mi_lock_release(mi_lock_t* lock) { pthread_mutex_unlock(lock); @@ -488,9 +486,8 @@ static inline void mi_lock_done(mi_lock_t* lock) { static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return lock->try_lock(); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { lock->lock(); - return true; } static inline void mi_lock_release(mi_lock_t* lock) { lock->unlock(); @@ -513,12 +510,11 @@ static inline bool mi_lock_try_acquire(mi_lock_t* lock) { uintptr_t expected = 0; return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { for (int i = 0; i < 1000; i++) { // for at most 1000 tries? 
- if (mi_lock_try_acquire(lock)) return true; + if (mi_lock_try_acquire(lock)) return; mi_atomic_yield(); } - return true; } static inline void mi_lock_release(mi_lock_t* lock) { mi_atomic_store_release(lock, (uintptr_t)0); diff --git a/src/arena.c b/src/arena.c index 86ac5955..0ddb2936 100644 --- a/src/arena.c +++ b/src/arena.c @@ -33,7 +33,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo typedef struct mi_arena_s { mi_arena_id_t id; // arena id; 0 for non-specific mi_memid_t memid; // memid of the memory area - _Atomic(uint8_t*)start; // the start of the memory area + _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) size_t meta_size; // size of the arena structure itself (including its bitmaps) @@ -42,12 +42,13 @@ typedef struct mi_arena_s { bool exclusive; // only allow allocations if specifically for this arena bool is_large; // memory area consists of large- or huge OS pages (always committed) mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited - _Atomic(size_t)search_idx; // optimization to start the search for free blocks - _Atomic(mi_msecs_t)purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. - mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) - mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) - mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) + _Atomic(size_t) search_idx; // optimization to start the search for free blocks + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be purged from `blocks_purge`. + + mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? + mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) + mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields. 
} mi_arena_t; @@ -60,6 +61,7 @@ typedef struct mi_arena_s { // The available arenas static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 +static mi_decl_cache_align _Atomic(int64_t) mi_arenas_purge_expire; // set if there exist purgeable arenas #define MI_IN_ARENA_C #include "arena-abandon.c" @@ -349,11 +351,10 @@ static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, siz } // try to reserve a fresh arena space -static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) +static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t *arena_id) { if (_mi_preloading()) return false; // use OS only while pre loading - if (req_arena_id != _mi_arena_id_none()) return false; - + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); if (arena_count > (MI_MAX_ARENAS - 4)) return false; @@ -403,7 +404,7 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset // otherwise, try to first eagerly reserve a new arena if (req_arena_id == _mi_arena_id_none()) { mi_arena_id_t arena_id = 0; - if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { + if (mi_arena_reserve(size, allow_large, &arena_id)) { // and try allocate in there mi_assert_internal(req_arena_id == _mi_arena_id_none()); p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid); @@ -497,13 +498,16 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t mi_arena_purge(arena, bitmap_idx, blocks); } else { - // schedule decommit - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); - if (expire != 0) { - mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10)); // add smallish extra delay + // schedule purge + const mi_msecs_t expire = _mi_clock_now() + delay; + mi_msecs_t expire0 = 0; + if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, expire)) { + // expiration was not yet set + // maybe set the global arenas expire as well (if it wasn't set already) + mi_atomic_casi64_strong_acq_rel(&mi_arenas_purge_expire, &expire0, expire); } else { - mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); + // already an expiration was set } _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); } @@ -538,14 +542,16 @@ static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, // returns true if anything was purged static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) { - if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false; + // check pre-conditions + if (arena->memid.is_pinned) return false; + + // expired yet? 
mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); - if (expire == 0) return false; - if (!force && expire > now) return false; + if (!force && (expire == 0 || expire > now)) return false; // reset expire (if not already set concurrently) mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0); - + // potential purges scheduled, walk through the bitmap bool any_purged = false; bool full_purge = true; @@ -592,9 +598,15 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) return any_purged; } -static void mi_arenas_try_purge( bool force, bool visit_all ) { +static void mi_arenas_try_purge( bool force, bool visit_all ) +{ if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled + // check if any arena needs purging? + const mi_msecs_t now = _mi_clock_now(); + mi_msecs_t arenas_expire = mi_atomic_load_acquire(&mi_arenas_purge_expire); + if (!force && (arenas_expire == 0 || arenas_expire < now)) return; + const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); if (max_arena == 0) return; @@ -602,17 +614,26 @@ static void mi_arenas_try_purge( bool force, bool visit_all ) { static mi_atomic_guard_t purge_guard; mi_atomic_guard(&purge_guard) { - mi_msecs_t now = _mi_clock_now(); - size_t max_purge_count = (visit_all ? max_arena : 1); + // increase global expire: at most one purge per delay cycle + mi_atomic_store_release(&mi_arenas_purge_expire, now + mi_arena_purge_delay()); + size_t max_purge_count = (visit_all ? max_arena : 2); + bool all_visited = true; for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); if (arena != NULL) { if (mi_arena_try_purge(arena, now, force)) { - if (max_purge_count <= 1) break; + if (max_purge_count <= 1) { + all_visited = false; + break; + } max_purge_count--; } } } + if (all_visited) { + // all arena's were visited and purged: reset global expire + mi_atomic_store_release(&mi_arenas_purge_expire, 0); + } } } From 825dd41769bc01984f7db515fe7df597a71547ab Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 15:29:39 -0800 Subject: [PATCH 14/16] fix build error --- include/mimalloc/atomic.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index c6083102..c4fac766 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -457,6 +457,8 @@ static inline void mi_lock_done(mi_lock_t* lock) { #elif defined(MI_USE_PTHREADS) +void _mi_error_message(int err, const char* fmt, ...); + #define mi_lock_t pthread_mutex_t static inline bool mi_lock_try_acquire(mi_lock_t* lock) { @@ -465,7 +467,7 @@ static inline bool mi_lock_try_acquire(mi_lock_t* lock) { static inline void mi_lock_acquire(mi_lock_t* lock) { const int err = pthread_mutex_lock(lock); if (err != 0) { - mi_error_message(EFAULT, "internal error: lock cannot be acquired\n"); + _mi_error_message(err, "internal error: lock cannot be acquired\n"); } } static inline void mi_lock_release(mi_lock_t* lock) { From 7085b6cec31641fddaca3d40932cda82e91baf07 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 15:38:27 -0800 Subject: [PATCH 15/16] limit candidate search to 4 --- src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page.c b/src/page.c index 8808c358..e1c07a93 100644 --- a/src/page.c +++ b/src/page.c @@ -732,7 +732,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi 
-------------------------------------------------------------*/ // search for a best next page to use for at most N pages (often cut short if immediate blocks are available) -#define MI_MAX_CANDIDATE_SEARCH (8) +#define MI_MAX_CANDIDATE_SEARCH (4) // is the page not yet used up to its reserved space? static bool mi_page_is_expandable(const mi_page_t* page) { From c138fba149d358465345ce0316c42d626afe1328 Mon Sep 17 00:00:00 2001 From: daanx Date: Sat, 21 Dec 2024 15:49:17 -0800 Subject: [PATCH 16/16] merge from dev --- src/arena-abandon.c | 346 -------------------------------------------- 1 file changed, 346 deletions(-) delete mode 100644 src/arena-abandon.c diff --git a/src/arena-abandon.c b/src/arena-abandon.c deleted file mode 100644 index 460c80fc..00000000 --- a/src/arena-abandon.c +++ /dev/null @@ -1,346 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2024, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -#if !defined(MI_IN_ARENA_C) -#error "this file should be included from 'arena.c' (so mi_arena_t is visible)" -// add includes help an IDE -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "bitmap.h" -#endif - -// Minimal exports for arena-abandoned. -size_t mi_arena_id_index(mi_arena_id_t id); -mi_arena_t* mi_arena_from_index(size_t idx); -size_t mi_arena_get_count(void); -void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex); -bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index); - -/* ----------------------------------------------------------- - Abandoned blocks/segments: - - _mi_arena_segment_clear_abandoned - _mi_arena_segment_mark_abandoned - - This is used to atomically abandon/reclaim segments - (and crosses the arena API but it is convenient to have here). - - Abandoned segments still have live blocks; they get reclaimed - when a thread frees a block in it, or when a thread needs a fresh - segment. - - Abandoned segments are atomically marked in the `block_abandoned` - bitmap of arenas. Any segments allocated outside arenas are put - in the sub-process `abandoned_os_list`. This list is accessed - using locks but this should be uncommon and generally uncontended. - Reclaim and visiting either scan through the `block_abandoned` - bitmaps of the arena's, or visit the `abandoned_os_list` - - A potentially nicer design is to use arena's for everything - and perhaps have virtual arena's to map OS allocated memory - but this would lack the "density" of our current arena's. TBC. ------------------------------------------------------------ */ - - -// reclaim a specific OS abandoned segment; `true` on success. -// sets the thread_id. -static bool mi_arena_segment_os_clear_abandoned(mi_segment_t* segment, bool take_lock) { - mi_assert(segment->memid.memkind != MI_MEM_ARENA); - // not in an arena, remove from list of abandoned os segments - mi_subproc_t* const subproc = segment->subproc; - if (take_lock && !mi_lock_try_acquire(&subproc->abandoned_os_lock)) { - return false; // failed to acquire the lock, we just give up - } - // remove atomically from the abandoned os list (if possible!) 
- bool reclaimed = false; - mi_segment_t* const next = segment->abandoned_os_next; - mi_segment_t* const prev = segment->abandoned_os_prev; - if (next != NULL || prev != NULL || subproc->abandoned_os_list == segment) { - #if MI_DEBUG>3 - // find ourselves in the abandoned list (and check the count) - bool found = false; - size_t count = 0; - for (mi_segment_t* current = subproc->abandoned_os_list; current != NULL; current = current->abandoned_os_next) { - if (current == segment) { found = true; } - count++; - } - mi_assert_internal(found); - mi_assert_internal(count == mi_atomic_load_relaxed(&subproc->abandoned_os_list_count)); - #endif - // remove (atomically) from the list and reclaim - if (prev != NULL) { prev->abandoned_os_next = next; } - else { subproc->abandoned_os_list = next; } - if (next != NULL) { next->abandoned_os_prev = prev; } - else { subproc->abandoned_os_list_tail = prev; } - segment->abandoned_os_next = NULL; - segment->abandoned_os_prev = NULL; - mi_atomic_decrement_relaxed(&subproc->abandoned_count); - mi_atomic_decrement_relaxed(&subproc->abandoned_os_list_count); - if (take_lock) { // don't reset the thread_id when iterating - mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); - } - reclaimed = true; - } - if (take_lock) { mi_lock_release(&segment->subproc->abandoned_os_lock); } - return reclaimed; -} - -// reclaim a specific abandoned segment; `true` on success. -// sets the thread_id. -bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment) { - if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { - return mi_arena_segment_os_clear_abandoned(segment, true /* take lock */); - } - // arena segment: use the blocks_abandoned bitmap. - size_t arena_idx; - size_t bitmap_idx; - mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); - mi_arena_t* arena = mi_arena_from_index(arena_idx); - mi_assert_internal(arena != NULL); - // reclaim atomically - bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx); - if (was_marked) { - mi_assert_internal(mi_atomic_load_acquire(&segment->thread_id) == 0); - mi_atomic_decrement_relaxed(&segment->subproc->abandoned_count); - mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); - } - // mi_assert_internal(was_marked); - mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); - //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); - return was_marked; -} - - -// mark a specific OS segment as abandoned -static void mi_arena_segment_os_mark_abandoned(mi_segment_t* segment) { - mi_assert(segment->memid.memkind != MI_MEM_ARENA); - // not in an arena; we use a list of abandoned segments - mi_subproc_t* const subproc = segment->subproc; - mi_lock(&subproc->abandoned_os_lock) { - // push on the tail of the list (important for the visitor) - mi_segment_t* prev = subproc->abandoned_os_list_tail; - mi_assert_internal(prev == NULL || prev->abandoned_os_next == NULL); - mi_assert_internal(segment->abandoned_os_prev == NULL); - mi_assert_internal(segment->abandoned_os_next == NULL); - if (prev != NULL) { prev->abandoned_os_next = segment; } - else { subproc->abandoned_os_list = segment; } - subproc->abandoned_os_list_tail = segment; - segment->abandoned_os_prev = prev; - segment->abandoned_os_next = NULL; - mi_atomic_increment_relaxed(&subproc->abandoned_os_list_count); - mi_atomic_increment_relaxed(&subproc->abandoned_count); - 
// and release the lock - } - return; -} - -// mark a specific segment as abandoned -// clears the thread_id. -void _mi_arena_segment_mark_abandoned(mi_segment_t* segment) -{ - mi_assert_internal(segment->used == segment->abandoned); - mi_atomic_store_release(&segment->thread_id, (uintptr_t)0); // mark as abandoned for multi-thread free's - if mi_unlikely(segment->memid.memkind != MI_MEM_ARENA) { - mi_arena_segment_os_mark_abandoned(segment); - return; - } - // segment is in an arena, mark it in the arena `blocks_abandoned` bitmap - size_t arena_idx; - size_t bitmap_idx; - mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); - mi_arena_t* arena = mi_arena_from_index(arena_idx); - mi_assert_internal(arena != NULL); - // set abandonment atomically - mi_subproc_t* const subproc = segment->subproc; // don't access the segment after setting it abandoned - const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); - if (was_unmarked) { mi_atomic_increment_relaxed(&subproc->abandoned_count); } - mi_assert_internal(was_unmarked); - mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); -} - - -/* ----------------------------------------------------------- - Iterate through the abandoned blocks/segments using a cursor. - This is used for reclaiming and abandoned block visiting. ------------------------------------------------------------ */ - -// start a cursor at a randomized arena -void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current) { - mi_assert_internal(heap == NULL || heap->tld->segments.subproc == subproc); - current->bitmap_idx = 0; - current->subproc = subproc; - current->visit_all = visit_all; - current->hold_visit_lock = false; - const size_t abandoned_count = mi_atomic_load_relaxed(&subproc->abandoned_count); - const size_t abandoned_list_count = mi_atomic_load_relaxed(&subproc->abandoned_os_list_count); - const size_t max_arena = mi_arena_get_count(); - if (heap != NULL && heap->arena_id != _mi_arena_id_none()) { - // for a heap that is bound to one arena, only visit that arena - current->start = mi_arena_id_index(heap->arena_id); - current->end = current->start + 1; - current->os_list_count = 0; - } - else { - // otherwise visit all starting at a random location - if (abandoned_count > abandoned_list_count && max_arena > 0) { - current->start = (heap == NULL || max_arena == 0 ? 
0 : (mi_arena_id_t)(_mi_heap_random_next(heap) % max_arena)); - current->end = current->start + max_arena; - } - else { - current->start = 0; - current->end = 0; - } - current->os_list_count = abandoned_list_count; // max entries to visit in the os abandoned list - } - mi_assert_internal(current->start <= max_arena); -} - -void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current) { - if (current->hold_visit_lock) { - mi_lock_release(¤t->subproc->abandoned_os_visit_lock); - current->hold_visit_lock = false; - } -} - -static mi_segment_t* mi_arena_segment_clear_abandoned_at(mi_arena_t* arena, mi_subproc_t* subproc, mi_bitmap_index_t bitmap_idx) { - // try to reclaim an abandoned segment in the arena atomically - if (!_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) return NULL; - mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); - mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx); - mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); - // check that the segment belongs to our sub-process - // note: this is the reason we need the `abandoned_visit` lock in the case abandoned visiting is enabled. - // without the lock an abandoned visit may otherwise fail to visit all abandoned segments in the sub-process. - // for regular reclaim it is fine to miss one sometimes so without abandoned visiting we don't need the `abandoned_visit` lock. - if (segment->subproc != subproc) { - // it is from another sub-process, re-mark it and continue searching - const bool was_zero = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); - mi_assert_internal(was_zero); MI_UNUSED(was_zero); - return NULL; - } - else { - // success, we unabandoned a segment in our sub-process - mi_atomic_decrement_relaxed(&subproc->abandoned_count); - return segment; - } -} - -static mi_segment_t* mi_arena_segment_clear_abandoned_next_field(mi_arena_field_cursor_t* previous) { - const size_t max_arena = mi_arena_get_count(); - size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx); - size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx); - // visit arena's (from the previous cursor) - for (; previous->start < previous->end; previous->start++, field_idx = 0, bit_idx = 0) { - // index wraps around - size_t arena_idx = (previous->start >= max_arena ? previous->start % max_arena : previous->start); - mi_arena_t* arena = mi_arena_from_index(arena_idx); - if (arena != NULL) { - bool has_lock = false; - // visit the abandoned fields (starting at previous_idx) - for (; field_idx < arena->field_count; field_idx++, bit_idx = 0) { - size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]); - if mi_unlikely(field != 0) { // skip zero fields quickly - // we only take the arena lock if there are actually abandoned segments present - if (!has_lock && mi_option_is_enabled(mi_option_visit_abandoned)) { - has_lock = (previous->visit_all ? (mi_lock_acquire(&arena->abandoned_visit_lock),true) : mi_lock_try_acquire(&arena->abandoned_visit_lock)); - if (!has_lock) { - if (previous->visit_all) { - _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the visitor lock"); - } - // skip to next arena - break; - } - } - mi_assert_internal(has_lock || !mi_option_is_enabled(mi_option_visit_abandoned)); - // visit each set bit in the field (todo: maybe use `ctz` here?) 
- for (; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) { - // pre-check if the bit is set - size_t mask = ((size_t)1 << bit_idx); - if mi_unlikely((field & mask) == mask) { - mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); - mi_segment_t* const segment = mi_arena_segment_clear_abandoned_at(arena, previous->subproc, bitmap_idx); - if (segment != NULL) { - //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); - if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } - previous->bitmap_idx = mi_bitmap_index_create_ex(field_idx, bit_idx + 1); // start at next one for the next iteration - return segment; - } - } - } - } - } - if (has_lock) { mi_lock_release(&arena->abandoned_visit_lock); } - } - } - return NULL; -} - -static mi_segment_t* mi_arena_segment_clear_abandoned_next_list(mi_arena_field_cursor_t* previous) { - // go through the abandoned_os_list - // we only allow one thread per sub-process to do to visit guarded by the `abandoned_os_visit_lock`. - // The lock is released when the cursor is released. - if (!previous->hold_visit_lock) { - previous->hold_visit_lock = (previous->visit_all ? (mi_lock_acquire(&previous->subproc->abandoned_os_visit_lock),true) - : mi_lock_try_acquire(&previous->subproc->abandoned_os_visit_lock)); - if (!previous->hold_visit_lock) { - if (previous->visit_all) { - _mi_error_message(EFAULT, "internal error: failed to visit all abandoned segments due to failure to acquire the OS visitor lock"); - } - return NULL; // we cannot get the lock, give up - } - } - // One list entry at a time - while (previous->os_list_count > 0) { - previous->os_list_count--; - mi_lock_acquire(&previous->subproc->abandoned_os_lock); // this could contend with concurrent OS block abandonment and reclaim from `free` - mi_segment_t* segment = previous->subproc->abandoned_os_list; - // pop from head of the list, a subsequent mark will push at the end (and thus we iterate through os_list_count entries) - if (segment == NULL || mi_arena_segment_os_clear_abandoned(segment, false /* we already have the lock */)) { - mi_lock_release(&previous->subproc->abandoned_os_lock); - return segment; - } - // already abandoned, try again - mi_lock_release(&previous->subproc->abandoned_os_lock); - } - // done - mi_assert_internal(previous->os_list_count == 0); - return NULL; -} - - -// reclaim abandoned segments -// this does not set the thread id (so it appears as still abandoned) -mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous) { - if (previous->start < previous->end) { - // walk the arena - mi_segment_t* segment = mi_arena_segment_clear_abandoned_next_field(previous); - if (segment != NULL) { return segment; } - } - // no entries in the arena's anymore, walk the abandoned OS list - mi_assert_internal(previous->start == previous->end); - return mi_arena_segment_clear_abandoned_next_list(previous); -} - - -bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { - // (unfortunately) the visit_abandoned option must be enabled from the start. 
- // This is to avoid taking locks if abandoned list visiting is not required (as for most programs) - if (!mi_option_is_enabled(mi_option_visit_abandoned)) { - _mi_error_message(EFAULT, "internal error: can only visit abandoned blocks when MIMALLOC_VISIT_ABANDONED=ON"); - return false; - } - mi_arena_field_cursor_t current; - _mi_arena_field_cursor_init(NULL, _mi_subproc_from_id(subproc_id), true /* visit all (blocking) */, ¤t); - mi_segment_t* segment; - bool ok = true; - while (ok && (segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL) { - ok = _mi_segment_visit_blocks(segment, heap_tag, visit_blocks, visitor, arg); - _mi_arena_segment_mark_abandoned(segment); - } - _mi_arena_field_cursor_done(¤t); - return ok; -}
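Editor's note on the lock changes above: mi_lock_acquire becomes a void function that reports a failure instead of returning a bool, and mi_subproc_delete now uses the scoped mi_lock(&subproc->abandoned_os_lock) { ... } form. The following is a small standalone sketch of that pattern under a pthreads build; the WITH_LOCK macro, lock_acquire, and the error reporting are illustrative stand-ins, not mimalloc's actual definitions.

// Standalone sketch (not mimalloc code): a void-returning acquire that reports
// failure, plus a scoped-lock macro in the spirit of `mi_lock(...) { ... }`.
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

typedef pthread_mutex_t lock_t;

static inline void lock_acquire(lock_t* lock) {
  const int err = pthread_mutex_lock(lock);
  if (err != 0) {
    // callers no longer check a result; report instead of silently continuing
    fprintf(stderr, "internal error: lock cannot be acquired (err %d)\n", err);
  }
}

static inline void lock_release(lock_t* lock) {
  pthread_mutex_unlock(lock);
}

// run the following block with the lock held; release it when the block exits
#define WITH_LOCK(lock) \
  for (bool _once = (lock_acquire(lock), true); _once; (lock_release(lock), _once = false))

static lock_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int list_len = 0;

int main(void) {
  bool safe_to_delete = false;
  WITH_LOCK(&list_lock) {   // mirrors the scoped form used in mi_subproc_delete
    if (list_len == 0) { safe_to_delete = true; }
  }                         // lock released here
  printf("safe_to_delete = %d\n", safe_to_delete);
  return 0;
}

Note that in this simplified macro a break or early return inside the block would skip the release; a production version has to guard against that.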
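Editor's note on patch 13 (limit purging to one purge cycle per purge delay): an expiration time is installed with a strong CAS only when none was pending, the purge walk is skipped while nothing is due, and the expiration is pushed one delay ahead when a cycle does run (and reset to zero once every arena has been visited). The sketch below shows the general shape of that throttle with C11 atomics; the names, the clock helper, and the single global expiration are assumptions for illustration and differ from the per-arena plus global bookkeeping in the patch.

// Standalone sketch (not mimalloc code): run at most one purge pass per delay.
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

typedef int64_t msecs_t;

static atomic_llong global_purge_expire = 0;   // 0 = no purge scheduled
static const msecs_t purge_delay = 1000;       // e.g. one second

static msecs_t clock_now(void) {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (msecs_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

// called when blocks become free: schedule a purge only if none is pending yet
static void schedule_purge(void) {
  long long expected = 0;
  const long long expire = clock_now() + purge_delay;
  // only the first caller in a cycle sets the expiration; later calls keep it
  atomic_compare_exchange_strong(&global_purge_expire, &expected, expire);
}

// called on allocation/free paths: do the purge work at most once per delay
static bool try_purge(bool force) {
  const msecs_t now = clock_now();
  const long long expire = atomic_load(&global_purge_expire);
  if (!force && (expire == 0 || expire > now)) return false;  // not due yet
  // push the expiration one delay ahead so concurrent callers skip this cycle
  atomic_store(&global_purge_expire, now + purge_delay);
  // ... walk the arenas and purge expired blocks here ...
  bool all_visited = true;
  if (all_visited) {
    atomic_store(&global_purge_expire, 0);  // nothing pending anymore
  }
  return true;
}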
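Editor's note on the deleted arena-abandon.c: it describes abandoning and reclaiming segments through an atomic blocks_abandoned bitmap, where marking sets a bit, reclaiming clears it, and only the thread that actually flips the bit wins ownership. A minimal standalone sketch of that claim/unclaim protocol, using plain C11 atomics rather than mimalloc's bitmap helpers:

// Standalone sketch (not mimalloc code): atomic claim/unclaim of one bit.
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef _Atomic(uint64_t) bitmap_field_t;

// mark as abandoned: true if the bit was previously clear (we set it)
static bool bitmap_claim(bitmap_field_t* field, unsigned bit) {
  const uint64_t mask = (uint64_t)1 << bit;
  const uint64_t prev = atomic_fetch_or(field, mask);
  return (prev & mask) == 0;
}

// reclaim: true only for the single thread that clears a set bit
static bool bitmap_unclaim(bitmap_field_t* field, unsigned bit) {
  const uint64_t mask = (uint64_t)1 << bit;
  const uint64_t prev = atomic_fetch_and(field, ~mask);
  return (prev & mask) != 0;
}

This matches the property the removed code relied on: _mi_arena_segment_clear_abandoned only takes ownership (and restores the thread id) when the unclaim reports that the bit was indeed set, so concurrent reclaimers cannot both adopt the same segment.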