diff --git a/ide/vs2022/mimalloc-test.vcxproj b/ide/vs2022/mimalloc-test.vcxproj index a8b36d5e..6e4576fd 100644 --- a/ide/vs2022/mimalloc-test.vcxproj +++ b/ide/vs2022/mimalloc-test.vcxproj @@ -272,14 +272,14 @@ Console + + + {abb5eae7-b3e6-432e-b636-333449892ea6} - - - diff --git a/include/mimalloc.h b/include/mimalloc.h index 24217fae..b0a20e9e 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -279,7 +279,7 @@ mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_commit mi_decl_export void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept; // Experimental: heaps associated with specific memory arena's -typedef int mi_arena_id_t; +typedef void* mi_arena_id_t; mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; @@ -326,7 +326,13 @@ mi_decl_export void mi_heap_guarded_set_size_bound(mi_heap_t* heap, size_t min, //mi_decl_export void mi_os_decommit(void* p, size_t size); mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* size); -mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, mi_arena_id_t* arena_id); +mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id); +mi_decl_export bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena); +mi_decl_export void mi_heap_unload(mi_heap_t* heap); + +// Is a pointer contained in the given arena area? +mi_decl_export bool mi_arena_contains(mi_arena_id_t arena_id, const void* p); + // ------------------------------------------------------ // Convenience diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 95c1aefd..9f01ff34 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2023 Microsoft Research, Daan Leijen +Copyright (c) 2018-2024 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -407,19 +407,45 @@ static inline void mi_atomic_yield(void) { // ---------------------------------------------------------------------- -// Locks are only used for abandoned segment visiting in `arena.c` +// Locks +// These should be light-weight in-process only locks. +// Only used for reserving arena's and to maintain the abandoned list. 
// ---------------------------------------------------------------------- +#if _MSC_VER +#pragma warning(disable:26110) // unlock with holding lock +#endif + +#define mi_lock(lock) for(bool _go = (mi_lock_acquire(lock),true); _go; (mi_lock_release(lock), _go=false) ) #if defined(_WIN32) +#if 1 +#define mi_lock_t SRWLOCK // slim reader-writer lock + +static inline bool mi_lock_try_acquire(mi_lock_t* lock) { + return TryAcquireSRWLockExclusive(lock); +} +static inline void mi_lock_acquire(mi_lock_t* lock) { + AcquireSRWLockExclusive(lock); +} +static inline void mi_lock_release(mi_lock_t* lock) { + ReleaseSRWLockExclusive(lock); +} +static inline void mi_lock_init(mi_lock_t* lock) { + InitializeSRWLock(lock); +} +static inline void mi_lock_done(mi_lock_t* lock) { + (void)(lock); +} + +#else #define mi_lock_t CRITICAL_SECTION static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return TryEnterCriticalSection(lock); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { EnterCriticalSection(lock); - return true; } static inline void mi_lock_release(mi_lock_t* lock) { LeaveCriticalSection(lock); @@ -431,16 +457,22 @@ static inline void mi_lock_done(mi_lock_t* lock) { DeleteCriticalSection(lock); } +#endif #elif defined(MI_USE_PTHREADS) +void _mi_error_message(int err, const char* fmt, ...); + #define mi_lock_t pthread_mutex_t static inline bool mi_lock_try_acquire(mi_lock_t* lock) { return (pthread_mutex_trylock(lock) == 0); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { - return (pthread_mutex_lock(lock) == 0); +static inline void mi_lock_acquire(mi_lock_t* lock) { + const int err = pthread_mutex_lock(lock); + if (err != 0) { + _mi_error_message(err, "internal error: lock cannot be acquired\n"); + } } static inline void mi_lock_release(mi_lock_t* lock) { pthread_mutex_unlock(lock); @@ -452,18 +484,16 @@ static inline void mi_lock_done(mi_lock_t* lock) { pthread_mutex_destroy(lock); } -/* #elif defined(__cplusplus) #include #define mi_lock_t std::mutex static inline bool mi_lock_try_acquire(mi_lock_t* lock) { - return lock->lock_try_acquire(); + return lock->try_lock(); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { lock->lock(); - return true; } static inline void mi_lock_release(mi_lock_t* lock) { lock->unlock(); @@ -474,7 +504,6 @@ static inline void mi_lock_init(mi_lock_t* lock) { static inline void mi_lock_done(mi_lock_t* lock) { (void)(lock); } -*/ #else @@ -487,12 +516,11 @@ static inline bool mi_lock_try_acquire(mi_lock_t* lock) { uintptr_t expected = 0; return mi_atomic_cas_strong_acq_rel(lock, &expected, (uintptr_t)1); } -static inline bool mi_lock_acquire(mi_lock_t* lock) { +static inline void mi_lock_acquire(mi_lock_t* lock) { for (int i = 0; i < 1000; i++) { // for at most 1000 tries? 
- if (mi_lock_try_acquire(lock)) return true; + if (mi_lock_try_acquire(lock)) return; mi_atomic_yield(); } - return true; } static inline void mi_lock_release(mi_lock_t* lock) { mi_atomic_store_release(lock, (uintptr_t)0); @@ -507,6 +535,4 @@ static inline void mi_lock_done(mi_lock_t* lock) { #endif - - #endif // MI_ATOMIC_H diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index a5ca3e27..208989e3 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -90,7 +90,6 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c -extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; void _mi_process_load(void); void mi_cdecl _mi_process_done(void); @@ -101,8 +100,10 @@ bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); bool _mi_preloading(void); // true while the C runtime is not initialized yet void _mi_thread_done(mi_heap_t* heap); -mi_tld_t* _mi_tld(void); // current tld: `_mi_tld() == _mi_heap_get_default()->tld` +mi_tld_t* _mi_tld(void); // current tld: `_mi_tld() == _mi_heap_get_default()->tld` +mi_subproc_t* _mi_subproc(void); +mi_subproc_t* _mi_subproc_main(void); mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; size_t _mi_thread_seq_id(void) mi_attr_noexcept; @@ -142,10 +143,12 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t m // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_init(void); -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid); -bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +mi_arena_t* _mi_arena_from_id(mi_arena_id_t id); + +void* _mi_arena_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +void* _mi_arena_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +void _mi_arena_free(void* p, size_t size, mi_memid_t memid); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena); bool _mi_arena_contains(const void* p); void _mi_arenas_collect(bool force_purge); void _mi_arena_unsafe_destroy_all(void); @@ -201,6 +204,7 @@ void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page); // "stats.c" void _mi_stats_done(mi_stats_t* stats); +void _mi_stats_merge_from(mi_stats_t* to, mi_stats_t* from); mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); @@ -418,11 +422,11 @@ static inline bool mi_heap_is_initialized(mi_heap_t* heap) { return (heap != &_mi_heap_empty); } -static inline uintptr_t _mi_ptr_cookie(const void* p) { - extern mi_heap_t _mi_heap_main; - mi_assert_internal(_mi_heap_main.cookie != 0); - return ((uintptr_t)p ^ _mi_heap_main.cookie); -} +//static inline uintptr_t _mi_ptr_cookie(const void* p) { +// extern mi_heap_t _mi_heap_main; +// mi_assert_internal(_mi_heap_main.cookie != 0); +// return ((uintptr_t)p ^ _mi_heap_main.cookie); +//} /* ----------------------------------------------------------- @@ -524,7 +528,7 @@ static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* 
heap) { if (heap != NULL) { page->heap = heap; page->heap_tag = heap->tag; - mi_atomic_store_release(&page->xthread_id, heap->thread_id); + mi_atomic_store_release(&page->xthread_id, heap->tld->thread_id); } else { page->heap = NULL; diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 61681138..cc912752 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -243,9 +243,6 @@ typedef size_t mi_page_flags_t; // atomically in `free.c:mi_free_block_mt`. typedef uintptr_t mi_thread_free_t; -// Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython) -typedef struct mi_subproc_s mi_subproc_t; - // A heap can serve only specific objects signified by its heap tag (e.g. various object types in CPython) typedef uint8_t mi_heaptag_t; @@ -296,10 +293,9 @@ typedef struct mi_page_s { uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary #endif - mi_heap_t* heap; // heap this threads belong to. + mi_heap_t* heap; // the heap owning this page (or NULL for abandoned pages) struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` - mi_subproc_t* subproc; // sub-process of this heap mi_memid_t memid; // provenance of the page memory } mi_page_t; @@ -380,7 +376,7 @@ typedef struct mi_random_cxt_s { // In debug mode there is a padding structure at the end of the blocks to check for buffer overflows -#if (MI_PADDING) +#if MI_PADDING typedef struct mi_padding_s { uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes) @@ -397,19 +393,14 @@ typedef struct mi_padding_s { // A heap owns a set of pages. struct mi_heap_s { - mi_tld_t* tld; - // _Atomic(mi_block_t*) thread_delayed_free; - mi_threadid_t thread_id; // thread this heap belongs too - mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) + mi_tld_t* tld; // thread-local data + mi_arena_t* exclusive_arena; // if the heap should only allocate from a specific arena (or NULL) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) - uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation size_t page_count; // total number of pages in the `pages` queues. size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) size_t page_retired_max; // largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread - mi_memid_t memid; // provenance of the heap struct itseft (meta or os) - long generic_count; long full_page_retain; // how many full pages can be retained per queue (before abondoning them) bool allow_page_reclaim; // `true` if this heap should not reclaim abandoned pages bool allow_page_abandon; // `true` if this heap can abandon pages to reduce memory footprint @@ -422,7 +413,8 @@ struct mi_heap_s { size_t guarded_sample_count; // current sample count (counting down to 0) #endif mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. 
- mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") + mi_page_queue_t pages[MI_BIN_COUNT]; // queue of pages for each size class (or "bin") + mi_memid_t memid; // provenance of the heap struct itself (meta or os) }; @@ -451,18 +443,18 @@ typedef struct mi_stat_counter_s { } mi_stat_counter_t; typedef struct mi_stats_s { - mi_stat_count_t pages; - mi_stat_count_t reserved; - mi_stat_count_t committed; - mi_stat_count_t reset; - mi_stat_count_t purged; - mi_stat_count_t page_committed; - mi_stat_count_t pages_abandoned; - mi_stat_count_t threads; - mi_stat_count_t normal; - mi_stat_count_t huge; - mi_stat_count_t giant; - mi_stat_count_t malloc; + mi_stat_count_t pages; + mi_stat_count_t reserved; + mi_stat_count_t committed; + mi_stat_count_t reset; + mi_stat_count_t purged; + mi_stat_count_t page_committed; + mi_stat_count_t pages_abandoned; + mi_stat_count_t threads; + mi_stat_count_t normal; + mi_stat_count_t huge; + mi_stat_count_t giant; + mi_stat_count_t malloc; mi_stat_counter_t pages_extended; mi_stat_counter_t pages_reclaim_on_alloc; mi_stat_counter_t pages_reclaim_on_free; @@ -480,53 +472,89 @@ typedef struct mi_stats_s { mi_stat_counter_t arena_count; mi_stat_counter_t guarded_alloc_count; #if MI_STAT>1 - mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; + mi_stat_count_t normal_bins[MI_BIN_COUNT]; #endif } mi_stats_t; // add to stat keeping track of the peak -void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); -void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_increase(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount); +void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount); // adjust stat in special cases to compensate for double counting -void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc); -void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_free); +void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc); +void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_free); +void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc); +void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount, bool on_free); // counters can just be increased -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); +void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); +void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount); #if (MI_STAT) -#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) -#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) -#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) -#define mi_stat_adjust_increase(stat,amnt,b) _mi_stat_adjust_increase( &(stat), amnt, b) -#define mi_stat_adjust_decrease(stat,amnt,b) _mi_stat_adjust_decrease( &(stat), amnt, b) +#define mi_debug_stat_increase(stat,amount) __mi_stat_increase( &(stat), amount) +#define mi_debug_stat_decrease(stat,amount) __mi_stat_decrease( &(stat), amount) +#define mi_debug_stat_counter_increase(stat,amount) __mi_stat_counter_increase( &(stat), amount) +#define mi_debug_stat_increase_mt(stat,amount) __mi_stat_increase_mt( &(stat), amount) +#define mi_debug_stat_decrease_mt(stat,amount) __mi_stat_decrease_mt( &(stat), amount) +#define 
mi_debug_stat_counter_increase_mt(stat,amount) __mi_stat_counter_increase_mt( &(stat), amount) +#define mi_debug_stat_adjust_increase_mt(stat,amnt,b) __mi_stat_adjust_increase_mt( &(stat), amnt, b) +#define mi_debug_stat_adjust_decrease_mt(stat,amnt,b) __mi_stat_adjust_decrease_mt( &(stat), amnt, b) #else -#define mi_stat_increase(stat,amount) ((void)0) -#define mi_stat_decrease(stat,amount) ((void)0) -#define mi_stat_counter_increase(stat,amount) ((void)0) -#define mi_stat_adjuct_increase(stat,amnt,b) ((void)0) -#define mi_stat_adjust_decrease(stat,amnt,b) ((void)0) +#define mi_debug_stat_increase(stat,amount) ((void)0) +#define mi_debug_stat_decrease(stat,amount) ((void)0) +#define mi_debug_stat_counter_increase(stat,amount) ((void)0) +#define mi_debug_stat_increase_mt(stat,amount) ((void)0) +#define mi_debug_stat_decrease_mt(stat,amount) ((void)0) +#define mi_debug_stat_counter_increase_mt(stat,amount) ((void)0) +#define mi_debug_stat_adjust_increase(stat,amnt,b) ((void)0) +#define mi_debug_stat_adjust_decrease(stat,amnt,b) ((void)0) #endif -#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) -#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) -#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) +#define mi_subproc_stat_counter_increase(subproc,stat,amount) __mi_stat_counter_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_increase(subproc,stat,amount) __mi_stat_increase_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_decrease(subproc,stat,amount) __mi_stat_decrease_mt( &(subproc)->stats.stat, amount) +#define mi_subproc_stat_adjust_increase(subproc,stat,amnt,b) __mi_stat_adjust_increase_mt( &(subproc)->stats.stat, amnt, b) +#define mi_subproc_stat_adjust_decrease(subproc,stat,amnt,b) __mi_stat_adjust_decrease_mt( &(subproc)->stats.stat, amnt, b) + +#define mi_os_stat_counter_increase(stat,amount) mi_subproc_stat_counter_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_increase(stat,amount) mi_subproc_stat_increase(_mi_subproc(),stat,amount) +#define mi_os_stat_decrease(stat,amount) mi_subproc_stat_decrease(_mi_subproc(),stat,amount) + +#define mi_heap_stat_counter_increase(heap,stat,amount) __mi_stat_counter_increase( &(heap)->tld->stats.stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) __mi_stat_increase( &(heap)->tld->stats.stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) __mi_stat_decrease( &(heap)->tld->stats.stat, amount) + +#define mi_debug_heap_stat_counter_increase(heap,stat,amount) mi_debug_stat_counter_increase( (heap)->tld->stats.stat, amount) +#define mi_debug_heap_stat_increase(heap,stat,amount) mi_debug_stat_increase( (heap)->tld->stats.stat, amount) +#define mi_debug_heap_stat_decrease(heap,stat,amount) mi_debug_stat_decrease( (heap)->tld->stats.stat, amount) // ------------------------------------------------------ -// Sub processes do not reclaim or visit segments -// from other sub processes +// Sub processes use separate arena's and no heaps/pages/blocks +// are shared between sub processes. 
+// The subprocess structure contains essentially all static variables (except per subprocess :-)) +// +// Each thread should belong to one sub-process only // ------------------------------------------------------ -struct mi_subproc_s { - _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // count of abandoned pages for this sub-process - _Atomic(size_t) abandoned_os_list_count; // count of abandoned pages in the os-list - mi_lock_t abandoned_os_lock; // lock for the abandoned os pages list (outside of arena's) (this lock protect list operations) - mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list - mi_page_t* abandoned_os_list; // doubly-linked list of abandoned pages outside of arena's (in OS allocated memory) - mi_page_t* abandoned_os_list_tail; // the tail-end of the list - mi_memid_t memid; // provenance of this memory block -}; +#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`) + // 160 arenas is enough for ~2 TiB memory + +typedef struct mi_subproc_s { + _Atomic(size_t) arena_count; // current count of arena's + _Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process + mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time + _Atomic(int64_t) purge_expire; // expiration is set if any arenas can be purged + + _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process + mi_page_t* os_abandoned_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on) + mi_lock_t os_abandoned_pages_lock; // lock for the os abandoned pages list (this lock protects list operations) + + mi_memid_t memid; // provenance of this memory block (meta or OS) + mi_stats_t stats; // sub-process statistics (tld stats are merged in on thread termination) +} mi_subproc_t; + // ------------------------------------------------------ // Thread Local data @@ -535,20 +563,21 @@ struct mi_subproc_s { // Milliseconds as in `int64_t` to avoid overflows typedef int64_t mi_msecs_t; - // Thread local data struct mi_tld_s { - unsigned long long heartbeat; // monotonic heartbeat count - mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) - mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) - mi_subproc_t* subproc; // sub-process this thread belongs to. - size_t tseq; // thread sequence id - mi_memid_t memid; // provenance of the tld memory itself (meta or OS) - bool recurse; // true if deferred was called; used to prevent infinite recursion. - bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) - mi_stats_t stats; // statistics + mi_threadid_t thread_id; // thread id of this thread + size_t thread_seq; // thread sequence id (linear count of created threads) + mi_subproc_t* subproc; // sub-process this thread belongs to. + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) + unsigned long long heartbeat; // monotonic heartbeat count + bool recurse; // true if deferred was called; used to prevent infinite recursion. 
+ bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) + mi_stats_t stats; // statistics + mi_memid_t memid; // provenance of the tld memory itself (meta or OS) }; + /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c index 14cbee45..5da9fc0c 100644 --- a/src/alloc-aligned.c +++ b/src/alloc-aligned.c @@ -193,9 +193,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; if mi_likely(is_aligned) { - #if MI_STAT>1 - mi_heap_stat_increase(heap, malloc, size); - #endif + mi_debug_heap_stat_increase(heap, malloc, size); void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); diff --git a/src/alloc.c b/src/alloc.c index 25d6f62e..e5f2b8ae 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -134,7 +134,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, mi_assert(size <= MI_SMALL_SIZE_MAX); #if MI_DEBUG const uintptr_t tid = _mi_thread_id(); - mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local + mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == tid); // heaps are thread local #endif #if (MI_PADDING || MI_GUARDED) if (size == 0) { size = sizeof(void*); } @@ -188,7 +188,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z else { // regular allocation mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == _mi_thread_id()); // heaps are thread local void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_track_malloc(p,size,zero); diff --git a/src/arena-meta.c b/src/arena-meta.c index 49195e22..4928a813 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -64,10 +64,11 @@ static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) { // allocate a fresh meta page and add it to the global list. static mi_meta_page_t* mi_meta_page_zalloc(void) { // allocate a fresh arena slice + // note: careful with _mi_subproc as it may recurse into mi_tld and meta_page_zalloc again.. 
mi_memid_t memid; - mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, + mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(_mi_subproc(), MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, true /* commit*/, true /* allow large */, - _mi_arena_id_none(), 0 /* tseq */, &memid ); + NULL /* req arena */, 0 /* thread_seq */, &memid); if (mpage == NULL) return NULL; mi_assert_internal(_mi_is_aligned(mpage,MI_META_PAGE_ALIGN)); if (!memid.initially_zero) { @@ -147,11 +148,8 @@ mi_decl_noinline void _mi_meta_free(void* p, size_t size, mi_memid_t memid) { _mi_memzero_aligned(mi_meta_block_start(mpage, block_idx), block_count*MI_META_BLOCK_SIZE); mi_bbitmap_setN(&mpage->blocks_free, block_idx, block_count); } - else if (mi_memid_is_os(memid)) { - _mi_os_free(p, size, memid); - } else { - mi_assert_internal(mi_memid_needs_no_free(memid)); + _mi_arena_free(p,size,memid); } } diff --git a/src/arena.c b/src/arena.c index e0044392..344dcd2f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -35,7 +35,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo // A memory arena descriptor typedef struct mi_arena_s { mi_memid_t memid; // memid of the memory area - mi_arena_id_t id; // arena id (> 0 where `arena == arenas[arena->id - 1]`) + mi_subproc_t* subproc; // subprocess this arena belongs to (`this 'in' this->subproc->arenas`) size_t slice_count; // total size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`) size_t info_slices; // initial slices reserved for the arena bitmaps @@ -43,7 +43,6 @@ typedef struct mi_arena_s { bool is_exclusive; // only allow allocations if specifically for this arena bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(mi_msecs_t) purge_expire; // expiration time when slices can be purged from `slices_purge`. - _Atomic(mi_msecs_t) purge_expire_extend; // the purge expiration may be extended by a bit mi_bbitmap_t* slices_free; // is the slice free? (a binned bitmap with size classes) mi_bitmap_t* slices_committed; // is the slice committed? (i.e. accessible) @@ -57,64 +56,40 @@ typedef struct mi_arena_s { } mi_arena_t; -#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`) - // 160 arenas is enough for ~2 TiB memory - -// The available arenas -static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; -static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 - - -static mi_lock_t mi_arena_reserve_lock; - -void _mi_arena_init(void) { - mi_lock_init(&mi_arena_reserve_lock); -} - /* ----------------------------------------------------------- Arena id's - id = arena_index + 1 ----------------------------------------------------------- */ -size_t mi_arena_id_index(mi_arena_id_t id) { - return (size_t)(id <= 0 ? 
MI_MAX_ARENAS : id - 1); -} - -static mi_arena_id_t mi_arena_id_create(size_t arena_index) { - mi_assert_internal(arena_index < MI_MAX_ARENAS); - return (int)arena_index + 1; -} - mi_arena_id_t _mi_arena_id_none(void) { - return 0; + return NULL; } -static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { - return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || - (arena_id == req_arena_id)); +mi_arena_t* _mi_arena_from_id(mi_arena_id_t id) { + return (mi_arena_t*)id; } -bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { + +static bool mi_arena_id_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena) { + return ((arena == req_arena) || // they match, + (req_arena == NULL && !arena->is_exclusive)); // or the arena is not exclusive, and we didn't request a specific one +} + +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena) { if (memid.memkind == MI_MEM_ARENA) { - const mi_arena_t* arena = memid.mem.arena.arena; - return mi_arena_id_is_suitable(arena->id, arena->is_exclusive, request_arena_id); + return mi_arena_id_is_suitable(memid.mem.arena.arena, request_arena); } else { - return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id); + return mi_arena_id_is_suitable(NULL, request_arena); } } -size_t mi_arena_get_count(void) { - return mi_atomic_load_relaxed(&mi_arena_count); +size_t mi_arenas_get_count(mi_subproc_t* subproc) { + return mi_atomic_load_relaxed(&subproc->arena_count); } -mi_arena_t* mi_arena_from_index(size_t idx) { - mi_assert_internal(idx < mi_arena_get_count()); - return mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[idx]); -} - -mi_arena_t* mi_arena_from_id(mi_arena_id_t id) { - return mi_arena_from_index(mi_arena_id_index(id)); +mi_arena_t* mi_arena_from_index(mi_subproc_t* subproc, size_t idx) { + mi_assert_internal(idx < mi_arenas_get_count(subproc)); + return mi_atomic_load_ptr_relaxed(mi_arena_t, &subproc->arenas[idx]); } static size_t mi_arena_info_slices(mi_arena_t* arena) { @@ -152,9 +127,7 @@ uint8_t* mi_arena_slice_start(mi_arena_t* arena, size_t slice_index) { // Arena area void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (size != NULL) *size = 0; - const size_t arena_index = mi_arena_id_index(arena_id); - if (arena_index >= MI_MAX_ARENAS) return NULL; - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + mi_arena_t* arena = _mi_arena_from_id(arena_id); if (arena == NULL) return NULL; if (size != NULL) { *size = mi_size_of_slices(arena->slice_count); } return mi_arena_start(arena); @@ -240,14 +213,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( mi_bitmap_setN(arena->slices_committed, slice_index, slice_count, &already_committed_count); // adjust the stats so we don't double count the commits if (already_committed_count > 0) { - _mi_stat_adjust_decrease(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count), true /* on alloc */); + mi_subproc_stat_adjust_decrease(arena->subproc, committed, mi_size_of_slices(already_committed_count), true /* on alloc */); } // now actually commit bool commit_zero = false; if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero)) { // failed to commit (todo: give warning?) 
if (already_committed_count > 0) { - _mi_stat_increase(&_mi_stats_main.committed, mi_size_of_slices(already_committed_count)); + mi_subproc_stat_increase(arena->subproc, committed, mi_size_of_slices(already_committed_count)); } memid->initially_committed = false; } @@ -261,15 +234,15 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( memid->initially_zero = false; } } - #endif + #endif } } else { // already fully commited. - // if the OS has overcommit, and this is the first time we access these pages, then + // if the OS has overcommit, and this is the first time we access these pages, then // count the commit now (as at arena reserve we didn't count those commits as these are on-demand) if (_mi_os_has_overcommit() && touched_slices > 0) { - _mi_stat_increase(&_mi_stats_main.committed, mi_size_of_slices(touched_slices)); + mi_subproc_stat_increase( arena->subproc, committed, mi_size_of_slices(touched_slices)); } } // tool support @@ -288,18 +261,17 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); if (commit) { mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); } mi_assert_internal(mi_bitmap_is_setN(arena->slices_dirty, slice_index, slice_count)); - + return p; } -// try to reserve a fresh arena space -static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id) -{ - // if (_mi_preloading()) return false; // use OS only while pre loading - if (req_arena_id != _mi_arena_id_none()) return false; +static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id); - const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); +// try to reserve a fresh arena space +static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_large, mi_arena_id_t* arena_id) +{ + const size_t arena_count = mi_arenas_get_count(subproc); if (arena_count > (MI_MAX_ARENAS - 4)) return false; // calc reserve @@ -341,18 +313,18 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re // on an OS with overcommit (Linux) we don't count the commit yet as it is on-demand. Once a slice // is actually allocated for the first time it will be counted. const bool adjust = (overcommit && arena_commit); - if (adjust) { _mi_stat_adjust_decrease(&_mi_stats_main.committed, arena_reserve, true /* on alloc */); } + if (adjust) { mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve, true /* on alloc */); } // and try to reserve the arena - int err = mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); + int err = mi_reserve_os_memory_ex2(subproc, arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); if (err != 0) { - if (adjust) { _mi_stat_adjust_increase(&_mi_stats_main.committed, arena_reserve, true); } // roll back + if (adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve, true); } // roll back // failed, try a smaller size? const size_t small_arena_reserve = (MI_SIZE_BITS == 32 ? 
128*MI_MiB : 1*MI_GiB); - if (adjust) { _mi_stat_adjust_decrease(&_mi_stats_main.committed, arena_reserve, true); } + if (adjust) { mi_subproc_stat_adjust_decrease( subproc, committed, arena_reserve, true); } if (arena_reserve > small_arena_reserve) { // try again err = mi_reserve_os_memory_ex(small_arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id); - if (err != 0 && adjust) { _mi_stat_adjust_increase(&_mi_stats_main.committed, arena_reserve, true); } // roll back + if (err != 0 && adjust) { mi_subproc_stat_adjust_increase( subproc, committed, arena_reserve, true); } // roll back } } return (err==0); @@ -365,32 +337,27 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re Arena iteration ----------------------------------------------------------- */ -static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, int numa_node, bool allow_large) { +static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_large) { if (!allow_large && arena->is_large) return false; - if (!mi_arena_id_is_suitable(arena->id, arena->is_exclusive, req_arena_id)) return false; - if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity + if (!mi_arena_id_is_suitable(arena, req_arena)) return false; + if (req_arena == NULL) { // if not specific, check numa affinity const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); if (!numa_suitable) return false; } return true; } - -#define mi_forall_arenas(req_arena_id, tseq, name_arena) \ - { \ - const size_t _arena_count = mi_arena_get_count(); \ - if (_arena_count > 0) { \ - const size_t _arena_cycle = _arena_count - 1; /* first search the arenas below the last one */ \ - size_t _start; \ - if (req_arena_id == _mi_arena_id_none()) { \ - /* always start searching in the arena's below the max */ \ - _start = (_arena_cycle <= 1 ? 0 : (tseq % _arena_cycle)); \ +#define mi_forall_arenas(subproc, req_arena, tseq, name_arena) { \ + const size_t _arena_count = mi_arenas_get_count(subproc); \ + const size_t _arena_cycle = (_arena_count == 0 ? 0 : _arena_count - 1); /* first search the arenas below the last one */ \ + /* always start searching in the arena's below the max */ \ + size_t _start = (_arena_cycle <= 1 ? 
0 : (tseq % _arena_cycle)); \ + for (size_t _i = 0; _i < _arena_count; _i++) { \ + mi_arena_t* name_arena; \ + if (req_arena != NULL) { \ + name_arena = req_arena; /* if there is a specific req_arena, only search that one */\ } \ else { \ - _start = mi_arena_id_index(req_arena_id); \ - mi_assert_internal(_start < _arena_count); \ - } \ - for (size_t _i = 0; _i < _arena_count; _i++) { \ size_t _idx; \ if (_i < _arena_cycle) { \ _idx = _i + _start; \ @@ -399,19 +366,20 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are else { \ _idx = _i; /* remaining arena's */ \ } \ - mi_arena_t* const name_arena = mi_arena_from_index(_idx); \ - if (name_arena != NULL) \ - { + name_arena = mi_arena_from_index(subproc,_idx); \ + } \ + if (name_arena != NULL) \ + { #define mi_forall_arenas_end() \ - } \ - if (req_arena_id != _mi_arena_id_none()) break; \ } \ - }} + if (req_arena != NULL) break; \ + } \ + } -#define mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, name_arena) \ - mi_forall_arenas(req_arena_id,tseq,name_arena) { \ - if (mi_arena_is_suitable(name_arena, req_arena_id, -1 /* todo: numa node */, allow_large)) { \ +#define mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, name_arena) \ + mi_forall_arenas(subproc, req_arena,tseq,name_arena) { \ + if (mi_arena_is_suitable(name_arena, req_arena, -1 /* todo: numa node */, allow_large)) { \ #define mi_forall_suitable_arenas_end() \ }} \ @@ -422,17 +390,16 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are ----------------------------------------------------------- */ // allocate slices from the arenas -static mi_decl_noinline void* mi_arena_try_find_free( - size_t slice_count, size_t alignment, - bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) +static mi_decl_noinline void* mi_arenas_try_find_free( + mi_subproc_t* subproc, size_t slice_count, size_t alignment, + bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert_internal(slice_count <= mi_slice_count_of_size(MI_ARENA_MAX_OBJ_SIZE)); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); if (alignment > MI_ARENA_SLICE_ALIGN) return NULL; // search arena's - mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, arena) + mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena) { void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid); if (p != NULL) return p; @@ -442,42 +409,42 @@ static mi_decl_noinline void* mi_arena_try_find_free( } // Allocate slices from the arena's -- potentially allocating a fresh arena -static mi_decl_noinline void* mi_arena_try_alloc( +static mi_decl_noinline void* mi_arenas_try_alloc( + mi_subproc_t* subproc, size_t slice_count, size_t alignment, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) + mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert(slice_count <= MI_ARENA_MAX_OBJ_SLICES); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); void* p; -again: + // try to find free slices in the arena's - p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); + p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); if (p != NULL) return p; // did we need a specific arena? - if (req_arena_id != _mi_arena_id_none()) return NULL; + if (req_arena != NULL) return NULL; // don't create arena's while preloading (todo: or should we?) 
if (_mi_preloading()) return NULL; // otherwise, try to reserve a new arena -- but one thread at a time.. (todo: allow 2 or 4 to reduce contention?) - if (mi_lock_try_acquire(&mi_arena_reserve_lock)) { - mi_arena_id_t arena_id = 0; - bool ok = mi_arena_reserve(mi_size_of_slices(slice_count), allow_large, req_arena_id, &arena_id); - mi_lock_release(&mi_arena_reserve_lock); - if (ok) { - // and try allocate in there - mi_assert_internal(req_arena_id == _mi_arena_id_none()); - p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); - if (p != NULL) return p; + const size_t arena_count = mi_arenas_get_count(subproc); + mi_lock(&subproc->arena_reserve_lock) { + if (arena_count == mi_arenas_get_count(subproc)) { + // we are the first to enter the lock, reserve a fresh arena + mi_arena_id_t arena_id = 0; + mi_arena_reserve(subproc, mi_size_of_slices(slice_count), allow_large, &arena_id); + } + else { + // another thread already reserved a new arena } } - else { - // if we are racing with another thread wait until the new arena is reserved (todo: a better yield?) - mi_atomic_yield(); - goto again; - } + // try once more to allocate in the new arena + mi_assert_internal(req_arena == NULL); + p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); + if (p != NULL) return p; return NULL; } @@ -504,10 +471,10 @@ static void* mi_arena_os_alloc_aligned( // Allocate large sized memory -void* _mi_arena_alloc_aligned( +void* _mi_arena_alloc_aligned( mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) + mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert_internal(memid != NULL); mi_assert_internal(size > 0); @@ -516,24 +483,23 @@ void* _mi_arena_alloc_aligned( // const int numa_node = _mi_os_numa_node(&tld->os); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? - req_arena_id == _mi_arena_id_none() && // not a specific arena? + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? 
size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && // and not too small/large alignment <= MI_ARENA_SLICE_ALIGN && align_offset == 0) // and good alignment { const size_t slice_count = mi_slice_count_of_size(size); - void* p = mi_arena_try_alloc(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); + void* p = mi_arenas_try_alloc(subproc,slice_count, alignment, commit, allow_large, req_arena, tseq, memid); if (p != NULL) return p; } // fall back to the OS - void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena_id, memid); + void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena, memid); return p; } -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) +void* _mi_arena_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { - return _mi_arena_alloc_aligned(size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena_id, tseq, memid); + return _mi_arena_alloc_aligned(subproc, size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena, tseq, memid); } @@ -542,7 +508,7 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t Arena page allocation ----------------------------------------------------------- */ -static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag, bool* keep_abandoned) { +static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, mi_heaptag_t heap_tag, bool* keep_abandoned) { // found an abandoned page of the right size mi_page_t* const page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); // can we claim ownership? @@ -554,9 +520,9 @@ static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, *keep_abandoned = true; return false; } - if (subproc != page->subproc || heap_tag != page->heap_tag) { - // wrong sub-process or heap_tag.. we need to unown again - // note: this normally never happens unless subprocesses/heaptags are actually used. + if (heap_tag != page->heap_tag) { + // wrong heap_tag.. we need to unown again + // note: this normally never happens unless heaptags are actually used. // (an unown might free the page, and depending on that we can keep it in the abandoned map or not) // note: a minor wrinkle: the page will still be mapped but the abandoned map entry is (temporarily) clear at this point. // so we cannot check in `mi_arena_free` for this invariant to hold. @@ -564,31 +530,31 @@ static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, *keep_abandoned = !freed; return false; } - // yes, we can reclaim it, keep the abandaned map entry clear + // yes, we can reclaim it, keep the abandoned map entry clear *keep_abandoned = false; return true; } -static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t block_size, mi_arena_id_t req_arena_id, mi_heaptag_t heaptag, mi_tld_t* tld) +static mi_page_t* mi_arena_page_try_find_abandoned(mi_subproc_t* subproc, size_t slice_count, size_t block_size, mi_arena_t* req_arena, mi_heaptag_t heaptag, size_t tseq) { MI_UNUSED(slice_count); const size_t bin = _mi_bin(block_size); mi_assert_internal(bin < MI_BIN_COUNT); // any abandoned in our size class? 
- mi_subproc_t* const subproc = tld->subproc; mi_assert_internal(subproc != NULL); - if (mi_atomic_load_relaxed(&subproc->abandoned_count[bin]) == 0) return NULL; + if (mi_atomic_load_relaxed(&subproc->abandoned_count[bin]) == 0) { + return NULL; + } // search arena's const bool allow_large = true; - size_t tseq = tld->tseq; - mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, arena) + mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena) { size_t slice_index; mi_bitmap_t* const bitmap = arena->pages_abandoned[bin]; - if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, subproc, heaptag)) { + if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, heaptag)) { // found an abandoned page of the right size // and claimed ownership. mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); @@ -596,8 +562,8 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl mi_assert_internal(mi_page_is_abandoned(page)); mi_assert_internal(mi_arena_has_page(arena,page)); mi_atomic_decrement_relaxed(&subproc->abandoned_count[bin]); - _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1); - _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_alloc, 1); + mi_subproc_stat_decrease( arena->subproc, pages_abandoned, 1); + mi_subproc_stat_counter_increase(arena->subproc, pages_reclaim_on_alloc, 1); _mi_page_free_collect(page, false); // update `used` count mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); @@ -615,8 +581,8 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl return NULL; } -static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_size, size_t block_alignment, - mi_arena_id_t req_arena_id, mi_tld_t* tld) +static mi_page_t* mi_arena_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_count, size_t block_size, size_t block_alignment, + mi_arena_t* req_arena, size_t tseq) { const bool allow_large = true; const bool commit = true; @@ -630,7 +596,7 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz !os_align && // not large alignment slice_count <= MI_ARENA_MAX_OBJ_SLICES) // and not too large { - page = (mi_page_t*)mi_arena_try_alloc(slice_count, page_alignment, commit, allow_large, req_arena_id, tld->tseq, &memid); + page = (mi_page_t*)mi_arenas_try_alloc(subproc, slice_count, page_alignment, commit, allow_large, req_arena, tseq, &memid); if (page != NULL) { mi_assert_internal(mi_bitmap_is_clearN(memid.mem.arena.arena->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count)); mi_bitmap_set(memid.mem.arena.arena->pages, memid.mem.arena.slice_index); @@ -642,10 +608,10 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz if (os_align) { // note: slice_count already includes the page mi_assert_internal(slice_count >= mi_slice_count_of_size(block_size) + mi_slice_count_of_size(page_alignment)); - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena_id, &memid); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena, &memid); } else { - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, 
req_arena_id, &memid); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, req_arena, &memid); } } @@ -698,7 +664,7 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz page->reserved = (uint16_t)reserved; page->page_start = (uint8_t*)page + block_start; page->block_size = block_size; - page->memid = memid; + page->memid = memid; page->free_is_zero = memid.initially_zero; if (block_size > 0 && _mi_is_power_of_two(block_size)) { page->block_size_shift = (uint8_t)mi_ctz(block_size); @@ -706,11 +672,13 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz else { page->block_size_shift = 0; } + // and own it + mi_page_try_claim_ownership(page); + + // register in the page map _mi_page_map_register(page); mi_assert_internal(_mi_ptr_page(page)==page); mi_assert_internal(_mi_ptr_page(mi_page_start(page))==page); - - mi_page_try_claim_ownership(page); mi_assert_internal(mi_page_block_size(page) == block_size); mi_assert_internal(mi_page_is_abandoned(page)); mi_assert_internal(mi_page_is_owned(page)); @@ -718,17 +686,17 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz } static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t slice_count, size_t block_size) { - const mi_arena_id_t req_arena_id = heap->arena_id; + mi_arena_t* req_arena = heap->exclusive_arena; mi_tld_t* const tld = heap->tld; // 1. look for an abandoned page - mi_page_t* page = mi_arena_page_try_find_abandoned(slice_count, block_size, req_arena_id, heap->tag, tld); + mi_page_t* page = mi_arena_page_try_find_abandoned(tld->subproc, slice_count, block_size, req_arena, heap->tag, tld->thread_seq); if (page != NULL) { return page; // return as abandoned } // 2. find a free block, potentially allocating a new arena - page = mi_arena_page_alloc_fresh(slice_count, block_size, 1, req_arena_id, tld); + page = mi_arena_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq); if (page != NULL) { mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count); _mi_page_init(heap, page); @@ -740,13 +708,13 @@ static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t slice_count, size static mi_page_t* mi_singleton_page_alloc(mi_heap_t* heap, size_t block_size, size_t block_alignment) { - const mi_arena_id_t req_arena_id = heap->arena_id; + mi_arena_t* req_arena = heap->exclusive_arena; mi_tld_t* const tld = heap->tld; const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); const size_t info_size = (os_align ? 
MI_PAGE_ALIGN : mi_page_info_size()); const size_t slice_count = mi_slice_count_of_size(info_size + block_size); - mi_page_t* page = mi_arena_page_alloc_fresh(slice_count, block_size, block_alignment, req_arena_id, tld); + mi_page_t* page = mi_arena_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq); if (page == NULL) return NULL; mi_assert(page != NULL); @@ -785,14 +753,13 @@ mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t block return page; } -static void mi_arena_free(void* p, size_t size, mi_memid_t memid); - void _mi_arena_page_free(mi_page_t* page) { mi_assert_internal(_mi_is_aligned(page, MI_PAGE_ALIGN)); mi_assert_internal(_mi_ptr_page(page)==page); mi_assert_internal(mi_page_is_owned(page)); mi_assert_internal(mi_page_all_free(page)); - mi_assert_internal(page->next==NULL); + mi_assert_internal(mi_page_is_abandoned(page)); + mi_assert_internal(page->next==NULL && page->prev==NULL); #if MI_DEBUG>1 if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { @@ -811,11 +778,12 @@ void _mi_arena_page_free(mi_page_t* page) { } #endif + // unregister page _mi_page_map_unregister(page); if (page->memid.memkind == MI_MEM_ARENA) { mi_bitmap_clear(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index); } - mi_arena_free(page, mi_memid_size(page->memid), page->memid); + _mi_arena_free(page, mi_memid_size(page->memid), page->memid); } /* ----------------------------------------------------------- @@ -828,9 +796,8 @@ void _mi_arena_page_abandon(mi_page_t* page) { mi_assert_internal(mi_page_is_owned(page)); mi_assert_internal(mi_page_is_abandoned(page)); mi_assert_internal(!mi_page_all_free(page)); - mi_assert_internal(page->next==NULL); + mi_assert_internal(page->next==NULL && page->prev == NULL); - mi_subproc_t* subproc = page->subproc; if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { // make available for allocations size_t bin = _mi_bin(mi_page_block_size(page)); @@ -845,13 +812,25 @@ void _mi_arena_page_abandon(mi_page_t* page) { mi_page_set_abandoned_mapped(page); const bool wasclear = mi_bitmap_set(arena->pages_abandoned[bin], slice_index); MI_UNUSED(wasclear); mi_assert_internal(wasclear); - mi_atomic_increment_relaxed(&subproc->abandoned_count[bin]); + mi_atomic_increment_relaxed(&arena->subproc->abandoned_count[bin]); + mi_subproc_stat_increase(arena->subproc, pages_abandoned, 1); } else { - // page is full (or a singleton), page is OS/externally allocated + // page is full (or a singleton), or the page is OS/externally allocated // leave as is; it will be reclaimed when an object is free'd in the page + mi_subproc_t* subproc = _mi_subproc(); + // but for non-arena pages, add to the subproc list so these can be visited + if (page->memid.memkind != MI_MEM_ARENA && mi_option_is_enabled(mi_option_visit_abandoned)) { + mi_lock(&subproc->os_abandoned_pages_lock) { + // push in front + page->prev = NULL; + page->next = subproc->os_abandoned_pages; + if (page->next != NULL) { page->next->prev = page; } + subproc->os_abandoned_pages = page; + } + } + mi_subproc_stat_increase(_mi_subproc(), pages_abandoned, 1); } - _mi_stat_increase(&_mi_stats_main.pages_abandoned, 1); _mi_page_unown(page); } @@ -868,8 +847,9 @@ bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page) { return false; } else { - _mi_stat_counter_increase(&_mi_stats_main.pages_reabandon_full, 1); - _mi_stat_adjust_decrease(&_mi_stats_main.pages_abandoned, 1, true /* on alloc */); // adjust as we are not 
abandoning fresh + mi_subproc_t* subproc = _mi_subproc(); + mi_subproc_stat_counter_increase( subproc, pages_reabandon_full, 1); + mi_subproc_stat_adjust_decrease( subproc, pages_abandoned, 1, true /* on alloc */); // adjust as we are not abandoning fresh _mi_arena_page_abandon(page); return true; } @@ -892,18 +872,28 @@ void _mi_arena_page_unabandon(mi_page_t* page) { mi_assert_internal(mi_bbitmap_is_clearN(arena->slices_free, slice_index, slice_count)); mi_assert_internal(mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)); - + // this busy waits until a concurrent reader (from alloc_abandoned) is done mi_bitmap_clear_once_set(arena->pages_abandoned[bin], slice_index); mi_page_clear_abandoned_mapped(page); - mi_atomic_decrement_relaxed(&page->subproc->abandoned_count[bin]); + mi_atomic_decrement_relaxed(&arena->subproc->abandoned_count[bin]); + mi_subproc_stat_decrease(arena->subproc, pages_abandoned, 1); } else { - // page is full (or a singleton), page is OS/nly allocated - // nothing to do - // TODO: maintain count of these as well? + // page is full (or a singleton), page is OS allocated + mi_subproc_t* subproc = _mi_subproc(); + mi_subproc_stat_decrease(_mi_subproc(), pages_abandoned, 1); + // if not an arena page, remove from the subproc os pages list + if (page->memid.memkind != MI_MEM_ARENA && mi_option_is_enabled(mi_option_visit_abandoned)) { + mi_lock(&subproc->os_abandoned_pages_lock) { + if (page->prev != NULL) { page->prev->next = page->next; } + if (page->next != NULL) { page->next->prev = page->prev; } + if (subproc->os_abandoned_pages == page) { subproc->os_abandoned_pages = page->next; } + page->next = NULL; + page->prev = NULL; + } + } } - _mi_stat_decrease(&_mi_stats_main.pages_abandoned, 1); } void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) { @@ -919,7 +909,7 @@ void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) { static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices); static void mi_arenas_try_purge(bool force, bool visit_all); -static void mi_arena_free(void* p, size_t size, mi_memid_t memid) { +void _mi_arena_free(void* p, size_t size, mi_memid_t memid) { if (p==NULL) return; if (size==0) return; @@ -981,12 +971,21 @@ void _mi_arenas_collect(bool force_purge) { mi_arenas_try_purge(force_purge, force_purge /* visit all? */); } + +// Is a pointer contained in the given arena area? +bool mi_arena_contains(mi_arena_id_t arena_id, const void* p) { + mi_arena_t* arena = _mi_arena_from_id(arena_id); + return (mi_arena_start(arena) <= (const uint8_t*)p && + mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) >(const uint8_t*)p); +} + // Is a pointer inside any of our arenas? 
bool _mi_arena_contains(const void* p) { - const size_t max_arena = mi_arena_get_count(); + mi_subproc_t* subproc = _mi_subproc(); + const size_t max_arena = mi_arenas_get_count(subproc); for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); - if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) >(const uint8_t*)p) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); + if (arena != NULL && mi_arena_contains(arena,p)) { return true; } } @@ -1001,14 +1000,14 @@ bool _mi_arena_contains(const void* p) { // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. -static void mi_arenas_unsafe_destroy(void) { - const size_t max_arena = mi_arena_get_count(); +static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) { + const size_t max_arena = mi_arenas_get_count(subproc); size_t new_max_arena = 0; for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); if (arena != NULL) { // mi_lock_done(&arena->abandoned_visit_lock); - mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); + mi_atomic_store_ptr_release(mi_arena_t, &subproc->arenas[i], NULL); if (mi_memkind_is_os(arena->memid.memkind)) { _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid); } @@ -1017,14 +1016,14 @@ static void mi_arenas_unsafe_destroy(void) { // try to lower the max arena. size_t expected = max_arena; - mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); + mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, new_max_arena); } // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. void _mi_arena_unsafe_destroy_all(void) { - mi_arenas_unsafe_destroy(); + mi_arenas_unsafe_destroy(_mi_subproc()); _mi_arenas_collect(true /* force purge */); // purge non-owned arenas } @@ -1033,40 +1032,36 @@ void _mi_arena_unsafe_destroy_all(void) { Add an arena. 
----------------------------------------------------------- */ -static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { +static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal(arena->slice_count > 0); - if (arena_id != NULL) { *arena_id = -1; } + if (arena_id != NULL) { *arena_id = NULL; } // first try to find a NULL entry - const size_t count = mi_arena_get_count(); + const size_t count = mi_arenas_get_count(subproc); size_t i; for (i = 0; i < count; i++) { - if (mi_arena_from_index(i) == NULL) { - arena->id = mi_arena_id_create(i); + if (mi_arena_from_index(subproc,i) == NULL) { mi_arena_t* expected = NULL; - if (mi_atomic_cas_ptr_strong_release(mi_arena_t, &mi_arenas[i], &expected, arena)) { + if (mi_atomic_cas_ptr_strong_release(mi_arena_t, &subproc->arenas[i], &expected, arena)) { // success - if (arena_id != NULL) { *arena_id = arena->id; } + if (arena_id != NULL) { *arena_id = arena; } return true; } - else { - arena->id = _mi_arena_id_none(); - } } } // otherwise increase the max - i = mi_atomic_increment_acq_rel(&mi_arena_count); + i = mi_atomic_increment_acq_rel(&subproc->arena_count); if (i >= MI_MAX_ARENAS) { - mi_atomic_decrement_acq_rel(&mi_arena_count); + mi_atomic_decrement_acq_rel(&subproc->arena_count); + arena->subproc = NULL; return false; } - _mi_stat_counter_increase(&stats->arena_count,1); - arena->id = mi_arena_id_create(i); - mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); - if (arena_id != NULL) { *arena_id = arena->id; } + mi_subproc_stat_counter_increase(arena->subproc, arena_count, 1); + mi_atomic_store_ptr_release(mi_arena_t,&subproc->arenas[i], arena); + if (arena_id != NULL) { *arena_id = arena; } return true; } @@ -1099,7 +1094,7 @@ static mi_bbitmap_t* mi_arena_bbitmap_init(size_t slice_count, uint8_t** base) { } -static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_assert(!is_large || (memid.initially_committed && memid.is_pinned)); mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); @@ -1138,7 +1133,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int } // init - arena->id = _mi_arena_id_none(); + arena->subproc = subproc; arena->memid = memid; arena->is_exclusive = exclusive; arena->slice_count = slice_count; @@ -1146,7 +1141,6 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int arena->numa_node = numa_node; // TODO: or get the current numa node if -1? 
(now it allows anyone to allocate on -1) arena->is_large = is_large; arena->purge_expire = 0; - arena->purge_expire_extend = 0; // mi_lock_init(&arena->abandoned_visit_lock); // init bitmaps @@ -1176,7 +1170,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int mi_bitmap_setN(arena->slices_dirty, 0, info_slices, NULL); } - return mi_arena_add(arena, arena_id, &_mi_stats_main); + return mi_arena_add(subproc, arena, arena_id); } @@ -1187,18 +1181,18 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is memid.initially_committed = is_committed; memid.initially_zero = is_zero; memid.is_pinned = is_large; - return mi_manage_os_memory_ex2(start, size, is_large, numa_node, exclusive, memid, arena_id); + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, is_large, numa_node, exclusive, memid, arena_id); } // Reserve a range of regular OS memory -int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { +static int mi_reserve_os_memory_ex2(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_SLICE_SIZE); // at least one slice mi_memid_t memid; void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, &memid); if (start == NULL) return ENOMEM; const bool is_large = memid.is_pinned; // todo: use separate is_large field? - if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(subproc, start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { _mi_os_free_ex(start, size, commit, memid); _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; @@ -1209,6 +1203,11 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc return 0; } +// Reserve a range of regular OS memory +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + return mi_reserve_os_memory_ex2(_mi_subproc(), size, commit, allow_large, exclusive, arena_id); +} + // Manage a range of regular OS memory bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); @@ -1248,7 +1247,7 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, mi_arena_t else if (mi_page_is_abandoned(page)) { c = (mi_page_is_singleton(page) ? 
's' : 'f'); } bit_of_page = (long)page->memid.mem.arena.slice_count; buf[bit] = c; - } + } else { char c = '?'; if (bit_of_page > 0) { c = '-'; } @@ -1262,7 +1261,7 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, mi_arena_t } if (bit==MI_BFIELD_BITS-1 && bit_of_page > 1) { c = '>'; } buf[bit] = c; - } + } } return bit_set_count; } @@ -1333,16 +1332,18 @@ static size_t mi_debug_show_bbitmap(const char* header, size_t slice_count, mi_b void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept { - size_t max_arenas = mi_arena_get_count(); + mi_subproc_t* subproc = _mi_subproc(); + size_t max_arenas = mi_arenas_get_count(subproc); size_t free_total = 0; size_t slice_total = 0; //size_t abandoned_total = 0; size_t page_total = 0; for (size_t i = 0; i < max_arenas; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); if (arena == NULL) break; + mi_assert(arena->subproc == subproc); slice_total += arena->slice_count; - _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : "")); + _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""), arena->subproc); if (show_inuse) { free_total += mi_debug_show_bbitmap("in-use slices", arena->slice_count, arena->slices_free, true, NULL); } @@ -1368,7 +1369,7 @@ void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) ----------------------------------------------------------- */ // reserve at a specific numa node int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - if (arena_id != NULL) *arena_id = -1; + if (arena_id != NULL) *arena_id = NULL; if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); @@ -1382,7 +1383,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, true, numa_node, exclusive, memid, arena_id)) { _mi_os_free(p, hsize, memid); return ENOMEM; } @@ -1455,7 +1456,7 @@ static bool mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_c // update committed bitmap if (needs_recommit) { - _mi_stat_adjust_decrease(&_mi_stats_main.committed, mi_size_of_slices(slice_count - already_committed), false /* on freed */); + mi_subproc_stat_adjust_decrease( arena->subproc, committed, mi_size_of_slices(slice_count - already_committed), false /* on freed */); mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count); } return needs_recommit; @@ -1475,14 +1476,15 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_ } else { // schedule purge + const mi_msecs_t expire = _mi_clock_now() + delay; mi_msecs_t expire0 = 0; - if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, _mi_clock_now() + delay)) { + if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire0, 
expire)) { // expiration was not yet set - mi_atomic_storei64_release(&arena->purge_expire_extend, 0); + // maybe set the global arenas expire as well (if it wasn't set already) + mi_atomic_casi64_strong_acq_rel(&arena->subproc->purge_expire, &expire0, expire); } - else if (mi_atomic_loadi64_acquire(&arena->purge_expire_extend) < 10*delay) { // limit max extension time + else { // already an expiration was set - mi_atomic_addi64_acq_rel(&arena->purge_expire_extend, (mi_msecs_t)(delay/10)); // add smallish extra delay } mi_bitmap_setN(arena->slices_purge, slice_index, slice_count, NULL); } @@ -1511,7 +1513,7 @@ static bool mi_arena_try_purge_range(mi_arena_t* arena, size_t slice_index, size } static bool mi_arena_try_purge_visitor(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) { - mi_purge_visit_info_t* vinfo = (mi_purge_visit_info_t*)arg; + mi_purge_visit_info_t* vinfo = (mi_purge_visit_info_t*)arg; // try to purge: first claim the free blocks if (mi_arena_try_purge_range(arena, slice_index, slice_count)) { vinfo->any_purged = true; @@ -1536,19 +1538,14 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) { // check pre-conditions if (arena->memid.is_pinned) return false; - mi_msecs_t expire_base = mi_atomic_loadi64_relaxed(&arena->purge_expire); - mi_msecs_t expire_extend = mi_atomic_loadi64_relaxed(&arena->purge_expire_extend); - const mi_msecs_t expire = expire_base + expire_extend; - if (expire == 0) return false; // expired yet? - if (!force && expire > now) return false; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (!force && (expire == 0 || expire > now)) return false; // reset expire (if not already set concurrently) - if (mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire_base, (mi_msecs_t)0)) { - mi_atomic_storei64_release(&arena->purge_expire_extend, (mi_msecs_t)0); // and also reset the extend - } - _mi_stat_counter_increase(&_mi_stats_main.arena_purges, 1); + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0); + mi_subproc_stat_counter_increase(arena->subproc, arena_purges, 1); // go through all purge info's (with max MI_BFIELD_BITS ranges at a time) // this also clears those ranges atomically (so any newly freed blocks will get purged next @@ -1560,30 +1557,46 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) } -static void mi_arenas_try_purge(bool force, bool visit_all) { +static void mi_arenas_try_purge(bool force, bool visit_all) +{ if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled - const size_t max_arena = mi_arena_get_count(); + // check if any arena needs purging? + mi_tld_t* tld = _mi_tld(); + mi_subproc_t* subproc = tld->subproc; + const mi_msecs_t now = _mi_clock_now(); + mi_msecs_t arenas_expire = mi_atomic_load_acquire(&subproc->purge_expire); + if (!force && (arenas_expire == 0 || arenas_expire < now)) return; + + const size_t max_arena = mi_arenas_get_count(subproc); if (max_arena == 0) return; // allow only one thread to purge at a time static mi_atomic_guard_t purge_guard; mi_atomic_guard(&purge_guard) { - const mi_msecs_t now = _mi_clock_now(); - const size_t arena_start = _mi_tld()->tseq % max_arena; - size_t max_purge_count = (visit_all ? 
max_arena : 1); + // increase global expire: at most one purge per delay cycle + mi_atomic_store_release(&subproc->purge_expire, now + mi_arena_purge_delay()); + const size_t arena_start = tld->thread_seq % max_arena; + size_t max_purge_count = (visit_all ? max_arena : 2); + bool all_visited = true; for (size_t _i = 0; _i < max_arena; _i++) { size_t i = _i + arena_start; if (i >= max_arena) { i -= max_arena; } - mi_arena_t* arena = mi_arena_from_index(i); + mi_arena_t* arena = mi_arena_from_index(subproc,i); if (arena != NULL) { if (mi_arena_try_purge(arena, now, force)) { - if (max_purge_count <= 1) break; + if (max_purge_count <= 1) { + all_visited = false; + break; + } max_purge_count--; } } } + if (all_visited) { + mi_atomic_store_release(&subproc->purge_expire, (mi_msecs_t)0); + } } } @@ -1612,13 +1625,7 @@ static bool mi_arena_pages_reregister(mi_arena_t* arena) { } mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* full_size) { - const size_t count = mi_arena_get_count(); - const size_t arena_idx = mi_arena_id_index(arena_id); - if (count <= arena_idx) { - _mi_warning_message("arena id is invalid (%zu)\n", arena_id); - return false; - } - mi_arena_t* arena = mi_arena_from_id(arena_id); + mi_arena_t* arena = _mi_arena_from_id(arena_id); if (arena==NULL) { return false; } @@ -1649,15 +1656,22 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* _mi_page_map_unregister_range(arena, asize); // set the entry to NULL - mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[arena_idx], NULL); - if (arena_idx + 1 == count) { // try adjust the count? - size_t expected = count; - mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, count-1); + mi_subproc_t* subproc = arena->subproc; + const size_t count = mi_arenas_get_count(subproc); + for(size_t i = 0; i < count; i++) { + if (mi_arena_from_index(subproc, i) == arena) { + mi_atomic_store_ptr_release(mi_arena_t, &subproc->arenas[i], NULL); + if (i + 1 == count) { // try adjust the count? 
+ size_t expected = count; + mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, count-1); + } + break; + } } return true; } -mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, mi_arena_id_t* arena_id) { +mi_decl_export bool mi_arena_reload(void* start, size_t size, mi_arena_id_t* arena_id) { // assume the memory area is already containing the arena if (arena_id != NULL) { *arena_id = _mi_arena_id_none(); } if (start == NULL || size == 0) return false; @@ -1679,13 +1693,10 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, _mi_warning_message("the reloaded arena is not exclusive\n"); return false; } - arena->memid.is_pinned = is_large; - arena->memid.initially_committed = is_committed; - arena->memid.initially_zero = is_zero; + arena->is_exclusive = true; - arena->is_large = is_large; - arena->id = _mi_arena_id_none(); - if (!mi_arena_add(arena, arena_id, &_mi_stats_main)) { + arena->subproc = _mi_subproc(); + if (!mi_arena_add(arena->subproc, arena, arena_id)) { return false; } mi_arena_pages_reregister(arena); diff --git a/src/bitmap.c b/src/bitmap.c index a04762af..60e0f3f9 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -114,7 +114,9 @@ static inline void mi_bfield_atomic_clear_once_set(_Atomic(mi_bfield_t)*b, size_ do { if mi_unlikely((old&mask) == 0) { old = mi_atomic_load_acquire(b); - if ((old&mask)==0) { _mi_stat_counter_increase(&_mi_stats_main.pages_unabandon_busy_wait, 1); } + if ((old&mask)==0) { + mi_subproc_stat_counter_increase(_mi_subproc(), pages_unabandon_busy_wait, 1); + } while ((old&mask)==0) { // busy wait mi_atomic_yield(); old = mi_atomic_load_acquire(b); @@ -1151,7 +1153,6 @@ static inline bool mi_bitmap_find(mi_bitmap_t* bitmap, size_t tseq, size_t n, si typedef struct mi_claim_fun_data_s { mi_arena_t* arena; - mi_subproc_t* subproc; mi_heaptag_t heap_tag; } mi_claim_fun_data_t; @@ -1165,7 +1166,7 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk const size_t slice_index = (chunk_idx * MI_BCHUNK_BITS) + cidx; mi_assert_internal(slice_index < mi_bitmap_max_bits(bitmap)); bool keep_set = true; - if ((*claim_fun)(slice_index, claim_data->arena, claim_data->subproc, claim_data->heap_tag, &keep_set)) { + if ((*claim_fun)(slice_index, claim_data->arena, claim_data->heap_tag, &keep_set)) { // success! mi_assert_internal(!keep_set); *pidx = slice_index; @@ -1190,9 +1191,9 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk // Find a set bit in the bitmap and try to atomically clear it and claim it. // (Used to find pages in the pages_abandoned bitmaps.) mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx, - mi_claim_fun_t* claim, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag) + mi_claim_fun_t* claim, mi_arena_t* arena, mi_heaptag_t heap_tag) { - mi_claim_fun_data_t claim_data = { arena, subproc, heap_tag }; + mi_claim_fun_data_t claim_data = { arena, heap_tag }; return mi_bitmap_find(bitmap, tseq, 1, pidx, &mi_bitmap_try_find_and_claim_visit, (void*)claim, &claim_data); } diff --git a/src/bitmap.h b/src/bitmap.h index ed37e975..7ee0e9bc 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -177,13 +177,13 @@ static inline bool mi_bitmap_is_clear(mi_bitmap_t* bitmap, size_t idx) { // Called once a bit is cleared to see if the memory slice can be claimed. 
-typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag, bool* keep_set); +typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, mi_heaptag_t heap_tag, bool* keep_set); // Find a set bits in the bitmap, atomically clear it, and check if `claim` returns true. // If not claimed, continue on (potentially setting the bit again depending on `keep_set`). // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx, - mi_claim_fun_t* claim, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag ); + mi_claim_fun_t* claim, mi_arena_t* arena, mi_heaptag_t heap_tag ); // Atomically clear a bit but only if it is set. Will block otherwise until the bit is set. diff --git a/src/free.c b/src/free.c index 14034593..88f784c7 100644 --- a/src/free.c +++ b/src/free.c @@ -210,7 +210,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { if (mi_page_all_free(page)) { // first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish) - _mi_arena_page_unabandon(page); + _mi_arena_page_unabandon(page); // we can free the page directly _mi_arena_page_free(page); return; @@ -234,15 +234,15 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag); if ((tagheap != NULL) && // don't reclaim across heap object types (tagheap->allow_page_reclaim) && // we are allowed to reclaim abandoned pages - (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? ) - (_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?) + // (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? ) + (_mi_arena_memid_is_suitable(page->memid, tagheap->exclusive_arena)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?) ) { if (mi_page_queue(tagheap, page->block_size)->first != NULL) { // don't reclaim for an block_size we don't use // first remove it from the abandoned pages in the arena -- this waits for any readers to finish _mi_arena_page_unabandon(page); _mi_heap_page_reclaim(tagheap, page); - _mi_stat_counter_increase(&_mi_stats_main.pages_reclaim_on_free, 1); + mi_heap_stat_counter_increase(tagheap, pages_reclaim_on_free, 1); return; } } diff --git a/src/heap.c b/src/heap.c index a24b8356..7b2ca741 100644 --- a/src/heap.c +++ b/src/heap.c @@ -143,7 +143,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - + // collect arenas (this is program wide so don't force purges on abandonment of threads) _mi_arenas_collect(collect == MI_FORCE /* force purge? */); } @@ -184,20 +184,19 @@ mi_heap_t* mi_heap_get_backing(void) { mi_assert_internal(heap!=NULL); mi_heap_t* bheap = heap->tld->heap_backing; mi_assert_internal(bheap!=NULL); - mi_assert_internal(bheap->thread_id == _mi_thread_id()); + mi_assert_internal(bheap->tld->thread_id == _mi_thread_id()); return bheap; } // todo: make order of parameters consistent (but would that break compat with CPython?) 
-void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t heap_tag, mi_tld_t* tld) +void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t heap_tag, mi_tld_t* tld) { - mi_assert_internal(heap!=NULL); + mi_assert_internal(heap!=NULL); mi_memid_t memid = heap->memid; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->memid = memid; heap->tld = tld; // avoid reading the thread-local tld during initialization - heap->thread_id = _mi_thread_id(); - heap->arena_id = arena_id; + heap->exclusive_arena = _mi_arena_from_id(arena_id); heap->allow_page_reclaim = !noreclaim; heap->allow_page_abandon = (!noreclaim && mi_option_get(mi_option_full_page_retain) >= 0); heap->full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); @@ -211,7 +210,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint heap->full_page_retain = heap->full_page_retain / 4; } } - + if (heap->tld->heap_backing == NULL) { heap->tld->heap_backing = heap; // first heap becomes the backing heap _mi_random_init(&heap->random); @@ -220,8 +219,8 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint _mi_random_split(&heap->tld->heap_backing->random, &heap->random); } heap->cookie = _mi_heap_random_next(heap) | 1; - heap->keys[0] = _mi_heap_random_next(heap); - heap->keys[1] = _mi_heap_random_next(heap); + //heap->keys[0] = _mi_heap_random_next(heap); + //heap->keys[1] = _mi_heap_random_next(heap);*/ _mi_heap_guarded_init(heap); // push on the thread local heaps list @@ -234,7 +233,15 @@ mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena mi_assert(heap_tag >= 0 && heap_tag < 256); // allocate and initialize a heap mi_memid_t memid; - mi_heap_t* heap = (mi_heap_t*)_mi_meta_zalloc(sizeof(mi_heap_t), &memid); + mi_heap_t* heap; + if (arena_id == _mi_arena_id_none()) { + heap = (mi_heap_t*)_mi_meta_zalloc(sizeof(mi_heap_t), &memid); + } + else { + // heaps associated wita a specific arena are allocated in that arena + // note: takes up at least one slice which is quite wasteful... + heap = (mi_heap_t*)_mi_arena_alloc(_mi_subproc(), _mi_align_up(sizeof(mi_heap_t),MI_ARENA_MIN_OBJ_SIZE), true, true, _mi_arena_from_id(arena_id), tld->thread_seq, &memid); + } if (heap==NULL) { _mi_error_message(ENOMEM, "unable to allocate heap meta-data\n"); return NULL; @@ -247,7 +254,7 @@ mi_heap_t* _mi_heap_create(int heap_tag, bool allow_destroy, mi_arena_id_t arena mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_assert_internal(bheap != NULL); - return _mi_heap_create(heap_tag, allow_destroy, arena_id, bheap->tld); + return _mi_heap_create(heap_tag, allow_destroy, arena_id, bheap->tld); } mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { @@ -260,7 +267,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { } bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) { - return _mi_arena_memid_is_suitable(memid, heap->arena_id); + return _mi_arena_memid_is_suitable(memid, heap->exclusive_arena); } uintptr_t _mi_heap_random_next(mi_heap_t* heap) { @@ -279,7 +286,7 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { } // called from `mi_heap_destroy` and `mi_heap_delete` to free the internal heap resources. 
-static void mi_heap_free(mi_heap_t* heap) { +static void mi_heap_free(mi_heap_t* heap, bool do_free_mem) { mi_assert(heap != NULL); mi_assert_internal(mi_heap_is_initialized(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return; @@ -306,7 +313,9 @@ static void mi_heap_free(mi_heap_t* heap) { mi_assert_internal(heap->tld->heaps != NULL); // and free the used memory - _mi_meta_free(heap, sizeof(*heap), heap->memid); + if (do_free_mem) { + _mi_meta_free(heap, sizeof(*heap), heap->memid); + } } // return a heap on the same thread as `heap` specialized for the specified tag (if it exists) @@ -340,17 +349,17 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ if (bsize > MI_LARGE_MAX_OBJ_SIZE) { mi_heap_stat_decrease(heap, huge, bsize); } -#if (MI_STAT) + #if (MI_STAT) _mi_page_free_collect(page, false); // update used count const size_t inuse = page->used; if (bsize <= MI_LARGE_MAX_OBJ_SIZE) { mi_heap_stat_decrease(heap, normal, bsize * inuse); -#if (MI_STAT>1) + #if (MI_STAT>1) mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], inuse); -#endif + #endif } mi_heap_stat_decrease(heap, malloc, bsize * inuse); // todo: off for aligned blocks... -#endif + #endif /// pretend it is all free now mi_assert_internal(mi_page_thread_free(page) == NULL); @@ -402,7 +411,7 @@ void mi_heap_destroy(mi_heap_t* heap) { #endif // free all pages _mi_heap_destroy_pages(heap); - mi_heap_free(heap); + mi_heap_free(heap,true); } #endif } @@ -461,20 +470,11 @@ void mi_heap_delete(mi_heap_t* heap) mi_assert_expensive(mi_heap_is_valid(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return; - /* - mi_heap_t* bheap = heap->tld->heap_backing; - if (bheap != heap && mi_heaps_are_compatible(bheap,heap)) { - // transfer still used pages to the backing heap - mi_heap_absorb(bheap, heap); - } - else - */ - { - // abandon all pages - _mi_heap_collect_abandon(heap); - } + // abandon all pages + _mi_heap_collect_abandon(heap); + mi_assert_internal(heap->page_count==0); - mi_heap_free(heap); + mi_heap_free(heap,true); } mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { @@ -488,7 +488,63 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { } +/* ----------------------------------------------------------- + Load/unload heaps +----------------------------------------------------------- */ +void mi_heap_unload(mi_heap_t* heap) { + mi_assert(mi_heap_is_initialized(heap)); + mi_assert_expensive(mi_heap_is_valid(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return; + if (heap->exclusive_arena == NULL) { + _mi_warning_message("cannot unload heaps that are not associated with an exclusive arena\n"); + return; + } + + // abandon all pages so all thread'id in the pages are cleared + _mi_heap_collect_abandon(heap); + mi_assert_internal(heap->page_count==0); + // remove from heap list + mi_heap_free(heap, false /* but don't actually free the memory */); + + // disassociate from the current thread-local and static state + heap->tld = NULL; + return; +} + +bool mi_heap_reload(mi_heap_t* heap, mi_arena_id_t arena_id) { + mi_assert(mi_heap_is_initialized(heap)); + if (heap==NULL || !mi_heap_is_initialized(heap)) return false; + if (heap->exclusive_arena == NULL) { + _mi_warning_message("cannot reload heaps that were not associated with an exclusive arena\n"); + return false; + } + if (heap->tld != NULL) { + _mi_warning_message("cannot reload heaps that were not unloaded first\n"); + return false; + } + mi_arena_t* arena = _mi_arena_from_id(arena_id); + if (heap->exclusive_arena != 
arena) { + _mi_warning_message("trying to reload a heap at a different arena address: %p vs %p\n", heap->exclusive_arena, arena); + return false; + } + + mi_assert_internal(heap->page_count==0); + + // re-associate from the current thread-local and static state + heap->tld = _mi_tld(); + + // reinit direct pages (as we may be in a different process) + mi_assert_internal(heap->page_count == 0); + for (int i = 0; i < MI_PAGES_DIRECT; i++) { + heap->pages_free_direct[i] = (mi_page_t*)&_mi_page_empty; + } + + // push on the thread local heaps list + heap->next = heap->tld->heaps; + heap->tld->heaps = heap; + return true; +} /* ----------------------------------------------------------- Analysis diff --git a/src/init.c b/src/init.c index 241a3826..673fdb55 100644 --- a/src/init.c +++ b/src/init.c @@ -11,31 +11,31 @@ terms of the MIT license. A copy of the license can be found in the file #include // memcpy, memset #include // atexit -#define MI_MEMID_STATIC {{{NULL,0}}, MI_MEM_STATIC, true /* pinned */, true /* committed */, false /* zero */ } +#define MI_MEMID_INIT(kind) {{{NULL,0}}, kind, true /* pinned */, true /* committed */, false /* zero */ } +#define MI_MEMID_STATIC MI_MEMID_INIT(MI_MEM_STATIC) // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - MI_ATOMIC_VAR_INIT(0), // xthread_id - NULL, // free - 0, // used - 0, // capacity - 0, // reserved capacity - 0, // block size shift - 0, // retire_expire - NULL, // local_free - MI_ATOMIC_VAR_INIT(0), // xthread_free - MI_ATOMIC_VAR_INIT(0), // xflags - 0, // block_size - NULL, // page_start - 0, // heap tag - false, // is_zero + MI_ATOMIC_VAR_INIT(0), // xthread_id + NULL, // free + 0, // used + 0, // capacity + 0, // reserved capacity + 0, // block size shift + 0, // retire_expire + NULL, // local_free + MI_ATOMIC_VAR_INIT(0), // xthread_free + MI_ATOMIC_VAR_INIT(0), // xflags + 0, // block_size + NULL, // page_start + 0, // heap tag + false, // is_zero #if (MI_PADDING || MI_ENCODE_FREELIST) - { 0, 0 }, + { 0, 0 }, // keys #endif - NULL, // xheap - NULL, NULL, // next, prev - NULL, // subproc - MI_MEMID_STATIC // memid + NULL, // xheap + NULL, NULL, // next, prev + MI_MEMID_STATIC // memid }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) @@ -96,28 +96,76 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- +static mi_decl_cache_align mi_subproc_t subproc_main; + +static mi_decl_cache_align mi_tld_t tld_empty = { + 0, // thread_id + 0, // thread_seq + &subproc_main, // subproc + NULL, // heap_backing + NULL, // heaps list + 0, // heartbeat + false, // recurse + false, // is_in_threadpool + { MI_STATS_NULL }, // stats + MI_MEMID_STATIC // memid +}; + mi_decl_cache_align const mi_heap_t _mi_heap_empty = { - NULL, - // MI_ATOMIC_VAR_INIT(NULL), // thread delayed free - 0, // thread_id - 0, // arena_id - 0, // cookie - { 0, 0 }, // keys - { {0}, {0}, 0, true }, // random - 0, // page count - MI_BIN_FULL, 0, // page retired min/max - NULL, // next - MI_MEMID_STATIC, // memid - 0, - 0, // full page retain - false, // can reclaim - true, // can eager abandon - 0, // tag + &tld_empty, // tld + NULL, // exclusive_arena + 0, // cookie + //{ 0, 0 }, // keys + { {0}, {0}, 0, true }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + NULL, // next + 0, // full page retain + false, // can reclaim + true, // can eager abandon + 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 1, // count is 1 so we 
never write to it (see `internal.h:mi_heap_malloc_use_guarded`) + 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) #endif MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY + MI_PAGE_QUEUES_EMPTY, + MI_MEMID_STATIC +}; + +extern mi_heap_t heap_main; + +static mi_decl_cache_align mi_tld_t tld_main = { + 0, // thread_id + 0, // thread_seq + &subproc_main, // subproc + &heap_main, // heap_backing + &heap_main, // heaps list + 0, // heartbeat + false, // recurse + false, // is_in_threadpool + { MI_STATS_NULL }, // stats + MI_MEMID_STATIC // memid +}; + +mi_decl_cache_align mi_heap_t heap_main = { + &tld_main, // thread local data + NULL, // exclusive arena + 0, // initial cookie + //{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) + { {0x846ca68b}, {0}, 0, true }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + NULL, // next heap + 2, // full page retain + true, // allow page reclaim + true, // allow page abandon + 0, // tag + #if MI_GUARDED + 0, 0, 0, 0, 0, + #endif + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY, + MI_MEMID_STATIC }; @@ -125,49 +173,9 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { return _mi_prim_thread_id(); } - // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; -extern mi_heap_t _mi_heap_main; - -static mi_decl_cache_align mi_subproc_t mi_subproc_default; - -static mi_decl_cache_align mi_tld_t tld_main = { - 0, - &_mi_heap_main, // heap_backing - &_mi_heap_main, // heaps list - &mi_subproc_default, // subproc - 0, // tseq - MI_MEMID_STATIC, // memid - false, // recurse - false, // is_in_threadpool - { MI_STATS_NULL } // stats -}; - -mi_decl_cache_align mi_heap_t _mi_heap_main = { - &tld_main, - // MI_ATOMIC_VAR_INIT(NULL), // thread delayed free list - 0, // thread id - 0, // initial cookie - 0, // arena id - { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) - { {0x846ca68b}, {0}, 0, true }, // random - 0, // page count - MI_BIN_FULL, 0, // page retired min/max - NULL, // next heap - MI_MEMID_STATIC, // memid - 0, - 2, // full page retain - true, // allow page reclaim - true, // allow page abandon - 0, // tag - #if MI_GUARDED - 0, 0, 0, 0, 0, - #endif - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY -}; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. 
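The heap changes above pair the new mi_heap_unload/mi_heap_reload entry points with mi_arena_unload/mi_arena_reload, with arena ids now being opaque handles resolved via _mi_arena_from_id. A minimal sketch of how these calls appear intended to compose, assuming the unloaded arena memory stays (or is mapped back) at the same base address; the 64 MiB reservation size and all variable names are illustrative:

#include <mimalloc.h>
#include <stddef.h>

static void arena_roundtrip(void) {
  // reserve an exclusive arena and create a heap whose meta-data lives inside it
  mi_arena_id_t arena_id;
  if (mi_reserve_os_memory_ex(64 * 1024 * 1024, true /* commit */, false /* allow large */,
                              true /* exclusive */, &arena_id) != 0) return;
  mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
  if (heap == NULL) return;
  void* p = mi_heap_malloc(heap, 128);
  (void)p;

  // detach: abandon the heap's pages and take the arena out of this sub-process
  mi_heap_unload(heap);
  void* base = NULL;
  size_t accessed_size = 0;
  size_t size = 0;
  if (!mi_arena_unload(arena_id, &base, &accessed_size, &size)) return;

  // ... the [base, base+size) range could now be handed to another process,
  // provided it gets mapped there at the same address ...

  // re-attach: mi_arena_reload assumes the area already contains the arena meta-data
  mi_arena_id_t reloaded_id;
  if (!mi_arena_reload(base, size, &reloaded_id)) return;
  if (!mi_heap_reload(heap, reloaded_id)) return;  // re-associate the heap with the current thread
  void* q = mi_heap_malloc(heap, 128);
  mi_free(q);
}

Note that mi_heap_new_in_arena now places the heap meta-data inside the arena itself, which is what lets the heap pointer survive the unload/reload round-trip.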
@@ -212,30 +220,46 @@ void _mi_heap_guarded_init(mi_heap_t* heap) { } #endif - -static void mi_heap_main_init(void) { - if (_mi_heap_main.cookie == 0) { - _mi_heap_main.thread_id = _mi_thread_id(); - _mi_heap_main.cookie = 1; - #if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB) - _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking - #else - _mi_random_init(&_mi_heap_main.random); - #endif - _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); - mi_lock_init(&mi_subproc_default.abandoned_os_lock); - mi_lock_init(&mi_subproc_default.abandoned_os_visit_lock); - _mi_heap_guarded_init(&_mi_heap_main); - _mi_heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); - _mi_heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); +// Initialize main subproc +static void mi_subproc_main_init(void) { + if (subproc_main.memid.memkind != MI_MEM_STATIC) { + subproc_main.memid = _mi_memid_create(MI_MEM_STATIC); + mi_lock_init(&subproc_main.os_abandoned_pages_lock); + mi_lock_init(&subproc_main.arena_reserve_lock); } } -mi_heap_t* _mi_heap_main_get(void) { +// Initialize main tld +static void mi_tld_main_init(void) { + if (tld_main.thread_id == 0) { + tld_main.thread_id = _mi_prim_thread_id(); + } +} + +// Initialization of the (statically allocated) main heap, and the main tld and subproc. +static void mi_heap_main_init(void) { + if (heap_main.cookie == 0) { + mi_subproc_main_init(); + mi_tld_main_init(); + // heap + heap_main.cookie = 1; + #if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB) + _mi_random_init_weak(&heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking + #else + _mi_random_init(&heap_main.random); + #endif + heap_main.cookie = _mi_heap_random_next(&heap_main); + //heap_main.keys[0] = _mi_heap_random_next(&heap_main); + //heap_main.keys[1] = _mi_heap_random_next(&heap_main); + _mi_heap_guarded_init(&heap_main); + heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); + heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); + } +} + +mi_heap_t* heap_main_get(void) { mi_heap_main_init(); - return &_mi_heap_main; + return &heap_main; } @@ -243,14 +267,21 @@ mi_heap_t* _mi_heap_main_get(void) { Thread local data ----------------------------------------------------------- */ -// Thread sequence number -static _Atomic(size_t) mi_tcount; +// Count current and total created threads +static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); +static _Atomic(size_t) thread_total_count; + +size_t _mi_current_thread_count(void) { + return mi_atomic_load_relaxed(&thread_count); +} + // The mimalloc thread local data -mi_decl_thread mi_tld_t* mi_tld; +mi_decl_thread mi_tld_t* thread_tld = &tld_empty; // Allocate fresh tld static mi_tld_t* mi_tld_alloc(void) { + mi_atomic_increment_relaxed(&thread_count); if (_mi_is_main_thread()) { return &tld_main; } @@ -267,8 +298,9 @@ static mi_tld_t* mi_tld_alloc(void) { tld->memid = memid; tld->heap_backing = NULL; tld->heaps = NULL; - tld->subproc = &mi_subproc_default; - tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1); + tld->subproc = &subproc_main; + tld->thread_id = _mi_prim_thread_id(); + tld->thread_seq = mi_atomic_add_acq_rel(&thread_total_count, 1); 
tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); return tld; } @@ -278,27 +310,49 @@ static mi_tld_t* mi_tld_alloc(void) { mi_decl_noinline static void mi_tld_free(void) { mi_tld_t* tld = _mi_tld(); - mi_tld = MI_TLD_INVALID; - _mi_meta_free(tld, sizeof(mi_tld_t), tld->memid); + if (tld != NULL && tld != MI_TLD_INVALID) { + _mi_stats_done(&tld->stats); + _mi_meta_free(tld, sizeof(mi_tld_t), tld->memid); + } + tld = MI_TLD_INVALID; + mi_atomic_decrement_relaxed(&thread_count); } mi_decl_noinline mi_tld_t* _mi_tld(void) { - if (mi_tld == MI_TLD_INVALID) { - _mi_error_message(EFAULT, "internal error: tld accessed after the thread terminated\n"); - mi_tld = NULL; + mi_tld_t* tld = thread_tld; + if (tld == MI_TLD_INVALID) { + _mi_error_message(EFAULT, "internal error: tld is accessed after the thread terminated\n"); + thread_tld = &tld_empty; } - if (mi_tld==NULL) { - mi_tld = mi_tld_alloc(); + if (tld==&tld_empty) { + thread_tld = tld = mi_tld_alloc(); } - return mi_tld; + return tld; } +mi_subproc_t* _mi_subproc(void) { + // should work without doing initialization (as it may be called from `_mi_tld -> mi_tld_alloc ... -> os_alloc -> _mi_subproc()` + // todo: this will still fail on OS systems where the first access to a thread-local causes allocation. + // on such systems we can check for this with the _mi_prim_get_default_heap as those are protected (by being + // stored in a TLS slot for example) + mi_heap_t* heap = mi_prim_get_default_heap(); + if (heap == NULL || heap == &_mi_heap_empty) { + return _mi_subproc_main(); + } + else { + return thread_tld->subproc; // don't call `_mi_tld()` + } +} /* ----------------------------------------------------------- Sub process ----------------------------------------------------------- */ +mi_subproc_t* _mi_subproc_main(void) { + return &subproc_main; +} + mi_subproc_id_t mi_subproc_main(void) { return NULL; } @@ -307,42 +361,44 @@ mi_subproc_id_t mi_subproc_new(void) { mi_memid_t memid; mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid); if (subproc == NULL) return NULL; - subproc->abandoned_os_list = NULL; subproc->memid = memid; - mi_lock_init(&subproc->abandoned_os_lock); - mi_lock_init(&subproc->abandoned_os_visit_lock); + mi_lock_init(&subproc->os_abandoned_pages_lock); + mi_lock_init(&subproc->arena_reserve_lock); return subproc; } mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) { - return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id); + return (subproc_id == NULL ? &subproc_main : (mi_subproc_t*)subproc_id); } void mi_subproc_delete(mi_subproc_id_t subproc_id) { if (subproc_id == NULL) return; mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); - // check if there are no abandoned segments still.. + // check if there are os pages still.. bool safe_to_delete = false; - if (mi_lock_acquire(&subproc->abandoned_os_lock)) { - if (subproc->abandoned_os_list == NULL) { + mi_lock(&subproc->os_abandoned_pages_lock) { + if (subproc->os_abandoned_pages == NULL) { safe_to_delete = true; } - mi_lock_release(&subproc->abandoned_os_lock); } if (!safe_to_delete) return; + + // merge stats back into the main subproc? + _mi_stats_merge_from(&_mi_subproc_main()->stats, &subproc->stats); + // safe to release // todo: should we refcount subprocesses? 
- mi_lock_done(&subproc->abandoned_os_lock); - mi_lock_done(&subproc->abandoned_os_visit_lock); + mi_lock_done(&subproc->os_abandoned_pages_lock); + mi_lock_done(&subproc->arena_reserve_lock); _mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid); } void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { - mi_heap_t* heap = mi_heap_get_default(); - if (heap == NULL) return; - mi_assert(heap->tld->subproc == &mi_subproc_default); - if (heap->tld->subproc != &mi_subproc_default) return; - heap->tld->subproc = _mi_subproc_from_id(subproc_id); + mi_tld_t* tld = _mi_tld(); + if (tld == NULL) return; + mi_assert(tld->subproc == &subproc_main); + if (tld->subproc != &subproc_main) return; + tld->subproc = _mi_subproc_from_id(subproc_id); } @@ -354,10 +410,10 @@ void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { static bool _mi_thread_heap_init(void) { if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; if (_mi_is_main_thread()) { - // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization + // mi_assert_internal(heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization // the main heap is statically allocated mi_heap_main_init(); - _mi_heap_set_default_direct(&_mi_heap_main); + _mi_heap_set_default_direct(&heap_main); //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { @@ -374,7 +430,7 @@ static bool _mi_thread_heap_init(void) { _mi_heap_set_default_direct(heap); // now that the heap is set for this thread, we can set the thread-local tld. - mi_tld = tld; + thread_tld = tld; } return false; } @@ -385,7 +441,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { if (!mi_heap_is_initialized(heap)) return true; // reset default heap - _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct(_mi_is_main_thread() ? &heap_main : (mi_heap_t*)&_mi_heap_empty); // switch to backing heap heap = heap->tld->heap_backing; @@ -405,22 +461,19 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_backing(heap)); // collect if not the main thread - if (heap != &_mi_heap_main) { + if (heap != &heap_main) { _mi_heap_collect_abandon(heap); } - // merge stats - _mi_stats_done(&heap->tld->stats); - // free heap meta data _mi_meta_free(heap, sizeof(mi_heap_t), heap->memid); - if (heap == &_mi_heap_main) { + if (heap == &heap_main) { #if 0 // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, // there may still be delete/free calls after the mi_fls_done is called. 
Issue #207 _mi_heap_destroy_pages(heap); - mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); + mi_assert_internal(heap->tld->heap_backing == &heap_main); #endif } @@ -451,19 +504,14 @@ static void mi_process_setup_auto_thread_done(void) { if (tls_initialized) return; tls_initialized = true; _mi_prim_thread_init_auto_done(); - _mi_heap_set_default_direct(&_mi_heap_main); + _mi_heap_set_default_direct(&heap_main); } bool _mi_is_main_thread(void) { - return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); + return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id()); } -static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); - -size_t _mi_current_thread_count(void) { - return mi_atomic_load_relaxed(&thread_count); -} // This is called from the `mi_malloc_generic` void mi_thread_init(void) mi_attr_noexcept @@ -476,8 +524,7 @@ void mi_thread_init(void) mi_attr_noexcept // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) if (_mi_thread_heap_init()) return; // returns true if already initialized - _mi_stat_increase(&_mi_stats_main.threads, 1); - mi_atomic_increment_relaxed(&thread_count); + mi_subproc_stat_increase(_mi_subproc_main(), threads, 1); //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); } @@ -499,11 +546,10 @@ void _mi_thread_done(mi_heap_t* heap) } // adjust stats - mi_atomic_decrement_relaxed(&thread_count); - _mi_stat_decrease(&_mi_stats_main.threads, 1); + mi_subproc_stat_decrease(_mi_subproc_main(), threads, 1); // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... - if (heap->thread_id != _mi_thread_id()) return; + if (heap->tld->thread_id != _mi_prim_thread_id()) return; // abandon the thread local heap _mi_thread_heap_done(heap); // returns true if already ran @@ -562,7 +608,7 @@ void _mi_process_load(void) { } // reseed random - _mi_random_reinit_if_weak(&_mi_heap_main.random); + _mi_random_reinit_if_weak(&heap_main.random); } #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) @@ -589,7 +635,7 @@ void mi_process_init(void) mi_attr_noexcept { // ensure we are called once static mi_atomic_once_t process_init; #if _MSC_VER < 1920 - mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main + mi_heap_main_init(); // vs2017 can dynamically re-initialize heap_main #endif if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; @@ -597,10 +643,11 @@ void mi_process_init(void) mi_attr_noexcept { mi_process_setup_auto_thread_done(); mi_detect_cpu_features(); + mi_subproc_main_init(); + mi_tld_main_init(); + mi_heap_main_init(); _mi_os_init(); _mi_page_map_init(); - _mi_arena_init(); - mi_heap_main_init(); #if MI_DEBUG _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif @@ -611,7 +658,7 @@ void mi_process_init(void) mi_attr_noexcept { #endif mi_thread_init(); - #if defined(_WIN32) + #if defined(_WIN32) && defined(MI_WIN_USE_FLS) // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup // will not call _mi_thread_done on the (still executing) main thread. See issue #508. 
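Since each sub-process now owns its arenas, abandoned OS page list, locks, and statistics, the public sub-process API is how callers keep groups of threads isolated. A small sketch of that flow, assuming the caller controls thread start-up; the pthread worker and its workload are illustrative:

#include <mimalloc.h>
#include <pthread.h>
#include <stddef.h>

static void* worker(void* arg) {
  // move this thread from the main sub-process into the given one before it
  // allocates, so its pages come from that sub-process's own arenas
  mi_subproc_add_current_thread((mi_subproc_id_t)arg);
  void* p = mi_malloc(1024);
  mi_free(p);
  return NULL;
}

int main(void) {
  mi_subproc_id_t subproc = mi_subproc_new();
  pthread_t t;
  pthread_create(&t, NULL, &worker, subproc);
  pthread_join(t, NULL);
  // deletion only proceeds when no abandoned OS pages remain; the sub-process
  // statistics are merged back into the main sub-process
  mi_subproc_delete(subproc);
  return 0;
}

Passing NULL (or the result of mi_subproc_main()) as the id refers to the statically allocated main sub-process.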
@@ -672,7 +719,7 @@ void mi_cdecl _mi_process_done(void) { mi_stats_print(NULL); } _mi_allocator_done(); - _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); + _mi_verbose_message("process done: 0x%zx\n", tld_main.thread_id); os_preloading = true; // don't call the C runtime anymore } diff --git a/src/os.c b/src/os.c index 86ecb16b..53e8f571 100644 --- a/src/os.c +++ b/src/os.c @@ -114,9 +114,9 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed) { _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } if (still_committed) { - _mi_stat_decrease(&os_stats->committed, size); + mi_os_stat_decrease(committed, size); } - _mi_stat_decrease(&os_stats->reserved, size); + mi_os_stat_decrease(reserved, size); } void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { @@ -171,11 +171,11 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large); } - _mi_stat_counter_increase(&os_stats->mmap_calls, 1); + mi_os_stat_counter_increase(mmap_calls, 1); if (p != NULL) { - _mi_stat_increase(&os_stats->reserved, size); + mi_os_stat_increase(reserved, size); if (commit) { - _mi_stat_increase(&os_stats->committed, size); + mi_os_stat_increase(committed, size); // seems needed for asan (or `mimalloc-test-api` fails) #ifdef MI_TRACK_ASAN if (*is_zero) { mi_track_mem_defined(p,size); } @@ -290,7 +290,7 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); - + bool os_is_large = false; bool os_is_zero = false; void* os_base = NULL; @@ -379,8 +379,8 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* bool _mi_os_commit(void* addr, size_t size, bool* is_zero) { if (is_zero != NULL) { *is_zero = false; } - _mi_stat_increase(&os_stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&os_stats->commit_calls, 1); + mi_os_stat_increase(committed, size); // use size for precise commit vs. decommit + mi_os_stat_counter_increase(commit_calls, 1); // page align range size_t csize; @@ -408,7 +408,7 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero) { static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit) { mi_assert_internal(needs_recommit!=NULL); - _mi_stat_decrease(&os_stats->committed, size); + mi_os_stat_decrease(committed, size); // page align size_t csize; @@ -440,8 +440,8 @@ bool _mi_os_reset(void* addr, size_t size) { size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - _mi_stat_increase(&os_stats->reset, csize); - _mi_stat_counter_increase(&os_stats->reset_calls, 1); + mi_os_stat_increase(reset, csize); + mi_os_stat_counter_increase(reset_calls, 1); #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN memset(start, 0, csize); // pretend it is eagerly reset @@ -460,8 +460,8 @@ bool _mi_os_reset(void* addr, size_t size) { bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset) { if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? 
-  _mi_stat_counter_increase(&os_stats->purge_calls, 1);
-  _mi_stat_increase(&os_stats->purged, size);
+  mi_os_stat_counter_increase(purge_calls, 1);
+  mi_os_stat_increase(purged, size);

   if (mi_option_is_enabled(mi_option_purge_decommits) &&   // should decommit?
       !_mi_preloading())                                   // don't decommit during preloading (unsafe)
@@ -595,8 +595,8 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse
     // success, record it
     page++;  // increase before timeout check (see issue #711)
-    _mi_stat_increase(&os_stats->committed, MI_HUGE_OS_PAGE_SIZE);
-    _mi_stat_increase(&os_stats->reserved, MI_HUGE_OS_PAGE_SIZE);
+    mi_os_stat_increase(committed, MI_HUGE_OS_PAGE_SIZE);
+    mi_os_stat_increase(reserved, MI_HUGE_OS_PAGE_SIZE);

     // check for timeout
     if (max_msecs > 0) {
diff --git a/src/page.c b/src/page.c
index 333234de..af30392a 100644
--- a/src/page.c
+++ b/src/page.c
@@ -387,9 +387,9 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
   const size_t bsize = mi_page_block_size(page);
   if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) {  // not full or huge queue?
     if (pq->last==page && pq->first==page) { // the only page in the queue?
-      mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
-      page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
       mi_heap_t* heap = mi_page_heap(page);
+      mi_debug_heap_stat_counter_increase(heap, page_no_retire, 1);
+      page->retire_expire = (bsize <= MI_SMALL_MAX_OBJ_SIZE ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
       mi_assert_internal(pq >= heap->pages);
       const size_t index = pq - heap->pages;
       mi_assert_internal(index < MI_BIN_FULL && index < MI_BIN_HUGE);
@@ -554,7 +554,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
   size_t page_size;
   //uint8_t* page_start =
   mi_page_area(page, &page_size);
-  mi_heap_stat_counter_increase(heap, pages_extended, 1);
+  mi_debug_heap_stat_counter_increase(heap, pages_extended, 1);

   // calculate the extend count
   const size_t bsize = mi_page_block_size(page);
@@ -583,7 +583,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
   }
   // enable the new free list
   page->capacity += (uint16_t)extend;
-  mi_heap_stat_increase(heap, page_committed, extend * bsize);
+  mi_debug_heap_stat_increase(heap, page_committed, extend * bsize);
   mi_assert_expensive(mi_page_is_valid_init(page));
 }

@@ -591,7 +591,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) {
 void _mi_page_init(mi_heap_t* heap, mi_page_t* page) {
   mi_assert(page != NULL);
   mi_page_set_heap(page, heap);
-  page->subproc = heap->tld->subproc;
+
   size_t page_size;
   uint8_t* page_start = mi_page_area(page, &page_size); MI_UNUSED(page_start);
   mi_track_mem_noaccess(page_start,page_size);
@@ -682,7 +682,8 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m
         _mi_page_free(page_candidate, pq);
         page_candidate = page;
       }
-      else if (page->used >= page_candidate->used && !mi_page_is_mostly_used(page)) {
+      // prefer to reuse fuller pages (in the hope the less used page gets freed)
+      else if (page->used >= page_candidate->used && !mi_page_is_mostly_used(page) && !mi_page_is_expandable(page)) {
         page_candidate = page;
       }
       // if we find a non-expandable candidate, or searched for N pages, return with the best candidate
@@ -708,8 +709,8 @@ static mi_decl_noinline mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, m
     page = next;
   } // for each page

-  mi_heap_stat_counter_increase(heap, searches, count);
-
+  mi_debug_heap_stat_counter_increase(heap, searches, count);
+
   // set the page to the best candidate
   if (page_candidate != NULL) {
     page = page_candidate;
diff --git a/src/stats.c b/src/stats.c
index bb17b936..102373ec 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -19,88 +19,93 @@ terms of the MIT license. A copy of the license can be found in the file
   Statistics operations
 ----------------------------------------------------------- */

-static bool mi_is_in_main(void* stat) {
-  return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main
-          && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t)));
+static void mi_stat_update_mt(mi_stat_count_t* stat, int64_t amount) {
+  if (amount == 0) return;
+  // add atomically
+  int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
+  mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
+  if (amount > 0) {
+    mi_atomic_addi64_relaxed(&stat->allocated, amount);
+  }
+  else {
+    mi_atomic_addi64_relaxed(&stat->freed, -amount);
+  }
 }

 static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
   if (amount == 0) return;
-  if mi_unlikely(mi_is_in_main(stat))
-  {
-    // add atomically (for abandoned pages)
-    int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
-    mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
-    if (amount > 0) {
-      mi_atomic_addi64_relaxed(&stat->allocated,amount);
-    }
-    else {
-      mi_atomic_addi64_relaxed(&stat->freed, -amount);
-    }
+  // add thread local
+  stat->current += amount;
+  if (stat->current > stat->peak) stat->peak = stat->current;
+  if (amount > 0) {
+    stat->allocated += amount;
   }
   else {
-    // add thread local
-    stat->current += amount;
-    if (stat->current > stat->peak) stat->peak = stat->current;
-    if (amount > 0) {
-      stat->allocated += amount;
-    }
-    else {
-      stat->freed += -amount;
-    }
+    stat->freed += -amount;
   }
 }
+
 // Adjust stats to compensate; for example before committing a range,
 // first adjust downwards with parts that were already committed so
 // we avoid double counting.
+static void mi_stat_adjust_mt(mi_stat_count_t* stat, int64_t amount, bool on_alloc) {
+  if (amount == 0) return;
+  // adjust atomically
+  mi_atomic_addi64_relaxed(&stat->current, amount);
+  mi_atomic_addi64_relaxed((on_alloc ? &stat->allocated : &stat->freed), amount);
+}
+
 static void mi_stat_adjust(mi_stat_count_t* stat, int64_t amount, bool on_alloc) {
   if (amount == 0) return;
-  if mi_unlikely(mi_is_in_main(stat))
-  {
-    // adjust atomically
-    mi_atomic_addi64_relaxed(&stat->current, amount);
-    mi_atomic_addi64_relaxed((on_alloc ? &stat->allocated : &stat->freed), amount);
+  stat->current += amount;
+  if (on_alloc) {
+    stat->allocated += amount;
   }
   else {
-    // don't affect the peak
-    stat->current += amount;
-    if (on_alloc) {
-      stat->allocated += amount;
-    }
-    else {
-      stat->freed += amount;
-    }
+    stat->freed += amount;
   }
 }

-void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
-  if (mi_is_in_main(stat)) {
-    mi_atomic_addi64_relaxed( &stat->count, 1 );
-    mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount );
-  }
-  else {
-    stat->count++;
-    stat->total += amount;
-  }
+void __mi_stat_counter_increase_mt(mi_stat_counter_t* stat, size_t amount) {
+  mi_atomic_addi64_relaxed(&stat->count, 1);
+  mi_atomic_addi64_relaxed(&stat->total, (int64_t)amount);
 }

-void _mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
+void __mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
+  stat->count++;
+  stat->total += amount;
+}
+
+void __mi_stat_increase_mt(mi_stat_count_t* stat, size_t amount) {
+  mi_stat_update_mt(stat, (int64_t)amount);
+}
+void __mi_stat_increase(mi_stat_count_t* stat, size_t amount) {
   mi_stat_update(stat, (int64_t)amount);
 }

-void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
+void __mi_stat_decrease_mt(mi_stat_count_t* stat, size_t amount) {
+  mi_stat_update_mt(stat, -((int64_t)amount));
+}
+void __mi_stat_decrease(mi_stat_count_t* stat, size_t amount) {
   mi_stat_update(stat, -((int64_t)amount));
 }

-void _mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
+void __mi_stat_adjust_increase_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
+  mi_stat_adjust_mt(stat, (int64_t)amount, on_alloc);
+}
+void __mi_stat_adjust_increase(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
   mi_stat_adjust(stat, (int64_t)amount, on_alloc);
 }

-void _mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
+void __mi_stat_adjust_decrease_mt(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
+  mi_stat_adjust_mt(stat, -((int64_t)amount), on_alloc);
+}
+void __mi_stat_adjust_decrease(mi_stat_count_t* stat, size_t amount, bool on_alloc) {
   mi_stat_adjust(stat, -((int64_t)amount), on_alloc);
 }

+
 // must be thread safe as it is called from stats_merge
 static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64_t unit) {
   if (stat==src) return;
@@ -401,36 +406,37 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)

 static mi_msecs_t mi_process_start; // = 0

-static mi_stats_t* mi_stats_get_default(void) {
-  mi_heap_t* heap = mi_heap_get_default();
-  return &heap->tld->stats;
-}
-
-static void mi_stats_merge_from(mi_stats_t* stats) {
-  if (stats != &_mi_stats_main) {
-    mi_stats_add(&_mi_stats_main, stats);
-    memset(stats, 0, sizeof(mi_stats_t));
-  }
+// return thread local stats
+static mi_stats_t* mi_get_tld_stats(void) {
+  return &_mi_tld()->stats;
 }

 void mi_stats_reset(void) mi_attr_noexcept {
-  mi_stats_t* stats = mi_stats_get_default();
-  if (stats != &_mi_stats_main) { memset(stats, 0, sizeof(mi_stats_t)); }
-  memset(&_mi_stats_main, 0, sizeof(mi_stats_t));
+  mi_stats_t* stats = mi_get_tld_stats();
+  mi_subproc_t* subproc = _mi_subproc();
+  if (stats != &subproc->stats) { _mi_memzero(stats, sizeof(mi_stats_t)); }
+  _mi_memzero(&subproc->stats, sizeof(mi_stats_t));
   if (mi_process_start == 0) { mi_process_start = _mi_clock_start(); };
 }

-void mi_stats_merge(void) mi_attr_noexcept {
-  mi_stats_merge_from( mi_stats_get_default() );
+void _mi_stats_merge_from(mi_stats_t* to, mi_stats_t* from) {
+  if (to != from) {
+    mi_stats_add(to, from);
+    _mi_memzero(from, sizeof(mi_stats_t));
+  }
 }

 void _mi_stats_done(mi_stats_t* stats) {  // called from `mi_thread_done`
-  mi_stats_merge_from(stats);
+  _mi_stats_merge_from(&_mi_subproc()->stats, stats);
+}
+
+void mi_stats_merge(void) mi_attr_noexcept {
+  _mi_stats_done( mi_get_tld_stats() );
 }

 void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
-  mi_stats_merge_from(mi_stats_get_default());
-  _mi_stats_print(&_mi_stats_main, out, arg);
+  mi_stats_merge();
+  _mi_stats_print(&_mi_subproc()->stats, out, arg);
 }

 void mi_stats_print(void* out) mi_attr_noexcept {
@@ -439,7 +445,7 @@ void mi_stats_print(void* out) mi_attr_noexcept {
 }

 void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept {
-  _mi_stats_print(mi_stats_get_default(), out, arg);
+  _mi_stats_print(mi_get_tld_stats(), out, arg);
 }

@@ -473,11 +479,12 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {

 mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
 {
+  mi_subproc_t* subproc = _mi_subproc();
   mi_process_info_t pinfo;
   _mi_memzero_var(pinfo);
   pinfo.elapsed        = _mi_clock_end(mi_process_start);
-  pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
-  pinfo.peak_commit    = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
+  pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.current)));
+  pinfo.peak_commit    = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)(&subproc->stats.committed.peak)));
   pinfo.current_rss    = pinfo.current_commit;
   pinfo.peak_rss       = pinfo.peak_commit;
   pinfo.utime          = 0;
diff --git a/test/test-stress.c b/test/test-stress.c
index 384e3911..6933e6a2 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -44,20 +44,18 @@ static int ITER = 10;
 static int THREADS = 4;
 static int SCALE = 10;
 static int ITER = 20;
-#define ALLOW_LARGE false
 #elif 0
 static int THREADS = 32;
 static int SCALE = 50;
 static int ITER = 50;
-#define ALLOW_LARGE false
-#elif 0
-static int THREADS = 64;
-static int SCALE = 400;
-static int ITER = 10;
+#elif 1
+static int THREADS = 32;
+static int SCALE = 25;
+static int ITER = 50;
 #define ALLOW_LARGE true
 #else
 static int THREADS = 32;    // more repeatable if THREADS <= #processors
-static int SCALE = 25;      // scaling factor
+static int SCALE = 50;      // scaling factor
 static int ITER = 50;       // N full iterations destructing and re-creating all threads
 #endif

@@ -66,7 +64,7 @@ static int ITER = 50;       // N full iterations destructing and re-creating a

 #define STRESS   // undefine for leak test
 #ifndef ALLOW_LARGE
-#define ALLOW_LARGE true
+#define ALLOW_LARGE false
 #endif

 static bool allow_large_objects = ALLOW_LARGE;    // allow very large objects? (set to `true` if SCALE>100)
@@ -363,7 +361,7 @@ int main(int argc, char** argv) {
 #else
   mi_stats_print(NULL);  // so we see rss/commit/elapsed
 #endif
-  //mi_stats_print(NULL);
+  mi_stats_print(NULL);
   //bench_end_program();
   return 0;
 }
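
Note (not part of the patch): the statistics rework above drops `_mi_stats_main` from these paths and instead accumulates into per-thread `mi_stats_t` records that get merged into the per-subprocess stats, either on thread exit (`_mi_stats_done`, called from `mi_thread_done`) or explicitly via `mi_stats_merge()`. Only the `_mt` variants of the stat updates use atomics, so the common thread-local path stays uncontended. Below is a minimal sketch of how this is exercised through the public API; it assumes mimalloc is linked and `<mimalloc.h>` is on the include path, and the `worker` helper, thread count, and allocation size are illustrative only.

#include <mimalloc.h>
#include <pthread.h>

#define N_THREADS 4

static void* worker(void* arg) {
  (void)arg;
  for (int i = 0; i < 1000; i++) {
    void* p = mi_malloc(64);   // recorded in this thread's thread-local stats
    mi_free(p);
  }
  return NULL;                 // on thread exit the thread-local stats are merged into
                               // the subprocess stats (via `_mi_stats_done` above)
}

int main(void) {
  mi_stats_reset();            // zeroes both the thread-local and the subprocess stats
  pthread_t t[N_THREADS];
  for (int i = 0; i < N_THREADS; i++) pthread_create(&t[i], NULL, worker, NULL);
  for (int i = 0; i < N_THREADS; i++) pthread_join(t[i], NULL);
  mi_stats_print(NULL);        // merges the calling thread's stats, then prints the
                               // subprocess totals (see `mi_stats_print_out` above)
  return 0;
}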