From a5b7d7f26461d0d241b6de41f215d63dbfa642cb Mon Sep 17 00:00:00 2001 From: daanx Date: Fri, 20 Dec 2024 21:38:31 -0800 Subject: [PATCH] subprocesses own arena's --- include/mimalloc.h | 2 +- include/mimalloc/atomic.h | 2 +- include/mimalloc/internal.h | 15 +- include/mimalloc/types.h | 56 +++---- src/alloc.c | 4 +- src/arena-meta.c | 6 +- src/arena.c | 315 +++++++++++++++++------------------- src/bitmap.c | 7 +- src/bitmap.h | 4 +- src/free.c | 6 +- src/heap.c | 7 +- src/init.c | 259 ++++++++++++++++------------- src/page.c | 2 +- 13 files changed, 351 insertions(+), 334 deletions(-) diff --git a/include/mimalloc.h b/include/mimalloc.h index 24217fae..7a58e54c 100644 --- a/include/mimalloc.h +++ b/include/mimalloc.h @@ -279,7 +279,7 @@ mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_commit mi_decl_export void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept; // Experimental: heaps associated with specific memory arena's -typedef int mi_arena_id_t; +typedef void* mi_arena_id_t; mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; diff --git a/include/mimalloc/atomic.h b/include/mimalloc/atomic.h index 7dc492f6..ddb5a9a3 100644 --- a/include/mimalloc/atomic.h +++ b/include/mimalloc/atomic.h @@ -434,7 +434,7 @@ static inline void mi_lock_init(mi_lock_t* lock) { InitializeSRWLock(lock); } static inline void mi_lock_done(mi_lock_t* lock) { - // nothing + (void)(lock); } diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index a5ca3e27..24792f8c 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -101,8 +101,10 @@ bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); bool _mi_preloading(void); // true while the C runtime is not initialized yet void _mi_thread_done(mi_heap_t* heap); -mi_tld_t* _mi_tld(void); // current tld: `_mi_tld() == _mi_heap_get_default()->tld` +mi_tld_t* _mi_tld(void); // current tld: `_mi_tld() == _mi_heap_get_default()->tld` +mi_subproc_t* _mi_subproc(void); +mi_subproc_t* _mi_subproc_main(void); mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; size_t _mi_thread_seq_id(void) mi_attr_noexcept; @@ -142,10 +144,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t m // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_init(void); -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid); -bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +mi_arena_t* _mi_arena_from_id(mi_arena_id_t id); + +void* _mi_arena_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +void* _mi_arena_alloc_aligned(mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena); bool 
_mi_arena_contains(const void* p); void _mi_arenas_collect(bool force_purge); void _mi_arena_unsafe_destroy_all(void); @@ -524,7 +527,7 @@ static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { if (heap != NULL) { page->heap = heap; page->heap_tag = heap->tag; - mi_atomic_store_release(&page->xthread_id, heap->thread_id); + mi_atomic_store_release(&page->xthread_id, heap->tld->thread_id); } else { page->heap = NULL; diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index 0cf909d0..4d43e887 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -243,9 +243,6 @@ typedef size_t mi_page_flags_t; // atomically in `free.c:mi_free_block_mt`. typedef uintptr_t mi_thread_free_t; -// Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython) -typedef struct mi_subproc_s mi_subproc_t; - // A heap can serve only specific objects signified by its heap tag (e.g. various object types in CPython) typedef uint8_t mi_heaptag_t; @@ -299,7 +296,6 @@ typedef struct mi_page_s { mi_heap_t* heap; // heap this page belongs to. struct mi_page_s* next; // next page owned by the heap with the same `block_size` struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` - mi_subproc_t* subproc; // sub-process of this heap mi_memid_t memid; // provenance of the page memory } mi_page_t; @@ -380,7 +376,7 @@ typedef struct mi_random_cxt_s { // In debug mode there is a padding structure at the end of the blocks to check for buffer overflows -#if (MI_PADDING) +#if MI_PADDING typedef struct mi_padding_s { uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes) @@ -397,10 +393,8 @@ typedef struct mi_padding_s { // A heap owns a set of pages. struct mi_heap_s { - mi_tld_t* tld; - // _Atomic(mi_block_t*) thread_delayed_free; - mi_threadid_t thread_id; // thread this heap belongs too - mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) + mi_tld_t* tld; // thread-local data + mi_arena_t* exclusive_arena; // if the heap belongs to a specific arena (or NULL) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation @@ -408,7 +402,6 @@ struct mi_heap_s { size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) size_t page_retired_max; // largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread - mi_memid_t memid; // provenance of the heap struct itseft (meta or os) long full_page_retain; // how many full pages can be retained per queue (before abandoning them) bool allow_page_reclaim; // `true` if this heap can reclaim abandoned pages bool allow_page_abandon; // `true` if this heap can abandon pages to reduce memory footprint @@ -421,7 +414,8 @@ struct mi_heap_s { size_t guarded_sample_count; // current sample count (counting down to 0) #endif mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
- mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") + mi_page_queue_t pages[MI_BIN_COUNT]; // queue of pages for each size class (or "bin") + mi_memid_t memid; // provenance of the heap struct itself (meta or os) }; @@ -479,7 +473,7 @@ typedef struct mi_stats_s { mi_stat_counter_t arena_count; mi_stat_counter_t guarded_alloc_count; #if MI_STAT>1 - mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; + mi_stat_count_t normal_bins[MI_BIN_COUNT]; #endif } mi_stats_t; @@ -513,19 +507,24 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // ------------------------------------------------------ -// Sub processes do not reclaim or visit segments -// from other sub processes +// Sub processes use separate arena's and no heaps/pages/blocks +// are shared between sub processes. +// Each thread should also belong to one sub-process only // ------------------------------------------------------ -struct mi_subproc_s { - _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // count of abandoned pages for this sub-process - _Atomic(size_t) abandoned_os_list_count; // count of abandoned pages in the os-list - mi_lock_t abandoned_os_lock; // lock for the abandoned os pages list (outside of arena's) (this lock protect list operations) - mi_lock_t abandoned_os_visit_lock; // ensure only one thread per subproc visits the abandoned os list - mi_page_t* abandoned_os_list; // doubly-linked list of abandoned pages outside of arena's (in OS allocated memory) - mi_page_t* abandoned_os_list_tail; // the tail-end of the list - mi_memid_t memid; // provenance of this memory block -}; +#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. but arena's scale up exponentially (see `mi_arena_reserve`) + // 160 arenas is enough for ~2 TiB memory + +typedef struct mi_subproc_s { + _Atomic(size_t) arena_count; // current count of arena's + _Atomic(mi_arena_t*) arenas[MI_MAX_ARENAS]; // arena's of this sub-process + mi_lock_t arena_reserve_lock; // lock to ensure arena's get reserved one at a time + _Atomic(size_t) abandoned_count[MI_BIN_COUNT]; // total count of abandoned pages for this sub-process + mi_page_queue_t os_pages; // list of pages that OS allocated and not in an arena (only used if `mi_option_visit_abandoned` is on) + mi_lock_t os_pages_lock; // lock for the os pages list (this lock protects list operations) + mi_memid_t memid; // provenance of this memory block (meta or OS) +} mi_subproc_t; + // ------------------------------------------------------ // Thread Local data @@ -534,20 +533,21 @@ struct mi_subproc_s { // Milliseconds as in `int64_t` to avoid overflows typedef int64_t mi_msecs_t; - // Thread local data struct mi_tld_s { - unsigned long long heartbeat; // monotonic heartbeat count + mi_threadid_t thread_id; // thread id of this thread + size_t thread_seq; // thread sequence id (linear count of created threads) + mi_subproc_t* subproc; // sub-process this thread belongs to. mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) - mi_subproc_t* subproc; // sub-process this thread belongs to. - size_t tseq; // thread sequence id - mi_memid_t memid; // provenance of the tld memory itself (meta or OS) + unsigned long long heartbeat; // monotonic heartbeat count bool recurse; // true if deferred was called; used to prevent infinite recursion. 
bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) mi_stats_t stats; // statistics + mi_memid_t memid; // provenance of the tld memory itself (meta or OS) }; + /* ----------------------------------------------------------- Error codes passed to `_mi_fatal_error` All are recoverable but EFAULT is a serious error and aborts by default in secure mode. diff --git a/src/alloc.c b/src/alloc.c index 25d6f62e..e5f2b8ae 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -134,7 +134,7 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, mi_assert(size <= MI_SMALL_SIZE_MAX); #if MI_DEBUG const uintptr_t tid = _mi_thread_id(); - mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local + mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == tid); // heaps are thread local #endif #if (MI_PADDING || MI_GUARDED) if (size == 0) { size = sizeof(void*); } @@ -188,7 +188,7 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z else { // regular allocation mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + mi_assert(heap->tld->thread_id == 0 || heap->tld->thread_id == _mi_thread_id()); // heaps are thread local void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic mi_track_malloc(p,size,zero); diff --git a/src/arena-meta.c b/src/arena-meta.c index ceda06ba..f28c50e9 100644 --- a/src/arena-meta.c +++ b/src/arena-meta.c @@ -64,10 +64,12 @@ static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) { // allocate a fresh meta page and add it to the global list. static mi_meta_page_t* mi_meta_page_zalloc(void) { // allocate a fresh arena slice + // note: we always use subproc_main directly for the meta-data since at thread start the metadata for the + // tld and heap need to be (meta) allocated and at that time we cannot read the tld pointer (yet). mi_memid_t memid; - mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, + mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(_mi_subproc_main(), MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, true /* commit*/, true /* allow large */, - _mi_arena_id_none(), 0 /* tseq */, &memid ); + NULL, 0 /* tseq */, &memid ); if (mpage == NULL) return NULL; mi_assert_internal(_mi_is_aligned(mpage,MI_META_PAGE_ALIGN)); if (!memid.initially_zero) { diff --git a/src/arena.c b/src/arena.c index 74cd4977..bb846da9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -35,7 +35,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo // A memory arena descriptor typedef struct mi_arena_s { mi_memid_t memid; // memid of the memory area - mi_arena_id_t id; // arena id (> 0 where `arena == arenas[arena->id - 1]`) + mi_subproc_t* subproc; // subprocess this arena belongs to (`this 'in' this->subproc->arenas`) size_t slice_count; // total size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`) size_t info_slices; // initial slices reserved for the arena bitmaps @@ -64,64 +64,45 @@ typedef struct mi_purge_info_s { } mi_purge_info_t; -#define MI_MAX_ARENAS (160) // Limited for now (and takes up .bss).. 
but arena's scale up exponentially (see `mi_arena_reserve`) - // 160 arenas is enough for ~2 TiB memory - -// The available arenas -static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; -static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 - - -static mi_lock_t mi_arena_reserve_lock; - -void _mi_arena_init(void) { - mi_lock_init(&mi_arena_reserve_lock); -} /* ----------------------------------------------------------- Arena id's - id = arena_index + 1 ----------------------------------------------------------- */ -size_t mi_arena_id_index(mi_arena_id_t id) { - return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); -} - -static mi_arena_id_t mi_arena_id_create(size_t arena_index) { - mi_assert_internal(arena_index < MI_MAX_ARENAS); - return (int)arena_index + 1; +static mi_arena_id_t mi_arena_id_create(mi_arena_t* arena) { + return arena; } mi_arena_id_t _mi_arena_id_none(void) { - return 0; + return NULL; } -static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { - return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || - (arena_id == req_arena_id)); +mi_arena_t* _mi_arena_from_id(mi_arena_id_t id) { + return (mi_arena_t*)id; } -bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { + +static bool mi_arena_id_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena) { + return ((arena == req_arena) || // they match, + (req_arena == NULL && !arena->is_exclusive)); // or the arena is not exclusive, and we didn't request a specific one +} + +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_t* request_arena) { if (memid.memkind == MI_MEM_ARENA) { - const mi_arena_t* arena = memid.mem.arena.arena; - return mi_arena_id_is_suitable(arena->id, arena->is_exclusive, request_arena_id); + return mi_arena_id_is_suitable(memid.mem.arena.arena, request_arena); } else { - return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id); + return mi_arena_id_is_suitable(NULL, request_arena); } } -size_t mi_arena_get_count(void) { - return mi_atomic_load_relaxed(&mi_arena_count); +size_t mi_arenas_get_count(mi_subproc_t* subproc) { + return mi_atomic_load_relaxed(&subproc->arena_count); } -mi_arena_t* mi_arena_from_index(size_t idx) { - mi_assert_internal(idx < mi_arena_get_count()); - return mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[idx]); -} - -mi_arena_t* mi_arena_from_id(mi_arena_id_t id) { - return mi_arena_from_index(mi_arena_id_index(id)); +mi_arena_t* mi_arena_from_index(mi_subproc_t* subproc, size_t idx) { + mi_assert_internal(idx < mi_arenas_get_count(subproc)); + return mi_atomic_load_ptr_relaxed(mi_arena_t, &subproc->arenas[idx]); } static size_t mi_arena_info_slices(mi_arena_t* arena) { @@ -159,9 +140,7 @@ uint8_t* mi_arena_slice_start(mi_arena_t* arena, size_t slice_index) { // Arena area void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (size != NULL) *size = 0; - const size_t arena_index = mi_arena_id_index(arena_id); - if (arena_index >= MI_MAX_ARENAS) return NULL; - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + mi_arena_t* arena = _mi_arena_from_id(arena_id); if (arena == NULL) return NULL; if (size != NULL) { *size = mi_size_of_slices(arena->slice_count); } return mi_arena_start(arena); @@ -297,12 +276,12 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( // try to reserve a fresh arena space -static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t 
req_arena_id, mi_arena_id_t* arena_id) +static bool mi_arena_reserve(mi_subproc_t* subproc, size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t* arena_id) { // if (_mi_preloading()) return false; // use OS only while pre loading if (req_arena_id != _mi_arena_id_none()) return false; - const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); + const size_t arena_count = mi_arenas_get_count(subproc); if (arena_count > (MI_MAX_ARENAS - 4)) return false; // calc reserve @@ -368,32 +347,27 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re Arena iteration ----------------------------------------------------------- */ -static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, int numa_node, bool allow_large) { +static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_t* req_arena, int numa_node, bool allow_large) { if (!allow_large && arena->is_large) return false; - if (!mi_arena_id_is_suitable(arena->id, arena->is_exclusive, req_arena_id)) return false; - if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity + if (!mi_arena_id_is_suitable(arena, req_arena)) return false; + if (req_arena == NULL) { // if not specific, check numa affinity const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); if (!numa_suitable) return false; } return true; } - -#define mi_forall_arenas(req_arena_id, tseq, name_arena) \ - { \ - const size_t _arena_count = mi_arena_get_count(); \ - if (_arena_count > 0) { \ - const size_t _arena_cycle = _arena_count - 1; /* first search the arenas below the last one */ \ - size_t _start; \ - if (req_arena_id == _mi_arena_id_none()) { \ - /* always start searching in the arena's below the max */ \ - _start = (_arena_cycle <= 1 ? 0 : (tseq % _arena_cycle)); \ +#define mi_forall_arenas(subproc, req_arena, tseq, name_arena) { \ + const size_t _arena_count = mi_arenas_get_count(subproc); \ + const size_t _arena_cycle = (_arena_count == 0 ? 0 : _arena_count - 1); /* first search the arenas below the last one */ \ + /* always start searching in the arena's below the max */ \ + size_t _start = (_arena_cycle <= 1 ? 
0 : (tseq % _arena_cycle)); \ + for (size_t _i = 0; _i < _arena_count; _i++) { \ + mi_arena_t* name_arena; \ + if (req_arena != NULL) { \ + name_arena = req_arena; /* if there is a specific req_arena, only search that one */\ } \ else { \ - _start = mi_arena_id_index(req_arena_id); \ - mi_assert_internal(_start < _arena_count); \ - } \ - for (size_t _i = 0; _i < _arena_count; _i++) { \ size_t _idx; \ if (_i < _arena_cycle) { \ _idx = _i + _start; \ @@ -402,19 +376,20 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are else { \ _idx = _i; /* remaining arena's */ \ } \ - mi_arena_t* const name_arena = mi_arena_from_index(_idx); \ - if (name_arena != NULL) \ - { + name_arena = mi_arena_from_index(subproc,_idx); \ + } \ + if (name_arena != NULL) \ + { #define mi_forall_arenas_end() \ - } \ - if (req_arena_id != _mi_arena_id_none()) break; \ } \ - }} + if (req_arena != NULL) break; \ + } \ + } -#define mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, name_arena) \ - mi_forall_arenas(req_arena_id,tseq,name_arena) { \ - if (mi_arena_is_suitable(name_arena, req_arena_id, -1 /* todo: numa node */, allow_large)) { \ +#define mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, name_arena) \ + mi_forall_arenas(subproc, req_arena,tseq,name_arena) { \ + if (mi_arena_is_suitable(name_arena, req_arena, -1 /* todo: numa node */, allow_large)) { \ #define mi_forall_suitable_arenas_end() \ }} \ @@ -425,17 +400,16 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are ----------------------------------------------------------- */ // allocate slices from the arenas -static mi_decl_noinline void* mi_arena_try_find_free( - size_t slice_count, size_t alignment, - bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) +static mi_decl_noinline void* mi_arenas_try_find_free( + mi_subproc_t* subproc, size_t slice_count, size_t alignment, + bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert_internal(slice_count <= mi_slice_count_of_size(MI_ARENA_MAX_OBJ_SIZE)); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); if (alignment > MI_ARENA_SLICE_ALIGN) return NULL; // search arena's - mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, arena) + mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena) { void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid); if (p != NULL) return p; @@ -445,42 +419,43 @@ static mi_decl_noinline void* mi_arena_try_find_free( } // Allocate slices from the arena's -- potentially allocating a fresh arena -static mi_decl_noinline void* mi_arena_try_alloc( +static mi_decl_noinline void* mi_arenas_try_alloc( + mi_subproc_t* subproc, size_t slice_count, size_t alignment, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) + mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert(slice_count <= MI_ARENA_MAX_OBJ_SLICES); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); void* p; // try to find free slices in the arena's - p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); + p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); if (p != NULL) return p; // did we need a specific arena? - if (req_arena_id != _mi_arena_id_none()) return NULL; + if (req_arena != NULL) return NULL; // don't create arena's while preloading (todo: or should we?) 
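// (note: `_mi_preloading` is true while the C runtime is not yet initialized; reserving a fresh
// arena at that point would take the `arena_reserve_lock` and call into the OS layer, which may not be safe yet)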
if (_mi_preloading()) return NULL; // otherwise, try to reserve a new arena -- but one thread at a time.. (todo: allow 2 or 4 to reduce contention?) - const size_t arena_count = mi_arena_get_count(); - if (mi_lock_acquire(&mi_arena_reserve_lock)) { + const size_t arena_count = mi_arenas_get_count(subproc); + if (mi_lock_acquire(&subproc->arena_reserve_lock)) { bool ok = true; - if (arena_count == mi_arena_get_count()) { + if (arena_count == mi_arenas_get_count(subproc)) { // we are the first to enter the lock, reserve a fresh arena mi_arena_id_t arena_id = 0; - ok = mi_arena_reserve(mi_size_of_slices(slice_count), allow_large, req_arena_id, &arena_id); + ok = mi_arena_reserve(subproc, mi_size_of_slices(slice_count), allow_large, req_arena, &arena_id); } else { // another thread already reserved a new arena } - mi_lock_release(&mi_arena_reserve_lock); + mi_lock_release(&subproc->arena_reserve_lock); if (ok) { // try once more to allocate in the new arena - mi_assert_internal(req_arena_id == _mi_arena_id_none()); - p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); + mi_assert_internal(req_arena == NULL); + p = mi_arenas_try_find_free(subproc, slice_count, alignment, commit, allow_large, req_arena, tseq, memid); if (p != NULL) return p; } } @@ -510,10 +485,10 @@ static void* mi_arena_os_alloc_aligned( // Allocate large sized memory -void* _mi_arena_alloc_aligned( +void* _mi_arena_alloc_aligned( mi_subproc_t* subproc, size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) + mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { mi_assert_internal(memid != NULL); mi_assert_internal(size > 0); @@ -522,24 +497,24 @@ void* _mi_arena_alloc_aligned( // const int numa_node = _mi_os_numa_node(&tld->os); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) - if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? - req_arena_id == _mi_arena_id_none() && // not a specific arena? + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) && // is arena allocation allowed? + req_arena == NULL && // not a specific arena? 
size >= MI_ARENA_MIN_OBJ_SIZE && size <= MI_ARENA_MAX_OBJ_SIZE && // and not too small/large alignment <= MI_ARENA_SLICE_ALIGN && align_offset == 0) // and good alignment { const size_t slice_count = mi_slice_count_of_size(size); - void* p = mi_arena_try_alloc(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); + void* p = mi_arenas_try_alloc(subproc,slice_count, alignment, commit, allow_large, req_arena, tseq, memid); if (p != NULL) return p; } // fall back to the OS - void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena_id, memid); + void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena, memid); return p; } -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) +void* _mi_arena_alloc(mi_subproc_t* subproc, size_t size, bool commit, bool allow_large, mi_arena_t* req_arena, size_t tseq, mi_memid_t* memid) { - return _mi_arena_alloc_aligned(size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena_id, tseq, memid); + return _mi_arena_alloc_aligned(subproc, size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena, tseq, memid); } @@ -548,7 +523,7 @@ void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t Arena page allocation ----------------------------------------------------------- */ -static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag, bool* keep_abandoned) { +static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, mi_heaptag_t heap_tag, bool* keep_abandoned) { // found an abandoned page of the right size mi_page_t* const page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); // can we claim ownership? @@ -560,9 +535,9 @@ static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, *keep_abandoned = true; return false; } - if (subproc != page->subproc || heap_tag != page->heap_tag) { - // wrong sub-process or heap_tag.. we need to unown again - // note: this normally never happens unless subprocesses/heaptags are actually used. + if (heap_tag != page->heap_tag) { + // wrong heap_tag.. we need to unown again + // note: this normally never happens unless heaptags are actually used. // (an unown might free the page, and depending on that we can keep it in the abandoned map or not) // note: a minor wrinkle: the page will still be mapped but the abandoned map entry is (temporarily) clear at this point. // so we cannot check in `mi_arena_free` for this invariant to hold. @@ -570,31 +545,31 @@ static bool mi_arena_try_claim_abandoned(size_t slice_index, mi_arena_t* arena, *keep_abandoned = !freed; return false; } - // yes, we can reclaim it, keep the abandaned map entry clear + // yes, we can reclaim it, keep the abandoned map entry clear *keep_abandoned = false; return true; } -static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t block_size, mi_arena_id_t req_arena_id, mi_heaptag_t heaptag, mi_tld_t* tld) +static mi_page_t* mi_arena_page_try_find_abandoned(mi_subproc_t* subproc, size_t slice_count, size_t block_size, mi_arena_t* req_arena, mi_heaptag_t heaptag, size_t tseq) { MI_UNUSED(slice_count); const size_t bin = _mi_bin(block_size); mi_assert_internal(bin < MI_BIN_COUNT); // any abandoned in our size class? 
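// (the atomic per-bin `abandoned_count` acts as a fast-path filter: when it is zero we can
// skip searching the `pages_abandoned` bitmaps of every arena entirely)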
- mi_subproc_t* const subproc = tld->subproc; mi_assert_internal(subproc != NULL); - if (mi_atomic_load_relaxed(&subproc->abandoned_count[bin]) == 0) return NULL; + if (mi_atomic_load_relaxed(&subproc->abandoned_count[bin]) == 0) { + return NULL; + } // search arena's const bool allow_large = true; - size_t tseq = tld->tseq; - mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, arena) + mi_forall_suitable_arenas(subproc, req_arena, tseq, allow_large, arena) { size_t slice_index; mi_bitmap_t* const bitmap = arena->pages_abandoned[bin]; - if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, subproc, heaptag)) { + if (mi_bitmap_try_find_and_claim(bitmap, tseq, &slice_index, &mi_arena_try_claim_abandoned, arena, heaptag)) { // found an abandoned page of the right size // and claimed ownership. mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index); @@ -621,8 +596,8 @@ static mi_page_t* mi_arena_page_try_find_abandoned(size_t slice_count, size_t bl return NULL; } -static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_size, size_t block_alignment, - mi_arena_id_t req_arena_id, mi_tld_t* tld) +static mi_page_t* mi_arena_page_alloc_fresh(mi_subproc_t* subproc, size_t slice_count, size_t block_size, size_t block_alignment, + mi_arena_t* req_arena, size_t tseq) { const bool allow_large = true; const bool commit = true; @@ -636,7 +611,7 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz !os_align && // not large alignment slice_count <= MI_ARENA_MAX_OBJ_SLICES) // and not too large { - page = (mi_page_t*)mi_arena_try_alloc(slice_count, page_alignment, commit, allow_large, req_arena_id, tld->tseq, &memid); + page = (mi_page_t*)mi_arenas_try_alloc(subproc, slice_count, page_alignment, commit, allow_large, req_arena, tseq, &memid); if (page != NULL) { mi_assert_internal(mi_bitmap_is_clearN(memid.mem.arena.arena->pages, memid.mem.arena.slice_index, memid.mem.arena.slice_count)); mi_bitmap_set(memid.mem.arena.arena->pages, memid.mem.arena.slice_index); @@ -648,10 +623,10 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz if (os_align) { // note: slice_count already includes the page mi_assert_internal(slice_count >= mi_slice_count_of_size(block_size) + mi_slice_count_of_size(page_alignment)); - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena_id, &memid); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena, &memid); } else { - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, req_arena_id, &memid); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, req_arena, &memid); } } @@ -724,17 +699,17 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz } static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t slice_count, size_t block_size) { - const mi_arena_id_t req_arena_id = heap->arena_id; + mi_arena_t* req_arena = heap->exclusive_arena; mi_tld_t* const tld = heap->tld; // 1. 
look for an abandoned page - mi_page_t* page = mi_arena_page_try_find_abandoned(slice_count, block_size, req_arena_id, heap->tag, tld); + mi_page_t* page = mi_arena_page_try_find_abandoned(tld->subproc, slice_count, block_size, req_arena, heap->tag, tld->thread_seq); if (page != NULL) { return page; // return as abandoned } // 2. find a free block, potentially allocating a new arena - page = mi_arena_page_alloc_fresh(slice_count, block_size, 1, req_arena_id, tld); + page = mi_arena_page_alloc_fresh(tld->subproc, slice_count, block_size, 1, req_arena, tld->thread_seq); if (page != NULL) { mi_assert_internal(page->memid.memkind != MI_MEM_ARENA || page->memid.mem.arena.slice_count == slice_count); _mi_page_init(heap, page); @@ -746,13 +721,13 @@ static mi_page_t* mi_arena_page_allocN(mi_heap_t* heap, size_t slice_count, size static mi_page_t* mi_singleton_page_alloc(mi_heap_t* heap, size_t block_size, size_t block_alignment) { - const mi_arena_id_t req_arena_id = heap->arena_id; + mi_arena_t* req_arena = heap->exclusive_arena; mi_tld_t* const tld = heap->tld; const bool os_align = (block_alignment > MI_PAGE_MAX_OVERALLOC_ALIGN); const size_t info_size = (os_align ? MI_PAGE_ALIGN : mi_page_info_size()); const size_t slice_count = mi_slice_count_of_size(info_size + block_size); - mi_page_t* page = mi_arena_page_alloc_fresh(slice_count, block_size, block_alignment, req_arena_id, tld); + mi_page_t* page = mi_arena_page_alloc_fresh(tld->subproc, slice_count, block_size, block_alignment, req_arena, tld->thread_seq); if (page == NULL) return NULL; mi_assert(page != NULL); @@ -836,7 +811,6 @@ void _mi_arena_page_abandon(mi_page_t* page) { mi_assert_internal(!mi_page_all_free(page)); mi_assert_internal(page->next==NULL); - mi_subproc_t* subproc = page->subproc; if (page->memid.memkind==MI_MEM_ARENA && !mi_page_is_full(page)) { // make available for allocations size_t bin = _mi_bin(mi_page_block_size(page)); @@ -851,7 +825,7 @@ void _mi_arena_page_abandon(mi_page_t* page) { mi_page_set_abandoned_mapped(page); const bool wasclear = mi_bitmap_set(arena->pages_abandoned[bin], slice_index); MI_UNUSED(wasclear); mi_assert_internal(wasclear); - mi_atomic_increment_relaxed(&subproc->abandoned_count[bin]); + mi_atomic_increment_relaxed(&arena->subproc->abandoned_count[bin]); } else { // page is full (or a singleton), page is OS/externally allocated @@ -902,7 +876,7 @@ void _mi_arena_page_unabandon(mi_page_t* page) { // this busy waits until a concurrent reader (from alloc_abandoned) is done mi_bitmap_clear_once_set(arena->pages_abandoned[bin], slice_index); mi_page_clear_abandoned_mapped(page); - mi_atomic_decrement_relaxed(&page->subproc->abandoned_count[bin]); + mi_atomic_decrement_relaxed(&arena->subproc->abandoned_count[bin]); } else { // page is full (or a singleton), page is OS/externally allocated @@ -989,9 +963,10 @@ void _mi_arenas_collect(bool force_purge) { // Is a pointer inside any of our arenas?
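// (note: since arenas are now owned by a sub-process, this searches only the arenas of the
// current thread's sub-process; a pointer into another sub-process' arenas is not found)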
bool _mi_arena_contains(const void* p) { - const size_t max_arena = mi_arena_get_count(); + mi_subproc_t* subproc = _mi_subproc(); + const size_t max_arena = mi_arenas_get_count(subproc); for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); if (arena != NULL && mi_arena_start(arena) <= (const uint8_t*)p && mi_arena_start(arena) + mi_size_of_slices(arena->slice_count) >(const uint8_t*)p) { return true; } @@ -1007,14 +982,14 @@ bool _mi_arena_contains(const void* p) { // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. -static void mi_arenas_unsafe_destroy(void) { - const size_t max_arena = mi_arena_get_count(); +static void mi_arenas_unsafe_destroy(mi_subproc_t* subproc) { + const size_t max_arena = mi_arenas_get_count(subproc); size_t new_max_arena = 0; for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); if (arena != NULL) { // mi_lock_done(&arena->abandoned_visit_lock); - mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); + mi_atomic_store_ptr_release(mi_arena_t, &subproc->arenas[i], NULL); if (mi_memkind_is_os(arena->memid.memkind)) { _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid); } @@ -1023,14 +998,14 @@ static void mi_arenas_unsafe_destroy(void) { // try to lower the max arena. size_t expected = max_arena; - mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); + mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, new_max_arena); } // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. void _mi_arena_unsafe_destroy_all(void) { - mi_arenas_unsafe_destroy(); + mi_arenas_unsafe_destroy(_mi_subproc()); _mi_arenas_collect(true /* force purge */); // purge non-owned arenas } @@ -1039,40 +1014,36 @@ void _mi_arena_unsafe_destroy_all(void) { Add an arena. 
----------------------------------------------------------- */ -static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { +static bool mi_arena_add(mi_subproc_t* subproc, mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { mi_assert_internal(arena != NULL); mi_assert_internal(arena->slice_count > 0); - if (arena_id != NULL) { *arena_id = -1; } + if (arena_id != NULL) { *arena_id = NULL; } // first try to find a NULL entry - const size_t count = mi_arena_get_count(); + const size_t count = mi_arenas_get_count(subproc); size_t i; for (i = 0; i < count; i++) { - if (mi_arena_from_index(i) == NULL) { - arena->id = mi_arena_id_create(i); + if (mi_arena_from_index(subproc,i) == NULL) { mi_arena_t* expected = NULL; - if (mi_atomic_cas_ptr_strong_release(mi_arena_t, &mi_arenas[i], &expected, arena)) { + if (mi_atomic_cas_ptr_strong_release(mi_arena_t, &subproc->arenas[i], &expected, arena)) { // success - if (arena_id != NULL) { *arena_id = arena->id; } + if (arena_id != NULL) { *arena_id = arena; } return true; - } - else { - arena->id = _mi_arena_id_none(); - } + } } } // otherwise increase the max - i = mi_atomic_increment_acq_rel(&mi_arena_count); + i = mi_atomic_increment_acq_rel(&subproc->arena_count); if (i >= MI_MAX_ARENAS) { - mi_atomic_decrement_acq_rel(&mi_arena_count); + mi_atomic_decrement_acq_rel(&subproc->arena_count); + arena->subproc = NULL; return false; } _mi_stat_counter_increase(&stats->arena_count,1); - arena->id = mi_arena_id_create(i); - mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); - if (arena_id != NULL) { *arena_id = arena->id; } + mi_atomic_store_ptr_release(mi_arena_t,&subproc->arenas[i], arena); + if (arena_id != NULL) { *arena_id = arena; } return true; } @@ -1099,7 +1070,7 @@ static mi_bitmap_t* mi_arena_bitmap_init(size_t slice_count, uint8_t** base) { } -static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(mi_subproc_t* subproc, void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { mi_assert(!is_large || (memid.initially_committed && memid.is_pinned)); mi_assert(_mi_is_aligned(start,MI_ARENA_SLICE_SIZE)); @@ -1138,7 +1109,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int } // init - arena->id = _mi_arena_id_none(); + arena->subproc = subproc; arena->memid = memid; arena->is_exclusive = exclusive; arena->slice_count = slice_count; @@ -1176,7 +1147,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int mi_bitmap_setN(arena->slices_dirty, 0, info_slices, NULL); } - return mi_arena_add(arena, arena_id, &_mi_stats_main); + return mi_arena_add(subproc, arena, arena_id, &_mi_stats_main); } @@ -1187,7 +1158,7 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is memid.initially_committed = is_committed; memid.initially_zero = is_zero; memid.is_pinned = is_large; - return mi_manage_os_memory_ex2(start, size, is_large, numa_node, exclusive, memid, arena_id); + return mi_manage_os_memory_ex2(_mi_subproc(), start, size, is_large, numa_node, exclusive, memid, arena_id); } // Reserve a range of regular OS memory @@ -1198,7 +1169,7 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, 
&memid); if (start == NULL) return ENOMEM; const bool is_large = memid.is_pinned; // todo: use separate is_large field? - if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(_mi_subproc(), start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { _mi_os_free_ex(start, size, commit, memid); _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } @@ -1307,16 +1278,18 @@ static size_t mi_debug_show_bitmap(const char* header, size_t slice_count, mi_bi } void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) mi_attr_noexcept { - size_t max_arenas = mi_arena_get_count(); + mi_subproc_t* subproc = _mi_subproc(); + size_t max_arenas = mi_arenas_get_count(subproc); size_t free_total = 0; size_t slice_total = 0; //size_t abandoned_total = 0; size_t page_total = 0; for (size_t i = 0; i < max_arenas; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &subproc->arenas[i]); if (arena == NULL) break; + mi_assert(arena->subproc == subproc); slice_total += arena->slice_count; - _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : "")); + _mi_output_message("arena %zu at %p: %zu slices (%zu MiB)%s, subproc: %p\n", i, arena, arena->slice_count, mi_size_of_slices(arena->slice_count)/MI_MiB, (arena->memid.is_pinned ? ", pinned" : ""), arena->subproc); if (show_inuse) { free_total += mi_debug_show_bitmap("in-use slices", arena->slice_count, arena->slices_free, true, NULL); } @@ -1342,7 +1315,7 @@ void mi_debug_show_arenas(bool show_pages, bool show_inuse, bool show_committed) ----------------------------------------------------------- */ // reserve at a specific numa node int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { - if (arena_id != NULL) *arena_id = -1; + if (arena_id != NULL) *arena_id = NULL; if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); @@ -1356,7 +1329,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + if (!mi_manage_os_memory_ex2(_mi_subproc(), p, hsize, true, numa_node, exclusive, memid, arena_id)) { _mi_os_free(p, hsize, memid); return ENOMEM; } @@ -1538,10 +1511,13 @@ static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force) } -static void mi_arenas_try_purge(bool force, bool visit_all) { +static void mi_arenas_try_purge(bool force, bool visit_all) + { if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled - const size_t max_arena = mi_arena_get_count(); + mi_tld_t* tld = _mi_tld(); + mi_subproc_t* subproc = tld->subproc; + const size_t max_arena = mi_arenas_get_count(subproc); if (max_arena == 0) return; // allow only one thread to purge at a time @@ -1549,12 +1525,12 @@ static void mi_arenas_try_purge(bool force, bool visit_all) { mi_atomic_guard(&purge_guard) { const mi_msecs_t now = _mi_clock_now(); - const size_t arena_start =
_mi_tld()->tseq % max_arena; + const size_t arena_start = tld->thread_seq % max_arena; size_t max_purge_count = (visit_all ? max_arena : 1); for (size_t _i = 0; _i < max_arena; _i++) { size_t i = _i + arena_start; if (i >= max_arena) { i -= max_arena; } - mi_arena_t* arena = mi_arena_from_index(i); + mi_arena_t* arena = mi_arena_from_index(subproc,i); if (arena != NULL) { if (mi_arena_try_purge(arena, now, force)) { if (max_purge_count <= 1) break; @@ -1590,13 +1566,7 @@ static bool mi_arena_pages_reregister(mi_arena_t* arena) { } mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* accessed_size, size_t* full_size) { - const size_t count = mi_arena_get_count(); - const size_t arena_idx = mi_arena_id_index(arena_id); - if (count <= arena_idx) { - _mi_warning_message("arena id is invalid (%zu)\n", arena_id); - return false; - } - mi_arena_t* arena = mi_arena_from_id(arena_id); + mi_arena_t* arena = _mi_arena_from_id(arena_id); if (arena==NULL) { return false; } @@ -1627,10 +1597,17 @@ mi_decl_export bool mi_arena_unload(mi_arena_id_t arena_id, void** base, size_t* _mi_page_map_unregister_range(arena, asize); // set the entry to NULL - mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[arena_idx], NULL); - if (arena_idx + 1 == count) { // try adjust the count? - size_t expected = count; - mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, count-1); + mi_subproc_t* subproc = arena->subproc; + const size_t count = mi_arenas_get_count(subproc); + for(size_t i = 0; i < count; i++) { + if (mi_arena_from_index(subproc, i) == arena) { + mi_atomic_store_ptr_release(mi_arena_t, &subproc->arenas[i], NULL); + if (i + 1 == count) { // try adjust the count? + size_t expected = count; + mi_atomic_cas_strong_acq_rel(&subproc->arena_count, &expected, count-1); + } + break; + } } return true; } @@ -1662,8 +1639,8 @@ mi_decl_export bool mi_arena_reload(void* start, size_t size, bool is_committed, arena->memid.initially_zero = is_zero; arena->is_exclusive = true; arena->is_large = is_large; - arena->id = _mi_arena_id_none(); - if (!mi_arena_add(arena, arena_id, &_mi_stats_main)) { + arena->subproc = NULL; + if (!mi_arena_add(_mi_subproc(), arena, arena_id, &_mi_stats_main)) { return false; } mi_arena_pages_reregister(arena); diff --git a/src/bitmap.c b/src/bitmap.c index 6fae1ed6..6352e4ea 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1228,7 +1228,6 @@ bool mi_bitmap_try_find_and_clearN_(mi_bitmap_t* bitmap, size_t tseq, size_t n, typedef struct mi_claim_fun_data_s { mi_arena_t* arena; - mi_subproc_t* subproc; mi_heaptag_t heap_tag; } mi_claim_fun_data_t; @@ -1242,7 +1241,7 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk const size_t slice_index = (chunk_idx * MI_BCHUNK_BITS) + cidx; mi_assert_internal(slice_index < mi_bitmap_max_bits(bitmap)); bool keep_set = true; - if ((*claim_fun)(slice_index, claim_data->arena, claim_data->subproc, claim_data->heap_tag, &keep_set)) { + if ((*claim_fun)(slice_index, claim_data->arena, claim_data->heap_tag, &keep_set)) { // success! mi_assert_internal(!keep_set); *pidx = slice_index; @@ -1267,9 +1266,9 @@ static bool mi_bitmap_try_find_and_claim_visit(mi_bitmap_t* bitmap, size_t chunk // Find a set bit in the bitmap and try to atomically clear it and claim it. // (Used to find pages in the pages_abandoned bitmaps.) 
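// (in the arena layer the claim function is `mi_arena_try_claim_abandoned` above, which takes
// ownership of the abandoned page and rejects it again if the heap tag does not match)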
mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx, - mi_claim_fun_t* claim, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag) + mi_claim_fun_t* claim, mi_arena_t* arena, mi_heaptag_t heap_tag) { - mi_claim_fun_data_t claim_data = { arena, subproc, heap_tag }; + mi_claim_fun_data_t claim_data = { arena, heap_tag }; return mi_bitmap_find(bitmap, tseq, 1, pidx, &mi_bitmap_try_find_and_claim_visit, (void*)claim, &claim_data); } diff --git a/src/bitmap.h b/src/bitmap.h index 47c22025..16ecea07 100644 --- a/src/bitmap.h +++ b/src/bitmap.h @@ -208,13 +208,13 @@ mi_decl_nodiscard static inline bool mi_bitmap_try_find_and_clearN(mi_bitmap_t* // Called once a bit is cleared to see if the memory slice can be claimed. -typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag, bool* keep_set); +typedef bool (mi_claim_fun_t)(size_t slice_index, mi_arena_t* arena, mi_heaptag_t heap_tag, bool* keep_set); // Find a set bit in the bitmap, atomically clear it, and check if `claim` returns true. // If not claimed, continue on (potentially setting the bit again depending on `keep_set`). // Returns true on success, and in that case sets the index: `0 <= *pidx <= MI_BITMAP_MAX_BITS-n`. mi_decl_nodiscard bool mi_bitmap_try_find_and_claim(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx, - mi_claim_fun_t* claim, mi_arena_t* arena, mi_subproc_t* subproc, mi_heaptag_t heap_tag ); + mi_claim_fun_t* claim, mi_arena_t* arena, mi_heaptag_t heap_tag ); // Atomically clear a bit but only if it is set. Will block otherwise until the bit is set. diff --git a/src/free.c b/src/free.c index 14034593..770856da 100644 --- a/src/free.c +++ b/src/free.c @@ -210,7 +210,7 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { if (mi_page_all_free(page)) { // first remove it from the abandoned pages in the arena (if mapped, this waits for any readers to finish) - _mi_arena_page_unabandon(page); + _mi_arena_page_unabandon(page); // we can free the page directly _mi_arena_page_free(page); return; @@ -234,8 +234,8 @@ static void mi_decl_noinline mi_free_try_collect_mt(mi_page_t* page) { mi_heap_t* const tagheap = _mi_heap_by_tag(heap, page->heap_tag); if ((tagheap != NULL) && // don't reclaim across heap object types (tagheap->allow_page_reclaim) && // we are allowed to reclaim abandoned pages - (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? ) - (_mi_arena_memid_is_suitable(page->memid, tagheap->arena_id)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?) + // (page->subproc == tagheap->tld->subproc) && // don't reclaim across sub-processes; todo: make this check faster (integrate with _mi_heap_by_tag ? ) + (_mi_arena_memid_is_suitable(page->memid, tagheap->exclusive_arena)) // don't reclaim across unsuitable arena's; todo: inline arena_is_suitable (?)
) { if (mi_page_queue(tagheap, page->block_size)->first != NULL) { // don't reclaim for an block_size we don't use diff --git a/src/heap.c b/src/heap.c index dee404d2..e8743691 100644 --- a/src/heap.c +++ b/src/heap.c @@ -178,7 +178,7 @@ mi_heap_t* mi_heap_get_backing(void) { mi_assert_internal(heap!=NULL); mi_heap_t* bheap = heap->tld->heap_backing; mi_assert_internal(bheap!=NULL); - mi_assert_internal(bheap->thread_id == _mi_thread_id()); + mi_assert_internal(bheap->tld->thread_id == _mi_thread_id()); return bheap; } @@ -190,8 +190,7 @@ void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->memid = memid; heap->tld = tld; // avoid reading the thread-local tld during initialization - heap->thread_id = _mi_thread_id(); - heap->arena_id = arena_id; + heap->exclusive_arena = _mi_arena_from_id(arena_id); heap->allow_page_reclaim = !noreclaim; heap->allow_page_abandon = (!noreclaim && mi_option_get(mi_option_full_page_retain) >= 0); heap->full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); @@ -254,7 +253,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { } bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) { - return _mi_arena_memid_is_suitable(memid, heap->arena_id); + return _mi_arena_memid_is_suitable(memid, heap->exclusive_arena); } uintptr_t _mi_heap_random_next(mi_heap_t* heap) { diff --git a/src/init.c b/src/init.c index 9a26d56f..a15a9c6c 100644 --- a/src/init.c +++ b/src/init.c @@ -33,8 +33,7 @@ const mi_page_t _mi_page_empty = { { 0, 0 }, #endif NULL, // xheap - NULL, NULL, // next, prev - NULL, // subproc + NULL, NULL, // next, prev MI_MEMID_STATIC // memid }; @@ -96,27 +95,76 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- +static mi_decl_cache_align mi_subproc_t subproc_main; + +static mi_decl_cache_align mi_tld_t tld_empty = { + 0, // thread_id + 0, // thread_seq + &subproc_main, // subproc + NULL, // heap_backing + NULL, // heaps list + 0, // heartbeat + false, // recurse + false, // is_in_threadpool + { MI_STATS_NULL }, // stats + MI_MEMID_STATIC // memid +}; + mi_decl_cache_align const mi_heap_t _mi_heap_empty = { - NULL, - // MI_ATOMIC_VAR_INIT(NULL), // thread delayed free - 0, // thread_id - 0, // arena_id - 0, // cookie - { 0, 0 }, // keys - { {0}, {0}, 0, true }, // random - 0, // page count - MI_BIN_FULL, 0, // page retired min/max - NULL, // next - MI_MEMID_STATIC, // memid - 0, // full page retain - false, // can reclaim - true, // can eager abandon - 0, // tag + &tld_empty, // tld + NULL, // exclusive_arena + 0, // cookie + { 0, 0 }, // keys + { {0}, {0}, 0, true }, // random + 0, // page count + MI_BIN_FULL, 0, // page retired min/max + NULL, // next + 0, // full page retain + false, // can reclaim + true, // can eager abandon + 0, // tag #if MI_GUARDED - 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) + 0, 0, 0, 0, 1, // count is 1 so we never write to it (see `internal.h:mi_heap_malloc_use_guarded`) #endif MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY + MI_PAGE_QUEUES_EMPTY, + MI_MEMID_STATIC +}; + +extern mi_heap_t heap_main; + +static mi_decl_cache_align mi_tld_t tld_main = { + 0, // thread_id + 0, // thread_seq + &subproc_main, // subproc + &heap_main, // heap_backing + &heap_main, // heaps list + 0, // heartbeat + false, // recurse + false, // is_in_threadpool + { MI_STATS_NULL 
}, // stats MI_MEMID_STATIC // memid }; mi_decl_cache_align mi_heap_t heap_main = { &tld_main, // thread local data NULL, // exclusive arena 0, // initial cookie { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) { {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap 2, // full page retain true, // allow page reclaim true, // allow page abandon 0, // tag #if MI_GUARDED 0, 0, 0, 0, 0, #endif MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY, MI_MEMID_STATIC }; @@ -124,49 +172,9 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { return _mi_prim_thread_id(); } - // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; -extern mi_heap_t _mi_heap_main; - -static mi_decl_cache_align mi_subproc_t mi_subproc_default; - -static mi_decl_cache_align mi_tld_t tld_main = { - 0, - &_mi_heap_main, // heap_backing - &_mi_heap_main, // heaps list - &mi_subproc_default, // subproc - 0, // tseq - MI_MEMID_STATIC, // memid - false, // recurse - false, // is_in_threadpool - { MI_STATS_NULL } // stats -}; - -mi_decl_cache_align mi_heap_t _mi_heap_main = { - &tld_main, - // MI_ATOMIC_VAR_INIT(NULL), // thread delayed free list - 0, // thread id - 0, // initial cookie - 0, // arena id - { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) - { {0x846ca68b}, {0}, 0, true }, // random - 0, // page count - MI_BIN_FULL, 0, // page retired min/max - NULL, // next heap - MI_MEMID_STATIC, // memid - 2, // full page retain - true, // allow page reclaim - true, // allow page abandon - 0, // tag - #if MI_GUARDED - 0, 0, 0, 0, 0, - #endif - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY -}; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. mi_stats_t _mi_stats_main = { MI_STATS_NULL }; @@ -210,30 +218,46 @@ void _mi_heap_guarded_init(mi_heap_t* heap) { } #endif - -static void mi_heap_main_init(void) { - if (_mi_heap_main.cookie == 0) { - _mi_heap_main.thread_id = _mi_thread_id(); - _mi_heap_main.cookie = 1; - #if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB) - _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking - #else - _mi_random_init(&_mi_heap_main.random); - #endif - _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); - _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); - mi_lock_init(&mi_subproc_default.abandoned_os_lock); - mi_lock_init(&mi_subproc_default.abandoned_os_visit_lock); - _mi_heap_guarded_init(&_mi_heap_main); - _mi_heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); - _mi_heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); +// Initialize main subproc +static void mi_subproc_main_init(void) { + if (subproc_main.memid.memkind != MI_MEM_STATIC) { + subproc_main.memid = _mi_memid_create(MI_MEM_STATIC); + mi_lock_init(&subproc_main.os_pages_lock); + mi_lock_init(&subproc_main.arena_reserve_lock); } } -mi_heap_t* _mi_heap_main_get(void) { +// Initialize main tld +static void mi_tld_main_init(void) { + if (tld_main.thread_id == 0) { + tld_main.thread_id = _mi_prim_thread_id(); + } +} + +// Initialization of the (statically allocated) main heap, and the main tld and subproc.
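+// (safe to call multiple times: all work is guarded by `heap_main.cookie == 0`, so only the first call initializes)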
+static void mi_heap_main_init(void) { + if (heap_main.cookie == 0) { + mi_subproc_main_init(); + mi_tld_main_init(); + // heap + heap_main.cookie = 1; + #if defined(__APPLE__) || defined(_WIN32) && !defined(MI_SHARED_LIB) + _mi_random_init_weak(&heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking + #else + _mi_random_init(&heap_main.random); + #endif + heap_main.cookie = _mi_heap_random_next(&heap_main); + heap_main.keys[0] = _mi_heap_random_next(&heap_main); + heap_main.keys[1] = _mi_heap_random_next(&heap_main); + _mi_heap_guarded_init(&heap_main); + heap_main.allow_page_abandon = (mi_option_get(mi_option_full_page_retain) >= 0); + heap_main.full_page_retain = mi_option_get_clamp(mi_option_full_page_retain, -1, 32); + } +} + +mi_heap_t* heap_main_get(void) { mi_heap_main_init(); - return &_mi_heap_main; + return &heap_main; } @@ -265,8 +289,9 @@ static mi_tld_t* mi_tld_alloc(void) { tld->memid = memid; tld->heap_backing = NULL; tld->heaps = NULL; - tld->subproc = &mi_subproc_default; - tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1); + tld->subproc = &subproc_main; + tld->thread_id = _mi_prim_thread_id(); + tld->thread_seq = mi_atomic_add_acq_rel(&mi_tcount, 1); tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); return tld; } @@ -291,12 +316,24 @@ mi_decl_noinline mi_tld_t* _mi_tld(void) { return mi_tld; } +mi_subproc_t* _mi_subproc(void) { + if (_mi_is_main_thread()) { // during initialization we should not recurse over reading the _mi_tld + return &subproc_main; + } + else { + return _mi_tld()->subproc; + } +} /* ----------------------------------------------------------- Sub process ----------------------------------------------------------- */ +mi_subproc_t* _mi_subproc_main(void) { + return &subproc_main; +} + mi_subproc_id_t mi_subproc_main(void) { return NULL; } @@ -305,42 +342,41 @@ mi_subproc_id_t mi_subproc_new(void) { mi_memid_t memid; mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid); if (subproc == NULL) return NULL; - subproc->abandoned_os_list = NULL; subproc->memid = memid; - mi_lock_init(&subproc->abandoned_os_lock); - mi_lock_init(&subproc->abandoned_os_visit_lock); + mi_lock_init(&subproc->os_pages_lock); + mi_lock_init(&subproc->arena_reserve_lock); return subproc; } mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id) { - return (subproc_id == NULL ? &mi_subproc_default : (mi_subproc_t*)subproc_id); + return (subproc_id == NULL ? &subproc_main : (mi_subproc_t*)subproc_id); } void mi_subproc_delete(mi_subproc_id_t subproc_id) { if (subproc_id == NULL) return; mi_subproc_t* subproc = _mi_subproc_from_id(subproc_id); - // check if there are no abandoned segments still.. + // check if there are os pages still.. bool safe_to_delete = false; - if (mi_lock_acquire(&subproc->abandoned_os_lock)) { - if (subproc->abandoned_os_list == NULL) { + if (mi_lock_acquire(&subproc->os_pages_lock)) { + if (subproc->os_pages.first == NULL) { safe_to_delete = true; } - mi_lock_release(&subproc->abandoned_os_lock); + mi_lock_release(&subproc->os_pages_lock); } if (!safe_to_delete) return; // safe to release // todo: should we refcount subprocesses? 
- mi_lock_done(&subproc->abandoned_os_lock); - mi_lock_done(&subproc->abandoned_os_visit_lock); + mi_lock_done(&subproc->os_pages_lock); + mi_lock_done(&subproc->arena_reserve_lock); _mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid); } void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { - mi_heap_t* heap = mi_heap_get_default(); - if (heap == NULL) return; - mi_assert(heap->tld->subproc == &mi_subproc_default); - if (heap->tld->subproc != &mi_subproc_default) return; - heap->tld->subproc = _mi_subproc_from_id(subproc_id); + mi_tld_t* tld = _mi_tld(); + if (tld == NULL) return; + mi_assert(tld->subproc == &subproc_main); + if (tld->subproc != &subproc_main) return; + tld->subproc = _mi_subproc_from_id(subproc_id); } @@ -352,10 +388,10 @@ void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { static bool _mi_thread_heap_init(void) { if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; if (_mi_is_main_thread()) { - // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization + // mi_assert_internal(heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization // the main heap is statically allocated mi_heap_main_init(); - _mi_heap_set_default_direct(&_mi_heap_main); + _mi_heap_set_default_direct(&heap_main); //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { @@ -383,7 +419,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { if (!mi_heap_is_initialized(heap)) return true; // reset default heap - _mi_heap_set_default_direct(_mi_is_main_thread() ? &_mi_heap_main : (mi_heap_t*)&_mi_heap_empty); + _mi_heap_set_default_direct(_mi_is_main_thread() ? &heap_main : (mi_heap_t*)&_mi_heap_empty); // switch to backing heap heap = heap->tld->heap_backing; @@ -403,7 +439,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_backing(heap)); // collect if not the main thread - if (heap != &_mi_heap_main) { + if (heap != &heap_main) { _mi_heap_collect_abandon(heap); } @@ -413,12 +449,12 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { // free heap meta data _mi_meta_free(heap, sizeof(mi_heap_t), heap->memid); - if (heap == &_mi_heap_main) { + if (heap == &heap_main) { #if 0 // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, // there may still be delete/free calls after the mi_fls_done is called. Issue #207 _mi_heap_destroy_pages(heap); - mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); + mi_assert_internal(heap->tld->heap_backing == &heap_main); #endif } @@ -449,12 +485,12 @@ static void mi_process_setup_auto_thread_done(void) { if (tls_initialized) return; tls_initialized = true; _mi_prim_thread_init_auto_done(); - _mi_heap_set_default_direct(&_mi_heap_main); + _mi_heap_set_default_direct(&heap_main); } bool _mi_is_main_thread(void) { - return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); + return (tld_main.thread_id==0 || tld_main.thread_id == _mi_thread_id()); } static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); @@ -501,7 +537,7 @@ void _mi_thread_done(mi_heap_t* heap) _mi_stat_decrease(&_mi_stats_main.threads, 1); // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... 
- if (heap->thread_id != _mi_thread_id()) return; + if (heap->tld->thread_id != _mi_prim_thread_id()) return; // abandon the thread local heap _mi_thread_heap_done(heap); // returns true if already ran @@ -560,7 +596,7 @@ void _mi_process_load(void) { } // reseed random - _mi_random_reinit_if_weak(&_mi_heap_main.random); + _mi_random_reinit_if_weak(&heap_main.random); } #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) @@ -587,7 +623,7 @@ void mi_process_init(void) mi_attr_noexcept { // ensure we are called once static mi_atomic_once_t process_init; #if _MSC_VER < 1920 - mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main + mi_heap_main_init(); // vs2017 can dynamically re-initialize heap_main #endif if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; @@ -595,10 +631,11 @@ void mi_process_init(void) mi_attr_noexcept { mi_process_setup_auto_thread_done(); mi_detect_cpu_features(); + mi_subproc_main_init(); + mi_tld_main_init(); + mi_heap_main_init(); _mi_os_init(); _mi_page_map_init(); - _mi_arena_init(); - mi_heap_main_init(); #if MI_DEBUG _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif @@ -609,7 +646,7 @@ void mi_process_init(void) mi_attr_noexcept { #endif mi_thread_init(); - #if defined(_WIN32) + #if defined(_WIN32) && defined(MI_WIN_USE_FLS) // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup // will not call _mi_thread_done on the (still executing) main thread. See issue #508. @@ -670,7 +707,7 @@ void mi_cdecl _mi_process_done(void) { mi_stats_print(NULL); } _mi_allocator_done(); - _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); + _mi_verbose_message("process done: 0x%zx\n", tld_main.thread_id); os_preloading = true; // don't call the C runtime anymore } diff --git a/src/page.c b/src/page.c index d97537d1..0444b47e 100644 --- a/src/page.c +++ b/src/page.c @@ -591,7 +591,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page) { void _mi_page_init(mi_heap_t* heap, mi_page_t* page) { mi_assert(page != NULL); mi_page_set_heap(page, heap); - page->subproc = heap->tld->subproc; + size_t page_size; uint8_t* page_start = mi_page_area(page, &page_size); MI_UNUSED(page_start); mi_track_mem_noaccess(page_start,page_size);
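
Notes on the changes above, with small illustrative sketches. The sketches are toy code for exposition only: the types, fields, and helper names in them are assumptions, not code from this patch or from mimalloc's headers.

1. Heaps now store the arena pointer itself (`heap->exclusive_arena = _mi_arena_from_id(arena_id)`), so a suitability check like `_mi_heap_memid_is_suitable` reduces to a pointer comparison instead of an integer-id match. A minimal sketch of that shape:

  #include <stdbool.h>
  #include <stddef.h>

  typedef struct arena_s arena_t;   /* opaque arena; stands in for mi_arena_t */

  typedef struct memid_s {          /* provenance of an allocation */
    arena_t* arena;                 /* owning arena, or NULL for plain OS memory */
  } memid_t;

  /* A heap without an exclusive arena accepts memory from any source;
     an exclusive heap only accepts memory that came from its own arena. */
  static bool memid_is_suitable(memid_t memid, const arena_t* exclusive_arena) {
    return (exclusive_arena == NULL || memid.arena == exclusive_arena);
  }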
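2. `subproc_main`, `tld_main`, and `heap_main` are statically allocated and initialized lazily, each guarded by a sentinel field (`memid.memkind != MI_MEM_STATIC`, `thread_id == 0`, and `cookie == 0` respectively), so the init functions are idempotent and can be reached from several entry points. The idiom, as a self-contained sketch:

  typedef struct tld_s {
    unsigned long thread_id;   /* 0 means "not initialized yet" */
  } tld_t;

  static tld_t tld_main;       /* static storage: zero-initialized before main() */

  /* stand-in for the OS thread-id primitive (_mi_prim_thread_id) */
  static unsigned long prim_thread_id(void) { return 1; }

  /* idempotent: only the first call does any work, so process init,
     first allocation, and lazy TLS access may all call it safely */
  static void tld_main_init(void) {
    if (tld_main.thread_id == 0) {
      tld_main.thread_id = prim_thread_id();
    }
  }

Note this is not a concurrent once-guard; it relies on the first call happening on the main thread during startup, which matches how the patch uses it.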
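3. `_mi_subproc()` returns the static main subprocess for the main thread instead of reading it through `_mi_tld()`, since during startup reading the thread-local tld could itself trigger initialization and recurse. The shape of that fast path, with stand-in helpers:

  typedef struct subproc_s { int unused_; } subproc_t;
  typedef struct thread_tld_s { subproc_t* subproc; } thread_tld_t;

  static subproc_t     main_subproc;
  static thread_tld_t  main_tld = { &main_subproc };

  static int is_main_thread(void) { return 1; }                 /* stand-in check */
  static thread_tld_t* get_tld(void) { return &main_tld; }      /* stand-in for the lazy TLS lookup */

  static subproc_t* current_subproc(void) {
    if (is_main_thread()) {
      return &main_subproc;   /* no TLS read: avoids recursing into tld initialization */
    }
    return get_tld()->subproc;
  }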
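4. With arenas owned by their subprocess, the experimental subprocess API keeps its shape: a thread must be moved into a subprocess while it still belongs to the main one (the code above asserts `tld->subproc == &subproc_main`), and `mi_subproc_delete` only releases the subprocess once it no longer owns OS pages. A usage sketch against the public API (C11 threads are used here just for brevity):

  #include <mimalloc.h>
  #include <threads.h>

  static int component_thread(void* arg) {
    /* must run before this thread allocates, while it is still in the main subprocess */
    mi_subproc_add_current_thread((mi_subproc_id_t)arg);
    void* p = mi_malloc(128);   /* now served from this subprocess's own arenas */
    mi_free(p);
    return 0;
  }

  int main(void) {
    mi_subproc_id_t subproc = mi_subproc_new();
    thrd_t t;
    thrd_create(&t, &component_thread, subproc);
    thrd_join(t, NULL);
    mi_subproc_delete(subproc);  /* a no-op until the subprocess owns no OS pages anymore */
    return 0;
  }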
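5. `mi_process_init` now initializes the main subprocess and tld before the main heap, and all three before the OS layer and page map (previously `_mi_arena_init` and `mi_heap_main_init` ran after them). A sketch of the once-only ordering; the names and the rationale comments are illustrative:

  #include <stdatomic.h>
  #include <stdbool.h>

  static atomic_bool process_initialized;

  static void subproc_main_init(void) { /* init the static subprocess locks */ }
  static void tld_main_init(void)     { /* record the main thread id        */ }
  static void heap_main_init(void)    { /* cookie, keys, random state       */ }
  static void os_init(void)           { /* may allocate: needs the above    */ }

  static void process_init(void) {
    bool expected = false;   /* run the body exactly once, like mi_atomic_once */
    if (!atomic_compare_exchange_strong(&process_initialized, &expected, true)) return;
    subproc_main_init();     /* 1. subprocess first: it owns the arena state      */
    tld_main_init();         /* 2. then the main thread's tld                     */
    heap_main_init();        /* 3. then the main heap, which links to both        */
    os_init();               /* 4. only now components that may allocate          */
  }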