From 2084df3dde95aa2fb2bb73e8fc3eff2f7edc6662 Mon Sep 17 00:00:00 2001 From: daanx Date: Sun, 8 Dec 2024 12:20:54 -0800 Subject: [PATCH] add dedicated meta data allocation for threads and tld --- CMakeLists.txt | 1 + ide/vs2022/mimalloc-override.vcxproj | 1 + ide/vs2022/mimalloc-override.vcxproj.filters | 1 + ide/vs2022/mimalloc.vcxproj | 1 + ide/vs2022/mimalloc.vcxproj.filters | 1 + include/mimalloc/internal.h | 251 +++++++++---------- include/mimalloc/types.h | 32 ++- src/arena-meta.c | 156 ++++++++++++ src/arena.c | 90 +++---- src/heap.c | 50 ++-- src/init.c | 184 ++++++-------- src/os.c | 133 +++++----- src/page-map.c | 6 +- src/prim/windows/prim.c | 14 +- src/static.c | 1 + 15 files changed, 511 insertions(+), 411 deletions(-) create mode 100644 src/arena-meta.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e47cfe6..6df4ba5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ set(mi_sources src/alloc-aligned.c src/alloc-posix.c src/arena.c + src/arena-meta.c src/bitmap.c src/heap.c src/init.c diff --git a/ide/vs2022/mimalloc-override.vcxproj b/ide/vs2022/mimalloc-override.vcxproj index a5d5c34c..eebc4d8a 100644 --- a/ide/vs2022/mimalloc-override.vcxproj +++ b/ide/vs2022/mimalloc-override.vcxproj @@ -236,6 +236,7 @@ + diff --git a/ide/vs2022/mimalloc-override.vcxproj.filters b/ide/vs2022/mimalloc-override.vcxproj.filters index 60c7a1fb..0e63822c 100644 --- a/ide/vs2022/mimalloc-override.vcxproj.filters +++ b/ide/vs2022/mimalloc-override.vcxproj.filters @@ -58,6 +58,7 @@ Sources + diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj index e9a4a339..d8cc25b1 100644 --- a/ide/vs2022/mimalloc.vcxproj +++ b/ide/vs2022/mimalloc.vcxproj @@ -214,6 +214,7 @@ + false diff --git a/ide/vs2022/mimalloc.vcxproj.filters b/ide/vs2022/mimalloc.vcxproj.filters index a47efddd..7fc4ba9c 100644 --- a/ide/vs2022/mimalloc.vcxproj.filters +++ b/ide/vs2022/mimalloc.vcxproj.filters @@ -58,6 +58,7 @@ Sources + diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h index 28eca4bb..4c8256a0 100644 --- a/include/mimalloc/internal.h +++ b/include/mimalloc/internal.h @@ -27,8 +27,6 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) #pragma warning(disable:26812) // unscoped enum warning -#pragma warning(disable:28159) // don't use GetVersion -#pragma warning(disable:4996) // don't use GetVersion #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_align(a) __declspec(align(a)) @@ -58,42 +56,52 @@ terms of the MIT license. 
A copy of the license can be found in the file #define mi_decl_externc #endif +// "libc.c" +#include <stdarg.h> +void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args); +void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...); +char _mi_toupper(char c); +int _mi_strnicmp(const char* s, const char* t, size_t n); +void _mi_strlcpy(char* dest, const char* src, size_t dest_size); +void _mi_strlcat(char* dest, const char* src, size_t dest_size); +size_t _mi_strlen(const char* s); +size_t _mi_strnlen(const char* s, size_t max_len); +bool _mi_getenv(const char* name, char* result, size_t result_size); // "options.c" -void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); -void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); -void _mi_warning_message(const char* fmt, ...); -void _mi_verbose_message(const char* fmt, ...); -void _mi_trace_message(const char* fmt, ...); -void _mi_output_message(const char* fmt, ...); -void _mi_options_init(void); -long _mi_option_get_fast(mi_option_t option); -void _mi_error_message(int err, const char* fmt, ...); +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message); +void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...); +void _mi_warning_message(const char* fmt, ...); +void _mi_verbose_message(const char* fmt, ...); +void _mi_trace_message(const char* fmt, ...); +void _mi_output_message(const char* fmt, ...); +void _mi_options_init(void); +long _mi_option_get_fast(mi_option_t option); +void _mi_error_message(int err, const char* fmt, ...); // random.c -void _mi_random_init(mi_random_ctx_t* ctx); -void _mi_random_init_weak(mi_random_ctx_t* ctx); -void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); -void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); -uintptr_t _mi_random_next(mi_random_ctx_t* ctx); -uintptr_t _mi_heap_random_next(mi_heap_t* heap); -uintptr_t _mi_os_random_weak(uintptr_t extra_seed); +void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_init_weak(mi_random_ctx_t* ctx); +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); +uintptr_t _mi_random_next(mi_random_ctx_t* ctx); +uintptr_t _mi_heap_random_next(mi_heap_t* heap); +uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; -void _mi_process_load(void); +void _mi_process_load(void); void mi_cdecl _mi_process_done(void); -bool _mi_is_redirected(void); -bool _mi_allocator_init(const char** message); -void _mi_allocator_done(void); -bool _mi_is_main_thread(void); -size_t _mi_current_thread_count(void); -bool _mi_preloading(void); // true while the C runtime is not initialized yet -void _mi_thread_done(mi_heap_t* heap); -void _mi_thread_data_collect(void); -void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap); +bool _mi_is_redirected(void); +bool _mi_allocator_init(const char** message); +void _mi_allocator_done(void); +bool _mi_is_main_thread(void); +size_t _mi_current_thread_count(void); +bool _mi_preloading(void); // true while the C runtime is not initialized yet +void _mi_thread_done(mi_heap_t* heap); +mi_tld_t* _mi_tld(void); // current tld: `_mi_tld() == _mi_heap_get_default()->tld` mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; size_t _mi_thread_seq_id(void) mi_attr_noexcept; @@ -103,116 +111,94
@@ mi_subproc_t* _mi_subproc_from_id(mi_subproc_id_t subproc_id); void _mi_heap_guarded_init(mi_heap_t* heap); // os.c -void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); -void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); -void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_memid_t* memid); +void* _mi_os_zalloc(size_t size, mi_memid_t* memid); +void _mi_os_free(void* p, size_t size, mi_memid_t memid); +void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid); -size_t _mi_os_page_size(void); -size_t _mi_os_good_alloc_size(size_t size); -bool _mi_os_has_overcommit(void); -bool _mi_os_has_virtual_reserve(void); -size_t _mi_os_virtual_address_bits(void); +size_t _mi_os_page_size(void); +size_t _mi_os_good_alloc_size(size_t size); +bool _mi_os_has_overcommit(void); +bool _mi_os_has_virtual_reserve(void); +size_t _mi_os_virtual_address_bits(void); -bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); +bool _mi_os_reset(void* addr, size_t size); +bool _mi_os_commit(void* p, size_t size, bool* is_zero); +bool _mi_os_decommit(void* addr, size_t size); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_purge(void* p, size_t size); +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); -void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid); +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid); -void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); -bool _mi_os_use_large_page(size_t size, size_t alignment); -size_t _mi_os_large_page_size(void); +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); +bool _mi_os_use_large_page(size_t size, size_t alignment); +size_t _mi_os_large_page_size(void); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_init(void); -void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, 
bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld); -bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); -bool _mi_arena_contains(const void* p); -void _mi_arenas_collect(bool force_purge, mi_stats_t* stats); -void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); +void _mi_arena_init(void); +void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid); +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +bool _mi_arena_contains(const void* p); +void _mi_arenas_collect(bool force_purge); +void _mi_arena_unsafe_destroy_all(void); -mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment); -void _mi_arena_page_free(mi_page_t* page); -void _mi_arena_page_abandon(mi_page_t* page); -void _mi_arena_page_unabandon(mi_page_t* page); -bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page); +mi_page_t* _mi_arena_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment); +void _mi_arena_page_free(mi_page_t* page); +void _mi_arena_page_abandon(mi_page_t* page); +void _mi_arena_page_unabandon(mi_page_t* page); +bool _mi_arena_page_try_reabandon_to_mapped(mi_page_t* page); -bool _mi_arena_try_reclaim(mi_heap_t* heap, mi_page_t* page); -void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap); - -void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid); -void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size); - -/* -typedef struct mi_arena_field_cursor_s { // abstract struct - size_t os_list_count; // max entries to visit in the OS abandoned list - size_t start; // start arena idx (may need to be wrapped) - size_t end; // end arena idx (exclusive, may need to be wrapped) - size_t bitmap_idx; // current bit idx for an arena - mi_subproc_t* subproc; // only visit blocks in this sub-process - bool visit_all; // ensure all abandoned blocks are seen (blocking) - bool hold_visit_lock; // if the subproc->abandoned_os_visit_lock is held -} mi_arena_field_cursor_t; -void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_subproc_t* subproc, bool visit_all, mi_arena_field_cursor_t* current); -mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous); -void _mi_arena_field_cursor_done(mi_arena_field_cursor_t* current); -*/ +// arena-meta.c +void* _mi_meta_zalloc( size_t size, mi_memid_t* memid ); +void _mi_meta_free(void* p, size_t size, mi_memid_t memid); // "page-map.c" -bool _mi_page_map_init(void); -void _mi_page_map_register(mi_page_t* page); -void _mi_page_map_unregister(mi_page_t* page); - +bool _mi_page_map_init(void); +void _mi_page_map_register(mi_page_t* page); +void _mi_page_map_unregister(mi_page_t* page); // "page.c" -void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; -void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks -void _mi_page_unfull(mi_page_t* page); -void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq); // free 
the page -void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... -void _mi_page_force_abandon(mi_page_t* page); +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks +void _mi_page_unfull(mi_page_t* page); +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq); // free the page +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... +void _mi_page_force_abandon(mi_page_t* page); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); -// void _mi_heap_delayed_free_all(mi_heap_t* heap); -// bool _mi_heap_delayed_free_partial(mi_heap_t* heap); -void _mi_heap_collect_retired(mi_heap_t* heap, bool force); +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); +void _mi_deferred_free(mi_heap_t* heap, bool force); -size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); -void _mi_deferred_free(mi_heap_t* heap, bool force); +void _mi_page_free_collect(mi_page_t* page,bool force); +void _mi_page_init(mi_heap_t* heap, mi_page_t* page); -void _mi_page_free_collect(mi_page_t* page,bool force); -// void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments -void _mi_page_init(mi_heap_t* heap, mi_page_t* page); - -size_t _mi_bin_size(uint8_t bin); // for stats -uint8_t _mi_bin(size_t size); // for stats +size_t _mi_bin_size(uint8_t bin); // for stats +uint8_t _mi_bin(size_t size); // for stats // "heap.c" -void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag); -void _mi_heap_destroy_pages(mi_heap_t* heap); -void _mi_heap_collect_abandon(mi_heap_t* heap); -void _mi_heap_set_default_direct(mi_heap_t* heap); -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); -void _mi_heap_unsafe_destroy_all(void); -mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag); -void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page); -bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg); -void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page); +void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag); +void _mi_heap_destroy_pages(mi_heap_t* heap); +void _mi_heap_collect_abandon(mi_heap_t* heap); +void _mi_heap_set_default_direct(mi_heap_t* heap); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); +void _mi_heap_unsafe_destroy_all(void); +mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag); +void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page); +bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg); +void _mi_heap_page_reclaim(mi_heap_t* heap, mi_page_t* page); // "stats.c" -void _mi_stats_done(mi_stats_t* stats); +void _mi_stats_done(mi_stats_t* stats); mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); @@ -226,20 +212,6 @@ void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, siz void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p); void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); -// bool 
_mi_free_delayed_block(mi_block_t* block); - - -// "libc.c" -#include -void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args); -void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...); -char _mi_toupper(char c); -int _mi_strnicmp(const char* s, const char* t, size_t n); -void _mi_strlcpy(char* dest, const char* src, size_t dest_size); -void _mi_strlcat(char* dest, const char* src, size_t dest_size); -size_t _mi_strlen(const char* s); -size_t _mi_strnlen(const char* s, size_t max_len); -bool _mi_getenv(const char* name, char* result, size_t result_size); #if MI_DEBUG>1 bool _mi_page_is_valid(mi_page_t* page); @@ -449,9 +421,6 @@ static inline uintptr_t _mi_ptr_cookie(const void* p) { return ((uintptr_t)p ^ _mi_heap_main.cookie); } -static inline mi_tld_t* _mi_tld(void) { - return mi_heap_get_default()->tld; -} /* ----------------------------------------------------------- Pages @@ -908,6 +877,16 @@ static inline mi_memid_t _mi_memid_create_os(void* base, size_t size, bool commi return memid; } +static inline mi_memid_t _mi_memid_create_meta(void* mpage, size_t block_idx, size_t block_count) { + mi_memid_t memid = _mi_memid_create(MI_MEM_META); + memid.mem.meta.meta_page = mpage; + memid.mem.meta.block_index = (uint32_t)block_idx; + memid.mem.meta.block_count = (uint32_t)block_count; + memid.initially_committed = true; + memid.initially_zero = true; + memid.is_pinned = true; + return memid; +} // ------------------------------------------------------------------- // Fast "random" shuffle @@ -937,13 +916,13 @@ static inline uintptr_t _mi_random_shuffle(uintptr_t x) { // Optimize numa node access for the common case (= one node) // ------------------------------------------------------------------- -int _mi_os_numa_node_get(mi_os_tld_t* tld); +int _mi_os_numa_node_get(void); size_t _mi_os_numa_node_count_get(void); extern _Atomic(size_t) _mi_numa_node_count; -static inline int _mi_os_numa_node(mi_os_tld_t* tld) { +static inline int _mi_os_numa_node(void) { if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } - else return _mi_os_numa_node_get(tld); + else return _mi_os_numa_node_get(); } static inline size_t _mi_os_numa_node_count(void) { const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h index e10786a0..d0a77c5f 100644 --- a/include/mimalloc/types.h +++ b/include/mimalloc/types.h @@ -155,6 +155,7 @@ typedef enum mi_memkind_e { MI_MEM_NONE, // not allocated MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) + MI_MEM_META, // allocated with the meta data allocator MI_MEM_OS, // allocated from the OS MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory) MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. 
using `mremap`) @@ -165,6 +166,11 @@ static inline bool mi_memkind_is_os(mi_memkind_t memkind) { return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); } +static inline bool mi_memkind_needs_no_free(mi_memkind_t memkind) { + return (memkind <= MI_MEM_STATIC); +} + + typedef struct mi_memid_os_info { void* base; // actual base address of the block (used for offset aligned allocations) size_t size; // allocated full size @@ -178,10 +184,17 @@ typedef struct mi_memid_arena_info { bool is_exclusive; // this arena can only be used for specific arena allocations } mi_memid_arena_info_t; +typedef struct mi_memid_meta_info { + void* meta_page; // meta-page that contains the block + uint32_t block_index; // block index in the meta-data page + uint32_t block_count; // allocated blocks +} mi_memid_meta_info_t; + typedef struct mi_memid_s { union { mi_memid_os_info_t os; // only used for MI_MEM_OS mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA + mi_memid_meta_info_t meta; // only used for MI_MEM_META } mem; bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages) bool initially_committed;// `true` if the memory was originally allocated as committed @@ -190,6 +203,14 @@ typedef struct mi_memid_s { } mi_memid_t; +static inline bool mi_memid_is_os(mi_memid_t memid) { + return mi_memkind_is_os(memid.memkind); +} + +static inline bool mi_memid_needs_no_free(mi_memid_t memid) { + return mi_memkind_needs_no_free(memid.memkind); +} + // ------------------------------------------------------ // Mimalloc pages contain allocated blocks // ------------------------------------------------------ @@ -399,7 +420,8 @@ struct mi_heap_s { size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) size_t page_retired_max; // largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread - bool allow_page_reclaim; // `true` if this heap can reclaim abandoned pages + mi_memid_t memid; // provenance of the heap struct itself (meta or os) + bool allow_page_reclaim; // `true` if this heap can reclaim abandoned pages bool allow_page_abandon; // `true` if this heap can abandon pages to reduce memory footprint uint8_t tag; // custom tag, can be used for separating heaps based on the object types #if MI_GUARDED @@ -560,12 +582,6 @@ struct mi_subproc_s { typedef int64_t mi_msecs_t; -// OS thread local data -typedef struct mi_os_tld_s { - size_t region_idx; // start point for next allocation - mi_stats_t* stats; // points to tld stats -} mi_os_tld_t; - // Thread local data struct mi_tld_s { unsigned long long heartbeat; // monotonic heartbeat count @@ -573,9 +589,9 @@ struct mi_tld_s { mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) mi_subproc_t* subproc; // sub-process this thread belongs to. size_t tseq; // thread sequence id + mi_memid_t memid; // provenance of the tld memory itself (meta or OS) bool recurse; // true if deferred was called; used to prevent infinite recursion.
bool is_in_threadpool; // true if this thread is part of a threadpool (and can run arbitrary tasks) - mi_os_tld_t os; // os tld mi_stats_t stats; // statistics }; diff --git a/src/arena-meta.c b/src/arena-meta.c new file mode 100644 index 00000000..0fb4dfa5 --- /dev/null +++ b/src/arena-meta.c @@ -0,0 +1,156 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ---------------------------------------------------------------------------- + We have a special "mini" allocator just for allocation of meta-data like + the heap (`mi_heap_t`) or thread-local data (`mi_tld_t`). + + We reuse the bitmap of the arena's for allocation of 64b blocks inside + an arena slice (64KiB). + We always ensure that meta data is zero'd (we zero on `free`) +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "bitmap.h" + +/* ----------------------------------------------------------- + Meta data allocation +----------------------------------------------------------- */ + +#define MI_META_PAGE_SIZE MI_ARENA_SLICE_SIZE +#define MI_META_PAGE_ALIGN MI_ARENA_SLICE_ALIGN + +#define MI_META_BLOCK_SIZE (64) +#define MI_META_BLOCK_ALIGN MI_META_BLOCK_SIZE +#define MI_META_BLOCKS_PER_PAGE (MI_ARENA_SLICE_SIZE / MI_META_BLOCK_SIZE) // 1024 +#define MI_META_MAX_SIZE (MI_BCHUNK_SIZE * MI_META_BLOCK_SIZE) + +typedef struct mi_meta_page_s { + _Atomic(struct mi_meta_page_s*) next; // a linked list of meta-data pages (never released) + mi_memid_t memid; // provenance of the meta-page memory itself + mi_bitmap_t blocks_free; // a small bitmap with 1 bit per block. +} mi_meta_page_t; + +static mi_decl_cache_align _Atomic(mi_meta_page_t*) mi_meta_pages = MI_ATOMIC_VAR_INIT(NULL); + + +#if MI_DEBUG > 1 +static mi_meta_page_t* mi_meta_page_of_ptr(void* p, size_t* block_idx) { + mi_meta_page_t* mpage = (mi_meta_page_t*)mi_align_down_ptr(p,MI_META_PAGE_ALIGN); + if (block_idx != NULL) { + *block_idx = ((uint8_t*)p - (uint8_t*)mpage) / MI_META_BLOCK_SIZE; + } + return mpage; +} +#endif + +static mi_meta_page_t* mi_meta_page_next( mi_meta_page_t* mpage ) { + return mi_atomic_load_ptr_acquire(mi_meta_page_t, &mpage->next); +} + +static void* mi_meta_block_start( mi_meta_page_t* mpage, size_t block_idx ) { + mi_assert_internal(_mi_is_aligned(mpage,MI_META_PAGE_ALIGN)); + mi_assert_internal(block_idx < MI_META_BLOCKS_PER_PAGE); + void* p = ((uint8_t*)mpage + (block_idx * MI_META_BLOCK_SIZE)); + mi_assert_internal(mpage == mi_meta_page_of_ptr(p,NULL)); + return p; +} + +// allocate a fresh meta page and add it to the global list. 
+static mi_meta_page_t* mi_meta_page_zalloc(void) { + // allocate a fresh arena slice + mi_memid_t memid; + mi_meta_page_t* mpage = (mi_meta_page_t*)_mi_arena_alloc_aligned(MI_ARENA_SLICE_SIZE, MI_ARENA_SLICE_ALIGN, 0, + true /* commit*/, true /* allow large */, + _mi_arena_id_none(), 0 /* tseq */, &memid ); + if (mpage == NULL) return NULL; + mi_assert_internal(_mi_is_aligned(mpage,MI_META_PAGE_ALIGN)); + if (!memid.initially_zero) { + _mi_memzero_aligned(mpage, MI_ARENA_SLICE_SIZE); + } + + // initialize the page + mpage->memid = memid; + mi_bitmap_init(&mpage->blocks_free, MI_META_BLOCKS_PER_PAGE, true /* already_zero */); + const size_t mpage_size = offsetof(mi_meta_page_t,blocks_free) + mi_bitmap_size(MI_META_BLOCKS_PER_PAGE, NULL); + const size_t info_blocks = _mi_divide_up(mpage_size,MI_META_BLOCK_SIZE); + mi_assert_internal(info_blocks < MI_META_BLOCKS_PER_PAGE); + mi_bitmap_unsafe_setN(&mpage->blocks_free, info_blocks, MI_META_BLOCKS_PER_PAGE - info_blocks); + + // push atomically in front of the meta page list + // (note: there is no ABA issue since we never free meta-pages) + mi_meta_page_t* old = mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages); + do { + mi_atomic_store_ptr_release(mi_meta_page_t, &mpage->next, old); + } while(!mi_atomic_cas_ptr_weak_acq_rel(mi_meta_page_t,&mi_meta_pages,&old,mpage)); + return mpage; +} + + +// allocate meta-data +void* _mi_meta_zalloc( size_t size, mi_memid_t* pmemid ) +{ + mi_assert_internal(pmemid != NULL); + size = _mi_align_up(size,MI_META_BLOCK_SIZE); + if (size == 0 || size > MI_META_MAX_SIZE) return NULL; + const size_t block_count = _mi_divide_up(size,MI_META_BLOCK_SIZE); + mi_assert_internal(block_count > 0 && block_count < MI_BCHUNK_BITS); + mi_meta_page_t* mpage0 = mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages); + mi_meta_page_t* mpage = mpage0; + while (mpage != NULL) { + size_t block_idx; + if (mi_bitmap_try_find_and_clearN(&mpage->blocks_free, block_count, 0, &block_idx)) { + // found and claimed `block_count` blocks + *pmemid = _mi_memid_create_meta(mpage, block_idx, block_count); + return mi_meta_block_start(mpage,block_idx); + } + else { + mpage = mi_meta_page_next(mpage); + } + } + // failed to find space in existing pages + if (mi_atomic_load_ptr_acquire(mi_meta_page_t,&mi_meta_pages) != mpage0) { + // the page list was updated by another thread in the meantime, retry + return _mi_meta_zalloc(size,pmemid); + } + // otherwise, allocate a fresh metapage and try once more + mpage = mi_meta_page_zalloc(); + if (mpage != NULL) { + size_t block_idx; + if (mi_bitmap_try_find_and_clearN(&mpage->blocks_free, block_count, 0, &block_idx)) { + // found and claimed `block_count` blocks + *pmemid = _mi_memid_create_meta(mpage, block_idx, block_count); + return mi_meta_block_start(mpage,block_idx); + } + } + // if all this failed, allocate from the OS + return _mi_os_alloc(size, pmemid); +} + +// free meta-data +void _mi_meta_free(void* p, size_t size, mi_memid_t memid) { + if (p==NULL) return; + if (memid.memkind == MI_MEM_META) { + mi_assert_internal(_mi_divide_up(size, MI_META_BLOCK_SIZE) == memid.mem.meta.block_count); + const size_t block_count = memid.mem.meta.block_count; + const size_t block_idx = memid.mem.meta.block_index; + mi_meta_page_t* mpage = (mi_meta_page_t*)memid.mem.meta.meta_page; + mi_assert_internal(mi_meta_page_of_ptr(p,NULL) == mpage); + mi_assert_internal(block_idx + block_count < MI_META_BLOCKS_PER_PAGE); + mi_assert_internal(mi_bitmap_is_clearN(&mpage->blocks_free, block_idx, block_count)); + 
// we zero on free (and on the initial page allocation) so we don't need a "dirty" map + _mi_memzero_aligned(mi_meta_block_start(mpage, block_idx), block_count*MI_META_BLOCK_SIZE); + mi_bitmap_setN(&mpage->blocks_free, block_idx, block_count, NULL); + } + else if (mi_memid_is_os(memid)) { + _mi_os_free(p, size, memid); + } + else { + mi_assert_internal(mi_memid_needs_no_free(memid)); + } +} diff --git a/src/arena.c b/src/arena.c index fa7d53ed..2558165a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -214,7 +214,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at( // not fully committed: commit the full range and set the commit bits // (this may race and we may double-commit which is fine) bool commit_zero = false; - if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero, NULL)) { + if (!_mi_os_commit(p, mi_size_of_slices(slice_count), &commit_zero)) { memid->initially_committed = false; } else { @@ -364,14 +364,13 @@ static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_are static mi_decl_noinline void* mi_arena_try_find_free( size_t slice_count, size_t alignment, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) + mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) { mi_assert_internal(slice_count <= mi_slice_count_of_size(MI_ARENA_MAX_OBJ_SIZE)); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); if (alignment > MI_ARENA_SLICE_ALIGN) return NULL; // search arena's - const size_t tseq = tld->tseq; mi_forall_suitable_arenas(req_arena_id, tseq, allow_large, arena) { void* p = mi_arena_try_alloc_at(arena, slice_count, commit, tseq, memid); @@ -385,14 +384,14 @@ static mi_decl_noinline void* mi_arena_try_alloc( size_t slice_count, size_t alignment, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) + mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) { mi_assert(slice_count <= MI_ARENA_MAX_OBJ_SLICES); mi_assert(alignment <= MI_ARENA_SLICE_ALIGN); void* p; again: // try to find free slices in the arena's - p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, memid, tld); + p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); if (p != NULL) return p; // did we need a specific arena?
@@ -406,7 +405,7 @@ again: if (ok) { // and try allocate in there mi_assert_internal(req_arena_id == _mi_arena_id_none()); - p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, memid, tld); + p = mi_arena_try_find_free(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); if (p != NULL) return p; } } @@ -423,7 +422,7 @@ again: static void* mi_arena_os_alloc_aligned( size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) + mi_arena_id_t req_arena_id, mi_memid_t* memid) { // if we cannot use OS allocation, return NULL if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) { @@ -432,10 +431,10 @@ static void* mi_arena_os_alloc_aligned( } if (align_offset > 0) { - return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, &tld->stats); + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid); } else { - return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, &tld->stats); + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid); } } @@ -444,9 +443,9 @@ static void* mi_arena_os_alloc_aligned( void* _mi_arena_alloc_aligned( size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, - mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) + mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) { - mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(memid != NULL); mi_assert_internal(size > 0); // *memid = _mi_memid_none(); @@ -459,18 +458,18 @@ void* _mi_arena_alloc_aligned( alignment <= MI_ARENA_SLICE_ALIGN && align_offset == 0) // and good alignment { const size_t slice_count = mi_slice_count_of_size(size); - void* p = mi_arena_try_alloc(slice_count, alignment, commit, allow_large, req_arena_id, memid, tld); + void* p = mi_arena_try_alloc(slice_count, alignment, commit, allow_large, req_arena_id, tseq, memid); if (p != NULL) return p; } // fall back to the OS - void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena_id, memid, tld); + void* p = mi_arena_os_alloc_aligned(size, alignment, align_offset, commit, allow_large, req_arena_id, memid); return p; } -void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, size_t tseq, mi_memid_t* memid) { - return _mi_arena_alloc_aligned(size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena_id, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_SLICE_SIZE, 0, commit, allow_large, req_arena_id, tseq, memid); } @@ -566,7 +565,7 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz !os_align && // not large alignment slice_count <= MI_ARENA_MAX_OBJ_SLICES) // and not too large { - page = (mi_page_t*)mi_arena_try_alloc(slice_count, page_alignment, commit, allow_large, req_arena_id, &memid, tld); + page = (mi_page_t*)mi_arena_try_alloc(slice_count, page_alignment, commit, allow_large, req_arena_id, tld->tseq, &memid); } // otherwise fall back to the OS @@ -574,10 +573,10 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz if (os_align) { // note: slice_count already includes the page mi_assert_internal(slice_count >= 
mi_slice_count_of_size(block_size) + mi_slice_count_of_size(page_alignment)); - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena_id, &memid, tld); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), block_alignment, page_alignment /* align offset */, commit, allow_large, req_arena_id, &memid); } else { - page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, req_arena_id, &memid, tld); + page = (mi_page_t*)mi_arena_os_alloc_aligned(mi_size_of_slices(slice_count), page_alignment, 0 /* align offset */, commit, allow_large, req_arena_id, &memid); } } @@ -725,7 +724,7 @@ void _mi_arena_page_free(mi_page_t* page) { #endif _mi_page_map_unregister(page); - _mi_arena_free(page, 1, 1, page->memid, NULL); + _mi_arena_free(page, 1, 1, page->memid); } /* ----------------------------------------------------------- @@ -831,16 +830,15 @@ void _mi_arena_reclaim_all_abandoned(mi_heap_t* heap) { /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices, mi_stats_t* stats); -static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats); +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices); +static void mi_arenas_try_purge(bool force, bool visit_all); -void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid) { mi_assert_internal(size > 0); mi_assert_internal(committed_size <= size); if (p==NULL) return; if (size==0) return; const bool all_committed = (committed_size == size); - if (stats==NULL) { stats = &_mi_stats_main; } // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) 
mi_track_mem_undefined(p, size); @@ -851,7 +849,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) _mi_stat_decrease(&_mi_stats_main.committed, committed_size); } - _mi_os_free(p, size, memid, stats); + _mi_os_free(p, size, memid); } else if (memid.memkind == MI_MEM_ARENA) { // allocated in an arena @@ -894,7 +892,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi } */ // (delay) purge the entire range - mi_arena_schedule_purge(arena, slice_index, slice_count, stats); + mi_arena_schedule_purge(arena, slice_index, slice_count); } // and make it available to others again @@ -904,13 +902,16 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi return; }; } + else if (memid.memkind == MI_MEM_META) { + _mi_meta_free(p, size, memid); + } else { // arena was none, external, or static; nothing to do - mi_assert_internal(memid.memkind < MI_MEM_OS); + mi_assert_internal(mi_memid_needs_no_free(memid)); } // purge expired decommits - mi_arenas_try_purge(false, false, stats); + mi_arenas_try_purge(false, false); } // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` @@ -924,7 +925,7 @@ static void mi_arenas_unsafe_destroy(void) { mi_lock_done(&arena->abandoned_visit_lock); if (mi_memkind_is_os(arena->memid.memkind)) { mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); - _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid, &_mi_stats_main); + _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid); } } } @@ -935,15 +936,15 @@ static void mi_arenas_unsafe_destroy(void) { } // Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired -void _mi_arenas_collect(bool force_purge, mi_stats_t* stats) { - mi_arenas_try_purge(force_purge, force_purge /* visit all? */, stats); +void _mi_arenas_collect(bool force_purge) { + mi_arenas_try_purge(force_purge, force_purge /* visit all? */); } // destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` // for dynamic libraries that are unloaded and need to release all their allocated memory. -void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { +void _mi_arena_unsafe_destroy_all(void) { mi_arenas_unsafe_destroy(); - _mi_arenas_collect(true /* force purge */, stats); // purge non-owned arenas + _mi_arenas_collect(true /* force purge */); // purge non-owned arenas } // Is a pointer inside any of our arenas? 
@@ -1036,7 +1037,7 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int // commit & zero if needed bool is_zero = memid.initially_zero; if (!memid.initially_committed) { - _mi_os_commit(arena, mi_size_of_slices(info_slices), NULL, &_mi_stats_main); + _mi_os_commit(arena, mi_size_of_slices(info_slices), NULL); } if (!is_zero) { _mi_memzero(arena, mi_size_of_slices(info_slices)); @@ -1096,11 +1097,11 @@ int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exc if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_SLICE_SIZE); // at least one slice mi_memid_t memid; - void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, &memid, &_mi_stats_main); + void* start = _mi_os_alloc_aligned(size, MI_ARENA_SLICE_ALIGN, commit, allow_large, &memid); if (start == NULL) return ENOMEM; const bool is_large = memid.is_pinned; // todo: use separate is_large field? if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { - _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); + _mi_os_free_ex(start, size, commit, memid); _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } @@ -1219,7 +1220,7 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { - _mi_os_free(p, hsize, memid, &_mi_stats_main); + _mi_os_free(p, hsize, memid); return ENOMEM; } return 0; @@ -1281,14 +1282,14 @@ static long mi_arena_purge_delay(void) { // reset or decommit in an arena and update the committed/decommit bitmaps // assumes we own the area (i.e. slices_free is claimed by us) -static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices, mi_stats_t* stats) { +static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices) { mi_assert_internal(!arena->memid.is_pinned); const size_t size = mi_size_of_slices(slices); void* const p = mi_arena_slice_start(arena, slice_index); bool needs_recommit; if (mi_bitmap_is_setN(arena->slices_committed, slice_index, slices)) { // all slices are committed, we can purge freely - needs_recommit = _mi_os_purge(p, size, stats); + needs_recommit = _mi_os_purge(p, size); } else { // some slices are not committed -- this can happen when a partially committed slice is freed @@ -1296,7 +1297,7 @@ static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices, // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), // and also undo the decommit stats (as it was already adjusted) mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); - needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */); if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); } } @@ -1312,13 +1313,13 @@ static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices, // Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. 
// Note: assumes we (still) own the area as we may purge immediately -static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices, mi_stats_t* stats) { +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices) { const long delay = mi_arena_purge_delay(); if (delay < 0) return; // is purging allowed at all? if (_mi_preloading() || delay == 0) { // decommit directly - mi_arena_purge(arena, slice_index, slices, stats); + mi_arena_purge(arena, slice_index, slices); } else { // schedule decommit @@ -1327,14 +1328,13 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_ } -static void mi_arenas_try_purge(bool force, bool visit_all, mi_stats_t* stats) { +static void mi_arenas_try_purge(bool force, bool visit_all) { if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); if (max_arena == 0) return; // _mi_error_message(EFAULT, "purging not yet implemented\n"); - MI_UNUSED(stats); MI_UNUSED(visit_all); MI_UNUSED(force); } @@ -1385,7 +1385,7 @@ void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid) { if (p != NULL) return p; // or fall back to the OS - p = _mi_os_alloc(size, memid, &_mi_stats_main); + p = _mi_os_alloc(size, memid); if (p == NULL) return NULL; // zero the OS memory if needed @@ -1398,7 +1398,7 @@ void* _mi_arena_meta_zalloc(size_t size, mi_memid_t* memid) { void _mi_arena_meta_free(void* p, mi_memid_t memid, size_t size) { if (mi_memkind_is_os(memid.memkind)) { - _mi_os_free(p, size, memid, &_mi_stats_main); + _mi_os_free(p, size, memid); } else { mi_assert(memid.memkind == MI_MEM_STATIC); diff --git a/src/heap.c b/src/heap.c index 3bf8b976..d2914361 100644 --- a/src/heap.c +++ b/src/heap.c @@ -119,36 +119,31 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_deferred_free(heap, force); // python/cpython#112532: we may be called from a thread that is not the owner of the heap - const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id()); + // const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id()); // note: never reclaim on collect but leave it to threads that need storage to reclaim - if ( - #ifdef NDEBUG - collect == MI_FORCE - #else - collect >= MI_FORCE - #endif - && is_main_thread && mi_heap_is_backing(heap) && heap->allow_page_reclaim) - { - // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. - // if all memory is freed by now, all segments should be freed. - // note: this only collects in the current subprocess - _mi_arena_reclaim_all_abandoned(heap); - } + //if ( + //#ifdef NDEBUG + // collect == MI_FORCE + //#else + // collect >= MI_FORCE + //#endif + // && is_main_thread && mi_heap_is_backing(heap) && heap->allow_page_reclaim) + //{ + // // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. + // // if all memory is freed by now, all segments should be freed. 
+ // // note: this only collects in the current subprocess + // _mi_arena_reclaim_all_abandoned(heap); + //} // collect retired pages _mi_heap_collect_retired(heap, force); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); - - // if forced, collect thread data cache on program-exit (or shared library unload) - if (force && is_main_thread && mi_heap_is_backing(heap)) { - _mi_thread_data_collect(); // collect thread data cache - } - + // collect arenas (this is program wide so don't force purges on abandonment of threads) - _mi_arenas_collect(collect == MI_FORCE /* force purge? */, &heap->tld->stats); + _mi_arenas_collect(collect == MI_FORCE /* force purge? */); } void _mi_heap_collect_abandon(mi_heap_t* heap) { @@ -187,24 +182,25 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag) { +void _mi_heap_init(mi_heap_t* heap, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag) { _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); - heap->tld = tld; + heap->tld = _mi_tld(); heap->thread_id = _mi_thread_id(); heap->arena_id = arena_id; heap->allow_page_reclaim = !noreclaim; heap->allow_page_abandon = (!noreclaim && mi_option_get(mi_option_full_page_retain) >= 0); heap->tag = tag; - if (tld->is_in_threadpool) { + if (heap->tld->is_in_threadpool) { // if we run as part of a thread pool it is better to not arbitrarily reclaim abandoned pages into our heap. // (but abandoning is good in this case) heap->allow_page_reclaim = false; } - if (heap == tld->heap_backing) { + if (heap->tld->heap_backing == NULL) { + heap->tld->heap_backing = heap; // first heap becomes the backing heap _mi_random_init(&heap->random); } else { - _mi_random_split(&tld->heap_backing->random, &heap->random); + _mi_random_split(&heap->tld->heap_backing->random, &heap->random); } heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); @@ -220,7 +216,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new_ex(int heap_tag, bool allow_destroy, mi mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? if (heap == NULL) return NULL; mi_assert(heap_tag >= 0 && heap_tag < 256); - _mi_heap_init(heap, bheap->tld, arena_id, allow_destroy /* no reclaim? */, (uint8_t)heap_tag /* heap tag */); + _mi_heap_init(heap, arena_id, allow_destroy /* no reclaim? 
*/, (uint8_t)heap_tag /* heap tag */); return heap; } diff --git a/src/init.c b/src/init.c index b66efc69..f09821b4 100644 --- a/src/init.c +++ b/src/init.c @@ -96,6 +96,8 @@ const mi_page_t _mi_page_empty = { // may lead to allocation itself on some platforms) // -------------------------------------------------------- +#define MI_MEMID_STATIC {{{0}},true /* pinned */, true /* committed */, false /* zero */, MI_MEM_STATIC } + mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, // MI_ATOMIC_VAR_INIT(NULL), // thread delayed free @@ -107,6 +109,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next + MI_MEMID_STATIC, // memid false, // can reclaim true, // can eager abandon 0, // tag @@ -135,9 +138,9 @@ static mi_decl_cache_align mi_tld_t tld_main = { &_mi_heap_main, &_mi_heap_main, &mi_subproc_default, // subproc 0, // tseq + MI_MEMID_STATIC, // memid false, // recurse false, // is_in_threadpool - { 0, &tld_main.stats }, // os { MI_STATS_NULL } // stats }; @@ -152,6 +155,7 @@ mi_decl_cache_align mi_heap_t _mi_heap_main = { 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap + MI_MEMID_STATIC, // memid true, // allow page reclaim true, // allow page abandon 0, // tag @@ -230,6 +234,47 @@ mi_heap_t* _mi_heap_main_get(void) { } +/* ----------------------------------------------------------- + Thread local data +----------------------------------------------------------- */ + +// Thread sequence number +static _Atomic(size_t) mi_tcount; + +// The mimalloc thread local data +mi_decl_thread mi_tld_t* mi_tld; + +// Allocate fresh tld +static mi_tld_t* mi_tld_alloc(void) { + if (_mi_is_main_thread()) { + return &tld_main; + } + else { + mi_memid_t memid; + mi_tld_t* tld = (mi_tld_t*)_mi_meta_zalloc(sizeof(mi_tld_t), &memid); + if (tld==NULL) { + _mi_error_message(ENOMEM, "unable to allocate memory for thread local data\n"); + return NULL; + } + tld->memid = memid; + tld->heap_backing = NULL; + tld->heaps = NULL; + tld->subproc = &mi_subproc_default; + tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1); + tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); + return tld; + } +} + +mi_tld_t* _mi_tld(void) { + if (mi_tld==NULL) { + mi_tld = mi_tld_alloc(); + } + return mi_tld; +} + + + /* ----------------------------------------------------------- Sub process ----------------------------------------------------------- */ @@ -239,11 +284,11 @@ mi_subproc_id_t mi_subproc_main(void) { } mi_subproc_id_t mi_subproc_new(void) { - mi_memid_t memid = _mi_memid_none(); - mi_subproc_t* subproc = (mi_subproc_t*)_mi_arena_meta_zalloc(sizeof(mi_subproc_t), &memid); + mi_memid_t memid; + mi_subproc_t* subproc = (mi_subproc_t*)_mi_meta_zalloc(sizeof(mi_subproc_t),&memid); if (subproc == NULL) return NULL; - subproc->memid = memid; subproc->abandoned_os_list = NULL; + subproc->memid = memid; mi_lock_init(&subproc->abandoned_os_lock); mi_lock_init(&subproc->abandoned_os_visit_lock); return subproc; @@ -269,7 +314,7 @@ void mi_subproc_delete(mi_subproc_id_t subproc_id) { // todo: should we refcount subprocesses? 
mi_lock_done(&subproc->abandoned_os_lock); mi_lock_done(&subproc->abandoned_os_visit_lock); - _mi_arena_meta_free(subproc, subproc->memid, sizeof(mi_subproc_t)); + _mi_meta_free(subproc, sizeof(mi_subproc_t), subproc->memid); } void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { @@ -281,94 +326,10 @@ void mi_subproc_add_current_thread(mi_subproc_id_t subproc_id) { } - /* ----------------------------------------------------------- - Initialization and freeing of the thread local heaps + Allocate heap data ----------------------------------------------------------- */ -// note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). -typedef struct mi_thread_data_s { - mi_heap_t heap; // must come first due to cast in `_mi_heap_done` - mi_tld_t tld; - mi_memid_t memid; // must come last due to zero'ing -} mi_thread_data_t; - - -// Thread meta-data is allocated directly from the OS. For -// some programs that do not use thread pools and allocate and -// destroy many OS threads, this may causes too much overhead -// per thread so we maintain a small cache of recently freed metadata. - -#define TD_CACHE_SIZE (32) -static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; - -static mi_thread_data_t* mi_thread_data_zalloc(void) { - // try to find thread metadata in the cache - bool is_zero = false; - mi_thread_data_t* td = NULL; - for (int i = 0; i < TD_CACHE_SIZE; i++) { - td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); - if (td != NULL) { - // found cached allocation, try use it - td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); - if (td != NULL) { - break; - } - } - } - - // if that fails, allocate as meta data - if (td == NULL) { - mi_memid_t memid; - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); - if (td == NULL) { - // if this fails, try once more. 
(issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); - if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); - } - } - if (td != NULL) { - td->memid = memid; - is_zero = memid.initially_zero; - } - } - - if (td != NULL && !is_zero) { - _mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid)); - } - return td; -} - -static void mi_thread_data_free( mi_thread_data_t* tdfree ) { - // try to add the thread metadata to the cache - for (int i = 0; i < TD_CACHE_SIZE; i++) { - mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); - if (td == NULL) { - mi_thread_data_t* expected = NULL; - if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) { - return; - } - } - } - // if that fails, just free it directly - _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); -} - -void _mi_thread_data_collect(void) { - // free all thread metadata from the cache - for (int i = 0; i < TD_CACHE_SIZE; i++) { - mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); - if (td != NULL) { - td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); - if (td != NULL) { - _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main); - } - } - } -} - // Initialize the thread local default heap, called from `mi_thread_init` static bool _mi_thread_heap_init(void) { if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; @@ -380,32 +341,21 @@ static bool _mi_thread_heap_init(void) { //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { - // use `_mi_os_alloc` to allocate directly from the OS - mi_thread_data_t* td = mi_thread_data_zalloc(); - if (td == NULL) return false; - - mi_tld_t* tld = &td->tld; - mi_heap_t* heap = &td->heap; - _mi_tld_init(tld, heap); // must be before `_mi_heap_init` - _mi_heap_init(heap, tld, _mi_arena_id_none(), false /* can reclaim */, 0 /* default tag */); + // allocate heap and thread local data + mi_tld_t* tld = _mi_tld(); // allocates & initializes tld if needed + mi_memid_t memid; + mi_heap_t* heap = (tld == NULL ? 
NULL : (mi_heap_t*)_mi_meta_zalloc(sizeof(mi_heap_t), &memid)); + if (heap==NULL || tld==NULL) { + _mi_error_message(ENOMEM, "unable to allocate heap meta-data\n"); + return false; + } + heap->memid = memid; + _mi_heap_init(heap, _mi_arena_id_none(), false /* can reclaim */, 0 /* default tag */); _mi_heap_set_default_direct(heap); } return false; } -// Thread sequence number -static _Atomic(size_t) mi_tcount; - -// initialize thread local data -void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) { - _mi_memzero_aligned(tld,sizeof(mi_tld_t)); - tld->heap_backing = bheap; - tld->heaps = NULL; - tld->subproc = &mi_subproc_default; - tld->tseq = mi_atomic_add_acq_rel(&mi_tcount, 1); - tld->os.stats = &tld->stats; - tld->is_in_threadpool = _mi_prim_thread_is_in_threadpool(); -} // Free the thread local default heap (called from `mi_thread_done`) static bool _mi_thread_heap_done(mi_heap_t* heap) { @@ -441,7 +391,7 @@ static bool _mi_thread_heap_done(mi_heap_t* heap) { // free if not the main thread if (heap != &_mi_heap_main) { - mi_thread_data_free((mi_thread_data_t*)heap); + _mi_meta_free(heap, sizeof(mi_heap_t), heap->memid); } else { #if 0 @@ -533,7 +483,13 @@ void _mi_thread_done(mi_heap_t* heap) if (heap->thread_id != _mi_thread_id()) return; // abandon the thread local heap - if (_mi_thread_heap_done(heap)) return; // returns true if already ran + _mi_thread_heap_done(heap); // returns true if already ran + + // free thread local data + if (mi_tld != NULL) { + _mi_meta_free(mi_tld, sizeof(mi_tld_t), mi_tld->memid); + mi_tld = NULL; + } } void _mi_heap_set_default_direct(mi_heap_t* heap) { @@ -689,7 +645,7 @@ void mi_cdecl _mi_process_done(void) { if (mi_option_is_enabled(mi_option_destroy_on_exit)) { mi_collect(true /* force */); _mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_arena_unsafe_destroy_all(& _mi_heap_main_get()->tld->stats); + _mi_arena_unsafe_destroy_all(); } if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { diff --git a/src/os.c b/src/os.c index 0c020302..b913fb1c 100644 --- a/src/os.c +++ b/src/os.c @@ -9,6 +9,8 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc/atomic.h" #include "mimalloc/prim.h" +// always use main stats for OS calls +#define os_stats (&_mi_stats_main) /* ----------------------------------------------------------- Initialization. 
@@ -89,8 +91,8 @@ void _mi_os_init(void) { /* ----------------------------------------------------------- Util -------------------------------------------------------------- */ -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); +bool _mi_os_decommit(void* addr, size_t size); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { MI_UNUSED(try_alignment); MI_UNUSED(size); @@ -102,11 +104,9 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { Free memory -------------------------------------------------------------- */ -static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); +static void mi_os_free_huge_os_pages(void* p, size_t size); -static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; +static void mi_os_prim_free(void* addr, size_t size, bool still_committed) { mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) int err = _mi_prim_free(addr, size); @@ -114,13 +114,12 @@ static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_st _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } if (still_committed) { - _mi_stat_decrease(&stats->committed, size); + _mi_stat_decrease(&os_stats->committed, size); } - _mi_stat_decrease(&stats->reserved, size); + _mi_stat_decrease(&os_stats->reserved, size); } -void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats) { - if (stats == NULL) stats = &_mi_stats_main; +void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid) { if (mi_memkind_is_os(memid.memkind)) { size_t csize = memid.mem.os.size; if (csize==0) { _mi_os_good_alloc_size(size); } @@ -135,10 +134,10 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me // free it if (memid.memkind == MI_MEM_OS_HUGE) { mi_assert(memid.is_pinned); - mi_os_free_huge_os_pages(base, csize, stats); + mi_os_free_huge_os_pages(base, csize); } else { - mi_os_prim_free(base, csize, still_committed, stats); + mi_os_prim_free(base, csize, still_committed); } } else { @@ -147,9 +146,8 @@ void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t me } } -void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) { - if (stats == NULL) stats = &_mi_stats_main; - _mi_os_free_ex(p, size, true, memid, stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid) { + _mi_os_free_ex(p, size, true, memid); } @@ -159,7 +157,7 @@ void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats) { // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. // Also `hint_addr` is a hint and may be ignored. 
-static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) { +static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_zero != NULL); mi_assert_internal(is_large != NULL); @@ -173,13 +171,11 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), addr: %p, size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, hint_addr, size, try_alignment, commit, allow_large); } - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - _mi_stat_counter_increase(&stats->mmap_calls, 1); + _mi_stat_counter_increase(&os_stats->mmap_calls, 1); if (p != NULL) { - _mi_stat_increase(&stats->reserved, size); + _mi_stat_increase(&os_stats->reserved, size); if (commit) { - _mi_stat_increase(&stats->committed, size); + _mi_stat_increase(&os_stats->committed, size); // seems needed for asan (or `mimalloc-test-api` fails) #ifdef MI_TRACK_ASAN if (*is_zero) { mi_track_mem_defined(p,size); } @@ -190,14 +186,14 @@ static void* mi_os_prim_alloc_at(void* hint_addr, size_t size, size_t try_alignm return p; } -static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) { - return mi_os_prim_alloc_at(NULL, size, try_alignment, commit, allow_large, is_large, is_zero, tld_stats); +static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero) { + return mi_os_prim_alloc_at(NULL, size, try_alignment, commit, allow_large, is_large, is_zero); } // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. -static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) { +static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_large != NULL); @@ -213,7 +209,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // try first with a requested alignment hint (this will usually be aligned directly on Win 10+ or BSD) void* p = NULL; if (try_direct_alloc) { - p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); + p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero); } // aligned already? 
@@ -227,13 +223,13 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); } #endif - if (p != NULL) { mi_os_prim_free(p, size, commit, stats); } + if (p != NULL) { mi_os_prim_free(p, size, commit); } if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; if (!mi_os_mem_config.has_partial_free) { // win32 virtualAlloc cannot free parts of an allocated block // over-allocate uncommitted (virtual) memory - p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); + p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero); if (p == NULL) return NULL; // set p to the aligned part in the full region @@ -244,12 +240,12 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit // explicitly commit only the aligned part if (commit) { - _mi_os_commit(p, size, NULL, stats); + _mi_os_commit(p, size, NULL); } } else { // mmap can free inside an allocation // overallocate... - p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); + p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero); if (p == NULL) return NULL; // and selectively unmap parts around the over-allocated area. @@ -258,8 +254,8 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); - if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit, stats); } - if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } + if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit); } + if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit); } // we can return the aligned pointer on `mmap` systems p = aligned_p; *base = aligned_p; // since we freed the pre part, `*base == p`. 
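When the OS does not honour the alignment hint, the hunks above fall back to over-allocating `size + alignment` bytes and keeping only an aligned window inside it: on platforms without partial free (Win32 VirtualAlloc) the over-allocation is reserved uncommitted and only the aligned part is committed, while on mmap-style systems the slack before and after the aligned part is unmapped. A small stand-alone sketch of that arithmetic follows; `align_up` and `plan_aligned_trim` are hypothetical helper names standing in for `_mi_align_up`/the patch's inline code, and `size` is assumed to be page-aligned as asserted above.

#include <stdint.h>
#include <stddef.h>

static inline uintptr_t align_up(uintptr_t x, size_t a) {   // `a` must be a power of two
  return (x + (a - 1)) & ~(uintptr_t)(a - 1);
}

// Given an over-allocation `p` of `over_size = size + alignment` bytes, compute the
// aligned pointer and the slack regions around it that can be freed (mmap path).
static void* plan_aligned_trim(void* p, size_t size, size_t alignment, size_t page_size,
                               size_t* pre_size, size_t* mid_size, size_t* post_size) {
  const size_t over_size = size + alignment;
  void* const aligned_p  = (void*)align_up((uintptr_t)p, alignment);
  *pre_size  = (size_t)((uint8_t*)aligned_p - (uint8_t*)p);  // freed in front of the aligned part
  *mid_size  = align_up(size, page_size);                    // the part that is kept (== size here)
  *post_size = over_size - *pre_size - *mid_size;            // freed behind the aligned part
  return aligned_p;                                          // after freeing the pre part, base == aligned_p
}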
@@ -275,33 +271,31 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit OS API: alloc and alloc_aligned ----------------------------------------------------------- */ -void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { +void* _mi_os_alloc(size_t size, mi_memid_t* memid) { *memid = _mi_memid_none(); if (size == 0) return NULL; - if (stats == NULL) stats = &_mi_stats_main; size = _mi_os_good_alloc_size(size); bool os_is_large = false; bool os_is_zero = false; - void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); + void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero); if (p != NULL) { *memid = _mi_memid_create_os(p, size, true, os_is_zero, os_is_large); } return p; } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid) { MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings *memid = _mi_memid_none(); if (size == 0) return NULL; - if (stats == NULL) stats = &_mi_stats_main; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); bool os_is_large = false; bool os_is_zero = false; void* os_base = NULL; - void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, stats ); + void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base); if (p != NULL) { *memid = _mi_memid_create_os(p, size, commit, os_is_zero, os_is_large); memid->mem.os.base = os_base; @@ -311,9 +305,8 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allo return p; } -void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { - MI_UNUSED(stats); - void* p = _mi_os_alloc(size, memid, &_mi_stats_main); +void* _mi_os_zalloc(size_t size, mi_memid_t* memid) { + void* p = _mi_os_alloc(size, memid); if (p == NULL) return NULL; // zero the OS memory if needed @@ -332,27 +325,26 @@ void* _mi_os_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { to use the actual start of the memory region. 
----------------------------------------------------------- */ -void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) { +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid) { mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); *memid = _mi_memid_none(); - if (stats == NULL) stats = &_mi_stats_main; if (offset == 0) { // regular aligned allocation - return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats); + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid); } else { // overallocate to align at an offset const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t oversize = size + extra; - void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, stats); + void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid); if (start == NULL) return NULL; void* const p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); // decommit the overallocation at the start if (commit && extra > _mi_os_page_size()) { - _mi_os_decommit(start, extra, stats); + _mi_os_decommit(start, extra); } return p; } @@ -386,12 +378,10 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; +bool _mi_os_commit(void* addr, size_t size, bool* is_zero) { if (is_zero != NULL) { *is_zero = false; } - _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&stats->commit_calls, 1); + _mi_stat_increase(&os_stats->committed, size); // use size for precise commit vs. decommit + _mi_stat_counter_increase(&os_stats->commit_calls, 1); // page align range size_t csize; @@ -417,11 +407,9 @@ bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats return true; } -static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; +static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit) { mi_assert_internal(needs_recommit!=NULL); - _mi_stat_decrease(&stats->committed, size); + _mi_stat_decrease(&os_stats->committed, size); // page align size_t csize; @@ -438,9 +426,9 @@ static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_ return (err == 0); } -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { +bool _mi_os_decommit(void* addr, size_t size) { bool needs_recommit; - return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats); + return mi_os_decommit_ex(addr, size, &needs_recommit); } @@ -448,13 +436,13 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. 
-bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { +bool _mi_os_reset(void* addr, size_t size) { // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - _mi_stat_increase(&stats->reset, csize); - _mi_stat_counter_increase(&stats->reset_calls, 1); + _mi_stat_increase(&os_stats->reset, csize); + _mi_stat_counter_increase(&os_stats->reset_calls, 1); #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN memset(start, 0, csize); // pretend it is eagerly reset @@ -470,22 +458,22 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. -bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset) { if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? - _mi_stat_counter_increase(&stats->purge_calls, 1); - _mi_stat_increase(&stats->purged, size); + _mi_stat_counter_increase(&os_stats->purge_calls, 1); + _mi_stat_increase(&os_stats->purged, size); if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? !_mi_preloading()) // don't decommit during preloading (unsafe) { bool needs_recommit = true; - mi_os_decommit_ex(p, size, &needs_recommit, stats); + mi_os_decommit_ex(p, size, &needs_recommit); return needs_recommit; } else { if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed - _mi_os_reset(p, size, stats); + _mi_os_reset(p, size); } return false; // needs no recommit } @@ -493,8 +481,8 @@ bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) // either resets or decommits memory, returns true if the memory needs // to be recommitted if it is to be re-used later on. -bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) { - return _mi_os_purge_ex(p, size, true, stats); +bool _mi_os_purge(void* p, size_t size) { + return _mi_os_purge_ex(p, size, true); } @@ -601,15 +589,15 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // no success, issue a warning and break if (p != NULL) { _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); - mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main); + mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true); } break; } // success, record it page++; // increase before timeout check (see issue #711) - _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); - _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); + _mi_stat_increase(&os_stats->committed, MI_HUGE_OS_PAGE_SIZE); + _mi_stat_increase(&os_stats->reserved, MI_HUGE_OS_PAGE_SIZE); // check for timeout if (max_msecs > 0) { @@ -643,11 +631,11 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. 
-static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size) { if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { - mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats); + mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true); size -= MI_HUGE_OS_PAGE_SIZE; base += MI_HUGE_OS_PAGE_SIZE; } @@ -676,8 +664,7 @@ size_t _mi_os_numa_node_count_get(void) { return count; } -int _mi_os_numa_node_get(mi_os_tld_t* tld) { - MI_UNUSED(tld); +int _mi_os_numa_node_get(void) { size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 diff --git a/src/page-map.c b/src/page-map.c index 7a00d172..5c712346 100644 --- a/src/page-map.c +++ b/src/page-map.c @@ -29,7 +29,7 @@ bool _mi_page_map_init(void) { // mi_bitmap_init(&mi_page_map_commit, MI_BITMAP_MIN_BIT_COUNT, true); mi_page_map_all_committed = (page_map_size <= 1*MI_MiB); // _mi_os_has_overcommit(); // commit on-access on Linux systems? - _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid, NULL); + _mi_page_map = (uint8_t*)_mi_os_alloc_aligned(page_map_size, 1, mi_page_map_all_committed, true, &mi_page_map_memid); if (_mi_page_map==NULL) { _mi_error_message(ENOMEM, "unable to reserve virtual memory for the page map (%zu KiB)\n", page_map_size / MI_KiB); return false; @@ -41,7 +41,7 @@ bool _mi_page_map_init(void) { // commit the first part so NULL pointers get resolved without an access violation if (!mi_page_map_all_committed) { bool is_zero; - _mi_os_commit(_mi_page_map, _mi_os_page_size(), &is_zero, NULL); + _mi_os_commit(_mi_page_map, _mi_os_page_size(), &is_zero); if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(_mi_page_map, _mi_os_page_size()); } } _mi_page_map[0] = 1; // so _mi_ptr_page(NULL) == NULL @@ -60,7 +60,7 @@ static void mi_page_map_ensure_committed(size_t idx, size_t slice_count) { bool is_zero; uint8_t* const start = _mi_page_map + (i*mi_page_map_entries_per_commit_bit); const size_t size = mi_page_map_entries_per_commit_bit; - _mi_os_commit(start, size, &is_zero, NULL); + _mi_os_commit(start, size, &is_zero); if (!is_zero && !mi_page_map_memid.initially_zero) { _mi_memzero(start,size); } mi_bitmap_set(&mi_page_map_commit, i); } diff --git a/src/prim/windows/prim.c b/src/prim/windows/prim.c index 80522f47..e06b278d 100644 --- a/src/prim/windows/prim.c +++ b/src/prim/windows/prim.c @@ -17,6 +17,11 @@ terms of the MIT license. 
A copy of the license can be found in the file // Dynamically bind Windows API points for portability //--------------------------------------------- +#if defined(_MSC_VER) +#pragma warning(disable:28159) // don't use GetVersion +#pragma warning(disable:4996) // don't use GetVersion +#endif + static DWORD win_major_version = 6; static DWORD win_minor_version = 0; @@ -126,8 +131,8 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) SYSTEM_INFO si; GetSystemInfo(&si); if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; } - if (si.dwAllocationGranularity > 0) { - config->alloc_granularity = si.dwAllocationGranularity; + if (si.dwAllocationGranularity > 0) { + config->alloc_granularity = si.dwAllocationGranularity; win_allocation_granularity = si.dwAllocationGranularity; } // get virtual address bits @@ -141,7 +146,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) if (memInKiB > 0 && memInKiB < (SIZE_MAX / MI_KiB)) { config->physical_memory = (size_t)(memInKiB * MI_KiB); } - } + } // get the VirtualAlloc2 function HINSTANCE hDll; hDll = LoadLibrary(TEXT("kernelbase.dll")); @@ -818,14 +823,13 @@ static void NTAPI mi_win_main(PVOID module, DWORD reason, LPVOID reserved) { } #endif - bool _mi_prim_thread_is_in_threadpool(void) { #if (MI_ARCH_X64 || MI_ARCH_X86) if (win_major_version >= 6) { // check if this thread belongs to a windows threadpool // see: _TEB* const teb = NtCurrentTeb(); - void* const pool_data = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778))); + void* const pool_data = *((void**)((uint8_t*)teb + (MI_SIZE_BITS == 32 ? 0x0F90 : 0x1778))); return (pool_data != NULL); } #endif diff --git a/src/static.c b/src/static.c index 0a8fa447..dd874f16 100644 --- a/src/static.c +++ b/src/static.c @@ -24,6 +24,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "alloc-aligned.c" #include "alloc-posix.c" #include "arena.c" +#include "arena-meta.c" #include "bitmap.c" #include "heap.c" #include "init.c"
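Besides adding arena-meta.c to the single-translation-unit build in static.c above, the net effect of this patch for internal call sites is that the OS layer no longer takes a `mi_stats_t*` argument. An illustrative before/after, based on the new signatures and the page-map.c hunks in this patch; the wrapping function is hypothetical.

#include "mimalloc/internal.h"

static void example_migrated_call_sites(void* addr, size_t size) {
  bool is_zero;
  mi_memid_t memid;

  // before: _mi_os_commit(addr, size, &is_zero, NULL);
  _mi_os_commit(addr, size, &is_zero);

  // before: void* p = _mi_os_alloc_aligned(size, 1, true, true, &memid, NULL);
  void* p = _mi_os_alloc_aligned(size, 1, true /* commit */, true /* allow_large */, &memid);

  // before: _mi_os_free(p, size, memid, &_mi_stats_main);
  _mi_os_free(p, size, memid);
}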