diff --git a/ide/vs2022/mimalloc.vcxproj b/ide/vs2022/mimalloc.vcxproj
index 3f1280ee..34bb28fe 100644
--- a/ide/vs2022/mimalloc.vcxproj
+++ b/ide/vs2022/mimalloc.vcxproj
@@ -116,7 +116,7 @@
       true
       Default
       ../../include
-      MI_DEBUG=3;%(PreprocessorDefinitions);
+      MI_DEBUG=3;MI_GUARDED=1;%(PreprocessorDefinitions);
       CompileAsCpp
       false
       stdcpp20
diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index a2e1d5d7..3c5bd486 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -581,7 +581,7 @@ static inline bool mi_page_immediate_available(const mi_page_t* page) {
   return (page->free != NULL);
 }
 
-
+
 // is the page not yet used up to its reserved space?
 static inline bool mi_page_is_expandable(const mi_page_t* page) {
   mi_assert_internal(page != NULL);
@@ -714,6 +714,12 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
   Guarded objects
 ------------------------------------------------------------------- */
 #if MI_GUARDED
+
+// we always align guarded pointers in a block at an offset
+// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
+#define MI_BLOCK_TAG_ALIGNED ((mi_encoded_t)(0))
+#define MI_BLOCK_TAG_GUARDED (~MI_BLOCK_TAG_ALIGNED)
+
 static inline bool mi_block_ptr_is_guarded(const mi_block_t* block, const void* p) {
   const ptrdiff_t offset = (uint8_t*)p - (uint8_t*)block;
   return (offset >= (ptrdiff_t)(sizeof(mi_block_t)) && block->next == MI_BLOCK_TAG_GUARDED);
@@ -895,6 +901,7 @@ static inline mi_memid_t _mi_memid_create_meta(void* mpage, size_t block_idx, si
   return memid;
 }
+
 
 // -------------------------------------------------------------------
 // Fast "random" shuffle
 // -------------------------------------------------------------------
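
The guarded-block tagging added in `internal.h` above can be illustrated in isolation. The sketch below uses simplified names (`encoded_t` stands in for `mi_encoded_t`, assumed to be pointer-sized) and is not the library's actual code:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uintptr_t encoded_t;                       // stand-in for mi_encoded_t
typedef struct block_s { encoded_t next; } block_t;

#define BLOCK_TAG_ALIGNED  ((encoded_t)0)          // a regular offset-aligned block
#define BLOCK_TAG_GUARDED  (~BLOCK_TAG_ALIGNED)    // all bits set marks a guarded block

// `p` is the user pointer handed out at an offset inside `block`; the tag stored
// in the block's `next` word tells a guarded block apart from a plain aligned one
static bool block_ptr_is_guarded(const block_t* block, const void* p) {
  const ptrdiff_t offset = (const uint8_t*)p - (const uint8_t*)block;
  return (offset >= (ptrdiff_t)sizeof(block_t) && block->next == BLOCK_TAG_GUARDED);
}
```

Using all-ones for the guarded tag keeps it maximally distinct from the zero tag of a regular offset-aligned block, so the two cases cannot be confused on free.
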
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index dc1c93fe..cc64a400 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -97,16 +97,8 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 
-// We used to abandon huge pages in order to eagerly deallocate it if freed from another thread.
-// Unfortunately, that makes it not possible to visit them during a heap walk or include them in a
-// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
-// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
-// the owning thread).
-// #define MI_HUGE_PAGE_ABANDON 1
-
-
 // ------------------------------------------------------
-// Main internal data-structures
+// Sizes of internal data-structures
 // ------------------------------------------------------
 
 // Sizes are for 64-bit
@@ -145,21 +137,32 @@ terms of the MIT license. A copy of the license can be found in the file
 // We never allocate more than PTRDIFF_MAX (see also )
 #define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
 
+// ------------------------------------------------------
+// Arena's are large reserved areas of memory allocated from
+// the OS that are managed by mimalloc to efficiently
+// allocate MI_ARENA_SLICE_SIZE slices of memory for the
+// mimalloc pages.
+// ------------------------------------------------------
+
+// A large memory arena where pages are allocated in.
+typedef struct mi_arena_s mi_arena_t;     // defined in `arena.c`
+
 // ---------------------------------------------------------------
 // a memory id tracks the provenance of arena/OS allocated memory
 // ---------------------------------------------------------------
 
-// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
+// Memory can reside in arena's, direct OS allocated, meta-data pages, or statically allocated.
+// The memid keeps track of this.
 typedef enum mi_memkind_e {
   MI_MEM_NONE,      // not allocated
   MI_MEM_EXTERNAL,  // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
-  MI_MEM_STATIC,    // allocated in a static area and should not be freed (for arena meta data for example)
-  MI_MEM_META,      // allocated with the meta data allocator
+  MI_MEM_STATIC,    // allocated in a static area and should not be freed (the initial main heap data for example (`init.c`))
+  MI_MEM_META,      // allocated with the meta data allocator (`arena-meta.c`)
   MI_MEM_OS,        // allocated from the OS
   MI_MEM_OS_HUGE,   // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
   MI_MEM_OS_REMAP,  // allocated in a remapable area (i.e. using `mremap`)
-  MI_MEM_ARENA      // allocated from an arena (the usual case)
+  MI_MEM_ARENA      // allocated from an arena (the usual case) (`arena.c`)
 } mi_memkind_t;
 
 static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
@@ -178,10 +181,9 @@ typedef struct mi_memid_os_info {
 } mi_memid_os_info_t;
 
 typedef struct mi_memid_arena_info {
-  uint32_t    slice_index;      // base index in the arena
+  mi_arena_t* arena;            // arena that contains this memory
+  uint32_t    slice_index;      // slice index in the arena
   uint32_t    slice_count;      // allocated slices
-  mi_arena_id_t id;             // arena id (>= 1)
-  bool        is_exclusive;     // this arena can only be used for specific arena allocations
 } mi_memid_arena_info_t;
 
 typedef struct mi_memid_meta_info {
@@ -196,10 +198,10 @@ typedef struct mi_memid_s {
     mi_memid_arena_info_t arena;  // only used for MI_MEM_ARENA
     mi_memid_meta_info_t  meta;   // only used for MI_MEM_META
   } mem;
+  mi_memkind_t memkind;
   bool is_pinned;           // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages)
   bool initially_committed;// `true` if the memory was originally allocated as committed
   bool initially_zero;     // `true` if the memory was originally zero initialized
-  mi_memkind_t memkind;
 } mi_memid_t;
 
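
The `mi_memkind_t` values above record where a piece of memory came from, and the free path dispatches on that provenance. Below is a hedged sketch of such a dispatch with illustrative stand-in names; these helpers are not mimalloc's API:

```c
#include <stdbool.h>
#include <stddef.h>

typedef enum memkind_e {
  MEM_NONE, MEM_EXTERNAL, MEM_STATIC, MEM_META,
  MEM_OS, MEM_OS_HUGE, MEM_OS_REMAP,             // the OS-backed kinds are kept adjacent
  MEM_ARENA
} memkind_t;

// a range check is enough because the OS-backed kinds are consecutive
static bool memkind_is_os(memkind_t k) {
  return (k >= MEM_OS && k <= MEM_OS_REMAP);
}

// stubs standing in for the arena / OS / meta-data deallocators
static void arena_free(void* p, size_t size) { (void)p; (void)size; }
static void os_free   (void* p, size_t size) { (void)p; (void)size; }
static void meta_free (void* p, size_t size) { (void)p; (void)size; }

// hypothetical free dispatch: the recorded kind decides who gets the memory back
void memid_free(memkind_t kind, void* p, size_t size) {
  if (kind == MEM_ARENA)        { arena_free(p, size); }   // return the slices to the owning arena
  else if (memkind_is_os(kind)) { os_free(p, size); }      // unmap or release to the OS
  else if (kind == MEM_META)    { meta_free(p, size); }    // hand back to the meta-data allocator
  // MEM_NONE, MEM_EXTERNAL and MEM_STATIC are never freed by the allocator
}
```

Keeping the OS-backed kinds adjacent in the enum is presumably what lets a check like `mi_memkind_is_os` stay a simple range test.
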
@@ -227,32 +229,21 @@ typedef struct mi_block_s {
   mi_encoded_t next;
 } mi_block_t;
 
-#if MI_GUARDED
-// we always align guarded pointers in a block at an offset
-// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
-#define MI_BLOCK_TAG_ALIGNED ((mi_encoded_t)(0))
-#define MI_BLOCK_TAG_GUARDED (~MI_BLOCK_TAG_ALIGNED)
-#endif
-
-
-// The owned flags are used for efficient multi-threaded free-ing
-// When we push on the page thread free queue of an abandoned page,
-// we also atomically get to own it. This is needed to atomically
-// abandon a page (while other threads could concurrently free blocks in it).
-typedef enum mi_owned_e {
-  MI_OWNED = 0,     // some heap owns this page
-  MI_ABANDONED = 1, // the page is abandoned
-} mi_owned_t;
-
 // The `in_full` and `has_aligned` page flags are put in the same field
 // to efficiently test if both are false (`full_aligned == 0`) in the `mi_free` routine.
+// `has_aligned` is true if the page has pointers at an offset in a block (so we unalign before free-ing)
+// `in_full_queue` is true if the page is full and resides in the full queue (so we move it to a regular queue on free-ing)
 #define MI_PAGE_IN_FULL_QUEUE  MI_ZU(0x01)
 #define MI_PAGE_HAS_ALIGNED    MI_ZU(0x02)
 typedef size_t mi_page_flags_t;
 
 // Thread free list.
-// We use the bottom bit of the pointer for `mi_owned_t` flags
+// Points to a list of blocks that are freed by other threads.
+// The low-bit is set if the page is owned by the current thread. (`mi_page_is_owned`).
+// Ownership is required before we can read any non-atomic fields in the page.
+// This way we can push a block on the thread free list and try to claim ownership
+// atomically in `free.c:mi_free_block_mt`.
 typedef uintptr_t mi_thread_free_t;
 
 // Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython)
@@ -276,19 +267,17 @@ typedef uint8_t mi_heaptag_t;
 //
 // We don't count `freed` (as |free|) but use `used` to reduce
 // the number of memory accesses in the `mi_page_all_free` function(s).
-//
+//
 // Notes:
-// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
+// - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`).
+// - If a page is not part of a heap it is called "abandoned" -- in
+//   that case the `xthreadid` is 0 or 1 (1 is for abandoned pages that
+//   are in the abandoned page lists of an arena, these are called "mapped" abandoned pages).
+// - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
 // - Using `uint16_t` does not seem to slow things down
-// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
-//   concurrent frees where only the first concurrent free adds to the owning
-//   heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
-//   The invariant is that no-delayed-free is only set if there is
-//   at least one block that will be added, or as already been added, to
-//   the owning heap `thread_delayed_free` list. This guarantees that pages
-//   will be freed correctly even if only other threads free blocks.
+
 typedef struct mi_page_s {
-  _Atomic(mi_threadid_t) xthread_id;  // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned)
+  _Atomic(mi_threadid_t) xthread_id;  // thread this page belongs to. (= heap->thread_id, or 0 or 1 if abandoned)
 
   mi_block_t*  free;                  // list of available free blocks (`malloc` allocates from this list)
   uint16_t     used;                  // number of blocks in use (including blocks in `thread_free`)
@@ -299,7 +288,7 @@ typedef struct mi_page_s {
   mi_block_t*  local_free;            // list of deferred free blocks by this thread (migrates to `free`)
 
   _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads
-  _Atomic(mi_page_flags_t)  xflags;   // `in_full` and `has_aligned` flags
+  _Atomic(mi_page_flags_t)  xflags;   // `in_full_queue` and `has_aligned` flags
 
   size_t       block_size;            // size available in each block (always `>0`)
   uint8_t*     page_start;            // start of the blocks
@@ -355,7 +344,7 @@ typedef enum mi_page_kind_e {
   MI_PAGE_MEDIUM,    // medium blocks go into 512KiB pages
   MI_PAGE_LARGE,     // larger blocks go into 4MiB pages
   MI_PAGE_SINGLETON  // page containing a single block.
-                     // used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an aligment `> MI_BLOCK_ALIGNMENT_MAX`.
+                     // used for blocks `> MI_LARGE_MAX_OBJ_SIZE` or an alignment `> MI_PAGE_MAX_OVERALLOC_ALIGN`.
 } mi_page_kind_t;
 
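
The new `xthread_free` comments in the hunks above describe a pointer-sized word whose low bit is an ownership flag: pushing a block onto the list and claiming an abandoned page happen in one atomic step. A simplified sketch of that encoding using C11 atomics follows — illustrative names only, not the actual `free.c:mi_free_block_mt`:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef uintptr_t thread_free_t;   // head of the thread-free list, ownership flag in bit 0

static inline void*         tf_block(thread_free_t tf)    { return (void*)(tf & ~(uintptr_t)1); }
static inline bool          tf_is_owned(thread_free_t tf) { return (tf & 1) != 0; }
static inline thread_free_t tf_make(void* block, bool owned) {
  return (thread_free_t)((uintptr_t)block | (owned ? 1 : 0));
}

// Push `block` onto the page's thread-free list. If the page was not owned,
// the caller atomically becomes the owner (the function returns true then).
static bool thread_free_push_and_try_claim(_Atomic(thread_free_t)* xthread_free, void* block) {
  thread_free_t old = atomic_load_explicit(xthread_free, memory_order_relaxed);
  thread_free_t desired;
  do {
    *(void**)block = tf_block(old);            // link the previous head through the block's first word
    desired = tf_make(block, true);            // new head; the owned bit is set after a push
  } while (!atomic_compare_exchange_weak_explicit(xthread_free, &old, desired,
                                                  memory_order_release, memory_order_relaxed));
  return !tf_is_owned(old);                    // true if this push claimed an unowned (abandoned) page
}
```

The return value tells the caller whether it just became the owner and may now read the page's non-atomic fields.
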
@@ -366,7 +355,7 @@
 // A heap just owns a set of pages for allocation and
 // can only be allocate/reallocate from the thread that created it.
 // Freeing blocks can be done from any thread though.
-// Per thread, the segments are shared among its heaps.
+//
 // Per thread, there is always a default heap that is
 // used for allocation; it is initialized to statically
 // point to an empty heap to avoid initialization checks
@@ -436,16 +425,6 @@ struct mi_heap_s {
   mi_page_queue_t pages[MI_BIN_FULL + 1];  // queue of pages for each size class (or "bin")
 };
 
-// ------------------------------------------------------
-// Arena's
-// These are large reserved areas of memory allocated from
-// the OS that are managed by mimalloc to efficiently
-// allocate MI_SLICE_SIZE slices of memory for the
-// mimalloc pages.
-// ------------------------------------------------------
-
-// A large memory arena where pages are allocated in.
-typedef struct mi_arena_s mi_arena_t;
 
 // ------------------------------------------------------
 // Debug
diff --git a/src/arena.c b/src/arena.c
index a05e1f5d..c9d21c75 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -35,14 +35,13 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
 // A memory arena descriptor
 typedef struct mi_arena_s {
   mi_memid_t          memid;         // memid of the memory area
-  mi_arena_id_t       id;            // arena id; 0 for non-specific
-
+  mi_arena_id_t       id;            // arena id (> 0 where `arena == arenas[arena->id - 1]`)
+
   size_t              slice_count;   // size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`)
   size_t              info_slices;   // initial slices reserved for the arena bitmaps
   int                 numa_node;     // associated NUMA node
-  bool                exclusive;     // only allow allocations if specifically for this arena
+  bool                is_exclusive;  // only allow allocations if specifically for this arena
   bool                is_large;      // memory area consists of large- or huge OS pages (always committed)
-  mi_lock_t           abandoned_visit_lock;  // lock is only used when abandoned segments are being visited
   _Atomic(mi_msecs_t) purge_expire;  // expiration time when slices should be decommitted from `slices_decommit`.
 
   mi_bitmap_t*        slices_free;   // is the slice free?
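
The descriptor above now documents the invariant `arena == arenas[arena->id - 1]` for its 1-based id. A minimal sketch of the O(1) lookup this gives — hypothetical `MAX_ARENAS` and names, for illustration only:

```c
#include <assert.h>
#include <stddef.h>

#define MAX_ARENAS 160                                 // hypothetical capacity of the arena registry
typedef struct arena_s { size_t id; } arena_t;         // id is 1-based; 0 means "no arena"
static arena_t* arenas[MAX_ARENAS];                    // global registry, indexed by id - 1

// O(1) lookup from an arena id, checking the documented invariant on the way out
static arena_t* arena_from_id(size_t id) {
  if (id == 0 || id > MAX_ARENAS) return NULL;
  arena_t* arena = arenas[id - 1];
  assert(arena == NULL || arena->id == id);            // i.e. arena == arenas[arena->id - 1]
  return arena;
}
```
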
@@ -93,7 +92,8 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus
 
 bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
   if (memid.memkind == MI_MEM_ARENA) {
-    return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
+    const mi_arena_t* arena = memid.mem.arena.arena;
+    return mi_arena_id_is_suitable(arena->id, arena->is_exclusive, request_arena_id);
   }
   else {
     return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id);
@@ -152,34 +152,25 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
 
 // Create an arena memid
-static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, size_t slice_index, size_t slice_count) {
+static mi_memid_t mi_memid_create_arena(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
   mi_assert_internal(slice_index < UINT32_MAX);
   mi_assert_internal(slice_count < UINT32_MAX);
   mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
-  memid.mem.arena.id = id;
+  memid.mem.arena.arena = arena;
   memid.mem.arena.slice_index = (uint32_t)slice_index;
-  memid.mem.arena.slice_count = (uint32_t)slice_count;
-  memid.mem.arena.is_exclusive = is_exclusive;
+  memid.mem.arena.slice_count = (uint32_t)slice_count;
   return memid;
 }
 
-// returns if the arena is exclusive
-static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, size_t* slice_index, size_t* slice_count) {
+// get the arena and slice span
+static mi_arena_t* mi_arena_from_memid(mi_memid_t memid, size_t* slice_index, size_t* slice_count) {
   mi_assert_internal(memid.memkind == MI_MEM_ARENA);
-  *arena_index = mi_arena_id_index(memid.mem.arena.id);
+  mi_arena_t* arena = memid.mem.arena.arena;
   if (slice_index) *slice_index = memid.mem.arena.slice_index;
   if (slice_count) *slice_count = memid.mem.arena.slice_count;
-  return memid.mem.arena.is_exclusive;
+  return arena;
 }
 
-// get the arena and slice index
-static mi_arena_t* mi_arena_from_memid(mi_memid_t memid, size_t* slice_index, size_t* slice_count) {
-  size_t arena_index;
-  mi_arena_memid_indices(memid, &arena_index, slice_index, slice_count);
-  return mi_arena_from_index(arena_index);
-}
-
-
 static mi_arena_t* mi_page_arena(mi_page_t* page, size_t* slice_index, size_t* slice_count) {
   // todo: maybe store the arena* directly in the page?
   return mi_arena_from_memid(page->memid, slice_index, slice_count);
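
`_mi_arena_memid_is_suitable` above now reads the exclusivity flag from the arena itself, since the memid carries an arena pointer. The matching rule it relies on can be sketched as follows — an illustrative stand-in, not the real `mi_arena_id_is_suitable`:

```c
#include <stdbool.h>

typedef int arena_id_t;          // illustrative: 0 means "no specific arena requested"
#define ARENA_ID_NONE 0

// an arena matches a request if the request names it explicitly, or if the
// request is generic and the arena is not reserved for exclusive use
static bool arena_id_is_suitable(arena_id_t arena_id, bool arena_is_exclusive, arena_id_t req_arena_id) {
  if (req_arena_id == ARENA_ID_NONE) return !arena_is_exclusive;  // generic requests skip exclusive arenas
  return (arena_id == req_arena_id);                              // explicit requests match only that arena
}
```
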
@@ -198,7 +189,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
 
     // claimed it!
     void* p = mi_arena_slice_start(arena, slice_index);
-    *memid = mi_memid_create_arena(arena->id, arena->exclusive, slice_index, slice_count);
+    *memid = mi_memid_create_arena(arena, slice_index, slice_count);
     memid->is_pinned = arena->memid.is_pinned;
 
     // set the dirty bits
@@ -323,7 +314,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
 
 static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, int numa_node, bool allow_large) {
   if (!allow_large && arena->is_large) return false;
-  if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return false;
+  if (!mi_arena_id_is_suitable(arena->id, arena->is_exclusive, req_arena_id)) return false;
   if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity
     const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
     if (!numa_suitable) return false;
@@ -628,8 +619,8 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
     // this ensures that all blocks in such pages are OS page size aligned (which is needed for the guard pages)
     const size_t os_page_size = _mi_os_page_size();
     mi_assert_internal(MI_PAGE_ALIGN >= os_page_size);
-    if (block_size % os_page_size == 0 && block_size > os_page_size /* at least 2 or more */ ) {
-      block_start = _mi_align_up(_mi_page_info_size(), os_page_size);
+    if (!os_align && block_size % os_page_size == 0 && block_size > os_page_size /* at least 2 or more */ ) {
+      block_start = _mi_align_up(mi_page_info_size(), os_page_size);
     }
     else
     #endif
@@ -961,7 +952,7 @@ static void mi_arenas_unsafe_destroy(void) {
   for (size_t i = 0; i < max_arena; i++) {
     mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
     if (arena != NULL) {
-      mi_lock_done(&arena->abandoned_visit_lock);
+      // mi_lock_done(&arena->abandoned_visit_lock);
       if (mi_memkind_is_os(arena->memid.memkind)) {
         mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
         _mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid);
@@ -1085,13 +1076,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   // init
   arena->id = _mi_arena_id_none();
   arena->memid = memid;
-  arena->exclusive = exclusive;
+  arena->is_exclusive = exclusive;
   arena->slice_count = slice_count;
   arena->info_slices = info_slices;
   arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
   arena->is_large = is_large;
   arena->purge_expire = 0;
-  mi_lock_init(&arena->abandoned_visit_lock);
+  // mi_lock_init(&arena->abandoned_visit_lock);
 
   // init bitmaps
   uint8_t* base = mi_arena_start(arena) + bitmap_base;
diff --git a/src/init.c b/src/init.c
index ae1ae086..a5a0819e 100644
--- a/src/init.c
+++ b/src/init.c
@@ -11,6 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include <string.h>  // memcpy, memset
 #include <stdlib.h>  // atexit
 
+#define MI_MEMID_STATIC {{{NULL,0}}, MI_MEM_STATIC, true /* pinned */, true /* committed */, false /* zero */ }
 
 // Empty page used to initialize the small free pages array
 const mi_page_t _mi_page_empty = {
@@ -34,7 +35,7 @@ const mi_page_t _mi_page_empty = {
   NULL,                 // xheap
   NULL, NULL,           // next, prev
   NULL,                 // subproc
-  { {{ NULL, 0}}, false, false, false, MI_MEM_NONE }  // memid
+  MI_MEMID_STATIC       // memid
 };
 
 #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@@ -96,8 +97,6 @@ const mi_page_t _mi_page_empty = {
 //  may lead to allocation itself on some platforms)
 // --------------------------------------------------------
 
-#define MI_MEMID_STATIC {{{NULL,0}}, true /* pinned */, true /* committed */, false /* zero */, MI_MEM_STATIC }
-
 mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   NULL,
   // MI_ATOMIC_VAR_INIT(NULL),  // thread delayed free
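
A closing note on the moved `MI_MEMID_STATIC`: with `memkind` placed directly after the `mem` union in `mi_memid_t` (see the `types.h` hunk above), a static memid initializer now lists the kind first. A hedged, self-contained sketch with simplified names:

```c
#include <stdbool.h>
#include <stddef.h>

typedef enum memkind_e { MEM_NONE, MEM_STATIC } memkind_t;

typedef struct memid_s {
  union { struct { void* base; size_t size; } os; } mem;   // simplified: only an OS-info variant
  memkind_t memkind;            // kind sits right after the union, as in the reordered mi_memid_t
  bool is_pinned;
  bool initially_committed;
  bool initially_zero;
} memid_t;

// kind-first aggregate initializer, mirroring the shape of MI_MEMID_STATIC above
#define MEMID_STATIC {{{NULL,0}}, MEM_STATIC, true /* pinned */, true /* committed */, false /* zero */ }

static const memid_t static_memid = MEMID_STATIC;
```

Defining the macro before `_mi_page_empty` lets the empty page reuse the same initializer instead of spelling out the fields — and their order — a second time.
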