fix comments in types; fix guarded alignment bug

This commit is contained in:
daanx 2024-12-11 13:04:37 -08:00
parent 64eea823e4
commit 565656919e
5 changed files with 67 additions and 91 deletions

View file

@ -116,7 +116,7 @@
<SDLCheck>true</SDLCheck>
<ConformanceMode>Default</ConformanceMode>
<AdditionalIncludeDirectories>../../include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>MI_DEBUG=3;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<PreprocessorDefinitions>MI_DEBUG=3;MI_GUARDED=1;%(PreprocessorDefinitions);</PreprocessorDefinitions>
<CompileAs>CompileAsCpp</CompileAs>
<SupportJustMyCode>false</SupportJustMyCode>
<LanguageStandard>stdcpp20</LanguageStandard>

View file

@ -714,6 +714,12 @@ static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
Guarded objects
------------------------------------------------------------------- */
#if MI_GUARDED
// we always align guarded pointers in a block at an offset
// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
#define MI_BLOCK_TAG_ALIGNED ((mi_encoded_t)(0))
#define MI_BLOCK_TAG_GUARDED (~MI_BLOCK_TAG_ALIGNED)
static inline bool mi_block_ptr_is_guarded(const mi_block_t* block, const void* p) {
const ptrdiff_t offset = (uint8_t*)p - (uint8_t*)block;
return (offset >= (ptrdiff_t)(sizeof(mi_block_t)) && block->next == MI_BLOCK_TAG_GUARDED);
@ -895,6 +901,7 @@ static inline mi_memid_t _mi_memid_create_meta(void* mpage, size_t block_idx, si
return memid;
}
// -------------------------------------------------------------------
// Fast "random" shuffle
// -------------------------------------------------------------------

View file

@ -97,16 +97,8 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
// We used to abandon huge pages in order to eagerly deallocate them if freed from another thread.
// Unfortunately, that makes it impossible to visit them during a heap walk or include them in a
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
// the owning thread).
// #define MI_HUGE_PAGE_ABANDON 1
// ------------------------------------------------------
// Main internal data-structures
// Sizes of internal data-structures
// ------------------------------------------------------
// Sizes are for 64-bit
@ -145,21 +137,32 @@ terms of the MIT license. A copy of the license can be found in the file
// We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX
// ------------------------------------------------------
// Arenas are large reserved areas of memory allocated from
// the OS that are managed by mimalloc to efficiently
// allocate MI_ARENA_SLICE_SIZE slices of memory for the
// mimalloc pages.
// ------------------------------------------------------
// A large memory arena where pages are allocated in.
typedef struct mi_arena_s mi_arena_t; // defined in `arena.c`
// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in arenas, be directly OS allocated, or be statically allocated. The memid keeps track of this.
// Memory can reside in arenas, be directly OS allocated, reside in meta-data pages, or be statically allocated.
// The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
MI_MEM_META, // allocated with the meta data allocator
MI_MEM_STATIC, // allocated in a static area and should not be freed (the initial main heap data for example (`init.c`))
MI_MEM_META, // allocated with the meta data allocator (`arena-meta.c`)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case)
MI_MEM_ARENA // allocated from an arena (the usual case) (`arena.c`)
} mi_memkind_t;
static inline bool mi_memkind_is_os(mi_memkind_t memkind) {
@ -178,10 +181,9 @@ typedef struct mi_memid_os_info {
} mi_memid_os_info_t;
typedef struct mi_memid_arena_info {
uint32_t slice_index; // base index in the arena
mi_arena_t* arena; // arena that contains this memory
uint32_t slice_index; // slice index in the arena
uint32_t slice_count; // allocated slices
mi_arena_id_t id; // arena id (>= 1)
bool is_exclusive; // this arena can only be used for specific arena allocations
} mi_memid_arena_info_t;
typedef struct mi_memid_meta_info {
@ -196,10 +198,10 @@ typedef struct mi_memid_s {
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
mi_memid_meta_info_t meta; // only used for MI_MEM_META
} mem;
mi_memkind_t memkind;
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
mi_memkind_t memkind;
} mi_memid_t;
@ -227,32 +229,21 @@ typedef struct mi_block_s {
mi_encoded_t next;
} mi_block_t;
#if MI_GUARDED
// we always align guarded pointers in a block at an offset
// the block `next` field is then used as a tag to distinguish regular offset aligned blocks from guarded ones
#define MI_BLOCK_TAG_ALIGNED ((mi_encoded_t)(0))
#define MI_BLOCK_TAG_GUARDED (~MI_BLOCK_TAG_ALIGNED)
#endif
// Ownership state of a page; the owned flags are used for efficient multi-threaded freeing.
// When we push on the page thread free queue of an abandoned page,
// we also atomically get to own it. This is needed to atomically
// abandon a page (while other threads could concurrently free blocks in it).
typedef enum mi_owned_e {
MI_OWNED = 0, // some heap owns this page
MI_ABANDONED = 1, // the page is abandoned
} mi_owned_t;
// The `in_full` and `has_aligned` page flags are put in the same field
// to efficiently test if both are false (`full_aligned == 0`) in the `mi_free` routine.
// `has_aligned` is true if the page has pointers at an offset in a block (so we unalign before free-ing)
// `in_full_queue` is true if the page is full and resides in the full queue (so we move it to a regular queue on free-ing)
#define MI_PAGE_IN_FULL_QUEUE MI_ZU(0x01)
#define MI_PAGE_HAS_ALIGNED MI_ZU(0x02)
typedef size_t mi_page_flags_t;
// Thread free list.
// We use the bottom bit of the pointer for `mi_owned_t` flags
// Points to a list of blocks that are freed by other threads.
// The low-bit is set if the page is owned by the current thread. (`mi_page_is_owned`).
// Ownership is required before we can read any non-atomic fields in the page.
// This way we can push a block on the thread free list and try to claim ownership
// atomically in `free.c:mi_free_block_mt`.
typedef uintptr_t mi_thread_free_t;
// Sub processes are used to keep memory separate between them (e.g. multiple interpreters in CPython)
@ -278,17 +269,15 @@ typedef uint8_t mi_heaptag_t;
// the number of memory accesses in the `mi_page_all_free` function(s).
//
// Notes:
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Non-atomic fields can only be accessed if having ownership (low bit of `xthread_free`).
// - If a page is not part of a heap it is called "abandoned" -- in
// that case the `xthreadid` is 0 or 1 (1 is for abandoned pages that
// are in the abandoned page lists of an arena, these are called "mapped" abandoned pages).
// - The layout is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Using `uint16_t` does not seem to slow things down
// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
// The invariant is that no-delayed-free is only set if there is
// at least one block that will be added, or has already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
_Atomic(mi_threadid_t) xthread_id; // thread this page belongs to. (= xheap->thread_id, or 0 if abandoned)
_Atomic(mi_threadid_t) xthread_id; // thread this page belongs to. (= heap->thread_id, or 0 or 1 if abandoned)
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
@ -299,7 +288,7 @@ typedef struct mi_page_s {
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(mi_page_flags_t) xflags; // `in_full` and `has_aligned` flags
_Atomic(mi_page_flags_t) xflags; // `in_full_queue` and `has_aligned` flags
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the blocks
@ -355,7 +344,7 @@ typedef enum mi_page_kind_e {
MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages
MI_PAGE_LARGE, // larger blocks go into 4MiB pages
MI_PAGE_SINGLETON // page containing a single block.
// used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an alignment `> MI_BLOCK_ALIGNMENT_MAX`.
// used for blocks `> MI_LARGE_MAX_OBJ_SIZE` or an alignment `> MI_PAGE_MAX_OVERALLOC_ALIGN`.
} mi_page_kind_t;
@ -366,7 +355,7 @@ typedef enum mi_page_kind_e {
// A heap just owns a set of pages for allocation and
// can only allocate/reallocate from the thread that created it.
// Freeing blocks can be done from any thread though.
// Per thread, the segments are shared among its heaps.
//
// Per thread, there is always a default heap that is
// used for allocation; it is initialized to statically
// point to an empty heap to avoid initialization checks
@ -436,16 +425,6 @@ struct mi_heap_s {
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
};
// ------------------------------------------------------
// Arenas
// These are large reserved areas of memory allocated from
// the OS that are managed by mimalloc to efficiently
// allocate MI_SLICE_SIZE slices of memory for the
// mimalloc pages.
// ------------------------------------------------------
// A large memory arena where pages are allocated in.
typedef struct mi_arena_s mi_arena_t;
// ------------------------------------------------------
// Debug

View file

@ -35,14 +35,13 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo
// A memory arena descriptor
typedef struct mi_arena_s {
mi_memid_t memid; // memid of the memory area
mi_arena_id_t id; // arena id; 0 for non-specific
mi_arena_id_t id; // arena id (> 0 where `arena == arenas[arena->id - 1]`)
size_t slice_count; // size of the area in arena slices (of `MI_ARENA_SLICE_SIZE`)
size_t info_slices; // initial slices reserved for the arena bitmaps
int numa_node; // associated NUMA node
bool exclusive; // only allow allocations if specifically for this arena
bool is_exclusive; // only allow allocations if specifically for this arena
bool is_large; // memory area consists of large- or huge OS pages (always committed)
mi_lock_t abandoned_visit_lock; // lock is only used when abandoned segments are being visited
_Atomic(mi_msecs_t) purge_expire; // expiration time when slices should be decommitted from `slices_decommit`.
mi_bitmap_t* slices_free; // is the slice free?
@ -93,7 +92,8 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus
bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) {
if (memid.memkind == MI_MEM_ARENA) {
return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id);
const mi_arena_t* arena = memid.mem.arena.arena;
return mi_arena_id_is_suitable(arena->id, arena->is_exclusive, request_arena_id);
}
else {
return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id);
@ -152,34 +152,25 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
// Create an arena memid
static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, size_t slice_index, size_t slice_count) {
static mi_memid_t mi_memid_create_arena(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
mi_assert_internal(slice_index < UINT32_MAX);
mi_assert_internal(slice_count < UINT32_MAX);
mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA);
memid.mem.arena.id = id;
memid.mem.arena.arena = arena;
memid.mem.arena.slice_index = (uint32_t)slice_index;
memid.mem.arena.slice_count = (uint32_t)slice_count;
memid.mem.arena.is_exclusive = is_exclusive;
return memid;
}
// returns if the arena is exclusive
static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, size_t* slice_index, size_t* slice_count) {
// get the arena and slice span
static mi_arena_t* mi_arena_from_memid(mi_memid_t memid, size_t* slice_index, size_t* slice_count) {
mi_assert_internal(memid.memkind == MI_MEM_ARENA);
*arena_index = mi_arena_id_index(memid.mem.arena.id);
mi_arena_t* arena = memid.mem.arena.arena;
if (slice_index) *slice_index = memid.mem.arena.slice_index;
if (slice_count) *slice_count = memid.mem.arena.slice_count;
return memid.mem.arena.is_exclusive;
return arena;
}
// Get the arena and slice index/count for a memid.
// The arena index is obtained through `mi_arena_memid_indices` (which is also
// handed the `slice_index` and `slice_count` out-parameters unchanged) and is
// then resolved to an arena pointer with `mi_arena_from_index`.
// The boolean result of `mi_arena_memid_indices` is not used here.
static mi_arena_t* mi_arena_from_memid(mi_memid_t memid, size_t* slice_index, size_t* slice_count) {
size_t arena_index;
mi_arena_memid_indices(memid, &arena_index, slice_index, slice_count);
return mi_arena_from_index(arena_index);
}
static mi_arena_t* mi_page_arena(mi_page_t* page, size_t* slice_index, size_t* slice_count) {
// todo: maybe store the arena* directly in the page?
return mi_arena_from_memid(page->memid, slice_index, slice_count);
@ -198,7 +189,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(
// claimed it!
void* p = mi_arena_slice_start(arena, slice_index);
*memid = mi_memid_create_arena(arena->id, arena->exclusive, slice_index, slice_count);
*memid = mi_memid_create_arena(arena, slice_index, slice_count);
memid->is_pinned = arena->memid.is_pinned;
// set the dirty bits
@ -323,7 +314,7 @@ static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t re
static inline bool mi_arena_is_suitable(mi_arena_t* arena, mi_arena_id_t req_arena_id, int numa_node, bool allow_large) {
if (!allow_large && arena->is_large) return false;
if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return false;
if (!mi_arena_id_is_suitable(arena->id, arena->is_exclusive, req_arena_id)) return false;
if (req_arena_id == _mi_arena_id_none()) { // if not specific, check numa affinity
const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node);
if (!numa_suitable) return false;
@ -628,8 +619,8 @@ static mi_page_t* mi_arena_page_alloc_fresh(size_t slice_count, size_t block_siz
// this ensures that all blocks in such pages are OS page size aligned (which is needed for the guard pages)
const size_t os_page_size = _mi_os_page_size();
mi_assert_internal(MI_PAGE_ALIGN >= os_page_size);
if (block_size % os_page_size == 0 && block_size > os_page_size /* at least 2 or more */ ) {
block_start = _mi_align_up(_mi_page_info_size(), os_page_size);
if (!os_align && block_size % os_page_size == 0 && block_size > os_page_size /* at least 2 or more */ ) {
block_start = _mi_align_up(mi_page_info_size(), os_page_size);
}
else
#endif
@ -961,7 +952,7 @@ static void mi_arenas_unsafe_destroy(void) {
for (size_t i = 0; i < max_arena; i++) {
mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]);
if (arena != NULL) {
mi_lock_done(&arena->abandoned_visit_lock);
// mi_lock_done(&arena->abandoned_visit_lock);
if (mi_memkind_is_os(arena->memid.memkind)) {
mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL);
_mi_os_free(mi_arena_start(arena), mi_arena_size(arena), arena->memid);
@ -1085,13 +1076,13 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
// init
arena->id = _mi_arena_id_none();
arena->memid = memid;
arena->exclusive = exclusive;
arena->is_exclusive = exclusive;
arena->slice_count = slice_count;
arena->info_slices = info_slices;
arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
arena->is_large = is_large;
arena->purge_expire = 0;
mi_lock_init(&arena->abandoned_visit_lock);
// mi_lock_init(&arena->abandoned_visit_lock);
// init bitmaps
uint8_t* base = mi_arena_start(arena) + bitmap_base;

View file

@ -11,6 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file
#include <string.h> // memcpy, memset
#include <stdlib.h> // atexit
#define MI_MEMID_STATIC {{{NULL,0}}, MI_MEM_STATIC, true /* pinned */, true /* committed */, false /* zero */ }
// Empty page used to initialize the small free pages array
const mi_page_t _mi_page_empty = {
@ -34,7 +35,7 @@ const mi_page_t _mi_page_empty = {
NULL, // xheap
NULL, NULL, // next, prev
NULL, // subproc
{ {{ NULL, 0}}, false, false, false, MI_MEM_NONE } // memid
MI_MEMID_STATIC // memid
};
#define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty)
@ -96,8 +97,6 @@ const mi_page_t _mi_page_empty = {
// may lead to allocation itself on some platforms)
// --------------------------------------------------------
#define MI_MEMID_STATIC {{{NULL,0}}, true /* pinned */, true /* committed */, false /* zero */, MI_MEM_STATIC }
mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
NULL,
// MI_ATOMIC_VAR_INIT(NULL), // thread delayed free