Merge branch 'dev-slice' into fix-passing-heap-v2

This commit is contained in:
Teodor Spæren 2024-03-31 23:23:17 +02:00 committed by GitHub
commit 8e3d7add99
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG key ID: B5690EEEBB952194
19 changed files with 929 additions and 820 deletions

View file

@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_noinline __declspec(noinline)
#define mi_decl_thread __declspec(thread)
#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
#define mi_decl_weak
#define mi_decl_weak
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
#define mi_decl_noinline __attribute__((noinline))
#define mi_decl_thread __thread
@ -40,7 +40,7 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_noinline
#define mi_decl_thread __thread // hope for the best :-)
#define mi_decl_cache_align
#define mi_decl_weak
#define mi_decl_weak
#endif
#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
@ -133,8 +133,8 @@ void _mi_arena_segment_mark_abandoned(mi_segment_t* segment);
size_t _mi_arena_segment_abandoned_count(void);
typedef struct mi_arena_field_cursor_s { // abstract
mi_arena_id_t start;
int count;
mi_arena_id_t start;
int count;
size_t bitmap_idx;
} mi_arena_field_cursor_t;
void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current);
@ -203,9 +203,9 @@ void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);
// "libc.c"
@ -437,9 +437,14 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
// therefore we align one byte before `p`.
// We check for NULL afterwards on 64-bit systems to improve codegen for `mi_free`.
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
mi_assert_internal(p != NULL);
return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
mi_segment_t* const segment = (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
#if MI_INTPTR_SIZE <= 4
return (p==NULL ? NULL : segment);
#else
return ((intptr_t)segment <= 0 ? NULL : segment);
#endif
}
static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
@ -454,6 +459,7 @@ static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
// Segment belonging to a page
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
mi_assert_internal(page!=NULL);
mi_segment_t* segment = _mi_ptr_segment(page);
mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
return segment;
@ -482,31 +488,28 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const
}
// Quick page start for initialized pages
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
return _mi_segment_page_start(segment, page, page_size);
static inline uint8_t* mi_page_start(const mi_page_t* page) {
mi_assert_internal(page->page_start != NULL);
mi_assert_expensive(_mi_segment_page_start(_mi_page_segment(page),page,NULL) == page->page_start);
return page->page_start;
}
// Get the page containing the pointer
static inline mi_page_t* _mi_ptr_page(void* p) {
mi_assert_internal(p!=NULL);
return _mi_segment_page_of(_mi_ptr_segment(p), p);
}
// Get the block size of a page (special case for huge objects)
static inline size_t mi_page_block_size(const mi_page_t* page) {
const size_t bsize = page->xblock_size;
mi_assert_internal(bsize > 0);
if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) {
return bsize;
}
else {
size_t psize;
_mi_segment_page_start(_mi_page_segment(page), page, &psize);
return psize;
}
mi_assert_internal(page->block_size > 0);
return page->block_size;
}
static inline bool mi_page_is_huge(const mi_page_t* page) {
return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
mi_assert_internal((page->is_huge && _mi_page_segment(page)->kind == MI_SEGMENT_HUGE) ||
(!page->is_huge && _mi_page_segment(page)->kind != MI_SEGMENT_HUGE));
return page->is_huge;
}
// Get the usable block size of a page without fixed padding.

View file

@ -1,5 +1,5 @@
/* ----------------------------------------------------------------------------
Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
@ -13,9 +13,12 @@ terms of the MIT license. A copy of the license can be found in the file
// mi_heap_t : all data for a thread-local heap, contains
// lists of all managed heap pages.
// mi_segment_t : a larger chunk of memory (32GiB) from where pages
// are allocated.
// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from
// are allocated. A segment is divided in slices (64KiB) from
// which pages are allocated.
// mi_page_t : a "mimalloc" page (usually 64KiB or 512KiB) from
// where objects are allocated.
// Note: we write "OS page" for OS memory pages while
// using plain "page" for mimalloc pages (`mi_page_t`).
// --------------------------------------------------------------------------
@ -89,10 +92,11 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
// We used to abandon huge pages but to eagerly deallocate if freed from another thread,
// but that makes it not possible to visit them during a heap walk or include them in a
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from
// another thread so most memory is available until it gets properly freed by the owning thread.
// We used to abandon huge pages in order to eagerly deallocate it if freed from another thread.
// Unfortunately, that makes it not possible to visit them during a heap walk or include them in a
// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
// the owning thread).
// #define MI_HUGE_PAGE_ABANDON 1
@ -192,17 +196,14 @@ typedef int32_t mi_ssize_t;
#error "mimalloc internal: define more bins"
#endif
// Maximum slice offset (15)
#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
// Used as a special value to encode block sizes in 32 bits.
#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB))
// blocks up to this size are always allocated aligned
#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments
#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
// Maximum slice count (255) for which we can find the page for interior pointers
#define MI_MAX_SLICE_OFFSET_COUNT ((MI_BLOCK_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
// ------------------------------------------------------
@ -227,7 +228,7 @@ typedef enum mi_delayed_e {
MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim
MI_NEVER_DELAYED_FREE = 3 // sticky: used for abondoned pages without a owning heap; this only resets on page reclaim
} mi_delayed_t;
@ -266,7 +267,6 @@ typedef uintptr_t mi_thread_free_t;
// implement a monotonic heartbeat. The `thread_free` list is needed for
// avoiding atomic operations in the common case.
//
//
// `used - |thread_free|` == actual blocks that are in use (alive)
// `used - |thread_free| + |free| + |local_free| == capacity`
//
@ -274,16 +274,13 @@ typedef uintptr_t mi_thread_free_t;
// the number of memory accesses in the `mi_page_all_free` function(s).
//
// Notes:
// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc`
// - Using `uint16_t` does not seem to slow things down
// - The size is 8 words on 64-bit which helps the page index calculations
// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
// and 12 are still good for address calculation)
// - To limit the structure size, the `xblock_size` is 32-bits only; for
// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
// - The size is 12 words on 64-bit which helps the page index calculations
// (and 14 words on 32-bit, and encoded free lists add 2 words)
// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`).
// The invariant is that no-delayed-free is only set if there is
// at least one block that will be added, or as already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
@ -292,20 +289,24 @@ typedef struct mi_page_s {
// "owned" by the segment
uint32_t slice_count; // slices in this page (0 if not a page)
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized
uint8_t is_committed:1; // `true` if the page virtual memory is committed
uint8_t is_zero_init:1; // `true` if the page was initially zero initialized
uint8_t is_huge:1; // `true` if the page is in a huge segment (`segment->kind == MI_SEGMENT_HUGE`)
// padding
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
uint8_t free_is_zero : 1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire : 7; // expiration count for retired blocks
uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized
uint8_t retire_expire:7; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
uint32_t used; // number of blocks in use (including blocks in `thread_free`)
uint32_t xblock_size; // size available in each block (always `>0`)
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
uint16_t used; // number of blocks in use (including blocks in `thread_free`)
uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`)
// padding
size_t block_size; // size available in each block (always `>0`)
uint8_t* page_start; // start of the page area containing the blocks
#if (MI_ENCODE_FREELIST || MI_PADDING)
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
@ -317,10 +318,8 @@ typedef struct mi_page_s {
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
#if MI_INTPTR_SIZE==8
uintptr_t padding[1];
#endif
// 64-bit 11 words, 32-bit 13 words, (+2 for secure)
void* padding[1];
} mi_page_t;
@ -331,14 +330,15 @@ typedef struct mi_page_s {
typedef enum mi_page_kind_e {
MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment
MI_PAGE_LARGE, // larger blocks go into a page of just one block
MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment.
MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment
MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment
MI_PAGE_HUGE // a huge page is a single page in a segment of variable size
// used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an aligment `> MI_BLOCK_ALIGNMENT_MAX`.
} mi_page_kind_t;
typedef enum mi_segment_kind_e {
MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside.
MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside.
MI_SEGMENT_HUGE, // segment with just one huge page inside.
} mi_segment_kind_t;
// ------------------------------------------------------
@ -371,13 +371,17 @@ typedef mi_page_t mi_slice_t;
typedef int64_t mi_msecs_t;
// ---------------------------------------------------------------
// a memory id tracks the provenance of arena/OS allocated memory
// ---------------------------------------------------------------
// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
typedef enum mi_memkind_e {
MI_MEM_NONE, // not allocated
MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example)
MI_MEM_OS, // allocated from the OS
MI_MEM_OS_HUGE, // allocated as huge os pages
MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`)
MI_MEM_ARENA // allocated from an arena (the usual case)
} mi_memkind_t;
@ -394,7 +398,7 @@ typedef struct mi_memid_os_info {
typedef struct mi_memid_arena_info {
size_t block_index; // index in the arena
mi_arena_id_t id; // arena id (>= 1)
bool is_exclusive; // the arena can only be used for specific arena allocations
bool is_exclusive; // this arena can only be used for specific arena allocations
} mi_memid_arena_info_t;
typedef struct mi_memid_s {
@ -402,46 +406,56 @@ typedef struct mi_memid_s {
mi_memid_os_info_t os; // only used for MI_MEM_OS
mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
} mem;
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages)
bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages)
bool initially_committed;// `true` if the memory was originally allocated as committed
bool initially_zero; // `true` if the memory was originally zero initialized
mi_memkind_t memkind;
} mi_memid_t;
// Segments are large allocated memory blocks (8mb on 64 bit) from
// the OS. Inside segments we allocated fixed size _pages_ that
// contain blocks.
// -----------------------------------------------------------------------------------------
// Segments are large allocated memory blocks (8mb on 64 bit) from arenas or the OS.
//
// Inside segments we allocated fixed size mimalloc pages (`mi_page_t`) that contain blocks.
// The start of a segment is this structure with a fixed number of slice entries (`slices`)
// usually followed by a guard OS page and the actual allocation area with pages.
// While a page is not allocated, we view it's data as a `mi_slice_t` (instead of a `mi_page_t`).
// Of any free area, the first slice has the info and `slice_offset == 0`; for any subsequent
// slices part of the area, the `slice_offset` is the byte offset back to the first slice
// (so we can quickly find the page info on a free, `internal.h:_mi_segment_page_of`).
// For slices, the `block_size` field is repurposed to signify if a slice is used (`1`) or not (`0`).
// Small and medium pages use a fixed amount of slices to reduce slice fragmentation, while
// large and huge pages span a variable amount of slices.
typedef struct mi_segment_s {
// constant fields
mi_memid_t memid; // memory id for arena allocation
bool allow_decommit;
bool allow_purge;
mi_memid_t memid; // memory id for arena/OS allocation
bool allow_decommit; // can we decommmit the memory
bool allow_purge; // can we purge the memory (reset or decommit)
size_t segment_size;
// segment fields
mi_msecs_t purge_expire;
mi_commit_mask_t purge_mask;
mi_commit_mask_t commit_mask;
mi_msecs_t purge_expire; // purge slices in the `purge_mask` after this time
mi_commit_mask_t purge_mask; // slices that can be purged
mi_commit_mask_t commit_mask; // slices that are currently committed
// from here is zero initialized
struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation)
size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long)
size_t abandoned_visits; // count how often this segment is visited during abondoned reclamation (to force reclaim if it takes too long)
size_t used; // count of pages in use
uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
size_t segment_info_slices; // initial slices we are using segment info and possible guard pages.
size_t segment_info_slices; // initial count of slices that we are using for segment info and possible guard pages.
// layout like this to optimize access in `mi_free`
mi_segment_kind_t kind;
size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment
mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one extra final entry for huge blocks with large alignment
} mi_segment_t;
@ -498,8 +512,6 @@ typedef struct mi_padding_s {
// A heap owns a set of pages.
struct mi_heap_s {
mi_tld_t* tld;
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
_Atomic(mi_block_t*) thread_delayed_free;
mi_threadid_t thread_id; // thread this heap belongs too
mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
@ -511,6 +523,8 @@ struct mi_heap_s {
size_t page_retired_max; // largest retired index into the `pages` array.
mi_heap_t* next; // list of heaps per thread
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
};
@ -626,6 +640,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
// ------------------------------------------------------
// Thread Local data
// ------------------------------------------------------