take 16 bits from used field to create a fast unalign path

Daan 2024-03-18 01:40:03 -07:00
parent 355f44f373
commit cc809b0cd4
6 changed files with 47 additions and 28 deletions
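
In short: a mimalloc page never holds 2^16 or more blocks (`mi_page_init` asserts `page_size / block_size < (1L<<16)`, visible in the last file below), so `page->used` can shrink from `uint32_t` to `uint16_t`. The freed 16 bits become two new fields, `block_size_shift` and `block_offset_adj`, which let `_mi_page_ptr_unalign` compute a block start with a subtraction and a mask instead of a `_mi_page_start` call plus a modulo. The mask half of the trick in isolation (a standalone demo, not mimalloc code):

    #include <assert.h>
    #include <stddef.h>

    int main(void) {
      const size_t block_size = (size_t)1 << 6;  // any power-of-two block size
      for (size_t diff = 0; diff < 1000; diff++) {
        // for power-of-two sizes, masking the low bits equals the modulo;
        // a nonzero block_size_shift is what enables this fast path
        assert((diff & (block_size - 1)) == (diff % block_size));
      }
      return 0;
    }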

File 1 of 6

@@ -202,7 +202,7 @@ void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, siz
 void*       _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
 bool        _mi_free_delayed_block(mi_block_t* block);
-void        _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
+void        _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
 void        _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);

 // "libc.c"

File 2 of 6

@@ -273,7 +273,7 @@ typedef uintptr_t mi_thread_free_t;
 //   and 12 are still good for address calculation)
 // - To limit the structure size, the `xblock_size` is 32-bits only; for
 //   blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
-// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
+// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize
 //   concurrent frees where only the first concurrent free adds to the owning
 //   heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
 // The invariant is that no-delayed-free is only set if there is
@@ -295,19 +295,21 @@ typedef struct mi_page_s {
   uint8_t               retire_expire:7;  // expiration count for retired blocks
   mi_block_t*           free;             // list of available free blocks (`malloc` allocates from this list)
-  uint32_t              used;             // number of blocks in use (including blocks in `thread_free`)
-  uint32_t              xblock_size;      // size available in each block (always `>0`)
   mi_block_t*           local_free;       // list of deferred free blocks by this thread (migrates to `free`)
+  uint16_t              used;             // number of blocks in use (including blocks in `thread_free`)
+  uint8_t               block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (used for quick block start finding for aligned pointers)
+  uint8_t               block_offset_adj; // if not zero, then `(page_start - (uint8_t*)page - 8*(block_offset_adj-1)) % block_size == 0` (used for quick block start finding for aligned pointers)
+  uint32_t              xblock_size;      // size available in each block (always `>0`)
 #if (MI_ENCODE_FREELIST || MI_PADDING)
   uintptr_t             keys[2];          // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
 #endif
   _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
   _Atomic(uintptr_t)        xheap;
-  struct mi_page_s*     next;             // next page owned by this thread with the same `block_size`
-  struct mi_page_s*     prev;             // previous page owned by this thread with the same `block_size`
+  struct mi_page_s*     next;             // next page owned by the heap with the same `block_size`
+  struct mi_page_s*     prev;             // previous page owned by the heap with the same `block_size`
 } mi_page_t;
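
A note on the `block_offset_adj` encoding above: zero means "no fast path", and a value `adj` in 1..255 means `(page_start - (uint8_t*)page) % block_size == 8*(adj-1)`, so byte adjustments of 0..2032 in 8-byte steps are representable. A hedged sketch of the encoder, mirroring the guards added in `mi_page_init` further down (`encode_adj` is a hypothetical helper, not a mimalloc function):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    // returns 0 ("fast path unavailable") unless the adjustment is a
    // non-negative multiple of 8 small enough to fit the 254 usable values
    static uint8_t encode_adj(ptrdiff_t start_offset, size_t block_size) {
      const ptrdiff_t start_adjust = start_offset % (ptrdiff_t)block_size;
      if (start_offset >= 0 && (start_adjust % 8) == 0 && (start_adjust/8) < 255) {
        return (uint8_t)((start_adjust/8) + 1);
      }
      return 0;
    }

    int main(void) {
      assert(encode_adj(128, 64) == 1);  // 128 % 64 == 0 -> adjustment 0 bytes
      assert(encode_adj(136, 64) == 2);  // 136 % 64 == 8 -> adjustment 8 bytes
      assert(encode_adj(132, 64) == 0);  // 132 % 64 == 4 -> not a multiple of 8
      return 0;
    }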
@@ -386,8 +388,8 @@ typedef struct mi_segment_s {
   uintptr_t         cookie;     // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie`
   // layout like this to optimize access in `mi_free`
-  size_t            page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
   _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
+  size_t            page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`).
   mi_page_kind_t    page_kind;  // kind of pages: small, medium, large, or huge
   mi_page_t         pages[1];   // up to `MI_SMALL_PAGES_PER_SEGMENT` pages
 } mi_segment_t;
@@ -446,8 +448,6 @@ typedef struct mi_padding_s {
 // A heap owns a set of pages.
 struct mi_heap_s {
   mi_tld_t*            tld;
-  mi_page_t*           pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
-  mi_page_queue_t      pages[MI_BIN_FULL + 1];             // queue of pages for each size class (or "bin")
   _Atomic(mi_block_t*) thread_delayed_free;
   mi_threadid_t        thread_id; // thread this heap belongs too
   mi_arena_id_t        arena_id;  // arena id if the heap belongs to a specific arena (or 0)
@@ -459,6 +459,8 @@ struct mi_heap_s {
   size_t               page_retired_max; // largest retired index into the `pages` array.
   mi_heap_t*           next;             // list of heaps per thread
   bool                 no_reclaim;       // `true` if this heap should not reclaim abandoned pages
+  mi_page_t*           pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size.
+  mi_page_queue_t      pages[MI_BIN_FULL + 1];             // queue of pages for each size class (or "bin")
 };
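
Moving `pages_free_direct` and `pages` to the tail of `mi_heap_s` is a locality tweak: on 64-bit builds those two arrays span a few kilobytes, so with them up front every access to the hot scalar fields (`thread_delayed_free`, `thread_id`, ...) landed far past the heap's first cache line. A toy before/after comparison of the effect (array size and types are illustrative, not the real MI_PAGES_DIRECT/MI_BIN_FULL layout):

    #include <stdio.h>
    #include <stddef.h>

    // toy layouts that mirror only the field order of the change
    struct heap_before { void* tld; void* big_arrays[130]; void* thread_delayed_free; };
    struct heap_after  { void* tld; void* thread_delayed_free; void* big_arrays[130]; };

    int main(void) {
      // the hot field moves from ~1KB into the struct to its first cache line
      printf("before: thread_delayed_free at offset %zu\n",
             offsetof(struct heap_before, thread_delayed_free));
      printf("after:  thread_delayed_free at offset %zu\n",
             offsetof(struct heap_after, thread_delayed_free));
      return 0;
    }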

File 3 of 6

@@ -37,8 +37,8 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
   }
   mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
   // pop from the free list
-  page->used++;
   page->free = mi_block_next(page, block);
+  page->used++;
   mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
   #if MI_DEBUG>3
   if (page->free_is_zero) {

File 4 of 6

@@ -249,25 +249,30 @@ static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool
 // Adjust a block that was allocated aligned, to the actual start of the block in the page.
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) {
   mi_assert_internal(page!=NULL && p!=NULL);
-  const size_t diff   = (uint8_t*)p - _mi_page_start(segment, page, NULL);
-  const size_t adjust = (diff % mi_page_block_size(page));
+  const size_t diff   = (mi_likely(page->block_offset_adj != 0)
+                          ? (uint8_t*)p - (uint8_t*)page - 8*(page->block_offset_adj-1)
+                          : (uint8_t*)p - _mi_page_start(segment, page, NULL));
+  const size_t adjust = (mi_likely(page->block_size_shift != 0)
+                          ? diff & (((size_t)1 << page->block_size_shift) - 1)
+                          : diff % mi_page_block_size(page));
   return (mi_block_t*)((uintptr_t)p - adjust);
 }

 // free a local pointer
-static void mi_decl_noinline mi_free_generic_local(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept {
+static void mi_decl_noinline mi_free_generic_local(mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept {
   mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
   mi_free_block_local(page, block, true);
 }

 // free a pointer owned by another thread
-static void mi_decl_noinline mi_free_generic_mt(const mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept {
+static void mi_decl_noinline mi_free_generic_mt(mi_segment_t* segment, mi_page_t* page, void* p) mi_attr_noexcept {
   mi_block_t* const block = _mi_page_ptr_unalign(segment, page, p); // don't check `has_aligned` flag to avoid a race (issue #865)
   mi_free_block_mt(segment, page, block);
 }

 // generic free (for runtime integration)
-void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
+void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept {
   if (is_local) mi_free_generic_local(segment,page,p);
   else mi_free_generic_mt(segment,page,p);
 }
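
To see both fast paths end to end, here is a standalone rendering of the new `_mi_page_ptr_unalign` logic with the mimalloc plumbing stubbed out (the `page_start` and `block_size` parameters stand in for `_mi_page_start` and `mi_page_block_size`; `page_t` keeps only the two new fields):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct page_s {
      uint8_t block_size_shift;  // nonzero iff block_size == (1 << block_size_shift)
      uint8_t block_offset_adj;  // nonzero iff (page_start - page - 8*(adj-1)) % block_size == 0
    } page_t;

    static void* page_ptr_unalign(const page_t* page, const void* p,
                                  const uint8_t* page_start, size_t block_size) {
      // fast diff: measure from the page struct itself, skipping _mi_page_start
      const size_t diff = (page->block_offset_adj != 0
        ? (size_t)((const uint8_t*)p - (const uint8_t*)page) - 8*(size_t)(page->block_offset_adj - 1)
        : (size_t)((const uint8_t*)p - page_start));
      // fast adjust: mask for power-of-two sizes, modulo otherwise
      const size_t adjust = (page->block_size_shift != 0
        ? (diff & (((size_t)1 << page->block_size_shift) - 1))
        : (diff % block_size));
      return (void*)((uintptr_t)p - adjust);
    }

    int main(void) {
      union { page_t pg; uint8_t mem[4096]; } u;  // page struct sits at the page base
      u.pg.block_size_shift = 6;                  // block_size == 64, a power of two
      u.pg.block_offset_adj = 1;                  // blocks start 64 bytes in: (64 - 8*0) % 64 == 0
      const uint8_t* page_start = u.mem + 64;
      for (size_t off = 0; off < 4096 - 64; off++) {
        const uint8_t* p = page_start + off;
        uint8_t* block = page_ptr_unalign(&u.pg, p, page_start, 64);
        assert(block == page_start + (off/64)*64); // rounded down to the block start
      }
      return 0;
    }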
@@ -469,7 +474,7 @@ static mi_decl_noinline void mi_free_block_mt(mi_segment_t* segment, mi_page_t*
 // ------------------------------------------------------

 // Bytes available in a block
-mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept {
+static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept {
   const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
   const size_t size = mi_page_usable_size_of(page, block);
   const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;

File 5 of 6

@@ -21,9 +21,11 @@ const mi_page_t _mi_page_empty = {
   false,  // is_zero
   0,      // retire_expire
   NULL,   // free
-  0,      // used
-  0,      // xblock_size
   NULL,   // local_free
+  0,      // used
+  0,      // block size shift
+  0,      // block offset adj
+  0,      // xblock_size
   #if (MI_PADDING || MI_ENCODE_FREELIST)
   { 0, 0 },
   #endif
@@ -93,8 +95,6 @@ const mi_page_t _mi_page_empty = {
 mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   NULL,
-  MI_SMALL_PAGES_EMPTY,
-  MI_PAGE_QUEUES_EMPTY,
   MI_ATOMIC_VAR_INIT(NULL),
   0,              // tid
   0,              // cookie
@@ -104,7 +104,9 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
   0,              // page count
   MI_BIN_FULL, 0, // page retired min/max
   NULL,           // next
-  false
+  false,
+  MI_SMALL_PAGES_EMPTY,
+  MI_PAGE_QUEUES_EMPTY
 };
@@ -130,8 +132,6 @@ static mi_tld_t tld_main = {
 mi_heap_t _mi_heap_main = {
   &tld_main,
-  MI_SMALL_PAGES_EMPTY,
-  MI_PAGE_QUEUES_EMPTY,
   MI_ATOMIC_VAR_INIT(NULL),
   0,              // thread id
   0,              // initial cookie
@@ -141,7 +141,9 @@ mi_heap_t _mi_heap_main = {
   0,              // page count
   MI_BIN_FULL, 0, // page retired min/max
   NULL,           // next heap
-  false           // can reclaim
+  false,          // can reclaim
+  MI_SMALL_PAGES_EMPTY,
+  MI_PAGE_QUEUES_EMPTY
 };

 bool _mi_process_is_initialized = false;  // set to `true` in `mi_process_init`.
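
Most of the churn in these three initializers exists only because `_mi_page_empty`, `_mi_heap_empty`, and `_mi_heap_main` use positional initialization, so every field move in the structs has to be replayed here in the same order. As an aside, C99 designated initializers would make such moves a no-op for the initializer lists; a sketch of that alternative style (not what mimalloc does here, shown only for contrast):

    // naming the fields makes the initializer independent of field order;
    // unnamed fields are zero-initialized
    mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
      .tld                 = NULL,
      .thread_delayed_free = MI_ATOMIC_VAR_INIT(NULL),
      .no_reclaim          = false
    };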

File 6 of 6

@@ -660,7 +660,6 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_page_set_heap(page, heap);
   size_t page_size;
   const void* page_start = _mi_segment_page_start(segment, page, block_size, &page_size, NULL);
-  MI_UNUSED(page_start);
   mi_track_mem_noaccess(page_start,page_size);
   page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE);
   mi_assert_internal(page_size / block_size < (1L<<16));
@@ -677,6 +676,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
     mi_assert_expensive(!page->is_zero_init || mi_mem_is_zero(page_start, page_size));
   }
   #endif
+  if (_mi_is_power_of_two(block_size) && block_size > 0) {
+    page->block_size_shift = (uint32_t)(mi_ctz((uintptr_t)block_size));
+  }
+  const ptrdiff_t start_offset = (uint8_t*)page_start - (uint8_t*)page;
+  const ptrdiff_t start_adjust = start_offset % block_size;
+  if (start_offset >= 0 && (start_adjust % 8) == 0 && (start_adjust/8) < 255) {
+    page->block_offset_adj = (uint8_t)((start_adjust/8) + 1);
+  }

   mi_assert_internal(page->capacity == 0);
   mi_assert_internal(page->free == NULL);
@@ -690,6 +698,8 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
   mi_assert_internal(page->keys[0] != 0);
   mi_assert_internal(page->keys[1] != 0);
   #endif
+  mi_assert_internal(page->block_size_shift == 0 || (block_size == (1UL << page->block_size_shift)));
+  mi_assert_internal(page->block_offset_adj == 0 || (((uint8_t*)page_start - (uint8_t*)page - 8*(page->block_offset_adj-1))) % block_size == 0);
   mi_assert_expensive(mi_page_is_valid_init(page));

   // initialize an initial free list
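
The two new asserts pin down exactly what the fast paths rely on: the shift must reproduce the block size, and the stored offset adjustment must be block-aligned relative to the page struct. The first invariant is just the round trip through the trailing-zero count; a standalone check (with a portable `ctz` standing in for `mi_ctz`):

    #include <assert.h>
    #include <stdint.h>

    // portable count-trailing-zeros for nonzero x, standing in for mi_ctz
    static unsigned ctz(uintptr_t x) {
      unsigned n = 0;
      while ((x & 1) == 0) { x >>= 1; n++; }
      return n;
    }

    int main(void) {
      // for every power-of-two block size, 1 << ctz(size) reconstructs the
      // size, which is the invariant the new mi_assert_internal checks
      for (unsigned shift = 3; shift <= 20; shift++) {
        const uintptr_t block_size = (uintptr_t)1 << shift;
        assert(ctz(block_size) == shift);
        assert(block_size == ((uintptr_t)1 << ctz(block_size)));
      }
      return 0;
    }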