diff --git a/include/mimalloc/internal.h b/include/mimalloc/internal.h
index 61780781..4ea2d855 100644
--- a/include/mimalloc/internal.h
+++ b/include/mimalloc/internal.h
@@ -490,7 +490,7 @@ static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const
 
 // Quick page start for initialized pages
 static inline uint8_t* mi_page_start(const mi_page_t* page) {
   mi_assert_internal(page->page_start != NULL);
-  mi_assert_expensive( _mi_segment_page_start(_mi_page_segment(page), page, NULL) == page->page_start);
+  mi_assert_expensive(_mi_segment_page_start(_mi_page_segment(page),page,NULL) == page->page_start);
   return page->page_start;
 }
diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index 4e96c5ec..e5cece08 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -17,8 +17,8 @@ terms of the MIT license. A copy of the license can be found in the file
 // which pages are allocated.
 // mi_page_t   : a "mimalloc" page (usually 64KiB or 512KiB) from
 //               where objects are allocated.
-// Note: we always explicitly use "OS page" to refer to OS pages
-// and just use "page" to refer to mimalloc pages (`mi_page_t`)
+// Note: we write "OS page" for OS memory pages while
+// using plain "page" for mimalloc pages (`mi_page_t`).
 // --------------------------------------------------------------------------
 
 
@@ -92,10 +92,11 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 
 
-// We used to abandon huge pages but to eagerly deallocate if freed from another thread,
-// but that makes it not possible to visit them during a heap walk or include them in a
-// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from
-// another thread so most memory is available until it gets properly freed by the owning thread.
+// We used to abandon huge pages in order to eagerly deallocate them if freed from another thread.
+// Unfortunately, that makes it impossible to visit them during a heap walk or include them in a
+// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from
+// another thread so the memory becomes "virtually" available (and eventually gets properly freed by
+// the owning thread).
 // #define MI_HUGE_PAGE_ABANDON 1
 
 
@@ -227,7 +228,7 @@ typedef enum mi_delayed_e {
   MI_USE_DELAYED_FREE   = 0, // push on the owning heap thread delayed list
   MI_DELAYED_FREEING    = 1, // temporary: another thread is accessing the owning heap
   MI_NO_DELAYED_FREE    = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
-  MI_NEVER_DELAYED_FREE = 3  // sticky, only resets on page reclaim
+  MI_NEVER_DELAYED_FREE = 3  // sticky: used for abandoned pages without an owning heap; this only resets on page reclaim
 } mi_delayed_t;
 
 
@@ -329,10 +330,10 @@ typedef struct mi_page_s {
 
 typedef enum mi_page_kind_e {
   MI_PAGE_SMALL,   // small blocks go into 64KiB pages inside a segment
-  MI_PAGE_MEDIUM,  // medium blocks go into medium pages inside a segment
-  MI_PAGE_LARGE,   // larger blocks go into a page of just one block
-  MI_PAGE_HUGE,    // huge blocks (> `MI_LARGE_OBJ_SIZE_MAX) or with alignment `> MI_BLOCK_ALIGNMENT_MAX`
-                   // are put into a single page in a single `MI_SEGMENT_HUGE` segment.
+  MI_PAGE_MEDIUM,  // medium blocks go into 512KiB pages inside a segment
+  MI_PAGE_LARGE,   // larger blocks go into a single page spanning a whole segment
+  MI_PAGE_HUGE     // a huge page is a single page in a segment of variable size
+                   // used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an alignment `> MI_BLOCK_ALIGNMENT_MAX`.
 } mi_page_kind_t;
 
 typedef enum mi_segment_kind_e {
@@ -370,13 +371,17 @@ typedef mi_page_t  mi_slice_t;
 typedef int64_t    mi_msecs_t;
 
 
+// ---------------------------------------------------------------
+// a memory id tracks the provenance of arena/OS allocated memory
+// ---------------------------------------------------------------
+
 // Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this.
 typedef enum mi_memkind_e {
   MI_MEM_NONE,      // not allocated
   MI_MEM_EXTERNAL,  // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example)
   MI_MEM_STATIC,    // allocated in a static area and should not be freed (for arena meta data for example)
   MI_MEM_OS,        // allocated from the OS
-  MI_MEM_OS_HUGE,   // allocated as huge os pages
+  MI_MEM_OS_HUGE,   // allocated as huge OS pages (usually 1GiB, pinned to physical memory)
   MI_MEM_OS_REMAP,  // allocated in a remapable area (i.e. using `mremap`)
   MI_MEM_ARENA      // allocated from an arena (the usual case)
 } mi_memkind_t;
 
@@ -393,7 +398,7 @@ typedef struct mi_memid_os_info {
 typedef struct mi_memid_arena_info {
   size_t        block_index;  // index in the arena
   mi_arena_id_t id;           // arena id (>= 1)
-  bool          is_exclusive; // the arena can only be used for specific arena allocations
+  bool          is_exclusive; // this arena can only be used for specific arena allocations
 } mi_memid_arena_info_t;
 
 typedef struct mi_memid_s {
@@ -401,13 +406,14 @@ typedef struct mi_memid_s {
     mi_memid_os_info_t    os;    // only used for MI_MEM_OS
     mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA
   } mem;
-  bool is_pinned;           // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages)
+  bool is_pinned;           // `true` if we cannot decommit/reset/protect this memory (e.g. when allocated using large (2MiB) or huge (1GiB) OS pages)
   bool initially_committed;// `true` if the memory was originally allocated as committed
   bool initially_zero;     // `true` if the memory was originally zero initialized
   mi_memkind_t memkind;
 } mi_memid_t;
 
 
+// -----------------------------------------------------------------------------------------
 // Segments are large allocated memory blocks (8mb on 64 bit) from arenas or the OS.
 //
 // Inside segments we allocated fixed size mimalloc pages (`mi_page_t`) that contain blocks.
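
[Editor's note, not part of the diff] The `mi_memid_t` hunks above document that pinned memory (large 2MiB or huge 1GiB OS pages) must never be decommitted, reset, or protected. As a minimal sketch of how a caller might consult these fields, assuming the `mi_memid_t` and `mi_memkind_t` definitions shown above (the helper name `mi_memid_can_decommit` is hypothetical and not part of mimalloc):

  // Hypothetical sketch: may we decommit this memory, judging only by its memid?
  // Pinned memory must stay committed, and memory that mimalloc does not own
  // (external, static, or unallocated) is not ours to touch at all.
  static bool mi_memid_can_decommit(const mi_memid_t* memid) {
    if (memid->is_pinned) return false;   // e.g. large/huge OS pages stay committed
    switch (memid->memkind) {
      case MI_MEM_OS:                     // allocated from the OS
      case MI_MEM_OS_REMAP:               // remappable OS memory
      case MI_MEM_ARENA:  return true;    // arena memory (the usual case)
      default:            return false;   // MI_MEM_NONE/EXTERNAL/STATIC (and OS_HUGE, which is pinned anyway)
    }
  }
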
@@ -634,6 +640,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
 #define mi_heap_stat_increase(heap,stat,amount)  mi_stat_increase( (heap)->tld->stats.stat, amount)
 #define mi_heap_stat_decrease(heap,stat,amount)  mi_stat_decrease( (heap)->tld->stats.stat, amount)
 
+
 // ------------------------------------------------------
 // Thread Local data
 // ------------------------------------------------------
diff --git a/src/libc.c b/src/libc.c
index f1412722..dd6b4007 100644
--- a/src/libc.c
+++ b/src/libc.c
@@ -210,7 +210,7 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
       if (c == 'x' || c == 'u') {
         if (numtype == 'z') x = va_arg(args, size_t);
         else if (numtype == 't') x = va_arg(args, uintptr_t);  // unsigned ptrdiff_t
-        else if (numtype == 'L') x = va_arg(args, unsigned long long);
+        else if (numtype == 'L') x = (uintptr_t)va_arg(args, unsigned long long);
         else x = va_arg(args, unsigned long);
       }
       else if (c == 'p') {
@@ -231,7 +231,7 @@ void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) {
         intptr_t x = 0;
         if (numtype == 'z') x = va_arg(args, intptr_t );
         else if (numtype == 't') x = va_arg(args, ptrdiff_t);
-        else if (numtype == 'L') x = va_arg(args, long long);
+        else if (numtype == 'L') x = (intptr_t)va_arg(args, long long);
         else x = va_arg(args, long);
         char pre = 0;
         if (x < 0) {
diff --git a/src/page.c b/src/page.c
index 05de541a..eaf78526 100644
--- a/src/page.c
+++ b/src/page.c
@@ -84,7 +84,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
 
   uint8_t* start = mi_page_start(page);
   mi_assert_internal(start == _mi_segment_page_start(_mi_page_segment(page), page, NULL));
-  //const size_t bsize = mi_page_block_size(page);
+  mi_assert_internal(page->is_huge == (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE));
   //mi_assert_internal(start + page->capacity*page->block_size == page->top);
 
   mi_assert_internal(mi_page_list_is_valid(page,page->free));
diff --git a/src/segment.c b/src/segment.c
index 1d2f1e47..2dc404e5 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2020, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2024, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -1263,26 +1263,31 @@ void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) {
 }
 
 static long mi_segment_get_reclaim_tries(void) {
-  // limit the tries to 10% (default) of the abandoned segments with at least 8 tries, and at most 1024.
+  // limit the tries to 10% (default) of the abandoned segments, with at most 1024 tries and at least 8 (once more than 8 segments are abandoned).
   const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100);
   if (perc <= 0) return 0;
   const size_t total_count = _mi_arena_segment_abandoned_count();
+  if (total_count == 0) return 0;
   const size_t relative_count = (total_count > 10000 ? (total_count / 100) * perc : (total_count * perc) / 100); // avoid overflow
-  long max_tries = (long)(relative_count < 8 ? 8 : (relative_count > 1024 ? 1024 : relative_count));
+  long max_tries = (long)(relative_count <= 1 ? 1 : (relative_count > 1024 ? 1024 : relative_count));
+  if (max_tries < 8 && total_count > 8) { max_tries = 8; }
   return max_tries;
 }
 
 static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slices, size_t block_size, bool* reclaimed, mi_segments_tld_t* tld)
 {
   *reclaimed = false;
-  mi_segment_t* segment;
-  mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap,&current);
   long max_tries = mi_segment_get_reclaim_tries();
+  if (max_tries <= 0) return NULL;
+
+  mi_segment_t* segment;
+  mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, &current);
   while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(&current)) != NULL))
   {
     segment->abandoned_visits++;
-    // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments
-    // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way?
+    // todo: should we respect NUMA affinity for abandoned reclaim? perhaps only for the first visit?
+    // todo: an arena-exclusive heap will potentially visit many abandoned unsuitable segments and use many tries.
+    // Perhaps we can skip non-suitable ones in a better way?
     bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid);
     bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees)
     if (segment->used == 0) {
diff --git a/src/stats.c b/src/stats.c
index dd51c13e..a9364027 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -175,13 +175,28 @@ static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* ar
 
 static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) {
   _mi_fprintf(out, arg,"%10s:", msg);
-  if (unit > 0) {
-    mi_print_amount(stat->peak, unit, out, arg);
-    mi_print_amount(stat->allocated, unit, out, arg);
-    mi_print_amount(stat->freed, unit, out, arg);
-    mi_print_amount(stat->current, unit, out, arg);
-    mi_print_amount(unit, 1, out, arg);
-    mi_print_count(stat->allocated, unit, out, arg);
+  if (unit != 0) {
+    if (unit > 0) {
+      mi_print_amount(stat->peak, unit, out, arg);
+      mi_print_amount(stat->allocated, unit, out, arg);
+      mi_print_amount(stat->freed, unit, out, arg);
+      mi_print_amount(stat->current, unit, out, arg);
+      mi_print_amount(unit, 1, out, arg);
+      mi_print_count(stat->allocated, unit, out, arg);
+    }
+    else {
+      mi_print_amount(stat->peak, -1, out, arg);
+      mi_print_amount(stat->allocated, -1, out, arg);
+      mi_print_amount(stat->freed, -1, out, arg);
+      mi_print_amount(stat->current, -1, out, arg);
+      if (unit == -1) {
+        _mi_fprintf(out, arg, "%24s", "");
+      }
+      else {
+        mi_print_amount(-unit, 1, out, arg);
+        mi_print_count((stat->allocated / -unit), 0, out, arg);
+      }
+    }
     if (stat->allocated > stat->freed) {
       _mi_fprintf(out, arg, "  ");
       _mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok));
@@ -191,23 +206,6 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64
       _mi_fprintf(out, arg, "  ok\n");
     }
   }
-  else if (unit<0) {
-    mi_print_amount(stat->peak, -1, out, arg);
-    mi_print_amount(stat->allocated, -1, out, arg);
-    mi_print_amount(stat->freed, -1, out, arg);
-    mi_print_amount(stat->current, -1, out, arg);
-    if (unit==-1) {
-      _mi_fprintf(out, arg, "%24s", "");
-    }
-    else {
-      mi_print_amount(-unit, 1, out, arg);
-      mi_print_count((stat->allocated / -unit), 0, out, arg);
-    }
-    if (stat->allocated > stat->freed)
-      _mi_fprintf(out, arg, "  not all freed!\n");
-    else
-      _mi_fprintf(out, arg, "  ok\n");
-  }
   else {
     mi_print_amount(stat->peak, 1, out, arg);
     mi_print_amount(stat->allocated, 1, out, arg);