From 4c81c3cf90135fd2d3e00be19faf3c5fd7d53f71 Mon Sep 17 00:00:00 2001
From: daanx
Date: Fri, 13 Dec 2024 13:17:00 -0800
Subject: [PATCH] enable purging of free committed slices from arenas

---
 include/mimalloc/types.h |   2 +-
 src/arena.c              | 162 +++++++++++++++++++++++++++++----------
 src/bitmap.c             |  59 +++++++++++++-
 src/bitmap.h             |  23 +++++-
 src/options.c            |  10 ++-
 src/prim/unix/prim.c     |   6 +-
 test/test-stress.c       |  16 ++--
 7 files changed, 222 insertions(+), 56 deletions(-)

diff --git a/include/mimalloc/types.h b/include/mimalloc/types.h
index f4bfa07a..bf1cb5c8 100644
--- a/include/mimalloc/types.h
+++ b/include/mimalloc/types.h
@@ -321,7 +321,7 @@ typedef struct mi_page_s {
 
 // The max object size are checked to not waste more than 12.5% internally over the page sizes.
 // (Except for large pages since huge objects are allocated in 4MiB chunks)
-#define MI_SMALL_MAX_OBJ_SIZE    ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/8)   // < 8 KiB
+#define MI_SMALL_MAX_OBJ_SIZE    ((MI_SMALL_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4)   // < 16 KiB
 #define MI_MEDIUM_MAX_OBJ_SIZE   ((MI_MEDIUM_PAGE_SIZE-MI_PAGE_INFO_SIZE)/4)  // < 128 KiB
 #define MI_LARGE_MAX_OBJ_SIZE    ((MI_LARGE_PAGE_SIZE-MI_PAGE_INFO_SIZE)/2)   // < 2 MiB
 #define MI_LARGE_MAX_OBJ_WSIZE   (MI_LARGE_MAX_OBJ_SIZE/MI_SIZE_SIZE)
 
diff --git a/src/arena.c b/src/arena.c
index 8cf61b74..9f95a699 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -42,13 +42,13 @@ typedef struct mi_arena_s {
   int                  numa_node;          // associated NUMA node
   bool                 is_exclusive;       // only allow allocations if specifically for this arena
   bool                 is_large;           // memory area consists of large- or huge OS pages (always committed)
-  _Atomic(mi_msecs_t)  purge_expire;       // expiration time when pages can be purged from `pages_purge`.
+  _Atomic(mi_msecs_t)  purge_expire;       // expiration time when slices can be purged from `slices_purge`.
 
   mi_bitmap_t*         slices_free;        // is the slice free?
   mi_bitmap_t*         slices_committed;   // is the slice committed? (i.e. accessible)
   mi_bitmap_t*         slices_dirty;       // is the slice potentially non-zero?
+  mi_bitmap_t*         slices_purge;       // slices that can be purged
   mi_bitmap_t*         pages;              // all registered pages (abandoned and owned)
-  mi_bitmap_t*         pages_purge;        // pages that are scheduled to be purged
   mi_bitmap_t*         pages_abandoned[MI_BIN_COUNT];  // abandoned pages per size bin (a set bit means the start of the page)
                                                        // the full queue contains abandoned full pages
   // followed by the bitmaps (whose sizes depend on the arena size)
@@ -57,8 +57,8 @@ typedef struct mi_arena_s {
 
 // Every "page" in `pages_purge` points to purge info
 // (since we use it for any free'd range and not just for pages)
 typedef struct mi_purge_info_s {
-  mi_msecs_t           expire;
-  size_t               slice_count;
+  _Atomic(mi_msecs_t)  expire;
+  _Atomic(size_t)      slice_count;
 } mi_purge_info_t;
 
 
@@ -1123,8 +1123,8 @@ static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int
   arena->slices_free = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_committed = mi_arena_bitmap_init(slice_count,&base);
   arena->slices_dirty = mi_arena_bitmap_init(slice_count,&base);
+  arena->slices_purge = mi_arena_bitmap_init(slice_count, &base);
   arena->pages = mi_arena_bitmap_init(slice_count, &base);
-  arena->pages_purge = mi_arena_bitmap_init(slice_count, &base);
   for( size_t i = 0; i < MI_ARENA_BIN_COUNT; i++) {
     arena->pages_abandoned[i] = mi_arena_bitmap_init(slice_count,&base);
   }
@@ -1224,16 +1224,12 @@ static size_t mi_debug_show_page_bfield(mi_bfield_t field, char* buf, mi_arena_t
   else if (_mi_meta_is_meta_page(start)) { c = 'm'; }
   else if (slice_index + bit < arena->info_slices) { c = 'i'; }
   // else if (mi_bitmap_is_setN(arena->pages_purge, slice_index + bit, NULL)) { c = '*'; }
-  else if (mi_bitmap_is_setN(arena->slices_free, slice_index+bit, 1)) {
-    if (mi_bitmap_is_setN(arena->pages_purge, slice_index + bit, 1)) {
-      mi_assert_internal(bit_of_page <= 0);
-      mi_purge_info_t* pinfo = (mi_purge_info_t*)start;
-      c = '!';
-      bit_of_page = (long)pinfo->slice_count;
-    }
-    if (mi_bitmap_is_setN(arena->slices_committed, slice_index + bit, 1)) { c = '_'; }
+  else if (mi_bitmap_is_set(arena->slices_free, slice_index+bit)) {
+    if (mi_bitmap_is_set(arena->slices_purge, slice_index + bit)) { c = '!'; }
+    else if (mi_bitmap_is_set(arena->slices_committed, slice_index + bit)) { c = '_'; }
     else { c = '.'; }
   }
+  if (bit==MI_BFIELD_BITS-1 && bit_of_page > 1) { c = '>'; }
   buf[bit] = c;
   }
 }
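
For orientation: a slice is a candidate for purging only when all three bitmaps above agree. A hypothetical helper (not part of the patch) that makes the interplay concrete, using the `mi_bitmap_is_set` wrapper this patch introduces in bitmap.h:

    // Hypothetical helper, assuming the bitmap fields introduced above: a
    // slice can be purged when it is free, still committed (so there is
    // something to decommit or reset), and scheduled in `slices_purge`.
    static bool mi_arena_slice_is_purge_candidate(mi_arena_t* arena, size_t slice_index) {
      return mi_bitmap_is_set(arena->slices_free, slice_index) &&
             mi_bitmap_is_set(arena->slices_committed, slice_index) &&
             mi_bitmap_is_set(arena->slices_purge, slice_index);
    }
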
@@ -1390,53 +1386,121 @@ static long mi_arena_purge_delay(void) {
   return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult));
 }
 
-// reset or decommit in an arena and update the committed/decommit bitmaps
+// reset or decommit in an arena and update the commit bitmap
 // assumes we own the area (i.e. slices_free is claimed by us)
-static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slices) {
+static void mi_arena_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
   mi_assert_internal(!arena->memid.is_pinned);
-  const size_t size = mi_size_of_slices(slices);
+  mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
+
+  const size_t size = mi_size_of_slices(slice_count);
   void* const p = mi_arena_slice_start(arena, slice_index);
-  bool needs_recommit;
-  if (mi_bitmap_is_setN(arena->slices_committed, slice_index, slices)) {
-    // all slices are committed, we can purge freely
+  bool needs_recommit = false;  // reset needs no recommit, decommit does need it
+  if (mi_bitmap_is_setN(arena->slices_committed, slice_index, slice_count)) {
+    // all slices are committed, we can purge the entire range
     needs_recommit = _mi_os_purge(p, size);
   }
   else {
-    // some slices are not committed -- this can happen when a partially committed slice is freed
-    // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
-    // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
-    // and also undo the decommit stats (as it was already adjusted)
-    mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
-    needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */);
-    if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); }
+    mi_assert_internal(false); // ?
   }
 
-  // clear the purged slices
-  mi_bitmap_clearN(arena->slices_purge, slices, slice_index);
-
   // update committed bitmap
   if (needs_recommit) {
-    mi_bitmap_clearN(arena->slices_committed, slices, slice_index);
+    mi_bitmap_clearN(arena->slices_committed, slice_index, slice_count);
   }
 }
 
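
`_mi_os_purge` returns true when the purged memory must be re-committed before reuse. A rough stand-alone sketch of the two purge flavors, assuming Linux (illustrative only, not mimalloc's actual OS primitives):

    #include <stdbool.h>
    #include <stddef.h>
    #include <sys/mman.h>

    // Sketch: a 'reset' keeps the memory committed and accessible while the
    // kernel may reclaim the pages; a decommit makes it inaccessible and
    // requires an explicit recommit before the next use.
    static bool os_purge_sketch(void* p, size_t size, bool allow_reset) {
      if (allow_reset) {
        madvise(p, size, MADV_FREE);  // contents may be discarded lazily
        return false;                 // no recommit needed
      }
      mprotect(p, size, PROT_NONE);   // decommit-style: make inaccessible
      return true;                    // caller must recommit before reuse
    }
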
 // Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls.
 // Note: assumes we (still) own the area as we may purge immediately
-static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slices) {
+static void mi_arena_schedule_purge(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
   const long delay = mi_arena_purge_delay();
   if (delay < 0 || _mi_preloading()) return;  // is purging allowed at all?
+  mi_assert_internal(mi_bitmap_is_clearN(arena->slices_free, slice_index, slice_count));
   if (delay == 0) {
-    // decommit directly
-    mi_arena_purge(arena, slice_index, slices);
+    // purge directly
+    mi_arena_purge(arena, slice_index, slice_count);
   }
   else {
-    // schedule decommit
-    _mi_error_message(EFAULT, "purging not yet implemented\n");
+    // schedule purge
+    mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
+    if (expire == 0) {
+      mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay);
+    }
+    //else {
+    //  mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10)); // add smallish extra delay
+    //}
+    mi_bitmap_setN(arena->slices_purge, slice_index, slice_count, NULL);
   }
 }
 
+typedef struct mi_purge_visit_info_s {
+  mi_msecs_t now;
+  mi_msecs_t delay;
+  bool all_purged;
+  bool any_purged;
+} mi_purge_visit_info_t;
+
+static bool mi_arena_try_purge_range(mi_arena_t* arena, size_t slice_index, size_t slice_count) {
+  if (mi_bitmap_try_clearN(arena->slices_free, slice_index, slice_count)) {
+    // purge
+    mi_arena_purge(arena, slice_index, slice_count);
+    // and reset the free range
+    mi_bitmap_setN(arena->slices_free, slice_index, slice_count, NULL);
+    return true;
+  }
+  else {
+    return false;
+  }
+}
+
+static bool mi_arena_try_purge_visitor(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) {
+  mi_purge_visit_info_t* vinfo = (mi_purge_visit_info_t*)arg;
+  // try to purge: first claim the free blocks
+  if (mi_arena_try_purge_range(arena, slice_index, slice_count)) {
+    vinfo->any_purged = true;
+  }
+  else {
+    // failed to claim the full range, try per slice instead
+    // (avoid `||` short-circuiting here so every slice is attempted, and track `all_purged`)
+    for (size_t i = 0; i < slice_count; i++) {
+      if (mi_arena_try_purge_range(arena, slice_index + i, 1)) { vinfo->any_purged = true; } else { vinfo->all_purged = false; }
+    }
+  }
+  // done: clear the purge bits
+  mi_bitmap_clearN(arena->slices_purge, slice_index, slice_count);
+  return true; // continue
+}
+
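
How the visitor cooperates with `purge_expire` is easier to see in isolation. A stand-alone model (illustrative names; simplified to a CAS where the patch uses a plain load and store) of the arm/disarm handshake between `mi_arena_schedule_purge` above and `mi_arena_try_purge` below:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static _Atomic int64_t purge_expire;  // 0 = not armed

    // the first scheduler arms the timer; later schedulers leave it alone
    static void schedule_purge(int64_t now, int64_t delay) {
      int64_t expected = 0;
      atomic_compare_exchange_strong(&purge_expire, &expected, now + delay);
    }

    // the purger disarms with a CAS so that frees racing with the purge
    // can re-arm the timer afterwards
    static bool should_purge(int64_t now, bool force) {
      int64_t expire = atomic_load(&purge_expire);
      if (expire == 0) return false;             // nothing scheduled
      if (!force && expire > now) return false;  // not expired yet
      return atomic_compare_exchange_strong(&purge_expire, &expire, 0);
    }
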
+
+// returns true if anything was purged
+static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force)
+{
+  // check pre-conditions
+  if (arena->memid.is_pinned) return false;
+  mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire);
+  if (expire == 0) return false;
+
+  // expired yet?
+  if (!force && expire > now) return false;
+
+  // reset expire (if not already set concurrently)
+  mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0);
+
+  // go through all purge info's
+  // (visit per range of set bits so larger spans can be purged at once)
+  mi_purge_visit_info_t vinfo = { now, mi_arena_purge_delay(), true /*all?*/, false /*any?*/};
+  _mi_bitmap_forall_set_ranges(arena->slices_purge, &mi_arena_try_purge_visitor, arena, &vinfo);
+
+  // if not fully purged, make sure to purge again in the future
+  if (!vinfo.all_purged) {
+    const long delay = mi_arena_purge_delay();
+    mi_msecs_t expected = 0;
+    mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expected, _mi_clock_now() + delay);
+  }
+  return vinfo.any_purged;
+}
+
 static void mi_arenas_try_purge(bool force, bool visit_all) {
   if (_mi_preloading() || mi_arena_purge_delay() <= 0) return;  // nothing will be scheduled
 
@@ -1444,12 +1508,27 @@ static void mi_arenas_try_purge(bool force, bool visit_all) {
   const size_t max_arena = mi_arena_get_count();
   if (max_arena == 0) return;
 
-  // _mi_error_message(EFAULT, "purging not yet implemented\n");
-  MI_UNUSED(visit_all);
-  MI_UNUSED(force);
+  // allow only one thread to purge at a time
+  static mi_atomic_guard_t purge_guard;
+  mi_atomic_guard(&purge_guard)
+  {
+    const mi_msecs_t now = _mi_clock_now();
+    const size_t arena_start = _mi_tld()->tseq % max_arena;
+    size_t max_purge_count = (visit_all ? max_arena : 1);
+    for (size_t _i = 0; _i < max_arena; _i++) {
+      size_t i = _i + arena_start;
+      if (i >= max_arena) { i -= max_arena; }
+      mi_arena_t* arena = mi_arena_from_index(i);
+      if (arena != NULL) {
+        if (mi_arena_try_purge(arena, now, force)) {
+          if (max_purge_count <= 1) break;
+          max_purge_count--;
+        }
+      }
+    }
+  }
 }
 
-
 bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
   MI_UNUSED(subproc_id); MI_UNUSED(heap_tag); MI_UNUSED(visit_blocks); MI_UNUSED(visitor); MI_UNUSED(arg);
   _mi_error_message(EINVAL, "implement mi_abandoned_visit_blocks\n");
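
The `mi_atomic_guard` macro used above is an existing mimalloc idiom; in spirit it behaves like this stand-alone sketch, where contending threads skip the guarded block instead of waiting on it:

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uintptr_t purge_guard;

    static void purge_all_arenas_once(void) {
      uintptr_t expected = 0;
      // at most one thread enters; others return immediately
      if (atomic_compare_exchange_strong(&purge_guard, &expected, 1)) {
        /* ... visit arenas round-robin and purge ... */
        atomic_store(&purge_guard, 0);
      }
    }
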
@@ -1460,8 +1539,9 @@ bool mi_abandoned_visit_blocks(mi_subproc_id_t subproc_id, int heap_tag, bool vi
 
 /* -----------------------------------------------------------
   Unloading and reloading an arena.
------------------------------------------------------------ */
-static bool mi_arena_page_register(size_t slice_index, mi_arena_t* arena, void* arg) {
-  MI_UNUSED(arg);
+static bool mi_arena_page_register(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) {
+  MI_UNUSED(arg); MI_UNUSED(slice_count);
+  mi_assert_internal(slice_count == 1);
   mi_page_t* page = (mi_page_t*)mi_arena_slice_start(arena, slice_index);
   mi_assert_internal(mi_bitmap_is_setN(page->memid.mem.arena.arena->pages, page->memid.mem.arena.slice_index, 1));
   _mi_page_map_register(page);
diff --git a/src/bitmap.c b/src/bitmap.c
index 649a7046..88b45a5e 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -1051,6 +1051,23 @@ bool mi_bitmap_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n, s
   return mi_bitmap_xsetN_(set, bitmap, idx, n, already_xset);
 }
 
+// ------- mi_bitmap_try_clearN ---------------------------------------
+
+bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n) {
+  mi_assert_internal(n>0);
+  mi_assert_internal(n<=MI_BCHUNK_BITS);
+  mi_assert_internal(idx + n <= mi_bitmap_max_bits(bitmap));
+
+  const size_t chunk_idx = idx / MI_BCHUNK_BITS;
+  const size_t cidx = idx % MI_BCHUNK_BITS;
+  mi_assert_internal(cidx + n <= MI_BCHUNK_BITS);  // don't cross chunks (for now)
+  mi_assert_internal(chunk_idx < mi_bitmap_chunk_count(bitmap));
+  if (cidx + n > MI_BCHUNK_BITS) return false;
+  bool maybe_all_clear;
+  const bool cleared = mi_bchunk_try_clearN(&bitmap->chunks[chunk_idx], cidx, n, &maybe_all_clear);
+  if (cleared && maybe_all_clear) { mi_bitmap_chunkmap_try_clear(bitmap, chunk_idx); }
+  return cleared;
+}
 
 // ------- mi_bitmap_is_xset ---------------------------------------
 
@@ -1071,6 +1088,7 @@ bool mi_bitmap_is_xsetN(mi_xset_t set, mi_bitmap_t* bitmap, size_t idx, size_t n
 
+
 /* --------------------------------------------------------------------------------
   Iterate through a bfield
 -------------------------------------------------------------------------------- */
@@ -1144,7 +1162,7 @@ static inline bool mi_bitmap_find(mi_bitmap_t* bitmap, size_t tseq, size_t n, si
     // and for each chunkmap entry we iterate over its bits to find the chunks
     mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[cmap_idx]);
     size_t cmap_entry_cycle = (cmap_idx != cmap_acc ? MI_BFIELD_BITS : cmap_acc_bits);
-    mi_bfield_cycle_iterate(cmap_entry, tseq, cmap_entry_cycle, eidx, Y)
+    mi_bfield_cycle_iterate(cmap_entry, tseq%8, cmap_entry_cycle, eidx, Y)  // reduce the tseq to 8 bins to limit the extra memory used (see `mstress`)
     {
       mi_assert_internal(eidx <= MI_BFIELD_BITS);
       const size_t chunk_idx = cmap_idx*MI_BFIELD_BITS + eidx;
@@ -1314,10 +1332,47 @@ bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_a
         size_t bidx;
         while (mi_bfield_foreach_bit(&b, &bidx)) {
           const size_t idx = base_idx + bidx;
-          if (!visit(idx, arena, arg)) return false;
+          if (!visit(idx, 1, arena, arg)) return false;
         }
       }
     }
   }
   return true;
 }
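
The callback type now carries a `slice_count`; a minimal, hypothetical visitor that works with both traversal functions (counting the total number of set bits):

    // Hypothetical visitor for the extended `mi_forall_set_fun_t` signature:
    // `_mi_bitmap_forall_set` always passes slice_count == 1, while the
    // ranges variant added below may pass larger counts.
    static bool mi_count_set_visitor(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg) {
      MI_UNUSED(arena); MI_UNUSED(slice_index);
      *((size_t*)arg) += slice_count;
      return true;  // keep visiting
    }
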
+
+// Visit all set bits in a bitmap but try to return ranges (within bfields) if possible.
+// Used by purging to purge larger ranges if possible.
+// todo: optimize further? maybe use avx512 to directly get all indices using a mask_compressstore?
+bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg) {
+  // for all chunkmap entries
+  const size_t chunkmap_max = _mi_divide_up(mi_bitmap_chunk_count(bitmap), MI_BFIELD_BITS);
+  for (size_t i = 0; i < chunkmap_max; i++) {
+    mi_bfield_t cmap_entry = mi_atomic_load_relaxed(&bitmap->chunkmap.bfields[i]);
+    size_t cmap_idx;
+    // for each chunk (corresponding to a set bit in a chunkmap entry)
+    while (mi_bfield_foreach_bit(&cmap_entry, &cmap_idx)) {
+      const size_t chunk_idx = i*MI_BFIELD_BITS + cmap_idx;
+      // for each chunk field
+      mi_bchunk_t* const chunk = &bitmap->chunks[chunk_idx];
+      for (size_t j = 0; j < MI_BCHUNK_FIELDS; j++) {
+        const size_t base_idx = (chunk_idx*MI_BCHUNK_BITS) + (j*MI_BFIELD_BITS);
+        mi_bfield_t b = mi_atomic_load_relaxed(&chunk->bfields[j]);
+        size_t bshift = 0;
+        size_t bidx;
+        while (mi_bfield_find_least_bit(b, &bidx)) {
+          b >>= bidx;
+          bshift += bidx;
+          const size_t rng = mi_ctz(~b); // all the set bits from bidx
+          mi_assert_internal(rng>=1);
+          const size_t idx = base_idx + bshift;  // (`bshift` already includes `bidx`)
+          if (!visit(idx, rng, arena, arg)) return false;
+          // skip rng (guard the shift: `rng` can be MI_BFIELD_BITS for an all-set field)
+          b = (rng < MI_BFIELD_BITS ? b >> rng : 0);
+          bshift += rng;
+        }
+      }
+    }
+  }
+  return true;
+}
+
diff --git a/src/bitmap.h b/src/bitmap.h
index 7fd09f43..72ba69c1 100644
--- a/src/bitmap.h
+++ b/src/bitmap.h
@@ -171,6 +171,22 @@ static inline bool mi_bitmap_is_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n
   return mi_bitmap_is_xsetN(MI_BIT_CLEAR, bitmap, idx, n);
 }
 
+static inline bool mi_bitmap_is_set(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_is_setN(bitmap, idx, 1);
+}
+
+static inline bool mi_bitmap_is_clear(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_is_clearN(bitmap, idx, 1);
+}
+
+
+bool mi_bitmap_try_clearN(mi_bitmap_t* bitmap, size_t idx, size_t n);
+
+static inline bool mi_bitmap_try_clear(mi_bitmap_t* bitmap, size_t idx) {
+  return mi_bitmap_try_clearN(bitmap, idx, 1);
+}
+
+
 // Specialized versions for common bit sequence sizes
 mi_decl_nodiscard bool mi_bitmap_try_find_and_clear(mi_bitmap_t* bitmap, size_t tseq, size_t* pidx);  // 1-bit
@@ -212,9 +228,12 @@ void mi_bitmap_clear_once_set(mi_bitmap_t* bitmap, size_t idx);
 
 bool mi_bitmap_bsr(mi_bitmap_t* bitmap, size_t* idx);
 
-typedef bool (mi_forall_set_fun_t)(size_t slice_index, mi_arena_t* arena, void* arg2);
+typedef bool (mi_forall_set_fun_t)(size_t slice_index, size_t slice_count, mi_arena_t* arena, void* arg2);
 
-// Visit all set bits in a bitmap
+// Visit all set bits in a bitmap (`slice_count == 1`)
 bool _mi_bitmap_forall_set(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
 
+// Visit all set bits in a bitmap with larger ranges if possible (`slice_count >= 1`)
+bool _mi_bitmap_forall_set_ranges(mi_bitmap_t* bitmap, mi_forall_set_fun_t* visit, mi_arena_t* arena, void* arg);
+
 #endif // MI_BITMAP_H
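
The run-extraction loop at the heart of `_mi_bitmap_forall_set_ranges` can be tried in isolation; a stand-alone demonstration (assumes a compiler with the GCC/Clang `__builtin_ctzll` builtin):

    #include <stdint.h>
    #include <stdio.h>

    // Repeatedly find the least significant set bit and the length of the
    // contiguous run of set bits that starts there (trailing ones of b are
    // the trailing zeros of ~b).
    int main(void) {
      uint64_t b = 0x00F0000000000F06ULL;  // example: runs at bits 1, 8, 52
      size_t bshift = 0;
      while (b != 0) {
        size_t bidx = (size_t)__builtin_ctzll(b);  // index of least set bit
        b >>= bidx; bshift += bidx;
        size_t rng = (~b == 0 ? 64 : (size_t)__builtin_ctzll(~b));  // run length
        printf("run at bit %zu, length %zu\n", bshift, rng);
        b = (rng < 64 ? b >> rng : 0);  // skip the run; avoid a full-width shift
        bshift += rng;
      }
      return 0;
    }

This prints "run at bit 1, length 2", "run at bit 8, length 4", and "run at bit 52, length 4" for the example field.
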
diff --git a/src/options.c b/src/options.c
index 8fcee452..4f1a00b8 100644
--- a/src/options.c
+++ b/src/options.c
@@ -79,8 +79,12 @@ typedef struct mi_option_desc_s {
 #endif
 
 #ifndef MI_DEFAULT_ALLOW_LARGE_OS_PAGES
+#if defined(__linux__) && !defined(__ANDROID__)
+#define MI_DEFAULT_ALLOW_LARGE_OS_PAGES  1
+#else
 #define MI_DEFAULT_ALLOW_LARGE_OS_PAGES  0
 #endif
+#endif
 
 #ifndef MI_DEFAULT_RESERVE_HUGE_OS_PAGES
 #define MI_DEFAULT_RESERVE_HUGE_OS_PAGES  0
@@ -132,7 +136,7 @@ static mi_option_desc_t options[_mi_option_last] =
 #else
   { 1,    UNINIT, MI_OPTION(eager_commit_delay) },   // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
 #endif
-  { -1,   UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) },  // purge delay in milli-seconds
+  { 1000, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) },  // purge delay in milli-seconds
   { 0,    UNINIT, MI_OPTION(use_numa_nodes) },       // 0 = use available numa nodes, otherwise use at most N nodes.
   { 0,    UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) },  // 1 = do not use OS memory for allocation (but only reserved arenas)
   { 100,  UNINIT, MI_OPTION(os_tag) },               // only apple specific for now but might serve more or less related purpose
@@ -141,7 +145,7 @@ static mi_option_desc_t options[_mi_option_last] =
   { 10,   UNINIT, MI_OPTION(max_segment_reclaim)},   // max. percentage of the abandoned segments to be reclaimed per try.
   { 0,    UNINIT, MI_OPTION(destroy_on_exit)},       // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
   { MI_DEFAULT_ARENA_RESERVE, UNINIT, MI_OPTION(arena_reserve) },  // reserve memory N KiB at a time (=1GiB) (use `option_get_size`)
-  { 10,   UNINIT, MI_OPTION(arena_purge_mult) },     // purge delay multiplier for arena's
+  { 1,    UNINIT, MI_OPTION(arena_purge_mult) },     // purge delay multiplier for arena's
   { 1,    UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) },
   { MI_DEFAULT_DISALLOW_ARENA_ALLOC, UNINIT, MI_OPTION(disallow_arena_alloc) },  // 1 = do not use arena's for allocation (except if using specific arena id's)
   { 400,  UNINIT, MI_OPTION(retry_on_oom) },         // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries.
@@ -192,7 +196,7 @@ void _mi_options_init(void) {
     }
   }
   _mi_verbose_message("guarded build: %s\n", mi_option_get(mi_option_guarded_sample_rate) != 0 ? "enabled" : "disabled");
-  #endif
+  #endif
 }
 
 long _mi_option_get_fast(mi_option_t option) {
diff --git a/src/prim/unix/prim.c b/src/prim/unix/prim.c
index e1ca3964..eb351f69 100644
--- a/src/prim/unix/prim.c
+++ b/src/prim/unix/prim.c
@@ -61,6 +61,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #include
 #endif
 
+#define MI_UNIX_LARGE_PAGE_SIZE  (2*MI_MiB)   // TODO: can we query the OS for this?
 
 //------------------------------------------------------------------------------------
 // Use syscalls for some primitives to allow for libraries that override open/read/close etc.
@@ -146,7 +147,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config )
     }
   #endif
   }
-  config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this?
+  config->large_page_size = MI_UNIX_LARGE_PAGE_SIZE;
   config->has_overcommit = unix_detect_overcommit();
   config->has_partial_free = true;    // mmap can free in parts
   config->has_virtual_reserve = true; // todo: check if this true for NetBSD?  (for anonymous mmap with PROT_NONE)
@@ -361,6 +362,9 @@ int _mi_prim_alloc(void* hint_addr, size_t size, size_t try_alignment, bool comm
   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
   mi_assert_internal(commit || !allow_large);
   mi_assert_internal(try_alignment > 0);
+  if (hint_addr == NULL && size >= 8*MI_UNIX_LARGE_PAGE_SIZE && try_alignment > 1 && _mi_is_power_of_two(try_alignment) && try_alignment < MI_UNIX_LARGE_PAGE_SIZE) {
+    try_alignment = MI_UNIX_LARGE_PAGE_SIZE;  // try to align along large page size for larger allocations
+  }
 
   *is_zero = true;
   int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE);
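
The alignment bump presumably helps the kernel back large mappings with (transparent) huge pages; the patch itself only adjusts the alignment hint. A rough illustration of the idea, assuming Linux (over-allocates and does not trim the unused head and tail, which a real implementation would):

    #define _DEFAULT_SOURCE
    #include <stdint.h>
    #include <stddef.h>
    #include <sys/mman.h>

    // Map anonymous memory, align the result to 2 MiB, and hint THP backing.
    static void* alloc_2mib_aligned(size_t size) {
      const size_t align = 2*1024*1024;
      void* p = mmap(NULL, size + align, PROT_READ|PROT_WRITE,
                     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
      if (p == MAP_FAILED) return NULL;
      uintptr_t aligned = ((uintptr_t)p + (align-1)) & ~(uintptr_t)(align-1);
      madvise((void*)aligned, size, MADV_HUGEPAGE);  // hint huge page backing
      return (void*)aligned;
    }
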
diff --git a/test/test-stress.c b/test/test-stress.c
index 4fe6e0c6..126a7601 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -40,10 +40,10 @@ static int ITER = 20;
 static int THREADS = 8;
 static int SCALE = 10;
 static int ITER = 10;
-#elif 1
+#elif 0
 static int THREADS = 4;
 static int SCALE = 10;
-static int ITER = 10;
+static int ITER = 20;
 #define ALLOW_LARGE false
 #elif 0
 static int THREADS = 32;
@@ -260,8 +260,12 @@ static void test_stress(void) {
     //mi_debug_show_arenas();
   #endif
   #if !defined(NDEBUG) || defined(MI_TSAN)
-    if ((n + 1) % 10 == 0)
-      { printf("- iterations left: %3d\n", ITER - (n + 1)); }
+    if ((n + 1) % 10 == 0) {
+      printf("- iterations left: %3d\n", ITER - (n + 1));
+      //mi_debug_show_arenas(true, false, false);
+      //mi_collect(true);
+      //mi_debug_show_arenas(true, false, false);
+    }
   #endif
   }
   // clean up
@@ -344,8 +348,8 @@ int main(int argc, char** argv) {
 #ifndef USE_STD_MALLOC
   #ifndef NDEBUG
   //mi_debug_show_arenas(true, true, false);
-  // mi_debug_show_arenas(true, false, false);
-  // mi_collect(true);
+  mi_debug_show_arenas(true, false, false);
+  mi_collect(true);
   mi_debug_show_arenas(true,false,false);
 #else
   mi_collect(false);
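
Downstream, the new purging behavior can be exercised along these lines (a sketch; the option names are real, the 100ms value is arbitrary, and `mi_collect(true)` is the same forced collection the test above uses):

    #include <mimalloc.h>

    int main(void) {
      mi_option_set(mi_option_purge_delay, 100);  // purge free slices after 100ms (default 1000)
      void* p = mi_malloc(8 * 1024 * 1024);
      mi_free(p);
      // after the delay, a later allocation or free triggers the purge; or force it:
      mi_collect(true);  // purge expired free slices now
      return 0;
    }
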